# Source code for inspirehep.modules.hal.bulk_push
# -*- coding: utf-8 -*-
#
# This file is part of INSPIRE.
# Copyright (C) 2014-2017 CERN.
#
# INSPIRE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# INSPIRE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.
"""
IMPORTANT This script is a copy/paste of:
https://github.com/inspirehep/inspire-next/issues/2629
It is unreliable and absolutely unmaintainable.
It will be refactored with this user story:
https://its.cern.ch/jira/browse/INSPIR-249
To be run with:
$ /usr/bin/time -v inspirehep hal push
"""
from __future__ import absolute_import, division, print_function
import datetime
import time
from flask import current_app
from invenio_records.models import RecordMetadata
from inspirehep.modules.hal.core.tei import convert_to_tei
from inspirehep.modules.hal.core.sword import create, update
# Path of the log file: ``run`` opens it in write mode (truncating any
# previous content) and writes one line per created, updated or failed record.
HAL_LOG_FILE = '/opt/inspire/HAL.log'
def _set_config():
    """Store the HAL SWORD URLs in the current application's config.

    Sets the ``HAL_COL_IRI`` and ``HAL_EDIT_IRI`` keys of
    ``current_app.config``.
    """
    hal_endpoints = (
        ('HAL_COL_IRI', 'https://api.archives-ouvertes.fr/sword/hal'),
        ('HAL_EDIT_IRI', 'https://api.archives-ouvertes.fr/sword/'),
    )
    for config_key, iri in hal_endpoints:
        current_app.config[config_key] = iri
def run(username, password, limit, yield_amt):
    """Push every record flagged for export to HAL and log the outcome.

    Selects the records whose ``_export_to`` JSON field contains
    ``{"HAL": true}``. Each selected record that belongs to the
    ``Literature`` or ``HAL Hidden`` collection is converted to TEI and then
    either updated on HAL (when it carries an external identifier with the
    ``HAL`` schema) or created there. One line per outcome is written to
    ``HAL_LOG_FILE``, which is truncated at the start of each run.

    Args:
        username: value stored under the ``HAL_USER_NAME`` config key.
        password: value stored under the ``HAL_USER_PASS`` config key.
        limit: maximum number of records to select; a value ``<= 0`` means
            that no limit is applied.
        yield_amt: batch size handed to ``yield_per`` on the query.

    Returns:
        tuple: ``(total, now, ok, ko)`` where ``total`` is the zero-based
        index of the last record iterated, ``now`` is the elapsed time (as
        a string) sampled at the latest record whose index is a multiple of
        ten, ``ok`` is the number of successful pushes and ``ko`` the number
        of failed conversions or pushes. When no record is selected,
        ``total`` is ``0`` and ``now`` is the time elapsed before the loop.
    """
    start = time.time()
    _set_config()
    current_app.config['HAL_USER_NAME'] = username
    current_app.config['HAL_USER_PASS'] = password

    records = RecordMetadata.query.filter(
        RecordMetadata.json['_export_to'].op('@>')('{"HAL": true}'))
    if limit > 0:
        records = records.limit(limit)

    ok = ko = 0
    # Bind the returned values up front so that an empty result set does not
    # make the final ``return`` fail on unbound names.
    total = 0
    now = str(datetime.timedelta(seconds=time.time() - start))

    with open(HAL_LOG_FILE, 'w') as log_file_fd:
        for total, raw_record in enumerate(records.yield_per(yield_amt)):
            # The elapsed time is only refreshed every tenth record.
            if total % 10 == 0:
                now = str(datetime.timedelta(seconds=time.time() - start))

            record = raw_record.json
            collections = record['_collections']
            if 'Literature' not in collections and 'HAL Hidden' not in collections:
                continue

            try:
                tei = convert_to_tei(record)
            except Exception as e:
                log_file_fd.write('EXC TEI: %s %s\n' % (record['control_number'], str(e)))
                ko += 1
                continue

            # Keep the last failure in a regular local: the name bound by
            # ``except ... as`` is removed when the handler exits on
            # Python 3, so it cannot be used for logging after the loop.
            last_error = None
            success = False
            for _ in range(2):  # one attempt plus a single retry
                try:
                    # The identifier lookup stays inside the ``try`` so that
                    # a malformed identifier entry is counted as a failed
                    # push instead of aborting the whole run.
                    hal_id = ''
                    for id_ in record.get('external_system_identifiers', []):
                        if id_['schema'] == 'HAL':
                            hal_id = id_['value']

                    if hal_id:
                        update(tei.encode('utf8'), hal_id.encode('utf8'))
                        log_file_fd.write('UPD: %s %s\n' % (record['control_number'], hal_id))
                    else:
                        receipt = create(tei.encode('utf8'))
                        log_file_fd.write('NEW: %s %s\n' % (record['control_number'], receipt.id))
                    success = True
                    break
                except Exception as e:
                    last_error = e

            if success:
                ok += 1
            else:
                log_file_fd.write('EXC HAL: %s %s\n' % (record['control_number'], str(last_error)))
                ko += 1

    return total, now, ok, ko