Source code for inspirehep.modules.records.receivers

# -*- coding: utf-8 -*-
#
# This file is part of INSPIRE.
# Copyright (C) 2014-2017 CERN.
#
# INSPIRE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# INSPIRE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

"""Records receivers."""

from __future__ import absolute_import, division, print_function

import uuid
import logging
from copy import deepcopy

from flask import current_app
from flask_sqlalchemy import models_committed
from elasticsearch import NotFoundError
from time_execution import time_execution

from invenio_records.models import RecordMetadata
from invenio_records.signals import (
    after_record_update,
    before_record_insert,
    before_record_update,
)

from inspire_utils.record import get_value

from inspirehep.modules.authors.utils import phonetic_blocks
from inspirehep.modules.orcid import tasks as orcid_tasks
from inspirehep.modules.orcid.utils import (
    get_push_access_tokens,
    get_orcids_for_push,
)
from inspirehep.modules.pidstore.utils import get_pid_type_from_schema
from inspirehep.modules.records.api import InspireRecord
from inspirehep.modules.records.errors import MissingInspireRecordError
from inspirehep.modules.records.tasks import index_modified_citations_from_record
from inspirehep.modules.records.utils import (
    is_author,
    is_book,
    is_data,
    is_experiment,
    is_hep,
    is_institution,
    is_journal,
    populate_abstract_source_suggest,
    populate_affiliation_suggest,
    populate_author_count,
    populate_author_suggest,
    populate_authors_full_name_unicode_normalized,
    populate_authors_name_variations,
    populate_bookautocomplete,
    populate_citations_count,
    populate_earliest_date,
    populate_experiment_suggest,
    populate_inspire_document_type,
    populate_name_variations,
    populate_number_of_references,
    populate_recid_from_ref,
    populate_title_suggest,
    populate_facet_author_name,
)
from invenio_indexer.api import RecordIndexer

LOGGER = logging.getLogger(__name__)


@before_record_insert.connect
@before_record_update.connect
[docs]def assign_phonetic_block(sender, record, *args, **kwargs): """Assign a phonetic block to each signature of a Literature record. Uses the NYSIIS algorithm to compute a phonetic block from each signature's full name, skipping those that are not recognized as real names, but logging an error when that happens. """ if not is_hep(record): return author_names = get_value(record, 'authors.full_name', default=[]) try: signature_blocks = phonetic_blocks(author_names) except Exception as err: current_app.logger.error( 'Cannot extract phonetic blocks for record %d: %s', record.get('control_number'), err) return for author in record.get('authors', []): if author['full_name'] in signature_blocks and signature_blocks[author['full_name']]: author['signature_block'] = signature_blocks[author['full_name']]
@before_record_insert.connect @before_record_update.connect
[docs]def assign_uuid(sender, record, *args, **kwargs): """Assign a UUID to each signature of a Literature record.""" if not is_hep(record): return authors = record.get('authors', []) for author in authors: if 'uuid' not in author: author['uuid'] = str(uuid.uuid4())
@after_record_update.connect @time_execution
[docs]def push_to_orcid(sender, record, *args, **kwargs): """If needed, queue the push of the new changes to ORCID.""" if not current_app.config['FEATURE_FLAG_ENABLE_ORCID_PUSH']: LOGGER.warning('ORCID push feature flag not enabled') return if not is_hep(record): return # Ensure there is a control number. This is not always the case because of broken store_record. if 'control_number' not in record: return orcids = get_orcids_for_push(record) orcids_and_tokens = get_push_access_tokens(orcids) kwargs_to_pusher = dict(record_db_version=record.model.version_id) for orcid, access_token in orcids_and_tokens: orcid_tasks.orcid_push.apply_async( queue='orcid_push', kwargs={ 'orcid': orcid, 'rec_id': record['control_number'], 'oauth_token': access_token, 'kwargs_to_pusher': kwargs_to_pusher, }, )
@after_record_update.connect
[docs]def enhance_record(sender, record, *args, **kwargs): """Enhance the record for ES""" if not isinstance(record, InspireRecord): raise MissingInspireRecordError("Record is not InspireRecord!") enhanced_record = deepcopy(record) enhance_before_index(enhanced_record) record.model._enhanced_record = enhanced_record
@models_committed.connect
[docs]def index_after_commit(sender, changes): """Index a record in ES after it was committed to the DB. This cannot happen in an ``after_record_commit`` receiver from Invenio-Records because, despite the name, at that point we are not yet sure whether the record has been really committed to the DB. """ indexer = RecordIndexer() for model_instance, change in changes: if isinstance(model_instance, RecordMetadata): if change in ('insert', 'update') and not model_instance.json.get("deleted"): if hasattr(model_instance, '_enhanced_record'): record = model_instance._enhanced_record else: record = model_instance.json indexer.index(InspireRecord(record, model_instance)) else: try: indexer.delete(InspireRecord( model_instance.json, model_instance)) except NotFoundError: # Record not found in ES LOGGER.debug('Record %s not found in ES', model_instance.json.get("id")) pass pid_type = get_pid_type_from_schema(model_instance.json['$schema']) pid_value = model_instance.json['control_number'] db_version = model_instance.version_id index_modified_citations_from_record.delay(pid_type, pid_value, db_version)
[docs]def enhance_before_index(record): """Run all the receivers that enhance the record for ES in the right order. .. note:: ``populate_recid_from_ref`` **MUST** come before ``populate_bookautocomplete`` because the latter puts a JSON reference in a completion _source, which would be expanded to an incorrect ``_source_recid`` by the former. """ populate_recid_from_ref(record) if is_hep(record): populate_abstract_source_suggest(record) populate_earliest_date(record) populate_author_count(record) populate_authors_full_name_unicode_normalized(record) populate_inspire_document_type(record) populate_name_variations(record) populate_number_of_references(record) populate_citations_count(record) populate_facet_author_name(record) if is_book(record): populate_bookautocomplete(record) elif is_author(record): populate_authors_name_variations(record) populate_author_suggest(record) elif is_institution(record): populate_affiliation_suggest(record) elif is_experiment(record): populate_experiment_suggest(record) elif is_journal(record): populate_title_suggest(record) elif is_data(record): populate_citations_count(record)