# Source code for inspirehep.modules.records.receivers

# -*- coding: utf-8 -*-
#
# This file is part of INSPIRE.
# Copyright (C) 2014-2017 CERN.
#
# INSPIRE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# INSPIRE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

"""Records receivers."""

from __future__ import absolute_import, division, print_function

import uuid
import logging
from copy import deepcopy

from flask import current_app
from flask_sqlalchemy import models_committed
from elasticsearch import NotFoundError
from time_execution import time_execution

from invenio_records.models import RecordMetadata
from invenio_records.signals import (
    after_record_update,
    before_record_insert,
    before_record_update,
)

from inspire_utils.record import get_value

from inspirehep.modules.authors.utils import phonetic_blocks
from inspirehep.modules.orcid import tasks as orcid_tasks
from inspirehep.modules.orcid.utils import (
    get_push_access_tokens,
    get_orcids_for_push,
)
from inspirehep.modules.pidstore.utils import get_pid_type_from_schema
from inspirehep.modules.records.api import InspireRecord
from inspirehep.modules.records.errors import MissingInspireRecordError
from inspirehep.modules.records.tasks import index_modified_citations_from_record
from inspirehep.modules.records.utils import (
    is_author,
    is_book,
    is_data,
    is_experiment,
    is_hep,
    is_institution,
    is_journal,
    populate_abstract_source_suggest,
    populate_affiliation_suggest,
    populate_author_count,
    populate_author_suggest,
    populate_authors_full_name_unicode_normalized,
    populate_authors_name_variations,
    populate_bookautocomplete,
    populate_citations_count,
    populate_earliest_date,
    populate_experiment_suggest,
    populate_inspire_document_type,
    populate_name_variations,
    populate_number_of_references,
    populate_recid_from_ref,
    populate_title_suggest,
    populate_facet_author_name,
)
from invenio_indexer.api import RecordIndexer

LOGGER = logging.getLogger(__name__)


@before_record_insert.connect
@before_record_update.connect
def assign_phonetic_block(sender, record, *args, **kwargs):
    """Assign a phonetic block to each signature of a Literature record.

    Uses the NYSIIS algorithm to compute a phonetic block from each
    signature's full name. Authors for which no (truthy) block is
    returned are left without a ``signature_block``.

    Connected to both ``before_record_insert`` and ``before_record_update``.
    Records for which ``is_hep`` is false are ignored. If computing the
    blocks raises, the error is logged and no author is modified.

    Args:
        sender: the signal sender (unused).
        record: the record being stored; its ``authors`` entries are
            updated in place with a ``signature_block`` key.
    """
    if not is_hep(record):
        return

    author_names = get_value(record, 'authors.full_name', default=[])

    try:
        signature_blocks = phonetic_blocks(author_names)
    except Exception as err:
        # Deliberately best-effort: a failure here must not block the save.
        # ``%s`` is used for the control number because ``record.get`` yields
        # ``None`` when it is missing, which ``%d`` cannot format.
        current_app.logger.error(
            'Cannot extract phonetic blocks for record %s: %s',
            record.get('control_number'), err)
        return

    for author in record.get('authors', []):
        # ``.get`` so that an author entry lacking ``full_name`` is skipped
        # instead of raising ``KeyError``.
        full_name = author.get('full_name')
        if full_name in signature_blocks and signature_blocks[full_name]:
            author['signature_block'] = signature_blocks[full_name]


@before_record_insert.connect
@before_record_update.connect
def assign_uuid(sender, record, *args, **kwargs):
    """Assign a UUID to each signature of a Literature record.

    Connected to both ``before_record_insert`` and ``before_record_update``.
    Records for which ``is_hep`` is false are ignored.

    Args:
        sender: the signal sender (unused).
        record: the record being stored; every entry of ``authors`` that has
            no ``uuid`` key receives a freshly generated random UUID (as a
            string). Existing ``uuid`` values are kept.
    """
    if not is_hep(record):
        return

    authors = record.get('authors', [])

    for author in authors:
        if 'uuid' not in author:
            author['uuid'] = str(uuid.uuid4())


@after_record_update.connect
@time_execution
def push_to_orcid(sender, record, *args, **kwargs):
    """If needed, queue the push of the new changes to ORCID.

    Connected to ``after_record_update`` (the connected callable is the
    ``time_execution``-wrapped function). Nothing is queued when:

    * the ``FEATURE_FLAG_ENABLE_ORCID_PUSH`` config value is falsy
      (a warning is logged),
    * ``is_hep(record)`` is false, or
    * the record has no ``control_number``.

    Otherwise one ``orcid_push`` task is sent to the ``orcid_push`` queue for
    each ``(orcid, access_token)`` pair yielded by ``get_push_access_tokens``.

    Args:
        sender: the signal sender (unused).
        record: the updated record; ``record.model.version_id`` is forwarded
            to the task as ``record_db_version``.
    """
    if not current_app.config['FEATURE_FLAG_ENABLE_ORCID_PUSH']:
        LOGGER.warning('ORCID push feature flag not enabled')
        return

    if not is_hep(record):
        return

    # Ensure there is a control number. This is not always the case because of broken store_record.
    if 'control_number' not in record:
        return

    orcids = get_orcids_for_push(record)
    orcids_and_tokens = get_push_access_tokens(orcids)

    # Same payload for every task, so build it once outside the loop.
    kwargs_to_pusher = dict(record_db_version=record.model.version_id)

    for orcid, access_token in orcids_and_tokens:
        orcid_tasks.orcid_push.apply_async(
            queue='orcid_push',
            kwargs={
                'orcid': orcid,
                'rec_id': record['control_number'],
                'oauth_token': access_token,
                'kwargs_to_pusher': kwargs_to_pusher,
            },
        )


@after_record_update.connect
def enhance_record(sender, record, *args, **kwargs):
    """Enhance the record for ES.

    Connected to ``after_record_update``. A deep copy of ``record`` is passed
    through ``enhance_before_index`` and stored on the record's model as
    ``_enhanced_record``; ``record`` itself is not passed to the enhancers.

    Args:
        sender: the signal sender (unused).
        record: the updated record; must be an ``InspireRecord``.

    Raises:
        MissingInspireRecordError: if ``record`` is not an ``InspireRecord``.
    """
    if not isinstance(record, InspireRecord):
        raise MissingInspireRecordError("Record is not InspireRecord!")
    # Work on a copy so the index-only fields never end up in the stored record.
    enhanced_record = deepcopy(record)
    enhance_before_index(enhanced_record)
    record.model._enhanced_record = enhanced_record


@models_committed.connect
def index_after_commit(sender, changes):
    """Index a record in ES after it was committed to the DB.

    This cannot happen in an ``after_record_commit`` receiver from Invenio-Records
    because, despite the name, at that point we are not yet sure whether the record
    has been really committed to the DB.

    For every ``RecordMetadata`` instance in ``changes``:

    * on ``insert``/``update`` of a record whose JSON has no truthy
      ``deleted`` key, the record is indexed;
    * in every other case the record is deleted from the index, ignoring
      ``NotFoundError``;
    * afterwards ``index_modified_citations_from_record`` is queued with the
      record's PID type, control number and DB version.

    Args:
        sender: the signal sender (unused).
        changes: iterable of ``(model_instance, change)`` pairs, where
            ``change`` is compared against ``'insert'`` and ``'update'``.
    """
    indexer = RecordIndexer()
    for model_instance, change in changes:
        # Commits of any other model are none of our business.
        if not isinstance(model_instance, RecordMetadata):
            continue

        if change in ('insert', 'update') and not model_instance.json.get("deleted"):
            # Prefer the enhanced copy when one was attached to the model
            # (``enhance_record`` sets this attribute); otherwise fall back
            # to the raw stored JSON.
            if hasattr(model_instance, '_enhanced_record'):
                record = model_instance._enhanced_record
            else:
                record = model_instance.json
            indexer.index(InspireRecord(record, model_instance))
        else:
            try:
                indexer.delete(InspireRecord(
                    model_instance.json, model_instance))
            except NotFoundError:
                # Record not found in ES: nothing to delete.
                # NOTE(review): the rest of this function identifies records
                # by ``control_number``; ``json.get("id")`` may always be
                # ``None`` here -- confirm which identifier is intended.
                LOGGER.debug('Record %s not found in ES',
                             model_instance.json.get("id"))

        pid_type = get_pid_type_from_schema(model_instance.json['$schema'])
        pid_value = model_instance.json['control_number']
        db_version = model_instance.version_id

        index_modified_citations_from_record.delay(pid_type, pid_value, db_version)


def enhance_before_index(record):
    """Run all the receivers that enhance the record for ES in the right order.

    ``populate_recid_from_ref`` runs for every record; the remaining
    enhancers are selected by record type, and only the first matching type
    (HEP, author, institution, experiment, journal, data) is applied.

    .. note::

       ``populate_recid_from_ref`` **MUST** come before ``populate_bookautocomplete``
       because the latter puts a JSON reference in a completion _source, which
       would be expanded to an incorrect ``_source_recid`` by the former.

    Args:
        record: the record to enhance. The return values of the ``populate_*``
            helpers are ignored, so they presumably modify ``record`` in
            place -- verify against their definitions.
    """
    # Must stay first; see the note in the docstring.
    populate_recid_from_ref(record)

    if is_hep(record):
        populate_abstract_source_suggest(record)
        populate_earliest_date(record)
        populate_author_count(record)
        populate_authors_full_name_unicode_normalized(record)
        populate_inspire_document_type(record)
        populate_name_variations(record)
        populate_number_of_references(record)
        populate_citations_count(record)
        populate_facet_author_name(record)

        # Books are a subset of HEP records and get one extra enhancer.
        if is_book(record):
            populate_bookautocomplete(record)

    elif is_author(record):
        populate_authors_name_variations(record)
        populate_author_suggest(record)

    elif is_institution(record):
        populate_affiliation_suggest(record)

    elif is_experiment(record):
        populate_experiment_suggest(record)

    elif is_journal(record):
        populate_title_suggest(record)

    elif is_data(record):
        populate_citations_count(record)
# Source code for inspirehep.modules.records.receivers
#
# INSPIRE-HEP
#
# Navigation
#
# Related Topics