# Source code for inspirehep.modules.workflows.tasks.beard

# -*- coding: utf-8 -*-
#
# This file is part of INSPIRE.
# Copyright (C) 2014-2017 CERN.
#
# INSPIRE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# INSPIRE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

"""Set of workflow tasks for beard API."""

from __future__ import absolute_import, division, print_function

import requests
from flask import current_app

from inspire_utils.record import get_value
from inspirehep.modules.workflows.utils import json_api_request

from ..utils import with_debug_logging


def get_beard_url():
    """Build the URL of the Beard coreness predictor endpoint.

    The base URL is read from the ``BEARD_API_URL`` key of the current
    application's configuration.

    :return: the predictor URL, or ``None`` when ``BEARD_API_URL`` is unset
        or empty.
    """
    api_root = current_app.config.get('BEARD_API_URL')
    if api_root:
        return '{base_url}/predictor/coreness'.format(base_url=api_root)
    return None


def prepare_payload(record):
    """Prepare payload to send to Beard API.

    Looks up ``titles.title``, ``abstracts.value`` and
    ``arxiv_eprints.categories`` in the record and keeps the first non-empty
    title, the first non-empty abstract, and the first entry of every
    non-empty categories value.

    :param record: record metadata to extract the fields from.
    :return: dict with the keys ``title``, ``abstract`` and ``categories``;
        a key keeps its empty default (``""`` or ``[]``) when the record
        provides no usable value for it.
    """
    payload = dict(title="", abstract="", categories=[])

    # Build real lists instead of relying on ``filter``/``map``: on Python 3
    # those return lazy iterators, which are always truthy and cannot be
    # indexed, so the emptiness checks and ``[0]`` lookups below would break.
    titles = [
        title for title in get_value(record, "titles.title", []) if title
    ]
    # FIXME May have to normalize categories in the future
    arxiv_categories = [
        categories[0]
        for categories in get_value(record, "arxiv_eprints.categories", [])
        if categories
    ]
    abstracts = [
        abstract
        for abstract in get_value(record, "abstracts.value", [])
        if abstract
    ]

    if titles:
        payload['title'] = titles[0]
    if abstracts:
        payload['abstract'] = abstracts[0]
    if arxiv_categories:
        payload['categories'] = arxiv_categories
    return payload


@with_debug_logging
def guess_coreness(obj, eng):
    """Workflow task to ask Beard API for a coreness assessment.

    Sends the payload built from ``obj.data`` to the Beard predictor and, on
    a usable answer, stores the prediction under
    ``obj.extra_data["relevance_prediction"]``. Nothing is stored when no
    Beard URL is configured, when the request fails, when the response is
    empty, or when it carries fewer than the three expected scores.

    :param obj: workflow object; ``obj.data`` is read and ``obj.extra_data``
        is written.
    :param eng: workflow engine; not used by this task.
    :return: ``None``.
    """
    predictor_url = get_beard_url()
    if not predictor_url:
        return

    # FIXME: Have option to select different prediction models when
    # available in the API
    payload = prepare_payload(obj.data)

    try:
        results = json_api_request(predictor_url, payload)
    except requests.exceptions.RequestException as err:
        # Best effort: a failing predictor must not fail the workflow, but
        # leave a trace so the missing prediction can be explained.
        current_app.logger.warning(
            "Beard coreness request failed: {0}".format(err)
        )
        return

    if not results:
        return

    raw_scores = results.get('scores') or []
    if len(raw_scores) < 3:
        # ``max()`` raises on an empty sequence and the CORE / Non-CORE /
        # Rejected lookups below need three entries, so skip the prediction
        # instead of crashing the task on an incomplete response.
        current_app.logger.warning(
            "Beard response has no usable scores: {0}".format(results)
        )
        return

    max_score = max(raw_scores)
    decision = results.get('decision')
    scores = {
        "CORE": raw_scores[0],
        "Non-CORE": raw_scores[1],
        "Rejected": raw_scores[2],
    }

    # Generate a normalized relevance_score useful for sorting.
    # We assume a CORE paper to have the highest relevance so we add a
    # significant value to separate it from Non-CORE and Rejected.
    # Normally scores range from -2 / +2 so 10 is significant.
    # Non-CORE scores are untouched, while for Rejected the score is negated
    # and 10 is subtracted from it.
    # Finally this provides one normalized score of relevance across
    # all categories of papers.
    relevance_score = max_score
    if decision == "CORE":
        relevance_score += 10
    elif decision == "Rejected":
        relevance_score = (max_score * -1) - 10

    # FIXME: Add top_words info when available from the API
    obj.extra_data["relevance_prediction"] = dict(
        max_score=max_score,
        decision=decision,
        scores=scores,
        relevance_score=relevance_score
    )
    current_app.logger.info("Prediction results: {0}".format(
        obj.extra_data["relevance_prediction"])
    )
# Source code for inspirehep.modules.workflows.tasks.beard
#
# INSPIRE-HEP
#
# Navigation
#
# Related Topics