Source code for inspirehep.modules.workflows.tasks.beard

# -*- coding: utf-8 -*-
#
# This file is part of INSPIRE.
# Copyright (C) 2014-2017 CERN.
#
# INSPIRE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# INSPIRE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

"""Set of workflow tasks for beard API."""

from __future__ import absolute_import, division, print_function

import requests
from flask import current_app

from inspire_utils.record import get_value
from inspirehep.modules.workflows.utils import json_api_request

from ..utils import with_debug_logging


def get_beard_url():
    """Build the URL of the Beard coreness predictor endpoint.

    The base URL is read from the ``BEARD_API_URL`` key of the current
    Flask application's configuration.

    :return: the configured base URL followed by ``/predictor/coreness``,
        or ``None`` when ``BEARD_API_URL`` is missing or empty.
    """
    beard_api_url = current_app.config.get('BEARD_API_URL')
    if beard_api_url:
        return '{base_url}/predictor/coreness'.format(base_url=beard_api_url)
    # No endpoint configured: callers treat ``None`` as "Beard disabled".
    return None
def prepare_payload(record):
    """Prepare payload to send to Beard API.

    The payload always contains the three keys ``title``, ``abstract`` and
    ``categories``; each keeps its empty default (``""``, ``""``, ``[]``)
    when the record provides no usable value for it.

    :param record: record metadata, read through ``get_value`` at the paths
        ``titles.title``, ``abstracts.value`` and
        ``arxiv_eprints.categories``.
    :return: dict with the first non-empty title, the first non-empty
        abstract, and a list holding the first element of every non-empty
        ``arxiv_eprints.categories`` entry.
    """
    # Real lists are built here instead of using ``filter``/``map``: on
    # Python 3 those return lazy iterators, which are always truthy, cannot
    # be indexed with ``[0]`` and cannot be serialized in the payload.
    titles = [
        title for title in get_value(record, "titles.title", []) if title
    ]
    abstracts = [
        abstract
        for abstract in get_value(record, "abstracts.value", [])
        if abstract
    ]
    # FIXME May have to normalize categories in the future
    arxiv_categories = [
        categories[0]
        for categories in get_value(record, "arxiv_eprints.categories", [])
        if categories
    ]

    payload = dict(title="", abstract="", categories=[])
    if titles:
        payload['title'] = titles[0]
    if abstracts:
        payload['abstract'] = abstracts[0]
    if arxiv_categories:
        payload['categories'] = arxiv_categories
    return payload
@with_debug_logging
def guess_coreness(obj, eng):
    """Workflow task to ask Beard API for a coreness assessment.

    Sends the payload built from ``obj.data`` to the Beard predictor and, on
    a usable answer, stores the outcome in
    ``obj.extra_data["relevance_prediction"]`` as a dict with the keys
    ``max_score``, ``decision``, ``scores`` and ``relevance_score``.

    The task is best-effort: it does nothing when no Beard URL is
    configured, when the request raises a ``requests`` exception, when the
    response is empty, or when the response carries fewer than the three
    scores (CORE, Non-CORE, Rejected) that are read below.

    :param obj: workflow object; ``obj.data`` is read and
        ``obj.extra_data`` is updated.
    :param eng: workflow engine (not used by this task).
    :return: ``None``.
    """
    predictor_url = get_beard_url()
    if not predictor_url:
        return

    # FIXME: Have option to select different prediction models when
    # available in the API
    payload = prepare_payload(obj.data)

    try:
        results = json_api_request(predictor_url, payload)
    except requests.exceptions.RequestException:
        # A failing prediction service must not fail the whole workflow.
        results = {}

    if not results:
        return

    scores = results.get('scores') or []
    if len(scores) < 3:
        # Without this guard ``max([])`` raises ValueError and the indexing
        # below raises IndexError on a malformed response.
        current_app.logger.warning(
            "Unexpected Beard response, skipping prediction: {0}".format(
                results
            )
        )
        return

    max_score = max(scores)
    decision = results.get('decision')
    scores = {
        "CORE": scores[0],
        "Non-CORE": scores[1],
        "Rejected": scores[2],
    }

    # Generate a normalized relevance_score useful for sorting.
    # We assume a CORE paper to have the highest relevance so we add a
    # significant value to separate it from Non-CORE and Rejected.
    # Normally scores range from -2 / +2 so 10 is significant.
    # Non-CORE scores are untouched, while Rejected scores are negated and
    # then lowered by 10.
    # Finally this provides one normalized score of relevance across
    # all categories of papers.
    relevance_score = max_score
    if decision == "CORE":
        relevance_score += 10
    elif decision == "Rejected":
        relevance_score = (max_score * -1) - 10

    # FIXME: Add top_words info when available from the API
    obj.extra_data["relevance_prediction"] = dict(
        max_score=max_score,
        decision=decision,
        scores=scores,
        relevance_score=relevance_score
    )
    current_app.logger.info("Prediction results: {0}".format(
        obj.extra_data["relevance_prediction"])
    )