# -*- coding: utf-8 -*-
#
# This file is part of INSPIRE.
# Copyright (C) 2014-2017 CERN.
#
# INSPIRE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# INSPIRE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.
"""HAL utils."""
from __future__ import absolute_import, division, print_function
from itertools import chain
from elasticsearch import RequestError
from flask import current_app
from six import text_type
from inspire_dojson.utils import get_recid_from_ref
from inspire_schemas.builders.literature import is_citeable
from inspire_utils.helpers import force_list
from inspire_utils.name import ParsedName
from inspire_utils.record import get_value
from inspirehep.modules.records.json_ref_loader import replace_refs
from inspirehep.utils.record_getter import get_es_records
[docs]def get_authors(record):
"""Return the authors of a record.
Queries the Institution records linked from the authors affiliations
to add, whenever it exists, the HAL identifier of the institution to
the affiliation.
Args:
record(InspireRecord): a record.
Returns:
list(dict): the authors of the record.
Examples:
>>> record = {
... 'authors': [
... 'affiliations': [
... {
... 'record': {
... '$ref': 'http://localhost:5000/api/institutions/902725',
... }
... },
... ],
... ],
... }
>>> authors = get_authors(record)
>>> authors[0]['hal_id']
'300037'
"""
hal_id_map = _get_hal_id_map(record)
result = []
for author in record.get('authors', []):
affiliations = []
parsed_name = ParsedName.loads(author['full_name'])
first_name, last_name = parsed_name.first, parsed_name.last
for affiliation in author.get('affiliations', []):
recid = get_recid_from_ref(affiliation.get('record'))
if recid in hal_id_map and hal_id_map[recid]:
affiliations.append({'hal_id': hal_id_map[recid]})
result.append({
'affiliations': affiliations,
'first_name': first_name,
'last_name': last_name,
})
return result
[docs]def get_conference_city(record):
"""Return the first city of a Conference record.
Args:
record(InspireRecord): a Conference record.
Returns:
string: the first city of the Conference record.
Examples:
>>> record = {'address': [{'cities': ['Tokyo']}]}
>>> get_conference_city(record)
'Tokyo'
"""
return get_value(record, 'address[0].cities[0]', default='')
[docs]def get_conference_country(record):
"""Return the first country of a Conference record.
Args:
record(InspireRecord): a Conference record.
Returns:
string: the first country of the Conference record.
Examples:
>>> record = {'address': [{'country_code': 'JP'}]}
>>> get_conference_country(record)
'jp'
"""
return get_value(record, 'address.country_code[0]', default='').lower()
[docs]def get_conference_end_date(record):
"""Return the closing date of a conference record.
Args:
record(InspireRecord): a Conference record.
Returns:
string: the closing date of the Conference record.
Examples:
>>> record = {'closing_date': '1999-11-19'}
>>> get_conference_end_date(record)
'1999-11-19'
"""
return record.get('closing_date', '')
[docs]def get_conference_record(record, default=None):
"""Return the first Conference record associated with a record.
Queries the database to fetch the first Conference record referenced
in the ``publication_info`` of the record.
Args:
record(InspireRecord): a record.
default: value to be returned if no conference record present/found
Returns:
InspireRecord: the first Conference record associated with the record.
Examples:
>>> record = {
... 'publication_info': [
... {
... 'conference_record': {
... '$ref': '/api/conferences/972464',
... },
... },
... ],
... }
>>> conference_record = get_conference_record(record)
>>> conference_record['control_number']
972464
"""
replaced = replace_refs(get_value(record, 'publication_info.conference_record[0]'), 'db')
if replaced:
return replaced
else:
return default
[docs]def get_conference_start_date(record):
"""Return the opening date of a conference record.
Args:
record(InspireRecord): a Conference record.
Returns:
string: the opening date of the Conference record.
Examples:
>>> record = {'opening_date': '1999-11-16'}
>>> get_conference__start_date(record)
'1999-11-16'
"""
return record.get('opening_date', '')
[docs]def get_conference_title(record, default=''):
"""Return the first title of a Conference record.
Args:
record(InspireRecord): a Conference record.
Returns:
string: the first title of the Conference record.
Examples:
>>> record = {'titles': [{'title': 'Workshop on Neutrino Physics'}]}
>>> get_conference_title(record)
'Workshop on Neutrino Physics'
"""
return get_value(record, 'titles.title[0]', default=default)
[docs]def get_divulgation(record):
"""Return 1 if a record is intended for the general public, 0 otherwise.
Args:
record(InspireRecord): a record.
Returns:
int: 1 if the record is intended for the general public, 0 otherwise.
Examples:
>>> get_divulgation({'publication_type': ['introductory']})
1
"""
return 1 if 'introductory' in get_value(record, 'publication_type', []) else 0
[docs]def get_document_types(record):
"""Return all document types of a record.
Args:
record(InspireRecord): a record.
Returns:
list(str): all document types of the record.
Examples:
>>> get_document_types({'document_type': ['article']})
['article']
"""
return get_value(record, 'document_type', default=[])
[docs]def get_doi(record):
"""Return the first DOI of a record.
Args:
record(InspireRecord): a record.
Returns:
string: the first DOI of the record.
Examples:
>>> get_doi({'dois': [{'value': '10.1016/0029-5582(61)90469-2'}]})
'10.1016/0029-5582(61)90469-2'
"""
return get_value(record, 'dois.value[0]', default='')
[docs]def get_domains(record):
"""Return the HAL domains of a record.
Uses the mapping in the configuration to convert all INSPIRE categories
to the corresponding HAL domains.
Args:
record(InspireRecord): a record.
Returns:
list(str): the HAL domains of the record.
Examples:
>>> record = {'inspire_categories': [{'term': 'Experiment-HEP'}]}
>>> get_domains(record)
['phys.hexp']
"""
terms = get_value(record, 'inspire_categories.term', default=[])
mapping = current_app.config['HAL_DOMAIN_MAPPING']
return [mapping[term] for term in terms]
[docs]def get_inspire_id(record):
"""Return the INSPIRE id of a record.
Args:
record(InspireRecord): a record.
Returns:
int: the INSPIRE id of the record.
Examples:
>>> get_inspire_id({'control_number': 1507156})
1507156
"""
return record['control_number']
[docs]def get_journal_issue(record):
"""Return the issue of the journal a record was published into.
Args:
record(InspireRecord): a record.
Returns:
string: the issue of the journal the record was published into.
Examples:
>>> record = {
... 'publication_info': [
... {'journal_issue': '5'},
... ],
... }
>>> get_journal_issue(record)
'5'
"""
return get_value(record, 'publication_info.journal_issue[0]', default='')
[docs]def get_journal_title(record):
"""Return the title of the journal a record was published into.
Args:
record(InspireRecord): a record.
Returns:
string: the title of the journal the record was published into.
Examples:
>>> record = {
... 'publication_info': [
... {'journal_title': 'Phys.Part.Nucl.Lett.'},
... ],
... }
>>> get_journal_title(record)
'Phys.Part.Nucl.Lett.'
"""
return get_value(record, 'publication_info.journal_title[0]', default='')
[docs]def get_journal_volume(record):
"""Return the volume of the journal a record was published into.
Args:
record(InspireRecord): a record.
Returns:
string: the volume of the journal the record was published into.
Examples:
>>> record = {
... 'publication_info': [
... {'journal_volume': 'D94'},
... ],
... }
>>> get_journal_volume(record)
'D94'
"""
return get_value(record, 'publication_info.journal_volume[0]', default='')
[docs]def get_language(record):
"""Return the first language of a record.
If it is not specified in the record we assume that the language
is English, so we return ``'en'``.
Args:
record(InspireRecord): a record.
Returns:
string: the first language of the record.
Examples:
>>> get_language({'languages': ['it']})
'it'
"""
languages = get_value(record, 'languages', default=[])
if not languages:
return 'en'
return languages[0]
[docs]def get_page_artid_for_publication_info(publication_info, separator):
"""Return the page range or the article id of a publication_info entry.
Args:
publication_info(dict): a publication_info field entry of a record
separator(basestring): optional page range symbol, defaults to a single dash
Returns:
string: the page range or the article id of the record.
Examples:
>>> publication_info = {'artid': '054021'}
>>> get_page_artid(publication_info)
'054021'
"""
if 'artid' in publication_info:
artid = publication_info['artid']
return artid
elif 'page_start' in publication_info and 'page_end' in publication_info:
page_start = publication_info['page_start']
page_end = publication_info['page_end']
return text_type('{}{}{}').format(page_start, text_type(separator), page_end)
return ''
[docs]def get_page_artid(record, separator='-'):
"""Return the page range or the article id of a record.
Args:
record(InspireRecord): a record
separator(basestring): optional page range symbol, defaults to a single dash
Returns:
string: the page range or the article id of the record.
Examples:
>>> record = {
... 'publication_info': [
... {'artid': '054021'},
... ],
... }
>>> get_page_artid(record)
'054021'
"""
publication_info = get_value(record, 'publication_info[0]', default={})
return get_page_artid_for_publication_info(publication_info, separator)
[docs]def get_peer_reviewed(record):
"""Return 1 if a record is peer reviewed, 0 otherwise.
Args:
record(InspireRecord): a record.
Returns:
int: 1 if the record is peer reviewed, 0 otherwise.
Examples:
>>> get_peer_reviewed({'refereed': True})
1
"""
return 1 if 'refereed' in record and record['refereed'] else 0
[docs]def get_publication_date(record):
"""Return the date in which a record was published.
Args:
record(InspireRecord): a record.
Returns:
string: the date in which the record was published.
Examples:
>>> get_publication_date({'publication_info': [{'year': 2017}]})
'2017'
"""
return str(get_value(record, 'publication_info.year[0]', default=''))
[docs]def is_published(record):
"""Return if a record is published.
We say that a record is published if it is citeable, which means that
it has enough information in a ``publication_info``, or if we know its
DOI and a ``journal_title``, which means it is in press.
Args:
record(InspireRecord): a record.
Returns:
bool: whether the record is published.
Examples:
>>> record = {
... 'dois': [
... {'value': '10.1016/0029-5582(61)90469-2'},
... ],
... 'publication_info': [
... {'journal_title': 'Nucl.Phys.'},
... ],
... }
>>> is_published(record)
True
"""
citeable = 'publication_info' in record and is_citeable(record['publication_info'])
submitted = 'dois' in record and any(
'journal_title' in el for el in force_list(record.get('publication_info')))
return citeable or submitted
def _get_hal_id_map(record):
affiliation_records = chain.from_iterable(get_value(
record, 'authors.affiliations.record', default=[]))
affiliation_recids = [get_recid_from_ref(el) for el in affiliation_records]
try:
institutions = get_es_records('ins', affiliation_recids)
except RequestError:
institutions = []
return {el['control_number']: _get_hal_id(el) for el in institutions}
def _get_hal_id(record):
for el in record.get('external_system_identifiers', []):
if el.get('schema') == 'HAL':
return el['value']