Source code for inspirehep.modules.records.serializers.fields_export

# -*- coding: utf-8 -*-
#
# This file is part of INSPIRE.
# Copyright (C) 2014-2017 CERN.
#
# INSPIRE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# INSPIRE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

from __future__ import absolute_import, division, print_function

import re

from babel import Locale
from isbn import ISBNError
from six import text_type

from idutils import is_arxiv_post_2007, normalize_isbn

from inspire_utils.date import PartialDate
from inspire_utils.record import get_value
from inspirehep.modules.hal.utils import (
    get_page_artid_for_publication_info,
    get_conference_record,
    get_conference_title,
)

from .config import COMMON_FIELDS_FOR_ENTRIES, FIELDS_FOR_ENTRY_TYPE


[docs]def make_extractor():
    """Create a function store decorator.

    Creates a decorator function that is used to collect extractor functions.
    They are put in a dictionary with the field they extract as keys.
    An extractor function is a function which returns a BibTeX field value given
    an inspire record and a document type.

    Returns:
        function: a decorator with a store for pre-processing/extracting functions.
    """
    store = {}

    def extractor(field):
        def decorator(decorated_function):
            store[field] = decorated_function
            return decorated_function
        return decorator
    extractor.store = store
    return extractor


[docs]def bibtex_document_type(doc_type, obj):
    """Return the BibTeX entry type.

    Maps the INSPIRE ``document_type`` to a BibTeX entry type. Also checks
    ``thesis_info.degree_type`` in case it's a thesis, as it stores the
    information on which kind of thesis we're dealing with.

    Args:
        doc_type (text_type): INSPIRE document type.
        obj (dict): literature record.

    Returns:
        text_type: bibtex document type for the given INSPIRE entry.
    """
    DOCUMENT_TYPE_MAP = {
        'article': 'article',
        'book': 'book',
        'book chapter': 'inbook',
        'conference paper': 'inproceedings',
        'proceedings': 'proceedings',
        'report': 'techreport',
        'note': 'article',
        # theses handled separately due to masters/phd distinction
    }
    if doc_type in DOCUMENT_TYPE_MAP:
        return DOCUMENT_TYPE_MAP[doc_type]
    # Theses need special treatment, because bibtex differentiates between their types:
    elif doc_type == 'thesis' and get_value(obj, 'thesis_info.degree_type') in ('phd', 'habilitation'):
        return 'phdthesis'
    # Other types of theses (other, bachelor, laurea) don't have separate types in bibtex:
    # We will use the type field (see `get_type`) to indicate the type of diploma.
    elif doc_type == 'thesis':
        return 'mastersthesis'
    return 'misc'


[docs]def bibtex_type_and_fields(data):
    """Return a BibTeX doc type and fields needed to be included in a BibTeX record.

    Args:
        data (dict): inspire record

    Returns:
         tuple: bibtex document type and fields
    """
    # TODO: Establish a better method with which we choose the bibtex type if there is more that one inspire doc type
    bibtex_doc_types = [bibtex_document_type(doc_type, data) for doc_type in data['document_type']] + ['misc']
    # Preference towards article, as it's more prestigious to have sth published:
    chosen_type = 'article' if 'article' in bibtex_doc_types else bibtex_doc_types[0]
    return chosen_type, FIELDS_FOR_ENTRY_TYPE[chosen_type] + COMMON_FIELDS_FOR_ENTRIES


[docs]def get_authors_with_role(authors, role):
    """Extract names of people from an authors field given their roles.

    Args:
        authors: authors field of the record.
        role: string specifying the role 'author', 'editor', etc.

    Returns:
        list of text_type: of names of people
    """
    return [author['full_name'] for author in authors if role in author.get('inspire_roles', ['author'])]


[docs]def get_country_name_by_code(code, default=None):
    """Return a country name string from a country code.

    Args:
        code (str): country code in INSPIRE 2 letter format based on ISO 3166-1 alpha-2
        default: value to be returned if no country of a given code exists

    Returns:
        text_type: name of a country, or ``default`` if no such country.
    """
    try:
        return Locale('en').territories[code]
    except KeyError:
        return default


[docs]def get_best_publication_info(data):
    """Return the most comprehensive publication_info entry.

    Args:
        data (dict): inspire record

    Returns:
        dict: a publication_info entry or default if not found any
    """
    publication_info = get_value(data, 'publication_info', [])
    only_publications = [entry for entry in publication_info if entry.get('material', 'publication') == 'publication']
    if not only_publications:
        return {}

    return sorted(only_publications, key=len, reverse=True)[0]


[docs]def get_date(data, doc_type):
    """Return a publication/thesis/imprint date.

    Args:
      data (dict): INSPIRE literature record to be serialized
      doc_type (text_type): BibTeX document type, as reported by `bibtex_document_type`

    Returns:
        PartialDate: publication date for a record.
    """
    publication_year = get_best_publication_info(data).get('year')
    thesis_date = get_value(data, 'thesis_info.date')
    imprint_date = get_value(data, 'imprints.date[0]')

    if doc_type.endswith('thesis'):
        date_choice = thesis_date or publication_year or imprint_date
    else:
        date_choice = publication_year or thesis_date or imprint_date

    if date_choice:
        return PartialDate.loads(str(date_choice))


# Functions below describe where the non-obvious data is located:
#
# Args:
#   data (dict): JSON literature record to be serialized
#   doc_type (text_type): BibTeX document type, as reported by `bibtex_document_type`

extractor = make_extractor()


@extractor('author')
[docs]def get_author(data, doc_type):
    """Get corporate author of a record.

    Note:
        Only used to generate author field if corporate_author is the author.
    """
    if 'corporate_author' in data:
        return ' and '.join('{{{}}}'.format(author) for author in data['corporate_author'])


@extractor('journal')
[docs]def get_journal(data, doc_type):
    return get_best_publication_info(data).get('journal_title')


@extractor('volume')
[docs]def get_volume(data, doc_type):
    publication_volume = get_best_publication_info(data).get('journal_volume')
    bookseries_volume = get_value(data, 'book_series.volume[0]')
    return publication_volume or bookseries_volume


@extractor('year')
[docs]def get_year(data, doc_type):
    date = get_date(data, doc_type)
    if date:
        return date.year


@extractor('month')
[docs]def get_month(data, doc_type):
    date = get_date(data, doc_type)
    if date:
        return date.month


@extractor('number')
[docs]def get_number(data, doc_type):
    return get_best_publication_info(data).get('journal_issue')


@extractor('pages')
[docs]def get_pages(data, doc_type):
    return get_page_artid_for_publication_info(get_best_publication_info(data), '--')


@extractor('primaryClass')
[docs]def get_primary_class(data, doc_type):
    eprint = get_value(data, 'arxiv_eprints.value[0]')
    if eprint and is_arxiv_post_2007(eprint):
        return get_value(data, 'arxiv_eprints[0].categories[0]')


@extractor('eprint')
[docs]def get_eprint(data, doc_type):
    return get_value(data, 'arxiv_eprints.value[0]')


@extractor('archivePrefix')
[docs]def get_arxiv_prefix(data, doc_type):
    if get_eprint(data, doc_type):
        return "arXiv"


@extractor('school')
[docs]def get_school(data, doc_type):
    schools = [school['name'] for school in get_value(data, 'thesis_info.institutions', [])]
    if schools:
        return ', '.join(schools)


@extractor('address')
[docs]def get_address(data, doc_type):
    conference = get_conference_record(data, default={})
    pubinfo_city = get_value(conference, 'address[0].cities[0]')
    pubinfo_country_code = get_value(conference, 'address[0].country_code')

    if pubinfo_city and pubinfo_country_code:
        return pubinfo_city + ', ' + get_country_name_by_code(pubinfo_country_code, default=pubinfo_country_code)
    return get_value(data, 'imprints[0].place')


@extractor('booktitle')
[docs]def get_booktitle(data, doc_type):
    book_series_title = get_value(data, 'book_series.title[0]')
    conference_record = get_conference_record(data, default={})
    return book_series_title or get_conference_title(conference_record, default=None)


@extractor('publisher')
[docs]def get_publisher(data, doc_type):
    return get_value(data, 'imprints.publisher[0]')


@extractor('reportNumber')
[docs]def get_report_number(data, doc_type):
    if 'report_numbers' in data:
        return ', '.join(report['value'] for report in data.get('report_numbers', []))


@extractor('isbn')
[docs]def get_isbn(data, doc_type):
    def hyphenate_if_possible(no_hyphens):
        try:
            return normalize_isbn(no_hyphens)
        except ISBNError:
            return no_hyphens

    isbns = get_value(data, 'isbns.value', [])
    if isbns:
        return ', '.join(hyphenate_if_possible(isbn) for isbn in isbns)


@extractor('type')
[docs]def get_type(data, doc_type):
    degree_type = get_value(data, 'thesis_info.degree_type', 'other')
    if doc_type == 'mastersthesis' and degree_type not in ('master', 'diploma'):
        return '{} thesis'.format(degree_type.title())


@extractor('edition')
[docs]def get_edition(data, doc_type):
    return get_value(data, 'editions[0]')


@extractor('doi')
[docs]def get_doi(data, doc_type):
    return get_value(data, 'dois.value[0]')


@extractor('title')
[docs]def get_title(data, doc_type):
    return get_value(data, 'titles.title[0]')


@extractor('url')
[docs]def get_url(data, doc_type):
    return get_value(data, 'urls.value[0]')


@extractor('collaboration')
[docs]def get_collaboration(data, doc_type):
    return get_value(data, 'collaborations.value[0]')


@extractor('series')
[docs]def get_series(data, doc_type):
    return get_value(data, 'book_series.title[0]')


@extractor('note')
[docs]def get_note(data, doc_type):
    """Write and addendum/errata information to the BibTeX note field.

    Traverse publication_info looking for erratum and addendum in `publication_info.material`
    field and build a string of references to those publication entries.

    Returns:
        string: formatted list of the errata and addenda available for a given record

    """
    notices = ('erratum', 'addendum')
    entries = [entry for entry in get_value(data, 'publication_info', []) if entry.get('material') in notices]

    if not entries:
        return None

    note_strings = [
        text_type('{field}: {journal} {volume}, {pages} {year}').format(
            field=entry['material'].title(),
            journal=entry.get('journal_title'),
            volume=entry.get('journal_volume'),
            pages=get_page_artid_for_publication_info(entry, '--'),
            year='({})'.format(entry['year']) if 'year' in entry else ''
        ).strip()
        for entry in entries
    ]

    note_string = '[' + ', '.join(note_strings) + ']'
    note_string = re.sub(' +', ' ', note_string)  # Remove possible multiple spaces
    return re.sub(',,', ',', note_string)         # ... and commas
Source code for inspirehep.modules.records.serializers.fields_export

INSPIRE-HEP

Navigation

Related Topics