Source code for inspirehep.modules.authors.rest.citations
# -*- coding: utf-8 -*-
#
# This file is part of INSPIRE.
# Copyright (C) 2014-2017 CERN.
#
# INSPIRE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# INSPIRE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.
from __future__ import absolute_import, division, print_function
import json
from elasticsearch_dsl import Q
from inspirehep.modules.search import LiteratureSearch
class AuthorAPICitations(object):
    """API endpoint for author collection returning citations."""

    def serialize(self, pid, record, links_factory=None):
        """Return a JSON list of citations for a given author recid.

        For every literature record that lists the author, all records
        referencing it are looked up and reported as its citers.

        :param pid:
            Persistent identifier instance; its ``pid_value`` is used as
            the author recid.
        :param record:
            Record instance (not read by this serializer).
        :param links_factory:
            Factory function for the link generation (not read by this
            serializer).
        :returns:
            JSON-encoded list with one object per publication of the
            author, each holding a ``citee`` and a list of ``citers``.
        """
        author_pid = pid.pid_value
        citations = {}

        query = Q('match', authors__recid=author_pid)
        search = LiteratureSearch().query(
            'nested', path='authors', query=query
        ).params(
            _source=[
                'authors.recid',
                'control_number',
                'self',
            ]
        )

        # For each publication co-authored by a given author...
        for result in search.scan():
            result_source = result.to_dict()
            recid = result_source['control_number']
            authors = self._author_recids(result_source)

            nested_search = LiteratureSearch().query({
                "match": {
                    "references.recid": recid
                }
            }).params(
                _source=[
                    "authors.recid",
                    "collections",
                    "control_number",
                    "earliest_date",
                    "self",
                ]
            )

            citations[recid] = dict(
                # The source record that is being cited.
                citee=dict(
                    id=recid,
                    record=result_source['self'],
                ),
                # All publications, which cite the parent record.
                citers=[
                    self._build_citation(authors, nested_result.to_dict())
                    for nested_result in nested_search.scan()
                ],
            )

        # ``dict.values()`` is a view on Python 3, which ``json`` cannot
        # serialize; materialise it as a list first.
        return json.dumps(list(citations.values()))

    @staticmethod
    def _author_recids(source):
        """Return the set of author recids present in a record source.

        Not every signature has a recid (at least for demo records), so
        signatures without one are skipped instead of raising or
        discarding the recids of the remaining signatures.

        :param source:
            Dictionary of a literature record, optionally holding an
            ``authors`` list.
        """
        return {
            author['recid']
            for author in source.get('authors', ())
            if 'recid' in author
        }

    @classmethod
    def _build_citation(cls, authors, citer_source):
        """Build the citation entry describing a single citing record.

        :param authors:
            Set of author recids of the cited record.
        :param citer_source:
            Dictionary of the citing record.
        :returns:
            Dictionary with ``citer``, ``self_citation``,
            ``published_paper`` and, when available, ``date``.
        """
        citer_authors = cls._author_recids(citer_source)

        citation = dict(
            citer=dict(
                id=int(citer_source['control_number']),
                record=citer_source['self'],
            ),
            # If at least one author is shared, it's a self-citation.
            self_citation=bool(authors & citer_authors),
        )

        # Get the earliest date of a citer.
        if 'earliest_date' in citer_source:
            citation['date'] = citer_source['earliest_date']

        # Get status if a citer is published.
        # FIXME: As discussed with Sam, we should have a boolean flag
        # for this type of information.
        try:
            citation['published_paper'] = "Published" in [
                collection['primary']
                for collection in citer_source['collections']
            ]
        except KeyError:
            citation['published_paper'] = False

        return citation