Source code for inspirehep.modules.migrator.utils
# -*- coding: utf-8 -*-
#
# This file is part of INSPIRE.
# Copyright (C) 2014-2018 CERN.
#
# INSPIRE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# INSPIRE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with INSPIRE. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.
"""Migrator utils."""
from __future__ import absolute_import, division, print_function
from dojson.contrib.marc21.utils import create_record
from inspire_utils.helpers import force_list
# Upper-cased collection names that ``get_collection`` may return when one of
# them appears among a record's ``980__`` values.
REAL_COLLECTIONS = (
    'INSTITUTION',
    'EXPERIMENT',
    'JOURNALS',
    'JOURNALSNEW',
    'HEPNAMES',
    'HEP',
    'JOB',
    'JOBHIDDEN',
    'CONFERENCES',
    'DATA',
)
def get_collection(marc_record):
    """Return the name of the collection a MARC record belongs to.

    Every subfield value found in the record's ``980__`` fields is
    upper-cased and stripped, and the resulting names are resolved as
    follows:

    * ``'DELETED'`` takes precedence over everything else;
    * otherwise the first entry of ``REAL_COLLECTIONS`` (in tuple order)
      that is present among the names is returned;
    * otherwise the record defaults to ``'HEP'``.

    Args:
        marc_record: object exposing ``get``; its ``'980__'`` entry (if any)
            is passed through ``force_list`` and each resulting field must
            expose ``values()`` yielding strings or collections of strings.

    Returns:
        str: ``'DELETED'``, one of ``REAL_COLLECTIONS``, or ``'HEP'``.
    """
    collections = {
        value.upper().strip()
        for field in force_list(marc_record.get('980__'))
        for subfield in field.values()
        for value in force_list(subfield)
    }

    if 'DELETED' in collections:
        return 'DELETED'

    # Walk the ordered tuple instead of the set: set iteration order is
    # arbitrary, so a record tagged with several known collections could
    # otherwise resolve to a different one from run to run.
    for collection in REAL_COLLECTIONS:
        if collection in collections:
            return collection

    return 'HEP'
def get_collection_from_marcxml(marcxml):
    """Return the collection of a record given as MARCXML.

    The input is parsed with ``create_record`` (``keep_singletons=False``)
    and the parsed record is handed to ``get_collection``.

    Args:
        marcxml: the MARCXML input accepted by ``create_record``.

    Returns:
        Whatever ``get_collection`` returns for the parsed record.
    """
    return get_collection(create_record(marcxml, keep_singletons=False))