Source code for ckanext.oaipmh.importformats
# coding: utf-8
# vi:et:ts=8:
import functools
import logging

import lxml.etree
import oaipmh.common as oc
import oaipmh.metadata as om
from fn.uniform import range

from ckanext.oaipmh.cmdi_reader import CmdiReader
from ckanext.oaipmh.oai_dc_reader import dc_metadata_reader
import importcore
# Short module-level aliases for the generic XML and RDF metadata readers
# defined in importcore.
xml_reader = importcore.generic_xml_metadata_reader
rdf_reader = importcore.generic_rdf_metadata_reader
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
def ExceptReturn(exception, returns):
    '''Build a decorator that turns an exception into a fallback return value

    The decorated function is called normally. If it raises ``exception``
    (or any exception matching it in an ``except`` clause), the error is
    logged and ``returns`` is handed back to the caller instead.

    :param exception: exception class (or tuple of classes) to intercept
    :param returns: value returned in place of the intercepted exception
    :returns: a decorator to apply to the function being guarded
    '''
    def decorator(f):
        # Preserve the wrapped function's name and docstring so that logs,
        # introspection and generated documentation still refer to ``f``.
        @functools.wraps(f)
        def call(*args, **kwargs):
            try:
                log.debug('call()')
                return f(*args, **kwargs)
            except exception as e:
                # Pass the exception as a logger argument so formatting is
                # deferred to the logging framework.
                log.error('Exception occurred: %s', e)
                return returns
        log.debug('decorator()')
        return call
    log.debug('ExceptReturn()')
    return decorator
def copy_element(source, dest, md, callback=None):
    '''Duplicate a metadata entry, and its satellites, under a new key

    The metadata dictionary ``md`` is modified in place. When ``source``
    is present its value is stored under ``dest``; language information
    found under ``source/language``, ``source/@lang`` or
    ``source/@xml:lang`` is stored under ``dest/language``, and a
    ``source/@rdf:resource`` value, if any, replaces the value just
    stored under ``dest``. When ``source`` is absent but a
    ``source.count`` entry exists, the element is treated as indexed:
    the count is copied to ``dest.count`` and every ``source.N`` is
    copied to ``dest.N`` in the same manner.

    :param source: key to be copied
    :type source: string
    :param dest: key to copy to
    :type dest: string
    :param md: a metadata dictionary to update
    :type md: hash from string to any value (inout)
    :param callback: optional function invoked for each plain or indexed
        key that was actually copied
    :type callback: function of (source, dest, md) -> None
    '''
    if source not in md:
        # No direct value: fall back to the indexed form source.0, source.1, ...
        n_items = md.get(source + '.count', 0)
        if n_items:
            md[dest + '.count'] = n_items
            for idx in range(n_items):
                copy_element('%s.%d' % (source, idx),
                             '%s.%d' % (dest, idx),
                             md, callback)
        return

    md[dest] = md[source]

    # Later suffixes win if several language variants are present.
    for lang_suffix in ('/language', '/@lang', '/@xml:lang'):
        copy_element(source + lang_suffix, dest + '/language', md)

    # An rdf:resource attribute overwrites any element text copied above.
    copy_element(source + '/@rdf:resource', dest, md)

    if callback:
        callback(source, dest, md)
def person_attrs(source, dest, result):
    '''Callback that copies the FOAF contact fields of a person

    Copies ``foaf:name``, ``foaf:mbox`` and ``foaf:phone`` found under
    ``source`` to ``name``, ``email`` and ``phone`` under ``dest`` in
    the metadata dictionary ``result``.
    '''
    # TODO: here we could also fetch from ISNI/ORCID
    field_map = (
        ('/foaf:name', '/name'),
        ('/foaf:mbox', '/email'),
        ('/foaf:phone', '/phone'),
    )
    for src_suffix, dest_suffix in field_map:
        copy_element(source + src_suffix, dest + dest_suffix, result)
def _apply_rights(result):
    '''Derive rights class, license and access URL from the rights XML

    Reads the XML document stored under ``rights`` in ``result`` and, on
    success, stores its lower-cased ``RIGHTSCATEGORY`` attribute under
    ``rightsclass``. For the ``licensed`` class the text of the first
    child element is stored under ``license``; for ``contractual`` it is
    stored under ``accessURL``. This is best effort: a missing or
    malformed rights value leaves ``result`` as far as it got and is
    only logged.
    '''
    rights_xml = result.get(u'rights')
    if rights_xml is None:
        # No rights statement was copied; nothing to interpret.
        return
    try:
        rights = lxml.etree.XML(rights_xml)
        rightsclass = rights.attrib['RIGHTSCATEGORY'].lower()
        result[u'rightsclass'] = rightsclass
        if rightsclass == 'licensed':
            result[u'license'] = rights[0].text
        elif rightsclass == 'contractual':
            result[u'accessURL'] = rights[0].text
    except Exception as e:
        # Deliberately tolerant, but unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit propagate and leaves a trace.
        log.debug('Could not interpret rights element: %s', e)


def nrd_metadata_reader(xml):
    '''Read metadata in NRD schema

    This function takes NRD metadata as an lxml.etree.Element object,
    and returns the same metadata as a dictionary, with central TTA
    elements picked to format-independent keys.

    :param xml: RDF metadata as XML-encoded NRD
    :type xml: lxml.etree.Element instance
    :returns: a metadata dictionary
    :rtype: a hash from string to any value
    '''
    result = rdf_reader(xml).getMap()

    def document_attrs(source, dest, result):
        '''Callback for copying document attributes'''
        copy_element(source + '/dct:title', dest + '/title', result)
        copy_element(source + '/dct:identifier', dest, result)
        copy_element(source + '/dct:creator', dest + '/creator.0/name', result)
        copy_element(source + '/nrd:creator', dest + '/creator', result, person_attrs)
        copy_element(source + '/dct:description', dest + '/description', result)

    def funding_attrs(source, dest, result):
        '''Callback for copying project attributes'''
        copy_element(source + '/rev:arpfo:funds.0/arpfo:grantNumber', dest + '/fundingNumber', result)
        copy_element(source + '/rev:arpfo:funds.0/rev:arpfo:provides', dest + '/funder', result, person_attrs)

    def file_attrs(source, dest, result):
        '''Callback for copying manifestation attributes'''
        copy_element(source + '/dcat:mediaType', dest + '/mimetype', result)
        copy_element(source + '/fp:checksum.0/fp:checksumValue.0', dest + '/checksum.0', result)
        copy_element(source + '/fp:checksum.0/fp:generator.0', dest + '/checksum.0/algorithm', result)
        copy_element(source + '/dcat:byteSize', dest + '/size', result)

    # (source key, destination key, optional per-element callback)
    mapping = [
        (u'dataset', u'versionidentifier', None),
        (u'dataset/nrd:continuityIdentifier', u'continuityidentifier', None),
        (u'dataset/rev:foaf:primaryTopic.0/nrd:metadataIdentifier', u'metadata/identifier', None),
        (u'dataset/rev:foaf:primaryTopic.0/nrd:metadataModified', u'metadata/modified', None),
        (u'dataset/dct:title', u'title', None),
        (u'dataset/nrd:modified', u'modified', None),
        (u'dataset/nrd:rights', u'rights', None),
        (u'dataset/nrd:language', u'language', None),
        (u'dataset/nrd:owner', u'owner', person_attrs),
        (u'dataset/nrd:creator', u'creator', person_attrs),
        (u'dataset/nrd:distributor', u'distributor', person_attrs),
        (u'dataset/nrd:contributor', u'contributor', person_attrs),
        (u'dataset/nrd:subject', u'subject', None),  # fetch tags?
        (u'dataset/nrd:producerProject', u'project', funding_attrs),
        (u'dataset/dct:isPartOf', u'collection', document_attrs),
        (u'dataset/dct:requires', u'requires', None),
        (u'dataset/nrd:discipline', u'discipline', None),
        (u'dataset/nrd:temporal', u'temporalcoverage', None),
        (u'dataset/nrd:spatial', u'spatialcoverage', None),  # names?
        (u'dataset/nrd:manifestation', u'resource', file_attrs),
        (u'dataset/nrd:observationMatrix', u'variables', None),  # TODO
        (u'dataset/nrd:usedByPublication', u'publication', document_attrs),
        (u'dataset/dct:description', u'description', None),
    ]
    for source, dest, callback in mapping:
        copy_element(source, dest, result, callback)

    # The rights value is itself an XML snippet; unpack it on a best-effort basis.
    _apply_rights(result)
    return oc.Metadata(result)
def create_metadata_registry(harvest_type=None, service_url=None):
    '''Build a metadata registry populated with this module's readers

    Readers are registered for the metadataPrefixes oai_dc, cmdi0571,
    nrd, rdf and xml.

    :param harvest_type: passed to the oai_dc reader factory; the string
        'default' is used when this is empty or None
    :param service_url: passed to the CMDI reader constructor
    :returns: metadata registry instance
    :rtype: oaipmh.metadata.MetadataRegistry
    '''
    registry = om.MetadataRegistry()
    register = registry.registerReader

    # These two readers are configured per call.
    dc_type = harvest_type if harvest_type else 'default'
    register('oai_dc', dc_metadata_reader(dc_type))
    register('cmdi0571', CmdiReader(service_url))

    # The remaining readers are plain module-level callables.
    static_readers = (
        ('nrd', nrd_metadata_reader),
        ('rdf', rdf_reader),
        ('xml', xml_reader),
    )
    for prefix, reader in static_readers:
        register(prefix, reader)
    return registry