Source code for ckanext.oaipmh.ida
import json
from lxml import etree
from ckanext.oaipmh.harvester import OAIPMHHarvester
from ckanext.oaipmh.oai_dc_reader import dc_metadata_reader
class IdaHarvester(OAIPMHHarvester):
    '''
    OAI-PMH harvester variant registered under the name ``ida``.

    Builds on :class:`OAIPMHHarvester`; sets ``md_format`` to ``oai_dc``,
    defaults the source configuration ``type`` to ``'ida'`` and parses
    records through ``dc_metadata_reader('ida')``.
    '''
    md_format = "oai_dc"

    def info(self):
        ''' See :meth:`ckanext.harvest.harvesters.base.HarvesterBase.info`. '''
        return dict(
            name='ida',
            title='OAI-PMH IDA',
            description='Harvests OAI-PMH IDA providers',
        )

    def gather_stage(self, harvest_job):
        """
        See :meth:`OAIPMHHarvester.gather_stage`.

        Before delegating to the parent implementation, make sure the
        source configuration carries a ``type``: when it is missing or
        empty, ``'ida'`` is filled in and the updated configuration is
        serialized to JSON and saved on the harvest source.

        :param harvest_job: the job whose source is being gathered
        :returns: whatever the parent ``gather_stage`` returns
        """
        settings = self._get_configuration(harvest_job)
        if not settings.get('type'):
            settings['type'] = 'ida'
            # Persist the defaulted type on the source itself.
            source = harvest_job.source
            source.config = json.dumps(settings)
            source.save()
        return super(IdaHarvester, self).gather_stage(harvest_job)

    def parse_xml(self, f, context, orig_url=None, strict=True):
        """
        Parse XML content with the ``'ida'`` metadata reader.

        :param f: XML content handed to ``etree.fromstring``
        :param context: accepted for interface compatibility; not used here
        :param orig_url: accepted for interface compatibility; not used here
        :param strict: accepted for interface compatibility; not used here
        :returns: the ``'unified'`` entry of the reader's result
        """
        read_record = dc_metadata_reader('ida')
        # NOTE(review): harvested XML presumably comes from external
        # providers and is parsed with lxml's default parser settings --
        # confirm whether entity resolution should be restricted.
        root = etree.fromstring(f)
        return read_record(root)['unified']