Source code for ckanext.oaipmh.cmdi
import httplib
import json
import logging
import urllib2
from lxml import etree
import oaipmh
from ckanext.kata.utils import get_package_id_by_pid
from ckanext.oaipmh import importformats
from ckanext.oaipmh.cmdi_reader import CmdiReader
from ckanext.oaipmh.harvester import OAIPMHHarvester
log = logging.getLogger(__name__)
class CMDIHarvester(OAIPMHHarvester):
    """OAI-PMH harvester variant that reads CMDI metadata records.

    Specialises :class:`OAIPMHHarvester` by setting the metadata format
    to ``cmdi0571`` and parsing fetched records with :class:`CmdiReader`.
    """

    md_format = 'cmdi0571'
    client = None  # used for testing: a truthy value replaces the real OAI-PMH client

    def info(self):
        """ See :meth:`ckanext.harvest.harvesters.base.HarvesterBase.info`.

        :returns: dict with the ``name``, ``title`` and ``description``
            of this harvester.
        """
        return {
            'name': 'cmdi',
            'title': 'CMDI',
            'description': 'Harvests CMDI dataset'
        }

    def on_deleted(self, harvest_object, header):
        """ See :meth:`OAIPMHHarvester.on_deleted`

        Mark package for deletion.

        :param harvest_object: harvest object to flag; its ``content`` is
            cleared and its ``report_status`` is set to ``"deleted"``
            before it is saved.
        :param header: record header; ``header.identifier()`` is used to
            look up the package id.
        :returns: ``True``
        """
        package_id = get_package_id_by_pid(header.identifier(), 'metadata')
        # NOTE(review): the source this was recovered from had lost its
        # indentation; only the package_id assignment is assumed to be
        # guarded by the lookup result -- confirm against upstream.
        if package_id:
            harvest_object.package_id = package_id
        harvest_object.content = None
        harvest_object.report_status = "deleted"
        harvest_object.save()
        return True

    def gather_stage(self, harvest_job):
        """ See :meth:`OAIPMHHarvester.gather_stage`

        Defaults the source configuration's ``type`` to ``'cmdi'`` when
        it is missing or empty, then delegates to
        :meth:`populate_harvest_job`.

        :param harvest_job: harvest job whose source URL and
            configuration are used.
        :returns: whatever :meth:`populate_harvest_job` returns.
        """
        config = self._get_configuration(harvest_job)
        if not config.get('type'):
            config['type'] = 'cmdi'
            # NOTE(review): persisting is assumed to happen only when the
            # default was injected (indentation was lost) -- confirm.
            harvest_job.source.config = json.dumps(config)
            harvest_job.source.save()
        registry = self.metadata_registry(config, harvest_job)
        client = self.client
        if not client:
            # A bare ``import oaipmh`` does not guarantee that the
            # ``client`` submodule is loaded; import it explicitly rather
            # than relying on another module having done so.
            import oaipmh.client
            client = oaipmh.client.Client(harvest_job.source.url, registry)
        return self.populate_harvest_job(harvest_job, None, config, client)

    def parse_xml(self, f, context, orig_url=None, strict=True):
        """Parse a CMDI record and return the data read from it.

        :param f: XML document as a string; it is handed to
            ``etree.fromstring``, so despite the name it is not a file
            object.
        :param context: not used by this implementation.
        :param orig_url: not used by this implementation.
        :param strict: not used by this implementation.
        :returns: the result of ``CmdiReader().read_data`` for the parsed
            root element.
        """
        return CmdiReader().read_data(etree.fromstring(f))