Created
October 19, 2012 13:53
-
-
Save jamesoutterside/3918345 to your computer and use it in GitHub Desktop.
Script to export DLMs oai resources to an LR node
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to export DLM OAI-PMH resources to a Learning Registry (LR) node.
import json
import urllib2

from oaipmh.client import Client
from oaipmh.metadata import MetadataRegistry, oai_dc_reader

import settings
# When True, main() publishes to the Learning Registry sandbox node instead of
# the alpha.mimas.ac.uk node.
DEBUG = True
def convert_to_envelope(doc, rawMetadata):
    """Wrap a raw metadata payload in a Learning Registry envelope document.

    Args:
        doc: Parsed metadata for the record. Currently unused; it is accepted
            so the envelope can later be populated from the Dublin Core
            fields without changing callers.
        rawMetadata: Raw metadata payload, stored verbatim under the
            ``resource_data`` key.

    Returns:
        dict: A new envelope document on every call.
    """
    # TODO: derive fields such as "resource_locator" from the Dublin Core
    # metadata in ``doc``; for now they are fixed placeholder values.
    return {
        "doc_type": 'resource_data',
        "doc_version": "0.23.0",
        "active": True,
        "resource_data_type": "metadata",
        "identity": {
            "submitter_type": "user",
            "submitter": "Newcastle University",
            "curator": "Newcastle University",
            "owner": "Newcastle University",
        },
        "TOS": {
            "submission_TOS": "http://www.learningregistry.org/tos/cc0/v0-5/",
        },
        "resource_locator": 'location',
        "keys": ["DC"],
        "payload_placement": "inline",
        "payload_schema": ["oai_dc"],
        "resource_data": rawMetadata,
        "publishing_node": 'local',
    }
def acquire_and_publish_documents(oai_url, publish_url, reader, prefix,
                                  batch_size=10):
    """Harvest all records from an OAI endpoint and publish them in batches.

    Each record listed by the OAI client is re-fetched individually with a
    ``GetRecord`` request so that its raw response body can be embedded in
    the envelope, then the envelopes are published ``batch_size`` at a time.

    Args:
        oai_url: Base URL of the OAI endpoint to harvest.
        publish_url: URL the envelope batches are POSTed to.
        reader: Metadata reader registered with the OAI client for ``prefix``.
        prefix: OAI metadata prefix used for listing and fetching records.
        batch_size: Number of envelopes sent per publish request
            (defaults to 10, the previously hard-coded value).
    """
    registry = MetadataRegistry()
    registry.registerReader(prefix, reader)
    client = Client(oai_url, registry)

    documents = []
    for record in client.listRecords(metadataPrefix=prefix):
        # The first two items of each listed record are its header and its
        # parsed metadata.
        header, metadata = record[0], record[1]

        # NOTE(review): the identifier is interpolated into the query string
        # without URL-quoting -- confirm identifiers never need escaping.
        record_url = "{0}?verb=GetRecord&metadataPrefix={1}&identifier={2}".format(
            oai_url, prefix, header.identifier())
        response = urllib2.urlopen(record_url)
        try:
            raw_metadata = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()

        envelope = convert_to_envelope(metadata, raw_metadata)
        if envelope is None:
            continue

        documents.append(envelope)
        if len(documents) >= batch_size:
            publish_documents(publish_url, documents)
            documents = []

    # Flush the final partial batch; skip the request when nothing is left.
    if documents:
        publish_documents(publish_url, documents)
def publish_documents(publish_url, documents):
    """POST a batch of envelope documents as JSON and log the response.

    The documents are sent as ``{"documents": [...]}`` with a JSON content
    type, and the response body is appended to ``output.log`` in the current
    working directory.

    Args:
        publish_url: URL the batch is POSTed to.
        documents: List of JSON-serialisable envelope documents. An empty
            list is a no-op: nothing is sent and nothing is logged.
    """
    if not documents:
        return

    payload = json.dumps({'documents': documents})
    headers = {"Content-Type": "application/json"}
    req = urllib2.Request(publish_url, payload, headers)

    # Perform the request before touching the log so a failed publish does
    # not leave a stray file handle open or create an empty log entry.
    response = urllib2.urlopen(req)
    try:
        body = response.read()
    finally:
        response.close()

    with open("output.log", "a") as f:
        f.write(body)
        # Terminate each entry so successive responses do not run together.
        if not body.endswith("\n"):
            f.write("\n")
def main():
    """Export the learning-maps OAI feed to a Learning Registry publish URL.

    The target is the sandbox node when ``DEBUG`` is true and the
    alpha.mimas.ac.uk node otherwise. Records are harvested using the
    ``oai_dc`` metadata prefix.
    """
    if DEBUG:
        publish_url = 'http://sandbox.learningregistry.org/publish'
    else:
        publish_url = 'http://alpha.mimas.ac.uk/publish'

    oai_url = 'https://learning-maps.ncl.ac.uk/bangor/resources/export/oai/'
    acquire_and_publish_documents(oai_url, publish_url, oai_dc_reader, 'oai_dc')
# Run the export only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment