Created
November 22, 2017 19:05
-
-
Save lorawoodford/bbd0b4ee0f4c32ed30af039e2e903c0d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests, json, secrets, time, urllib, re | |
startTime = time.time() | |
# import secrets | |
ASbaseURL = secrets.ASbaseURL | |
ASuser = secrets.ASuser | |
ASpassword = secrets.ASpassword | |
DSbaseURL = secrets.DSbaseURL | |
# function to find key in nested dicts: see http://stackoverflow.com/questions/9807634/find-all-occurences-of-a-key-in-nested-python-dictionaries-and-lists | |
def gen_dict_extract(key, var): | |
if hasattr(var,'iteritems'): | |
for k, v in var.iteritems(): | |
if k == key: | |
yield v | |
if isinstance(v, dict): | |
for result in gen_dict_extract(key, v): | |
yield result | |
elif isinstance(v, list): | |
for d in v: | |
for result in gen_dict_extract(key, d): | |
yield result | |
# authenticate to ArchivesSpace | |
auth = requests.post(ASbaseURL + '/users/' + ASuser + '/login?password=' + ASpassword).json() | |
session = auth["session"] | |
headers = {'X-ArchivesSpace-Session':session} | |
# test for successful ArchivesSpace connection | |
def test_connection(): | |
try: | |
requests.get(ASbaseURL) | |
print 'Connected to ArchivesSpace!' | |
return True | |
except requests.exceptions.ConnectionError: | |
print 'ArchivesSpace connection error. Please confirm ArchivesSpace is running. Trying again in 10 seconds.' | |
is_connected = test_connection() | |
while not is_connected: | |
time.sleep(10) | |
is_connected = test_connection() | |
# Account for ranges in indicator_2s | |
def hyphen_range(s): | |
# Takes a range in form of "a-b" and generate a list of numbers between a and b inclusive. | |
# Also accepts comma separated ranges like "a-b,c-d,f" will build a list which will include | |
# Numbers from a to b, a to d and f | |
s="".join(s.split()) #removes white space | |
r=set() | |
for x in s.split(','): | |
t=x.split('-') | |
if len(t) not in [1,2]: raise SyntaxError("hash_range is given its arguement as "+s+" which seems not correctly formated.") | |
r.add(int(t[0])) if len(t)==1 else r.update(set(range(int(t[0]),int(t[1])+1))) | |
l=list(r) | |
l.sort() | |
return l | |
# Get all AOs from the resource record | |
resourceID = '1045' # raw_input('Enter resource ID: ') | |
ASendpoint = '/repositories/3/resources/'+resourceID+'/tree' | |
output = requests.get(ASbaseURL + ASendpoint, headers=headers).json() | |
archivalObjects = [] | |
for value in gen_dict_extract('record_uri', output): | |
if 'archival_objects' in value: | |
archivalObjects.append(value) | |
print 'Found ' + str(len(archivalObjects)-1) + ' archival objects attached to resource ' + resourceID +'.' | |
# Get Dspace item list | |
handle = '1774.2/41445'#raw_input('Enter handle: ') | |
DSendpoint = DSbaseURL + 'rest/handle/' + handle | |
if DSendpoint != '': | |
print 'Connected to DSpace!' | |
else: | |
print 'DSpace connection error. Please confirm DSpace is running.' | |
collection = requests.get(DSendpoint).json() | |
collectionID = collection['id'] | |
DSendpoint = DSbaseURL + 'rest/collections/' + str(collectionID)+ '/items?limit=20' | |
itemList = requests.get(DSendpoint).json() | |
print 'Found ' + str(len(itemList)) + ' DSpace items attached to collection.' | |
for item in itemList: | |
match = {} | |
DSitems = {} | |
itemHandle = item['handle'] | |
itemID = str(item['link']) | |
bitstreams = requests.get(DSbaseURL+itemID+'/bitstreams').json() | |
for bitstream in bitstreams: | |
fileName = bitstream['name'] | |
strippedFileName = fileName.replace('.pdf','') | |
DSitems['strippedFileName'] = strippedFileName | |
for archivalObject in archivalObjects: | |
output = requests.get(ASbaseURL + archivalObject, headers=headers).json() | |
for instance in output['instances']: | |
if match == {}: | |
indicator_1 = instance['container']['indicator_1'] | |
if indicator_1.startswith('1-'): | |
try: | |
indicator_2 = instance['container']['indicator_2'] | |
if '-' in indicator_2: | |
indicator_2s = hyphen_range(indicator_2) | |
print indicator_2s | |
for i in range(len(indicator_2s)): | |
print i | |
print hyphen_range(indicator_2)[i] | |
print 'Constructing potential file name from archival object.' | |
indicator_2 = 'test' | |
indicator_1 = instance['container']['indicator_1'] | |
indicator_1 = indicator_1.split('-') | |
indicator_1a = indicator_1[0] | |
indicator_1a = indicator_1a.rjust(2,'0') | |
indicator_1b = re.sub('[a-z]', '', indicator_1[1]) | |
indicator_1b = indicator_1b.rjust(2,'0') | |
try: | |
indicator_3 = instance['container']['indicator_3'] | |
indicator_3 = '_' + indicator_3.rjust(2,'0') | |
except: | |
indicator_3 = '' | |
potentialFilename = indicator_1a + '_' + indicator_1b + indicator_2 + indicator_3 | |
print 'Comparing ' + potentialFilename + ' to ' + DSitems['strippedFileName'] | |
if potentialFilename == DSitems['strippedFileName']: | |
print 'Creating JSON for match between ' + potentialFilename + ' and ' + strippedFileName + '.' | |
match['digital_object_id'] = DSbaseURL + itemHandle | |
match['title'] = output['title'] + '(digital copy)' | |
match['file_versions'] = [{'file_uri': DSbaseURL + itemHandle}] | |
print match | |
indicator_2 = instance['container']['indicator_2'] | |
else: | |
print 'Constructing potential file name from archival object.' | |
indicator_1 = indicator_1.split('-') | |
indicator_1a = indicator_1[0] | |
indicator_1a = indicator_1a.rjust(2,'0') | |
indicator_1b = re.sub('[a-z]', '', indicator_1[1]) | |
indicator_1b = indicator_1b.rjust(2,'0') | |
indicator_2 = '_' + indicator_2.rjust(2,'0') | |
try: | |
indicator_3 = instance['container']['indicator_3'] | |
indicator_3 = '_' + indicator_3.rjust(2,'0') | |
except: | |
indicator_3 = '' | |
potentialFilename = indicator_1a + '_' + indicator_1b + indicator_2 + indicator_3 | |
print 'Comparing ' + potentialFilename + ' to ' + DSitems['strippedFileName'] | |
if potentialFilename == DSitems['strippedFileName']: | |
print 'Creating JSON for match between ' + potentialFilename + ' and ' + strippedFileName + '.' | |
match['digital_object_id'] = DSbaseURL + itemHandle | |
match['title'] = output['title'] + '(digital copy)' | |
match['file_versions'] = [{'file_uri': DSbaseURL + itemHandle}] | |
print match | |
break | |
except: | |
indicator_2 = '' | |
else: | |
continue | |
break | |
# show script runtime | |
elapsedTime = time.time() - startTime | |
m, s = divmod(elapsedTime, 60) | |
h, m = divmod(m, 60) | |
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment