Last active
October 6, 2016 18:59
-
-
Save lawlesst/b758c1573f6fca714c53da60860a0114 to your computer and use it in GitHub Desktop.
Lookup UTs in AMR and output to CSV.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Look up batches of UTs in InCites. | |
Run as: | |
$ python incites_batch_lookup.py sample_file.csv outputfile.csv | |
""" | |
import csv | |
import json | |
from itertools import izip_longest | |
import os | |
import sys | |
import time | |
import requests | |
INCITES_KEY = os.environ['INCITES_KEY'] | |
def grouper(iterable, n, fillvalue=None):
    """
    Split ``iterable`` into consecutive tuples of length ``n``.

    The final tuple is padded with ``fillvalue`` when the input does not
    divide evenly.
    See: http://stackoverflow.com/a/312644/758157
    """
    # n references to ONE iterator: each output tuple pulls n successive items.
    shared = iter(iterable)
    return izip_longest(*([shared] * n), fillvalue=fillvalue)
def get(batch):
    """
    Fetch document-level metrics from InCites for one batch of UTs.

    batch: iterable of UT strings. ``None`` entries (the padding added by
        ``grouper``) are dropped before the request is built.

    Returns the list of records found under ``api[0].rval`` in the JSON
    response, or an empty list when the HTTP status is not 200 or ``rval``
    is empty/null.
    """
    uts = [ut for ut in batch if ut is not None]
    rsp = requests.get(
        'https://api.thomsonreuters.com/incites_ps/v1/DocumentLevelMetricsByUT/json',
        params={'X-TR-API-APP-ID': INCITES_KEY, 'UT': ",".join(uts)}
    )
    if rsp.status_code != 200:
        # Report the number of UTs actually sent, not the padded batch size.
        sys.stderr.write("Batch failed with %d items.\n" % len(uts))
        sys.stderr.write("%s\n" % (rsp,))
        return []
    # ``or []`` keeps a null rval from raising TypeError on iteration.
    return list(rsp.json()['api'][0]['rval'] or [])
def main():
    """
    Read UTs from the CSV named by sys.argv[1], look them up in InCites in
    batches and write the returned records to the CSV named by sys.argv[2].

    Any input column whose header is "ut" (case-insensitive, surrounding
    whitespace ignored) is used; a "WOS:" prefix is removed from each value.
    The output header is taken from the keys of the first record returned,
    and every later record is written in that same column order.
    """
    # Number of UTs to send to InCites at once
    BATCH_SIZE = 200
    to_check = []
    with open(sys.argv[1]) as infile:
        for row in csv.DictReader(infile):
            for k, v in row.items():
                # DictReader uses None for surplus cells (key) and for
                # missing cells (value); neither can be a UT.
                if k is None or v is None:
                    continue
                if k.lower().strip() == "ut":
                    ut = v.strip().replace("WOS:", "")
                    if ut:
                        to_check.append(ut)
    # Binary mode is what the Python 2 csv module expects for output files.
    with open(sys.argv[2], 'wb') as outfile:
        writer = csv.writer(outfile)
        header = None
        for idx, batch in enumerate(grouper(to_check, BATCH_SIZE)):
            sys.stderr.write("Processing batch %d\n" % idx)
            for grp in get(batch):
                if header is None:
                    # write header
                    header = list(grp.keys())
                    writer.writerow(header)
                # Index by header so a record with different or reordered
                # keys cannot shift values into the wrong column.
                writer.writerow([grp.get(col) for col in header])
            # Pause between batches before issuing the next request.
            time.sleep(.5)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Expects an incoming CSV file with ISSNs and will generate output from AMR. | |
E.g. | |
ISSN | |
1234-4900 | |
3902-3829 | |
You can optionally include an ID column for the journal | |
ID,ISSN | |
13, 2309-9302 | |
39, 3990-2123 | |
Run as: | |
$ python issns_to_jcr.py sample_file.csv outputfile.csv | |
""" | |
import csv | |
import json | |
import os | |
import sys | |
import xml.etree.ElementTree as ET | |
import json | |
import requests | |
USER = os.environ['LAMR_USER'] | |
PASSWORD = os.environ['LAMR_PASSWORD'] | |
ns = {'isi': 'http://www.isinet.com/xrpc41'} | |
# Request envelope for the Links AMR service, asking for the JCR fields
# impactGraphURL and issn. prep_amr() fills in {user}, {password} and {items}.
request_template = u"""<?xml version="1.0" encoding="UTF-8" ?>
<request xmlns="http://www.isinet.com/xrpc41" src="app.id=InternalVIVODemo">
<fn name="LinksAMR.retrieve">
<list>
<!-- authentication -->
<map>
<val name="username">{user}</val>
<val name="password">{password}</val>
</map>
<!-- what to to return -->
<map>
<list name="JCR">
<val>impactGraphURL</val>
<val>issn</val>
</list>
</map>
<!-- LOOKUP DATA -->
{items}
</list>
</fn>
</request>
"""


def prep_amr(items):
    """
    Build the AMR request document for a list of journals.

    items: iterable of ``(item_id, issn)`` pairs. A pair is skipped when
        either member is None.

    Every remaining pair is rendered as

        <map name="{item_id}">
          <val name="issn">{issn}</val>
        </map>

    inside one enclosing <map>, which is substituted into
    ``request_template`` together with the credentials.

    Returns the complete request XML as text.
    """
    from xml.sax.saxutils import escape

    map_items = ET.Element("map")
    for item_id, issn in items:
        if (item_id is None) or (issn is None):
            continue
        this_item = ET.Element("map", name=str(item_id))
        de = ET.Element("val", name="issn")
        # A CSV line such as "13, 2309-9302" yields an ISSN with a leading
        # space; send the bare value.
        de.text = issn.strip()
        this_item.append(de)
        map_items.append(this_item)
    # tostring() emits ASCII with character references by default; decoding
    # gives text on both Python 2 and Python 3.
    request_items = ET.tostring(map_items).decode("ascii")
    # Credentials are placed in element text, so &, < and > must be escaped
    # or the request is no longer well-formed XML.
    return request_template.format(
        user=escape(USER), password=escape(PASSWORD), items=request_items
    )
def read(raw):
    """
    Parse an AMR response document.

    raw: the response XML (text or bytes).

    Returns a dict keyed by the ``name`` attribute of every item <map>
    found at fn/map/map. Each value is a dict mapping the ``name``
    attribute of the <val> elements one <map> level below the item to
    their text.
    """
    root = ET.fromstring(raw)
    out = {}
    # Single pass over the item maps (the tree was previously searched twice).
    for cite in root.findall('isi:fn/isi:map/isi:map', ns):
        out[cite.attrib['name']] = dict(
            (val.attrib['name'], val.text)
            for val in cite.findall('isi:map/isi:val', ns)
        )
    return out
def get(msg):
    """
    POST a prepared AMR request and return the parsed response.

    msg: the request XML produced by ``prep_amr``.

    Returns whatever ``read`` extracts from the response body.
    """
    rsp = requests.post(
        'https://ws.isiknowledge.com/cps/xrpc',
        data=msg,
        # The header name is hyphenated; 'Content Type' is not a header
        # any server recognises.
        headers={'Content-Type': "application/xml"}
    )
    # Hand the raw bytes to the XML parser so it applies the encoding the
    # document declares rather than the charset requests guessed.
    return read(rsp.content)
def main():
    """
    Look up every ISSN in the CSV named by sys.argv[1] and write the JCR
    link data to the CSV named by sys.argv[2].

    The input needs an ``ISSN`` column; an optional ``ID`` column names each
    journal, otherwise the zero-based row number is used. The output has
    the columns number, ISSN and JCR, with 'na' for values not returned.
    """
    # The companion batch script in this gist notes AMR takes 50 items per
    # request, so send the journals in chunks rather than all at once.
    BATCH_SIZE = 50
    journals = []
    with open(sys.argv[1]) as infile:
        for num, row in enumerate(csv.DictReader(infile)):
            sys.stderr.write("Processing %s\n" % row['ISSN'])
            jid = row.get('ID', num)
            journals.append((jid, row['ISSN']))
    found = {}
    for start in range(0, len(journals), BATCH_SIZE):
        amr_message = prep_amr(journals[start:start + BATCH_SIZE])
        found.update(get(amr_message))
    # Binary mode is what the Python 2 csv module expects for output files.
    with open(sys.argv[2], 'wb') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(('number', 'ISSN', 'JCR'))
        for item, meta in found.items():
            writer.writerow([item, meta.get('issn', 'na'), meta.get('impactGraphURL', 'na')])


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Expects an incoming CSV file with UT, PMID, or DOI headers and will post to
AMR in batches of 50.
E.g. | |
UT | |
01234 | |
02394 | |
039039 | |
PMID | |
2093030 | |
2405903 | |
95930303 | |
Run as: | |
$ python uts_batch_lookup.py sample_file.csv outputfile.csv | |
""" | |
import csv | |
import json | |
from itertools import izip_longest | |
import os | |
import sys | |
import xml.etree.ElementTree as ET | |
import json | |
import requests | |
USER = os.environ['LAMR_USER'] | |
PASSWORD = os.environ['LAMR_PASSWORD'] | |
ns = {'isi': 'http://www.isinet.com/xrpc41'} | |
ET.register_namespace("isi", "http://www.isinet.com/xrpc41") | |
def grouper(iterable, n, fillvalue=None):
    """
    Yield the items of ``iterable`` as successive ``n``-tuples.

    When the items run out mid-tuple, the remaining positions are filled
    with ``fillvalue``.
    See: http://stackoverflow.com/a/312644/758157
    """
    source = iter(iterable)
    # Every slot is the same iterator object, so zipping the slots together
    # consumes the input n items at a time.
    slots = tuple(source for _ in range(n))
    return izip_longest(*slots, fillvalue=fillvalue)
def read(raw):
    """
    Parse an AMR response document.

    raw: the response XML (text or bytes).

    Returns a dict keyed by the ``name`` attribute of every item <map>
    found at fn/map/map. Each value is a dict mapping the ``name``
    attribute of the <val> elements one <map> level below the item to
    their text.
    """
    root = ET.fromstring(raw)
    out = {}
    # Single pass over the item maps (the tree was previously searched twice).
    for cite in root.findall('isi:fn/isi:map/isi:map', ns):
        out[cite.attrib['name']] = dict(
            (val.attrib['name'], val.text)
            for val in cite.findall('isi:map/isi:val', ns)
        )
    return out
# Request envelope for the Links AMR service, asking for the WOS fields
# sourceURL, ut, doi, pmid and timesCited. The {user}, {password} and
# {items} placeholders are filled in with str.format().
request_template = u"""<?xml version="1.0" encoding="UTF-8" ?>
<request xmlns="http://www.isinet.com/xrpc41" src="app.id=InternalVIVODemo">
<fn name="LinksAMR.retrieve">
<list>
<!-- authentication -->
<map>
<val name="username">{user}</val>
<val name="password">{password}</val>
</map>
<!-- what to to return -->
<map>
<list name="WOS">
<val>sourceURL</val>
<val>ut</val>
<val>doi</val>
<val>pmid</val>
<val>timesCited</val>
</list>
</map>
<!-- LOOKUP DATA -->
{items}
</list>
</fn>
</request>
"""
def get(request_xml):
    """
    POST one AMR request document and return the parsed response.

    request_xml: the request XML produced by ``prep_amr``.

    Returns whatever ``read`` extracts from the response body.
    """
    rsp = requests.post(
        'https://ws.isiknowledge.com/cps/xrpc',
        data=request_xml,
        # The header name is hyphenated; 'Content Type' is not a header
        # any server recognises.
        headers={'Content-Type': "application/xml"}
    )
    # Hand the raw bytes to the XML parser so it applies the encoding the
    # document declares rather than the charset requests guessed.
    return read(rsp.content)
def prep_amr(items, local_id="id"):
    """
    Build the AMR request document for one batch of publications.

    items: iterable of dicts mapping an identifier type (e.g. ut, doi,
        pmid) to its value. ``None`` entries (``grouper`` padding) are
        skipped.
    local_id: key holding the caller's own identifier for a publication.
        It is looked up as given and upper-cased; when absent or empty the
        item's position in ``items`` is used instead.

    Every publication is rendered as

        <map name="{local id}">
          <val name="{id_type}">{value}</val>
          ...
        </map>

    inside one enclosing <map>, which is substituted into
    ``request_template`` together with the credentials.

    Returns the complete request XML as text.
    """
    from xml.sax.saxutils import escape

    map_items = ET.Element("map")
    for idx, pub in enumerate(items):
        if pub is None:
            continue
        local_id_value = pub.get(local_id) or pub.get(local_id.upper())
        if not local_id_value:
            # Covers both a missing and an empty ID cell.
            local_id_value = str(idx)
        this_item = ET.Element("map", name=local_id_value)
        for k, v in pub.items():
            if v is None:
                continue
            v = v.strip()
            if not v:
                # A blank cell carries no identifier to look up.
                continue
            de = ET.Element("val", name=k.lower())
            de.text = v
            this_item.append(de)
        map_items.append(this_item)
    # tostring() emits ASCII with character references by default; decoding
    # gives text on both Python 2 and Python 3.
    request_items = ET.tostring(map_items).decode("ascii")
    # Credentials are placed in element text, so &, < and > must be escaped
    # or the request is no longer well-formed XML.
    return request_template.format(
        user=escape(USER), password=escape(PASSWORD), items=request_items
    )
def main():
    """
    Look up every row of the CSV named by sys.argv[1] in AMR and write the
    results to the CSV named by sys.argv[2].

    Input column names are lower-cased and values stripped before being
    handed to ``prep_amr``. The output has the columns id, ut, doi, pmid,
    times cited and source; a returned UT is written with a "WOS:" prefix.
    """
    # AMR will take 50 items at a time.
    BATCH_SIZE = 50
    found = []
    to_check = []
    with open(sys.argv[1]) as infile:
        for row in csv.DictReader(infile):
            # DictReader yields a None key for surplus cells and None values
            # for missing ones; neither can be lower-cased or stripped.
            to_check.append(dict(
                (k.lower(), v.strip())
                for k, v in row.items()
                if k is not None and v is not None
            ))
    for idx, batch in enumerate(grouper(to_check, BATCH_SIZE)):
        xml = prep_amr(batch)
        sys.stderr.write("Processing batch %d\n" % idx)
        # Post the batch
        found.append(get(xml))
    # Binary mode is what the Python 2 csv module expects for output files.
    with open(sys.argv[2], 'wb') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(('id', 'ut', 'doi', 'pmid', 'times cited', 'source'))
        for grp in found:
            for k, item in grp.items():
                ut = item.get('ut')
                if ut is not None:
                    ut = "WOS:" + ut
                writer.writerow([
                    k,
                    ut,
                    item.get('doi', ""),
                    item.get('pmid', ""),
                    item.get('timesCited', '0'),
                    item.get('sourceURL', 'N/A'),
                ])


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Expects an incoming CSV file with UTs and will generate output from AMR. | |
E.g. | |
UT | |
01234 | |
02394 | |
039039 | |
Run as: | |
$ python uts_to_links.py sample_file.csv outputfile.csv | |
""" | |
import csv | |
import json | |
import os | |
import sys | |
import xml.etree.ElementTree as ET | |
import json | |
import requests | |
USER = os.environ['LAMR_USER'] | |
PASSWORD = os.environ['LAMR_PASSWORD'] | |
ns = {'isi': 'http://www.isinet.com/xrpc41'} | |
def read(raw):
    """
    Parse an AMR response document.

    raw: the response XML (text or bytes).

    Returns a dict keyed by the ``name`` attribute of every item <map>
    found at fn/map/map. Each value is a dict mapping the ``name``
    attribute of the <val> elements one <map> level below the item to
    their text.
    """
    root = ET.fromstring(raw)
    out = {}
    # Single pass over the item maps (the tree was previously searched twice).
    for cite in root.findall('isi:fn/isi:map/isi:map', ns):
        out[cite.attrib['name']] = dict(
            (val.attrib['name'], val.text)
            for val in cite.findall('isi:map/isi:val', ns)
        )
    return out
# Request envelope for the Links AMR service, asking for the WOS fields ut,
# timesCited, sourceURL and citingArticlesURL for a single item that is
# always named "cite_1". get() fills in the placeholders.
request_template = u"""<?xml version="1.0" encoding="UTF-8" ?>
<request xmlns="http://www.isinet.com/xrpc41" src="app.id=InternalVIVODemo">
<fn name="LinksAMR.retrieve">
<list>
<!-- authentication -->
<map>
<val name="username">{user}</val>
<val name="password">{password}</val>
</map>
<!-- what to to return -->
<map>
<list name="WOS">
<val>ut</val>
<val>timesCited</val>
<val>sourceURL</val>
<val>citingArticlesURL</val>
</list>
</map>
<!-- LOOKUP DATA -->
<map>
<map name="cite_1">
<val name="{id_type}">{value}</val>
</map>
</map>
</list>
</fn>
</request>
"""


def get(id_type, value):
    """
    Look up a single identifier in AMR.

    id_type: name of the identifier field sent to AMR (``main`` uses 'ut').
    value: the identifier itself.

    Returns whatever ``read`` extracts from the response body; the item in
    the request is named 'cite_1'.
    """
    from xml.sax.saxutils import escape

    # Everything substituted into the template is escaped so that &, < or >
    # (and a double quote inside the name="..." attribute) cannot break the
    # request document.
    xml = request_template.format(
        id_type=escape(id_type, {'"': "&quot;"}),
        value=escape(value),
        user=escape(USER),
        password=escape(PASSWORD),
    )
    rsp = requests.post(
        'https://ws.isiknowledge.com/cps/xrpc',
        data=xml,
        # The header name is hyphenated; 'Content Type' is not a header
        # any server recognises.
        headers={'Content-Type': "application/xml"}
    )
    # Hand the raw bytes to the XML parser so it applies the encoding the
    # document declares rather than the charset requests guessed.
    return read(rsp.content)
def main():
    """
    Look up every UT in the CSV named by sys.argv[1], one request per row,
    and write the link data to the CSV named by sys.argv[2].

    The input needs a ``UT`` column. The output has the columns UT, times
    cited, source and citing articles. A UT for which the response holds no
    usable data still gets a row (the requested UT, '0' citations, blank
    links) instead of aborting the run with a KeyError.
    """
    found = []
    with open(sys.argv[1]) as infile:
        for row in csv.DictReader(infile):
            requested = row['UT']
            sys.stderr.write("Processing %s\n" % requested)
            # Keep the requested UT next to the result so a miss can still
            # be reported against the right identifier.
            found.append((requested, get('ut', requested).get('cite_1', {})))
    # Binary mode is what the Python 2 csv module expects for output files.
    with open(sys.argv[2], 'wb') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(('UT', 'times cited', 'source', 'citing articles'))
        for requested, item in found:
            writer.writerow([
                item.get('ut', requested),
                item.get('timesCited', '0'),
                item.get('sourceURL', ''),
                item.get('citingArticlesURL', ''),
            ])


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment