Last active
August 15, 2019 17:46
-
-
Save AyeGill/421785edbe44c9d67a4da15d785d914c to your computer and use it in GitHub Desktop.
Python script to pull a bibtex reference from arxiv.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import arxiv | |
import sys | |
import PyPDF4 | |
import re | |
## Usage: arxiref.py 1711.07059
##    or: arxiref.py paper.pdf
## If the argument looks like an arXiv id, the paper with that id is looked up on arXiv.
## Otherwise the argument is treated as the filename of a PDF: an arXiv id is searched for
## on page 1, and the script then proceeds as in the first case.
## The produced BibTeX code contains "**" as a placeholder for the citation key you need to choose.
# Pattern for the "arXiv:NNNN.NNNNN" stamp searched for in extracted PDF text.
# Group 1 captures the numeric id only (digits, a dot, digits); a trailing
# version suffix such as "v2" is not part of the capture.
# Raw string: "\." in a normal string literal is an invalid escape sequence.
regex = r"arXiv:([0-9]+\.[0-9]+)"
p = re.compile(regex)
def check_id(arg):
    """Test if input is an arXiv id.

    An id is accepted when the whole string has the form
    ``<digits>.<digits>`` with an optional version suffix ``v<digits>``,
    e.g. ``1711.07059`` or ``1711.07059v2``.

    Checking the structure (rather than only the set of characters used)
    keeps strings such as ``"v."``, ``"..."`` or ``"1.2.3"`` from being
    mistaken for ids.

    Args:
        arg: The command-line argument to classify.

    Returns:
        True if ``arg`` looks like an arXiv id, False otherwise.
    """
    return re.fullmatch(r"[0-9]+\.[0-9]+(v[0-9]+)?", arg) is not None
def print_ref(arxiv_id):
    """Print a BibTeX ``@Article`` entry for the paper with the given arXiv id.

    The paper is looked up with ``arxiv.query``; the first result supplies
    the title, the authors (joined with " and ") and the year (the first
    four characters of the ``published`` field). The citation key is
    printed as the placeholder ``**`` for the user to replace.

    Args:
        arxiv_id: The arXiv identifier to look up, e.g. ``"1711.07059"``.

    Raises:
        IndexError: If the query returns no results for ``arxiv_id``.
    """
    results = arxiv.query(id_list=[arxiv_id])
    # NOTE(review): assumes arxiv.query returns an empty sequence when
    # nothing matches -- confirm against the installed arxiv package version.
    if not results:
        raise IndexError("no arXiv entry found for id %r" % (arxiv_id,))
    paper = results[0]

    entry_lines = [
        # The comma after the key placeholder is required by BibTeX; without
        # it the entry is invalid once "**" is replaced by a real key.
        "@Article{**,",
        "Title = {%s}," % paper['title'],
        "Author = {%s}," % " and ".join(paper['authors']),
        "Year = {%s}," % paper['published'][:4],
        'archivePrefix = "arXiv",',
        "eprint = {%s}" % arxiv_id,
        "}",
    ]
    print("\n".join(entry_lines))
def get_id_file(filename):
    """Return the arXiv id found on the first page of a PDF file.

    The text of page 0 is extracted with PyPDF4 and searched with the
    module-level pattern ``p`` for an ``arXiv:<id>`` stamp.

    Args:
        filename: Path of the PDF file to inspect.

    Returns:
        The id captured by the pattern (the part after ``"arXiv:"``).

    Raises:
        ValueError: If no arXiv id is found on the first page.
    """
    with open(filename, 'rb') as f:
        pdf = PyPDF4.PdfFileReader(f)
        # Extract the text while the file is still open.
        text = pdf.getPage(0).extractText()

    m = p.search(text)
    if m is None:
        # Previously this fell through to an AttributeError on None.
        raise ValueError(
            "no arXiv id found on the first page of %r" % (filename,)
        )
    # Group 1 is the id itself, i.e. the match without the "arXiv:" prefix.
    return m.group(1)
def main():
    """Script entry point: print a BibTeX entry for the id or PDF given in argv.

    The first command-line argument is either an arXiv id or the filename
    of a PDF whose first page carries an arXiv id. Exits with a usage
    message when no argument is supplied.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: %s <arXiv id | paper.pdf>" % sys.argv[0])

    argument = sys.argv[1]
    if check_id(argument):
        print_ref(argument)
    else:  # argument is a filename
        print_ref(get_id_file(argument))


# Only run when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment