Created
April 21, 2015 06:59
-
-
Save haccanri/eac4e7b83e11ae05e3b0 to your computer and use it in GitHub Desktop.
A script to extract title from hand-written bib items and download the bibtex item from Google Scholar with the help of scholar.py (https://github.com/hildensia/scholar).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 23 21:13:21 2015
@author: Qing

A script that extracts the titles from hand-written bib items and downloads
the matching BibTeX entries from Google Scholar with the help of scholar.py
(https://github.com/hildensia/scholar).
"""
import sys
import time
from subprocess import Popen, PIPE
def extract_title(texFile, output='title.txt'):
    r"""Guess the title of every ``\bibitem`` entry and write one per line.

    The file is split on the literal text ``\bibitem``.  Inside each
    non-blank chunk, commas are treated like full stops and the longest
    resulting phrase is taken to be the title (the first one wins on a tie).
    Newlines inside the phrase become spaces, and ``\newblock``, ``\it``,
    ``{`` and ``}`` are removed.

    Note that any non-blank text preceding the first ``\bibitem`` is handled
    like an entry as well, because only the split is used to find entries.

    Args:
        texFile: path of the file holding the hand-written bib items.
        output: path of the file that receives the titles, joined by
            newlines with no trailing newline (overwritten if it exists).

    Returns:
        The list of extracted titles, in file order.
    """
    # Read through a context manager so the handle is always closed.
    with open(texFile) as fin:
        text = fin.read()

    # Markup stripped from the chosen phrase, in this order.  ``\it`` is a
    # raw string so the backslash is not read as an (invalid) escape.
    latex = ['\\newblock', r'\it', '{', '}']

    titles = []
    for entry in text.split('\\bibitem'):
        if not entry.strip():
            continue
        # Commas and full stops both separate authors / title / journal.
        phrases = entry.replace(',', '.').split('.')
        # pick the longest phrase as title
        title = max(phrases, key=len).replace('\n', ' ')
        # remove latex commands; strip after each removal so that the
        # whitespace left behind by one command does not survive
        for cmd in latex:
            title = title.replace(cmd, '').strip()
        titles.append(title)

    with open(output, 'w') as fout:
        fout.write('\n'.join(titles))
    return titles
def batch_cmd(fname='title.txt', output='bibtext.txt', seconds=0.5):
    """Query scholar.py for every title in *fname* and save what it prints.

    Each line of *fname* is stripped and used as the phrase of the command
    ``scholar.py -c 1 -p <title> --citation bt``.  Whatever the command
    writes to stdout is appended to *output*, followed by a newline.  A
    running line counter is echoed to stdout as a progress indicator.

    Args:
        fname: text file with one title per line.
        output: file that receives the command output (overwritten if it
            exists).  It is written as raw bytes, exactly as received.
        seconds: pause after every query, in seconds.

    Raises:
        OSError: if the scholar.py command cannot be started.
    """
    # NOTE(review): on Windows the shell is kept, presumably so that
    # ``scholar.py`` can still be started through its file association;
    # elsewhere the argument list is executed directly -- confirm on Windows.
    use_shell = sys.platform.startswith('win')

    with open(fname) as fin, open(output, 'wb') as fout:
        for cnt, line in enumerate(fin, 1):
            title = line.strip()
            sys.stdout.write('%d ' % cnt)
            sys.stdout.flush()
            # The title travels as a single argument instead of being pasted
            # into a shell string, so quotes, ``$`` or backticks in a title
            # cannot break or alter the command.
            args = ['scholar.py', '-c', '1', '-p', title, '--citation', 'bt']
            proc = Popen(args, stdout=PIPE, shell=use_shell)
            # communicate() drains the pipe and waits for the child to exit.
            bibtex = proc.communicate()[0]
            fout.write(bibtex + b'\n')
            time.sleep(seconds)
if __name__ == "__main__":
    # The input file is the hand-written bib items. For example:
    #
    #   \bibitem{i}M.T. Barlow, Diffusions on fractals, Lectures on Probability Theory and Statistics, Lect. Notes Math., Springer, 1690(1998),1-121
    #   \bibitem{k}M.T. Barlow, Heat kernels and sets with fractal structure, in Heat kernels and analysis on manifolds graphs, and metric spaces, Contemporary Math. 338(2003), 11-40
    #   \bibitem{l}M.T. Barlow, T.Coulhon and T.Kumagai, Characterization of sub-Gaussian heat kernel estimates on strongly recurrent graphs, Comm. Pure Appl. Math. 58(200)
    #
    # Step 1 writes the guessed titles to the default 'title.txt';
    # step 2 reads that file and runs one scholar.py query per title.
    extract_title('bibitem.tex')
    batch_cmd()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment