Skip to content

Instantly share code, notes, and snippets.

@haccanri
Created April 21, 2015 06:59
Show Gist options
  • Save haccanri/eac4e7b83e11ae05e3b0 to your computer and use it in GitHub Desktop.
Save haccanri/eac4e7b83e11ae05e3b0 to your computer and use it in GitHub Desktop.
A script to extract title from hand-written bib items and download the bibtex item from Google Scholar with the help of scholar.py (https://github.com/hildensia/scholar).
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 23 21:13:21 2015
@author: Qing
A script that extracts the titles from hand-written bib items and downloads the
matching BibTeX entries from Google Scholar with the help of scholar.py (https://github.com/hildensia/scholar).
"""
import sys
import time
from subprocess import Popen, PIPE
def extract_title(texFile, output='title.txt'):
    r"""Extract a best-guess title from each hand-written ``\bibitem`` entry.

    The file content is split on the literal ``\bibitem``.  Within each
    entry, commas are treated like periods, the entry is split on periods,
    and the longest resulting phrase is taken to be the title (the first one
    wins on a tie).  Newlines inside that phrase become spaces, and the LaTeX
    fragments ``\newblock``, ``\it``, ``{`` and ``}`` are removed.

    Args:
        texFile: Path of the file containing the ``\bibitem`` entries.
        output: Path of the file to write.  It receives the titles joined by
            newlines, with no trailing newline.

    Returns:
        None.  The titles are only written to ``output``.
    """
    # Read inside a context manager so the handle is closed deterministically.
    with open(texFile) as fin:
        content = fin.read()

    # Whatever precedes the first \bibitem (e.g. \begin{thebibliography}{99})
    # is preamble rather than an entry, so the first chunk is dropped.
    entries = content.split('\\bibitem')[1:]

    latex_fragments = ('\\newblock', '\\it', '{', '}')
    titles = []
    for entry in entries:
        if not entry.strip():
            continue
        phrases = entry.replace(',', '.').split('.')
        # Heuristic: the longest phrase is assumed to be the title.
        title = max(phrases, key=len).replace('\n', ' ')
        # Strip after every removal so that whitespace exposed by deleting a
        # fragment does not survive at either end of the title.
        for fragment in latex_fragments:
            title = title.replace(fragment, '').strip()
        titles.append(title)

    with open(output, 'w') as fout:
        fout.write('\n'.join(titles))
def batch_cmd(fname='title.txt', output='bibtext.txt', seconds=0.5):
    """Look up each title with scholar.py and save what the command prints.

    For every non-blank line of ``fname`` this runs
    ``scholar.py -c 1 -p <title> --citation bt`` and writes the command's
    standard output, followed by a newline, to ``output``.  A running
    counter is echoed to stdout as a progress indicator.

    Args:
        fname: Path of the file holding one title per line.
        output: Path of the file that receives the command output; it is
            overwritten.
        seconds: Pause after each lookup, in seconds (presumably to avoid
            sending requests to Google Scholar too quickly).
    """
    # On Windows a bare ``scholar.py`` only runs through the shell (via the
    # .py file association), so the shell is kept there; elsewhere the
    # command is executed directly.
    use_shell = sys.platform == 'win32'

    with open(fname) as fin, open(output, 'w') as fout:
        stripped = (line.strip() for line in fin)
        # Blank lines carry no title, so they are skipped instead of being
        # sent as an empty query.
        for cnt, title in enumerate((t for t in stripped if t), 1):
            sys.stdout.write('%d ' % cnt)
            sys.stdout.flush()

            # Passing the title as its own argument (instead of pasting it
            # into a command string) keeps quotes, ``$`` or backticks inside
            # a title from being interpreted as shell syntax.
            cmd = ['scholar.py', '-c', '1', '-p', title, '--citation', 'bt']
            proc = Popen(cmd, stdout=PIPE, shell=use_shell,
                         universal_newlines=True)
            # communicate() reads all output and waits for the child to
            # exit, so no unreaped process or open pipe is left behind.
            bibtex = proc.communicate()[0]

            fout.write('%s\n' % bibtex)
            time.sleep(seconds)
if __name__ == "__main__":
    # The input file holds the hand-written bib items. For example:
    #
    #   \bibitem{i}M.T. Barlow, Diffusions on fractals, Lectures on Probability Theory and Statistics, Lect. Notes Math., Springer, 1690(1998),1-121
    #   \bibitem{k}M.T. Barlow, Heat kernels and sets with fractal structure, in Heat kernels and analysis on manifolds graphs, and metric spaces, Contemporary Math. 338(2003), 11-40
    #   \bibitem{l}M.T. Barlow, T.Coulhon and T.Kumagai, Characterization of sub-Gaussian heat kernel estimates on strongly recurrent graphs, Comm. Pure Appl. Math. 58(200)
    #
    # The example is written as comments because, inside an ordinary string
    # literal, the "\b" of "\bibitem" is read as a backspace escape.
    extract_title('bibitem.tex')
    batch_cmd()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment