Skip to content

Instantly share code, notes, and snippets.

@codeswimmer
Forked from olasitarska/pgessays.py
Created November 18, 2012 18:04
Show Gist options
  • Save codeswimmer/4106565 to your computer and use it in GitHub Desktop.
Save codeswimmer/4106565 to your computer and use it in GitHub Desktop.
Builds epub book out of Paul Graham's essays.
# -*- coding: utf-8 -*-
"""
Builds epub book out of Paul Graham's essays: http://paulgraham.com/articles.html
Author: Ola Sitarska <[email protected]>
This script requires python-epub-library: http://code.google.com/p/python-epub-builder/
"""
import re, ez_epub, urllib2, genshi
from BeautifulSoup import BeautifulSoup
def addSection(link):
    """Download one essay page and convert it into an ``ez_epub.Section``.

    ``link`` is the essay's path relative to ``http://www.paulgraham.com/``
    (the caller passes the ``href`` of a link from the articles index).

    The title is read from the ``alt`` text of the first ``<img>`` inside the
    first ``width="455"`` table; the body is the first ``<font>`` element of
    that table, split into paragraphs on ``<br /><br />``.

    Extraction is best-effort: if the page does not have the expected layout,
    the section is returned with whatever was collected before the failure
    and a warning is written to stderr.  Errors raised while downloading the
    page are not caught here.
    """
    import sys  # local import: only needed for the warning below

    page = urllib2.urlopen('http://www.paulgraham.com/' + link).read()
    soup = BeautifulSoup(page)
    section = ez_epub.Section()
    try:
        # Look the content table up once; title and body both come from it.
        content = soup.findAll('table', {'width': '455'})[0]
        section.title = content.find('img')['alt']
        print(section.title)
        for p in str(content.find('font')).split("<br /><br />"):
            section.text.append(genshi.core.Markup(p))
    except Exception as exc:
        # Keep building the book even if one page cannot be parsed, but say
        # so instead of dropping the failure silently.  Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        sys.stderr.write(
            "warning: could not fully parse %s: %r\n" % (link, exc))
    return section
# Script body: fetch the articles index, turn every linked essay into a
# section, and write the book out.
book = ez_epub.Book()
book.title = "Paul Graham's Essays"
book.authors = ['Paul Graham']

page = urllib2.urlopen('http://www.paulgraham.com/articles.html').read()
soup = BeautifulSoup(page)

# The essay links are taken from the second width="455" table on the index.
links = soup.findAll('table', {'width': '455'})[1].findAll('a')

# One section per link, in index order; each call downloads that essay.
book.sections = [addSection(link['href']) for link in links]

# The book title is passed to make() as its argument.
book.make(book.title)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment