Skip to content

Instantly share code, notes, and snippets.

@codeswimmer
Forked from olasitarska/pgessays.py
Created November 18, 2012 18:04

Revisions

  1. @olasitarska olasitarska revised this gist Nov 18, 2012. 1 changed file with 27 additions and 22 deletions.
    49 changes: 27 additions & 22 deletions pgessays.py
    Original file line number Diff line number Diff line change
    @@ -10,29 +10,37 @@
    import re, ez_epub, urllib2, genshi
    from BeautifulSoup import BeautifulSoup

    def addSection(link):
    page = urllib2.urlopen('http://www.paulgraham.com/'+link).read()
    soup = BeautifulSoup(page)
    soup.prettify()

    def addSection(link, title):
    if not 'http' in link:
    page = urllib2.urlopen('http://www.paulgraham.com/'+link).read()
    soup = BeautifulSoup(page)
    soup.prettify()
    else:
    page = urllib2.urlopen(link).read()

    section = ez_epub.Section()
    try:
    section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt']
    section.title = title
    print section.title

    font = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
    if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font and not len(font)<100:
    content = font
    else:
    content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0])

    for p in content.split("<br /><br />"):
    section.text.append(genshi.core.Markup(p))
    if not 'http' in link:
    font = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
    if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font and not len(font)<100:
    content = font
    else:
    content = ''
    for par in soup.findAll('table', {'width':'455'})[0].findAll('p'):
    content += str(par)

    #exception for Subject: Airbnb
    for pre in soup.findAll('pre'):
    section.text.append(genshi.core.Markup(pre))
    for p in content.split("<br /><br />"):
    section.text.append(genshi.core.Markup(p))

    #exception for Subject: Airbnb
    for pre in soup.findAll('pre'):
    section.text.append(genshi.core.Markup(pre))
    else:
    for p in str(page).replace("\n","<br />").split("<br /><br />"):
    section.text.append(genshi.core.Markup(p))
    except:
    pass

    @@ -50,10 +58,7 @@ def addSection(link):
    links = soup.findAll('table', {'width': '455'})[1].findAll('a')
    sections = []
    for link in links:
    try:
    sections.append(addSection(link['href']))
    except:
    print "Error: URL doesn't exist"

    sections.append(addSection(link['href'], link.text))

    book.sections = sections
    book.make(book.title)
  2. @olasitarska olasitarska revised this gist Nov 18, 2012. 1 changed file with 0 additions and 2 deletions.
    2 changes: 0 additions & 2 deletions pgessays.py
    Original file line number Diff line number Diff line change
    @@ -2,8 +2,6 @@
    """
    Builds epub book out of Paul Graham's essays: http://paulgraham.com/articles.html
    Current (11/18/2012) version of the book is available here: https://dl.dropbox.com/u/527278/Paul%20Graham%27s%20Essays.epub
    Author: Ola Sitarska <[email protected]>
    This script requires python-epub-library: http://code.google.com/p/python-epub-builder/
  3. @olasitarska olasitarska revised this gist Nov 18, 2012. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions pgessays.py
    Original file line number Diff line number Diff line change
    @@ -2,6 +2,8 @@
    """
    Builds epub book out of Paul Graham's essays: http://paulgraham.com/articles.html
    Current (11/18/2012) version of the book is available here: https://dl.dropbox.com/u/527278/Paul%20Graham%27s%20Essays.epub
    Author: Ola Sitarska <[email protected]>
    This script requires python-epub-library: http://code.google.com/p/python-epub-builder/
  4. @olasitarska olasitarska revised this gist Nov 18, 2012. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion pgessays.py
    Original file line number Diff line number Diff line change
    @@ -21,7 +21,7 @@ def addSection(link):
    print section.title

    font = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
    if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font:
    if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font and not len(font)<100:
    content = font
    else:
    content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0])
  5. @olasitarska olasitarska revised this gist Nov 18, 2012. 1 changed file with 3 additions and 2 deletions.
    5 changes: 3 additions & 2 deletions pgessays.py
    Original file line number Diff line number Diff line change
    @@ -20,8 +20,9 @@ def addSection(link):
    section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt']
    print section.title

    if not 'Get funded by' in str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0]):
    content = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
    font = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
    if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font:
    content = font
    else:
    content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0])

  6. @olasitarska olasitarska revised this gist Nov 18, 2012. 1 changed file with 6 additions and 0 deletions.
    6 changes: 6 additions & 0 deletions pgessays.py
    Original file line number Diff line number Diff line change
    @@ -24,8 +24,14 @@ def addSection(link):
    content = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
    else:
    content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0])

    for p in content.split("<br /><br />"):
    section.text.append(genshi.core.Markup(p))

    #exception for Subject: Airbnb
    for pre in soup.findAll('pre'):
    section.text.append(genshi.core.Markup(pre))

    except:
    pass

  7. @olasitarska olasitarska revised this gist Nov 18, 2012. 1 changed file with 6 additions and 1 deletion.
    7 changes: 6 additions & 1 deletion pgessays.py
    Original file line number Diff line number Diff line change
    @@ -19,7 +19,12 @@ def addSection(link):
    try:
    section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt']
    print section.title
    for p in str(soup.findAll('table', {'width':'455'})[0].find('font')).split("<br /><br />"):

    if not 'Get funded by' in str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0]):
    content = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
    else:
    content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0])
    for p in content.split("<br /><br />"):
    section.text.append(genshi.core.Markup(p))
    except:
    pass
  8. @olasitarska olasitarska revised this gist Nov 18, 2012. 1 changed file with 4 additions and 1 deletion.
    5 changes: 4 additions & 1 deletion pgessays.py
    Original file line number Diff line number Diff line change
    @@ -38,7 +38,10 @@ def addSection(link):
    links = soup.findAll('table', {'width': '455'})[1].findAll('a')
    sections = []
    for link in links:
    sections.append(addSection(link['href']))
    try:
    sections.append(addSection(link['href']))
    except:
    print "Error: URL doesn't exist"

    book.sections = sections
    book.make(book.title)
  9. @olasitarska olasitarska revised this gist Nov 18, 2012. 1 changed file with 7 additions and 4 deletions.
    11 changes: 7 additions & 4 deletions pgessays.py
    Original file line number Diff line number Diff line change
    @@ -16,10 +16,13 @@ def addSection(link):
    soup.prettify()

    section = ez_epub.Section()
    section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt']
    print section.title
    for p in str(soup.findAll('table', {'width':'455'})[0].find('font')).split("<br /><br />"):
    section.text.append(genshi.core.Markup(p))
    try:
    section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt']
    print section.title
    for p in str(soup.findAll('table', {'width':'455'})[0].find('font')).split("<br /><br />"):
    section.text.append(genshi.core.Markup(p))
    except:
    pass

    return section

  10. @olasitarska olasitarska revised this gist Nov 18, 2012. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions pgessays.py
    Original file line number Diff line number Diff line change
    @@ -17,6 +17,7 @@ def addSection(link):

    section = ez_epub.Section()
    section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt']
    print section.title
    for p in str(soup.findAll('table', {'width':'455'})[0].find('font')).split("<br /><br />"):
    section.text.append(genshi.core.Markup(p))

  11. @olasitarska olasitarska created this gist Nov 18, 2012.
    40 changes: 40 additions & 0 deletions pgessays.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,40 @@
    # -*- coding: utf-8 -*-
    """
    Builds epub book out of Paul Graham's essays: http://paulgraham.com/articles.html
    Author: Ola Sitarska <[email protected]>
    This script requires python-epub-library: http://code.google.com/p/python-epub-builder/
    """

    import re, ez_epub, urllib2, genshi
    from BeautifulSoup import BeautifulSoup

    def addSection(link):
        """Download one essay and convert it into an epub section.

        link -- path of the essay, relative to http://www.paulgraham.com/

        Returns an ez_epub.Section. Its title is the ``alt`` text of the
        first image inside the page's first ``width="455"`` table; its text
        is the first ``<font>`` element of that table, split into chunks on
        double line breaks.

        Raises IndexError when the page has no such table and TypeError
        when that table contains no image. Errors from urllib2 propagate
        unchanged.
        """
        response = urllib2.urlopen('http://www.paulgraham.com/' + link)
        try:
            page = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()

        soup = BeautifulSoup(page)
        # Look the content table up once and reuse it for title and body.
        table = soup.findAll('table', {'width': '455'})[0]

        section = ez_epub.Section()
        section.title = table.find('img')['alt']

        font = table.find('font')
        # find() returns None when there is no <font>; str(None) would
        # otherwise add a literal "None" paragraph to the book.
        if font is not None:
            for chunk in str(font).split("<br /><br />"):
                section.text.append(genshi.core.Markup(chunk))

        return section


    # Build the book: metadata first, then one section per essay that is
    # linked from the articles index page.
    book = ez_epub.Book()
    book.title = "Paul Graham's Essays"
    book.authors = ['Paul Graham']

    # Fetch and parse the index page.
    page = urllib2.urlopen('http://www.paulgraham.com/articles.html').read()
    soup = BeautifulSoup(page)
    soup.prettify()  # return value is discarded

    # Every anchor in the second width="455" table is treated as an essay
    # link; its href is handed to addSection().
    links = soup.findAll('table', {'width': '455'})[1].findAll('a')
    sections = [addSection(link['href']) for link in links]

    book.sections = sections
    # The book title is also the argument passed to make().
    book.make(book.title)