codeswimmer · November 18, 2012 18:04 · Nov 18, 2012 · Nov 18, 2012 · Nov 18, 2012 · Nov 18, 2012
diff --git a/pgessays.py b/pgessays.py
@@ -10,29 +10,37 @@
 import re, ez_epub, urllib2, genshi
 from BeautifulSoup import BeautifulSoup
 
-def addSection(link):
-    page = urllib2.urlopen('http://www.paulgraham.com/'+link).read()
-    soup = BeautifulSoup(page)
-    soup.prettify()
-
+def addSection(link, title):
+    if not 'http' in link:
+        page = urllib2.urlopen('http://www.paulgraham.com/'+link).read()
+        soup = BeautifulSoup(page)
+        soup.prettify() 
+    else:
+        page = urllib2.urlopen(link).read()
+
     section = ez_epub.Section()
     try:
-        section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt']
+        section.title = title
         print section.title
 
-        font = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
-        if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font  and not len(font)<100:
-            content = font
-        else:
-            content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0])
-
-        for p in content.split("<br /><br />"):
-            section.text.append(genshi.core.Markup(p))
+        if not 'http' in link:
+            font = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
+            if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font and not len(font)<100:
+                content = font
+            else:
+                content = ''
+                for par in soup.findAll('table', {'width':'455'})[0].findAll('p'):
+                    content += str(par)
 
-        #exception for Subject: Airbnb
-        for pre in soup.findAll('pre'):
-            section.text.append(genshi.core.Markup(pre))
+            for p in content.split("<br /><br />"):
+                section.text.append(genshi.core.Markup(p))
 
+            #exception for Subject: Airbnb
+            for pre in soup.findAll('pre'):
+                section.text.append(genshi.core.Markup(pre))
+        else:
+            for p in str(page).replace("\n","<br />").split("<br /><br />"):
+                section.text.append(genshi.core.Markup(p))
     except:
         pass
 
@@ -50,10 +58,7 @@ def addSection(link):
 links = soup.findAll('table', {'width': '455'})[1].findAll('a')
 sections = []
 for link in links:
-    try:
-        sections.append(addSection(link['href']))
-    except:
-        print "Error: URL doesn't exist"
-
+    sections.append(addSection(link['href'], link.text))
+
 book.sections = sections
 book.make(book.title)
diff --git a/pgessays.py b/pgessays.py
@@ -2,8 +2,6 @@
 """
 Builds epub book out of Paul Graham's essays: http://paulgraham.com/articles.html
 
-Current (11/18/2012) version of the book is available here: https://dl.dropbox.com/u/527278/Paul%20Graham%27s%20Essays.epub
-
 Author: Ola Sitarska <[email protected]>
 
 This script requires python-epub-library: http://code.google.com/p/python-epub-builder/

diff --git a/pgessays.py b/pgessays.py
@@ -2,6 +2,8 @@
 """
 Builds epub book out of Paul Graham's essays: http://paulgraham.com/articles.html
 
+Current (11/18/2012) version of the book is available here: https://dl.dropbox.com/u/527278/Paul%20Graham%27s%20Essays.epub
+
 Author: Ola Sitarska <[email protected]>
 
 This script requires python-epub-library: http://code.google.com/p/python-epub-builder/

diff --git a/pgessays.py b/pgessays.py
@@ -21,7 +21,7 @@ def addSection(link):
         print section.title
 
         font = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
-        if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font:
+        if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font  and not len(font)<100:
             content = font
         else:
             content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0])

diff --git a/pgessays.py b/pgessays.py
@@ -20,8 +20,9 @@ def addSection(link):
         section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt']
         print section.title
 
-        if not 'Get funded by' in str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0]):
-            content = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
+        font = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
+        if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font:
+            content = font
         else:
             content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0])
 

diff --git a/pgessays.py b/pgessays.py
@@ -24,8 +24,14 @@ def addSection(link):
             content = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
         else:
             content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0])
+
         for p in content.split("<br /><br />"):
             section.text.append(genshi.core.Markup(p))
+
+        #exception for Subject: Airbnb
+        for pre in soup.findAll('pre'):
+            section.text.append(genshi.core.Markup(pre))
+
     except:
         pass
 

diff --git a/pgessays.py b/pgessays.py
@@ -19,7 +19,12 @@ def addSection(link):
     try:
         section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt']
         print section.title
-        for p in str(soup.findAll('table', {'width':'455'})[0].find('font')).split("<br /><br />"):
+
+        if not 'Get funded by' in str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0]):
+            content = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0])
+        else:
+            content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0])
+        for p in content.split("<br /><br />"):
             section.text.append(genshi.core.Markup(p))
     except:
         pass

diff --git a/pgessays.py b/pgessays.py
@@ -38,7 +38,10 @@ def addSection(link):
 links = soup.findAll('table', {'width': '455'})[1].findAll('a')
 sections = []
 for link in links:
-    sections.append(addSection(link['href']))
+    try:
+        sections.append(addSection(link['href']))
+    except:
+        print "Error: URL doesn't exist"
 
 book.sections = sections
 book.make(book.title)
diff --git a/pgessays.py b/pgessays.py
@@ -16,10 +16,13 @@ def addSection(link):
     soup.prettify()
 
     section = ez_epub.Section()
-    section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt']
-    print section.title
-    for p in str(soup.findAll('table', {'width':'455'})[0].find('font')).split("<br /><br />"):
-        section.text.append(genshi.core.Markup(p))
+    try:
+        section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt']
+        print section.title
+        for p in str(soup.findAll('table', {'width':'455'})[0].find('font')).split("<br /><br />"):
+            section.text.append(genshi.core.Markup(p))
+    except:
+        pass
 
     return section
 

diff --git a/pgessays.py b/pgessays.py
@@ -17,6 +17,7 @@ def addSection(link):
 
     section = ez_epub.Section()
     section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt']
+    print section.title
     for p in str(soup.findAll('table', {'width':'455'})[0].find('font')).split("<br /><br />"):
         section.text.append(genshi.core.Markup(p))
 

diff --git a/pgessays.py b/pgessays.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+"""
+Builds epub book out of Paul Graham's essays: http://paulgraham.com/articles.html
+
+Author: Ola Sitarska <[email protected]>
+
+This script requires python-epub-library: http://code.google.com/p/python-epub-builder/
+"""
+
+import re, ez_epub, urllib2, genshi
+from BeautifulSoup import BeautifulSoup
+
+def addSection(link):
+    page = urllib2.urlopen('http://www.paulgraham.com/'+link).read()
+    soup = BeautifulSoup(page)
+    soup.prettify()
+
+    section = ez_epub.Section()
+    section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt']
+    for p in str(soup.findAll('table', {'width':'455'})[0].find('font')).split("<br /><br />"):
+        section.text.append(genshi.core.Markup(p))
+
+    return section
+
+
+book = ez_epub.Book()
+book.title = "Paul Graham's Essays"
+book.authors = ['Paul Graham']
+
+page = urllib2.urlopen('http://www.paulgraham.com/articles.html').read()
+soup = BeautifulSoup(page)
+soup.prettify()
+
+links = soup.findAll('table', {'width': '455'})[1].findAll('a')
+sections = []
+for link in links:
+    sections.append(addSection(link['href']))
+
+book.sections = sections
+book.make(book.title)