Revisions
-
olasitarska revised this gist
Nov 18, 2012. 1 changed file with 27 additions and 22 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -10,29 +10,37 @@ import re, ez_epub, urllib2, genshi from BeautifulSoup import BeautifulSoup def addSection(link, title): if not 'http' in link: page = urllib2.urlopen('http://www.paulgraham.com/'+link).read() soup = BeautifulSoup(page) soup.prettify() else: page = urllib2.urlopen(link).read() section = ez_epub.Section() try: section.title = title print section.title if not 'http' in link: font = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0]) if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font and not len(font)<100: content = font else: content = '' for par in soup.findAll('table', {'width':'455'})[0].findAll('p'): content += str(par) for p in content.split("<br /><br />"): section.text.append(genshi.core.Markup(p)) #exception for Subject: Airbnb for pre in soup.findAll('pre'): section.text.append(genshi.core.Markup(pre)) else: for p in str(page).replace("\n","<br />").split("<br /><br />"): section.text.append(genshi.core.Markup(p)) except: pass @@ -50,10 +58,7 @@ def addSection(link): links = soup.findAll('table', {'width': '455'})[1].findAll('a') sections = [] for link in links: sections.append(addSection(link['href'], link.text)) book.sections = sections book.make(book.title) -
olasitarska revised this gist
Nov 18, 2012. 1 changed file with 0 additions and 2 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,8 +2,6 @@ """ Builds epub book out of Paul Graham's essays: http://paulgraham.com/articles.html Author: Ola Sitarska <[email protected]> This script requires python-epub-library: http://code.google.com/p/python-epub-builder/ -
olasitarska revised this gist
Nov 18, 2012. 1 changed file with 2 additions and 0 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,6 +2,8 @@ """ Builds epub book out of Paul Graham's essays: http://paulgraham.com/articles.html Current (11/18/2012) version of the book is available here: https://dl.dropbox.com/u/527278/Paul%20Graham%27s%20Essays.epub Author: Ola Sitarska <[email protected]> This script requires python-epub-library: http://code.google.com/p/python-epub-builder/ -
olasitarska revised this gist
Nov 18, 2012. 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -21,7 +21,7 @@ def addSection(link): print section.title font = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0]) if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font and not len(font)<100: content = font else: content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0]) -
olasitarska revised this gist
Nov 18, 2012. 1 changed file with 3 additions and 2 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -20,8 +20,9 @@ def addSection(link): section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt'] print section.title font = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0]) if not 'Get funded by' in font and not 'Watch how this essay was' in font and not 'Like to build things?' in font: content = font else: content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0]) -
olasitarska revised this gist
Nov 18, 2012. 1 changed file with 6 additions and 0 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -24,8 +24,14 @@ def addSection(link): content = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0]) else: content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0]) for p in content.split("<br /><br />"): section.text.append(genshi.core.Markup(p)) #exception for Subject: Airbnb for pre in soup.findAll('pre'): section.text.append(genshi.core.Markup(pre)) except: pass -
olasitarska revised this gist
Nov 18, 2012. 1 changed file with 6 additions and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -19,7 +19,12 @@ def addSection(link): try: section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt'] print section.title if not 'Get funded by' in str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0]): content = str(soup.findAll('table', {'width':'455'})[0].findAll('font')[0]) else: content = str(soup.findAll('table', {'width':'455'})[0].findAll('p')[0]) for p in content.split("<br /><br />"): section.text.append(genshi.core.Markup(p)) except: pass -
olasitarska revised this gist
Nov 18, 2012. 1 changed file with 4 additions and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -38,7 +38,10 @@ def addSection(link): links = soup.findAll('table', {'width': '455'})[1].findAll('a') sections = [] for link in links: try: sections.append(addSection(link['href'])) except: print "Error: URL doesn't exist" book.sections = sections book.make(book.title) -
olasitarska revised this gist
Nov 18, 2012. 1 changed file with 7 additions and 4 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -16,10 +16,13 @@ def addSection(link): soup.prettify() section = ez_epub.Section() try: section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt'] print section.title for p in str(soup.findAll('table', {'width':'455'})[0].find('font')).split("<br /><br />"): section.text.append(genshi.core.Markup(p)) except: pass return section -
olasitarska revised this gist
Nov 18, 2012. 1 changed file with 1 addition and 0 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -17,6 +17,7 @@ def addSection(link): section = ez_epub.Section() section.title = soup.findAll('table', {'width':'455'})[0].find('img')['alt'] print section.title for p in str(soup.findAll('table', {'width':'455'})[0].find('font')).split("<br /><br />"): section.text.append(genshi.core.Markup(p)) -
olasitarska created this gist
Nov 18, 2012. There are no files selected for viewing.
# -*- coding: utf-8 -*-
"""
Builds epub book out of Paul Graham's essays: http://paulgraham.com/articles.html

Author: Ola Sitarska <[email protected]>

This script requires python-epub-library: http://code.google.com/p/python-epub-builder/
"""

import re
import sys
import urllib2
import urlparse
from contextlib import closing

import ez_epub
import genshi
from BeautifulSoup import BeautifulSoup

# Root of the site; relative essay links from the index are resolved against it.
BASE_URL = 'http://www.paulgraham.com/'

# Attribute filter the script uses to locate the page's content tables.
CONTENT_TABLE = {'width': '455'}

# Essays separate paragraphs with a double line break rather than <p> tags.
PARAGRAPH_BREAK = "<br /><br />"


def _fetch(url):
    """Return the raw response body of *url*, closing the connection afterwards."""
    # urllib2 responses are not context managers in Python 2, hence closing().
    with closing(urllib2.urlopen(url)) as response:
        return response.read()


def addSection(link):
    """Download one essay and convert it into an ``ez_epub.Section``.

    Args:
        link: The ``href`` taken from the article index. It may be relative
            to ``BASE_URL`` or an absolute URL; both are resolved with
            ``urlparse.urljoin`` instead of blind string concatenation.

    Returns:
        An ``ez_epub.Section``. Its title is the ``alt`` text of the first
        image in the first content table, falling back to *link* when that
        image is missing. Its text holds one ``genshi.core.Markup`` per
        paragraph of the first ``<font>`` element, and is left untouched when
        the page has no content table or no ``<font>`` element.

    Raises:
        urllib2.URLError: If the page cannot be downloaded.
    """
    page = _fetch(urlparse.urljoin(BASE_URL, link))
    soup = BeautifulSoup(page)

    section = ez_epub.Section()
    # Fallback title so a page without the expected heading image still
    # gets an identifiable chapter name.
    section.title = link

    tables = soup.findAll('table', CONTENT_TABLE)
    if not tables:
        # Page does not follow the essay layout (e.g. an external link).
        return section
    essay = tables[0]

    heading = essay.find('img')
    if heading is not None and heading.get('alt'):
        section.title = heading['alt']

    body = essay.find('font')
    if body is not None:
        # Guarding against None avoids emitting the literal text "None".
        for paragraph in str(body).split(PARAGRAPH_BREAK):
            section.text.append(genshi.core.Markup(paragraph))

    return section


book = ez_epub.Book()
book.title = "Paul Graham's Essays"
book.authors = ['Paul Graham']

page = _fetch(urlparse.urljoin(BASE_URL, 'articles.html'))
soup = BeautifulSoup(page)
# The second content table of the index page holds the list of essay links.
links = soup.findAll('table', CONTENT_TABLE)[1].findAll('a')

sections = []
for link in links:
    href = link.get('href')
    if not href:
        # Anchor without a target; nothing to download.
        continue
    try:
        sections.append(addSection(href))
    except urllib2.URLError as error:
        # One unreachable essay should not abort the whole book.
        sys.stderr.write("Skipping %s: %s\n" % (href, error))

book.sections = sections
book.make(book.title)