Last active
September 7, 2022 22:42
-
-
Save jamesoutterside/3750447 to your computer and use it in GitHub Desktop.
django epub builder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (c) 2012, Bin Tan | |
# This file is distributed under the BSD Licence. See python-epub-builder-license.txt for details. | |
# James Outterside - Modified code from http://code.google.com/p/python-epub-builder/source/browse/trunk/epub.py to
# use django templates instead of genshi
import itertools | |
import mimetypes | |
import os | |
import shutil | |
import subprocess | |
import uuid | |
import zipfile | |
from django.template import RequestContext,loader, Context | |
from lxml import etree | |
class TocMapNode:
    """One entry of the table-of-contents tree.

    A node carries a title, the href it points at, its depth in the tree,
    its child nodes and the play order assigned by assignPlayOrder().
    """

    def __init__(self):
        self.playOrder = 0
        self.title = ''
        self.href = ''
        self.children = []
        self.depth = 0

    def assignPlayOrder(self):
        """Number this node and all its descendants in pre-order, from 0."""
        self.__assignPlayOrder([0])

    def __assignPlayOrder(self, nextPlayOrder):
        # nextPlayOrder is a one-element list used as a mutable counter; on
        # return it holds the first number not handed out yet.
        pending = [self]
        while pending:
            node = pending.pop()
            node.playOrder = nextPlayOrder[0]
            nextPlayOrder[0] = node.playOrder + 1
            # Push children reversed so the first child is numbered next,
            # which keeps the walk in document (pre-order) sequence.
            pending.extend(reversed(node.children))
class EpubItem:
    """A single file of the book (image, HTML page or stylesheet).

    Attributes:
        id: identifier of the item.
        srcPath: path of the file to copy from (used when html is empty).
        destPath: path of the item relative to the OEBPS directory.
        mimeType: media type of the item.
        html: in-memory content; when non-empty it is written out instead
            of copying srcPath.
    """

    def __init__(self):
        # Every field starts out as the empty string.
        self.id = self.srcPath = self.destPath = self.mimeType = self.html = ''
class EpubBook(object):
    """Collects the parts of an EPUB book and writes them to disk.

    Typical use: create a book, set the title/creators/metadata, register
    images, HTML pages and stylesheets, describe the spine and TOC map, then
    call createBook() to write the unpacked tree and createArchive() to zip it.

    All XML/HTML output is rendered from Django templates under
    ``epub/create/``.
    """

    def __init__(self):
        self.loader = loader
        self.rootDir = ''
        self.UUID = uuid.uuid1()
        self.lang = 'en-US'
        self.title = ''
        self.creators = []
        self.metaInfo = []
        # The three item maps are keyed by destination path.
        self.imageItems = {}
        self.htmlItems = {}
        self.cssItems = {}
        self.coverImage = None
        self.titlePage = None
        self.tocPage = None
        # List of (order, item, linear) tuples.
        self.spine = []
        # Maps guide type -> (href, title, type).
        self.guide = {}
        self.tocMapRoot = TocMapNode()
        self.lastNodeAtDepth = {0: self.tocMapRoot}

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def __renderTemplate(templateName, values):
        """Render the named Django template with the given context values."""
        # NOTE(review): this passes a Context object as the original code
        # did; newer Django releases expect a plain dict in render() --
        # confirm against the Django version this project runs on.
        return loader.get_template(templateName).render(Context(values))

    @staticmethod
    def __writeFile(path, content):
        """Write content to path as UTF-8, always closing the file.

        The templates declare UTF-8, so text is encoded explicitly instead of
        relying on the platform default (or ASCII on Python 2). Byte strings
        are written unchanged.
        """
        if not isinstance(content, bytes):
            content = content.encode('utf-8')
        with open(path, 'wb') as fout:
            fout.write(content)

    # ------------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------------

    def setTitle(self, title):
        """Set the book title."""
        self.title = title

    def setLang(self, lang):
        """Set the book language code (defaults to 'en-US')."""
        self.lang = lang

    def addCreator(self, name, role='aut'):
        """Record a creator as a (name, role) pair."""
        self.creators.append((name, role))

    def addMeta(self, metaName, metaValue, **metaAttrs):
        """Record an extra ``dc:`` metadata element with optional attributes."""
        self.metaInfo.append((metaName, metaValue, metaAttrs))

    def getMetaTags(self):
        """Return the extra metadata as (beginTag, value, endTag) tuples.

        Attribute values are inserted verbatim (no XML escaping is applied).
        """
        tags = []
        for metaName, metaValue, metaAttr in self.metaInfo:
            # .items() works on Python 2 and 3; .iteritems() is Python 2 only.
            attrs = ''.join(
                ' opf:%s="%s"' % (attrName, attrValue)
                for attrName, attrValue in metaAttr.items()
            )
            beginTag = '<dc:%s%s>' % (metaName, attrs)
            endTag = '</dc:%s>' % metaName
            tags.append((beginTag, metaValue, endTag))
        return tags

    # ------------------------------------------------------------------
    # Items
    # ------------------------------------------------------------------

    def getImageItems(self):
        """Return the image items sorted by id."""
        return sorted(self.imageItems.values(), key=lambda x: x.id)

    def getHtmlItems(self):
        """Return the HTML items sorted by id."""
        return sorted(self.htmlItems.values(), key=lambda x: x.id)

    def getCssItems(self):
        """Return the CSS items sorted by id."""
        return sorted(self.cssItems.values(), key=lambda x: x.id)

    def getAllItems(self):
        """Return every image, HTML and CSS item sorted by id."""
        everything = itertools.chain(
            self.imageItems.values(),
            self.htmlItems.values(),
            self.cssItems.values(),
        )
        return sorted(everything, key=lambda x: x.id)

    def addImage(self, srcPath, destPath):
        """Register an image to be copied from srcPath to destPath.

        The media type is guessed from destPath. Returns the new item.
        """
        item = EpubItem()
        item.id = 'image_%d' % (len(self.imageItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.mimeType = mimetypes.guess_type(destPath)[0]
        assert item.destPath not in self.imageItems, 'duplicate image destPath'
        self.imageItems[destPath] = item
        return item

    def addHtmlForImage(self, imageItem):
        """Add an HTML page, rendered from a template, for the given image."""
        rendered = self.__renderTemplate(
            'epub/create/image.html', {'book': self, 'item': imageItem})
        return self.addHtml('', '%s.html' % imageItem.destPath, rendered)

    def addHtml(self, srcPath, destPath, html):
        """Register an HTML page.

        When html is non-empty it is written out as the page content;
        otherwise the page is copied from srcPath. Returns the new item.
        """
        item = EpubItem()
        item.id = 'html_%d' % (len(self.htmlItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.html = html
        item.mimeType = 'application/xhtml+xml'
        assert item.destPath not in self.htmlItems, 'duplicate html destPath'
        self.htmlItems[item.destPath] = item
        return item

    def addCss(self, srcPath, destPath):
        """Register a stylesheet to be copied from srcPath to destPath."""
        item = EpubItem()
        item.id = 'css_%d' % (len(self.cssItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.mimeType = 'text/css'
        assert item.destPath not in self.cssItems, 'duplicate css destPath'
        self.cssItems[item.destPath] = item
        return item

    def addCover(self, srcPath):
        """Register the cover image; may only be called once.

        Only the image is registered; no cover HTML page, spine entry or
        guide entry is created for it.
        """
        assert not self.coverImage, 'cover already set'
        _, ext = os.path.splitext(srcPath)
        destPath = 'cover%s' % ext
        self.coverImage = self.addImage(srcPath, destPath)

    # ------------------------------------------------------------------
    # Title page / TOC page
    # ------------------------------------------------------------------

    def __makeTitlePage(self):
        # Caller-supplied title page HTML takes precedence over the template.
        if self.titlePage.html:
            return
        self.titlePage.html = self.__renderTemplate(
            'epub/create/title-page.html', {'book': self})

    def addTitlePage(self, html=''):
        """Add the title page; an empty html means "render from template"."""
        assert not self.titlePage, 'title page already added'
        self.titlePage = self.addHtml('', 'title-page.html', html)
        self.addSpineItem(self.titlePage, True, -200)
        self.addGuideItem('title-page.html', 'Title Page', 'title-page')

    def __makeTocPage(self):
        self.tocPage.html = self.__renderTemplate(
            'epub/create/toc.html', {'book': self})

    def addTocPage(self):
        """Add the (template-rendered) table-of-contents page."""
        assert not self.tocPage, 'toc page already added'
        self.tocPage = self.addHtml('', 'toc.html', '')
        self.addSpineItem(self.tocPage, False, -100)
        self.addGuideItem('toc.html', 'Table of Contents', 'toc')

    # ------------------------------------------------------------------
    # Spine / guide
    # ------------------------------------------------------------------

    def getSpine(self):
        """Return the spine entries sorted by their order value.

        Sorting is by order only (stable for ties); comparing whole tuples
        would fall through to comparing EpubItem objects when two entries
        share an order.
        """
        return sorted(self.spine, key=lambda entry: entry[0])

    def addSpineItem(self, item, linear=True, order=None):
        """Append an HTML item to the spine.

        When order is None the item is placed after the current highest
        order (or at 1 if the spine is empty).
        """
        assert item.destPath in self.htmlItems, 'item is not a registered html item'
        if order is None:
            highest = max(entry[0] for entry in self.spine) if self.spine else 0
            order = highest + 1
        self.spine.append((order, item, linear))

    def getGuide(self):
        """Return the guide entries sorted by type."""
        return sorted(self.guide.values(), key=lambda x: x[2])

    def addGuideItem(self, href, title, type):
        """Add a guide reference; each type may be added only once."""
        assert type not in self.guide, 'duplicate guide type'
        self.guide[type] = (href, title, type)

    # ------------------------------------------------------------------
    # TOC map
    # ------------------------------------------------------------------

    def getTocMapRoot(self):
        """Return the root node of the TOC map."""
        return self.tocMapRoot

    def getTocMapHeight(self):
        """Return the deepest depth at which a TOC node has been added."""
        return max(self.lastNodeAtDepth.keys())

    def addTocMapNode(self, href, title, depth=None, parent=None):
        """Add a TOC node and return it.

        An explicit parent wins and depth is then ignored. Without a parent,
        the node is attached to the root when depth is None, otherwise to the
        most recently added node at depth - 1 (KeyError if there is none).
        """
        node = TocMapNode()
        node.href = href
        node.title = title
        if parent is None:
            if depth is None:
                parent = self.tocMapRoot
            else:
                parent = self.lastNodeAtDepth[depth - 1]
        parent.children.append(node)
        node.depth = parent.depth + 1
        self.lastNodeAtDepth[node.depth] = node
        return node

    # ------------------------------------------------------------------
    # Writing the unpacked book
    # ------------------------------------------------------------------

    def makeDirs(self):
        """Create the META-INF and OEBPS directories under rootDir."""
        for name in ('META-INF', 'OEBPS'):
            path = os.path.join(self.rootDir, name)
            try:
                os.makedirs(path)
            except OSError:
                # An existing directory is fine; anything else (permissions,
                # a file in the way, ...) is a real error and is re-raised.
                if not os.path.isdir(path):
                    raise

    def __writeContainerXML(self):
        rendered = self.__renderTemplate('epub/create/container.xml', {})
        self.__writeFile(
            os.path.join(self.rootDir, 'META-INF', 'container.xml'), rendered)

    def __writeTocNCX(self):
        # Play orders must be assigned before the template walks the tree.
        self.tocMapRoot.assignPlayOrder()
        rendered = self.__renderTemplate('epub/create/toc.ncx', {'book': self})
        self.__writeFile(
            os.path.join(self.rootDir, 'OEBPS', 'toc.ncx'), rendered)

    def __writeContentOPF(self):
        rendered = self.__renderTemplate(
            'epub/create/content.opf', {'book': self})
        self.__writeFile(
            os.path.join(self.rootDir, 'OEBPS', 'content.opf'), rendered)

    def __writeItems(self):
        for item in self.getAllItems():
            # Single pre-formatted argument: same output on Python 2 and 3.
            print('%s %s' % (item.id, item.destPath))
            target = os.path.join(self.rootDir, 'OEBPS', item.destPath)
            if item.html:
                self.__writeFile(target, item.html)
            else:
                shutil.copyfile(item.srcPath, target)

    def __writeMimeType(self):
        self.__writeFile(
            os.path.join(self.rootDir, 'mimetype'), 'application/epub+zip')

    # ------------------------------------------------------------------
    # Archive / validation
    # ------------------------------------------------------------------

    @staticmethod
    def __listManifestItems(contentOPFPath):
        """Return the href of every manifest item in the given content.opf."""
        tree = etree.parse(contentOPFPath)
        return tree.xpath(
            "//opf:manifest/opf:item/@href",
            namespaces={'opf': 'http://www.idpf.org/2007/opf'})

    @staticmethod
    def createArchive(rootDir, outputPath):
        """Zip the unpacked book under rootDir into outputPath.

        The mimetype entry is written first and stored uncompressed; the
        container, content.opf and all manifest items follow, deflated.
        Files are addressed through rootDir with explicit archive names, so
        the process working directory is never changed.
        """
        contentOPF = os.path.join('OEBPS', 'content.opf')
        fileList = [os.path.join('META-INF', 'container.xml'), contentOPF]
        manifest = EpubBook.__listManifestItems(os.path.join(rootDir, contentOPF))
        fileList.extend(os.path.join('OEBPS', itemPath) for itemPath in manifest)

        with zipfile.ZipFile(outputPath, 'w') as fout:
            fout.write(os.path.join(rootDir, 'mimetype'), 'mimetype',
                       compress_type=zipfile.ZIP_STORED)
            for filePath in fileList:
                fout.write(os.path.join(rootDir, filePath), filePath,
                           compress_type=zipfile.ZIP_DEFLATED)

    @staticmethod
    def checkEpub(checkerPath, epubPath):
        """Run the epubcheck jar on epubPath and return java's exit code.

        The command is given as an argument list without a shell: combining
        a list with shell=True drops the arguments on POSIX systems.
        """
        return subprocess.call(['java', '-jar', checkerPath, epubPath])

    def createBook(self, rootDir):
        """Render the generated pages and write the unpacked book to rootDir."""
        if self.titlePage:
            self.__makeTitlePage()
        if self.tocPage:
            self.__makeTocPage()
        self.rootDir = rootDir
        self.makeDirs()
        self.__writeMimeType()
        self.__writeItems()
        self.__writeContainerXML()
        self.__writeContentOPF()
        self.__writeTocNCX()
def test():
    """Build a small sample book, zip it and run epubcheck on the result.

    Relies on hard-coded Windows paths for the cover image and the output
    directory, and on an epubcheck jar in the current directory.
    """
    def getMinimalHtml(text):
        # The same text is used for both the <title> and the paragraph.
        page = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHtml 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>%s</title></head>
<body><p>%s</p></body>
</html>
"""
        return page % (text, text)

    book = EpubBook()
    book.setTitle('Most Wanted Tips for Aspiring Young Pirates')
    for creator in ('Monkey D Luffy', 'Guybrush Threepwood'):
        book.addCreator(creator)
    book.addMeta('contributor', 'Smalltalk80', role = 'bkp')
    book.addMeta('date', '2010', event = 'publication')
    book.addTitlePage()
    book.addTocPage()
    book.addCover(r'D:\epub\blank.png')
    book.addCss(r'main.css', 'main.css')

    # Register the content pages first, then put them on the spine.
    headings = (
        ('1.html', 'Chapter 1'),
        ('2.html', 'Section 1.1'),
        ('3.html', 'Subsection 1.1.1'),
        ('4.html', 'Section 1.2'),
        ('5.html', 'Chapter 2'),
    )
    pages = [
        book.addHtml('', destPath, getMinimalHtml(heading))
        for destPath, heading in headings
    ]
    for page in pages:
        book.addSpineItem(page)
    n1, n11, n111, n12, n2 = pages

    # TOC nodes are placed by depth here (None means "directly under the
    # root"). The other supported form passes the node returned by an
    # earlier addTocMapNode() call as parent=..., e.g.
    #   t1 = book.addTocMapNode(n1.destPath, '1')
    #   t11 = book.addTocMapNode(n11.destPath, '1.1', parent = t1)
    tocEntries = (
        (n1, '1', None),
        (n11, '1.1', 2),
        (n111, '1.1.1', 3),
        (n12, '1.2', 2),
        (n2, '2', None),
    )
    for page, label, depth in tocEntries:
        book.addTocMapNode(page.destPath, label, depth)

    rootDir = r'd:\epub\test'
    archivePath = rootDir + '.epub'
    book.createBook(rootDir)
    EpubBook.createArchive(rootDir, archivePath)
    EpubBook.checkEpub('epubcheck-1.0.5.jar', archivePath)


if __name__ == '__main__':
    test()
#epub/create/toc.ncx | |
<?xml version="1.0" encoding="UTF-8" standalone="no"?> | |
{% load epub_tags %} | |
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" | |
version="2005-1"> | |
<head> | |
<meta name="dtb:uid" content="urn:uuid:{{book.UUID}}"/> | |
<meta name="dtb:depth" content="{{book.getTocMapHeight}}"/> | |
<meta name="dtb:totalPageCount" content="0"/> | |
<meta name="dtb:maxPageNumber" content="0"/> | |
</head> | |
<docTitle> | |
<text>{{book.title}}</text> | |
</docTitle> | |
<navMap> | |
{% for node in book.getTocMapRoot.children %} | |
{% toc_nav_children_output node %} | |
{% endfor %} | |
</navMap> | |
</ncx> | |
#epub/create/content.opf | |
<?xml version="1.0" encoding="utf-8" standalone="no"?> | |
<opf:package xmlns:opf="http://www.idpf.org/2007/opf" | |
xmlns:dc="http://purl.org/dc/elements/1.1/" | |
unique-identifier="bookid" version="2.0"> | |
<opf:metadata > | |
<dc:identifier id="bookid">urn:uuid:{{book.UUID}}</dc:identifier> | |
<dc:language>{{book.lang}}</dc:language> | |
<dc:title>{{book.title}}</dc:title> | |
{% for creator in book.creators %} | |
<dc:creator opf:role="{{creator.1}}">{{creator.0}}</dc:creator> | |
{% endfor %} | |
{% for tag in book.getMetaTags %} | |
{{tag.0|safe}}{{tag.1}}{{tag.2|safe}} | |
{% endfor %} | |
{% if book.coverImage %} | |
<opf:meta name="cover" content="{{book.coverImage.id}}"/> | |
{% endif %} | |
</opf:metadata> | |
<opf:manifest> | |
<opf:item id="ncxtoc" media-type="application/x-dtbncx+xml" href="toc.ncx"/> | |
{% for item in book.getAllItems %} | |
<opf:item id="{{item.id}}" media-type="{{item.mimeType}}" href="{{item.destPath}}"/> | |
{% endfor %} | |
</opf:manifest> | |
<opf:spine toc="ncxtoc"> | |
{% for sitem in book.getSpine %} | |
<opf:itemref idref="{{sitem.1.id}}" linear="{{sitem.2|yesno}}"/> | |
{% endfor %} | |
</opf:spine> | |
{% if book.guide %} | |
<opf:guide> | |
{% for gitem in book.getGuide %} | |
<opf:reference href="{{gitem.0}}" type="{{gitem.2}}" title="{{gitem.1}}"/> | |
{% endfor %} | |
</opf:guide> | |
{% endif %} | |
</opf:package> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment