avinassh · October 27, 2015 05:38 · CreativeMK · Aug 7, 2018
diff --git a/container.xml b/container.xml
 <?xml version='1.0' encoding='UTF-8'?>
 <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
  <rootfiles>
    <rootfile media-type="application/oebps-package+xml" full-path="content.opf"/>
  </rootfiles>
 </container>
diff --git a/content.opf b/content.opf
 <?xml version='1.0' encoding='UTF-8'?>
 <package xmlns="http://www.idpf.org/2007/opf" prefix="rendition: http://www.idpf.org/vocab/rendition/#" unique-identifier="uuid_id" version="3.0">
  <metadata xmlns:opf="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/">
    <meta property="dcterms:modified"/>
    <meta name="cover" content="cover"/>
    <dc:title/>
    <dc:creator/>
    <dc:date/>
    <dc:identifier id="uuid_id" opf:scheme="uuid"/>
    <dc:language/>
  </metadata>
  <manifest>
    <item href="stylesheet.css" id="stylesheet" media-type="text/css"/>
    <item href="toc.ncx" id="toc" media-type="application/x-dtbncx+xml"/>
    <item href="cover.png" id="cover" media-type="image/png" properties="cover-image"/>
  </manifest>
  <spine toc="toc"/>
 </package>
diff --git a/epub.py b/epub.py
 #!/usr/bin/env python3

 """
 Create Epub files.

 This code was designed to provide a very simple and straight-forward API for
 creating epub files, by sacrificing most of the versatility of the format.

 Example usage:

 >>> book = Book(title='Example Book', author='John Doe')
 >>> with open('cover.png', 'br') as file:
 ...     book.add_cover(file.read())
 >>> with open('style.css') as file:
 ...     book.add_stylesheet(file.read())
 >>> book.add_page(title='First Page', content='some text')
 >>> chapter = book.add_page(title='First Chapter', content='more text')
 >>> book.add_page(
 ...     title='Sub-Page 1',
 ...     content='first subpage of the chapter',
 ...     parent=chapter)
 >>> with open('image.jpg', 'br') as file:
 ...     book.add_image('image.jpg', file.read())
 >>> book.save('example.epub')

 """

 ###############################################################################
 # Module Imports
 ###############################################################################

 import arrow
 import collections
 import itertools
 import logging
 import lxml.etree
 import lxml.html
 import pathlib
 import pkgutil
 import tempfile
 import uuid
 import zipfile

 ###############################################################################

 # Module-level logger, named after this module.
 log = logging.getLogger(__name__)

 ###############################################################################


 class ETreeWrapper:
    """Convenience wrapper around an lxml element tree.

    Calling an instance searches the tree by tag and/or attribute values;
    any attribute not defined here is forwarded to the wrapped
    ``lxml.etree.ElementTree``.
    """

    def __init__(self, *args, namespaces, **kwargs):
        """Wrap a new ``ElementTree`` built from ``*args`` / ``**kwargs``.

        ``namespaces`` maps the prefixes usable in lookups to namespace
        URIs and is passed along on every search.
        """
        self.tree = lxml.etree.ElementTree(*args, **kwargs)
        self.namespaces = namespaces

    def __call__(self, tag='*', **kwargs):
        """Search the tree for ``tag`` with the given attribute values.

        Every keyword argument adds an ``[@key="value"]`` predicate to the
        search path. Returns whatever ``tree.find`` yields for that path.
        """
        predicates = ''.join(
            '[@{}="{}"]'.format(key, value) for key, value in kwargs.items())
        return self.tree.find(
            './/{}{}'.format(tag, predicates), namespaces=self.namespaces)

    def __getattr__(self, name):
        """Forward attributes that are not defined here to the tree."""
        # This hook only runs when normal lookup fails. If ``tree`` itself
        # is missing (the instance was created without running __init__,
        # as copy and pickle do), delegating would re-enter this method
        # until the recursion limit is hit.
        if name == 'tree':
            raise AttributeError(name)
        return getattr(self.tree, name)

    def write(self, path):
        """Serialise the tree to ``path`` as pretty-printed UTF-8 XML."""
        self.tree.write(str(path), xml_declaration=True,
                        encoding='UTF-8', pretty_print=True)


 def template(name):
    """Load the XML template file ``name`` and wrap it in an ETreeWrapper.

    ``name`` is passed straight to ``open``, so a relative name is looked
    up in the current working directory. Blank text between elements is
    dropped while parsing. The wrapper is given the ``opf``, ``dc``,
    ``xhtml`` and ``ncx`` namespace prefixes for lookups.
    """
    # Read bytes, not text: the templates start with an XML declaration
    # that names an encoding, and lxml refuses to parse ``str`` input that
    # carries one.
    with open(name, 'rb') as file:
        source = file.read()
    parser = lxml.etree.XMLParser(remove_blank_text=True)
    return ETreeWrapper(
        lxml.etree.fromstring(source, parser),
        namespaces=dict(
            opf='http://www.idpf.org/2007/opf',
            dc='http://purl.org/dc/elements/1.1/',
            xhtml='http://www.w3.org/1999/xhtml',
            ncx='http://www.daisy.org/z3986/2005/ncx/'))


 def flatten(tree):
    """Yield every item of ``tree`` followed by its descendants.

    ``tree`` is an iterable of items that each expose a ``children``
    iterable. Items come out depth-first, parents before their children.
    """
    # Stack of iterators: the innermost level being walked is on top.
    pending = [iter(tree)]
    while pending:
        for node in pending[-1]:
            yield node
            # Descend into this node before continuing with its siblings.
            pending.append(iter(node.children))
            break
        else:
            # Current level exhausted; resume the level above it.
            pending.pop()

 ###############################################################################

 # One page of the book: its id, its title and the list of nested pages.
 Page = collections.namedtuple('Page', 'uid title children')
 # One embedded image: its file name and its media type.
 Image = collections.namedtuple('Image', 'name type')


 class Book:
    """Builder for an epub archive.

    Content is staged in a temporary directory as it is added: pages under
    ``pages/``, images under ``images/``, plus the cover and stylesheet.
    ``save`` writes the package files and zips the directory.
    """

    # File-name endings accepted by ``add_image`` and the media type
    # recorded in the manifest for each of them.
    _IMAGE_TYPES = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.svg': 'image/svg+xml',
    }

    def __init__(self, **kwargs):
        """Create an empty book.

        Recognised keyword arguments are ``title``, ``language`` and
        ``author``; each falls back to a default when omitted.
        """
        self.tempdir = tempfile.TemporaryDirectory()
        self.root = []
        self.images = []
        # Page ids are zero-padded counters: '0001', '0002', ...
        self.uid_generator = map('{:04}'.format, itertools.count(1))

        self.path = pathlib.Path(self.tempdir.name).resolve()
        (self.path / 'pages').mkdir()
        (self.path / 'images').mkdir()

        self.title = kwargs.get('title', 'Untitled')
        self.language = kwargs.get('language', 'en')
        self.author = kwargs.get('author', 'Unknown Author')

    def add_page(self, title, content, parent=None):
        """Add a page and return its ``Page`` record.

        The page becomes a top-level entry unless ``parent`` (a page
        returned by an earlier call) is given, in which case it is nested
        under that page. ``content`` is parsed as HTML and appended to the
        body of the page template.
        """
        log.info('New page: {}'.format(title))

        page = Page(next(self.uid_generator), title, [])
        siblings = parent.children if parent else self.root
        siblings.append(page)

        file = template('page.xhtml')
        file('xhtml:title').text = title
        file('xhtml:body').append(lxml.html.fromstring(content))
        file.write(self.path / 'pages' / (page.uid + '.xhtml'))
        return page

    def add_image(self, name, data):
        """Store the image bytes ``data`` under ``images/name``.

        The media type is derived from the file-name ending, ignoring
        case.

        Raises:
            ValueError: if ``name`` has no supported image ending.
        """
        log.info('New image: {}'.format(name))
        lowered = name.lower()
        for ending, media_type in self._IMAGE_TYPES.items():
            if lowered.endswith(ending):
                break
        else:
            # Reject before touching any state, so a bad name leaves no
            # manifest entry and no file behind.
            raise ValueError('Unsupported image type: {!r}'.format(name))
        self.images.append(Image(name, media_type))
        with open(str(self.path / 'images' / name), 'wb') as file:
            file.write(data)

    def add_cover(self, data):
        """Store the bytes ``data`` as the book's ``cover.png``."""
        with open(str(self.path / 'cover.png'), 'wb') as file:
            file.write(data)

    def add_stylesheet(self, data):
        """Store the text ``data`` as the book's ``stylesheet.css``."""
        # Pin the encoding so the output does not depend on the locale.
        with open(str(self.path / 'stylesheet.css'), 'w',
                  encoding='utf-8') as file:
            file.write(data)

    def save(self, filename):
        """Write the package files and zip the book into ``filename``."""
        self._write_spine()
        self._write_container()
        self._write_toc()
        mimetype = self.path / 'mimetype'
        with open(str(mimetype), 'w') as file:
            file.write('application/epub+zip')
        with zipfile.ZipFile(filename, 'w') as archive:
            # The mimetype entry goes in first and uncompressed.
            archive.write(
                str(mimetype), 'mimetype',
                compress_type=zipfile.ZIP_STORED)
            # '*.*' only matches names containing a dot, so the 'mimetype'
            # file stored above is not added a second time.
            for file in self.path.rglob('*.*'):
                archive.write(
                    str(file), str(file.relative_to(self.path)),
                    compress_type=zipfile.ZIP_DEFLATED)
        log.info('Book saved: {}'.format(self.title))

    def _write_spine(self):
        """Fill in ``content.opf``: metadata, manifest and spine."""
        spine = template('content.opf')
        # EPUB 3 requires dcterms:modified in the form
        # CCYY-MM-DDThh:mm:ssZ, so the UTC marker is appended.
        now = arrow.utcnow().format('YYYY-MM-DDTHH:mm:ss') + 'Z'
        spine(property='dcterms:modified').text = now
        spine('dc:date').text = now
        spine('dc:title').text = self.title
        spine('dc:creator').text = self.author
        spine('dc:language').text = self.language
        spine(id='uuid_id').text = str(uuid.uuid4())

        manifest = spine('opf:manifest')
        reading_order = spine('opf:spine')

        # Every page, nested ones included, is listed in the manifest and
        # in the spine.
        for page in flatten(self.root):
            lxml.etree.SubElement(
                manifest, 'item',
                href='pages/{}.xhtml'.format(page.uid), id=page.uid,
                **{'media-type': 'application/xhtml+xml'})
            lxml.etree.SubElement(reading_order, 'itemref', idref=page.uid)

        # Images only appear in the manifest, with ids img001, img002, ...
        for number, image in enumerate(self.images, start=1):
            lxml.etree.SubElement(
                manifest, 'item',
                href='images/' + image.name,
                id='img{:03}'.format(number),
                **{'media-type': image.type})

        spine.write(self.path / 'content.opf')

    def _write_container(self):
        """Write ``META-INF/container.xml`` from its template."""
        container = template('container.xml')
        meta_inf = self.path / 'META-INF'
        # save() may run more than once on the same book; only create the
        # directory the first time.
        if not meta_inf.is_dir():
            meta_inf.mkdir()
        container.write(meta_inf / 'container.xml')

    def _write_toc(self):
        """Write ``toc.ncx`` with one navPoint tree per top-level page."""
        toc = template('toc.ncx')
        toc('ncx:text').text = self.title
        nav_map = toc('ncx:navMap')
        for page in self.root:
            self._page_to_toc(page, nav_map)
        toc.write(self.path / 'toc.ncx')

    def _page_to_toc(self, page, node):
        """Append a navPoint for ``page`` and its children under ``node``."""
        navpoint = lxml.etree.SubElement(
            node, 'navPoint', id=page.uid, playOrder=page.uid.lstrip('0'))
        navlabel = lxml.etree.SubElement(navpoint, 'navLabel')
        lxml.etree.SubElement(navlabel, 'text').text = page.title
        lxml.etree.SubElement(
            navpoint, 'content', src='pages/{}.xhtml'.format(page.uid))
        for child in page.children:
            self._page_to_toc(child, navpoint)
diff --git a/page.xhtml b/page.xhtml
 <?xml version='1.0' encoding='UTF-8'?>
 <!DOCTYPE html>
 <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" epub:prefix="z3998: http://www.daisy.org/z3998/2012/vocab/structure/#" lang="en" xml:lang="en">
  <head>
    <title/>
    <link href="../stylesheet.css" rel="stylesheet" type="text/css"/>
  </head>
  <body/>
 </html>
diff --git a/toc.ncx b/toc.ncx
 <?xml version='1.0' encoding='UTF-8'?>
 <ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
  <head>
    <meta content="" name="dtb:uid"/>
    <meta content="0" name="dtb:depth"/>
    <meta content="0" name="dtb:totalPageCount"/>
    <meta content="0" name="dtb:maxPageNumber"/>
  </head>
  <docTitle>
    <text/>
  </docTitle>
  <navMap/>
 </ncx>
	<?xml version='1.0' encoding='UTF-8'?>
	<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
	<rootfiles>
	<rootfile media-type="application/oebps-package+xml" full-path="content.opf"/>
	</rootfiles>
	</container>
	<?xml version='1.0' encoding='UTF-8'?>
	<package xmlns="http://www.idpf.org/2007/opf" prefix="rendition: http://www.idpf.org/vocab/rendition/#" unique-identifier="uuid_id" version="3.0">
	<metadata xmlns:opf="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/">
	<meta property="dcterms:modified"/>
	<meta name="cover" content="cover"/>
	<dc:title/>
	<dc:creator/>
	<dc:date/>
	<dc:identifier id="uuid_id" opf:scheme="uuid"/>
	<dc:language/>
	</metadata>
	<manifest>
	<item href="stylesheet.css" id="stylesheet" media-type="text/css"/>
	<item href="toc.ncx" id="toc" media-type="application/x-dtbncx+xml"/>
	<item href="cover.png" id="cover" media-type="image/png" properties="cover-image"/>
	</manifest>
	<spine toc="toc"/>
	</package>
	#!/usr/bin/env python3

	"""
	Create Epub files.

	This code was designed to provide a very simple and straight-forward API for
	creating epub files, by sacrificing most of the versatility of the format.

	Example usage:

	>>> book = Book(title='Example Book', author='John Doe')
	>>> with open('cover.png', 'br') as file:
	...     book.add_cover(file.read())
	>>> with open('style.css') as file:
	...     book.add_stylesheet(file.read())
	>>> book.add_page(title='First Page', content='some text')
	>>> chapter = book.add_page(title='First Chapter', content='more text')
	>>> book.add_page(
	...     title='Sub-Page 1',
	...     content='first subpage of the chapter',
	...     parent=chapter)
	>>> with open('image.jpg', 'br') as file:
	...     book.add_image('image.jpg', file.read())
	>>> book.save('example.epub')

	"""

	###############################################################################
	# Module Imports
	###############################################################################

	import arrow
	import collections
	import itertools
	import logging
	import lxml.etree
	import lxml.html
	import pathlib
	import pkgutil
	import tempfile
	import uuid
	import zipfile

	###############################################################################

	# Module-level logger, named after this module.
	log = logging.getLogger(__name__)

	###############################################################################


	class ETreeWrapper:
	    """Convenience wrapper around an lxml element tree.

	    Calling an instance searches the tree by tag and/or attribute values;
	    any attribute not defined here is forwarded to the wrapped
	    ``lxml.etree.ElementTree``.
	    """

	    def __init__(self, *args, namespaces, **kwargs):
	        """Wrap a new ``ElementTree`` built from ``*args`` / ``**kwargs``.

	        ``namespaces`` maps the prefixes usable in lookups to namespace
	        URIs and is passed along on every search.
	        """
	        self.tree = lxml.etree.ElementTree(*args, **kwargs)
	        self.namespaces = namespaces

	    def __call__(self, tag='*', **kwargs):
	        """Search the tree for ``tag`` with the given attribute values.

	        Every keyword argument adds an ``[@key="value"]`` predicate to the
	        search path. Returns whatever ``tree.find`` yields for that path.
	        """
	        predicates = ''.join(
	            '[@{}="{}"]'.format(key, value) for key, value in kwargs.items())
	        return self.tree.find(
	            './/{}{}'.format(tag, predicates), namespaces=self.namespaces)

	    def __getattr__(self, name):
	        """Forward attributes that are not defined here to the tree."""
	        # This hook only runs when normal lookup fails. If ``tree`` itself
	        # is missing (the instance was created without running __init__,
	        # as copy and pickle do), delegating would re-enter this method
	        # until the recursion limit is hit.
	        if name == 'tree':
	            raise AttributeError(name)
	        return getattr(self.tree, name)

	    def write(self, path):
	        """Serialise the tree to ``path`` as pretty-printed UTF-8 XML."""
	        self.tree.write(str(path), xml_declaration=True,
	                        encoding='UTF-8', pretty_print=True)


	def template(name):
	    """Load the XML template file ``name`` and wrap it in an ETreeWrapper.

	    ``name`` is passed straight to ``open``, so a relative name is looked
	    up in the current working directory. Blank text between elements is
	    dropped while parsing. The wrapper is given the ``opf``, ``dc``,
	    ``xhtml`` and ``ncx`` namespace prefixes for lookups.
	    """
	    # Read bytes, not text: the templates start with an XML declaration
	    # that names an encoding, and lxml refuses to parse ``str`` input that
	    # carries one.
	    with open(name, 'rb') as file:
	        source = file.read()
	    parser = lxml.etree.XMLParser(remove_blank_text=True)
	    return ETreeWrapper(
	        lxml.etree.fromstring(source, parser),
	        namespaces=dict(
	            opf='http://www.idpf.org/2007/opf',
	            dc='http://purl.org/dc/elements/1.1/',
	            xhtml='http://www.w3.org/1999/xhtml',
	            ncx='http://www.daisy.org/z3986/2005/ncx/'))


	def flatten(tree):
	    """Yield every item of ``tree`` followed by its descendants.

	    ``tree`` is an iterable of items that each expose a ``children``
	    iterable. Items come out depth-first, parents before their children.
	    """
	    for item in tree:
	        yield item
	        yield from flatten(item.children)

	###############################################################################

	# One page of the book: its id, its title and the list of nested pages.
	Page = collections.namedtuple('Page', 'uid title children')
	# One embedded image: its file name and its media type.
	Image = collections.namedtuple('Image', 'name type')


	class Book:
	    """Builder for an epub archive.

	    Content is staged in a temporary directory as it is added: pages under
	    ``pages/``, images under ``images/``, plus the cover and stylesheet.
	    ``save`` writes the package files and zips the directory.
	    """

	    # File-name endings accepted by ``add_image`` and the media type
	    # recorded in the manifest for each of them.
	    _IMAGE_TYPES = {
	        '.jpg': 'image/jpeg',
	        '.jpeg': 'image/jpeg',
	        '.png': 'image/png',
	        '.gif': 'image/gif',
	        '.svg': 'image/svg+xml',
	    }

	    def __init__(self, **kwargs):
	        """Create an empty book.

	        Recognised keyword arguments are ``title``, ``language`` and
	        ``author``; each falls back to a default when omitted.
	        """
	        self.tempdir = tempfile.TemporaryDirectory()
	        self.root = []
	        self.images = []
	        # Page ids are zero-padded counters: '0001', '0002', ...
	        self.uid_generator = map('{:04}'.format, itertools.count(1))

	        self.path = pathlib.Path(self.tempdir.name).resolve()
	        (self.path / 'pages').mkdir()
	        (self.path / 'images').mkdir()

	        self.title = kwargs.get('title', 'Untitled')
	        self.language = kwargs.get('language', 'en')
	        self.author = kwargs.get('author', 'Unknown Author')

	    def add_page(self, title, content, parent=None):
	        """Add a page and return its ``Page`` record.

	        The page becomes a top-level entry unless ``parent`` (a page
	        returned by an earlier call) is given, in which case it is nested
	        under that page. ``content`` is parsed as HTML and appended to the
	        body of the page template.
	        """
	        log.info('New page: {}'.format(title))

	        page = Page(next(self.uid_generator), title, [])
	        siblings = parent.children if parent else self.root
	        siblings.append(page)

	        file = template('page.xhtml')
	        file('xhtml:title').text = title
	        file('xhtml:body').append(lxml.html.fromstring(content))
	        file.write(self.path / 'pages' / (page.uid + '.xhtml'))
	        return page

	    def add_image(self, name, data):
	        """Store the image bytes ``data`` under ``images/name``.

	        The media type is derived from the file-name ending, ignoring
	        case.

	        Raises:
	            ValueError: if ``name`` has no supported image ending.
	        """
	        log.info('New image: {}'.format(name))
	        lowered = name.lower()
	        for ending, media_type in self._IMAGE_TYPES.items():
	            if lowered.endswith(ending):
	                break
	        else:
	            # Reject before touching any state, so a bad name leaves no
	            # manifest entry and no file behind.
	            raise ValueError('Unsupported image type: {!r}'.format(name))
	        self.images.append(Image(name, media_type))
	        with open(str(self.path / 'images' / name), 'wb') as file:
	            file.write(data)

	    def add_cover(self, data):
	        """Store the bytes ``data`` as the book's ``cover.png``."""
	        with open(str(self.path / 'cover.png'), 'wb') as file:
	            file.write(data)

	    def add_stylesheet(self, data):
	        """Store the text ``data`` as the book's ``stylesheet.css``."""
	        # Pin the encoding so the output does not depend on the locale.
	        with open(str(self.path / 'stylesheet.css'), 'w',
	                  encoding='utf-8') as file:
	            file.write(data)

	    def save(self, filename):
	        """Write the package files and zip the book into ``filename``."""
	        self._write_spine()
	        self._write_container()
	        self._write_toc()
	        mimetype = self.path / 'mimetype'
	        with open(str(mimetype), 'w') as file:
	            file.write('application/epub+zip')
	        with zipfile.ZipFile(filename, 'w') as archive:
	            # The mimetype entry goes in first and uncompressed.
	            archive.write(
	                str(mimetype), 'mimetype',
	                compress_type=zipfile.ZIP_STORED)
	            # '*.*' only matches names containing a dot, so the 'mimetype'
	            # file stored above is not added a second time.
	            for file in self.path.rglob('*.*'):
	                archive.write(
	                    str(file), str(file.relative_to(self.path)),
	                    compress_type=zipfile.ZIP_DEFLATED)
	        log.info('Book saved: {}'.format(self.title))

	    def _write_spine(self):
	        """Fill in ``content.opf``: metadata, manifest and spine."""
	        spine = template('content.opf')
	        # EPUB 3 requires dcterms:modified in the form
	        # CCYY-MM-DDThh:mm:ssZ, so the UTC marker is appended.
	        now = arrow.utcnow().format('YYYY-MM-DDTHH:mm:ss') + 'Z'
	        spine(property='dcterms:modified').text = now
	        spine('dc:date').text = now
	        spine('dc:title').text = self.title
	        spine('dc:creator').text = self.author
	        spine('dc:language').text = self.language
	        spine(id='uuid_id').text = str(uuid.uuid4())

	        manifest = spine('opf:manifest')
	        reading_order = spine('opf:spine')

	        # Every page, nested ones included, is listed in the manifest and
	        # in the spine.
	        for page in flatten(self.root):
	            lxml.etree.SubElement(
	                manifest, 'item',
	                href='pages/{}.xhtml'.format(page.uid), id=page.uid,
	                **{'media-type': 'application/xhtml+xml'})
	            lxml.etree.SubElement(reading_order, 'itemref', idref=page.uid)

	        # Images only appear in the manifest, with ids img001, img002, ...
	        for number, image in enumerate(self.images, start=1):
	            lxml.etree.SubElement(
	                manifest, 'item',
	                href='images/' + image.name,
	                id='img{:03}'.format(number),
	                **{'media-type': image.type})

	        spine.write(self.path / 'content.opf')

	    def _write_container(self):
	        """Write ``META-INF/container.xml`` from its template."""
	        container = template('container.xml')
	        meta_inf = self.path / 'META-INF'
	        # save() may run more than once on the same book; only create the
	        # directory the first time.
	        if not meta_inf.is_dir():
	            meta_inf.mkdir()
	        container.write(meta_inf / 'container.xml')

	    def _write_toc(self):
	        """Write ``toc.ncx`` with one navPoint tree per top-level page."""
	        toc = template('toc.ncx')
	        toc('ncx:text').text = self.title
	        nav_map = toc('ncx:navMap')
	        for page in self.root:
	            self._page_to_toc(page, nav_map)
	        toc.write(self.path / 'toc.ncx')

	    def _page_to_toc(self, page, node):
	        """Append a navPoint for ``page`` and its children under ``node``."""
	        navpoint = lxml.etree.SubElement(
	            node, 'navPoint', id=page.uid, playOrder=page.uid.lstrip('0'))
	        navlabel = lxml.etree.SubElement(navpoint, 'navLabel')
	        lxml.etree.SubElement(navlabel, 'text').text = page.title
	        lxml.etree.SubElement(
	            navpoint, 'content', src='pages/{}.xhtml'.format(page.uid))
	        for child in page.children:
	            self._page_to_toc(child, navpoint)
	<?xml version='1.0' encoding='UTF-8'?>
	<!DOCTYPE html>
	<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" epub:prefix="z3998: http://www.daisy.org/z3998/2012/vocab/structure/#" lang="en" xml:lang="en">
	<head>
	<title/>
	<link href="../stylesheet.css" rel="stylesheet" type="text/css"/>
	</head>
	<body/>
	</html>
	<?xml version='1.0' encoding='UTF-8'?>
	<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
	<head>
	<meta content="" name="dtb:uid"/>
	<meta content="0" name="dtb:depth"/>
	<meta content="0" name="dtb:totalPageCount"/>
	<meta content="0" name="dtb:maxPageNumber"/>
	</head>
	<docTitle>
	<text/>
	</docTitle>
	<navMap/>
	</ncx>