Created
September 1, 2021 17:18
-
-
Save ArseniyShestakov/8d9d3566115c3c723de9526af87d59d2 to your computer and use it in GitHub Desktop.
Convert a book with glued pages into singular pages using PyPDF2 and MuPDF
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script processes PDF documents with an inconsistent page structure like this:
#
# [ PAGE 1 ] - 595x840
# [ PAGE 2 ] - 595x840
# [ PAGE 3 | PAGE 4 ] - 1191x840
# [ PAGE 5 ] - 595x840
# [ PAGE 6 | PAGE 7 ] - 1191x840
# [ PAGE 8 ] - 595x840
#
# I haven't found an easy way to trim and split a single page using Python, so MuPDF is used instead.
#
# Install MuPDF using your preferred package manager:
#   apt install mupdf
#   brew install mupdf
import contextlib
import os
import subprocess
import sys
import tempfile

from PyPDF2 import PdfFileReader, PdfFileWriter
# Pages wider than this many PDF points are treated as two glued pages
# (a single page in the sample document is about 595 points wide).
MAX_SINGLE_PAGE_WIDTH = 600


def is_spread(width, max_single_width=MAX_SINGLE_PAGE_WIDTH):
    """Return True when a page *width* (in points) exceeds the single-page limit."""
    return width > max_single_width


def split_spread(page, index, workdir):
    """Split one double-width page into halves with ``mutool poster``.

    Args:
        page: The PyPDF2 page object to split.
        index: Zero-based page index, used for unique file names and messages.
        workdir: Directory in which the intermediate PDF files are created.

    Returns:
        Path of the PDF written by mutool that contains the split pages.

    Raises:
        RuntimeError: If mutool is not installed or exits with an error.
    """
    single_path = os.path.join(workdir, f"spread-{index}.pdf")
    halves_path = os.path.join(workdir, f"halves-{index}.pdf")

    # mutool works on files, so the page is first saved as its own document.
    writer = PdfFileWriter()
    writer.addPage(page)
    with open(single_path, "wb") as stream:
        writer.write(stream)

    # Equivalent to: mutool poster -x 2 -y 1 input.pdf output.pdf
    command = [
        "mutool", "poster", "-x", "2", "-y", "1", single_path, halves_path,
    ]
    try:
        subprocess.run(command, capture_output=True, text=True, check=True)
    except FileNotFoundError as err:
        raise RuntimeError("mutool was not found; install MuPDF first") from err
    except subprocess.CalledProcessError as err:
        details = (err.stderr or "").strip()
        raise RuntimeError(
            f"mutool failed on page {index + 1}: {details}"
        ) from err
    return halves_path


def main(input_path="input.pdf", output_path="result.pdf"):
    """Copy *input_path* to *output_path*, splitting glued double pages.

    Pages no wider than ``MAX_SINGLE_PAGE_WIDTH`` are copied unchanged; wider
    pages are cut into a left and a right half by ``mutool poster``.

    Raises:
        RuntimeError: If mutool is not installed or fails on a page.
    """
    with contextlib.ExitStack() as stack:
        # Entered first so it is cleaned up last, after every file is closed.
        workdir = stack.enter_context(tempfile.TemporaryDirectory())
        source = PdfFileReader(stack.enter_context(open(input_path, "rb")))
        result = PdfFileWriter()

        for index in range(source.numPages):
            page = source.getPage(index)
            print(page.mediaBox)
            # Width is upper-right x minus lower-left x; the origin of the
            # media box is not guaranteed to be zero.
            width = float(page.mediaBox[2]) - float(page.mediaBox[0])
            if not is_spread(width):
                result.addPage(page)
                continue

            halves_path = split_spread(page, index, workdir)
            # The reader's file is kept open until the final write below:
            # the script previously relied on a never-closed handle for this.
            halves = PdfFileReader(
                stack.enter_context(open(halves_path, "rb"))
            )
            for n in range(halves.numPages):
                result.addPage(halves.getPage(n))

        with open(output_path, "wb") as stream:
            result.write(stream)


if __name__ == "__main__":
    # Optional usage: script.py [input.pdf [result.pdf]]
    main(*sys.argv[1:3])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment