Skip to content

Instantly share code, notes, and snippets.

@jrwrigh
Last active December 11, 2024 17:38
Show Gist options
  • Save jrwrigh/563e27dfc6cfdfa793a27733a75b846a to your computer and use it in GitHub Desktop.
Save jrwrigh/563e27dfc6cfdfa793a27733a75b846a to your computer and use it in GitHub Desktop.
PDF merger updated for Python 3 usage
#! /usr/bin/env python
# Original author Nicholas Kim, modified by Yan Pashkovsky
# New license - GPL v3
import sys
import time
from PyPDF2 import PdfReader, PdfWriter
try:
from PyPDF2.utils import PdfReadError
except ImportError:
from PyPDF2._reader import PdfReadError
def eprint(*args, **kwargs):
    """Write a message to standard error.

    Takes the same positional and keyword arguments as the built-in
    ``print`` (``sep``, ``end``, ``flush``). The destination is fixed to
    ``sys.stderr``, so passing ``file`` raises ``TypeError``.

    Adapted from https://stackoverflow.com/a/14981125/7564988
    """
    # Look the stream up at call time so a redirected sys.stderr is honoured.
    error_stream = sys.stderr
    print(*args, file=error_stream, **kwargs)
def get_cmdline_arguments():
    """Parse ``sys.argv`` and return ``(options, args)``.

    ``options.output_filename`` is the output name without the ``.pdf``
    extension; it defaults to a name built from the current date and time
    unless ``-o``/``--output`` is given. ``args`` is the list of
    positional arguments (the input file names).

    If fewer than two positional arguments are supplied, the help text is
    printed and the process exits with status 1.
    """
    from optparse import OptionParser

    timestamped_name = time.strftime("output_%Y%m%d_%H%M%S")
    parser = OptionParser(usage="%prog [-o output_name] file1, file2 [, ...]")
    parser.add_option(
        "-o",
        "--output",
        dest="output_filename",
        default=timestamped_name,
        help="specify output filename (exclude .pdf extension); default is current date/time stamp",
    )

    options, input_names = parser.parse_args()
    if len(input_names) >= 2:
        return options, input_names

    # Merging needs at least two inputs; show usage and bail out.
    parser.print_help()
    sys.exit(1)
def main():
    """Merge the PDF files named on the command line into one PDF.

    The input files are concatenated in the order given. For every input
    that has at least one page, an outline (bookmark) entry titled with
    the input's file name is added, pointing at that input's first page in
    the merged document. The result is written to
    ``<output_filename>.pdf``.

    Exits with status 1 (after printing a message to stderr) if an input
    cannot be opened or is not a valid PDF.
    """
    # Local import keeps this function self-contained without touching the
    # file-level import block.
    from contextlib import ExitStack

    options, filenames = get_cmdline_arguments()
    output_pdf_name = options.output_filename + ".pdf"

    # The input streams must stay open until the merged document has been
    # written, because the writer pulls page data from them at write time.
    # ExitStack closes all of them on every exit path, including sys.exit().
    with ExitStack() as open_inputs:
        # get PDF files
        readers = []
        for name in filenames:
            try:
                stream = open_inputs.enter_context(open(name, "rb"))
                reader = PdfReader(stream)
            except PdfReadError:
                eprint("%s is not a valid PDF file." % name)
                sys.exit(1)
            except IOError:
                eprint("%s could not be found." % name)
                sys.exit(1)
            else:
                readers.append(reader)

        # merge page by page
        writer = PdfWriter()
        merged_page_index = 0  # index of the next page in the merged output
        for name, reader in zip(filenames, readers):
            for page_index, page in enumerate(reader.pages):
                writer.add_page(page)
                if page_index == 0:
                    # Bookmark the first page of each input with its file name.
                    writer.add_outline_item(str(name), merged_page_index)
                merged_page_index += 1

        # create output pdf file; ``with`` avoids touching an unbound handle
        # (and masking the real error) when the open itself fails.
        with open(output_pdf_name, "wb") as output_pdf_file:
            writer.write(output_pdf_file)

    print("%s successfully created." % output_pdf_name)
if __name__ == "__main__":
    # Run the merge only when executed as a script, not when imported.
    main()
@jrwrigh
Copy link
Author

jrwrigh commented Dec 11, 2024

Latest revision works with PyPDF2 version 3.0.1.

NOTE: PyPDF2 is deprecated in favor of pypdf, which actually gets regular updates. This still works for now, so I'm not going to fix what isn't broken.

@jrwrigh
Copy link
Author

jrwrigh commented Dec 11, 2024

Compatibility with mainline pypdf is as simple as replacing PyPDF2 with pypdf in the import statements; everything else should work.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment