Skip to content

Instantly share code, notes, and snippets.

@kharmaodo
Forked from jeetsukumaran/extract-pdf-pages.py
Created February 19, 2014 11:00
Show Gist options
  • Save kharmaodo/9089882 to your computer and use it in GitHub Desktop.
Save kharmaodo/9089882 to your computer and use it in GitHub Desktop.
#! /usr/bin/env python
###############################################################################
##
## Copyright 2012 Jeet Sukumaran.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program. If not, see <http://www.gnu.org/licenses/>.
##
###############################################################################
"""
Extract specified pages from source PDF.
"""
import sys
import os
import argparse
import pyPdf
# Module metadata: attribution and version of this script.
__author__ = "Jeet Sukumaran"
__copyright__ = "Copyright (C) 2012 Jeet Sukumaran."
__version__ = "1.0.0"
# Base name (no directory part) of the file this module was loaded from.
__prog__ = os.path.basename(__file__)
# The module docstring doubles as the description text of the program.
__description__ = __doc__
def main():
    """
    Main CLI handler.

    Parses the command line, copies an inclusive range of pages from the
    source PDF into a new PDF, and writes the result either to the path given
    with ``-o/--output-filepath`` or to standard output.

    Page arguments:
        FIRST-PAGE  1-based number of the first page to extract.
        LAST-PAGE   1-based number of the last page to extract, or ``+N`` to
                    extract FIRST-PAGE plus the N pages that follow it.

    Exits through ``parser.error`` (usage message, exit status 2) when the
    page arguments are not valid integers, when the range is empty, or when
    it falls outside the source document.
    """
    parser = argparse.ArgumentParser(description=__description__)
    parser.add_argument("--version", action="version", version="%(prog)s " + __version__)
    parser.add_argument("src_pdf",
                        metavar="SOURCE-PDF",
                        type=argparse.FileType('rb'),
                        help="path to input pdf file")
    parser.add_argument("first_page",
                        metavar="FIRST-PAGE",
                        type=int,
                        help="number of first page (1-based index: first page is '1')")
    parser.add_argument("last_page",
                        metavar="LAST-PAGE",
                        type=str,
                        help="number of last page; if preceded by '+' (e.g., '+30'), specifies number of pages following first page to extract")
    parser.add_argument("-o", "--output-filepath",
                        type=str,
                        default=None,
                        help="path to output file (if not given, will write to standard output)")
    args = parser.parse_args()

    # argparse has already opened the source file; make sure it is closed on
    # every exit path, including the SystemExit raised by parser.error().
    try:
        # Convert the 1-based page number from the command line to a 0-based
        # index. Reject anything below page 1: a negative index would make
        # Python's sequence indexing silently pick pages from the END of the
        # document instead of failing.
        first_page = args.first_page - 1
        if first_page < 0:
            parser.error("FIRST-PAGE must be 1 or greater (got %d)" % args.first_page)

        try:
            if args.last_page.startswith("+"):
                # Relative form: '+N' means N pages after the first page.
                extra_pages = args.last_page[1:].replace(" ", "")
                if not extra_pages:
                    parser.error("Need to specify number of pages")
                last_page = first_page + int(extra_pages)
            else:
                # Absolute form: 1-based page number, converted to 0-based.
                last_page = int(args.last_page) - 1
        except ValueError:
            parser.error("LAST-PAGE must be an integer, optionally preceded by '+' (got %r)"
                         % args.last_page)

        if last_page < first_page:
            parser.error("LAST-PAGE must not come before FIRST-PAGE")

        pdf_in = pyPdf.PdfFileReader(args.src_pdf)
        num_pages = pdf_in.getNumPages()
        if last_page >= num_pages:
            parser.error("requested pages %d-%d, but source PDF has only %d page(s)"
                         % (first_page + 1, last_page + 1, num_pages))

        pdf_out = pyPdf.PdfFileWriter()
        for pg_num in range(first_page, last_page + 1):
            pdf_out.addPage(pdf_in.getPage(pg_num))

        # The source stream must stay open until the write completes, so the
        # output is written inside this try block.
        if args.output_filepath:
            out_path = os.path.expandvars(os.path.expanduser(args.output_filepath))
            with open(out_path, "wb") as out_stream:
                pdf_out.write(out_stream)
        else:
            # PDF data is binary: on Python 3 use the underlying byte stream
            # of stdout; on Python 2 sys.stdout has no 'buffer' attribute and
            # is used directly. stdout is flushed but deliberately not
            # closed, since this function does not own it.
            out_stream = getattr(sys.stdout, "buffer", sys.stdout)
            pdf_out.write(out_stream)
            out_stream.flush()
    finally:
        args.src_pdf.close()
# Run the command-line handler only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment