Created
October 18, 2023 14:12
-
-
Save felixmon/92c52d868e8c956175b9ffcc1e273288 to your computer and use it in GitHub Desktop.
Use pdftk and the convert command to turn specified pages of a PDF file into images
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import subprocess | |
def extract_pages_from_pdf(source_file, page_range, output_file):
    """Copy the selected pages of a PDF into a new PDF using ``pdftk``.

    Args:
        source_file: Path of the PDF to read.
        page_range: One or more pdftk page ranges in a single string,
            separated by commas and/or whitespace (e.g. ``"1-3, 5, 7-9"``).
            If the string contains no ranges, pdftk is invoked without any
            range arguments.
        output_file: Path of the PDF that pdftk is asked to write.

    Exits the process with status 1, after printing a message, when the
    ``pdftk`` executable cannot be found or when it returns a non-zero
    exit status.
    """
    # Local import keeps this function self-contained; sys.exit is used
    # because the bare exit() helper is only installed by the site module.
    import sys

    # pdftk takes every range as its own argument ("cat 1-3 5 7-9"), so a
    # comma-separated string must be split before the command is built.
    ranges = page_range.replace(",", " ").split()
    command = ['pdftk', source_file, 'cat', *ranges, 'output', output_file]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except FileNotFoundError:
        # Raised when the pdftk executable itself cannot be located.
        print("Error: 'pdftk' executable not found; is it installed and on PATH?")
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"Error during 'pdftk' execution:\n{e.stderr}")
        sys.exit(1)
def convert_pdf_to_images(pdf_file, output_prefix,
                          convert_path='[absolute path of convert.exe]',
                          density=300):
    """Render every page of a PDF to a numbered PNG file using ``convert``.

    Args:
        pdf_file: Path of the PDF to render.
        output_prefix: Prefix of the generated images; files are named
            ``<output_prefix>_%03d.png``.
        convert_path: Path of the ``convert`` executable to run. The default
            is a placeholder that must be replaced (or overridden by the
            caller) with the real absolute path.
        density: Value passed to the ``-density`` option (default 300).

    Exits the process with status 1, after printing a message, when the
    executable cannot be found or when it returns a non-zero exit status.
    """
    # Local import keeps this function self-contained; sys.exit is used
    # because the bare exit() helper is only installed by the site module.
    import sys

    # The full path of the convert command must be given because there is
    # another 'convert' in the system directory.
    command = [convert_path, '-density', str(density), pdf_file,
               f"{output_prefix}_%03d.png"]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except FileNotFoundError:
        # Raised when convert_path does not point at an existing executable,
        # e.g. when the placeholder default was never replaced.
        print(f"Error: 'convert' executable not found at: {convert_path}")
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"Error during 'convert' execution:\n{e.stderr}")
        sys.exit(1)
def main():
    """Interactively extract pages from a PDF and convert them to images.

    Reads a PDF filename and a page range from standard input, asks
    ``extract_pages_from_pdf`` to write the selection to
    ``<name>_extracted.pdf``, then passes that file to
    ``convert_pdf_to_images`` with the image prefix ``<name>_image``.
    """
    # Ask the user what to process.
    pdf_name = input("Enter the filename of the input PDF (in the current directory): ").strip()
    page_spec = input("Enter the range of pages to extract (e.g., 1-3, 5, 7-9): ").strip()

    # Both output names derive from the input name without its extension.
    stem = os.path.splitext(pdf_name)[0]
    extracted_pdf = stem + "_extracted.pdf"
    image_prefix = stem + "_image"

    # Step 1: pull the requested pages into an intermediate PDF.
    extract_pages_from_pdf(pdf_name, page_spec, extracted_pdf)

    # Step 2: render the intermediate PDF as images.
    convert_pdf_to_images(extracted_pdf, image_prefix)

    print("Done!")
# Start the interactive workflow only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment