Skip to content

Instantly share code, notes, and snippets.

@catwhocode
Created October 31, 2024 03:27
Show Gist options
  • Save catwhocode/d7f7016000b371551fd9d1879d2691c0 to your computer and use it in GitHub Desktop.
Save catwhocode/d7f7016000b371551fd9d1879d2691c0 to your computer and use it in GitHub Desktop.
# Install the required packages first:
# pip install PyMuPDF Pillow
import fitz
import io
from PIL import Image
# Extract every embedded image from a PDF and write each one to the current
# working directory as image<page>_<n>.<ext>.
file = "source.pdf"

# Open the document in a context manager so it is closed (and its file handle
# released) even if extraction or writing fails partway through.
with fitz.open(file) as pdf_file:
    for page_index in range(len(pdf_file)):
        page = pdf_file.load_page(page_index)  # load the page
        image_list = page.get_images(full=True)  # get images on the page

        # NOTE: the log messages report the zero-based page index, while the
        # output file names below use a one-based page number.
        if image_list:
            print(f"[+] {len(image_list)} images on page {page_index}")
        else:
            print("[!] No image on page", page_index)

        for image_index, img in enumerate(image_list, start=1):
            # The first field of each entry is the xref used to look the
            # image up in the document.
            xref = img[0]
            base_image = pdf_file.extract_image(xref)
            image_bytes = base_image["image"]
            image_ext = base_image["ext"]

            image_name = f"image{page_index+1}_{image_index}.{image_ext}"
            with open(image_name, "wb") as image_file:
                image_file.write(image_bytes)
            print(f"[+] saved as {image_name}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment