mermelstein · May 3, 2024 00:46
diff --git a/text_to_pdf.py b/text_to_pdf.py
 from PIL import Image
 import pytesseract
 from pdf2image import convert_from_path

 # Convert the PDF to a list of images (one entry per page).
 # NOTE: 'path_to_pdf.pdf' is a placeholder path; point it at the real input.
 images = convert_from_path('path_to_pdf.pdf')

 # Run Tesseract OCR (English language data) on each page image and save the
 # recognized text to its own file: page_1.txt, page_2.txt, ...
 for i, img in enumerate(images):
    text = pytesseract.image_to_string(img, lang='eng')
    # Write as UTF-8 explicitly: OCR output can contain non-ASCII characters,
    # and the platform default encoding may fail to encode them.
    with open(f'page_{i+1}.txt', 'w', encoding='utf-8') as f:
        f.write(text)
	from PIL import Image
	import pytesseract
	from pdf2image import convert_from_path

	# Convert the PDF to a list of images (one entry per page).
	# NOTE: 'path_to_pdf.pdf' is a placeholder path; point it at the real input.
	images = convert_from_path('path_to_pdf.pdf')

	# Run Tesseract OCR (English language data) on each page image and save the
	# recognized text to its own file: page_1.txt, page_2.txt, ...
	for i, img in enumerate(images):
		text = pytesseract.image_to_string(img, lang='eng')
		# Write as UTF-8 explicitly: OCR output can contain non-ASCII
		# characters, and the platform default encoding may fail to encode them.
		with open(f'page_{i+1}.txt', 'w', encoding='utf-8') as f:
			f.write(text)