-
-
Save serrasqueiro/67c1d8fe0ae6b9626481cc46722c27c0 to your computer and use it in GitHub Desktop.
Decrypt password-protected PDF in Python.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Decrypt password-protected PDF in Python.
# Cleaned-up version of http://stackoverflow.com/a/26537710/329263
#
# Requirements:
#   pip install PyPDF2
#
# Usage: decrypt_pdf('encrypted.pdf', 'decrypted.pdf', 'secret_password')
from PyPDF2 import PdfFileReader, PdfFileWriter | |
def decrypt_pdf(input_path, output_path, password):
    """Write a decrypted copy of a password-protected PDF.

    Args:
        input_path: path of the encrypted PDF to read.
        output_path: path of the PDF to create (overwritten if it exists).
        password: password handed to ``PdfFileReader.decrypt``.

    The output file is opened only after the input has been parsed,
    decrypted and all its pages collected, so a failure on the input side
    neither truncates an existing output file nor leaves an empty one.
    """
    with open(input_path, 'rb') as input_file:
        reader = PdfFileReader(input_file)
        reader.decrypt(password)

        writer = PdfFileWriter()
        for i in range(reader.getNumPages()):
            writer.addPage(reader.getPage(i))

        # The input stays open while writing: the writer is still holding
        # page objects that came from this reader.
        with open(output_path, 'wb') as output_file:
            writer.write(output_file)
if __name__ == '__main__':
    # Example invocation with placeholder file names and password.
    decrypt_pdf(
        input_path='encrypted.pdf',
        output_path='decrypted.pdf',
        password='secret_password',
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# pylint: disable=missing-function-docstring | |
""" Convert PDF with password to a standard PDF | |
""" | |
import sys | |
import os.path | |
from PyPDF2 import PdfReader, PdfWriter | |
def usage():
    """Print the command-line help text (headed by this script's real path) and return 0."""
    script_path = os.path.realpath(__file__)
    help_text = f"""{script_path} [-p|--pass XYZ] file1 [file2] [...]
Decrypts PDF inputs (file1, ...) using password XYZ,
and writes into file which is the name, with a suffix:
_decrypt.pdf
Note that this is a simplification, the password should be stored somewhere safe!
"""
    print(help_text)
    return 0
def main():
    """Script entry point: hand the command-line arguments to do_script().

    The program name (sys.argv[0]) is left out; the result of do_script()
    is not propagated.
    """
    cli_args = sys.argv[1:]
    do_script(cli_args)
def do_script(args):
    """Parse leading options from *args* and process the remaining file names.

    Options are matched by prefix:
      -h / --help   print the usage text and stop;
      -p / --pass   use the following argument as the password.
    Parsing stops at the first argument that does not start with "-";
    that argument and everything after it are passed to run_script()
    as input file names, together with the password (empty if none given).

    Returns the result of usage() on the help and error paths, 0 otherwise.
    """
    # Work on a copy: options are consumed with `del`, which must not
    # modify the list owned by the caller.
    param = list(args)
    a_pass = ""
    while param and param[0].startswith("-"):
        option = param[0]
        if option.startswith(("-h", "--help")):
            return usage()
        if option.startswith(("-p", "--pass")):
            if len(param) < 2:
                # The password option was the last argument; without this
                # check param[1] would raise IndexError.
                print("Missing password after:", option)
                return usage()
            a_pass = param[1]
            del param[:2]
            continue
        print("Bogus option:", option)
        return usage()
    run_script(
        param,
        (a_pass,),
    )
    return 0
def run_script(param, opt_tup):
    """Decrypt every file named in *param*; always returns 0.

    Args:
        param: iterable of input PDF paths.
        opt_tup: options tuple; element 0 is the password string.

    Each output is written next to its input, with the extension replaced
    by "_decrypt.pdf". Inputs whose file name already contains "decrypt"
    (case-insensitive) are skipped, so outputs of an earlier run are not
    processed again.
    """
    a_pass = opt_tup[0]
    for fname in param:
        base = os.path.splitext(fname)[0]
        # Test only the file name, not the directories leading to it:
        # a folder called e.g. "decrypted" must not cause every file
        # inside it to be skipped.
        if "decrypt" in os.path.basename(base).lower():
            print("Skipped:", fname)
            continue
        outname = base + "_decrypt.pdf"
        print("Input:", base)
        n_pages = decrypt_pdf(fname, outname, a_pass)
        # The success message is only shown when a password was supplied.
        if a_pass:
            print(f"Decrypted to: {outname}, {n_pages} page(s)")
    return 0
def decrypt_pdf(input_path, output_path, password=""):
    """Read a PDF that may have a password and write a decrypted copy.

    Args:
        input_path: path of the PDF to read.
        output_path: path of the PDF to write (overwritten if it exists).
        password: password string; when empty, the reader is not asked to
            decrypt and no output file is written.

    Returns:
        The number of pages read from the input. The value is the same
        whether or not a password was given.
    """
    assert isinstance(password, str), "Password is a string"
    n_pages = 0
    with open(input_path, 'rb') as input_file:
        reader = PdfReader(input_file)
        if password:
            reader.decrypt(password)
        writer = PdfWriter()
        for a_page in reader.pages:
            writer.add_page(a_page)
            n_pages += 1
        # Write while the input is still open: the writer is holding page
        # objects that came from this reader.
        if password:
            with open(output_path, 'wb') as fdout:
                writer.write(fdout)
    return n_pages
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
PyPDF2>=3.1.0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment