Created
November 16, 2016 17:54
-
-
Save moshekaplan/28cf83ba6e5c574d16eae5a5c27e00f4 to your computer and use it in GitHub Desktop.
PyPDF2 attempt at decryption
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PyPDF2 attempt at decryption
# Modifications to file: pdf.py
# References:
#   http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
#   https://github.com/qpdf/qpdf/blob/master/libqpdf/QPDF_encryption.cc#L400
#   http://security.stackexchange.com/questions/95781/what-security-scheme-is-used-by-pdf-password-encryption-and-why-is-it-so-weak
def decode_permissions(self, permissions_code):
    """Decode a permissions integer into named access flags.

    Bit positions are 1-based (bit 1 is the least-significant bit), so
    "bit 3" is tested with the mask ``1 << 2``.  ``self`` is not used.

    Args:
        permissions_code: The permissions value as an integer.  Negative
            values work as-is because Python's ``&`` treats them as
            infinitely sign-extended two's complement.

    Returns:
        dict mapping each permission name to ``True`` when its bit is set
        and ``False`` otherwise.
    """
    # (result key, 1-based bit position).  The key 'accessability' is
    # misspelled but is kept verbatim: callers may already look it up.
    permission_bits = (
        ('print', 3),
        ('modify', 4),
        ('copy', 5),
        ('annotations', 6),
        ('forms', 9),
        ('accessability', 10),
        ('assemble', 11),
        ('print_high_quality', 12),
    )
    permissions = {}
    for name, bit in permission_bits:
        # `&` binds tighter than `!=` in Python; the parentheses are only
        # there to make that explicit for readers used to C precedence.
        permissions[name] = (permissions_code & (1 << (bit - 1))) != 0
    return permissions
# As per Algorithm 3.2 of the PDF Spec v1.7
def compute_encryption_key(self, password):
    """Compute the password-derived MD5 digest used by Algorithm 3.2.

    This is a PARTIAL implementation of Algorithm 3.2 (PDF Reference 1.7):
    it pads the password, hashes it, and then re-hashes the digest 50
    times.  ``self`` is not used.

    NOTE(review): the full algorithm also feeds the /O entry, the /P value
    and the first file identifier into the initial hash, applies the 50
    extra rounds only for security-handler revision 3 or greater, and
    truncates the result to the key length.  None of that is done here, so
    the returned digest is not yet a usable document key.

    Args:
        password: The password as a byte string.  A text string is encoded
            as Latin-1 first (assumed approximation of PDFDocEncoding --
            confirm against the callers).

    Returns:
        The 16-byte MD5 digest after the initial hash plus 50 re-hashes.
    """
    import hashlib

    # Standard 32-byte password padding string from Algorithm 3.2, step 1.
    password_padding = (
        b"\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF\xFA\x01\x08"
        b"\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A"
    )

    # Text cannot be concatenated with the bytes padding on Python 3.
    if not isinstance(password, bytes):
        password = password.encode('latin-1')

    # Truncate to 32 bytes, then top up to exactly 32 with the padding.
    password = password[:32]
    password = password + password_padding[:32 - len(password)]

    # Initial MD5 over the padded password.
    digest = hashlib.md5(password).digest()

    # Feed each digest back into a fresh MD5, 50 times.
    for _ in range(50):
        digest = hashlib.md5(digest).digest()

    # The original computed this value and discarded it (returned None).
    return digest
# As per Algorithm 3.3 of the PDF Spec v1.7
def compute_owner_password(self, owner_password):
    """Placeholder for computing the owner-password entry (not implemented).

    The body is intentionally empty: no computation is performed and the
    argument is ignored.

    Args:
        owner_password: Currently unused.

    Returns:
        None, always.
    """
    # TODO: implement; until then keep returning None so existing callers
    # see the same result as before.
    return None
def _decrypt_v4(self, password=""):
    """Dump the document's /Encrypt dictionary, then report "unsupported".

    Decryption as per Section 3.5 (page 117) of PDF spec v1.7 is NOT
    implemented yet; this method only prints every key/value pair of the
    encryption dictionary to stdout as a debugging aid.

    Args:
        password: Currently unused.

    Raises:
        NotImplementedError: always, after the dump.  (The original ended
            with ``sys.exit(0)``, which terminated the host process with a
            success status from inside library code.)
    """
    # "The security handler defines the use of encryption and decryption in
    # the document, using the rules specified by the CF, StmF, and StrF entries"
    encrypt = self.trailer['/Encrypt'].getObject()

    # /Encrypt keys:
    #   Filter (name)    : "name of the preferred security handler"
    #   V (number)       : Algorithm code
    #   Length (integer) : Length of encryption key, in bits
    #   CF (dictionary)  : Crypt filter
    #   StmF (name)      : Name of the crypt filter that is used by default
    #                      when decrypting streams
    #   StrF (name)      : The name of the crypt filter that is used when
    #                      decrypting all strings in the document
    #   R (number)       : Standard security handler revision number
    #   U (string)       : A 32-byte string, based on the user password
    #   P (integer)      : Permissions allowed with user access
    #
    # "If revision 4 is specified, the standard security handler supports
    # crypt filters (see Section 3.5.4, "Crypt Filters"). The support is
    # limited to the Identity crypt filter (see Table 3.23) and crypt filters
    # named StdCF whose dictionaries contain a CFM value of V2 or AESV2 and
    # an AuthEvent value of DocOpen."
    #
    # "Standard" is the name of the built-in password-based security handler.

    # /CF is deliberately not looked up on its own: the value was unused and
    # the lookup raised KeyError for dictionaries without that entry.  It is
    # still printed by the loop below when present.
    for k in encrypt.keys():
        # Single %-formatted argument so the output is the same under the
        # Python 2 print statement and the Python 3 print function.
        print("%s : %s" % (k, encrypt[k]))

    raise NotImplementedError(
        "decryption with crypt filters is not implemented")
def _decrypt(self, password=""):
    """Try to unlock the document with ``password``.

    The password is first tried via ``self._authenticateUserPassword``.
    If that fails, it is treated as an owner password: a candidate user
    password is recovered from the /O entry with RC4 and authenticated in
    turn.  On success the resulting key is stored in
    ``self._decryption_key``.

    Args:
        password: The password to try.

    Returns:
        1 if the password authenticated directly, 2 if it authenticated
        through the owner-password path, 0 if neither worked.

    Raises:
        NotImplementedError: if /Filter is not '/Standard', or /V is not
            1 or 2.
    """
    encrypt = self.trailer['/Encrypt'].getObject()
    if encrypt['/Filter'] != '/Standard':
        raise NotImplementedError("only Standard PDF encryption handler is available")
    if encrypt['/V'] not in (1, 2):
        # Work-in-progress hook for other algorithm codes; whatever it
        # does, this method still reports the document as unsupported.
        self._decrypt_v4(password)
        raise NotImplementedError("only algorithm code 1 and 2 are supported. This uses %s" % encrypt['/V'])

    user_password, key = self._authenticateUserPassword(password)
    if user_password:
        self._decryption_key = key
        return 1

    # Owner-password path.
    rev = encrypt['/R'].getObject()
    if rev == 2:
        keylen = 5
    elif '/Length' in encrypt:
        keylen = encrypt['/Length'].getObject() // 8
    else:
        # /Length is optional in the encryption dictionary; the PDF
        # Reference gives its default as 40 bits.
        keylen = 40 // 8
    key = _alg33_1(password, rev, keylen)
    real_O = encrypt["/O"].getObject()
    if rev == 2:
        userpass = utils.RC4_encrypt(key, real_O)
    else:
        val = real_O
        # 20 RC4 passes, counter running 19..0; each pass uses the key
        # with every byte XORed with the counter.
        for i in range(19, -1, -1):
            new_key = b_('').join(
                b_(chr(utils.ord_(key_byte) ^ i)) for key_byte in key)
            val = utils.RC4_encrypt(new_key, val)
        userpass = val

    owner_password, key = self._authenticateUserPassword(userpass)
    if owner_password:
        self._decryption_key = key
        return 2
    return 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment