Created
September 10, 2018 03:32
-
-
Save jiankaiwang/395b82da9a47ddc74aefbcd90a57303e to your computer and use it in GitHub Desktop.
Secure PDF files with a password in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Tue May 8 21:13:47 2018 | |
@author: JianKai Wang | |
""" | |
import PyPDF2 | |
import os | |
import sys | |
import codecs | |
# Working layout, all relative to the current directory:
#   raw/       source PDF files to be protected
#   password/  destination for the encrypted copies (created on demand)
#   map.csv    comma-separated mapping file
basedir = os.path.join('.')
rawdir = os.path.join(basedir, 'raw')
passdir = os.path.join(basedir, 'password')
mapfile = os.path.join(basedir, 'map.csv')

if not os.path.isfile(mapfile):
    print('Error: No mapping file.')
    # Exit non-zero so a calling shell or script can detect the failure.
    sys.exit(1)

if not os.path.isdir(rawdir):
    # os.walk() yields nothing for a missing directory, so next() below would
    # otherwise abort the script with a bare StopIteration traceback.
    print('Error: No raw directory.')
    sys.exit(1)

if not os.path.isdir(passdir):
    os.mkdir(passdir)

# Names of every file directly inside raw/ (top level only; not filtered by
# extension).
allPDFFile = next(os.walk(rawdir))[2]
def extractPwd(email):
    """Return the text of ``email`` that precedes its first '@'.

    When ``email`` contains no '@' the whole string is returned unchanged.
    """
    local_part, _, _ = email.partition('@')
    return local_part
def checkPDFExistAndAddToDict(pdfname, email):
    """Record the password for ``<pdfname>.pdf`` when that file exists.

    Appends '.pdf' to ``pdfname`` and looks for the file inside the
    module-level ``rawdir``. If it is present, the module-level ``mapDict``
    gains an entry keyed by the file name whose value is
    ``extractPwd(email)``. Otherwise an error line is printed and
    ``mapDict`` is left untouched.
    """
    target = pdfname + '.pdf'
    if not os.path.isfile(os.path.join(rawdir, target)):
        print('Error: Lose pdf file {} and the corresponding email is {}.'
              .format(target, email))
        return
    mapDict[target] = extractPwd(email)
# Maps '<no>.pdf' -> password for every mapping row whose PDF exists in raw/.
mapDict = {}
with codecs.open(mapfile, 'r', 'utf-8') as fin:
    for line in fin:
        # Each row is expected to be "<email>,<no>[,...]".
        tmpList = line.strip().split(',')
        if len(tmpList) < 2:
            # Blank or malformed row: there is no second column to read, and
            # indexing it would raise IndexError.
            continue
        rowEmail, rowNo = tmpList[0], tmpList[1]
        if rowNo == 'no':
            # header
            continue
        # Keys in mapDict carry the '.pdf' suffix, so the duplicate test has
        # to use the same form; comparing the bare number never matched and
        # let a later row silently overwrite an earlier one.
        if rowNo + '.pdf' in mapDict:
            print('Error: There is the same no.')
            continue
        checkPDFExistAndAddToDict(rowNo, rowEmail)

# Sanity check: every file in raw/ should have exactly one usable mapping row.
if len(allPDFFile) != len(mapDict):
    print('Warning: The amount of pdf files is not equal to the map file.')
    print('Warning: Total PDF file is {}, and total listed email is {}.'
          .format(len(allPDFFile), len(mapDict)))
# Write a password-protected copy of every mapped PDF into passdir.
for k in mapDict:
    input_file = os.path.join(rawdir, k)
    filename = os.path.basename(input_file)
    # Write to a temporary name first so a failed run never leaves a
    # half-written file under the final name.
    output_file = os.path.join(passdir, "temp_" + filename)
    new_output_file = os.path.join(passdir, filename)

    output = PyPDF2.PdfFileWriter()
    # Both handles are managed by `with` so they are closed even when PyPDF2
    # raises. The input stays open until the write has finished because the
    # reader may still pull page data from it at that point
    # (NOTE(review): confirm against the PyPDF2 version in use).
    with open(input_file, "rb") as inputStream:
        input_stream = PyPDF2.PdfFileReader(inputStream)
        for i in range(input_stream.getNumPages()):
            output.addPage(input_stream.getPage(i))
        # Set user and owner password to pdf file
        output.encrypt(mapDict[k], mapDict[k], use_128bit=True)
        with open(output_file, "wb") as outputStream:
            output.write(outputStream)

    # Drop any previous result only after the new file was written
    # successfully, then move the temporary file to its final name.
    # os.rename() does not overwrite an existing file on every platform.
    if os.path.isfile(new_output_file):
        os.remove(new_output_file)
    os.rename(output_file, new_output_file)

print("Securing pdf files is complete.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment