Created
January 29, 2020 21:15
-
-
Save huzecong/f1250a79f458eddd23ee72bed4e2ae72 to your computer and use it in GitHub Desktop.
A snippet to remove the watermark from a certain PDF book. You'll still need to manually locate the watermark element though.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from PyPDF2 import PdfFileReader, PdfFileWriter | |
from PyPDF2.filters import FlateDecode | |
DIR = "path/to/pdf/file" | |
# Raw content-stream operators that draw the "PREVIEW" watermark text object.
# These bytes are specific to one particular PDF; for another file, locate the
# equivalent text object manually and replace this value.
_WATERMARK_OPS = (
    b"BT\n/F43 141.7323 Tf 114.141 132.299 Td [(PREVIEW)]TJ\n0 g 0 G\nET\n"
)


def _content_streams(page):
    """Return the content stream objects of *page* as a list.

    The PDF format allows ``/Contents`` to be missing (blank page), a single
    stream, or an array of streams; all three cases are normalised to a list
    here so the caller can treat them uniformly.
    """
    if "/Contents" not in page:
        return []
    contents = page["/Contents"]
    if isinstance(contents, list):
        # Array entries are usually indirect references; resolve each one.
        return [entry.getObject() for entry in contents]
    return [contents]


def main():
    """Copy ``pfpl.pdf`` to ``pfpl_removed.pdf`` with the watermark removed.

    Both files live in ``DIR``. Every page is copied to the output; only the
    content streams that actually contain ``_WATERMARK_OPS`` are rewritten,
    all other streams are left exactly as they were read.
    """
    pdf = PdfFileReader(os.path.join(DIR, "pfpl.pdf"))
    writer = PdfFileWriter()

    for idx in range(pdf.getNumPages()):
        page = pdf.getPage(idx)
        for stream in _content_streams(page):
            data = stream.getData()
            if _WATERMARK_OPS not in data:
                # Nothing to strip: keep the stream's original bytes.
                continue
            cleaned = data.replace(_WATERMARK_OPS, b"")
            if "/Filter" in stream:
                # NOTE(review): assumes the stream's filter is /FlateDecode,
                # as the original snippet did -- confirm for other inputs.
                stream._data = FlateDecode.encode(cleaned)
            else:
                # Unfiltered stream: store the bytes raw, otherwise the
                # compressed data would be written without a matching filter.
                stream._data = cleaned
        writer.addPage(page)

    with open(os.path.join(DIR, "pfpl_removed.pdf"), "wb") as f:
        writer.write(f)
# Script entry point: run the watermark removal only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment