-
-
Save b4tman/9cabde75cb2f4c9afb550575ebc5b75e to your computer and use it in GitHub Desktop.
Снимает защиту от редактирования с файлов MS Word (.docx) и MS Excel (.xlsx)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import shutil | |
import sys | |
import tempfile | |
import tkinter as tk | |
from tkinter import filedialog | |
from tkinter import messagebox | |
from xml.sax import make_parser | |
from xml.sax.saxutils import XMLFilterBase, XMLGenerator | |
from zipfile import ZipFile, is_zipfile | |
class XMLTagsFilter(XMLFilterBase):
    """SAX filter that drops selected elements together with their content.

    Events are forwarded to the downstream content handler only while the
    parser is outside every element listed in ``tag_names_to_exclude``.
    Names are compared case-insensitively.  An element matches either by its
    full name as reported by the parser or by the part after a namespace
    prefix, so ``documentProtection`` also matches ``w:documentProtection``.

    Based on https://stackoverflow.com/a/42411493
    """

    def __init__(self, tag_names_to_exclude, parent=None):
        """Create the filter.

        Args:
            tag_names_to_exclude: iterable of element names to suppress.
            parent: upstream XML reader whose events are filtered.
        """
        super().__init__(parent)
        # lower-cased names of the elements to exclude
        self._tag_names_to_exclude = {tag.lower() for tag in tag_names_to_exclude}
        # number of excluded elements that are currently open
        self._excluded_tags_count = 0

    def _is_excluded(self, name):
        """Return True when *name* (with or without prefix) must be dropped."""
        lowered = name.lower()
        if lowered in self._tag_names_to_exclude:
            return True
        # Without namespace processing the parser reports "prefix:local";
        # compare the local part as well so prefixed documents are handled.
        return lowered.rpartition(':')[2] in self._tag_names_to_exclude

    def _forward_events(self):
        """Return True when the parser is not inside an excluded element."""
        return self._excluded_tags_count == 0

    def startElement(self, name, attrs):
        # Count first: the opening tag of an excluded element is itself dropped.
        if self._is_excluded(name):
            self._excluded_tags_count += 1
        if self._forward_events():
            super().startElement(name, attrs)

    def endElement(self, name):
        # Check first: the closing tag of an excluded element is itself dropped.
        if self._forward_events():
            super().endElement(name)
        if self._is_excluded(name):
            self._excluded_tags_count -= 1

    def characters(self, content):
        if self._forward_events():
            super().characters(content)
def xml_remove_protection(input_filename, output_filename):
    """Copy an XML file, leaving out the Office protection elements.

    The input is parsed through ``XMLTagsFilter`` configured with the names
    ``sheetProtection``, ``workbookProtection`` and ``documentProtection``;
    whatever the filter lets through is re-serialised as UTF-8 into
    ``output_filename``.

    Args:
        input_filename: path of the XML file to read.
        output_filename: path of the XML file to create (overwritten).
    """
    protection_tags = ('sheetProtection', 'workbookProtection', 'documentProtection')
    tag_filter = XMLTagsFilter(protection_tags, make_parser())
    with open(output_filename, 'wb') as sink:
        # The generator writes every event that survives the filter to the sink.
        tag_filter.setContentHandler(
            XMLGenerator(sink, encoding='utf-8', short_empty_elements=True)
        )
        tag_filter.parse(input_filename)
def unlock_office_file(input_filename, output_filename):
    """Write a copy of an Office archive with protection elements removed.

    Every non-directory member of the input zip is copied to the output zip
    under the same name and with the same compression type.  The members
    ``xl/workbook.xml``, ``word/settings.xml`` and every ``*.xml`` under
    ``xl/worksheets/`` are passed through ``xml_remove_protection`` first.

    Args:
        input_filename: path of the source archive.
        output_filename: path of the archive to create (overwritten).

    Raises:
        NotImplementedError: the input is not a zip file, or it contains
            none of the members listed above.
    """
    if not is_zipfile(input_filename):
        raise NotImplementedError

    with ZipFile(input_filename, 'r') as zin:
        content = zin.namelist()

        # Members that need filtering; a set keeps the per-member lookup cheap.
        files_to_process = {
            name for name in content
            if name in ('xl/workbook.xml', 'word/settings.xml')
            or (name.startswith('xl/worksheets/') and name.endswith('.xml'))
        }
        if not files_to_process:
            raise NotImplementedError

        # The context manager removes the scratch directory even when
        # extraction, XML parsing or zip writing raises.
        with tempfile.TemporaryDirectory(suffix='-unlock') as temp_dir:
            with ZipFile(output_filename, 'w') as zout:
                for z_filename in content:
                    info = zin.getinfo(z_filename)
                    if info.is_dir():
                        continue  # skip directories

                    extracted = zin.extract(z_filename, temp_dir)
                    if z_filename in files_to_process:
                        # Write the filtered XML next to the extracted file.
                        out_dir, out_base = os.path.split(extracted)
                        temp_filename = os.path.join(out_dir, f'unlock-{out_base}')
                        xml_remove_protection(extracted, temp_filename)
                        os.remove(extracted)
                    else:
                        temp_filename = extracted

                    # Store under the original member name.
                    zout.write(temp_filename, z_filename, compress_type=info.compress_type)
                    os.remove(temp_filename)
def unlock(source_filename):
    """Unlock one file and report the outcome in a message box.

    The result is written next to the source as
    ``<name>_unlocked<extension>``.

    Args:
        source_filename: path of the file to unlock.

    Raises:
        SystemExit: with status 1, after an error dialog has been shown,
            when ``unlock_office_file`` raises ``NotImplementedError``.
    """
    file_path, file_base = os.path.split(source_filename)
    file_name, file_extension = os.path.splitext(file_base)
    target_name = f'{file_name}_unlocked{file_extension}'
    target_file = os.path.join(file_path, target_name)

    try:
        unlock_office_file(source_filename, target_file)
    except NotImplementedError:
        messagebox.showerror('Неподдерживаемый тип', f'Формат файла "{file_base}" не поддерживается\nДанный скрипт '
                             'поддерживает снятие блокировки только с файлов MS Word (.docx)'
                             ' и MS Excel (.xlsx)')
        # sys.exit rather than the builtin exit(): the latter is injected by
        # the site module and is not guaranteed to exist.
        sys.exit(1)

    messagebox.showinfo('Операция завершена', f'Файл "{target_name}" в "{file_path}" готов к использованию')
def sanitize_filename(filename):
    """Return *filename* with every double and single quote character removed."""
    # Map both quote characters to "delete" and apply the table in one pass.
    quote_remover = str.maketrans('', '', '"\'')
    return filename.translate(quote_remover)
def main():
    """Entry point: pick the file to unlock and run ``unlock`` on it.

    The path comes from the first command-line argument; when none is given
    a file-open dialog is shown instead.  Closing the dialog without a
    selection exits with status 0.  Any unexpected error raised while
    unlocking is shown in an error dialog.
    """
    root = tk.Tk()
    root.withdraw()  # only the dialogs are shown, never the root window

    if len(sys.argv) > 1:
        source_file = sys.argv[1]
    else:
        source_file = filedialog.askopenfilename()
        # A cancelled dialog yields an empty value (an empty string, or an
        # empty tuple on some Tk builds); both are falsy.
        if not source_file:
            sys.exit(0)

    source_file = sanitize_filename(source_file)
    source_file = os.path.abspath(source_file)

    try:
        unlock(source_file)
    except SystemExit:
        # unlock() has already shown its own error dialog before exiting.
        pass
    except Exception as err:
        # Show the exception type and its message, not just the class object.
        messagebox.showerror('Непредвиденная ошибка', f'{type(err).__name__}: {err}')
# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
оригинал на пикабу