#!/usr/bin/env python3
"""Remove <script> tags from HTML files |
|
|
|
Usage: `remove_script_tags [path]` |
|
If `path` is a directory, will process all `.html` files in the |
|
directory recursively. |
|
""" |
|
|
|
import html
import html.parser
import os
import os.path
import sys


# Quote characters, given names so they can be dropped into f-strings that
# build attribute text without any backslash escaping.
SINGLE_QUOTE = "'"
DOUBLE_QUOTE = '"'


class RemoveScriptTags(html.parser.HTMLParser):
    """Re-serialise an HTML document while dropping every ``<script>`` element.

    The parser rebuilds the markup fragment by fragment in ``self.output``
    (a list of strings).  ``<script>`` start tags, their contents and their
    end tags are skipped; other tags, text, comments, declarations and
    processing instructions are written back out.

    ``HTMLParser`` decodes character references in attribute values (always)
    and in text (when ``convert_charrefs`` is true, the default), so both are
    re-escaped on output.  The result is therefore equivalent markup, not
    necessarily a byte-for-byte copy of the non-script parts of the input.
    """

    def __init__(self, *args, **kwargs):
        """Create the parser; all arguments are forwarded to ``HTMLParser``."""
        # HTMLParser.__init__ calls reset(), which creates ``output``.
        super().__init__(*args, **kwargs)

    def reset(self):
        """Discard parser state together with any output collected so far."""
        super().reset()
        self.output = []
        # Name of the raw-text element (script, style, ...) that is currently
        # open, or None.  While it is set the parser delivers text verbatim.
        self._raw_text_tag = None

    def clean(self, markup):
        """Return *markup* (a ``str``) with all ``<script>`` elements removed."""
        self.reset()  # allow one instance to be reused for several documents
        self.feed(markup)
        self.close()  # flush text the parser was still holding back
        return "".join(self.output)

    def clean_file(self, fname, encoding=None):
        """Rewrite the file *fname* in place without its ``<script>`` elements.

        Args:
            fname: Path of the HTML file to process.
            encoding: Text encoding used for reading and writing.  ``None``
                keeps the platform default used by ``open``.
        """
        # newline="" switches off newline translation so that the file's
        # existing line endings survive the round trip.
        with open(fname, "r", encoding=encoding, newline="") as infile:
            markup = infile.read()
        # Parse completely before the file is opened (and truncated) for
        # writing, so a parsing failure cannot destroy the original.
        cleaned = self.clean(markup)
        # A decoded character reference may not be representable in the
        # target encoding; write such characters as numeric references
        # instead of failing half-way through the write.
        with open(
            fname, "w", encoding=encoding, newline="", errors="xmlcharrefreplace"
        ) as outfile:
            outfile.write(cleaned)

    @staticmethod
    def _format_attrs(attrs):
        """Serialise ``(name, value)`` pairs as they appear inside a start tag."""
        parts = []
        for name, value in attrs:
            if value is None:
                # Attribute without a value, e.g. ``<input disabled>``.
                parts.append(f" {name}")
            else:
                # The parser has unescaped the value; escape it again
                # (including both quote characters) so that double quotes
                # are always a safe delimiter.
                parts.append(f' {name}="{html.escape(value, quote=True)}"')
        return "".join(parts)

    def handle_starttag(self, tag, attrs):
        """Emit a start tag, or begin skipping when it opens a script."""
        # CDATA_CONTENT_ELEMENTS is the HTMLParser attribute naming the
        # elements whose content is passed to handle_data() unparsed.
        if tag == "script" or tag in self.CDATA_CONTENT_ELEMENTS:
            self._raw_text_tag = tag
        if tag == "script":
            return
        self.output.append(f"<{tag}{self._format_attrs(attrs)}>")

    def handle_startendtag(self, tag, attrs):
        """Emit a self-closing tag such as ``<br/>`` as a single tag."""
        # The inherited implementation would call handle_starttag() and
        # handle_endtag(), turning ``<br/>`` into ``<br></br>``.
        if tag == "script":
            return
        self.output.append(f"<{tag}{self._format_attrs(attrs)} />")

    def handle_endtag(self, tag):
        """Emit an end tag; a script end tag only ends the skipping."""
        if tag == self._raw_text_tag:
            self._raw_text_tag = None
        if tag == "script":
            return
        self.output.append(f"</{tag}>")

    def handle_data(self, data):
        """Emit text, except for the contents of a script element."""
        if self._raw_text_tag == "script":
            return
        if self._raw_text_tag is None and self.convert_charrefs:
            # Ordinary text reaches us with character references decoded;
            # restore the escaping so ``&lt;`` does not turn into ``<``.
            data = html.escape(data, quote=False)
        self.output.append(data)

    def handle_entityref(self, name):
        """Emit a named character reference (only called if not converted)."""
        self.output.append(f"&{name};")

    def handle_charref(self, name):
        """Emit a numeric character reference (only called if not converted)."""
        self.output.append(f"&#{name};")

    def handle_comment(self, comment):
        """Emit a comment unchanged."""
        self.output.append(f"<!--{comment}-->")

    def handle_decl(self, decl):
        """Emit a declaration such as ``<!DOCTYPE html>``."""
        self.output.append(f"<!{decl}>")

    def handle_pi(self, data):
        """Emit a processing instruction such as ``<?xml ... ?>``."""
        self.output.append(f"<?{data}>")


def main(argv=None):
    """Strip ``<script>`` tags from the file or directory tree given as argument.

    A regular file is cleaned directly.  A directory is walked recursively
    and every file whose name ends in ``.html`` is cleaned; the path of each
    such file is printed before it is processed.

    Args:
        argv: Argument list without the program name.  Defaults to
            ``sys.argv[1:]``.  Only the first argument is used.

    Raises:
        SystemExit: With an error message when no path is given or when the
            path is neither a file nor a directory.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.exit("Usage: remove_script_tags PATH")
    path = args[0]

    if os.path.isfile(path):
        RemoveScriptTags().clean_file(path)
        return

    # os.walk() yields nothing for a missing path, which would otherwise
    # look like a successful run.
    if not os.path.isdir(path):
        sys.exit(f"remove_script_tags: {path}: no such file or directory")

    for dirpath, _dirnames, filenames in os.walk(path):
        for fname in filenames:
            if fname.endswith(".html"):
                path_to_file = os.path.join(dirpath, fname)
                print(path_to_file)
                # A fresh parser per file keeps the documents independent.
                RemoveScriptTags().clean_file(path_to_file)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()