harryposner · June 24, 2020 17:34
diff --git a/remove_script_tags.py b/remove_script_tags.py
 #!/usr/bin/env python3


 """Remove <script> tags from HTML files

 Usage: `remove_script_tags [path]`
 If `path` is a directory, will process all `.html` files in the
 directory recursively.
 """

 import html.parser
 import os
 import os.path
 import sys


 # Quote characters used to delimit attribute values in regenerated tags.
 SINGLE_QUOTE = "'"
 DOUBLE_QUOTE = '"'


 class RemoveScriptTags(html.parser.HTMLParser):
    """Re-serialise an HTML document while dropping every <script> element.

    The parser callbacks append the regenerated markup, piece by piece, to
    ``self.output``; joining that list yields the cleaned document.  Tag
    names are emitted as delivered by the parser and attribute values and
    text are re-escaped, so the result is equivalent to the input rather
    than byte-identical.
    """

    def __init__(self, *args, **kwargs):
        """Create the parser; all arguments are forwarded to HTMLParser."""
        super().__init__(*args, **kwargs)
        self.output = []
        # Name of the raw-text element ("script" or "style") whose content
        # is currently being received, or None outside such an element.
        self._raw_text_tag = None

    def clean_file(self, fname):
        """Remove <script> elements from the file `fname`, in place.

        The file is only reopened for writing after parsing has finished,
        so a parsing failure leaves the original content untouched.
        """
        # Start from a clean slate so one instance can process several
        # files without leaking output from the previous one.
        self.reset()
        self.output = []
        self._raw_text_tag = None
        with open(fname, "r") as infile:
            self.feed(infile.read())
        # Force the parser to process any input it is still buffering.
        self.close()
        with open(fname, "w") as outfile:
            outfile.write("".join(self.output))

    def _format_tag(self, tag, attrs, closer):
        """Return the markup for an opening tag, terminated by `closer`."""
        pieces = [f"<{tag}"]
        for attr, val in attrs:
            if val is None:
                # Valueless attribute such as `hidden`.
                pieces.append(f" {attr}")
                continue
            # The parser hands over decoded values, so "&", "<" and ">"
            # have to be escaped again before they are written out.
            escaped = html.escape(val, quote=False)
            if DOUBLE_QUOTE not in escaped:
                quote = DOUBLE_QUOTE
            elif SINGLE_QUOTE not in escaped:
                quote = SINGLE_QUOTE
            else:
                # Both quote characters occur: escape one of them.
                quote = DOUBLE_QUOTE
                escaped = escaped.replace(DOUBLE_QUOTE, "&quot;")
            pieces.append(f" {attr}={quote}{escaped}{quote}")
        pieces.append(closer)
        return "".join(pieces)

    def handle_starttag(self, tag, attrs):
        """Emit an opening tag, unless it opens a <script> element."""
        if tag in ("script", "style"):
            self._raw_text_tag = tag
        if tag == "script":
            return
        self.output.append(self._format_tag(tag, attrs, ">"))

    def handle_startendtag(self, tag, attrs):
        """Emit a self-closing tag such as <br/> as one tag, not a pair."""
        if tag == "script":
            return
        self.output.append(self._format_tag(tag, attrs, " />"))

    def handle_endtag(self, tag):
        """Emit a closing tag, unless it closes a <script> element."""
        if tag == self._raw_text_tag:
            self._raw_text_tag = None
        if tag == "script":
            return
        self.output.append(f"</{tag}>")

    def handle_data(self, data):
        """Emit text content; the content of <script> elements is dropped."""
        if self._raw_text_tag == "script":
            return
        if self._raw_text_tag == "style":
            # Style sheets are raw text: "a > b" must stay as written.
            self.output.append(data)
        else:
            # Escape again so decoded references such as "&lt;" cannot
            # turn into live markup in the output.
            self.output.append(html.escape(data, quote=False))

    def handle_entityref(self, name):
        """Emit a named reference (only called if convert_charrefs=False)."""
        if self._raw_text_tag != "script":
            self.output.append(f"&{name};")

    def handle_charref(self, name):
        """Emit a numeric reference (only called if convert_charrefs=False)."""
        if self._raw_text_tag != "script":
            self.output.append(f"&#{name};")

    def handle_comment(self, comment):
        """Emit a comment unchanged."""
        self.output.append(f"<!--{comment}-->")

    def handle_decl(self, decl):
        """Emit a declaration such as <!DOCTYPE html> unchanged."""
        self.output.append(f"<!{decl}>")

    def handle_pi(self, data):
        """Emit a processing instruction unchanged."""
        self.output.append(f"<?{data}>")

    def unknown_decl(self, data):
        """Emit a <![...]> declaration unchanged."""
        self.output.append(f"<![{data}]>")


 def main():
    """Clean the file or directory tree named by the first CLI argument.

    A regular file is cleaned and the process exits with status 0.  For a
    directory, every file below it whose name ends in ".html" is cleaned
    and its path is printed.  A missing argument exits with status 2 and a
    path that is neither a file nor a directory exits with status 1.
    """
    if len(sys.argv) < 2:
        print("Usage: remove_script_tags [path]", file=sys.stderr)
        sys.exit(2)

    path = sys.argv[1]
    if os.path.isfile(path):
        RemoveScriptTags().clean_file(path)
        sys.exit(0)

    # os.walk() silently yields nothing for a missing path, so check first
    # instead of reporting success without doing any work.
    if not os.path.isdir(path):
        print(
            f"remove_script_tags: no such file or directory: {path}",
            file=sys.stderr,
        )
        sys.exit(1)

    for dirpath, __, filenames in os.walk(path):
        for fname in filenames:
            if fname.endswith(".html"):
                path_to_file = os.path.join(dirpath, fname)
                print(path_to_file)
                # A fresh parser per file keeps the outputs separate.
                RemoveScriptTags().clean_file(path_to_file)


 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3


	"""Remove <script> tags from HTML files

	Usage: `remove_script_tags [path]`
	If `path` is a directory, will process all `.html` files in the
	directory recursively.
	"""

	import html.parser
	import os
	import os.path
	import sys


	# Quote characters used to delimit attribute values in regenerated tags.
	SINGLE_QUOTE = "'"
	DOUBLE_QUOTE = '"'


	class RemoveScriptTags(html.parser.HTMLParser):
	    """Re-serialise an HTML document while dropping every <script> element.

	    The parser callbacks append the regenerated markup, piece by piece, to
	    ``self.output``; joining that list yields the cleaned document.  Tag
	    names are emitted as delivered by the parser and attribute values and
	    text are re-escaped, so the result is equivalent to the input rather
	    than byte-identical.
	    """

	    def __init__(self, *args, **kwargs):
	        """Create the parser; all arguments are forwarded to HTMLParser."""
	        super().__init__(*args, **kwargs)
	        self.output = []
	        # Name of the raw-text element ("script" or "style") whose content
	        # is currently being received, or None outside such an element.
	        self._raw_text_tag = None

	    def clean_file(self, fname):
	        """Remove <script> elements from the file `fname`, in place.

	        The file is only reopened for writing after parsing has finished,
	        so a parsing failure leaves the original content untouched.
	        """
	        # Start from a clean slate so one instance can process several
	        # files without leaking output from the previous one.
	        self.reset()
	        self.output = []
	        self._raw_text_tag = None
	        with open(fname, "r") as infile:
	            self.feed(infile.read())
	        # Force the parser to process any input it is still buffering.
	        self.close()
	        with open(fname, "w") as outfile:
	            outfile.write("".join(self.output))

	    def _format_tag(self, tag, attrs, closer):
	        """Return the markup for an opening tag, terminated by `closer`."""
	        pieces = [f"<{tag}"]
	        for attr, val in attrs:
	            if val is None:
	                # Valueless attribute such as `hidden`.
	                pieces.append(f" {attr}")
	                continue
	            # The parser hands over decoded values, so "&", "<" and ">"
	            # have to be escaped again before they are written out.
	            escaped = html.escape(val, quote=False)
	            if DOUBLE_QUOTE not in escaped:
	                quote = DOUBLE_QUOTE
	            elif SINGLE_QUOTE not in escaped:
	                quote = SINGLE_QUOTE
	            else:
	                # Both quote characters occur: escape one of them.
	                quote = DOUBLE_QUOTE
	                escaped = escaped.replace(DOUBLE_QUOTE, "&quot;")
	            pieces.append(f" {attr}={quote}{escaped}{quote}")
	        pieces.append(closer)
	        return "".join(pieces)

	    def handle_starttag(self, tag, attrs):
	        """Emit an opening tag, unless it opens a <script> element."""
	        if tag in ("script", "style"):
	            self._raw_text_tag = tag
	        if tag == "script":
	            return
	        self.output.append(self._format_tag(tag, attrs, ">"))

	    def handle_startendtag(self, tag, attrs):
	        """Emit a self-closing tag such as <br/> as one tag, not a pair."""
	        if tag == "script":
	            return
	        self.output.append(self._format_tag(tag, attrs, " />"))

	    def handle_endtag(self, tag):
	        """Emit a closing tag, unless it closes a <script> element."""
	        if tag == self._raw_text_tag:
	            self._raw_text_tag = None
	        if tag == "script":
	            return
	        self.output.append(f"</{tag}>")

	    def handle_data(self, data):
	        """Emit text content; the content of <script> elements is dropped."""
	        if self._raw_text_tag == "script":
	            return
	        if self._raw_text_tag == "style":
	            # Style sheets are raw text: "a > b" must stay as written.
	            self.output.append(data)
	        else:
	            # Escape again so decoded references such as "&lt;" cannot
	            # turn into live markup in the output.
	            self.output.append(html.escape(data, quote=False))

	    def handle_entityref(self, name):
	        """Emit a named reference (only called if convert_charrefs=False)."""
	        if self._raw_text_tag != "script":
	            self.output.append(f"&{name};")

	    def handle_charref(self, name):
	        """Emit a numeric reference (only called if convert_charrefs=False)."""
	        if self._raw_text_tag != "script":
	            self.output.append(f"&#{name};")

	    def handle_comment(self, comment):
	        """Emit a comment unchanged."""
	        self.output.append(f"<!--{comment}-->")

	    def handle_decl(self, decl):
	        """Emit a declaration such as <!DOCTYPE html> unchanged."""
	        self.output.append(f"<!{decl}>")

	    def handle_pi(self, data):
	        """Emit a processing instruction unchanged."""
	        self.output.append(f"<?{data}>")

	    def unknown_decl(self, data):
	        """Emit a <![...]> declaration unchanged."""
	        self.output.append(f"<![{data}]>")


	def main():
	    """Clean the file or directory tree named by the first CLI argument.

	    A regular file is cleaned and the process exits with status 0.  For a
	    directory, every file below it whose name ends in ".html" is cleaned
	    and its path is printed.  A missing argument exits with status 2 and a
	    path that is neither a file nor a directory exits with status 1.
	    """
	    if len(sys.argv) < 2:
	        print("Usage: remove_script_tags [path]", file=sys.stderr)
	        sys.exit(2)

	    path = sys.argv[1]
	    if os.path.isfile(path):
	        RemoveScriptTags().clean_file(path)
	        sys.exit(0)

	    # os.walk() silently yields nothing for a missing path, so check first
	    # instead of reporting success without doing any work.
	    if not os.path.isdir(path):
	        print(
	            f"remove_script_tags: no such file or directory: {path}",
	            file=sys.stderr,
	        )
	        sys.exit(1)

	    for dirpath, __, filenames in os.walk(path):
	        for fname in filenames:
	            if fname.endswith(".html"):
	                path_to_file = os.path.join(dirpath, fname)
	                print(path_to_file)
	                # A fresh parser per file keeps the outputs separate.
	                RemoveScriptTags().clean_file(path_to_file)


	if __name__ == "__main__":
	    main()