Last active
November 13, 2019 19:39
-
-
Save nico202/80d24b82d1ff1bde2dd40234bccd8125 to your computer and use it in GitHub Desktop.
Includes external resources in html files. Use with: python build.py input.html output.html (adapted from https://stackoverflow.com/questions/28258579/modify-html-file-to-embed-all-external-scripts-and-css-into-script-and-style)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This takes an html file, looks for external script and stylesheet tags,
# downloads them (when necessary), and then embeds their merged, minified
# contents inline in a single output html file.
import io
import os
import re
import sys
from collections import deque
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup, Tag
from cssmin import cssmin
from jsmin import jsmin
# html param: path of the HTML file to process
html = sys.argv[1]
# target param: path the bundled HTML is written to
target = sys.argv[2]
# path from html param: everything up to and including the last "/" of the
# input path, or "" when the input is given without a directory
path = re.sub(r"[^\/]*$", "", html)


def _resolve_src(src):
    """Return the location to read *src* from.

    http(s) URLs are returned unchanged so they can still be recognised as
    downloadable; anything else is taken as relative to the input HTML file
    and gets ``path`` prepended.
    """
    if src.startswith(("http://", "https://")):
        return src
    return path + src


# open html file; the handle is closed as soon as parsing is done
with open(html) as html_file:
    soup = BeautifulSoup(html_file, features="html.parser")

# get all scripts containing src attribute (= external scripts)
scripts = soup.find_all("script", attrs={"src": True})
# get all links to css stylesheets (only those that actually have an href)
stylesheets = soup.find_all("link", attrs={"rel": "stylesheet", "href": True})

# The last tag of each kind is the anchor that is later replaced by the
# embedded tag, so both kinds must be present. Stop with a readable message
# instead of an IndexError traceback.
if not scripts:
    sys.exit("No <script src=...> tag found in %s" % html)
if not stylesheets:
    sys.exit("No <link rel=\"stylesheet\" href=...> tag found in %s" % html)

# find last script as anchorpoint
lastScript = scripts[-1]
# find last style link as anchorpoint
lastStylesheet = stylesheets[-1]

# create list of script srcs
# TODO: download (local_or_url_read) should happen here
print("\nRead Scripts:")
scriptsSrc = deque(_resolve_src(script.attrs["src"]) for script in scripts)
for src in scriptsSrc:
    print("\t" + src)

# create list of stylesheets srcs
print("\nRead Stylesheets:")
stylesheetsSrc = deque(
    _resolve_src(stylesheet.attrs["href"]) for stylesheet in stylesheets
)
for src in stylesheetsSrc:
    print("\t" + src)
def local_or_url_read(fname, output):
    """Copy the contents of *fname* into *output*, downloading it if needed.

    If *fname* is an existing local file it is copied as is. Otherwise, if it
    is an http(s) URL, the resource is stored in the current working
    directory under the basename of the URL path (an already present file of
    that name is reused instead of downloading again) and then copied.

    Args:
        fname: Local file path or http(s) URL of the resource.
        output: Object with a ``write(str)`` method receiving the content.

    Raises:
        FileNotFoundError: *fname* is neither an existing file nor an
            http(s) URL.
        requests.RequestException: The download failed or the server
            answered with an error status.
    """
    if not os.path.isfile(fname):
        if not fname.startswith(("http://", "https://")):
            raise FileNotFoundError(
                "The file %s is missing and I don't know how to download it!"
                % fname
            )
        urlpath = fname
        fname = os.path.basename(urlparse(urlpath).path)
        if os.path.isfile(fname):
            print("File %s has already been downloaded, skipping" %
                  fname, end="")
        else:
            print("downloading %s" % urlpath, end="")
            # Fetch and validate BEFORE creating the local file, so a failed
            # request cannot leave behind an empty or error-page file that a
            # later run would mistake for a finished download.
            response = requests.get(urlpath, timeout=30)
            response.raise_for_status()
            with open(fname, "w") as ofile:
                ofile.write(response.text)
    with open(fname) as infile:
        for line in infile:
            output.write(line)
# Merge, minify and embed the collected resources.
#
# The merged sources are collected in in-memory buffers rather than in
# "temp.js" / "temp.css" files: writing and afterwards deleting those names
# in the working directory would destroy unrelated files of the same name
# and would leave them behind if minification failed.

# merge scripts
print("\nMerge Scripts:")
print("\t", end="")
merged_js = io.StringIO()
for fname in scriptsSrc:
    # separate every script with a newline
    merged_js.write("\n")
    print("~", end="")
    local_or_url_read(fname, merged_js)
print("\n")

# merge stylesheets
print("Merge Stylesheets:")
print("\t", end="")
merged_css = io.StringIO()
for fname in stylesheetsSrc:
    # separate every stylesheet with a newline
    merged_css.write("\n")
    print("~", end="")
    local_or_url_read(fname, merged_css)
print("\n")

# minify javascript
print("Minify scripts\n\t~")
minified_js = jsmin(merged_js.getvalue())

# minify css
print("\nMinify stylesheets\n\t~")
minified_css = cssmin(merged_css.getvalue())

# replace the last external script / stylesheet link with one embedded tag
# holding the merged and minified content
print("\nReplacing and deleting\n\t~")
script_tag = soup.new_tag("script")
script_tag["type"] = "text/javascript"
script_tag.append(minified_js)
lastScript.replace_with(script_tag)

style_tag = soup.new_tag("style")
style_tag["type"] = "text/css"
style_tag.append(minified_css)
lastStylesheet.replace_with(style_tag)

# remove the original external script and stylesheet tags
for script in scripts:
    script.decompose()
for stylesheet in stylesheets:
    stylesheet.decompose()

# save html as target
with open(target, "w") as out_file:
    out_file.write(soup.prettify())
print("\nFIN\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment