Last active
February 14, 2021 21:27
-
-
Save pauliusbaulius/69b5c6ab0646bd5eedb8b75b0daffbd5 to your computer and use it in GitHub Desktop.
shitty_site_generator_01
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import shutil | |
from datetime import datetime | |
from functools import wraps | |
from time import time | |
from typing import Optional | |
import jinja2 | |
import requests | |
import tinify | |
from bs4 import BeautifulSoup | |
from dotenv import load_dotenv | |
from jinja2 import Template | |
from markdown import Markdown | |
import collections | |
# TODO: add ![[]] Obsidian media embed handling; need to check the media type etc.
""" | |
STAGE: | |
USER INTERACTION | |
STEPS: | |
1. tweak global variables to your liking. | |
2. add absolute paths to posts in POSTS. | |
""" | |
# Output directory for the generated site; the build log (ssg.txt) is written here too.
PATH_BLOG: str = "html"
# Media files referenced by posts are copied into this directory.
PATH_MEDIA: str = os.path.join(PATH_BLOG, "media")
# Directory the jinja2 FileSystemLoader searches for templates.
PATH_TEMPLATES: str = "static"
# Width handed to tinify's "scale" resize in compress_images().
MAX_MEDIA_WIDTH: int = 500
# Markdown files to build; each entry is replaced by a Post object during the build stage.
POSTS = [
    "/Users/m1/Desktop/brain/exception.lt/cheap_vps_adventures.md",
]
""" | |
STAGE: UTILITIES | |
NOTES: | |
various utility functions not directly related to page building. | |
""" | |
def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human readable string using binary prefixes.

    The value is divided by 1024 until its magnitude drops below 1024 or the
    known prefixes run out, in which case 'Yi' is used.
    Adapted from:
    https://stackoverflow.com/questions/1094841/get-human-readable-version-of-file-size
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    index = 0
    while index < len(prefixes):
        if abs(value) < 1024.0:
            return "%3.1f%s%s" % (value, prefixes[index], suffix)
        value = value / 1024.0
        index += 1
    # anything that is still >= 1024 after 'Zi' is reported in 'Yi'
    return "%.1f%s%s" % (value, 'Yi', suffix)
def write_log(function: str, log: str, args: Optional[dict] = None):
    """Append one row to the build log at PATH_BLOG/ssg.txt.

    The row has the columns DATETIME_UTC, PID, FUNCTION, LOG, ARGS and every
    field is wrapped in double quotes.

    function: name of the step that is logging.
    log: the message.
    args: optional extra data, written as its str(); an omitted value is
        written as "{}".
    """
    import csv  # local import keeps this function self-contained

    if args is None:
        args = {}  # fresh object per call instead of a shared mutable default
    row = [
        str(datetime.utcnow()),
        str(os.getpid()),
        str(function),
        str(log),
        str(args),
    ]
    with open(
        os.path.join(PATH_BLOG, "ssg.txt"), "a", newline="", encoding="utf-8"
    ) as log_file:
        # csv.writer doubles embedded quotes, so a message containing '"'
        # no longer breaks the row apart.
        writer = csv.writer(log_file, quoting=csv.QUOTE_ALL, lineterminator="\n")
        writer.writerow(row)
def timer(f):
    """Decorator: log how long each call of f took, in milliseconds.

    The duration is written with write_log() under f's own name after f
    returns; f's return value is passed through unchanged.
    """

    @wraps(f)
    def wrap(*args, **kw):
        started = time()
        outcome = f(*args, **kw)
        elapsed_ms = (time() - started) * 1000
        write_log(function=f.__name__, log="{:0.2f}ms".format(elapsed_ms))
        return outcome

    return wrap
""" | |
STAGE: PREPARATION | |
STEPS: | |
1. load jinja2 | |
2. load secrets from .env | |
3. make required directories | |
4. create log file | |
5. move readme and changelog to html/ for linking purposes | |
6. minify css | |
NOTES: | |
all the things needed to create proper html. | |
css is minified here to inline during post creation. | |
""" | |
print("ssg.py: building started...")
time_start = time()  # read again at the very end to report the total build time

# jinja2 environment shared by every render call below
JINJA2_LOADER = jinja2.FileSystemLoader(searchpath=PATH_TEMPLATES)
JINJA2_ENV = jinja2.Environment(loader=JINJA2_LOADER)

load_dotenv(".env")  # secrets like api keys for tinypng.com

# output directories must exist before anything is written into them
for directory in (PATH_BLOG, PATH_MEDIA):
    os.makedirs(directory, exist_ok=True)

# start a fresh log on every build; write_log() appends rows below this header
with open(os.path.join(PATH_BLOG, "ssg.txt"), "w") as log_file:
    log_file.write("DATETIME_UTC,PID,FUNCTION,LOG,ARGS\n")

# copy changelog and readme next to the generated pages so they can be linked
for source_name, target_name in (
    ("CHANGELOG.txt", "changelog.txt"),
    ("README.txt", "readme.txt"),
):
    shutil.copyfile(source_name, os.path.join(PATH_BLOG, target_name))
@timer
def minify_css():
    """Minify static/style.css into static/style.min.css.

    Uses the cssminifier.com api, is slow but least bloat. When the request
    fails (network error, timeout or a non-success HTTP status) the
    unminified stylesheet is written to static/style.min.css instead, so an
    error page never ends up in the css file, and the failure is logged.
    """
    url = "https://cssminifier.com/raw"
    # read through a context manager so the file handle is closed right away
    with open("static/style.css", "rb") as fr:
        css = fr.read()
    try:
        response = requests.post(url, data={"input": css}, timeout=30)
        response.raise_for_status()
    except requests.RequestException as error:
        write_log(
            "minify_css",
            f"ERROR: minification failed, using unminified css: {error}",
        )
        with open("static/style.min.css", "wb") as fw:
            fw.write(css)
        return
    with open("static/style.min.css", "w", encoding="utf-8") as fw:
        fw.write(response.text)


minify_css()
""" | |
STAGE: BUILD | |
STEPS: | |
1. iterate POSTS to build posts/ | |
2. create index.html, about.html and tags.html | |
""" | |
class Post:
    """One blog post built from a single markdown file.

    Constructing a Post does all the work: the metadata header is parsed, the
    markdown is converted to html, referenced media is copied into PATH_MEDIA,
    extra statistics are added to the metadata and the rendered page is
    written to PATH_BLOG/<name>.html.

    Attributes set by __init__: path_markdown, name, metadata, html, url, toc,
    images (paths of the referenced source media) and new_images (paths of
    the copies inside PATH_MEDIA).
    """

    def __init__(self, path_markdown):
        self.path_markdown = path_markdown
        self.name = self._generate_post_name()
        self.metadata = self._handle_metadata()
        self.html = self._convert_to_html()
        self.url = self.name + ".html"
        # _extract_toc() is not called here, so templates receive toc=None.
        self.toc = None
        self.images = []
        self.new_images = []
        # images must be handled before the statistics that measure them
        self._handle_images()
        self._add_extra_metadata()
        self._render_html()

    def __lt__(self, other):
        # Deliberately inverted: sorted(posts) puts the greatest "date" first.
        return self.metadata["date"] > other.metadata["date"]

    def _read_source(self):
        """ return the full text of the markdown file. """
        with open(self.path_markdown, "r", encoding="utf-8") as fr:
            return fr.read()

    @staticmethod
    def _file_size(path):
        """ size of path in bytes, or 0 when the file does not exist. """
        try:
            return os.path.getsize(path)
        except FileNotFoundError:
            return 0

    @staticmethod
    def _is_remote(src):
        """ True when src points at a remote or inline resource, not a local file. """
        return src.startswith(("http://", "https://", "//", "data:"))

    def _generate_post_name(self):
        """ takes your input path, gets filename without extension, makes it lowercase and replaces spaces with underscores. """
        _, tail = os.path.split(self.path_markdown)
        return str(os.path.splitext(tail)[0]).replace(" ", "_").lower()

    @timer
    def _handle_metadata(self):
        """ extract the metadata header from the markdown file (markdown "meta" extension) and return it as a python dict. """
        md = Markdown(extensions=["meta"])
        md.convert(self._read_source())  # the html is discarded, only md.Meta is needed
        metadata = md.Meta
        for key, value in metadata.items():
            if len(value) == 1:  # convert single item lists into string for aesthetics
                metadata[key] = "".join(value)
        return metadata

    @timer
    def _convert_to_html(self):
        """ convert the markdown file to an html fragment. """
        # NOTE(review): "meta" is not enabled here, so a metadata header is
        # presumably rendered as ordinary text - confirm this is intended.
        md = Markdown(
            extensions=[
                "fenced_code",
                "sane_lists",
                "smarty",
                "footnotes",
                "tables",
                "attr_list",
            ]
        )
        return md.convert(self._read_source())

    @timer
    def _extract_toc(self):
        """ return the table of contents produced by the markdown "toc" extension. """
        md = Markdown(extensions=["toc"])
        md.convert(self._read_source())
        return md.toc

    @timer
    def _add_extra_metadata(self):
        """ some additional metadata is calculated here: word/char count, markdown file size and image sizes. """
        content = self._read_source()
        # original note (translated): the compressed numbers only become
        # meaningful on the second build.
        size_images_og = sum(self._file_size(image) for image in self.images)
        size_images_compressed = sum(
            self._file_size(image) for image in self.new_images
        )
        self.metadata["wc"] = f"{len(content.split())} {len(content)}"
        self.metadata["md_file_size"] = sizeof_fmt(
            os.path.getsize(self.path_markdown)
        )
        self.metadata["images_size_original"] = sizeof_fmt(size_images_og)
        self.metadata["images_size_compressed"] = sizeof_fmt(size_images_compressed)
        self.metadata["images_savings"] = sizeof_fmt(
            size_images_og - size_images_compressed
        )

    @timer
    def _handle_images(self):
        """ copy local media referenced by the post into PATH_MEDIA and point the html at the copies.

        every local <img>/<source> src is resolved relative to the markdown
        file, copied into PATH_MEDIA unless a file with that name is already
        there, and replaced in self.html by "/media/<filename>". remote and
        inline sources (http(s), protocol-relative, data:) and tags without
        a src are left untouched. a missing source file is logged and the
        link is still rewritten.
        """
        # TODO hash names, move to media dir, replace names in html.
        soup = BeautifulSoup(self.html, "html5lib")
        source_dir = os.path.dirname(self.path_markdown)
        seen = set()  # a src used twice must only be replaced once
        for media in soup.find_all(["img", "source"]):
            media_path = media.get("src")
            if not media_path or self._is_remote(media_path):
                continue
            if media_path in seen:
                continue
            seen.add(media_path)
            # absolute path to picture in your filesystem
            absolute_path = os.path.normpath(os.path.join(source_dir, media_path))
            self.images.append(absolute_path)
            image_filename = os.path.basename(absolute_path)
            target_path = os.path.join(PATH_MEDIA, image_filename)
            # check if image exists in webpage path before copying it
            if not os.path.exists(target_path):
                try:
                    shutil.copy(absolute_path, target_path)
                except FileNotFoundError:
                    write_log(
                        "copy_media",
                        f"ERROR: [{media_path}] was not found",
                    )
            # the link is built from the filename, independent of PATH_BLOG,
            # since the pages live in that path.
            new_url = "/media/" + image_filename
            # replace old links with new links!
            self.html = str(self.html).replace(media_path, new_url)
            self.new_images.append(target_path)

    @timer
    def _render_html(self):
        """ render the post.html template and write it to PATH_BLOG/<url>. """
        template = JINJA2_ENV.get_template("post.html")
        output = template.render(
            metadata=json.dumps(
                self.metadata, indent=4
            ),  # prettifies metadata by converting to indented str
            toc=self.toc,
            content=self.html,
        )
        with open(os.path.join(PATH_BLOG, self.url), "w", encoding="utf-8") as f:
            f.write(output)
# pool = Pool() | |
# pool.map(create_post, POSTS) | |
# pool.close() | |
# pool.join() | |
# build posts! every markdown path in POSTS is swapped, in place, for the
# Post object built from it, so the stages below work with Post instances.
POSTS[:] = [Post(markdown_path) for markdown_path in POSTS]
@timer
def build_html(
    template: str,
    filename: str,
    content: Optional[dict] = None,
):
    """Render a jinja2 template and write the result to PATH_BLOG/filename.

    template: name of the template, looked up through JINJA2_ENV.
    filename: name of the output file inside PATH_BLOG.
    content: variables handed to the template; omitted means no variables.
    """
    page = JINJA2_ENV.get_template(template)
    # render() needs a mapping, so an omitted content becomes an empty dict
    output = page.render(content if content is not None else {})
    with open(os.path.join(PATH_BLOG, filename), "w", encoding="utf-8") as f:
        f.write(output)
@timer
def build_tags() -> dict:
    """ builds a dictionary of tag: [Post]. keys are sorted by tag name, posts keep the order of sorted(POSTS).

    a post whose metadata has a single tag stores it as a plain string (single
    item lists are collapsed when the metadata is parsed), so a string is
    treated as one tag instead of being iterated character by character.
    posts without a "tags" entry are skipped.
    """
    tags = {}
    for post in sorted(POSTS):
        post_tags = post.metadata.get("tags", [])
        if isinstance(post_tags, str):
            post_tags = [post_tags]
        for tag in post_tags:
            if not tag:  # ignore an empty tags entry
                continue
            tags.setdefault(tag, []).append(post)
    return collections.OrderedDict(sorted(tags.items()))
# build index
newest_first = sorted(POSTS)
build_html(template="index.html", filename="index.html", content={"posts": newest_first})

# build about
build_html(template="about.html", filename="about.html")

# build tags
tag_index = build_tags()
build_html(template="tags.html", filename="tags.html", content={"tags": tag_index})
""" | |
STAGE: OPTIMIZE | |
STEPS: | |
1. add lazy loading to all html <img> elements
2. compress images if needed, use a file to track whether images were compressed before or not | |
3. minify html to further reduce size | |
NOTES: | |
just some additional stuff to make pages faster and reduce bandwidth waste.
can be skipped without breaking anything. | |
""" | |
@timer
def add_lazy_loading():
    """Add loading="lazy" to every <img> in the .html files directly inside PATH_BLOG.

    Each file is parsed, updated and written back in place. Files without any
    <img> are left untouched. Only <img> is handled because the loading
    attribute is not defined for <source>.
    """
    for filename in os.listdir(PATH_BLOG):
        if not filename.endswith(".html"):
            continue
        path = os.path.join(PATH_BLOG, filename)
        with open(path, "r", encoding="utf-8") as fr:
            soup = BeautifulSoup(fr.read(), "lxml")
        images = soup.find_all("img")
        if not images:
            continue  # nothing to change, do not rewrite the file
        for media in images:
            media.attrs["loading"] = "lazy"
        with open(path, "w", encoding="utf-8") as fw:
            fw.write(str(soup))
@timer
def compress_images():
    """Resize every .png/.jpg/.jpeg in PATH_MEDIA through tinify, in place.

    The api key is read from the "key" environment variable. Each image is
    scaled to a width of MAX_MEDIA_WIDTH, written back over the original
    file, and its size before and after is logged.
    """
    tinify.key = os.environ.get("key")
    for image in os.listdir(PATH_MEDIA):
        extension = os.path.splitext(image)[1].lower()
        if extension not in (".png", ".jpg", ".jpeg"):  # only minify images!
            continue
        image_path = os.path.join(PATH_MEDIA, image)
        size_before = os.path.getsize(image_path)
        source = tinify.from_file(image_path)
        source.resize(method="scale", width=MAX_MEDIA_WIDTH).to_file(image_path)
        size_after = os.path.getsize(image_path)
        write_log(
            "compress_media",
            f"resized [{image}] from {size_before}bytes to {size_after}bytes",
        )
@timer
def minify_html():
    """Placeholder for the html minification step; does nothing yet."""
    # todo https://stackoverflow.com/questions/5597094/compressminimize-html-from-python
    return None
#compress_images() #TODO logic to not compress already compressed data. keep a log? | |
""" | |
STAGE: END OF THE LINE | |
NOTES: | |
calculate time taken to build exception.lt | |
write last log entry | |
print to console | |
exit | |
""" | |
# total build time, measured from time_start taken in the preparation stage
time_end = time()
time_taken = "{:2.2f}s".format(time_end - time_start)
write_log(function="ssg.py", log=time_taken)  # last log entry
print(f"ssg.py: done in {time_taken}!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment