pauliusbaulius · February 14, 2021 21:27
diff --git a/ssg_01.py b/ssg_01.py
 import json
 import os
 import shutil
 from datetime import datetime
 from functools import wraps
 from time import time
 from typing import Optional
 import jinja2
 import requests
 import tinify
 from bs4 import BeautifulSoup
 from dotenv import load_dotenv
 from jinja2 import Template
 from markdown import Markdown
 import collections     

 # TODO: add ![[]] Obsidian media embed handling! need to check the media type etc.

 """
 STAGE: 
    USER INTERACTION
    
 STEPS:
    1. tweak global variables to your liking.
    2. add absolute paths to posts in POSTS.
 """

 # Output directory: receives the rendered pages and the ssg.txt build log.
 PATH_BLOG = "html"
 # Media directory inside the output directory; post images are copied here.
 PATH_MEDIA = os.path.join(PATH_BLOG, "media")
 # Directory handed to the jinja2 FileSystemLoader as its template search path.
 PATH_TEMPLATES = "static"
 # Target width passed to tinify's resize call in compress_images().
 MAX_MEDIA_WIDTH = 500
 # Markdown source paths. During the build each entry is replaced in place
 # by the Post object created from it.
 POSTS = [
    "/Users/m1/Desktop/brain/exception.lt/cheap_vps_adventures.md",
 ]

 """
 STAGE: UTILITIES

 NOTES:
    various utility functions not directly related to page building.
 """

 def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human-readable string using 1024-based prefixes.

    Args:
        num: size to format (int or float, may be negative).
        suffix: unit suffix appended after the prefix, "B" by default.

    Returns:
        A string such as "1.5KiB"; values beyond the "Zi" range are
        reported in "Yi".
    """
    # https://stackoverflow.com/questions/1094841/get-human-readable-version-of-file-size
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    scaled = num
    step = 0
    # keep dividing until the value drops below 1024 or the prefixes run out
    while step < len(prefixes) and not abs(scaled) < 1024.0:
        scaled /= 1024.0
        step += 1
    if step == len(prefixes):
        return "%.1f%s%s" % (scaled, 'Yi', suffix)
    return "%3.1f%s%s" % (scaled, prefixes[step], suffix)


 def write_log(function: str, log: str, args: Optional[dict] = None):
    """Append one row to the build log at PATH_BLOG/ssg.txt.

    The row has the columns announced by the log header: UTC timestamp,
    process id, function name, log message and args.

    Args:
        function: name of the function (or stage) the entry belongs to.
        log: the message to record.
        args: optional extra data; logged as "{}" when omitted.
    """
    import csv  # local import: the file's import block does not provide csv

    if args is None:
        # a fresh dict per call instead of a shared mutable default
        args = {}
    with open(os.path.join(PATH_BLOG, "ssg.txt"), "a") as log_file:
        # QUOTE_ALL keeps the original '"a","b",...' layout, but also escapes
        # quotes inside a value so the row stays parseable.
        writer = csv.writer(log_file, quoting=csv.QUOTE_ALL, lineterminator="\n")
        writer.writerow([datetime.utcnow(), os.getpid(), function, log, args])


 def timer(f):
    """Decorator that logs how long each call of `f` takes.

    After `f` returns, the elapsed wall-clock time in milliseconds is
    written to the build log under the name of `f`. The return value of
    `f` is passed through unchanged. Nothing is logged if `f` raises.
    """

    @wraps(f)
    def timed(*args, **kw):
        started = time()
        outcome = f(*args, **kw)
        elapsed_ms = (time() - started) * 1000
        write_log(function=f.__name__, log=f"{elapsed_ms:0.2f}ms")
        return outcome

    return timed


 """
 STAGE: PREPARATION

 STEPS:
    1. load jinja2
    2. load secrets from .env
    3. make required directories
    4. create log file 
    5. move readme and changelog to html/ for linking purposes
    6. minify css

 NOTES:
    all the things needed to create proper html.
    css is minified here to inline during post creation.
 """

 print("ssg.py: building started...")
 time_start = time()  # read again at the end of the script to report the build time

 # templates are looked up in PATH_TEMPLATES
 JINJA2_LOADER = jinja2.FileSystemLoader(searchpath=PATH_TEMPLATES)
 JINJA2_ENV = jinja2.Environment(loader=JINJA2_LOADER)

 load_dotenv(".env")  # secrets like api keys for tinypng.com

 # output directories must exist before the log file is created inside them
 for output_dir in (PATH_BLOG, PATH_MEDIA):
    os.makedirs(output_dir, exist_ok=True)

 # start a fresh log for this build; write_log() appends rows below this header
 with open(os.path.join(PATH_BLOG, "ssg.txt"), "w") as log_file:
    log_file.write("DATETIME_UTC,PID,FUNCTION,LOG,ARGS\n")

 # publish changelog and readme next to the pages so they can be linked
 for source_name, target_name in (
    ("CHANGELOG.txt", "changelog.txt"),
    ("README.txt", "readme.txt"),
 ):
    shutil.copyfile(source_name, os.path.join(PATH_BLOG, target_name))

 @timer
 def minify_css():
    """Write static/style.min.css from static/style.css.

    Uses the cssminifier.com api (slow, but no extra dependency). If the
    request fails or returns an error status, the unminified css is
    written instead and the failure is logged, so the build still produces
    a usable stylesheet.
    """
    url = "https://cssminifier.com/raw"
    # read through a context manager so the file handle is closed
    with open("static/style.css", "rb") as fr:
        css = fr.read()

    try:
        # timeout: a hanging api must not block the build forever
        response = requests.post(url, data={"input": css}, timeout=30)
        # an error page must never end up inside style.min.css
        response.raise_for_status()
        minified = response.text
    except requests.RequestException as error:
        write_log("minify_css", f"minification failed, using unminified css: {error}")
        minified = css.decode("utf-8", errors="replace")

    with open("static/style.min.css", "w") as fw:
        fw.write(minified)

 minify_css()


 """
 STAGE: BUILD

 STEPS:
    1. iterate POSTS to build posts/
    2. create index.html, about.html and tags.html
 """


 class Post:
    """One blog post built from a single markdown file.

    Constructing a Post runs the whole per-post pipeline: read the metadata
    header, convert the markdown to html, copy referenced media into
    PATH_MEDIA, compute extra metadata and render post.html into PATH_BLOG.

    Attributes set by __init__:
        path_markdown: path of the source markdown file.
        name: output base name derived from the file name.
        metadata: dict parsed by the markdown "meta" extension plus computed stats.
        html: converted post body, with local media links rewritten.
        url: file name of the rendered page (name + ".html").
        toc: always None at the moment, see the note in __init__.
        images: source paths of the local media referenced by the post.
        new_images: paths of the same media inside PATH_MEDIA.
    """

    def __init__(self, path_markdown):
        """Build the post found at `path_markdown` and write its html page."""
        self.path_markdown = path_markdown
        self.name = self._generate_post_name()
        self.metadata = self._handle_metadata()
        self.html = self._convert_to_html()
        self.url = self.name + ".html"
        # NOTE(review): _extract_toc() is never called, so the template
        # always receives toc=None - confirm whether that is intended.
        self.toc = None
        self.images = []
        self.new_images = []

        # order matters: image lists must be filled before the stats are
        # computed, and the metadata must be complete before rendering.
        self._handle_images()
        self._add_extra_metadata()
        self._render_html()

    def __lt__(self, other):
        """Sort posts by descending metadata["date"].

        The comparison is deliberately inverted so that sorted() puts the
        post with the larger date first. Raises KeyError when a post has
        no "date" entry.
        """
        return self.metadata["date"] > other.metadata["date"]

    def _generate_post_name(self):
        """Return the markdown file name without extension, lowercased, spaces replaced by underscores."""
        filename = os.path.basename(self.path_markdown)
        return str(os.path.splitext(filename)[0]).replace(" ", "_").lower()

    @timer
    def _handle_metadata(self):
        """Parse the metadata header of the markdown file into a dict.

        The markdown "meta" extension yields a list of values per key;
        single-item lists are collapsed into a plain string for aesthetics.
        """
        md = Markdown(extensions=["meta"])
        with open(self.path_markdown, "r") as fr:
            # the converted html is not needed; converting fills md.Meta
            md.convert(fr.read())
        return {
            key: "".join(values) if len(values) == 1 else values
            for key, values in md.Meta.items()
        }

    @timer
    def _convert_to_html(self):
        """Convert the markdown file to an html fragment and return it."""
        md = Markdown(
            extensions=[
                "fenced_code",
                "sane_lists",
                "smarty",
                "footnotes",
                "tables",
                "attr_list",
            ]
        )
        with open(self.path_markdown, "r") as fr:
            return md.convert(fr.read())

    @timer
    def _extract_toc(self):
        """Return the table of contents generated by the markdown "toc" extension."""
        md = Markdown(extensions=["toc"])
        with open(self.path_markdown, "r") as fr:
            # the toc is only available after a conversion has run
            md.convert(fr.read())
            return md.toc

    @staticmethod
    def _get_image_size(image):
        """Return the size of `image` in bytes, or 0 when the file does not exist."""
        try:
            return os.path.getsize(image)
        except FileNotFoundError:
            return 0

    @timer
    def _add_extra_metadata(self):
        """Add computed statistics to self.metadata.

        Adds "wc" (word count and character count), "md_file_size" and the
        total size of the post's images before and after publishing.
        """
        with open(self.path_markdown, "r") as fr:
            content = fr.read()

        # NOTE: compress_images() is defined in the later OPTIMIZE stage, so
        # the "compressed" numbers can only reflect an earlier build.
        size_images_og = sum(self._get_image_size(image) for image in self.images)
        size_images_compressed = sum(
            self._get_image_size(image) for image in self.new_images
        )

        self.metadata["wc"] = f"{len(content.split())} {len(content)}"
        self.metadata["md_file_size"] = sizeof_fmt(os.path.getsize(self.path_markdown))
        self.metadata["images_size_original"] = sizeof_fmt(size_images_og)
        self.metadata["images_size_compressed"] = sizeof_fmt(size_images_compressed)
        self.metadata["images_savings"] = sizeof_fmt(
            size_images_og - size_images_compressed
        )

    @timer
    def _handle_images(self):
        """Copy the post's local media into PATH_MEDIA and relink the html.

        Every <img>/<source> with a local src is copied to PATH_MEDIA (unless
        a file of that name is already there) and its link in self.html is
        replaced by "media/<file name>". Missing source files are logged and
        still relinked. Tags without src and remote or data: urls are left
        untouched.
        """
        # TODO hash names, move to media dir, replace names in html.
        # TODO rename handle_images and do all steps in here.
        soup = BeautifulSoup(self.html, "html5lib")
        head, _ = os.path.split(self.path_markdown)
        handled = set()
        for media in soup.find_all(["img", "source"]):
            media_path = media.get("src")
            # nothing to copy for tags without src or for non-local media
            if not media_path or "://" in media_path or media_path.startswith("data:"):
                continue
            # a link is rewritten once; replacing it again would also hit
            # the already rewritten "media/..." text and corrupt it
            if media_path in handled:
                continue
            handled.add(media_path)

            # absolute path to picture in your filesystem
            absolute_path = os.path.normpath(os.path.join(head, media_path))
            self.images.append(absolute_path)

            # check if image exists in webpage path before copying it
            image_filename = os.path.basename(absolute_path)
            published_path = os.path.join(PATH_MEDIA, image_filename)
            if not os.path.exists(published_path):
                try:
                    shutil.copy(absolute_path, PATH_MEDIA)
                except FileNotFoundError:
                    write_log(
                        "ERROR",
                        "copy_media",
                        f"[{media_path}] was not found",
                    )

            # links are relative to PATH_BLOG, so PATH_BLOG is not part of them
            new_path = "media/" + image_filename
            self.html = str(self.html).replace(media_path, new_path)
            self.new_images.append(published_path)

    @timer
    def _render_html(self):
        """Render post.html with this post's data and write it to PATH_BLOG/<url>."""
        template = JINJA2_ENV.get_template("post.html")
        output = template.render(
            metadata=json.dumps(
                self.metadata, indent=4
            ),  # prettifies metadata by converting to indented str
            toc=self.toc,
            content=self.html,
        )
        with open(os.path.join(PATH_BLOG, self.url), "w") as f:
            f.write(output)

 # pool = Pool()
 # pool.map(create_post, POSTS)
 # pool.close()
 # pool.join()

 # build posts! every markdown path in POSTS is swapped in place for the Post
 # built from it, so the later stages can treat POSTS as a list of Post objects.
 POSTS[:] = [Post(markdown_path) for markdown_path in POSTS]


 @timer
 def build_html(
    template: str,
    filename: str,
    content: Optional[dict] = None,
 ):
    """Render a jinja2 template and write the result into PATH_BLOG.

    Args:
        template: name of the template, looked up through JINJA2_ENV.
        filename: name of the output file inside PATH_BLOG.
        content: variables passed to the template; none when omitted.
    """
    # a separate name keeps the `template` argument a plain str
    page_template = JINJA2_ENV.get_template(template)
    # None instead of a shared mutable {} default; render gets a fresh dict
    output = page_template.render({} if content is None else content)
    with open(os.path.join(PATH_BLOG, filename), "w") as f:
        f.write(output)


 @timer
 def build_tags() -> dict:
    """Build an ordered mapping of tag -> list of posts carrying that tag.

    Tags are sorted alphabetically; the posts of each tag keep the order
    of sorted(POSTS). Posts without a "tags" entry are skipped.

    Returns:
        collections.OrderedDict mapping each tag to its list of Post objects.
    """
    tags = {}
    for post in sorted(POSTS):
        post_tags = post.metadata.get("tags", [])
        # Post._handle_metadata collapses a single tag into a plain string;
        # iterating that string would register every character as a tag.
        if isinstance(post_tags, str):
            post_tags = [post_tags]
        for tag in post_tags:
            tags.setdefault(tag, []).append(post)

    return collections.OrderedDict(sorted(tags.items()))


 # build index: posts in their sorted() order
 posts_sorted = sorted(POSTS)
 build_html(
    template="index.html",
    filename="index.html",
    content={"posts": posts_sorted},
 )

 # build about: static page without template variables
 build_html(template="about.html", filename="about.html")

 # build tags: the tag mapping is computed right before its page is rendered
 tags_by_name = build_tags()
 build_html(
    template="tags.html",
    filename="tags.html",
    content={"tags": tags_by_name},
 )


 """
 STAGE: OPTIMIZE

 STEPS:
    1. add lazy loading to all html <img> attributes
    2. compress images if needed, use a file to track whether images were compressed before or not
    3. minify html to further reduce size

 NOTES:
    just some additional stuff to make the page faster and reduce bandwidth waste.
    can be skipped without breaking anything.
 """


 @timer
 def add_lazy_loading():
    """Add loading="lazy" to every <img>/<source> in the html files of PATH_BLOG.

    Each .html file directly inside PATH_BLOG is parsed, updated and written
    back in place. Because the file is re-serialized by the parser, its
    formatting may change.
    """
    for filename in os.listdir(PATH_BLOG):
        if not filename.endswith(".html"):
            continue
        path_html = os.path.join(PATH_BLOG, filename)
        with open(path_html, "r") as fr:
            soup = BeautifulSoup(fr.read(), "lxml")
        for media in soup.find_all(["img", "source"]):
            media.attrs["loading"] = "lazy"
        with open(path_html, "w") as fw:
            fw.write(str(soup))


 @timer
 def compress_images():
    """Resize every png/jpg/jpeg in PATH_MEDIA through tinify, in place.

    The api key is read from the environment variable "key". Each image is
    sent to tinify, resized with method="scale" and width=MAX_MEDIA_WIDTH,
    and written back over the original file. The size before and after is
    logged per image.
    """
    tinify.key = os.environ.get("key")
    image_extensions = [".png", ".jpg", ".jpeg"]
    for filename in os.listdir(PATH_MEDIA):
        extension = os.path.splitext(filename)[1]
        if extension.lower() not in image_extensions:
            continue  # only minify images!
        path = os.path.join(PATH_MEDIA, filename)
        size_before = os.path.getsize(path)
        source = tinify.from_file(path)
        source.resize(method="scale", width=MAX_MEDIA_WIDTH).to_file(path)
        size_after = os.path.getsize(path)
        write_log(
            "compress_media",
            f"resized [{filename}] from {size_before}bytes to {size_after}bytes",
        )


 @timer
 def minify_html():
    """Placeholder for html minification; does nothing and returns None."""
    # todo https://stackoverflow.com/questions/5597094/compressminimize-html-from-python
    return None


 #compress_images() #TODO logic to not compress already compressed data. keep a log?

 """
 STAGE: END OF THE LINE

 NOTES:
    calculate time taken to build exception.lt
    write last log entry
    print to console
    exit
 """

 # total build time, measured from time_start taken in the preparation stage
 time_end = time()
 time_taken = f"{time_end - time_start:2.2f}s"
 write_log("ssg.py", time_taken)
 print("ssg.py: done in %s!" % time_taken)
	import json
	import os
	import shutil
	from datetime import datetime
	from functools import wraps
	from time import time
	from typing import Optional
	import jinja2
	import requests
	import tinify
	from bs4 import BeautifulSoup
	from dotenv import load_dotenv
	from jinja2 import Template
	from markdown import Markdown
	import collections

	# TODO: add ![[]] Obsidian media embed handling! need to check the media type etc.

	"""
	STAGE:
	USER INTERACTION

	STEPS:
	1. tweak global variables to your liking.
	2. add absolute paths to posts in POSTS.
	"""

	# Output directory: receives the rendered pages and the ssg.txt build log.
	PATH_BLOG = "html"
	# Media directory inside the output directory; post images are copied here.
	PATH_MEDIA = os.path.join(PATH_BLOG, "media")
	# Directory handed to the jinja2 FileSystemLoader as its template search path.
	PATH_TEMPLATES = "static"
	# Target width passed to tinify's resize call in compress_images().
	MAX_MEDIA_WIDTH = 500
	# Markdown source paths. During the build each entry is replaced in place
	# by the Post object created from it.
	POSTS = [
	"/Users/m1/Desktop/brain/exception.lt/cheap_vps_adventures.md",
	]

	"""
	STAGE: UTILITIES

	NOTES:
	various utility functions not directly related to page building.
	"""

	def sizeof_fmt(num, suffix='B'):
	    """Format a byte count as a human-readable string using 1024-based prefixes.

	    Args:
	        num: size to format (int or float, may be negative).
	        suffix: unit suffix appended after the prefix, "B" by default.

	    Returns:
	        A string such as "1.5KiB"; values beyond the "Zi" range are
	        reported in "Yi".
	    """
	    # https://stackoverflow.com/questions/1094841/get-human-readable-version-of-file-size
	    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
	        if abs(num) < 1024.0:
	            return "%3.1f%s%s" % (num, unit, suffix)
	        num /= 1024.0
	    return "%.1f%s%s" % (num, 'Yi', suffix)


	def write_log(function: str, log: str, args: Optional[dict] = None):
	    """Append one row to the build log at PATH_BLOG/ssg.txt.

	    The row has the columns announced by the log header: UTC timestamp,
	    process id, function name, log message and args.

	    Args:
	        function: name of the function (or stage) the entry belongs to.
	        log: the message to record.
	        args: optional extra data; logged as "{}" when omitted.
	    """
	    import csv  # local import: the file's import block does not provide csv

	    if args is None:
	        # a fresh dict per call instead of a shared mutable default
	        args = {}
	    with open(os.path.join(PATH_BLOG, "ssg.txt"), "a") as log_file:
	        # QUOTE_ALL keeps the original '"a","b",...' layout, but also escapes
	        # quotes inside a value so the row stays parseable.
	        writer = csv.writer(log_file, quoting=csv.QUOTE_ALL, lineterminator="\n")
	        writer.writerow([datetime.utcnow(), os.getpid(), function, log, args])


	def timer(f):
	    """Decorator that logs how long each call of `f` takes.

	    After `f` returns, the elapsed wall-clock time in milliseconds is
	    written to the build log under the name of `f`. The return value of
	    `f` is passed through unchanged. Nothing is logged if `f` raises.
	    """

	    @wraps(f)
	    def wrap(*args, **kw):
	        # *args/**kw forward any call signature, including calls without
	        # arguments and calls with keyword arguments
	        ts = time()
	        result = f(*args, **kw)
	        te = time()
	        write_log(function=f.__name__, log="{:0.2f}ms".format((te - ts) * 1000))
	        return result

	    return wrap


	"""
	STAGE: PREPARATION

	STEPS:
	1. load jinja2
	2. load secrets from .env
	3. make required directories
	4. create log file
	5. move readme and changelog to html/ for linking purposes
	6. minify css

	NOTES:
	all the things needed to create proper html.
	css is minified here to inline during post creation.
	"""

	print("ssg.py: building started...")
	time_start = time()  # read again at the end of the script to report the build time

	# templates are looked up in PATH_TEMPLATES
	JINJA2_LOADER = jinja2.FileSystemLoader(searchpath=PATH_TEMPLATES)
	JINJA2_ENV = jinja2.Environment(loader=JINJA2_LOADER)

	load_dotenv(".env")  # secrets like api keys for tinypng.com

	# output directories must exist before the log file is created inside them
	os.makedirs(PATH_BLOG, exist_ok=True)
	os.makedirs(PATH_MEDIA, exist_ok=True)

	# start a fresh log for this build; write_log() appends rows below this header
	with open(os.path.join(PATH_BLOG, "ssg.txt"), "w") as l:
	    l.write("DATETIME_UTC,PID,FUNCTION,LOG,ARGS\n")

	# publish changelog and readme next to the pages so they can be linked
	shutil.copyfile("CHANGELOG.txt", os.path.join(PATH_BLOG, "changelog.txt"))
	shutil.copyfile("README.txt", os.path.join(PATH_BLOG, "readme.txt"))

	@timer
	def minify_css():
	    """Write static/style.min.css from static/style.css.

	    Uses the cssminifier.com api (slow, but no extra dependency). If the
	    request fails or returns an error status, the unminified css is
	    written instead and the failure is logged, so the build still produces
	    a usable stylesheet.
	    """
	    url = "https://cssminifier.com/raw"
	    # read through a context manager so the file handle is closed
	    with open("static/style.css", "rb") as fr:
	        css = fr.read()

	    try:
	        # timeout: a hanging api must not block the build forever
	        response = requests.post(url, data={"input": css}, timeout=30)
	        # an error page must never end up inside style.min.css
	        response.raise_for_status()
	        minified = response.text
	    except requests.RequestException as error:
	        write_log("minify_css", f"minification failed, using unminified css: {error}")
	        minified = css.decode("utf-8", errors="replace")

	    with open("static/style.min.css", "w") as fw:
	        fw.write(minified)

	minify_css()


	"""
	STAGE: BUILD

	STEPS:
	1. iterate POSTS to build posts/
	2. create index.html, about.html and tags.html
	"""


	class Post:
	    """One blog post built from a single markdown file.

	    Constructing a Post runs the whole per-post pipeline: read the metadata
	    header, convert the markdown to html, copy referenced media into
	    PATH_MEDIA, compute extra metadata and render post.html into PATH_BLOG.

	    Attributes set by __init__:
	        path_markdown: path of the source markdown file.
	        name: output base name derived from the file name.
	        metadata: dict parsed by the markdown "meta" extension plus computed stats.
	        html: converted post body, with local media links rewritten.
	        url: file name of the rendered page (name + ".html").
	        toc: always None at the moment, see the note in __init__.
	        images: source paths of the local media referenced by the post.
	        new_images: paths of the same media inside PATH_MEDIA.
	    """

	    def __init__(self, path_markdown):
	        """Build the post found at `path_markdown` and write its html page."""
	        self.path_markdown = path_markdown
	        self.name = self._generate_post_name()
	        self.metadata = self._handle_metadata()
	        self.html = self._convert_to_html()
	        self.url = self.name + ".html"
	        # NOTE(review): _extract_toc() is never called, so the template
	        # always receives toc=None - confirm whether that is intended.
	        self.toc = None
	        self.images = []
	        self.new_images = []

	        # order matters: image lists must be filled before the stats are
	        # computed, and the metadata must be complete before rendering.
	        self._handle_images()
	        self._add_extra_metadata()
	        self._render_html()

	    def __lt__(self, other):
	        """Sort posts by descending metadata["date"].

	        The comparison is deliberately inverted so that sorted() puts the
	        post with the larger date first. Raises KeyError when a post has
	        no "date" entry.
	        """
	        return self.metadata["date"] > other.metadata["date"]

	    def _generate_post_name(self):
	        """Return the markdown file name without extension, lowercased, spaces replaced by underscores."""
	        filename = os.path.basename(self.path_markdown)
	        return str(os.path.splitext(filename)[0]).replace(" ", "_").lower()

	    @timer
	    def _handle_metadata(self):
	        """Parse the metadata header of the markdown file into a dict.

	        The markdown "meta" extension yields a list of values per key;
	        single-item lists are collapsed into a plain string for aesthetics.
	        """
	        md = Markdown(extensions=["meta"])
	        with open(self.path_markdown, "r") as fr:
	            # the converted html is not needed; converting fills md.Meta
	            md.convert(fr.read())
	        return {
	            key: "".join(values) if len(values) == 1 else values
	            for key, values in md.Meta.items()
	        }

	    @timer
	    def _convert_to_html(self):
	        """Convert the markdown file to an html fragment and return it."""
	        md = Markdown(
	            extensions=[
	                "fenced_code",
	                "sane_lists",
	                "smarty",
	                "footnotes",
	                "tables",
	                "attr_list",
	            ]
	        )
	        with open(self.path_markdown, "r") as fr:
	            return md.convert(fr.read())

	    @timer
	    def _extract_toc(self):
	        """Return the table of contents generated by the markdown "toc" extension."""
	        md = Markdown(extensions=["toc"])
	        with open(self.path_markdown, "r") as fr:
	            # the toc is only available after a conversion has run
	            md.convert(fr.read())
	            return md.toc

	    @staticmethod
	    def _get_image_size(image):
	        """Return the size of `image` in bytes, or 0 when the file does not exist."""
	        try:
	            return os.path.getsize(image)
	        except FileNotFoundError:
	            return 0

	    @timer
	    def _add_extra_metadata(self):
	        """Add computed statistics to self.metadata.

	        Adds "wc" (word count and character count), "md_file_size" and the
	        total size of the post's images before and after publishing.
	        """
	        with open(self.path_markdown, "r") as fr:
	            content = fr.read()

	        # NOTE: compress_images() is defined in the later OPTIMIZE stage, so
	        # the "compressed" numbers can only reflect an earlier build.
	        size_images_og = sum(self._get_image_size(image) for image in self.images)
	        size_images_compressed = sum(
	            self._get_image_size(image) for image in self.new_images
	        )

	        self.metadata["wc"] = f"{len(content.split())} {len(content)}"
	        self.metadata["md_file_size"] = sizeof_fmt(os.path.getsize(self.path_markdown))
	        self.metadata["images_size_original"] = sizeof_fmt(size_images_og)
	        self.metadata["images_size_compressed"] = sizeof_fmt(size_images_compressed)
	        self.metadata["images_savings"] = sizeof_fmt(
	            size_images_og - size_images_compressed
	        )

	    @timer
	    def _handle_images(self):
	        """Copy the post's local media into PATH_MEDIA and relink the html.

	        Every <img>/<source> with a local src is copied to PATH_MEDIA (unless
	        a file of that name is already there) and its link in self.html is
	        replaced by "media/<file name>". Missing source files are logged and
	        still relinked. Tags without src and remote or data: urls are left
	        untouched.
	        """
	        # TODO hash names, move to media dir, replace names in html.
	        # TODO rename handle_images and do all steps in here.
	        soup = BeautifulSoup(self.html, "html5lib")
	        head, _ = os.path.split(self.path_markdown)
	        handled = set()
	        for media in soup.find_all(["img", "source"]):
	            media_path = media.get("src")
	            # nothing to copy for tags without src or for non-local media
	            if not media_path or "://" in media_path or media_path.startswith("data:"):
	                continue
	            # a link is rewritten once; replacing it again would also hit
	            # the already rewritten "media/..." text and corrupt it
	            if media_path in handled:
	                continue
	            handled.add(media_path)

	            # absolute path to picture in your filesystem
	            absolute_path = os.path.normpath(os.path.join(head, media_path))
	            self.images.append(absolute_path)

	            # check if image exists in webpage path before copying it
	            image_filename = os.path.basename(absolute_path)
	            published_path = os.path.join(PATH_MEDIA, image_filename)
	            if not os.path.exists(published_path):
	                try:
	                    shutil.copy(absolute_path, PATH_MEDIA)
	                except FileNotFoundError:
	                    write_log(
	                        "ERROR",
	                        "copy_media",
	                        f"[{media_path}] was not found",
	                    )

	            # links are relative to PATH_BLOG, so PATH_BLOG is not part of them
	            new_path = "media/" + image_filename
	            self.html = str(self.html).replace(media_path, new_path)
	            self.new_images.append(published_path)

	    @timer
	    def _render_html(self):
	        """Render post.html with this post's data and write it to PATH_BLOG/<url>."""
	        template = JINJA2_ENV.get_template("post.html")
	        output = template.render(
	            metadata=json.dumps(
	                self.metadata, indent=4
	            ),  # prettifies metadata by converting to indented str
	            toc=self.toc,
	            content=self.html,
	        )
	        with open(os.path.join(PATH_BLOG, self.url), "w") as f:
	            f.write(output)

	# pool = Pool()
	# pool.map(create_post, POSTS)
	# pool.close()
	# pool.join()

	# build posts! every markdown path in POSTS is swapped in place for the Post
	# built from it, so the later stages can treat POSTS as a list of Post objects.
	for i, post in enumerate(POSTS):
	    POSTS[i] = Post(post)


	@timer
	def build_html(
	    template: str,
	    filename: str,
	    content: Optional[dict] = None,
	):
	    """Render a jinja2 template and write the result into PATH_BLOG.

	    Args:
	        template: name of the template, looked up through JINJA2_ENV.
	        filename: name of the output file inside PATH_BLOG.
	        content: variables passed to the template; none when omitted.
	    """
	    # a separate name keeps the `template` argument a plain str
	    page_template = JINJA2_ENV.get_template(template)
	    # None instead of a shared mutable {} default; render gets a fresh dict
	    output = page_template.render({} if content is None else content)
	    with open(os.path.join(PATH_BLOG, filename), "w") as f:
	        f.write(output)


	@timer
	def build_tags() -> dict:
	    """Build an ordered mapping of tag -> list of posts carrying that tag.

	    Tags are sorted alphabetically; the posts of each tag keep the order
	    of sorted(POSTS). Posts without a "tags" entry are skipped.

	    Returns:
	        collections.OrderedDict mapping each tag to its list of Post objects.
	    """
	    tags = {}
	    for post in sorted(POSTS):
	        post_tags = post.metadata.get("tags", [])
	        # Post._handle_metadata collapses a single tag into a plain string;
	        # iterating that string would register every character as a tag.
	        if isinstance(post_tags, str):
	            post_tags = [post_tags]
	        for tag in post_tags:
	            tags.setdefault(tag, []).append(post)

	    return collections.OrderedDict(sorted(tags.items()))


	# build index: posts in their sorted() order
	posts_sorted = sorted(POSTS)
	build_html(
	    template="index.html",
	    filename="index.html",
	    content={"posts": posts_sorted},
	)

	# build about: static page without template variables
	build_html(template="about.html", filename="about.html")

	# build tags: the tag mapping is computed right before its page is rendered
	tags_by_name = build_tags()
	build_html(
	    template="tags.html",
	    filename="tags.html",
	    content={"tags": tags_by_name},
	)


	"""
	STAGE: OPTIMIZE

	STEPS:
	1. add lazy loading to all html <img> attributes
	2. compress images if needed, use a file to track whether images were compressed before or not
	3. minify html to further reduce size

	NOTES:
	just some additional stuff to make the page faster and reduce bandwidth waste.
	can be skipped without breaking anything.
	"""


	@timer
	def add_lazy_loading():
	    """Add loading="lazy" to every <img>/<source> in the html files of PATH_BLOG.

	    Each .html file directly inside PATH_BLOG is parsed, updated and written
	    back in place. Because the file is re-serialized by the parser, its
	    formatting may change.
	    """
	    for filename in os.listdir(PATH_BLOG):
	        if not filename.endswith(".html"):
	            continue
	        path_html = os.path.join(PATH_BLOG, filename)
	        with open(path_html, "r") as fr:
	            soup = BeautifulSoup(fr.read(), "lxml")
	        for media in soup.find_all(["img", "source"]):
	            media.attrs["loading"] = "lazy"
	        with open(path_html, "w") as fw:
	            fw.write(str(soup))


	@timer
	def compress_images():
	    """Resize every png/jpg/jpeg in PATH_MEDIA through tinify, in place.

	    The api key is read from the environment variable "key". Each image is
	    sent to tinify, resized with method="scale" and width=MAX_MEDIA_WIDTH,
	    and written back over the original file. The size before and after is
	    logged per image.
	    """
	    tinify.key = os.environ.get("key")
	    for image in os.listdir(PATH_MEDIA):
	        _, ext = os.path.splitext(image)  # get file extension
	        if ext.lower() in [".png", ".jpg", ".jpeg"]:  # only minify images!
	            image_path = os.path.join(PATH_MEDIA, image)
	            image_size = os.path.getsize(image_path)
	            i = tinify.from_file(image_path)
	            resized = i.resize(method="scale", width=MAX_MEDIA_WIDTH)
	            resized.to_file(image_path)
	            image_size_new = os.path.getsize(image_path)
	            write_log(
	                "compress_media",
	                f"resized [{image}] from {image_size}bytes to {image_size_new}bytes",
	            )


	@timer
	def minify_html():
	    """Placeholder for html minification; does nothing and returns None."""
	    # todo https://stackoverflow.com/questions/5597094/compressminimize-html-from-python
	    pass


	#compress_images() #TODO logic to not compress already compressed data. keep a log?

	"""
	STAGE: END OF THE LINE

	NOTES:
	calculate time taken to build exception.lt
	write last log entry
	print to console
	exit
	"""

	# total build time, measured from time_start taken in the preparation stage
	time_end = time()
	time_taken = f"{time_end - time_start:2.2f}s"
	write_log("ssg.py", time_taken)
	print("ssg.py: done in %s!" % time_taken)