WebFreak001 · March 1, 2025 13:21
diff --git a/pptx_edit.py b/pptx_edit.py
 # Copyright 2025, Jan Jurzitza
 # SPDX-License-Identifier: CC0-1.0

 # dead simple string.replace- & string.index-based pptx creator from template
 # pptx files. Basically performs a simple string search & replace in the pptx
 # and outputs that as a copy.
 #
 # Can also replace images, which have the search key in the "title" property of
 # a placeholder image in the pptx, which can be replaced with an arbitrary
 # replacement image on disk (by path) or an in-memory PNG image.
 #
 # This script is kind of equivalent to:
 # ---
 # unzip template.pptx /tmp/cwd
 # cp inputimage.png /tmp/cwd/ppt/media/tpmedia1.png # as well as putting it into the rels.xml
 # sed -i 's/VARIABLE_NAME/New value/g' /tmp/cwd/ppt/slides/*.xml
 # zip -r output.pptx /tmp/cwd
 # ---

 from typing import Dict
 from pathlib import Path
 import shutil
 import re
 import io
 import tempfile
 import zipfile
 import os
 from xml.sax.saxutils import escape

 show_regex = re.compile(r'SHOW_ONLY_IF\s*\(([^()]+)\)')
 show_if_expressions = re.compile(r'([\w_.-]+)\s*(=|~|!=|!~|<|>|<=|>=)\s*(.+)')

 def render_pptx(template: str, variables: Dict[str, str], images: Dict[str, str | io.BytesIO], out_name: str | io.BytesIO = "generated-presentation"):
 	"""
 	Creates a pptx from an extracted template pptx.

 	template can be a pptx file or the extracted folder of it (which is easier
 	to track in version control).

 	This first copies the template directory to a temporary directory, edits the
 	files there in-place and then compresses the directory into a new pptx.

 	For editing, `variables` is used to replace all occurences of its keys with
 	its values.

 	The images replaces the picture paths with the values given in the
 	dictionary. The paths in the dictionary values will be copied into the pptx.

 	If on a slide there is a text SHOW_ONLY_IF(...), that slide will be removed
 	if the condition does not apply. An example is SHOW_ONLY_IF(PHASE = 1)

 	You can use the following expressions inside the parentheses:
 	- `a = b` check a is equal to b
 	- `a ~ b` check a contains b
 	- `a != b` check a is not equal to b
 	- `a !~ b` check a does not contain b
 	- `a < b`, `a > b`, `a <= b`, `a >= b` are special:
 		- if b is a number, checks that a is a number and then compares the numeric values
 		- othersie performs ascii based comparison

 	"a" may only be a variable name and may not contain any spaces.

 	"b" may be any text or values including spaces, except for parentheses.
 	"""

 	pptx_workdir = tempfile.mkdtemp()
 	if Path(template).is_dir():
 		shutil.copytree(template, pptx_workdir, dirs_exist_ok=True)
 	else:
 		shutil.unpack_archive(template, pptx_workdir, format="zip")

 	media_idx = 0
 	images_rel = {}
 	for k, v in images.items():
 		media_idx += 1
 		if isinstance(v, str):
 			# abspath for copy      = "{pptx}/ppt/media/tpmedia1.png"
 			# relative file for XML = "../media/tpmedia1.png"
 			path = f"media/tpmedia{media_idx}{Path(v).suffix}"
 			dst = Path(pptx_workdir) / "ppt" / path
 			shutil.copy(v, dst)
 			images_rel[k] = "../" + path
 		elif isinstance(v, io.BytesIO):
 			buf = v.getbuffer()
 			header = bytes(buf[0:8])
 			ext = ""
 			if header == b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A':
 				ext = ".png"
 			else:
 				raise Exception("Unidentifiable BytesIO buffer " + repr(header))
 			path = f"media/tpmedia{media_idx}{ext}"
 			dst = Path(pptx_workdir) / "ppt" / path
 			dst.write_bytes(buf)
 			images_rel[k] = "../" + path
 		else:
 			raise Exception("Unknown image type " + str(type(v)))
 	images = images_rel

 	pxml = Path(pptx_workdir, "ppt", "presentation.xml")
 	pxmlrel = Path(pptx_workdir, "ppt", "_rels", "presentation.xml.rels").read_text()
 	pxmltext = pxml.read_text()
 	write_pxml = False

 	for slide in Path(pptx_workdir, "ppt", "slides").iterdir():
 		if not slide.is_file():
 			continue
 		relfile = slide.parent / "_rels" / (slide.name + ".rels")
 		if not relfile.exists():
 			raise Exception("rel file " + relfile + " is missing!")
 		d = slide.read_text()
 		if "SHOW_ONLY_IF" in d:
 			matches = True
 			for m in show_regex.finditer(d):
 				expr = show_if_expressions.fullmatch(m[1])
 				if not expr:
 					raise Exception("malformed SHOW_ONLY_IF expression: " + m[1])
 				key = expr[1]
 				op = expr[2]
 				value = expr[3].strip()

 				var = variables[key].strip()
 				m = False
 				if op == "=":
 					m = var == value
 				elif op == "~":
 					m = value in var
 				elif op == "!=":
 					m = var != value
 				elif op == "!~":
 					m = value not in var
 				else:
 					if value.isnumeric() != var.isnumeric():
 						raise Exception(f"Attempted {op} on numeric and non-numeric variable")
 					if value.isnumeric():
 						value = int(value)
 						var = int(var)

 					if op == "<":
 						m = value < var
 					elif op == "<=":
 						m = value <= var
 					elif op == ">":
 						m = value > var
 					elif op == ">=":
 						m = value >= var
 					else:
 						raise Exception("Unknown SHOW_ONLY_IF operator")

 				if not m:
 					matches = False
 			if not matches:
 				target = f'Target="slides/{slide.name}"'
 				id_start = pxmlrel.rindex('Id="', 0, pxmlrel.index(target)) + len('Id="')
 				id_end = pxmlrel.index('"', id_start)
 				rid = pxmlrel[id_start:id_end]

 				remove_end = pxmltext.index(f'r:id="{rid}"/>') + len(f'r:id="{rid}"/>')
 				remove_start = pxmltext.rindex(f'<', 0, remove_end)
 				pxmltext = pxmltext[:remove_start] + pxmltext[remove_end:]
 				write_pxml = True
 				slide.unlink()
 				continue

 		write = False
 		relwrite = False
 		reld = relfile.read_text()
 		for k, v in sorted(variables.items(), key=lambda i: len(i[0]), reverse=True):
 			if write or k in d:
 				d = d.replace(k, escape(v))
 				write = True
 		rid = 0
 		for img, path in sorted(images.items(), key=lambda i: len(i[0]), reverse=True):
 			if img in d:
 				rid += 1
 				reld = reld.replace(
 					'</Relationships>',
 					f'<Relationship Id="tprId{rid}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="{escape(path)}"/></Relationships>'
 				)
 				start = d.rindex("<p:pic>", 0, d.index(img))
 				embedstart = d.index('r:embed="', start) + len('r:embed="')
 				embedend = d.index('"', embedstart)
 				d = d[:embedstart] + f"tprId{rid}" + d[embedend:]
 				relwrite = True
 				write = True

 		if write:
 			slide.write_text(d)
 		if relwrite:
 			relfile.write_text(reld)

 	if write_pxml:
 		pxml.write_text(pxmltext)

 	if isinstance(out_name, io.BytesIO):
 		with zipfile.ZipFile(out_name, "a", zipfile.ZIP_DEFLATED, False) as ziph:
 			zipdir(pptx_workdir, ziph)
 		return out_name
 	else:
 		pptx = out_name
 		shutil.make_archive(pptx, 'zip', pptx_workdir)
 		shutil.move(pptx + ".zip", pptx + ".pptx")
 		pptx += '.pptx'
 		shutil.rmtree(pptx_workdir)
 		return pptx

 # https://stackoverflow.com/a/1855118
 def zipdir(path, ziph):
 	"""
 	Adds every file below the directory `path` to the open zip handle `ziph`.

 	Files are stored under their path relative to `path`, so the directory
 	itself does not show up as a prefix inside the archive.
 	"""
 	for folder, _subfolders, filenames in os.walk(path):
 		for filename in filenames:
 			on_disk = os.path.join(folder, filename)
 			in_archive = os.path.relpath(on_disk, path)
 			ziph.write(on_disk, in_archive)
	# Copyright 2025, Jan Jurzitza
	# SPDX-License-Identifier: CC0-1.0

	# dead simple string.replace- & string.index-based pptx creator from template
	# pptx files. Basically performs a simple string search & replace in the pptx
	# and outputs that as a copy.
	#
	# Can also replace images, which have the search key in the "title" property of
	# a placeholder image in the pptx, which can be replaced with an arbitrary
	# replacement image on disk (by path) or an in-memory PNG image.
	#
	# This script is kind of equivalent to:
	# ---
	# unzip template.pptx /tmp/cwd
	# cp inputimage.png /tmp/cwd/ppt/media/tpmedia1.png # as well as putting it into the rels.xml
	# sed -i 's/VARIABLE_NAME/New value/g' /tmp/cwd/ppt/slides/*.xml
	# zip -r output.pptx /tmp/cwd
	# ---

	from typing import Dict
	from pathlib import Path
	import shutil
	import re
	import io
	import tempfile
	import zipfile
	import os
	from xml.sax.saxutils import escape

	show_regex = re.compile(r'SHOW_ONLY_IF\s*\(([^()]+)\)')
	show_if_expressions = re.compile(r'([\w_.-]+)\s(=\|~\|!=\|!~\|<\|>\|<=\|>=)\s(.+)')

	def render_pptx(template: str, variables: Dict[str, str], images: Dict[str, str \| io.BytesIO], out_name: str \| io.BytesIO = "generated-presentation"):
	"""
	Creates a pptx from an extracted template pptx.

	template can be a pptx file or the extracted folder of it (which is easier
	to track in version control).

	This first copies the template directory to a temporary directory, edits the
	files there in-place and then compresses the directory into a new pptx.

	For editing, `variables` is used to replace all occurences of its keys with
	its values.

	The images replaces the picture paths with the values given in the
	dictionary. The paths in the dictionary values will be copied into the pptx.

	If on a slide there is a text SHOW_ONLY_IF(...), that slide will be removed
	if the condition does not apply. An example is SHOW_ONLY_IF(PHASE = 1)

	You can use the following expressions inside the parentheses:
	- `a = b` check a is equal to b
	- `a ~ b` check a contains b
	- `a != b` check a is not equal to b
	- `a !~ b` check a does not contain b
	- `a < b`, `a > b`, `a <= b`, `a >= b` are special:
	- if b is a number, checks that a is a number and then compares the numeric values
	- othersie performs ascii based comparison

	"a" may only be a variable name and may not contain any spaces.

	"b" may be any text or values including spaces, except for parentheses.
	"""

	pptx_workdir = tempfile.mkdtemp()
	if Path(template).is_dir():
	shutil.copytree(template, pptx_workdir, dirs_exist_ok=True)
	else:
	shutil.unpack_archive(template, pptx_workdir, format="zip")

	media_idx = 0
	images_rel = {}
	for k, v in images.items():
	media_idx += 1
	if isinstance(v, str):
	# abspath for copy = "{pptx}/ppt/media/tpmedia1.png"
	# relative file for XML = "../media/tpmedia1.png"
	path = f"media/tpmedia{media_idx}{Path(v).suffix}"
	dst = Path(pptx_workdir) / "ppt" / path
	shutil.copy(v, dst)
	images_rel[k] = "../" + path
	elif isinstance(v, io.BytesIO):
	buf = v.getbuffer()
	header = bytes(buf[0:8])
	ext = ""
	if header == b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A':
	ext = ".png"
	else:
	raise Exception("Unidentifiable BytesIO buffer " + repr(header))
	path = f"media/tpmedia{media_idx}{ext}"
	dst = Path(pptx_workdir) / "ppt" / path
	dst.write_bytes(buf)
	images_rel[k] = "../" + path
	else:
	raise Exception("Unknown image type " + str(type(v)))
	images = images_rel

	pxml = Path(pptx_workdir, "ppt", "presentation.xml")
	pxmlrel = Path(pptx_workdir, "ppt", "_rels", "presentation.xml.rels").read_text()
	pxmltext = pxml.read_text()
	write_pxml = False

	for slide in Path(pptx_workdir, "ppt", "slides").iterdir():
	if not slide.is_file():
	continue
	relfile = slide.parent / "_rels" / (slide.name + ".rels")
	if not relfile.exists():
	raise Exception("rel file " + relfile + " is missing!")
	d = slide.read_text()
	if "SHOW_ONLY_IF" in d:
	matches = True
	for m in show_regex.finditer(d):
	expr = show_if_expressions.fullmatch(m[1])
	if not expr:
	raise Exception("malformed SHOW_ONLY_IF expression: " + m[1])
	key = expr[1]
	op = expr[2]
	value = expr[3].strip()

	var = variables[key].strip()
	m = False
	if op == "=":
	m = var == value
	elif op == "~":
	m = value in var
	elif op == "!=":
	m = var != value
	elif op == "!~":
	m = value not in var
	else:
	if value.isnumeric() != var.isnumeric():
	raise Exception(f"Attempted {op} on numeric and non-numeric variable")
	if value.isnumeric():
	value = int(value)
	var = int(var)

	if op == "<":
	m = value < var
	elif op == "<=":
	m = value <= var
	elif op == ">":
	m = value > var
	elif op == ">=":
	m = value >= var
	else:
	raise Exception("Unknown SHOW_ONLY_IF operator")

	if not m:
	matches = False
	if not matches:
	target = f'Target="slides/{slide.name}"'
	id_start = pxmlrel.rindex('Id="', 0, pxmlrel.index(target)) + len('Id="')
	id_end = pxmlrel.index('"', id_start)
	rid = pxmlrel[id_start:id_end]

	remove_end = pxmltext.index(f'r:id="{rid}"/>') + len(f'r:id="{rid}"/>')
	remove_start = pxmltext.rindex(f'<', 0, remove_end)
	pxmltext = pxmltext[:remove_start] + pxmltext[remove_end:]
	write_pxml = True
	slide.unlink()
	continue

	write = False
	relwrite = False
	reld = relfile.read_text()
	for k, v in sorted(variables.items(), key=lambda i: len(i[0]), reverse=True):
	if write or k in d:
	d = d.replace(k, escape(v))
	write = True
	rid = 0
	for img, path in sorted(images.items(), key=lambda i: len(i[0]), reverse=True):
	if img in d:
	rid += 1
	reld = reld.replace(
	'</Relationships>',
	f'<Relationship Id="tprId{rid}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="{escape(path)}"/></Relationships>'
	)
	start = d.rindex("<p:pic>", 0, d.index(img))
	embedstart = d.index('r:embed="', start) + len('r:embed="')
	embedend = d.index('"', embedstart)
	d = d[:embedstart] + f"tprId{rid}" + d[embedend:]
	relwrite = True
	write = True

	if write:
	slide.write_text(d)
	if relwrite:
	relfile.write_text(reld)

	if write_pxml:
	pxml.write_text(pxmltext)

	if isinstance(out_name, io.BytesIO):
	with zipfile.ZipFile(out_name, "a", zipfile.ZIP_DEFLATED, False) as ziph:
	zipdir(pptx_workdir, ziph)
	return out_name
	else:
	pptx = out_name
	shutil.make_archive(pptx, 'zip', pptx_workdir)
	shutil.move(pptx + ".zip", pptx + ".pptx")
	pptx += '.pptx'
	shutil.rmtree(pptx_workdir)
	return pptx

	# https://stackoverflow.com/a/1855118
	def zipdir(path, ziph):
		"""
		Adds every file below the directory `path` to the open zip handle `ziph`.

		Files are stored under their path relative to `path`, so the directory
		itself does not show up as a prefix inside the archive.
		"""
		for root, dirs, files in os.walk(path):
			for file in files:
				full_path = os.path.join(root, file)
				ziph.write(full_path, os.path.relpath(full_path, path))