Created
March 1, 2025 13:21
-
-
Save WebFreak001/51914ab83b08e2119814c6b10b9971d8 to your computer and use it in GitHub Desktop.
Python utility to edit PPTX presentations without dependencies through simple string replacements and image insertions. Basically use this to just replace placeholder variables in slides and conditionally show slides based on variables as well as adding generated images (like plots) in slides at easily editable spots. Tested with LibreOffice PPT…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2025, Jan Jurzitza | |
# SPDX-License-Identifier: CC0-1.0 | |
# dead simple string.replace- & string.index-based pptx creator from template | |
# pptx files. Basically performs a simple string search & replace in the pptx | |
# and outputs that as a copy. | |
# | |
# Can also replace images: a placeholder image in the pptx whose "title"
# property contains the search key is swapped for an arbitrary replacement
# image, given either as a path on disk or as an in-memory PNG image.
# | |
# This script is kind of equivalent to: | |
# --- | |
# unzip template.pptx /tmp/cwd | |
# cp inputimage.png /tmp/cwd/ppt/media/tpmedia1.png # as well as putting it into the rels.xml | |
# sed -i 's/VARIABLE_NAME/New value/g' /tmp/cwd/ppt/slides/*.xml | |
# zip -r output.pptx /tmp/cwd | |
# --- | |
from typing import Dict | |
from pathlib import Path | |
import shutil | |
import re | |
import io | |
import tempfile | |
import zipfile | |
import os | |
from xml.sax.saxutils import escape | |
show_regex = re.compile(r'SHOW_ONLY_IF\s*\(([^()]+)\)') | |
show_if_expressions = re.compile(r'([\w_.-]+)\s*(=|~|!=|!~|<|>|<=|>=)\s*(.+)') | |
def render_pptx(template: str, variables: Dict[str, str], images: Dict[str, str | io.BytesIO], out_name: str | io.BytesIO = "generated-presentation"): | |
""" | |
Creates a pptx from an extracted template pptx. | |
template can be a pptx file or the extracted folder of it (which is easier | |
to track in version control). | |
This first copies the template directory to a temporary directory, edits the | |
files there in-place and then compresses the directory into a new pptx. | |
For editing, `variables` is used to replace all occurences of its keys with | |
its values. | |
The images replaces the picture paths with the values given in the | |
dictionary. The paths in the dictionary values will be copied into the pptx. | |
If on a slide there is a text SHOW_ONLY_IF(...), that slide will be removed | |
if the condition does not apply. An example is SHOW_ONLY_IF(PHASE = 1) | |
You can use the following expressions inside the parentheses: | |
- `a = b` check a is equal to b | |
- `a ~ b` check a contains b | |
- `a != b` check a is not equal to b | |
- `a !~ b` check a does not contain b | |
- `a < b`, `a > b`, `a <= b`, `a >= b` are special: | |
- if b is a number, checks that a is a number and then compares the numeric values | |
- othersie performs ascii based comparison | |
"a" may only be a variable name and may not contain any spaces. | |
"b" may be any text or values including spaces, except for parentheses. | |
""" | |
pptx_workdir = tempfile.mkdtemp() | |
if Path(template).is_dir(): | |
shutil.copytree(template, pptx_workdir, dirs_exist_ok=True) | |
else: | |
shutil.unpack_archive(template, pptx_workdir, format="zip") | |
media_idx = 0 | |
images_rel = {} | |
for k, v in images.items(): | |
media_idx += 1 | |
if isinstance(v, str): | |
# abspath for copy = "{pptx}/ppt/media/tpmedia1.png" | |
# relative file for XML = "../media/tpmedia1.png" | |
path = f"media/tpmedia{media_idx}{Path(v).suffix}" | |
dst = Path(pptx_workdir) / "ppt" / path | |
shutil.copy(v, dst) | |
images_rel[k] = "../" + path | |
elif isinstance(v, io.BytesIO): | |
buf = v.getbuffer() | |
header = bytes(buf[0:8]) | |
ext = "" | |
if header == b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A': | |
ext = ".png" | |
else: | |
raise Exception("Unidentifiable BytesIO buffer " + repr(header)) | |
path = f"media/tpmedia{media_idx}{ext}" | |
dst = Path(pptx_workdir) / "ppt" / path | |
dst.write_bytes(buf) | |
images_rel[k] = "../" + path | |
else: | |
raise Exception("Unknown image type " + str(type(v))) | |
images = images_rel | |
pxml = Path(pptx_workdir, "ppt", "presentation.xml") | |
pxmlrel = Path(pptx_workdir, "ppt", "_rels", "presentation.xml.rels").read_text() | |
pxmltext = pxml.read_text() | |
write_pxml = False | |
for slide in Path(pptx_workdir, "ppt", "slides").iterdir(): | |
if not slide.is_file(): | |
continue | |
relfile = slide.parent / "_rels" / (slide.name + ".rels") | |
if not relfile.exists(): | |
raise Exception("rel file " + relfile + " is missing!") | |
d = slide.read_text() | |
if "SHOW_ONLY_IF" in d: | |
matches = True | |
for m in show_regex.finditer(d): | |
expr = show_if_expressions.fullmatch(m[1]) | |
if not expr: | |
raise Exception("malformed SHOW_ONLY_IF expression: " + m[1]) | |
key = expr[1] | |
op = expr[2] | |
value = expr[3].strip() | |
var = variables[key].strip() | |
m = False | |
if op == "=": | |
m = var == value | |
elif op == "~": | |
m = value in var | |
elif op == "!=": | |
m = var != value | |
elif op == "!~": | |
m = value not in var | |
else: | |
if value.isnumeric() != var.isnumeric(): | |
raise Exception(f"Attempted {op} on numeric and non-numeric variable") | |
if value.isnumeric(): | |
value = int(value) | |
var = int(var) | |
if op == "<": | |
m = value < var | |
elif op == "<=": | |
m = value <= var | |
elif op == ">": | |
m = value > var | |
elif op == ">=": | |
m = value >= var | |
else: | |
raise Exception("Unknown SHOW_ONLY_IF operator") | |
if not m: | |
matches = False | |
if not matches: | |
target = f'Target="slides/{slide.name}"' | |
id_start = pxmlrel.rindex('Id="', 0, pxmlrel.index(target)) + len('Id="') | |
id_end = pxmlrel.index('"', id_start) | |
rid = pxmlrel[id_start:id_end] | |
remove_end = pxmltext.index(f'r:id="{rid}"/>') + len(f'r:id="{rid}"/>') | |
remove_start = pxmltext.rindex(f'<', 0, remove_end) | |
pxmltext = pxmltext[:remove_start] + pxmltext[remove_end:] | |
write_pxml = True | |
slide.unlink() | |
continue | |
write = False | |
relwrite = False | |
reld = relfile.read_text() | |
for k, v in sorted(variables.items(), key=lambda i: len(i[0]), reverse=True): | |
if write or k in d: | |
d = d.replace(k, escape(v)) | |
write = True | |
rid = 0 | |
for img, path in sorted(images.items(), key=lambda i: len(i[0]), reverse=True): | |
if img in d: | |
rid += 1 | |
reld = reld.replace( | |
'</Relationships>', | |
f'<Relationship Id="tprId{rid}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="{escape(path)}"/></Relationships>' | |
) | |
start = d.rindex("<p:pic>", 0, d.index(img)) | |
embedstart = d.index('r:embed="', start) + len('r:embed="') | |
embedend = d.index('"', embedstart) | |
d = d[:embedstart] + f"tprId{rid}" + d[embedend:] | |
relwrite = True | |
write = True | |
if write: | |
slide.write_text(d) | |
if relwrite: | |
relfile.write_text(reld) | |
if write_pxml: | |
pxml.write_text(pxmltext) | |
if isinstance(out_name, io.BytesIO): | |
with zipfile.ZipFile(out_name, "a", zipfile.ZIP_DEFLATED, False) as ziph: | |
zipdir(pptx_workdir, ziph) | |
return out_name | |
else: | |
pptx = out_name | |
shutil.make_archive(pptx, 'zip', pptx_workdir) | |
shutil.move(pptx + ".zip", pptx + ".pptx") | |
pptx += '.pptx' | |
shutil.rmtree(pptx_workdir) | |
return pptx | |
# https://stackoverflow.com/a/1855118
def zipdir(path, ziph):
    """Add every file below `path` to the open zip handle `ziph`.

    Archive member names are the file paths relative to `path`, so the
    contents of `path` end up at the root of the archive.
    """
    for folder, _subdirs, filenames in os.walk(path):
        for filename in filenames:
            full_path = os.path.join(folder, filename)
            ziph.write(full_path, os.path.relpath(full_path, path))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment