Skip to content

Instantly share code, notes, and snippets.

@WebFreak001
Created March 1, 2025 13:21
Show Gist options
  • Save WebFreak001/51914ab83b08e2119814c6b10b9971d8 to your computer and use it in GitHub Desktop.
Save WebFreak001/51914ab83b08e2119814c6b10b9971d8 to your computer and use it in GitHub Desktop.
Python utility to edit PPTX presentations without dependencies through simple string replacements and image insertions. Basically use this to just replace placeholder variables in slides and conditionally show slides based on variables as well as adding generated images (like plots) in slides at easily editable spots. Tested with LibreOffice PPT…
# Copyright 2025, Jan Jurzitza
# SPDX-License-Identifier: CC0-1.0
# dead simple string.replace- & string.index-based pptx creator from template
# pptx files. Basically performs a simple string search & replace in the pptx
# and outputs that as a copy.
#
# Can also replace images, which have the search key in the "title" property of
# a placeholder image in the pptx, which can be replaced with an arbitrary
# replacement image on disk (by path) or an in-memory PNG image.
#
# This script is kind of equivalent to:
# ---
# unzip template.pptx -d /tmp/cwd
# cp inputimage.png /tmp/cwd/ppt/media/tpmedia1.png # as well as putting it into the rels.xml
# sed -i 's/VARIABLE_NAME/New value/g' /tmp/cwd/ppt/slides/*.xml
# (cd /tmp/cwd && zip -r - .) > output.pptx
# ---
from typing import Dict
from pathlib import Path
import shutil
import re
import io
import tempfile
import zipfile
import os
from xml.sax.saxutils import escape
show_regex = re.compile(r'SHOW_ONLY_IF\s*\(([^()]+)\)')
show_if_expressions = re.compile(r'([\w_.-]+)\s*(=|~|!=|!~|<|>|<=|>=)\s*(.+)')
def render_pptx(template: str, variables: Dict[str, str], images: Dict[str, str | io.BytesIO], out_name: str | io.BytesIO = "generated-presentation"):
"""
Creates a pptx from an extracted template pptx.
template can be a pptx file or the extracted folder of it (which is easier
to track in version control).
This first copies the template directory to a temporary directory, edits the
files there in-place and then compresses the directory into a new pptx.
For editing, `variables` is used to replace all occurences of its keys with
its values.
The images replaces the picture paths with the values given in the
dictionary. The paths in the dictionary values will be copied into the pptx.
If on a slide there is a text SHOW_ONLY_IF(...), that slide will be removed
if the condition does not apply. An example is SHOW_ONLY_IF(PHASE = 1)
You can use the following expressions inside the parentheses:
- `a = b` check a is equal to b
- `a ~ b` check a contains b
- `a != b` check a is not equal to b
- `a !~ b` check a does not contain b
- `a < b`, `a > b`, `a <= b`, `a >= b` are special:
- if b is a number, checks that a is a number and then compares the numeric values
- othersie performs ascii based comparison
"a" may only be a variable name and may not contain any spaces.
"b" may be any text or values including spaces, except for parentheses.
"""
pptx_workdir = tempfile.mkdtemp()
if Path(template).is_dir():
shutil.copytree(template, pptx_workdir, dirs_exist_ok=True)
else:
shutil.unpack_archive(template, pptx_workdir, format="zip")
media_idx = 0
images_rel = {}
for k, v in images.items():
media_idx += 1
if isinstance(v, str):
# abspath for copy = "{pptx}/ppt/media/tpmedia1.png"
# relative file for XML = "../media/tpmedia1.png"
path = f"media/tpmedia{media_idx}{Path(v).suffix}"
dst = Path(pptx_workdir) / "ppt" / path
shutil.copy(v, dst)
images_rel[k] = "../" + path
elif isinstance(v, io.BytesIO):
buf = v.getbuffer()
header = bytes(buf[0:8])
ext = ""
if header == b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A':
ext = ".png"
else:
raise Exception("Unidentifiable BytesIO buffer " + repr(header))
path = f"media/tpmedia{media_idx}{ext}"
dst = Path(pptx_workdir) / "ppt" / path
dst.write_bytes(buf)
images_rel[k] = "../" + path
else:
raise Exception("Unknown image type " + str(type(v)))
images = images_rel
pxml = Path(pptx_workdir, "ppt", "presentation.xml")
pxmlrel = Path(pptx_workdir, "ppt", "_rels", "presentation.xml.rels").read_text()
pxmltext = pxml.read_text()
write_pxml = False
for slide in Path(pptx_workdir, "ppt", "slides").iterdir():
if not slide.is_file():
continue
relfile = slide.parent / "_rels" / (slide.name + ".rels")
if not relfile.exists():
raise Exception("rel file " + relfile + " is missing!")
d = slide.read_text()
if "SHOW_ONLY_IF" in d:
matches = True
for m in show_regex.finditer(d):
expr = show_if_expressions.fullmatch(m[1])
if not expr:
raise Exception("malformed SHOW_ONLY_IF expression: " + m[1])
key = expr[1]
op = expr[2]
value = expr[3].strip()
var = variables[key].strip()
m = False
if op == "=":
m = var == value
elif op == "~":
m = value in var
elif op == "!=":
m = var != value
elif op == "!~":
m = value not in var
else:
if value.isnumeric() != var.isnumeric():
raise Exception(f"Attempted {op} on numeric and non-numeric variable")
if value.isnumeric():
value = int(value)
var = int(var)
if op == "<":
m = value < var
elif op == "<=":
m = value <= var
elif op == ">":
m = value > var
elif op == ">=":
m = value >= var
else:
raise Exception("Unknown SHOW_ONLY_IF operator")
if not m:
matches = False
if not matches:
target = f'Target="slides/{slide.name}"'
id_start = pxmlrel.rindex('Id="', 0, pxmlrel.index(target)) + len('Id="')
id_end = pxmlrel.index('"', id_start)
rid = pxmlrel[id_start:id_end]
remove_end = pxmltext.index(f'r:id="{rid}"/>') + len(f'r:id="{rid}"/>')
remove_start = pxmltext.rindex(f'<', 0, remove_end)
pxmltext = pxmltext[:remove_start] + pxmltext[remove_end:]
write_pxml = True
slide.unlink()
continue
write = False
relwrite = False
reld = relfile.read_text()
for k, v in sorted(variables.items(), key=lambda i: len(i[0]), reverse=True):
if write or k in d:
d = d.replace(k, escape(v))
write = True
rid = 0
for img, path in sorted(images.items(), key=lambda i: len(i[0]), reverse=True):
if img in d:
rid += 1
reld = reld.replace(
'</Relationships>',
f'<Relationship Id="tprId{rid}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="{escape(path)}"/></Relationships>'
)
start = d.rindex("<p:pic>", 0, d.index(img))
embedstart = d.index('r:embed="', start) + len('r:embed="')
embedend = d.index('"', embedstart)
d = d[:embedstart] + f"tprId{rid}" + d[embedend:]
relwrite = True
write = True
if write:
slide.write_text(d)
if relwrite:
relfile.write_text(reld)
if write_pxml:
pxml.write_text(pxmltext)
if isinstance(out_name, io.BytesIO):
with zipfile.ZipFile(out_name, "a", zipfile.ZIP_DEFLATED, False) as ziph:
zipdir(pptx_workdir, ziph)
return out_name
else:
pptx = out_name
shutil.make_archive(pptx, 'zip', pptx_workdir)
shutil.move(pptx + ".zip", pptx + ".pptx")
pptx += '.pptx'
shutil.rmtree(pptx_workdir)
return pptx
# https://stackoverflow.com/a/1855118
def zipdir(path, ziph):
    """
    Adds every file below the directory `path` to the open zip file `ziph`.

    Member names are stored relative to `path`. Directories and files are
    visited in sorted order so that the same directory content always yields
    the same member order, independent of the order in which the file system
    happens to list its entries. Directories themselves (and therefore empty
    directories) are not added.
    """
    # ziph is zipfile handle
    for root, dirs, files in os.walk(path):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        for file in sorted(files):
            full_path = os.path.join(root, file)
            ziph.write(full_path, os.path.relpath(full_path, path))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment