Skip to content

Instantly share code, notes, and snippets.

@D4stiny
Created June 8, 2025 05:24
Show Gist options
  • Save D4stiny/328afbad165adb760472d3fbacd2e9fc to your computer and use it in GitHub Desktop.
Save D4stiny/328afbad165adb760472d3fbacd2e9fc to your computer and use it in GitHub Desktop.
geekbench plar archive extraction tool (ai generated based on decompiled code)
#!/usr/bin/env python3
"""
plar_extract.py – Extract files from Geekbench *.plar* archives (Geekbench 6+)
==========================================================================
Based on this older (4-year-old) script: https://gist.github.com/HarukaMa/0772fc47311f6bbcb79ce3f84a7134d7
Usage
-----
python plar_extract.py <archive.plar> [output_dir]
If *output_dir* is omitted it will be created next to the archive with the
same basename (``geekbench`` for ``geekbench.plar``).
The format was updated after Geekbench 5. Each "extent" is now a fixed
16-byte record (flags, idx, offset, length). A directory extent points at a
second-level table of 64-byte records, each holding a child extent index
followed by a NUL-terminated name. This script walks the tree starting at
extent 0 and writes every file it finds.
Only one compression scheme has been observed historically – **bzip2** –
and Geekbench 6 no longer uses it. We keep a tiny helper so that older
archives still unpack.
"""
import os
import struct
import sys
import bz2
# Signature expected in the first four bytes of an archive (checked by
# extract_plar). Note the on-disk byte order is P, R, L, A.
MAGIC = b"PRLA"
# Size of one extent-table record: four little-endian uint32 fields
# (flags, idx, offset, length), as unpacked by _parse_extents.
EXTENT_SZ = 16 # bytes
# Size of one directory record: a uint32 child extent index followed by a
# NUL-terminated name, as read by _walk_dirs.
DIR_REC_SZ = 64 # bytes
def _u32(buf: bytes, off: int) -> int:
    """Decode the unsigned 32-bit little-endian integer at ``buf[off:off + 4]``."""
    (value,) = struct.unpack_from("<I", buf, off)
    return value
class Extent:
    """One record of the archive's extent table.

    Attributes are stored exactly as given: ``flags`` (entry kind), ``idx``
    (the record's index), and ``offset`` / ``length`` (the byte range the
    entry occupies in the archive).
    """

    __slots__ = ("flags", "idx", "offset", "length")

    def __init__(self, flags: int, idx: int, offset: int, length: int):
        self.flags, self.idx = flags, idx
        self.offset, self.length = offset, length

    @property
    def is_dir(self) -> bool:
        """True when ``flags`` is 0, the value this script treats as a directory."""
        kind = self.flags
        return kind == 0

    @property
    def is_file(self) -> bool:
        """True when ``flags`` is 1, the value this script treats as a regular file."""
        kind = self.flags
        return kind == 1
def _parse_extents(blob: bytes, table_off: int, table_len: int):
    """Parse the extent table and return a dict ``{idx: Extent}``.

    Args:
        blob: Entire archive contents.
        table_off: Byte offset of the extent table inside *blob*.
        table_len: Length of the extent table in bytes.

    Returns:
        A dict keyed by each record's ``idx`` field. If two records carry the
        same ``idx`` the later one wins.

    Raises:
        ValueError: If the declared table does not fit inside *blob*.
    """
    if table_off + table_len > len(blob):
        raise ValueError(
            f"Extent table ({table_len} bytes at offset {table_off}) "
            f"exceeds archive size ({len(blob)} bytes)"
        )

    # Decode whole records only: a trailing fragment shorter than EXTENT_SZ
    # would otherwise be unpacked from bytes lying beyond the declared table.
    usable_len = table_len - table_len % EXTENT_SZ

    ext = {}
    for off in range(0, usable_len, EXTENT_SZ):
        flags, idx, begin, length = struct.unpack_from("<IIII", blob, table_off + off)
        ext[idx] = Extent(flags, idx, begin, length)
    return ext
def _walk_dirs(
    blob: bytes,
    extents: dict,
    idx: int,
    parts: list,
    _ancestors: frozenset = frozenset(),
):
    """Yield ``(path_components, Extent)`` for every file in the subtree.

    The directory extent *idx* is read as a sequence of ``DIR_REC_SZ``-byte
    records (uint32 child extent index + NUL-terminated name). Entries named
    ``""``, ``"."`` or ``".."`` are skipped; child directories are descended
    into depth-first, in record order.

    Args:
        blob: Entire archive contents.
        extents: Mapping of extent index to ``Extent``.
        idx: Index of the directory extent to walk.
        parts: Path components leading to this directory (not modified).
        _ancestors: Internal — indices of the directories currently being
            walked on the path from the root, used to detect cycles.

    Raises:
        ValueError: If *idx* or a referenced child is missing from *extents*,
            if *idx* is not a directory, or if a directory is nested inside
            itself.
    """
    ext = extents.get(idx)
    if ext is None:
        raise ValueError(f"Extent {idx} is missing from the extent table")
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if not ext.is_dir:
        raise ValueError(f"Extent {idx} is not a directory")
    # A directory that (indirectly) lists itself would recurse without end.
    if idx in _ancestors:
        raise ValueError(f"Directory extent {idx} is nested inside itself")
    lineage = _ancestors | {idx}

    for off in range(0, ext.length, DIR_REC_SZ):
        start = ext.offset + off
        rec = blob[start : start + DIR_REC_SZ]
        child_idx = _u32(rec, 0)
        # Plain ASCII codec name; undecodable bytes become U+FFFD.
        name = rec[4:].split(b"\x00", 1)[0].decode("utf-8", "replace")
        if name in ("", ".", ".."):
            continue
        child_ext = extents.get(child_idx)
        if child_ext is None:
            raise ValueError(
                f"Directory extent {idx} entry {name!r} references "
                f"unknown extent {child_idx}"
            )
        new_parts = parts + [name]
        if child_ext.is_dir:
            yield from _walk_dirs(blob, extents, child_idx, new_parts, lineage)
        else:
            yield new_parts, child_ext
def _maybe_decompress(buf: bytes) -> bytes:
    """Return *buf* bzip2-decompressed when it is a bzip2 stream, else unchanged.

    Detection is by the ``BZh`` signature at the start of the payload. A
    payload that merely begins with those bytes but cannot be decoded is
    returned as-is rather than aborting the extraction.
    """
    if not buf.startswith(b"BZh"):
        return buf
    try:
        return bz2.decompress(buf)
    except (OSError, ValueError):
        # OSError: the data is not a valid bzip2 stream.
        # ValueError: the stream ended before its end-of-stream marker.
        # Either way this is not a usable bzip2 payload – keep the raw bytes.
        return buf
def _is_inside_dir(root: str, candidate: str) -> bool:
    """Return True if *candidate* resolves to a path at or below *root*.

    *root* must already be resolved with ``os.path.realpath``.
    """
    resolved = os.path.realpath(candidate)
    try:
        common = os.path.commonpath([root, resolved])
    except ValueError:
        # Raised e.g. for paths on different drives – certainly not inside.
        return False
    return os.path.normcase(common) == os.path.normcase(root)


def extract_plar(blob: bytes, out_dir: str = "."):
    """Extract every file stored in the PLAR archive *blob* into *out_dir*.

    Args:
        blob: Entire archive contents.
        out_dir: Destination directory. The parent directory of each
            extracted file is created on demand.

    Raises:
        ValueError: If the magic bytes are wrong, the header is truncated, or
            an entry's name would place the file outside *out_dir*.
    """
    if blob[:4] != MAGIC:
        raise ValueError("Not a PLAR archive – wrong magic bytes")
    # The header is the magic plus two uint32 fields (table offset, length).
    if len(blob) < 12:
        raise ValueError("Truncated PLAR archive – header is incomplete")

    table_off = _u32(blob, 4)
    table_len = _u32(blob, 8)
    extents = _parse_extents(blob, table_off, table_len)

    root = os.path.realpath(out_dir)
    for path_parts, ext in _walk_dirs(blob, extents, 0, []):
        target = os.path.join(out_dir, *path_parts)
        # Entry names come from the archive: an absolute name or one with
        # embedded separators / ".." must not escape the destination.
        if not _is_inside_dir(root, target):
            raise ValueError(
                f"Refusing to extract {'/'.join(path_parts)!r}: "
                f"path escapes the output directory"
            )

        payload = blob[ext.offset : ext.offset + ext.length]
        payload = _maybe_decompress(payload)

        parent = os.path.dirname(target)
        if parent:  # os.makedirs("") raises, e.g. when out_dir is ""
            os.makedirs(parent, exist_ok=True)
        with open(target, "wb") as fh:
            fh.write(payload)
# ---------------------------------------------------------------------------
# CLI helper
# ---------------------------------------------------------------------------
def main(argv: list[str]) -> None:
    """Command-line entry point.

    Args:
        argv: Argument vector in ``sys.argv`` layout: program name, archive
            path, and an optional output directory.

    When no output directory is given, it defaults to the archive path with
    its extension removed, i.e. a directory next to the archive
    (``dir/geekbench`` for ``dir/geekbench.plar``).

    Raises:
        SystemExit: With exit code 1 when the archive path is missing.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else "plar_extract.py"
        print(f"Usage: {prog} <archive.plar> [output_dir]", file=sys.stderr)
        raise SystemExit(1)

    archive_path = argv[1]
    if len(argv) > 2:
        out_dir = argv[2]
    else:
        # Keep the archive's directory so the output lands next to it.
        out_dir, suffix = os.path.splitext(archive_path)
        if not suffix:
            # Without an extension the directory name would be the archive
            # file itself; pick a distinct name instead.
            out_dir += "_extracted"

    with open(archive_path, "rb") as f:
        blob = f.read()
    extract_plar(blob, out_dir)
    print(f"✔ Extraction complete – files written to '{out_dir}/'")


if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment