Skip to content

Instantly share code, notes, and snippets.

@simbo1905
Last active May 10, 2026 14:45
Show Gist options
  • Select an option

  • Save simbo1905/34f66e28462c02a2e64ecdf9389fbe51 to your computer and use it in GitHub Desktop.

Select an option

Save simbo1905/34f66e28462c02a2e64ecdf9389fbe51 to your computer and use it in GitHub Desktop.
Codex Chat History: SKILL + codex_chat_history.py + codex_prompt_history_search.py + line_histogram.awk (mirrors https://github.com/simbo1905/codex-chat-history)
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.13.0,<3.14"
# dependencies = []
# ///
"""Codex session rollouts under $CODEX_HOME/sessions: backup, list, profile, bounds, user-messages."""
from __future__ import annotations
import argparse
import gzip
import json
import os
import re
import shutil
import subprocess
import sys
import time
from pathlib import Path
# Loose matcher for slash-led, path-like runs of characters; _redact replaces
# every hit with a placeholder.
PATH_RE = re.compile(r"(/[A-Za-z0-9._/@+~-]+)+")
# Matches codex-rollout `ARCHIVED_SESSIONS_SUBDIR` (rollout files under CODEX_HOME).
ARCHIVED_SESSIONS_SUBDIR = "archived_sessions"
def _codex_home() -> Path:
    """Return the Codex data root: $CODEX_HOME when set and non-blank, else ~/.codex."""
    override = os.environ.get("CODEX_HOME", "").strip()
    if not override:
        return Path.home() / ".codex"
    return Path(override).expanduser().resolve()
def _sessions_root() -> Path:
    """Return the rollout tree root: $CODEX_SESSIONS_ROOT if set, else <codex home>/sessions."""
    override = os.environ.get("CODEX_SESSIONS_ROOT", "").strip()
    if not override:
        return _codex_home() / "sessions"
    return Path(override).expanduser().resolve()
def _archived_sessions_root() -> Path:
    """Return the archived-rollouts directory located under the Codex home."""
    home = _codex_home()
    return home / ARCHIVED_SESSIONS_SUBDIR
def _default_backup_root() -> Path:
    """Return the backup destination: $CODEX_SESSIONS_BACKUP_ROOT or ~/icloud/.codex/sessions."""
    override = os.environ.get("CODEX_SESSIONS_BACKUP_ROOT", "").strip()
    if override:
        chosen = Path(override)
    else:
        chosen = Path.home() / "icloud" / ".codex" / "sessions"
    return chosen.expanduser().resolve()
def _read_gzip_mtime(path: Path) -> int | None:
data = path.read_bytes()[:10]
if len(data) < 10 or data[0:2] != b"\x1f\x8b":
return None
return int.from_bytes(data[4:8], "little")
def _should_skip_jsonl(src: Path, dst_gz: Path) -> bool:
    """Tell whether *dst_gz* already mirrors *src*.

    True only when *dst_gz* exists and its gzip header mtime equals the
    source's modification time truncated to whole seconds.
    """
    if dst_gz.is_file():
        expected = int(src.stat().st_mtime)
        return _read_gzip_mtime(dst_gz) == expected
    return False
def _iter_rollouts(root: Path) -> list[Path]:
    """Return every rollout-*.jsonl under *root* (recursively), sorted; [] if *root* is not a directory."""
    return sorted(root.rglob("rollout-*.jsonl")) if root.is_dir() else []
def _mtime_cutoff_seconds(since: str) -> float | None:
since = since.strip().lower()
if since in ("", "all", "*"):
return None
now = time.time()
if since.endswith("d"):
days = float(since[:-1] or "0")
return now - days * 86400.0
if since.endswith("h"):
hours = float(since[:-1] or "0")
return now - hours * 3600.0
raise SystemExit(f"invalid --since {since!r}; use e.g. 1d or 48h")
def _gzip_atomically(src: Path, dst_gz: Path, mtime: int) -> None:
    """Gzip *src* into *dst_gz* via a sibling temp file and an atomic rename.

    The gzip header carries *mtime* from its very first bytes, so writing
    straight to *dst_gz* would let an interrupted run leave a truncated archive
    that a later mtime check accepts as up to date. Writing to a temp file and
    renaming only on success avoids that.
    """
    tmp = dst_gz.with_name(dst_gz.name + ".tmp")
    try:
        with src.open("rb") as raw, tmp.open("wb") as sink:
            # filename= only feeds the header's name field when fileobj is
            # given, so the stored name is still derived from dst_gz.
            with gzip.GzipFile(
                filename=str(dst_gz),
                mode="wb",
                compresslevel=9,
                fileobj=sink,
                mtime=mtime,
            ) as gz:
                shutil.copyfileobj(raw, gz)
        os.replace(tmp, dst_gz)
    except BaseException:
        # Never leave a half-written temp file behind, even on Ctrl-C.
        tmp.unlink(missing_ok=True)
        raise


def _backup_walk_one_tree(
    src_root: Path,
    dst_root: Path,
    *,
    dry_run: bool,
    force: bool,
) -> tuple[int, int]:
    """Mirror every file under *src_root* into *dst_root*.

    Files named rollout-*.jsonl are gzip-compressed to <name>.jsonl.gz with the
    source mtime stored in the gzip header; all other files are copied with
    shutil.copy2. Relative paths are preserved.

    Args:
        src_root: Tree to read.
        dst_root: Root of the mirror; parent directories are created on demand.
        dry_run: Print the planned action for each file and write nothing.
        force: Re-compress rollouts even when the existing archive's header
            mtime already matches the source.

    Returns:
        (rollouts gzipped or planned, other files copied or planned).
    """
    n_gz = 0
    n_other = 0
    for path in sorted(src_root.rglob("*")):
        if not path.is_file():
            continue
        dst = dst_root / path.relative_to(src_root)
        is_rollout = path.suffix == ".jsonl" and path.name.startswith("rollout-")
        if not is_rollout:
            n_other += 1
            if dry_run:
                print(f"cp {path} -> {dst}")
                continue
            dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(path, dst)
            continue
        dst_gz = dst.with_suffix(path.suffix + ".gz")
        if not force and _should_skip_jsonl(path, dst_gz):
            continue
        n_gz += 1
        if dry_run:
            print(f"gz {path} -> {dst_gz}")
            continue
        dst_gz.parent.mkdir(parents=True, exist_ok=True)
        mtime = int(path.stat().st_mtime)
        _gzip_atomically(path, dst_gz, mtime)
        try:
            # Best effort: align the archive's filesystem mtime with the source.
            os.utime(dst_gz, (mtime, mtime))
        except OSError:
            pass
    return n_gz, n_other
def cmd_backup(args: argparse.Namespace) -> None:
    """Run the `backup` subcommand: mirror --src (and optionally archived sessions) into --dst.

    Exits with status 1 when the source directory does not exist. The archived
    tree is mirrored only when --archived is set and that directory exists.
    """
    src_root: Path = args.src.expanduser().resolve()
    dst_root: Path = args.dst.expanduser().resolve()
    if not src_root.is_dir():
        print(f"error: source directory missing: {src_root}", file=sys.stderr)
        sys.exit(1)
    totals = list(
        _backup_walk_one_tree(src_root, dst_root, dry_run=args.dry_run, force=args.force)
    )
    if args.archived:
        archived = _archived_sessions_root()
        if archived.is_dir():
            extra = _backup_walk_one_tree(
                archived, dst_root, dry_run=args.dry_run, force=args.force
            )
            totals = [have + more for have, more in zip(totals, extra)]
    n_gz, n_other = totals
    verb = "would write" if args.dry_run else "wrote"
    print(f"{verb} {n_gz} rollout gzip file(s), {n_other} other file(s) -> {dst_root}")
def cmd_list(args: argparse.Namespace) -> None:
    """Run the `list` subcommand: print rollout paths, optionally limited by --since mtime."""
    root: Path = args.src.expanduser().resolve()
    cutoff = _mtime_cutoff_seconds(args.since)

    def emit(rollouts: list[Path]) -> None:
        # Print each rollout unless its mtime is older than the cutoff.
        for rollout in rollouts:
            if cutoff is not None and rollout.stat().st_mtime < cutoff:
                continue
            print(rollout)

    emit(_iter_rollouts(root))
    if args.archived:
        emit(_iter_rollouts(_archived_sessions_root()))
def _profile_one_root(
    root: Path,
    awk: Path | None,
    cutoff: float | None,
) -> None:
    """Print a per-rollout profile for every rollout under *root*.

    Args:
        root: Directory searched recursively for rollout files.
        awk: Optional awk script run against each rollout (its exit status is
            ignored); without it a byte-size and line-count summary is printed.
        cutoff: Skip rollouts whose mtime is older than this Unix time; None
            disables the filter.
    """
    for path in _iter_rollouts(root):
        if cutoff is not None and path.stat().st_mtime < cutoff:
            continue
        # Flush before spawning awk: the child writes to the same descriptor
        # directly, so a header still sitting in Python's buffer would appear
        # after awk's output whenever stdout is a pipe or file.
        print(f"=== {path} ===", flush=True)
        if awk:
            subprocess.run(
                ["awk", "-f", str(awk), str(path)],
                check=False,
            )
        else:
            nbytes = path.stat().st_size
            with path.open("rb") as bf:
                nlines = sum(1 for _ in bf)
            print(f"(no --awk) size={nbytes} bytes lines={nlines}")
def cmd_profile(args: argparse.Namespace) -> None:
    """Run the `profile` subcommand over --src and, with --archived, the archived tree.

    Exits with status 1 when --awk is given but does not point at a file.
    """
    root: Path = args.src.expanduser().resolve()
    awk: Path | None = None
    if args.awk:
        awk = args.awk.expanduser().resolve()
    if awk and not awk.is_file():
        print(f"error: awk script not found: {awk}", file=sys.stderr)
        sys.exit(1)
    cutoff = _mtime_cutoff_seconds(args.since)
    _profile_one_root(root, awk, cutoff)
    if not args.archived:
        return
    _profile_one_root(_archived_sessions_root(), awk, cutoff)
def _first_last_timestamps(path: Path) -> tuple[str | None, str | None]:
first_ts: str | None = None
last_ts: str | None = None
with path.open("r", encoding="utf-8", errors="replace") as fh:
for line in fh:
line = line.strip()
if not line:
continue
try:
obj = json.loads(line)
except json.JSONDecodeError:
continue
ts = obj.get("timestamp")
if isinstance(ts, str):
if first_ts is None:
first_ts = ts
last_ts = ts
return first_ts, last_ts
def _bounds_emit_paths(paths: list[Path], cutoff: float | None) -> None:
    """Print each path followed by its FIRST/LAST timestamps, skipping files older than *cutoff*."""
    for rollout in paths:
        too_old = cutoff is not None and rollout.stat().st_mtime < cutoff
        if too_old:
            continue
        earliest, latest = _first_last_timestamps(rollout)
        print(rollout)
        print(f" FIRST: {earliest}")
        print(f" LAST: {latest}")
def cmd_bounds(args: argparse.Namespace) -> None:
    """Run the `bounds` subcommand: first/last timestamp per selected rollout.

    With --glob, each pattern is expanded relative to the root and only regular
    files whose name starts with "rollout-" are kept (resolved, de-duplicated,
    sorted). Without it, every rollout under the root is used. The archived
    tree gets the same treatment when --archived is set and the directory exists.
    """
    root: Path = args.src.expanduser().resolve()
    cutoff = _mtime_cutoff_seconds(args.since)

    def collect(base: Path) -> list[Path]:
        # Select the rollouts under *base* according to --glob (or all of them).
        if not args.glob:
            return _iter_rollouts(base)
        hits = [hit for pattern in args.glob for hit in base.glob(pattern)]
        unique = {
            hit.resolve()
            for hit in hits
            if hit.is_file() and hit.name.startswith("rollout-")
        }
        return sorted(unique)

    _bounds_emit_paths(collect(root), cutoff)
    if args.archived:
        archived = _archived_sessions_root()
        if archived.is_dir():
            _bounds_emit_paths(collect(archived), cutoff)
def _redact(text: str) -> str:
    """Mask path-like runs and home-directory user names in *text*.

    PATH_RE matches are replaced by ``<PATH>`` first; any ``/Users/<name>`` or
    ``/home/<name>`` prefix still present afterwards has the name replaced by
    ``<USER>``.
    """
    masked = PATH_RE.sub("<PATH>", text)
    for pattern, replacement in (
        (r"/Users/[^/\s]+", "/Users/<USER>"),
        (r"/home/[^/\s]+", "/home/<USER>"),
    ):
        masked = re.sub(pattern, replacement, masked)
    return masked
def cmd_user_messages(args: argparse.Namespace) -> None:
    """Run the `user-messages` subcommand: print redacted user prompts from one rollout.

    A line counts when it is a JSON object with type "event_msg" whose payload
    is an object with type "user_message" and a non-empty string "message".
    Blank lines, invalid JSON and JSON values that are not objects are skipped.
    Matches are printed as "[MSG n]: <redacted text>"; when there are none a
    note is written to stderr. Exits with status 1 if the path is not a file.
    """
    path: Path = args.file.expanduser().resolve()
    if not path.is_file():
        print(f"error: not a file: {path}", file=sys.stderr)
        sys.exit(1)
    n = 0
    with path.open("r", encoding="utf-8", errors="replace") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object (list, string, null, ...) has no
            # .get; skip it instead of crashing the whole extraction.
            if not isinstance(obj, dict) or obj.get("type") != "event_msg":
                continue
            payload = obj.get("payload")
            if not isinstance(payload, dict):
                continue
            if payload.get("type") != "user_message":
                continue
            msg = payload.get("message")
            if isinstance(msg, str) and msg:
                n += 1
                print(f"[MSG {n}]: {_redact(msg)}")
    if n == 0:
        print("(no user_message events found)", file=sys.stderr)
def main() -> None:
    """Build the CLI (backup, list, profile, bounds, user-messages) and dispatch to the chosen handler."""
    parser = argparse.ArgumentParser(
        description="Codex sessions: backup (gzip mirror), list, profile, bounds, user-messages.",
    )
    commands = parser.add_subparsers(dest="cmd", required=True)

    def add_src(target: argparse.ArgumentParser) -> None:
        # Shared --src option; the default is resolved from the environment.
        target.add_argument(
            "--src",
            type=Path,
            default=_sessions_root(),
            help="Sessions dir (default: $CODEX_SESSIONS_ROOT or $CODEX_HOME/sessions)",
        )

    def add_archived(target: argparse.ArgumentParser, help_text: str) -> None:
        # Shared --archived switch with a per-command help string.
        target.add_argument("--archived", action="store_true", help=help_text)

    # backup
    backup = commands.add_parser(
        "backup",
        help="Gzip mirror of sessions tree; default dest from env (see SKILL.md)",
    )
    add_src(backup)
    backup.add_argument(
        "--dst",
        "--dest",
        type=Path,
        default=None,
        dest="dst",
        help="Backup root (default: $CODEX_SESSIONS_BACKUP_ROOT or ~/icloud/.codex/sessions)",
    )
    backup.add_argument("-n", "--dry-run", action="store_true")
    backup.add_argument("-f", "--force", action="store_true")
    add_archived(
        backup,
        f"After --src, also mirror $CODEX_HOME/{ARCHIVED_SESSIONS_SUBDIR} into the same --dst",
    )
    backup.set_defaults(func=cmd_backup)

    # list
    listing = commands.add_parser("list", help="List rollout paths under --src")
    add_src(listing)
    listing.add_argument("--since", default="all", help="e.g. 1d, 48h, or all")
    add_archived(
        listing,
        f"After --src, also list rollouts under $CODEX_HOME/{ARCHIVED_SESSIONS_SUBDIR}",
    )
    listing.set_defaults(func=cmd_list)

    # profile
    profile = commands.add_parser("profile", help="Histogram each rollout (optional awk)")
    add_src(profile)
    profile.add_argument(
        "--awk",
        type=Path,
        default=None,
        help="Path to line_histogram.awk (beside this script in the repo)",
    )
    profile.add_argument("--since", default="all")
    add_archived(
        profile,
        f"After --src, also profile rollouts under $CODEX_HOME/{ARCHIVED_SESSIONS_SUBDIR}",
    )
    profile.set_defaults(func=cmd_profile)

    # bounds
    bounds = commands.add_parser("bounds", help="First and last timestamp per rollout")
    add_src(bounds)
    bounds.add_argument(
        "--glob",
        action="append",
        help="Optional glob(s) relative to --src (repeatable), default all rollouts",
    )
    bounds.add_argument("--since", default="all")
    add_archived(
        bounds,
        (
            f"After --src (and any --glob under --src), also bounds rollouts under "
            f"$CODEX_HOME/{ARCHIVED_SESSIONS_SUBDIR} (same --glob relative to archived root if set)"
        ),
    )
    bounds.set_defaults(func=cmd_bounds)

    # user-messages
    user_messages = commands.add_parser(
        "user-messages", help="Extract user_message text from one rollout"
    )
    user_messages.add_argument("file", type=Path)
    user_messages.set_defaults(func=cmd_user_messages)

    args = parser.parse_args()
    # The backup destination default is resolved only when --dst/--dest was omitted.
    if args.cmd == "backup" and args.dst is None:
        args.dst = _default_backup_root()
    args.func(args)


if __name__ == "__main__":
    main()
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.13.0,<3.14"
# dependencies = []
# ///
"""Search ~/.codex/history.jsonl (prompt history: session_id, ts, text)."""
from __future__ import annotations
import argparse
import csv
import io
import json
import os
import re
import sys
from dataclasses import dataclass
from datetime import UTC
from datetime import datetime
from difflib import SequenceMatcher
from enum import Enum
from pathlib import Path
def _codex_home() -> Path:
    """Return $CODEX_HOME (expanded and resolved) when set and non-blank, otherwise ~/.codex."""
    configured = os.environ.get("CODEX_HOME", "").strip()
    return Path(configured).expanduser().resolve() if configured else Path.home() / ".codex"
def _history_path() -> Path:
    """Return the prompt-history file: $CODEX_HISTORY_PATH if set, else <codex home>/history.jsonl."""
    override = os.environ.get("CODEX_HISTORY_PATH", "").strip()
    if not override:
        return _codex_home() / "history.jsonl"
    return Path(override).expanduser().resolve()
class MatchMode(str, Enum):
    """Search strategies selectable with --mode; each member's value is its own name."""

    # Whole phrase must occur as a substring.
    EXACT = "EXACT"
    # Any whitespace-separated token of the phrase may occur.
    ANY = "ANY"
    # Substring shortcut, else a difflib similarity threshold.
    FUZZY = "FUZZY"
@dataclass(frozen=True)
class HistoryRow:
session_id: str
ts: int
text: str
def date_utc(self) -> str:
return datetime.fromtimestamp(self.ts, tz=UTC).strftime("%Y-%m-%d")
def as_csv_log4j(self) -> str:
buf = io.StringIO()
w = csv.writer(buf, lineterminator="")
w.writerow([self.date_utc(), str(self.ts), self.session_id, self.text])
return buf.getvalue()
def as_json_line(self) -> str:
return json.dumps(
{
"date": self.date_utc(),
"ts": self.ts,
"session_id": self.session_id,
"text": self.text,
},
ensure_ascii=False,
)
def _word_tokens(phrase: str) -> list[str]:
    """Return the whitespace-separated, non-empty tokens of *phrase* in order."""
    return re.findall(r"\S+", phrase)
def _matches_exact(phrase: str, text: str) -> bool:
    """Return True when *phrase* occurs verbatim (case-sensitively) inside *text*."""
    found = phrase in text
    return found
def _matches_any(phrase: str, text: str) -> bool:
    """Return True when at least one token of *phrase* occurs as a substring of *text*.

    A phrase with no tokens never matches.
    """
    return any(token in text for token in _word_tokens(phrase))
def _matches_fuzzy(phrase: str, text: str, threshold: float) -> bool:
    """Case-insensitive fuzzy match of *phrase* against the whole of *text*.

    A blank phrase never matches. A substring hit matches immediately;
    otherwise SequenceMatcher.quick_ratio must reach *threshold*.
    """
    needle = phrase.strip().lower()
    if not needle:
        return False
    haystack = text.lower()
    if needle in haystack:
        return True
    score = SequenceMatcher(None, needle, haystack).quick_ratio()
    return score >= threshold
def _parse_line(line: str) -> HistoryRow | None:
    """Parse one history.jsonl line into a HistoryRow.

    Returns:
        A HistoryRow, or None when the line is blank, is not valid JSON, is
        not a JSON object, lacks string "session_id"/"text" fields, or has a
        "ts" that is missing, boolean, or not convertible to an int.
    """
    line = line.strip()
    if not line:
        return None
    try:
        obj = json.loads(line)
    except json.JSONDecodeError:
        return None
    # Valid JSON need not be an object; lists, strings, numbers and null have
    # no .get, so treat them as unparseable rows rather than crashing.
    if not isinstance(obj, dict):
        return None
    sid = obj.get("session_id")
    ts = obj.get("ts")
    text = obj.get("text")
    if not isinstance(sid, str) or not isinstance(text, str):
        return None
    if isinstance(ts, bool) or ts is None:
        return None
    try:
        ts_i = int(ts)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: json accepts 1e999 / Infinity, which int() rejects.
        return None
    return HistoryRow(session_id=sid, ts=ts_i, text=text)
def cmd_search(args: argparse.Namespace) -> None:
    """Scan the history file and print every row matching the phrase under the chosen mode.

    Rows are printed as JSON objects with --json, otherwise as CSV records.
    Exits with status 1 when the history file does not exist.
    """
    path: Path = args.file.expanduser().resolve()
    if not path.is_file():
        print(f"error: history file not found: {path}", file=sys.stderr)
        sys.exit(1)
    mode = MatchMode(args.mode.upper())
    phrase: str = args.phrase
    threshold = float(args.fuzzy_threshold)

    def accepts(row: HistoryRow) -> bool:
        # Dispatch to the matcher that corresponds to the selected mode.
        if mode is MatchMode.EXACT:
            return _matches_exact(phrase, row.text)
        if mode is MatchMode.ANY:
            return _matches_any(phrase, row.text)
        return _matches_fuzzy(phrase, row.text, threshold)

    with path.open("r", encoding="utf-8", errors="replace") as fh:
        for raw in fh:
            row = _parse_line(raw)
            if row is None or not accepts(row):
                continue
            rendered = row.as_json_line() if args.json else row.as_csv_log4j()
            print(rendered)
def main() -> None:
    """Parse the command line and run the prompt-history search.

    The positional phrase words are joined with single spaces. --mode is
    accepted in any letter case and normalised to upper case before it is
    checked against the allowed values. When --file is omitted the history
    path is resolved from the environment.
    """
    p = argparse.ArgumentParser(
        description="Search Codex prompt history (history.jsonl: session_id, ts, text).",
    )
    p.add_argument(
        "phrase",
        nargs="+",
        help="Search phrase (for EXACT, substring match in text field)",
    )
    p.add_argument(
        "--file",
        type=Path,
        default=None,
        help="Path to history.jsonl (default: $CODEX_HISTORY_PATH or $CODEX_HOME/history.jsonl)",
    )
    p.add_argument(
        "-j",
        "--json",
        action="store_true",
        help="Emit one JSON object per matching line",
    )
    p.add_argument(
        "--mode",
        # argparse applies type= before the choices check, so "fuzzy" and
        # "Fuzzy" are accepted as well as "FUZZY".
        type=str.upper,
        choices=[m.value for m in MatchMode],
        default=MatchMode.EXACT.value,
        # ANY matches tokens as substrings, not whole words; say so.
        help=(
            "EXACT substring | ANY any whitespace-separated token from phrase "
            "as a substring | FUZZY difflib quick_ratio"
        ),
    )
    p.add_argument(
        "--fuzzy-threshold",
        type=float,
        default=0.65,
        help="FUZZY mode: minimum SequenceMatcher.quick_ratio (default 0.65)",
    )
    args = p.parse_args()
    args.phrase = " ".join(args.phrase)
    if args.file is None:
        args.file = _history_path()
    cmd_search(args)


if __name__ == "__main__":
    main()
#!/usr/bin/awk -f
# line_histogram.awk - Profile file line sizes or extract specific lines.
# Useful for checking large JSONL before reading it with jq or an agent.
#
# Usage:
# ./line_histogram.awk <file>
# ./line_histogram.awk -v mode=extract -v line=5 <file>
# ./line_histogram.awk -v mode=extract -v start=10 -v end=20 <file>
BEGIN {
    # Histogram is the default unless -v mode=... was supplied.
    if (mode == "") mode = "histogram"
    # Running totals, filled in by the per-line rule.
    total_lines = 0
    total_bytes = 0
    # Histogram output goes to -v outfile=... when given, otherwise to stdout.
    out = (outfile == "") ? "/dev/stdout" : outfile
}
{
    # Per-line bookkeeping: record each line's length and the running totals.
    total_lines++
    line_sizes[total_lines] = length($0)
    # Only extract mode ever prints line text; histogram mode needs sizes
    # alone, so do not buffer a huge file's contents just to profile it.
    if (mode == "extract") {
        lines[total_lines] = $0
    }
    total_bytes += line_sizes[total_lines]
}
# END: runs after all input is read. In extract mode it prints the requested
# line or line range and exits; otherwise it prints a summary followed by a
# ten-row histogram of line sizes per line-number bucket.
END {
if (mode == "extract") {
# Single line requested with -v line=N.
if (line != "") {
if (line >= 1 && line <= total_lines) {
print lines[line]
} else {
print "Error: line " line " out of range (1-" total_lines ")" > "/dev/stderr"
exit 1
}
# Range requested with -v start=X -v end=Y; bounds are clamped to the file.
} else if (start != "" && end != "") {
if (start < 1) {
start = 1
}
if (end > total_lines) {
end = total_lines
}
if (start > end) {
print "Error: start " start " > end " end > "/dev/stderr"
exit 1
}
for (i = start; i <= end; i++) {
print lines[i]
}
} else {
print "Error: extract mode requires -v line=N or -v start=X -v end=Y" > "/dev/stderr"
exit 1
}
exit 0
}
# Histogram mode: summary header first.
print "File: " FILENAME > out
print "Total bytes: " total_bytes > out
print "Total lines: " total_lines > out
print "" > out
print "Bucket Distribution:" > out
print "" > out
if (total_lines == 0) {
print "Empty file" > out
exit 0
}
# Up to ten lines: one bucket per line. Otherwise ten buckets of
# int(total_lines / 10) lines each, with the remainder going to the last one.
if (total_lines <= 10) {
num_buckets = total_lines
bucket_size = 1
} else {
num_buckets = 10
bucket_size = int(total_lines / 10)
}
# Zero all ten buckets so unused rows print 0.
for (i = 1; i <= 10; i++) {
bucket_bytes[i] = 0
}
# Add each line's size to its bucket.
for (line_num = 1; line_num <= total_lines; line_num++) {
if (total_lines <= 10) {
bucket = line_num
} else {
bucket = int((line_num - 1) / bucket_size) + 1
# Lines past the tenth full bucket are folded into bucket 10.
if (bucket > 10) {
bucket = 10
}
}
bucket_bytes[bucket] += line_sizes[line_num]
}
# The largest bucket sets the scale for the bars.
max_bytes = 0
for (i = 1; i <= num_buckets; i++) {
if (bucket_bytes[i] > max_bytes) {
max_bytes = bucket_bytes[i]
}
}
printf "%-15s | %-12s | %-40s\n", "Line Range", "Bytes", "Distribution" > out
print "-----------------+--------------+------------------------------------------" > out
# Always print ten rows; rows without lines show "-" as their range.
for (i = 1; i <= 10; i++) {
if (total_lines <= 10) {
if (i <= total_lines) {
start_line = i
end_line = i
} else {
start_line = 0
end_line = 0
}
} else {
start_line = (i - 1) * bucket_size + 1
if (i == 10) {
end_line = total_lines
} else {
end_line = i * bucket_size
}
}
if (start_line == 0) {
range = sprintf("%7s", "-")
} else if (start_line == end_line) {
range = sprintf("%7d", start_line)
} else {
range = sprintf("%d-%d", start_line, end_line)
}
# Bar length is proportional to the largest bucket, capped at 40 characters.
if (max_bytes > 0) {
bar_len = int((bucket_bytes[i] / max_bytes) * 40)
} else {
bar_len = 0
}
bar = ""
for (j = 1; j <= bar_len; j++) {
bar = bar "#"
}
printf "%-15s | %12d | %s\n", range, bucket_bytes[i], bar > out
}
print "-----------------+--------------+------------------------------------------" > out
}
# Copyright (c) 2026 LiveMore Capital www.livemorecapital.com
name codex-chat-history
description Backup, search, and inspect Codex CLI session rollout JSONL under $CODEX_HOME/sessions (date-partitioned tree). Use for mirroring rollouts to sync storage (e.g. iCloud), listing or bounding sessions by time, profiling large JSONL before reading, or extracting user-authored messages with jq or the bundled helper script.

Codex Chat History

What this covers

Codex stores each session as append-only rollout JSONL files. The on-disk layout is stable and predictable:

$CODEX_SESSIONS_ROOT/YYYY/MM/DD/rollout-YYYY-MM-DDThh-mm-ss-<uuid>.jsonl

Defaults (overridable; see Environment variables):

  • $CODEX_SESSIONS_ROOT — if unset, use $CODEX_HOME/sessions.
  • $CODEX_HOME — if unset, use ~/.codex.

So the usual path is ~/.codex/sessions/YYYY/MM/DD/rollout-*.jsonl, which matches the structure Codex uses when CODEX_HOME is not set.

Archived sessions (after an archive action in product) may appear under:

$CODEX_HOME/archived_sessions/

with the same rollout-*.jsonl filename pattern.

Each JSON line is a rollout record: a top-level timestamp plus a type / payload pair (for example session_meta, event_msg, response_item, compacted, turn_context). User-typed prompts are generally type == "event_msg" with payload.type == "user_message" and payload.message.

This skill is not about Cursor IDE transcripts (~/.cursor/projects/.../agent-transcripts/...); for those, use cursor-chat-history (script cursor_chat_history.py).

When to use it

  • Backup / mirror rollouts to another directory or cloud-synced folder (gzip mirror preserves YYYY/MM/DD and stores .jsonl.gz).
  • Search or recover what was said: extract user lines, then rg / filter by topic.
  • Inspect large files safely: histogram or line-slice before loading whole files into context.
  • Bound by time: first/last timestamp per file, or filesystem mtime with --since.

Environment variables

All paths support ~ expansion. Use absolute paths when clarity matters.

Variable Purpose If unset
CODEX_HOME Codex data root (config.toml, state DB, sessions/, etc.) ~/.codex
CODEX_SESSIONS_ROOT Explicit directory containing the YYYY/MM/DD rollout tree $CODEX_HOME/sessions
CODEX_SESSIONS_BACKUP_ROOT Default destination root for backup (gzip mirror) ~/icloud/.codex/sessions
CODEX_HISTORY_PATH Override path to history.jsonl for codex_prompt_history_search.py $CODEX_HOME/history.jsonl

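CLI overrides: the backup, list, profile, and bounds subcommands accept --src (user-messages takes a single rollout file path instead); backup also accepts --dst or --dest for the backup root. Values given on the command line win over the environment defaults for that run.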

--archived (optional, default off): For backup, list, profile, and bounds, pass --archived to also include $CODEX_HOME/archived_sessions/ after finishing the same logic for --src. Archived rollouts use the same rollout-*.jsonl layout as active sessions. Does not apply to user-messages (single file).

Bundled tools (this folder)

File Role
codex_chat_history.py PEP 723 uv run --script helper (requires-python = ">=3.13.0,<3.14", dependencies = []): backup, list, profile, bounds, user-messages; optional --archived on the first four to include $CODEX_HOME/archived_sessions/ after --src.
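codex_prompt_history_search.py Search history.jsonl only: `--mode EXACT|ANY|FUZZY` (default EXACT), `--fuzzy-threshold`, `-j` / `--json`, and `--file` to override the history path.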
line_histogram.awk Optional: line-size histogram or extract specific line(s) from huge JSONL before parsing.
chmod +x codex_chat_history.py codex_prompt_history_search.py
./codex_chat_history.py --help
./codex_prompt_history_search.py --help

Example profile with histogram (from repo or gist):

./codex_chat_history.py profile --awk ./line_histogram.awk --since 1d

Example backup (default destination = $CODEX_SESSIONS_BACKUP_ROOT or ~/icloud/.codex/sessions):

./codex_chat_history.py backup --dry-run
./codex_chat_history.py backup --dest "$HOME/icloud/.codex/sessions"
./codex_chat_history.py list --archived

Search and extract (workflows)

1) Know where you are reading

Resolve the sessions root once:

echo "${CODEX_SESSIONS_ROOT:-${CODEX_HOME:-$HOME/.codex}/sessions}"

2) Profile before brute-force reading

find "${CODEX_SESSIONS_ROOT:-${CODEX_HOME:-$HOME/.codex}/sessions}" -type f -name 'rollout-*.jsonl' | sort \
  | while read -r f; do
      echo "=== $f ==="
      awk -f line_histogram.awk "$f"
    done

3) Bound time per file

./codex_chat_history.py bounds

4) Extract user-authored text

With jq:

jq -r 'select(.type == "event_msg" and .payload.type == "user_message") | .payload.message' \
  "$ROLL_FILE"

With the script (includes simple path redaction):

./codex_chat_history.py user-messages "$ROLL_FILE"

5) Redact before share

Replace host-specific paths and usernames in anything you copy out (<PATH>, <USER> placeholders).

Compaction signals

Rollouts may contain compaction-related content (compacted lines and/or event_msg variants such as context_compacted). Inspect with small slices:

grep -E 'context_compacted|"compacted"' "$ROLL_FILE" | head
jq -r 'select(.type == "compacted") | .payload.message' "$ROLL_FILE"

Related: history.jsonl

$CODEX_HOME/history.jsonl is a separate, smaller prompt history log (not the full rollout). Rollouts under sessions/ are the complete session transcript for resume/replay tooling.

Each line is JSON: {"session_id":"<uuid>","ts":<unix_seconds>,"text":"<message>"} (ts is Unix seconds; treat calendar YYYY-MM-DD in the first output column as UTC derived from that instant).

Search prompt history (codex_prompt_history_search.py)

chmod +x codex_prompt_history_search.py
./codex_prompt_history_search.py "your phrase"
  • --mode EXACT (default): substring match of the full phrase in text.
  • --mode ANY: phrase split on whitespace; match if any token appears as a substring in text.
  • --mode FUZZY: difflib.SequenceMatcher quick_ratio against full text (plus substring shortcut); tune with --fuzzy-threshold (default 0.65).
  • -j / --json: one JSON object per match (date, ts, session_id, text).
  • --file: override path (default $CODEX_HISTORY_PATH or $CODEX_HOME/history.jsonl).

Default (non-JSON) lines are CSV (four columns: UTC date, ts, session_id, text) suitable for log-style grepping.

Retention

Codex does not auto-expire rollout files by age; backups and pruning are operator concerns. See Codex release notes / issue trackers for current behavior of history.jsonl trimming vs rollouts.

Canonical source and releases

For smoke tests, optional git tag, and gist sync commands, see the Release checklist in the repo root README.md.

Copyright

Skill text and tooling © 2026 LiveMore Capital https://www.livemorecapital.com (where not otherwise noted).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment