simbo1905 · May 10, 2026 14:45
diff --git a/codex_chat_history.py b/codex_chat_history.py
 #!/usr/bin/env -S uv run --script
 # /// script
 # requires-python = ">=3.13.0,<3.14"
 # dependencies = []
 # ///
 """Codex session rollouts under $CODEX_HOME/sessions: backup, list, profile, bounds, user-messages."""

 from __future__ import annotations

 import argparse
 import gzip
 import json
 import os
 import re
 import shutil
 import subprocess
 import sys
 import time
 from pathlib import Path

 # Matches a run of one or more slash-led path segments such as "/usr/local/bin";
 # _redact() replaces every match with the "<PATH>" placeholder.
 PATH_RE = re.compile(r"(/[A-Za-z0-9._/@+~-]+)+")

 # Matches codex-rollout `ARCHIVED_SESSIONS_SUBDIR` (rollout files under CODEX_HOME).
 ARCHIVED_SESSIONS_SUBDIR = "archived_sessions"


 def _codex_home() -> Path:
    """Return the Codex home directory.

    A non-blank ``$CODEX_HOME`` is ``~``-expanded and resolved; otherwise the
    result is ``.codex`` under the current user's home (returned unresolved).
    """
    override = os.environ.get("CODEX_HOME", "").strip()
    if not override:
        return Path.home() / ".codex"
    return Path(override).expanduser().resolve()


 def _sessions_root() -> Path:
    """Return the directory that holds live session rollouts.

    A non-blank ``$CODEX_SESSIONS_ROOT`` wins (``~``-expanded, then resolved);
    otherwise the result is ``sessions`` under the Codex home directory.
    """
    override = os.environ.get("CODEX_SESSIONS_ROOT", "").strip()
    return Path(override).expanduser().resolve() if override else _codex_home() / "sessions"


 def _archived_sessions_root() -> Path:
    """Return the archived-rollouts directory (``archived_sessions`` under the Codex home)."""
    home = _codex_home()
    return home / ARCHIVED_SESSIONS_SUBDIR


 def _default_backup_root() -> Path:
    """Return the backup destination used when ``--dst`` is not given.

    A non-blank ``$CODEX_SESSIONS_BACKUP_ROOT`` is used when present; otherwise
    ``~/icloud/.codex/sessions``. Either way the path is ``~``-expanded and
    resolved.
    """
    configured = os.environ.get("CODEX_SESSIONS_BACKUP_ROOT", "").strip()
    if configured:
        target = Path(configured)
    else:
        target = Path.home() / "icloud" / ".codex" / "sessions"
    return target.expanduser().resolve()


 def _read_gzip_mtime(path: Path) -> int | None:
    data = path.read_bytes()[:10]
    if len(data) < 10 or data[0:2] != b"\x1f\x8b":
        return None
    return int.from_bytes(data[4:8], "little")


 def _should_skip_jsonl(src: Path, dst_gz: Path) -> bool:
    """Tell whether the archive *dst_gz* is already current for *src*.

    True only when *dst_gz* exists as a file and the mtime in its gzip header
    equals the source file's mtime truncated to whole seconds.
    """
    if dst_gz.is_file():
        source_mtime = int(src.stat().st_mtime)
        return _read_gzip_mtime(dst_gz) == source_mtime
    return False


 def _iter_rollouts(root: Path) -> list[Path]:
    """Return every ``rollout-*.jsonl`` path below *root*, sorted.

    The search is recursive. A *root* that is not a directory yields an empty
    list.
    """
    found: list[Path] = []
    if root.is_dir():
        found.extend(root.rglob("rollout-*.jsonl"))
        found.sort()
    return found


 def _mtime_cutoff_seconds(since: str) -> float | None:
    since = since.strip().lower()
    if since in ("", "all", "*"):
        return None
    now = time.time()
    if since.endswith("d"):
        days = float(since[:-1] or "0")
        return now - days * 86400.0
    if since.endswith("h"):
        hours = float(since[:-1] or "0")
        return now - hours * 3600.0
    raise SystemExit(f"invalid --since {since!r}; use e.g. 1d or 48h")


 def _backup_walk_one_tree(
    src_root: Path,
    dst_root: Path,
    *,
    dry_run: bool,
    force: bool,
 ) -> tuple[int, int]:
    """Mirror every file below *src_root* into *dst_root*.

    ``rollout-*.jsonl`` files are stored as ``<name>.jsonl.gz`` (level 9) with
    the source mtime recorded in the gzip header. Every other file is copied
    with ``shutil.copy2`` on each run.

    Each archive is first written to a sibling ``.partial`` file and renamed
    into place once complete. The gzip header, which carries the mtime the
    skip check relies on, is written before any data; writing in place would
    let an interrupted run leave a truncated archive that later runs treat as
    up to date.

    Args:
        src_root: Tree to read.
        dst_root: Tree to write; relative paths are preserved.
        dry_run: Print the planned ``gz`` / ``cp`` actions without writing.
        force: Re-compress rollouts even when the existing archive's header
            mtime already matches the source.

    Returns:
        ``(n_gz, n_other)``: rollouts compressed (or planned) and other files
        copied (or planned). Skipped rollouts are not counted.
    """
    n_gz = 0
    n_other = 0
    for path in sorted(src_root.rglob("*")):
        if not path.is_file():
            continue
        dst = dst_root / path.relative_to(src_root)
        is_rollout = path.suffix == ".jsonl" and path.name.startswith("rollout-")
        if not is_rollout:
            n_other += 1
            if dry_run:
                print(f"cp  {path} -> {dst}")
                continue
            dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(path, dst)
            continue

        dst_gz = dst.with_suffix(path.suffix + ".gz")
        if not force and _should_skip_jsonl(path, dst_gz):
            continue
        n_gz += 1
        if dry_run:
            print(f"gz  {path} -> {dst_gz}")
            continue
        dst_gz.parent.mkdir(parents=True, exist_ok=True)
        # Sample the mtime before reading: if the rollout changes during the
        # copy, the stored value no longer matches and the next run redoes it.
        mtime = int(path.stat().st_mtime)
        partial = dst_gz.with_name(dst_gz.name + ".partial")
        try:
            with path.open("rb") as raw, partial.open("wb") as sink:
                with gzip.GzipFile(
                    filename=str(dst_gz),
                    mode="wb",
                    compresslevel=9,
                    mtime=mtime,
                    fileobj=sink,
                ) as gz:
                    shutil.copyfileobj(raw, gz)
            os.replace(partial, dst_gz)
        finally:
            # No-op after a successful rename; removes the fragment on failure.
            partial.unlink(missing_ok=True)
        try:
            os.utime(dst_gz, (mtime, mtime))
        except OSError:
            # Best effort only: the skip check reads the gzip header, not the
            # archive's filesystem mtime.
            pass
    return n_gz, n_other


 def cmd_backup(args: argparse.Namespace) -> None:
    """Run the ``backup`` sub-command.

    Mirrors ``args.src`` into ``args.dst``. With ``args.archived`` set, the
    archived-sessions directory (when it exists) is mirrored into the same
    destination afterwards. Prints a one-line summary of the combined counts.
    Exits with status 1 when the source directory is missing.
    """
    src_root: Path = args.src.expanduser().resolve()
    dst_root: Path = args.dst.expanduser().resolve()
    if not src_root.is_dir():
        print(f"error: source directory missing: {src_root}", file=sys.stderr)
        sys.exit(1)

    totals = list(
        _backup_walk_one_tree(src_root, dst_root, dry_run=args.dry_run, force=args.force)
    )
    if args.archived:
        archived = _archived_sessions_root()
        if archived.is_dir():
            extra = _backup_walk_one_tree(
                archived, dst_root, dry_run=args.dry_run, force=args.force
            )
            totals = [have + more for have, more in zip(totals, extra)]
    n_gz, n_other = totals

    if args.dry_run:
        verb = "would write"
    else:
        verb = "wrote"
    print(f"{verb} {n_gz} rollout gzip file(s), {n_other} other file(s) -> {dst_root}")

 def cmd_list(args: argparse.Namespace) -> None:
    """Run the ``list`` sub-command.

    Prints one rollout path per line from ``args.src``, then (with
    ``args.archived``) from the archived-sessions directory, leaving out files
    whose mtime is older than the ``args.since`` window.
    """
    root: Path = args.src.expanduser().resolve()
    cutoff = _mtime_cutoff_seconds(args.since)

    def is_recent(candidate: Path) -> bool:
        # Kept only when there is no cutoff or the file is not older than it.
        return cutoff is None or not candidate.stat().st_mtime < cutoff

    for rollout in filter(is_recent, _iter_rollouts(root)):
        print(rollout)
    if args.archived:
        for rollout in filter(is_recent, _iter_rollouts(_archived_sessions_root())):
            print(rollout)


 def _profile_one_root(
    root: Path,
    awk: Path | None,
    cutoff: float | None,
 ) -> None:
    """Print a profile for every rollout below *root*.

    Args:
        root: Directory searched for rollouts.
        awk: Script run as ``awk -f <awk> <rollout>`` for each file; when
            ``None`` a built-in size / line-count line is printed instead.
        cutoff: Rollouts with an mtime below this epoch time are skipped;
            ``None`` disables the filter.
    """
    for path in _iter_rollouts(root):
        if cutoff is not None and path.stat().st_mtime < cutoff:
            continue
        # awk writes straight to the inherited stdout. Without a flush the
        # header can sit in Python's buffer and land after awk's report when
        # stdout is a pipe or a file.
        print(f"=== {path} ===", flush=bool(awk))
        if awk:
            # The awk exit status is deliberately ignored (check=False).
            subprocess.run(
                ["awk", "-f", str(awk), str(path)],
                check=False,
            )
            continue
        nbytes = path.stat().st_size
        with path.open("rb") as bf:
            nlines = sum(1 for _ in bf)
        print(f"(no --awk) size={nbytes} bytes lines={nlines}")


 def cmd_profile(args: argparse.Namespace) -> None:
    """Run the ``profile`` sub-command.

    Profiles the rollouts below ``args.src`` and, with ``args.archived``, those
    in the archived-sessions directory. Exits with status 1 when ``args.awk``
    is given but is not an existing file.
    """
    root: Path = args.src.expanduser().resolve()
    awk: Path | None = None
    if args.awk:
        awk = args.awk.expanduser().resolve()
        if not awk.is_file():
            print(f"error: awk script not found: {awk}", file=sys.stderr)
            sys.exit(1)

    cutoff = _mtime_cutoff_seconds(args.since)
    _profile_one_root(root, awk, cutoff)
    if not args.archived:
        return
    _profile_one_root(_archived_sessions_root(), awk, cutoff)


 def _first_last_timestamps(path: Path) -> tuple[str | None, str | None]:
    first_ts: str | None = None
    last_ts: str | None = None
    with path.open("r", encoding="utf-8", errors="replace") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            ts = obj.get("timestamp")
            if isinstance(ts, str):
                if first_ts is None:
                    first_ts = ts
                last_ts = ts
    return first_ts, last_ts


 def _bounds_emit_paths(paths: list[Path], cutoff: float | None) -> None:
    for path in paths:
        if cutoff is not None and path.stat().st_mtime < cutoff:
            continue
        first_ts, last_ts = _first_last_timestamps(path)
        print(path)
        print(f"  FIRST: {first_ts}")
        print(f"  LAST:  {last_ts}")


 def _bounds_glob_rollouts(base: Path, patterns: list[str]) -> list[Path]:
    """Return the sorted, de-duplicated rollout files under *base* matching any pattern."""
    hits: set[Path] = set()
    for pattern in patterns:
        try:
            candidates = list(base.glob(pattern))
        except (NotImplementedError, ValueError) as exc:
            # pathlib rejects absolute and empty patterns; report them as a
            # usage error rather than a traceback.
            raise SystemExit(f"invalid --glob {pattern!r}: {exc}") from None
        hits.update(
            p.resolve() for p in candidates if p.is_file() and p.name.startswith("rollout-")
        )
    return sorted(hits)


 def cmd_bounds(args: argparse.Namespace) -> None:
    """Run the ``bounds`` sub-command.

    Prints the first and last timestamp of each rollout under ``args.src``,
    then (with ``args.archived``, when that directory exists) of each rollout
    in the archived-sessions directory. When ``args.glob`` is given, only
    files matching those patterns (relative to the root being scanned) whose
    name starts with ``rollout-`` are considered; otherwise all rollouts are.
    Files older than the ``args.since`` window are skipped.

    Raises:
        SystemExit: ``--since`` or a ``--glob`` pattern is invalid.
    """
    root: Path = args.src.expanduser().resolve()
    cutoff = _mtime_cutoff_seconds(args.since)

    def rollouts_under(base: Path) -> list[Path]:
        # The same selection rule serves both the live and the archived root.
        if args.glob:
            return _bounds_glob_rollouts(base, args.glob)
        return _iter_rollouts(base)

    _bounds_emit_paths(rollouts_under(root), cutoff)
    if args.archived:
        archived = _archived_sessions_root()
        if archived.is_dir():
            _bounds_emit_paths(rollouts_under(archived), cutoff)


 def _redact(text: str) -> str:
    """Return *text* with filesystem paths replaced by placeholders.

    Every ``PATH_RE`` match becomes ``<PATH>``; afterwards ``/Users/<name>``
    and ``/home/<name>`` prefixes are rewritten to hide the user name.
    """
    redacted = PATH_RE.sub("<PATH>", text)
    # NOTE(review): PATH_RE already consumes any "/Users/..." or "/home/..."
    # run, so these two substitutions look unreachable in this order. Confirm
    # whether they were meant to run first or can be dropped.
    home_rules = (
        (r"/Users/[^/\s]+", "/Users/<USER>"),
        (r"/home/[^/\s]+", "/home/<USER>"),
    )
    for pattern, placeholder in home_rules:
        redacted = re.sub(pattern, placeholder, redacted)
    return redacted


 def cmd_user_messages(args: argparse.Namespace) -> None:
    """Run the ``user-messages`` sub-command.

    Reads the rollout ``args.file`` line by line and prints, numbered and
    redacted, the ``message`` of every ``event_msg`` record whose payload has
    type ``user_message``. Blank lines, invalid JSON, JSON values that are not
    objects, and records of any other shape are ignored. A note is written to
    stderr when nothing was found. Exits with status 1 when ``args.file`` is
    not a file.
    """
    path: Path = args.file.expanduser().resolve()
    if not path.is_file():
        print(f"error: not a file: {path}", file=sys.stderr)
        sys.exit(1)

    n = 0
    with path.open("r", encoding="utf-8", errors="replace") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object (list, string, number, null)
            # has no .get(); skip it instead of crashing.
            if not isinstance(obj, dict) or obj.get("type") != "event_msg":
                continue
            payload = obj.get("payload")
            if not isinstance(payload, dict) or payload.get("type") != "user_message":
                continue
            msg = payload.get("message")
            if isinstance(msg, str) and msg:
                n += 1
                print(f"[MSG {n}]: {_redact(msg)}")
    if n == 0:
        print("(no user_message events found)", file=sys.stderr)


 def main() -> None:
    """Parse the command line and dispatch to the chosen sub-command handler.

    Sub-commands: ``backup``, ``list``, ``profile``, ``bounds`` and
    ``user-messages``. For ``backup`` without ``--dst`` the destination falls
    back to the default backup root.
    """
    parser = argparse.ArgumentParser(
        description="Codex sessions: backup (gzip mirror), list, profile, bounds, user-messages.",
    )
    commands = parser.add_subparsers(dest="cmd", required=True)

    def add_src(cmd: argparse.ArgumentParser) -> None:
        # Shared by every sub-command that walks a sessions tree.
        cmd.add_argument(
            "--src",
            type=Path,
            default=_sessions_root(),
            help="Sessions dir (default: $CODEX_SESSIONS_ROOT or $CODEX_HOME/sessions)",
        )

    def add_archived(cmd: argparse.ArgumentParser, text: str) -> None:
        cmd.add_argument("--archived", action="store_true", help=text)

    backup = commands.add_parser(
        "backup",
        help="Gzip mirror of sessions tree; default dest from env (see SKILL.md)",
    )
    add_src(backup)
    backup.add_argument(
        "--dst",
        "--dest",
        type=Path,
        default=None,
        dest="dst",
        help="Backup root (default: $CODEX_SESSIONS_BACKUP_ROOT or ~/icloud/.codex/sessions)",
    )
    backup.add_argument("-n", "--dry-run", action="store_true")
    backup.add_argument("-f", "--force", action="store_true")
    add_archived(
        backup,
        f"After --src, also mirror $CODEX_HOME/{ARCHIVED_SESSIONS_SUBDIR} into the same --dst",
    )
    backup.set_defaults(func=cmd_backup)

    listing = commands.add_parser("list", help="List rollout paths under --src")
    add_src(listing)
    listing.add_argument("--since", default="all", help="e.g. 1d, 48h, or all")
    add_archived(
        listing,
        f"After --src, also list rollouts under $CODEX_HOME/{ARCHIVED_SESSIONS_SUBDIR}",
    )
    listing.set_defaults(func=cmd_list)

    profile = commands.add_parser("profile", help="Histogram each rollout (optional awk)")
    add_src(profile)
    profile.add_argument(
        "--awk",
        type=Path,
        default=None,
        help="Path to line_histogram.awk (beside this script in the repo)",
    )
    profile.add_argument("--since", default="all")
    add_archived(
        profile,
        f"After --src, also profile rollouts under $CODEX_HOME/{ARCHIVED_SESSIONS_SUBDIR}",
    )
    profile.set_defaults(func=cmd_profile)

    bounds = commands.add_parser("bounds", help="First and last timestamp per rollout")
    add_src(bounds)
    bounds.add_argument(
        "--glob",
        action="append",
        help="Optional glob(s) relative to --src (repeatable), default all rollouts",
    )
    bounds.add_argument("--since", default="all")
    add_archived(
        bounds,
        f"After --src (and any --glob under --src), also bounds rollouts under "
        f"$CODEX_HOME/{ARCHIVED_SESSIONS_SUBDIR} (same --glob relative to archived root if set)",
    )
    bounds.set_defaults(func=cmd_bounds)

    user_messages = commands.add_parser(
        "user-messages", help="Extract user_message text from one rollout"
    )
    user_messages.add_argument("file", type=Path)
    user_messages.set_defaults(func=cmd_user_messages)

    args = parser.parse_args()
    # --dst defaults to None so the fallback is only computed for `backup`.
    if args.cmd == "backup" and args.dst is None:
        args.dst = _default_backup_root()
    args.func(args)


 # Run the CLI only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
diff --git a/codex_prompt_history_search.py b/codex_prompt_history_search.py
 #!/usr/bin/env -S uv run --script
 # /// script
 # requires-python = ">=3.13.0,<3.14"
 # dependencies = []
 # ///
 """Search ~/.codex/history.jsonl (prompt history: session_id, ts, text)."""

 from __future__ import annotations

 import argparse
 import csv
 import io
 import json
 import os
 import re
 import sys
 from dataclasses import dataclass
 from datetime import UTC
 from datetime import datetime
 from difflib import SequenceMatcher
 from enum import Enum
 from pathlib import Path


 def _codex_home() -> Path:
    """Return the Codex home directory.

    Uses ``$CODEX_HOME`` (``~``-expanded and resolved) when it is set to a
    non-blank value, else ``.codex`` under the user's home directory.
    """
    configured = os.environ.get("CODEX_HOME", "").strip()
    if configured:
        home = Path(configured).expanduser().resolve()
    else:
        home = Path.home() / ".codex"
    return home


 def _history_path() -> Path:
    """Return the prompt-history file to search.

    A non-blank ``$CODEX_HISTORY_PATH`` wins (``~``-expanded, then resolved);
    otherwise the result is ``history.jsonl`` in the Codex home directory.
    """
    override = os.environ.get("CODEX_HISTORY_PATH", "").strip()
    return Path(override).expanduser().resolve() if override else _codex_home() / "history.jsonl"


 class MatchMode(str, Enum):
    """How the search phrase is compared with each history entry's text."""

    EXACT = "EXACT"  # the phrase occurs verbatim as a substring
    ANY = "ANY"  # at least one whitespace-separated word of the phrase occurs
    FUZZY = "FUZZY"  # case-insensitive substring, or difflib quick_ratio >= threshold


 @dataclass(frozen=True)
 class HistoryRow:
    session_id: str
    ts: int
    text: str

    def date_utc(self) -> str:
        return datetime.fromtimestamp(self.ts, tz=UTC).strftime("%Y-%m-%d")

    def as_csv_log4j(self) -> str:
        buf = io.StringIO()
        w = csv.writer(buf, lineterminator="")
        w.writerow([self.date_utc(), str(self.ts), self.session_id, self.text])
        return buf.getvalue()

    def as_json_line(self) -> str:
        return json.dumps(
            {
                "date": self.date_utc(),
                "ts": self.ts,
                "session_id": self.session_id,
                "text": self.text,
            },
            ensure_ascii=False,
        )


 def _word_tokens(phrase: str) -> list[str]:
    """Split *phrase* on runs of whitespace and return the non-empty pieces."""
    pieces = re.split(r"\s+", phrase.strip())
    return list(filter(None, pieces))


 def _matches_exact(phrase: str, text: str) -> bool:
    """Return True when *phrase* occurs verbatim (case-sensitively) inside *text*."""
    is_substring = phrase in text
    return is_substring


 def _matches_any(phrase: str, text: str) -> bool:
    """Return True when at least one word of *phrase* occurs as a substring of *text*.

    A phrase with no words never matches.
    """
    for word in _word_tokens(phrase):
        if word in text:
            return True
    return False


 def _matches_fuzzy(phrase: str, text: str, threshold: float) -> bool:
    """Return True when *phrase* loosely matches *text*, ignoring case.

    A match is either the stripped phrase occurring as a substring of the
    text, or ``SequenceMatcher.quick_ratio()`` between the two reaching
    *threshold*. A blank phrase never matches.
    """
    needle = phrase.strip().lower()
    if not needle:
        return False
    hay = text.lower()
    return needle in hay or SequenceMatcher(None, needle, hay).quick_ratio() >= threshold


 def _parse_line(line: str) -> HistoryRow | None:
    """Parse one history line into a ``HistoryRow``.

    Returns:
        The row, or ``None`` when the line is blank, is not valid JSON, is not
        a JSON object, lacks a string ``session_id`` or ``text``, or has a
        ``ts`` that is missing, boolean, or not convertible to an integer.
    """
    line = line.strip()
    if not line:
        return None
    try:
        obj = json.loads(line)
    except json.JSONDecodeError:
        return None
    # Valid JSON that is not an object (list, string, number, null) has no .get().
    if not isinstance(obj, dict):
        return None
    sid = obj.get("session_id")
    ts = obj.get("ts")
    text = obj.get("text")
    if not isinstance(sid, str) or not isinstance(text, str):
        return None
    if isinstance(ts, bool) or ts is None:
        return None
    try:
        ts_i = int(ts)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: json.loads accepts Infinity, which int() rejects.
        return None
    return HistoryRow(session_id=sid, ts=ts_i, text=text)


 def cmd_search(args: argparse.Namespace) -> None:
    """Print every history row whose text matches ``args.phrase``.

    ``args.mode`` selects the comparison (EXACT, ANY or FUZZY, the last using
    ``args.fuzzy_threshold``). Rows are printed as JSON lines when
    ``args.json`` is set, otherwise as CSV records. Lines that do not parse
    into a row are skipped. Exits with status 1 when ``args.file`` is not a
    file.
    """
    path: Path = args.file.expanduser().resolve()
    if not path.is_file():
        print(f"error: history file not found: {path}", file=sys.stderr)
        sys.exit(1)

    mode = MatchMode(args.mode.upper())
    phrase: str = args.phrase
    threshold = float(args.fuzzy_threshold)

    def is_hit(row) -> bool:
        # One comparison per mode; FUZZY is the only remaining member.
        if mode is MatchMode.EXACT:
            return _matches_exact(phrase, row.text)
        if mode is MatchMode.ANY:
            return _matches_any(phrase, row.text)
        return _matches_fuzzy(phrase, row.text, threshold)

    with path.open("r", encoding="utf-8", errors="replace") as fh:
        for row in map(_parse_line, fh):
            if row is None or not is_hit(row):
                continue
            print(row.as_json_line() if args.json else row.as_csv_log4j())


 def main() -> None:
    """Parse the command line and run the history search.

    The positional words are joined with single spaces into one phrase, and
    ``--file`` falls back to the default history path when omitted.
    """
    parser = argparse.ArgumentParser(
        description="Search Codex prompt history (history.jsonl: session_id, ts, text).",
    )
    # Declared as data and registered in order, so --help lists them this way.
    option_specs = [
        (
            ("phrase",),
            {
                "nargs": "+",
                "help": "Search phrase (for EXACT, substring match in text field)",
            },
        ),
        (
            ("--file",),
            {
                "type": Path,
                "default": None,
                "help": "Path to history.jsonl (default: $CODEX_HISTORY_PATH or $CODEX_HOME/history.jsonl)",
            },
        ),
        (
            ("-j", "--json"),
            {
                "action": "store_true",
                "help": "Emit one JSON object per matching line",
            },
        ),
        (
            ("--mode",),
            {
                "choices": [mode.value for mode in MatchMode],
                "default": MatchMode.EXACT.value,
                "help": "EXACT substring | ANY any whole word from phrase | FUZZY difflib quick_ratio",
            },
        ),
        (
            ("--fuzzy-threshold",),
            {
                "type": float,
                "default": 0.65,
                "help": "FUZZY mode: minimum SequenceMatcher.quick_ratio (default 0.65)",
            },
        ),
    ]
    for flags, spec in option_specs:
        parser.add_argument(*flags, **spec)

    args = parser.parse_args()
    args.phrase = " ".join(args.phrase)
    if args.file is None:
        args.file = _history_path()
    cmd_search(args)


 # Run the CLI only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
diff --git a/line_histogram.awk b/line_histogram.awk
 #!/usr/bin/awk -f

 # line_histogram.awk - Profile file line sizes or extract specific lines.
 # Useful for checking large JSONL before reading it with jq or an agent.
 #
 # Usage:
 # ./line_histogram.awk <file>
 # ./line_histogram.awk -v mode=extract -v line=5 <file>
 # ./line_histogram.awk -v mode=extract -v start=10 -v end=20 <file>

 # BEGIN: runs once before any input is read; fills in defaults for variables
 # the caller left unset and zeroes the running totals.
 BEGIN {
    # No mode given: produce the histogram report.
    if (mode == "") {
        mode = "histogram"
    }

    total_bytes = 0
    total_lines = 0

    # The histogram report goes to `outfile` when that variable is set,
    # otherwise to standard output.
    if (outfile == "") {
        out = "/dev/stdout"
    } else {
        out = outfile
    }
 }

 # Per-record rule: count the line and remember its size. The line text is
 # kept only in extract mode, the sole reader of lines[]; a histogram run
 # therefore no longer holds the whole file in memory.
 {
    total_lines++
    line_sizes[total_lines] = length($0)
    if (mode == "extract") {
        lines[total_lines] = $0
    }
    total_bytes += line_sizes[total_lines]
 }

 # END: runs after all input is read. In extract mode it prints the requested
 # line or line range; otherwise it prints the size histogram to `out`.
 END {
    if (mode == "extract") {
        # Extracted lines are printed with a plain `print`, not redirected to `out`.
        if (line != "") {
            # Single line requested with -v line=N.
            if (line >= 1 && line <= total_lines) {
                print lines[line]
            } else {
                print "Error: line " line " out of range (1-" total_lines ")" > "/dev/stderr"
                exit 1
            }
        } else if (start != "" && end != "") {
            # Range requested with -v start=X -v end=Y; clamp it to the file.
            if (start < 1) {
                start = 1
            }
            if (end > total_lines) {
                end = total_lines
            }
            if (start > end) {
                print "Error: start " start " > end " end > "/dev/stderr"
                exit 1
            }
            for (i = start; i <= end; i++) {
                print lines[i]
            }
        } else {
            print "Error: extract mode requires -v line=N or -v start=X -v end=Y" > "/dev/stderr"
            exit 1
        }
        exit 0
    }

    # Histogram report header. Sizes come from length($0), so line terminators
    # are not included in the totals.
    print "File: " FILENAME > out
    print "Total bytes: " total_bytes > out
    print "Total lines: " total_lines > out
    print "" > out
    print "Bucket Distribution:" > out
    print "" > out

    if (total_lines == 0) {
        print "Empty file" > out
        exit 0
    }

    # Up to 10 lines: one bucket per line. Otherwise 10 buckets of
    # int(total_lines / 10) lines each, the last absorbing the remainder.
    if (total_lines <= 10) {
        num_buckets = total_lines
        bucket_size = 1
    } else {
        num_buckets = 10
        bucket_size = int(total_lines / 10)
    }

    for (i = 1; i <= 10; i++) {
        bucket_bytes[i] = 0
    }

    # Add every line's size to its bucket.
    for (line_num = 1; line_num <= total_lines; line_num++) {
        if (total_lines <= 10) {
            bucket = line_num
        } else {
            bucket = int((line_num - 1) / bucket_size) + 1
            # Lines past the tenth full bucket fall into bucket 10.
            if (bucket > 10) {
                bucket = 10
            }
        }
        bucket_bytes[bucket] += line_sizes[line_num]
    }

    # The largest bucket sets the scale of the bars.
    max_bytes = 0
    for (i = 1; i <= num_buckets; i++) {
        if (bucket_bytes[i] > max_bytes) {
            max_bytes = bucket_bytes[i]
        }
    }

    printf "%-15s | %-12s | %-40s\n", "Line Range", "Bytes", "Distribution" > out
    print "-----------------+--------------+------------------------------------------" > out

    # Always print ten rows; for short files the unused rows show "-" and 0 bytes.
    for (i = 1; i <= 10; i++) {
        if (total_lines <= 10) {
            if (i <= total_lines) {
                start_line = i
                end_line = i
            } else {
                start_line = 0
                end_line = 0
            }
        } else {
            start_line = (i - 1) * bucket_size + 1
            if (i == 10) {
                end_line = total_lines
            } else {
                end_line = i * bucket_size
            }
        }

        if (start_line == 0) {
            range = sprintf("%7s", "-")
        } else if (start_line == end_line) {
            range = sprintf("%7d", start_line)
        } else {
            range = sprintf("%d-%d", start_line, end_line)
        }

        # Bar length is proportional to the bucket's bytes; the largest bucket gets 40 '#'.
        if (max_bytes > 0) {
            bar_len = int((bucket_bytes[i] / max_bytes) * 40)
        } else {
            bar_len = 0
        }

        bar = ""
        for (j = 1; j <= bar_len; j++) {
            bar = bar "#"
        }

        printf "%-15s | %12d | %s\n", range, bucket_bytes[i], bar > out
    }

    print "-----------------+--------------+------------------------------------------" > out
 }

 # Copyright (c) 2026 LiveMore Capital www.livemorecapital.com
diff --git a/SKILL.md b/SKILL.md
	#!/usr/bin/env -S uv run --script
	# /// script
	# requires-python = ">=3.13.0,<3.14"
	# dependencies = []
	# ///
	"""Codex session rollouts under $CODEX_HOME/sessions: backup, list, profile, bounds, user-messages."""

	from __future__ import annotations

	import argparse
	import gzip
	import json
	import os
	import re
	import shutil
	import subprocess
	import sys
	import time
	from pathlib import Path

	PATH_RE = re.compile(r"(/[A-Za-z0-9._/@+~-]+)+")

	# Matches codex-rollout `ARCHIVED_SESSIONS_SUBDIR` (rollout files under CODEX_HOME).
	ARCHIVED_SESSIONS_SUBDIR = "archived_sessions"


	def _codex_home() -> Path:
	raw = os.environ.get("CODEX_HOME", "").strip()
	if raw:
	return Path(raw).expanduser().resolve()
	return Path.home() / ".codex"


	def _sessions_root() -> Path:
	raw = os.environ.get("CODEX_SESSIONS_ROOT", "").strip()
	if raw:
	return Path(raw).expanduser().resolve()
	return _codex_home() / "sessions"


	def _archived_sessions_root() -> Path:
	return _codex_home() / ARCHIVED_SESSIONS_SUBDIR


	def _default_backup_root() -> Path:
	env = os.environ.get("CODEX_SESSIONS_BACKUP_ROOT", "").strip()
	if env:
	return Path(env).expanduser().resolve()
	return (Path.home() / "icloud" / ".codex" / "sessions").expanduser().resolve()


	def _read_gzip_mtime(path: Path) -> int | None:
	data = path.read_bytes()[:10]
	if len(data) < 10 or data[0:2] != b"\x1f\x8b":
	return None
	return int.from_bytes(data[4:8], "little")


	def _should_skip_jsonl(src: Path, dst_gz: Path) -> bool:
	if not dst_gz.is_file():
	return False
	want = int(src.stat().st_mtime)
	got = _read_gzip_mtime(dst_gz)
	return got == want


	def _iter_rollouts(root: Path) -> list[Path]:
	if not root.is_dir():
	return []
	return sorted(root.rglob("rollout-*.jsonl"))


	def _mtime_cutoff_seconds(since: str) -> float | None:
	since = since.strip().lower()
	if since in ("", "all", "*"):
	return None
	now = time.time()
	if since.endswith("d"):
	days = float(since[:-1] or "0")
	return now - days * 86400.0
	if since.endswith("h"):
	hours = float(since[:-1] or "0")
	return now - hours * 3600.0
	raise SystemExit(f"invalid --since {since!r}; use e.g. 1d or 48h")


	def _backup_walk_one_tree(
	src_root: Path,
	dst_root: Path,
	*,
	dry_run: bool,
	force: bool,
	) -> tuple[int, int]:
	n_gz = 0
	n_other = 0
	for path in sorted(src_root.rglob("*")):
	if not path.is_file():
	continue
	rel = path.relative_to(src_root)
	dst = dst_root / rel
	if path.suffix == ".jsonl" and path.name.startswith("rollout-"):
	dst_gz = dst.with_suffix(path.suffix + ".gz")
	if not force and _should_skip_jsonl(path, dst_gz):
	continue
	n_gz += 1
	if dry_run:
	print(f"gz {path} -> {dst_gz}")
	continue
	dst_gz.parent.mkdir(parents=True, exist_ok=True)
	mtime = int(path.stat().st_mtime)
	with path.open("rb") as raw:
	with gzip.GzipFile(
	filename=str(dst_gz),
	mode="wb",
	compresslevel=9,
	mtime=mtime,
	) as gz:
	shutil.copyfileobj(raw, gz)
	try:
	os.utime(dst_gz, (mtime, mtime))
	except OSError:
	pass
	else:
	n_other += 1
	if dry_run:
	print(f"cp {path} -> {dst}")
	continue
	dst.parent.mkdir(parents=True, exist_ok=True)
	shutil.copy2(path, dst)
	return n_gz, n_other


	def cmd_backup(args: argparse.Namespace) -> None:
	src_root: Path = args.src.expanduser().resolve()
	dst_root: Path = args.dst.expanduser().resolve()
	if not src_root.is_dir():
	print(f"error: source directory missing: {src_root}", file=sys.stderr)
	sys.exit(1)

	n_gz, n_other = _backup_walk_one_tree(
	src_root,
	dst_root,
	dry_run=args.dry_run,
	force=args.force,
	)
	if args.archived:
	archived = _archived_sessions_root()
	if archived.is_dir():
	g2, o2 = _backup_walk_one_tree(
	archived,
	dst_root,
	dry_run=args.dry_run,
	force=args.force,
	)
	n_gz += g2
	n_other += o2

	verb = "would write" if args.dry_run else "wrote"
	print(f"{verb} {n_gz} rollout gzip file(s), {n_other} other file(s) -> {dst_root}")


	def cmd_list(args: argparse.Namespace) -> None:
	root: Path = args.src.expanduser().resolve()
	cutoff = _mtime_cutoff_seconds(args.since)
	paths = _iter_rollouts(root)
	for p in paths:
	if cutoff is not None and p.stat().st_mtime < cutoff:
	continue
	print(p)
	if args.archived:
	for p in _iter_rollouts(_archived_sessions_root()):
	if cutoff is not None and p.stat().st_mtime < cutoff:
	continue
	print(p)


	def _profile_one_root(
	root: Path,
	awk: Path | None,
	cutoff: float | None,
	) -> None:
	for path in _iter_rollouts(root):
	if cutoff is not None and path.stat().st_mtime < cutoff:
	continue
	print(f"=== {path} ===")
	if awk:
	subprocess.run(
	["awk", "-f", str(awk), str(path)],
	check=False,
	)
	else:
	nbytes = path.stat().st_size
	nlines = 0
	with path.open("rb") as bf:
	for _ in bf:
	nlines += 1
	print(f"(no --awk) size={nbytes} bytes lines={nlines}")


	def cmd_profile(args: argparse.Namespace) -> None:
	root: Path = args.src.expanduser().resolve()
	awk: Path | None = args.awk.expanduser().resolve() if args.awk else None
	if awk and not awk.is_file():
	print(f"error: awk script not found: {awk}", file=sys.stderr)
	sys.exit(1)

	cutoff = _mtime_cutoff_seconds(args.since)
	_profile_one_root(root, awk, cutoff)
	if args.archived:
	_profile_one_root(_archived_sessions_root(), awk, cutoff)


	def _first_last_timestamps(path: Path) -> tuple[str | None, str | None]:
	first_ts: str | None = None
	last_ts: str | None = None
	with path.open("r", encoding="utf-8", errors="replace") as fh:
	for line in fh:
	line = line.strip()
	if not line:
	continue
	try:
	obj = json.loads(line)
	except json.JSONDecodeError:
	continue
	ts = obj.get("timestamp")
	if isinstance(ts, str):
	if first_ts is None:
	first_ts = ts
	last_ts = ts
	return first_ts, last_ts


	def _bounds_emit_paths(paths: list[Path], cutoff: float | None) -> None:
	for path in paths:
	if cutoff is not None and path.stat().st_mtime < cutoff:
	continue
	first_ts, last_ts = _first_last_timestamps(path)
	print(path)
	print(f" FIRST: {first_ts}")
	print(f" LAST: {last_ts}")


	def cmd_bounds(args: argparse.Namespace) -> None:
	root: Path = args.src.expanduser().resolve()
	cutoff = _mtime_cutoff_seconds(args.since)
	if args.glob:
	seen: list[Path] = []
	for g in args.glob:
	seen.extend(root.glob(g))
	paths = sorted(
	{p.resolve() for p in seen if p.is_file() and p.name.startswith("rollout-")}
	)
	else:
	paths = _iter_rollouts(root)
	_bounds_emit_paths(paths, cutoff)
	if args.archived:
	archived = _archived_sessions_root()
	if archived.is_dir():
	if args.glob:
	seen_ar: list[Path] = []
	for g in args.glob:
	seen_ar.extend(archived.glob(g))
	paths_ar = sorted(
	{
	p.resolve()
	for p in seen_ar
	if p.is_file() and p.name.startswith("rollout-")
	}
	)
	else:
	paths_ar = _iter_rollouts(archived)
	_bounds_emit_paths(paths_ar, cutoff)


	def _redact(text: str) -> str:
	text = PATH_RE.sub("<PATH>", text)
	text = re.sub(r"/Users/[^/\s]+", "/Users/<USER>", text)
	text = re.sub(r"/home/[^/\s]+", "/home/<USER>", text)
	return text


	def cmd_user_messages(args: argparse.Namespace) -> None:
	path: Path = args.file.expanduser().resolve()
	if not path.is_file():
	print(f"error: not a file: {path}", file=sys.stderr)
	sys.exit(1)

	n = 0
	with path.open("r", encoding="utf-8", errors="replace") as fh:
	for line in fh:
	line = line.strip()
	if not line:
	continue
	try:
	obj = json.loads(line)
	except json.JSONDecodeError:
	continue
	if obj.get("type") != "event_msg":
	continue
	payload = obj.get("payload")
	if not isinstance(payload, dict):
	continue
	if payload.get("type") != "user_message":
	continue
	msg = payload.get("message")
	if isinstance(msg, str) and msg:
	n += 1
	print(f"[MSG {n}]: {_redact(msg)}")
	if n == 0:
	print("(no user_message events found)", file=sys.stderr)


	def main() -> None:
	parser = argparse.ArgumentParser(
	description="Codex sessions: backup (gzip mirror), list, profile, bounds, user-messages.",
	)
	sub = parser.add_subparsers(dest="cmd", required=True)

	sessions_help = (
	"Sessions dir (default: $CODEX_SESSIONS_ROOT or $CODEX_HOME/sessions)"
	)
	p_b = sub.add_parser(
	"backup",
	help="Gzip mirror of sessions tree; default dest from env (see SKILL.md)",
	)
	p_b.add_argument("--src", type=Path, default=_sessions_root(), help=sessions_help)
	p_b.add_argument(
	"--dst",
	"--dest",
	type=Path,
	default=None,
	dest="dst",
	help="Backup root (default: $CODEX_SESSIONS_BACKUP_ROOT or ~/icloud/.codex/sessions)",
	)
	p_b.add_argument("-n", "--dry-run", action="store_true")
	p_b.add_argument("-f", "--force", action="store_true")
	p_b.add_argument(
	"--archived",
	action="store_true",
	help=f"After --src, also mirror $CODEX_HOME/{ARCHIVED_SESSIONS_SUBDIR} into the same --dst",
	)
	p_b.set_defaults(func=cmd_backup)

	p_l = sub.add_parser("list", help="List rollout paths under --src")
	p_l.add_argument("--src", type=Path, default=_sessions_root(), help=sessions_help)
	p_l.add_argument("--since", default="all", help="e.g. 1d, 48h, or all")
	p_l.add_argument(
	"--archived",
	action="store_true",
	help=f"After --src, also list rollouts under $CODEX_HOME/{ARCHIVED_SESSIONS_SUBDIR}",
	)
	p_l.set_defaults(func=cmd_list)

	p_p = sub.add_parser("profile", help="Histogram each rollout (optional awk)")
	p_p.add_argument("--src", type=Path, default=_sessions_root(), help=sessions_help)
	p_p.add_argument(
	"--awk",
	type=Path,
	default=None,
	help="Path to line_histogram.awk (beside this script in the repo)",
	)
	p_p.add_argument("--since", default="all")
	p_p.add_argument(
	"--archived",
	action="store_true",
	help=f"After --src, also profile rollouts under $CODEX_HOME/{ARCHIVED_SESSIONS_SUBDIR}",
	)
	p_p.set_defaults(func=cmd_profile)

	p_x = sub.add_parser("bounds", help="First and last timestamp per rollout")
	p_x.add_argument("--src", type=Path, default=_sessions_root(), help=sessions_help)
	p_x.add_argument(
	"--glob",
	action="append",
	help="Optional glob(s) relative to --src (repeatable), default all rollouts",
	)
	p_x.add_argument("--since", default="all")
	p_x.add_argument(
	"--archived",
	action="store_true",
	help=(
	f"After --src (and any --glob under --src), also bounds rollouts under "
	f"$CODEX_HOME/{ARCHIVED_SESSIONS_SUBDIR} (same --glob relative to archived root if set)"
	),
	)
	p_x.set_defaults(func=cmd_bounds)

	p_u = sub.add_parser("user-messages", help="Extract user_message text from one rollout")
	p_u.add_argument("file", type=Path)
	p_u.set_defaults(func=cmd_user_messages)

	args = parser.parse_args()
	if args.cmd == "backup" and args.dst is None:
	args.dst = _default_backup_root()
	args.func(args)


	# Script entry point: run the CLI only when this file is executed directly,
	# not when it is imported as a module.
	if __name__ == "__main__":
	main()
	#!/usr/bin/awk -f

	# line_histogram.awk - Profile file line sizes or extract specific lines.
	# Useful for checking large JSONL before reading it with jq or an agent.
	#
	# Usage:
	# ./line_histogram.awk <file>
	# ./line_histogram.awk -v mode=extract -v line=5 <file>
	# ./line_histogram.awk -v mode=extract -v start=10 -v end=20 <file>

	# Establish defaults before any input is read.
	#   mode    - "histogram" unless the caller passes -v mode=...
	#   outfile - optional -v outfile=PATH; the histogram report is written there,
	#             otherwise it goes to /dev/stdout.
	BEGIN {
	    if (mode == "") mode = "histogram"

	    # Running totals accumulated by the per-record rule.
	    total_lines = 0
	    total_bytes = 0

	    out = (outfile == "") ? "/dev/stdout" : outfile
	}

	# Per-record accounting: count the line and record its length for the histogram.
	# length($0) does not include the record terminator (the trailing newline).
	# The line text itself is retained only in extract mode, the only mode that
	# prints it back, so histogram runs over huge files do not hold the whole
	# file in memory.
	{
	    total_lines++
	    line_sizes[total_lines] = length($0)
	    total_bytes += line_sizes[total_lines]
	    if (mode == "extract") {
	        lines[total_lines] = $0
	    }
	}

	# Emit the report once all input has been read.
	#   extract mode   - print one line (-v line=N) or a clamped range
	#                    (-v start=X -v end=Y) to stdout; exit 1 on bad arguments.
	#   histogram mode - print totals and a ten-row table of bytes per line range
	#                    to `out` (set in BEGIN).
	END {
	    if (mode == "extract") {
	        if (line != "") {
	            # Single-line extraction.
	            if (line >= 1 && line <= total_lines) {
	                print lines[line]
	            } else {
	                print "Error: line " line " out of range (1-" total_lines ")" > "/dev/stderr"
	                exit 1
	            }
	        } else if (start != "" && end != "") {
	            # Range extraction: clamp to the file, then reject an empty range.
	            if (start < 1) {
	                start = 1
	            }
	            if (end > total_lines) {
	                end = total_lines
	            }
	            if (start > end) {
	                print "Error: start " start " > end " end > "/dev/stderr"
	                exit 1
	            }
	            for (i = start; i <= end; i++) {
	                print lines[i]
	            }
	        } else {
	            print "Error: extract mode requires -v line=N or -v start=X -v end=Y" > "/dev/stderr"
	            exit 1
	        }
	        exit 0
	    }

	    print "File: " FILENAME > out
	    print "Total bytes: " total_bytes > out
	    print "Total lines: " total_lines > out
	    print "" > out
	    print "Bucket Distribution:" > out
	    print "" > out

	    if (total_lines == 0) {
	        print "Empty file" > out
	        exit 0
	    }

	    # Up to 10 lines: one bucket per line. Otherwise ten buckets of
	    # int(total_lines / 10) lines each, with the remainder folded into bucket 10.
	    if (total_lines <= 10) {
	        num_buckets = total_lines
	        bucket_size = 1
	    } else {
	        num_buckets = 10
	        bucket_size = int(total_lines / 10)
	    }

	    # All ten rows are printed below, so zero every slot up front.
	    for (i = 1; i <= 10; i++) {
	        bucket_bytes[i] = 0
	    }

	    for (line_num = 1; line_num <= total_lines; line_num++) {
	        if (total_lines <= 10) {
	            bucket = line_num
	        } else {
	            bucket = int((line_num - 1) / bucket_size) + 1
	            if (bucket > 10) {
	                bucket = 10
	            }
	        }
	        bucket_bytes[bucket] += line_sizes[line_num]
	    }

	    # The largest bucket scales the bar chart to 40 columns.
	    max_bytes = 0
	    for (i = 1; i <= num_buckets; i++) {
	        if (bucket_bytes[i] > max_bytes) {
	            max_bytes = bucket_bytes[i]
	        }
	    }

	    # Use a plain "|" in the format: "\|" is not a defined awk escape sequence,
	    # so some awks warn about it and others print the backslash literally.
	    printf "%-15s | %-12s | %-40s\n", "Line Range", "Bytes", "Distribution" > out
	    print "-----------------+--------------+------------------------------------------" > out

	    for (i = 1; i <= 10; i++) {
	        if (total_lines <= 10) {
	            if (i <= total_lines) {
	                start_line = i
	                end_line = i
	            } else {
	                # No such line: the row is rendered with a "-" placeholder.
	                start_line = 0
	                end_line = 0
	            }
	        } else {
	            start_line = (i - 1) * bucket_size + 1
	            if (i == 10) {
	                end_line = total_lines
	            } else {
	                end_line = i * bucket_size
	            }
	        }

	        if (start_line == 0) {
	            range = sprintf("%7s", "-")
	        } else if (start_line == end_line) {
	            range = sprintf("%7d", start_line)
	        } else {
	            range = sprintf("%d-%d", start_line, end_line)
	        }

	        if (max_bytes > 0) {
	            bar_len = int((bucket_bytes[i] / max_bytes) * 40)
	        } else {
	            bar_len = 0
	        }

	        bar = ""
	        for (j = 1; j <= bar_len; j++) {
	            bar = bar "#"
	        }

	        printf "%-15s | %12d | %s\n", range, bucket_bytes[i], bar > out
	    }

	    print "-----------------+--------------+------------------------------------------" > out
	}

	# Copyright (c) 2026 LiveMore Capital www.livemorecapital.com
name	codex-chat-history
description	Back up, search, and inspect Codex CLI session rollout JSONL under $CODEX_HOME/sessions (date-partitioned tree). Use for mirroring rollouts to sync storage (e.g. iCloud), listing or bounding sessions by time, profiling large JSONL before reading, or extracting user-authored messages with jq or the bundled helper script.
Variable	Purpose	If unset
`CODEX_HOME`	Codex data root (`config.toml`, state DB, `sessions/`, etc.)	`~/.codex`
`CODEX_SESSIONS_ROOT`	Explicit directory containing the `YYYY/MM/DD` rollout tree	`$CODEX_HOME/sessions`
`CODEX_SESSIONS_BACKUP_ROOT`	Default destination root for `backup` (gzip mirror)	`~/icloud/.codex/sessions`
`CODEX_HISTORY_PATH`	Override path to `history.jsonl` for `codex_prompt_history_search.py`	`$CODEX_HOME/history.jsonl`
File	Role
`codex_chat_history.py`	PEP 723 `uv run --script` helper (`requires-python = ">=3.13.0,<3.14"`, `dependencies = []`): `backup`, `list`, `profile`, `bounds`, `user-messages`; optional `--archived` on the first four to include `$CODEX_HOME/archived_sessions/` after `--src`.
`codex_prompt_history_search.py`	Search `history.jsonl` only: **`--mode EXACT`** …
`line_histogram.awk`	Optional: line-size histogram or extract specific line(s) from huge JSONL before parsing.