Skip to content

Instantly share code, notes, and snippets.

@Helw150
Created April 10, 2026 01:29
Show Gist options
  • Select an option

  • Save Helw150/771c90b1ad424cda26085706a73b33d9 to your computer and use it in GitHub Desktop.

Select an option

Save Helw150/771c90b1ad424cda26085706a73b33d9 to your computer and use it in GitHub Desktop.
Python execution tracer prototype for SWE-bench-style Docker images
"""Benchmark end-to-end trace pipeline on multiple SWE-rebench-V2 Python images."""
import json
import os
import subprocess
import sys
import time
# Sampled SWE-rebench-V2 instances: one Docker image per repo, paired with the
# exact pytest command that instance's harness runs inside the container.
IMAGES = [
    {"instance_id": "wtforms__wtforms-614", "image_name": "docker.io/swerebenchv2/wtforms-wtforms:614-848d28d", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning tests/test_fields.py tests/test_validators.py tests/test_widgets.py"},
    {"instance_id": "aio-libs__aiohttp-9047", "image_name": "docker.io/swerebenchv2/aio-libs-aiohttp:9047-aca99bc", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning tests/test_connector.py tests/test_proxy.py tests/test_resolver.py"},
    {"instance_id": "bvanelli__actualpy-56", "image_name": "docker.io/swerebenchv2/bvanelli-actualpy:56-2ad5f63", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning tests/test_rules.py"},
    {"instance_id": "keras-team__keras-19955", "image_name": "docker.io/swerebenchv2/keras-team-keras:19955-ca9519b", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning keras/src/backend/common/variables_test.py"},
    {"instance_id": "aws-cloudformation__cfn-lint-3965", "image_name": "docker.io/swerebenchv2/aws-cloudformation-cfn-lint:3965-b4d790d", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning test/unit/rules/functions/test_ref_format.py"},
    {"instance_id": "qiskit__qiskit-terra-5662", "image_name": "docker.io/swerebenchv2/qiskit-qiskit-terra:5662-3accb1b", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning test/python/visualization/test_circuit_text_drawer.py test/python/visualization/timeline/test_core.py"},
    {"instance_id": "mozilla-services__cliquet-203", "image_name": "docker.io/swerebenchv2/mozilla-services-cliquet:203-41a48da", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning cliquet/tests/test_initialization.py"},
    {"instance_id": "azure__walinuxagent-970", "image_name": "docker.io/swerebenchv2/azure-walinuxagent:970-fc2451f", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning tests/common/test_errorstate.py"},
    {"instance_id": "tomwhite__cubed-211", "image_name": "docker.io/swerebenchv2/tomwhite-cubed:211-4305c85", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning cubed/tests/runtime/test_lithops.py cubed/tests/runtime/test_modal_async.py cubed/tests/runtime/test_python_async.py cubed/tests/runtime/utils.py"},
    {"instance_id": "pandas-dev__pandas-59608", "image_name": "docker.io/swerebenchv2/pandas-dev-pandas:59608-360597c", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning pandas/tests/reshape/merge/test_join.py pandas/tests/series/test_arrow_interface.py"},
    {"instance_id": "sissbruecker__linkding-984", "image_name": "docker.io/swerebenchv2/sissbruecker-linkding:984-c5a300a", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning bookmarks/tests/test_bookmark_archived_view.py bookmarks/tests/test_bookmark_index_view.py bookmarks/tests/test_bookmark_shared_view.py"},
    {"instance_id": "databricks__dbt-databricks-935", "image_name": "docker.io/swerebenchv2/databricks-dbt-databricks:935-4b1d2d9", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning tests/functional/adapter/liquid_clustering/fixtures.py tests/functional/adapter/liquid_clustering/test_liquid_clustering.py tests/unit/macros/relations/test_table_macros.py"},
    {"instance_id": "tox-dev__sphinx-autodoc-typehints-474", "image_name": "docker.io/swerebenchv2/tox-dev-sphinx-autodoc-typehints:474-c8be42f", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning tests/test_sphinx_autodoc_typehints.py"},
    {"instance_id": "meltano__sdk-1881", "image_name": "docker.io/swerebenchv2/meltano-sdk:1881-c3a8f90", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning tests/core/test_target_base.py"},
    {"instance_id": "fsspec__universal_pathlib-148", "image_name": "docker.io/swerebenchv2/fsspec-universal_pathlib:148-2a29aa4", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning upath/tests/cases.py upath/tests/implementations/test_gcs.py upath/tests/implementations/test_hdfs.py upath/tests/implementations/test_webdav.py"},
    {"instance_id": "tefra__xsdata-310", "image_name": "docker.io/swerebenchv2/tefra-xsdata:310-ce88e71", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning tests/formats/dataclass/parsers/test_nodes.py tests/formats/dataclass/parsers/test_utils.py tests/formats/dataclass/serializers/test_xml.py tests/formats/dataclass/test_context.py tests/formats/dataclass/test_elements.py tests/models/enums/test_datatype.py"},
    {"instance_id": "pyqtgraph__pyqtgraph-1845", "image_name": "docker.io/swerebenchv2/pyqtgraph-pyqtgraph:1845-ba517ab", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning tests/test_colormap.py"},
    {"instance_id": "pallets__werkzeug-2583", "image_name": "docker.io/swerebenchv2/pallets-werkzeug:2583-1ce57f6", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning tests/test_routing.py"},
    {"instance_id": "pytest-dev__pyfakefs-916", "image_name": "docker.io/swerebenchv2/pytest-dev-pyfakefs:916-95b2de3", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning pyfakefs/tests/fake_pathlib_test.py"},
    {"instance_id": "cs-si__eodag-228", "image_name": "docker.io/swerebenchv2/cs-si-eodag:228-fec965f", "test_cmd": "pytest --no-header -rA --tb=line --color=no -p no:cacheprovider -W ignore::DeprecationWarning tests/test_end_to_end.py tests/units/test_core.py"},
]
# Host directory containing the tracer package; bind-mounted into each container.
TRACER_DIR = os.path.expanduser("~/marin/symbolic/pytracer")
# Host directory where per-instance JSONL traces land (also bind-mounted).
OUTPUT_DIR = "/tmp/tracer_bench"
def run(cmd, timeout=300):
    """Execute *cmd* through the shell, capturing stdout/stderr as text.

    Returns the CompletedProcess; raises subprocess.TimeoutExpired if the
    command does not finish within *timeout* seconds.
    """
    return subprocess.run(
        cmd,
        shell=True,
        capture_output=True,
        text=True,
        timeout=timeout,
    )
def bench_one(img):
    """Benchmark one instance end to end.

    Pulls the image, runs its test command under the tracer, measures the
    resulting trace file, and deletes the image. Returns a metrics dict on
    success, or None if the pull failed. Exceptions from the traced run
    (e.g. subprocess.TimeoutExpired) propagate to the caller, but the trace
    file and the pulled image are always cleaned up first — previously a
    timed-out run leaked both, eating disk across the remaining instances.
    """
    instance_id = img["instance_id"]
    image_name = img["image_name"]
    test_cmd = img["test_cmd"]
    output_file = f"{OUTPUT_DIR}/{instance_id}.jsonl"

    print(f"\n{'='*60}")
    print(f"Instance: {instance_id}")
    print(f"Image: {image_name}")
    print(f"Test cmd: {test_cmd}")
    print(f"{'='*60}")

    timings = {}

    # Pull: a failed pull is a soft failure — skip this instance.
    t0 = time.monotonic()
    r = run(f"sudo docker pull {image_name}", timeout=120)
    timings["pull"] = time.monotonic() - t0
    if r.returncode != 0:
        print(f" PULL FAILED: {r.stderr[-200:]}")
        return None

    events = 0
    chars = 0
    size_mb = 0
    try:
        # Run tests inside the container with the tracer injected via
        # PYTHONPATH; the trace is written into the bind-mounted OUTPUT_DIR.
        docker_cmd = (
            f"sudo docker run --rm "
            f"-v {TRACER_DIR}:/pytracer "
            f"-v {OUTPUT_DIR}:{OUTPUT_DIR} "
            f"-e PYTHONPATH=/pytracer "
            f"-e TRACER_OUTPUT={output_file} "
            f"-e TRACER_MAX_EVENTS=500000 "
            f"{image_name} {test_cmd}"
        )
        t0 = time.monotonic()
        r = run(docker_cmd, timeout=300)
        timings["test+trace"] = time.monotonic() - t0

        # Print test output tail as a quick sanity check.
        lines = (r.stdout + r.stderr).strip().split("\n")
        for line in lines[-5:]:
            print(f" {line}")

        # Analyze the trace file: event count, raw characters, on-disk size.
        if os.path.exists(output_file):
            size_mb = os.path.getsize(output_file) / 1024 / 1024
            with open(output_file) as f:
                for line in f:
                    events += 1
                    chars += len(line)
    finally:
        # Always clean up, even if the traced run raised/timed out, so a
        # failed instance leaves neither a partial trace nor a cached image.
        if os.path.exists(output_file):
            os.remove(output_file)
        t0 = time.monotonic()
        run(f"sudo docker rmi {image_name}")
        timings["rmi"] = time.monotonic() - t0

    total = sum(timings.values())
    est_tokens = chars // 4  # rough heuristic: ~4 characters per token
    print(f"\n Timings: pull={timings['pull']:.1f}s test+trace={timings['test+trace']:.1f}s rmi={timings['rmi']:.1f}s total={total:.1f}s")
    print(f" Events: {events:,} Size: {size_mb:.1f}MB Est tokens: {est_tokens:,}")
    return {
        "instance_id": instance_id,
        "timings": timings,
        "total_time": total,
        "events": events,
        "size_mb": size_mb,
        "est_tokens": est_tokens,
    }
def main():
    """Benchmark every configured image and print a per-instance summary."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    print(f"Benchmarking {len(IMAGES)} unique Python repos\n")

    results = []
    for img in IMAGES:
        try:
            outcome = bench_one(img)
        except Exception as e:
            # Best-effort: one broken instance must not abort the sweep.
            print(f" ERROR: {e}")
        else:
            if outcome:
                results.append(outcome)

    print(f"\n{'='*60}")
    print("SUMMARY")
    print(f"{'='*60}")
    for r in results:
        print(f" {r['instance_id']:45s} {r['total_time']:6.1f}s {r['events']:>8,} events {r['est_tokens']:>10,} tokens")

    if not results:
        return
    avg_time = sum(r["total_time"] for r in results) / len(results)
    avg_tokens = sum(r["est_tokens"] for r in results) / len(results)
    print(f"\n Average: {avg_time:.1f}s per image, {avg_tokens:,.0f} tokens per image")
    print(f" Projected for 7,243 images: {avg_time * 7243 / 3600:.1f} hours, {avg_tokens * 7243 / 1e9:.1f}B tokens")
if __name__ == "__main__":
    # Time the whole benchmark run, including pulls, traced tests and cleanup.
    t0 = time.monotonic()
    main()
    print(f"\nTotal wall time: {time.monotonic() - t0:.1f}s")
"""Show interesting trace events from a JSONL trace file."""
import json
import sys
path = sys.argv[1] if len(sys.argv) > 1 else "/tmp/tracer_out/trace.jsonl"
events = [json.loads(line) for line in open(path)]
print(f"Total events: {len(events)}")
# Show event type distribution
from collections import Counter
kinds = Counter(e["event"] for e in events)
print(f"Event types: {dict(kinds)}")
# Show unique functions traced
funcs = Counter(f"{e['file']}::{e['function']}" for e in events)
print(f"\nTop 20 functions by event count:")
for func, count in funcs.most_common(20):
print(f" {count:5d} {func}")
# Show a sample function trace (first non-module call with >3 events)
seen = {}
for e in events:
key = (e["file"], e["function"])
if e["function"] == "<module>":
continue
seen.setdefault(key, []).append(e)
print("\n--- Sample function traces (with locals) ---")
# Group into individual invocations (call->return sequences)
invocations = []
stack = {}
for e in events:
key = (e["file"], e["function"], e["depth"])
if e["event"] == "call":
stack[key] = [e]
elif key in stack:
stack[key].append(e)
if e["event"] == "return":
invocations.append(stack.pop(key))
# Find invocations with real locals (not class defs, not trivial)
shown = 0
for inv in invocations:
non_ellipsis_locals = sum(
1 for e in inv for v in e["locals"].values() if v != "..."
)
if (len(inv) >= 5 and non_ellipsis_locals >= 3
and inv[0]["function"] not in ("<module>", "<dictcomp>", "<listcomp>")
and not inv[0]["function"][0].isupper() # skip class defs
and shown < 3):
print(f"\nFunction: {inv[0]['function']} in {inv[0]['file']} ({len(inv)} events)")
for e in inv[:25]:
print(json.dumps(e))
if len(inv) > 25:
print(f" ... ({len(inv) - 25} more events)")
shown += 1
"""Auto-inject tracer when imported via PYTHONPATH.
Activated by setting PYTHONPATH to include the directory containing this file.
Configure via environment variables:
TRACER_OUTPUT - path to write JSONL output (default: /tmp/trace.jsonl)
TRACER_REPO_ROOT - repo root for relative paths (default: cwd)
TRACER_MAX_EVENTS - max events before stopping (default: 50000)
TRACER_MAX_REPR - max repr length per variable (default: 200)
"""
import atexit
import os
import sys
def _setup_tracer():
    """Start a Tracer for this process and register an atexit dump hook.

    Configuration comes from TRACER_REPO_ROOT / TRACER_OUTPUT environment
    variables, falling back to the current directory and /tmp/trace.jsonl.
    """
    from tracer import Tracer

    repo_root = os.environ.get("TRACER_REPO_ROOT", os.getcwd())
    output_path = os.environ.get("TRACER_OUTPUT", "/tmp/trace.jsonl")

    active_tracer = Tracer(repo_root=repo_root)
    active_tracer.start()

    def _on_exit():
        # Flush the accumulated trace once the host process finishes.
        active_tracer.stop()
        active_tracer.dump(output_path)
        count = len(active_tracer.events)
        print(f"\n[tracer] Wrote {count} events to {output_path}", file=sys.stderr)

    atexit.register(_on_exit)

_setup_tracer()
"""Execution tracer that captures Python program state at interpreter events.
Uses sys.settrace to record call/line/return/exception events with local
variable snapshots. Filters to repo code only (skips stdlib, site-packages).
Outputs JSONL with compressed unchanged variables.
"""
import json
import os
import sys
import threading
from pathlib import Path
# Paths that indicate non-repo code. These are plain substrings matched
# against frame filenames (stdlib, installed packages, the tracer itself,
# pytest/pluggy internals) so their frames are excluded from the trace.
_SKIP_MARKERS = (
    "site-packages",
    "lib/python",
    "/usr/lib",
    "importlib",
    "<frozen",
    "/pytracer/",
    "/.local/",
    "/usr/local/lib/",
    "_pytest/",
    "pluggy/",
    "conftest.py",
    "_distutils_hack",
    "pkg_resources",
)
# Max events before we stop tracing (safety valve). Read once at import time.
MAX_EVENTS = int(os.environ.get("TRACER_MAX_EVENTS", "50000"))
# Max repr length for a single variable value. Read once at import time.
MAX_REPR_LEN = int(os.environ.get("TRACER_MAX_REPR", "200"))
def _safe_repr(obj):
    """Best-effort repr(): never raises, and caps output at MAX_REPR_LEN."""
    try:
        text = repr(obj)
    except Exception:
        # A user __repr__ may raise anything; swallow it and mark the value.
        return "<repr-error>"
    if len(text) <= MAX_REPR_LEN:
        return text
    return text[:MAX_REPR_LEN] + "..."
def _should_skip(filename):
    """Return True when *filename* belongs to non-repo code (or is empty)."""
    if not filename:
        return True
    return any(marker in filename for marker in _SKIP_MARKERS)
class Tracer:
    """Captures execution traces as JSONL.

    Installs itself via sys.settrace/threading.settrace and records
    call/line/return/exception events for repo code only, snapshotting
    local variables at each event (unchanged values compressed to "...").

    Usage:
        tracer = Tracer(repo_root="/path/to/repo")
        tracer.start()
        # ... run code ...
        tracer.stop()
        tracer.dump("/path/to/output.jsonl")
    """

    def __init__(self, repo_root=None):
        # Resolve once so relpath() in _build_record is stable and cheap.
        self.repo_root = str(Path(repo_root).resolve()) if repo_root else None
        self.events = []  # accumulated event records (plain dicts)
        self.prev_locals = {}  # keyed by frame id; last repr-snapshot per frame
        self._lock = threading.Lock()  # guards self.events across threads
        self._stopped = False

    def start(self):
        """Begin tracing the current thread and threads started afterwards."""
        self._stopped = False
        sys.settrace(self._trace)
        # threading.settrace only applies to threads created after this call.
        threading.settrace(self._trace)

    def stop(self):
        """Stop tracing; frames already carrying a local trace fn see _stopped."""
        self._stopped = True
        sys.settrace(None)
        threading.settrace(None)

    def _trace(self, frame, event, arg):
        """sys.settrace callback.

        Returns itself to keep receiving line/return/exception events for
        repo frames, or None to disable tracing inside skipped frames.
        """
        try:
            # Hard stop: either explicitly stopped or the safety valve tripped.
            if self._stopped or len(self.events) >= MAX_EVENTS:
                return None
            filename = frame.f_code.co_filename
            if _should_skip(filename):
                return None
            record = self._build_record(frame, event, arg)
            if record:
                with self._lock:
                    self.events.append(record)
            return self._trace
        except Exception:
            # Never let a tracer bug break the traced program; drop the
            # event but keep tracing.
            return self._trace

    def _build_record(self, frame, event, arg):
        """Build one JSON-serializable event dict for *frame*/*event*/*arg*."""
        filename = frame.f_code.co_filename
        if self.repo_root:
            filename = os.path.relpath(filename, self.repo_root)
        frame_id = id(frame)
        # Snapshot locals, compress unchanged.
        current_locals = {}
        for k, v in frame.f_locals.items():
            # Skip dunders (e.g. __doc__ in module/class frames) — noise.
            if k.startswith("__") and k.endswith("__"):
                continue
            current_locals[k] = _safe_repr(v)
        prev = self.prev_locals.get(frame_id, {})
        compressed = {}
        for k, v in current_locals.items():
            if prev.get(k) == v:
                compressed[k] = "..."  # unchanged since the previous event
            else:
                compressed[k] = v
        self.prev_locals[frame_id] = current_locals
        record = {
            "event": event,
            "file": filename,
            "function": frame.f_code.co_name,
            "line": frame.f_lineno,
            "locals": compressed,
            "depth": _frame_depth(frame),
        }
        if event == "return":
            # For "return" events, arg is the value being returned.
            record["return_value"] = _safe_repr(arg)
            # Clean up prev_locals for this frame.
            # NOTE(review): frames that never emit "return" (e.g. suspended
            # generators at shutdown) leave an entry behind — confirm this is
            # acceptable for long runs.
            self.prev_locals.pop(frame_id, None)
        elif event == "exception":
            # For "exception" events, arg is the (type, value, traceback) triple.
            exc_type, exc_value, _ = arg
            record["exception"] = {
                "type": getattr(exc_type, "__name__", str(exc_type)),
                "value": _safe_repr(exc_value),
            }
        return record

    def dump(self, path):
        """Write events to a JSONL file."""
        with open(path, "w") as f:
            for event in self.events:
                f.write(json.dumps(event) + "\n")

    def to_jsonl(self):
        """Return events as a JSONL string."""
        return "\n".join(json.dumps(e) for e in self.events)
def _frame_depth(frame):
    """Return the number of frames above *frame* on the call stack."""
    count = 0
    current = frame.f_back
    while current is not None:
        count += 1
        current = current.f_back
    return count
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment