Last active
March 30, 2026 20:39
-
-
Save Luke-Rogerson/98213b51cd77bb3316b33109250be4a5 to your computer and use it in GitHub Desktop.
doge tx tracer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| doge_trace_blockchair.py | |
| Forward-trace Dogecoin UTXOs hop by hop using Blockchair's DOGE API. | |
| What it does | |
| ------------ | |
| - Starts from one or more seed txids | |
| - Fetches each transaction's outputs | |
| - Follows any spent output to the spending transaction | |
| - Builds an edge list and a set of unspent leaf outputs | |
| - Tracks "frontier" outputs that were already spent but whose next spending | |
| transaction lies beyond the chosen max depth | |
| - Produces conservative heuristics for: | |
| * still_unspent_total_doge | |
| * likely_cold_wallet_total_doge | |
| * frontier_spent_beyond_depth_total_doge | |
| * high_fanout_value_doge | |
| * peel_chain_events | |
| Notes | |
| ----- | |
| - This traces wallet flows, not real-world identity. | |
| - "High fanout" is just a structural heuristic, not proof of laundering. | |
| - Amounts from Blockchair are returned in DOGE's smallest unit (1e-8 DOGE). | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import csv | |
| import json | |
| import os | |
| import time | |
| from collections import Counter | |
| from dataclasses import dataclass, asdict | |
| from decimal import Decimal, getcontext | |
| from typing import Any, Dict, Iterable, List, Optional, Set | |
| import requests | |
# Significant digits used by Decimal arithmetic in this process.
getcontext().prec = 28
# Base units per 1 DOGE; units_to_doge() divides raw API amounts by this.
SAT = Decimal("100000000")
# Root of the Blockchair HTTP API and the chain path segment appended to it.
DEFAULT_BASE_URL = "https://api.blockchair.com"
DEFAULT_CHAIN = "dogecoin"
# Sent as the User-Agent header on every request.
USER_AGENT = "doge-trace-blockchair/2.0"
# Pause after each successful API call, in seconds.
DEFAULT_SLEEP_SECONDS = 0.35
# Per-request HTTP timeout, in seconds.
DEFAULT_TIMEOUT_SECONDS = 20
# Maximum number of attempts made for a single request.
DEFAULT_MAX_RETRIES = 4
def units_to_doge(value: int | str | Decimal) -> Decimal:
    """Convert a raw base-unit amount into DOGE.

    ``value`` is coerced with ``Decimal(value)`` and then divided by the
    module-level ``SAT`` constant.
    """
    raw_units = Decimal(value)
    return raw_units / SAT
def doge_to_str(d: Decimal) -> str:
    """Render a DOGE amount as fixed-point text with exactly eight decimals.

    The amount is quantized to 0.00000001 and formatted without an exponent.
    """
    eight_places = Decimal("0.00000001")
    return f"{d.quantize(eight_places):f}"
def safe_get(d: Any, *path: Any, default=None):
    """Walk ``path`` through nested dicts/lists, returning ``default`` on any miss.

    Each step indexes the current node with the next path element. A step is
    only attempted when the node is a dict, or a list being indexed by an int;
    anything else, or any lookup error, yields ``default``. With an empty
    path the input itself is returned.
    """
    node = d
    for key in path:
        into_dict = isinstance(node, dict)
        into_list = isinstance(node, list) and isinstance(key, int)
        if not (into_dict or into_list):
            return default
        try:
            node = node[key]
        except Exception:
            # Missing key, out-of-range index, unhashable key, ...
            return default
    return node
@dataclass
class Edge:
    """One traced transaction output, plus the transaction spending it (if any)."""

    # Transaction that created the output, and the output's index within it.
    from_txid: str
    from_vout: int
    # Amount as an eight-decimal DOGE string.
    value_doge: str
    # Recipient reported by the API; empty string when none was given.
    recipient: str
    # Whether the API reports the output as spent.
    spent: bool
    # Spending transaction hash when spent, otherwise None.
    to_txid: str | None
    # Depth of from_txid in the trace; seed transactions are hop 0.
    hop: int
@dataclass
class LeafOutput:
    """An unspent output reached by the trace, with its classification."""

    # Location of the output: creating transaction and output index.
    txid: str
    vout: int
    # Recipient reported by the API; empty string when none was given.
    recipient: str
    # Amount as an eight-decimal DOGE string.
    value_doge: str
    # Classification label, e.g. "still_unspent" or "likely_cold_wallet".
    status: str
    # Human-readable explanation of why the status was assigned.
    reason: str
class BlockchairClient:
    """Small Blockchair dashboards client with retrying GETs and in-memory caches.

    Args:
        api_key: Optional API key; when set it is sent as the ``key`` query
            parameter on every request.
        base_url: API root; trailing slashes are stripped.
        chain: Path segment inserted between the base URL and the endpoint.
        timeout: Per-request timeout handed to ``requests``.
        sleep_seconds: Pause after every successful request.
        max_retries: Maximum number of attempts made for one request.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: str = DEFAULT_BASE_URL,
        chain: str = DEFAULT_CHAIN,
        timeout: int = DEFAULT_TIMEOUT_SECONDS,
        sleep_seconds: float = DEFAULT_SLEEP_SECONDS,
        max_retries: int = DEFAULT_MAX_RETRIES,
    ):
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.chain = chain
        self.timeout = timeout
        self.sleep_seconds = sleep_seconds
        self.max_retries = max_retries
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": USER_AGENT})
        # Payload caches keyed by txid / address so each is fetched only once.
        self.tx_cache: Dict[str, Dict[str, Any]] = {}
        self.address_cache: Dict[str, Dict[str, Any]] = {}

    def _request_json(self, path: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """GET ``path`` under the configured chain and return the decoded JSON body.

        Transport errors, HTTP 429, HTTP 5xx and malformed bodies are retried
        with exponential backoff (1s, 2s, 4s, ...) up to ``max_retries``
        attempts. Any other 4xx status is treated as permanent and fails
        immediately, so an unknown txid or address does not burn the whole
        backoff budget.

        Raises:
            RuntimeError: on a non-retryable status, or once all attempts
                have been used up.
        """
        url = f"{self.base_url}/{self.chain}/{path.lstrip('/')}"
        query = dict(params or {})
        if self.api_key:
            query["key"] = self.api_key

        backoff = 1.0
        last_err: Optional[Exception] = None
        for attempt in range(1, self.max_retries + 1):
            try:
                r = self.session.get(url, params=query, timeout=self.timeout)
            except requests.RequestException as e:
                # Connection problems and timeouts are worth another attempt.
                last_err = e
            else:
                status = r.status_code
                if status == 429:
                    last_err = RuntimeError(f"Rate limited by Blockchair at {url}")
                elif 400 <= status < 500:
                    # Repeating the identical request cannot fix a client error.
                    raise RuntimeError(f"Failed request {url}: HTTP {status}")
                elif status >= 500:
                    last_err = RuntimeError(f"HTTP {status} from {url}")
                else:
                    try:
                        data = r.json()
                    except ValueError as e:
                        # Body was not valid JSON.
                        last_err = e
                    else:
                        if isinstance(data, dict) and "data" in data:
                            # Pace successful calls to stay under the rate limit.
                            time.sleep(self.sleep_seconds)
                            return data
                        last_err = RuntimeError(
                            f"Unexpected response from {url}: missing 'data'"
                        )
            if attempt < self.max_retries:
                time.sleep(backoff)
                backoff *= 2
        raise RuntimeError(f"Failed request {url}: {last_err}") from last_err

    def get_transaction(self, txid: str) -> Dict[str, Any]:
        """Return the dashboard payload for ``txid``, fetching it at most once.

        Raises:
            RuntimeError: if the request fails or the response holds no entry
                for ``txid``.
        """
        if txid not in self.tx_cache:
            data = self._request_json(f"dashboards/transaction/{txid}")
            tx_payload = safe_get(data, "data", txid)
            if not tx_payload:
                raise RuntimeError(f"Transaction not found in response: {txid}")
            self.tx_cache[txid] = tx_payload
        return self.tx_cache[txid]

    def get_address(self, address: str) -> Dict[str, Any]:
        """Return the dashboard payload for ``address``, fetching it at most once.

        Raises:
            RuntimeError: if the request fails or the response holds no entry
                for ``address``.
        """
        if address not in self.address_cache:
            data = self._request_json(f"dashboards/address/{address}")
            addr_payload = safe_get(data, "data", address)
            if not addr_payload:
                raise RuntimeError(f"Address not found in response: {address}")
            self.address_cache[address] = addr_payload
        return self.address_cache[address]
class Tracer:
    """Follow outputs forward from seed transactions and summarise the result.

    Args:
        client: API client used to fetch transactions and addresses.
        max_depth: Deepest hop that is still fetched; seeds are hop 0.
        min_value_doge: Outputs below this amount are not followed or recorded
            as edges.
        include_change_like: When False, outputs paying an address that also
            appears among the transaction's inputs are skipped.
    """

    # Thresholds behind the "likely_cold_wallet" classification.
    COLD_WALLET_MIN_VALUE_DOGE = Decimal("1000")
    COLD_WALLET_MAX_TX_COUNT = 3
    COLD_WALLET_MIN_BALANCE_RATIO = Decimal("0.95")

    def __init__(
        self,
        client: BlockchairClient,
        max_depth: int = 6,
        min_value_doge: Decimal = Decimal("0"),
        include_change_like: bool = False,
    ):
        self.client = client
        self.max_depth = max_depth
        self.min_value_doge = min_value_doge
        self.include_change_like = include_change_like
        # Every followed output, in discovery order.
        self.edges: List[Edge] = []
        # Spent outputs whose spending transaction lies past max_depth.
        self.frontier_outputs: List[Edge] = []
        self.visited_txs: Set[str] = set()
        # Per-transaction output count and output amounts (all outputs,
        # including ones filtered out of `edges`).
        self.tx_fanout: Dict[str, int] = {}
        self.tx_output_values: Dict[str, List[Decimal]] = {}
        self.recipient_counter: Counter[str] = Counter()
        self.address_stats_cache: Dict[str, Dict[str, Any]] = {}
        self.trace_starts: List[str] = []

    def trace_many(self, seed_txids: Iterable[str]) -> Dict[str, Any]:
        """Trace every seed and return the full report as a plain dict.

        The report holds the seeds, the settings used, a summary (counts plus
        heuristics), and the edge, frontier and leaf lists as dicts.
        """
        self.trace_starts = list(seed_txids)
        for txid in self.trace_starts:
            self._trace_tx(txid, hop=0)
        leaf_outputs = self._collect_leaf_outputs()
        heuristics = self._compute_heuristics(leaf_outputs)
        return {
            "seed_txids": self.trace_starts,
            "settings": {
                "max_depth": self.max_depth,
                "min_value_doge": doge_to_str(self.min_value_doge),
                "include_change_like": self.include_change_like,
            },
            "summary": {
                "transactions_seen": len(self.visited_txs),
                "edges": len(self.edges),
                "frontier_outputs": len(self.frontier_outputs),
                **heuristics,
            },
            "edges": [asdict(e) for e in self.edges],
            "frontier_outputs": [asdict(e) for e in self.frontier_outputs],
            "leaf_outputs": [asdict(leaf) for leaf in leaf_outputs],
        }

    def _trace_tx(self, txid: str, hop: int) -> None:
        """Record the outputs of ``txid`` and recurse into their spenders.

        Recursion depth is bounded by ``max_depth``; already visited
        transactions are skipped.
        """
        if hop > self.max_depth or txid in self.visited_txs:
            return
        tx = self.client.get_transaction(txid)
        self.visited_txs.add(txid)
        outputs = safe_get(tx, "outputs", default=[]) or []
        # Convert each amount once and reuse it for the stats and the loop.
        values = [units_to_doge(out["value"]) for out in outputs]
        self.tx_fanout[txid] = len(outputs)
        self.tx_output_values[txid] = values
        for out, value in zip(outputs, values):
            recipient = out.get("recipient") or ""
            index = int(out["index"])
            is_spent = bool(out.get("is_spent"))
            spending_txid = out.get("spending_transaction_hash")
            # Counted before filtering, so it reflects every output seen.
            self.recipient_counter[recipient] += 1
            if value < self.min_value_doge:
                continue
            if not self.include_change_like and self._looks_change_like(tx, recipient):
                continue
            edge = Edge(
                from_txid=txid,
                from_vout=index,
                value_doge=doge_to_str(value),
                recipient=recipient,
                spent=is_spent,
                to_txid=spending_txid if is_spent else None,
                hop=hop,
            )
            self.edges.append(edge)
            if is_spent and spending_txid:
                if hop + 1 > self.max_depth:
                    # Spent, but the spender is beyond the depth budget.
                    self.frontier_outputs.append(edge)
                else:
                    self._trace_tx(spending_txid, hop + 1)

    def _looks_change_like(self, tx: Dict[str, Any], recipient: str) -> bool:
        """Return True when ``recipient`` is also a recipient of one of ``tx``'s inputs."""
        inputs = safe_get(tx, "inputs", default=[]) or []
        input_recipients = {i.get("recipient") for i in inputs if i.get("recipient")}
        return recipient in input_recipients

    def _address_stats(self, address: str) -> Dict[str, Any]:
        """Return cached balance/received/spent/transaction-count stats for ``address``.

        This lookup is best-effort: if the request or the parsing fails, an
        all-zero record is cached instead, so a single bad address cannot
        abort report generation.
        """
        if address in self.address_stats_cache:
            return self.address_stats_cache[address]
        try:
            payload = self.client.get_address(address)
            a = payload.get("address", {})
            transactions = payload.get("transactions", []) or []
            stats = {
                "balance_doge": units_to_doge(a.get("balance", 0)),
                "received_doge": units_to_doge(a.get("received", 0)),
                "spent_doge": units_to_doge(a.get("spent", 0)),
                "transaction_count": int(a.get("transaction_count", len(transactions) or 0) or 0),
            }
        except Exception:
            # Deliberate fallback: zero stats never satisfy the cold-wallet test.
            stats = {
                "balance_doge": Decimal("0"),
                "received_doge": Decimal("0"),
                "spent_doge": Decimal("0"),
                "transaction_count": 0,
            }
        self.address_stats_cache[address] = stats
        return stats

    def _is_likely_cold_wallet(self, recipient: str, value: Decimal) -> bool:
        """Decide whether an unspent output sits at a low-activity holding address.

        The local checks run first so that outputs which can never qualify
        (below the value threshold, or with no recipient) do not cost an
        address lookup.
        """
        if not recipient or value < self.COLD_WALLET_MIN_VALUE_DOGE:
            return False
        stats = self._address_stats(recipient)
        return (
            stats["spent_doge"] == 0
            and stats["transaction_count"] <= self.COLD_WALLET_MAX_TX_COUNT
            and stats["balance_doge"] >= value * self.COLD_WALLET_MIN_BALANCE_RATIO
        )

    def _collect_leaf_outputs(self) -> List[LeafOutput]:
        """Classify every unspent edge and return the leaves, largest amount first."""
        leaves: List[LeafOutput] = []
        for edge in self.edges:
            if edge.spent:
                continue
            value = Decimal(edge.value_doge)
            status = "still_unspent"
            reason = "Output remains unspent at current trace depth."
            if self._is_likely_cold_wallet(edge.recipient, value):
                status = "likely_cold_wallet"
                reason = (
                    "Unspent at a low-activity address with no recorded outgoing volume "
                    "and a balance that still largely contains this output."
                )
            leaves.append(
                LeafOutput(
                    txid=edge.from_txid,
                    vout=edge.from_vout,
                    recipient=edge.recipient,
                    value_doge=edge.value_doge,
                    status=status,
                    reason=reason,
                )
            )
        return sorted(leaves, key=lambda x: Decimal(x.value_doge), reverse=True)

    def _compute_heuristics(self, leaf_outputs: List[LeafOutput]) -> Dict[str, Any]:
        """Aggregate the summary totals and structural counters for the report.

        ``still_unspent_total_doge`` covers every leaf, including those also
        counted in ``likely_cold_wallet_total_doge``.
        """
        still_unspent_total = Decimal("0")
        likely_cold_wallet_total = Decimal("0")
        frontier_spent_beyond_depth_total = Decimal("0")
        high_fanout_value = Decimal("0")
        peel_chain_events = 0
        for leaf in leaf_outputs:
            value = Decimal(leaf.value_doge)
            still_unspent_total += value
            if leaf.status == "likely_cold_wallet":
                likely_cold_wallet_total += value
        for edge in self.frontier_outputs:
            frontier_spent_beyond_depth_total += Decimal(edge.value_doge)
        for txid, fanout in self.tx_fanout.items():
            values = sorted(self.tx_output_values.get(txid, []), reverse=True)
            total = sum(values, Decimal("0"))
            if fanout >= 5:
                high_fanout_value += total
            # Peel-chain shape: 2-3 outputs, the largest carrying at least 80%
            # of the total and at least 10x the smallest.
            if (
                2 <= len(values) <= 3
                and total > 0
                and values[0] / total >= Decimal("0.80")
                and values[-1] > 0
                and values[0] / values[-1] >= Decimal("10")
            ):
                peel_chain_events += 1
        return {
            "still_unspent_total_doge": doge_to_str(still_unspent_total),
            "likely_cold_wallet_total_doge": doge_to_str(likely_cold_wallet_total),
            "frontier_spent_beyond_depth_total_doge": doge_to_str(frontier_spent_beyond_depth_total),
            "high_fanout_value_doge": doge_to_str(high_fanout_value),
            "peel_chain_events": peel_chain_events,
        }
def write_json(path: str, data: Dict[str, Any]) -> None:
    """Serialise ``data`` as 2-space-indented JSON into ``path`` (UTF-8, overwriting)."""
    rendered = json.dumps(data, indent=2)
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(rendered)
def write_edges_csv(path: str, edges: List[Dict[str, Any]]) -> None:
    """Write the edge dicts to ``path`` as CSV with a header row.

    Columns follow the ``Edge`` field order.
    """
    columns = ["from_txid", "from_vout", "value_doge", "recipient", "spent", "to_txid", "hop"]
    with open(path, "w", newline="", encoding="utf-8") as handle:
        table = csv.DictWriter(handle, fieldnames=columns)
        table.writeheader()
        table.writerows(edges)
def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse command-line options for the tracer.

    ``--max-depth`` and ``--min-value`` are validated here, so malformed,
    non-finite or negative values produce a normal argparse usage error
    instead of a traceback or a silently empty trace.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read ``sys.argv``.
    """
    # Imported here because the module-level decimal import does not bring in
    # InvalidOperation.
    from decimal import InvalidOperation

    def non_negative_int(text: str) -> int:
        """argparse type: an integer that is >= 0."""
        try:
            number = int(text)
        except ValueError:
            raise argparse.ArgumentTypeError(f"invalid integer value: {text!r}") from None
        if number < 0:
            raise argparse.ArgumentTypeError(f"must be >= 0, got {text!r}")
        return number

    def non_negative_decimal(text: str) -> Decimal:
        """argparse type: a finite Decimal that is >= 0."""
        try:
            amount = Decimal(text)
        except InvalidOperation:
            # argparse only converts ValueError/TypeError into usage errors;
            # Decimal raises InvalidOperation, which would otherwise escape.
            raise argparse.ArgumentTypeError(f"invalid decimal value: {text!r}") from None
        # is_finite() is checked first: comparing NaN with `<` raises.
        if not amount.is_finite() or amount < 0:
            raise argparse.ArgumentTypeError(f"must be a finite amount >= 0, got {text!r}")
        return amount

    p = argparse.ArgumentParser(description="Hop-by-hop DOGE tracer using Blockchair.")
    p.add_argument("--txid", action="append", required=True, help="Seed txid. Use multiple times.")
    p.add_argument("--api-key", default=os.getenv("BLOCKCHAIR_API_KEY"), help="Optional Blockchair API key.")
    p.add_argument("--max-depth", type=non_negative_int, default=6, help="Maximum hop depth to follow.")
    p.add_argument("--min-value", type=non_negative_decimal, default=Decimal("0"), help="Ignore outputs smaller than this many DOGE.")
    p.add_argument("--include-change-like", action="store_true", help="Also follow outputs that look like change.")
    p.add_argument("--json-out", default="doge_trace_report.json", help="JSON report output path.")
    p.add_argument("--csv-out", default="", help="Optional CSV edge-list output path.")
    p.add_argument("--sleep-seconds", type=float, default=DEFAULT_SLEEP_SECONDS, help="Delay between API calls.")
    p.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT_SECONDS, help="HTTP timeout in seconds.")
    p.add_argument("--base-url", default=DEFAULT_BASE_URL, help="Blockchair API base URL.")
    p.add_argument("--chain", default=DEFAULT_CHAIN, help="Chain name. Default: dogecoin.")
    return p.parse_args(argv)
def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point: trace, write the report files, print a summary.

    Returns 0 on completion; errors from the trace or the file writes
    propagate to the caller.
    """
    options = parse_args(argv)

    api = BlockchairClient(
        api_key=options.api_key,
        base_url=options.base_url,
        chain=options.chain,
        timeout=options.timeout,
        sleep_seconds=options.sleep_seconds,
    )
    report = Tracer(
        client=api,
        max_depth=options.max_depth,
        min_value_doge=options.min_value,
        include_change_like=options.include_change_like,
    ).trace_many(options.txid)

    # Both files are written before anything is announced.
    wants_csv = bool(options.csv_out)
    write_json(options.json_out, report)
    if wants_csv:
        write_edges_csv(options.csv_out, report["edges"])

    print(f"Wrote JSON report: {options.json_out}")
    if wants_csv:
        print(f"Wrote CSV edges: {options.csv_out}")

    print("\nSummary")
    print("-------")
    for label, figure in report["summary"].items():
        print(f"{label}: {figure}")
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment