#!/usr/bin/env python3
"""
verify.py
=========
End-to-end proof that detect.py catches every vulnerability that
reproduce.py creates — on a REGTEST chain.

Steps:
  1. Wipe & restart regtest
  2. Create wallets, fund miner
  3. Run reproduce.py (create all 12 vulnerability scenarios)
  4. Run detect.py --wallet alice (capture output)
  5. Parse output and assert every detector (1–12) produced ≥1 finding
     or warning
  6. Print a 12-row proof table

Usage:
    python3 verify.py
"""

import subprocess
import sys
import os
import re
import time

DIR = os.path.dirname(os.path.abspath(__file__))
WALLETS = ["miner", "alice", "bob", "carol", "exchange", "risky"]

# ANSI escape sequences used to colour terminal output.
G = "\033[92m"
R = "\033[91m"
B = "\033[1m"
C = "\033[96m"
Y = "\033[93m"
RST = "\033[0m"


def run(cmd, check=True, timeout=300):
    """Run a shell command, return stdout.

    Args:
        cmd: Command line, executed through the shell (so redirections,
            ``||`` and ``~`` expansion work).
        check: When true, a non-zero exit status or a timeout prints a
            failure report and terminates the script with exit code 1.
        timeout: Seconds to wait before giving up on the command.

    Returns:
        The command's stdout with surrounding whitespace stripped. When
        ``check`` is false and the command times out, an empty string.
    """
    try:
        result = subprocess.run(
            cmd, shell=True, capture_output=True, text=True, timeout=timeout
        )
    except subprocess.TimeoutExpired:
        # Report a hung command the same way as a failing one instead of
        # letting a raw traceback escape.
        if check:
            print(f" {R}FAIL:{RST} {cmd}")
            print(f" stderr: timed out after {timeout}s")
            sys.exit(1)
        return ""
    if check and result.returncode != 0:
        print(f" {R}FAIL:{RST} {cmd}")
        print(f" stderr: {result.stderr.strip()}")
        sys.exit(1)
    return result.stdout.strip()


def btc(cmd):
    """Run ``bitcoin-cli -regtest <cmd>`` and return its stdout."""
    return run(f"bitcoin-cli -regtest {cmd}")


def btcw(wallet, cmd):
    """Run ``bitcoin-cli -regtest -rpcwallet=<wallet> <cmd>`` and return its stdout."""
    return run(f"bitcoin-cli -regtest -rpcwallet={wallet} {cmd}")


def banner(msg):
    """Print *msg* framed by two 70-character rules."""
    print(f"\n{B}{C}{'═' * 70}{RST}")
    print(f"{B}{C} {msg}{RST}")
    print(f"{B}{C}{'═' * 70}{RST}")


# ─────────────────────────────────────────────────────────────────────────────
# Step 1: Fresh regtest
# ─────────────────────────────────────────────────────────────────────────────
def setup_regtest():
    """Reset the regtest chain, start bitcoind, create wallets and mine 110 blocks.

    Exits the script with code 1 if bitcoind's RPC does not answer within
    30 polling attempts or if any checked command fails.
    """
    banner("Step 1: Fresh regtest chain")

    # Stop if running
    run("bitcoin-cli -regtest stop 2>/dev/null || true", check=False)
    time.sleep(2)

    # Wipe
    run("rm -rf ~/.bitcoin/regtest")
    print(" ✓ Wiped regtest datadir")

    # Write bitcoin.conf with regtest settings.
    # NOTE: this overwrites any existing ~/.bitcoin/bitcoin.conf.
    conf = os.path.expanduser("~/.bitcoin/bitcoin.conf")
    # The data directory does not exist on a machine where bitcoind has
    # never run; create it so open() below cannot fail on a missing parent.
    os.makedirs(os.path.dirname(conf), exist_ok=True)
    with open(conf, "w", encoding="utf-8") as f:
        f.write("regtest=1\ntxindex=1\n\n[regtest]\n"
                "fallbackfee=0.00010\ndustrelayfee=0.00000001\n"
                "acceptnonstdtxn=1\nserver=1\n")
    print(" ✓ Wrote bitcoin.conf")

    # Start
    run("bitcoind -regtest -daemon")

    # Wait for RPC to become ready
    print(" … waiting for bitcoind RPC …", end="", flush=True)
    for i in range(30):
        time.sleep(1)
        try:
            res = subprocess.run("bitcoin-cli -regtest getblockchaininfo",
                                 shell=True, capture_output=True, text=True,
                                 timeout=10)
        except subprocess.TimeoutExpired:
            # A hung probe just means "not ready yet"; keep polling.
            continue
        if res.returncode == 0:
            print(f" ready after {i+1}s")
            break
    else:
        # Loop finished without a successful probe.
        print(f"\n {R}ERROR: bitcoind didn't start after 30s{RST}")
        sys.exit(1)
    print(" ✓ bitcoind started")

    # Create wallets
    for w in WALLETS:
        btc(f'createwallet "{w}"')
    print(f" ✓ Created wallets: {', '.join(WALLETS)}")

    # Mine 110 blocks to get mature coinbases
    addr = btcw("miner", 'getnewaddress "" bech32')
    btc(f"generatetoaddress 110 {addr}")
    balance = btcw("miner", "getbalance")
    print(f" ✓ Mined 110 blocks — miner balance: {balance} BTC")


def _run_script(script, *args, timeout=300):
    """Run a sibling Python script with the current interpreter.

    Prints ``<script> FAILED:`` plus the captured stderr (or a timeout
    note) and exits with code 1 when the script fails or hangs.

    Returns:
        The script's captured stdout.
    """
    try:
        result = subprocess.run(
            [sys.executable, os.path.join(DIR, script), *args],
            capture_output=True, text=True, timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print(f" {R}{script} FAILED:{RST}")
        print(f"timed out after {timeout}s")
        sys.exit(1)
    if result.returncode != 0:
        print(f" {R}{script} FAILED:{RST}")
        print(result.stderr)
        sys.exit(1)
    return result.stdout


# ─────────────────────────────────────────────────────────────────────────────
# Step 2: Reproduce
# ─────────────────────────────────────────────────────────────────────────────
def run_reproduce():
    """Run reproduce.py, echo its progress lines, and return its stdout."""
    banner("Step 2: Run reproduce.py (create 12 vulnerability scenarios)")
    output = _run_script("reproduce.py")

    # Count successes (every check mark in the output is counted)
    successes = output.count("✓")
    print(f" ✓ reproduce.py completed — {successes} scenario(s) created")

    # Print abbreviated output
    for line in output.split("\n"):
        if "✓" in line or "REPRODUCE" in line:
            print(f" {line.strip()}")
    return output


# ─────────────────────────────────────────────────────────────────────────────
# Step 3: Detect
# ─────────────────────────────────────────────────────────────────────────────
def run_detect():
    """Run detect.py against the ``alice`` wallet and return its stdout."""
    banner("Step 3: Run detect.py --wallet alice")
    output = _run_script(
        "detect.py",
        "--wallet", "alice",
        "--known-risky-wallets", "risky",
        "--known-exchange-wallets", "exchange",
    )
    print(" ✓ detect.py completed")
    return output


# ─────────────────────────────────────────────────────────────────────────────
# Step 4: Parse & verify
# ─────────────────────────────────────────────────────────────────────────────
# detector id -> (display name, header text searched for as a plain
# substring of each detect.py output line)
DETECTORS = {
    1: ("Address Reuse", r"1 · Address Reuse"),
    2: ("CIOH", r"2 · Common Input Ownership"),
    3: ("Dust UTXO Detection", r"3 · Dust UTXO Detection"),
    4: ("Dust Spent with Normal", r"4 · Dust Spent with Normal"),
    5: ("Change Output Detection", r"5 · Probable Change Output"),
    6: ("Consolidation Origin", r"6 · UTXOs from Prior Consolidation"),
    7: ("Script Type Mixing", r"7 · Script Type Mixing"),
    8: ("Cluster Merge", r"8 · Cluster Merge"),
    9: ("UTXO Age / Lookback", r"9 · UTXO Age"),
    10: ("Exchange Origin", r"10 · Probable Exchange Origin"),
    11: ("Tainted UTXOs", r"11 · Tainted UTXOs"),
    12: ("Behavioral Fingerprint", r"12 · Behavioral Fingerprint"),
}


def _count_by_detector(detect_output):
    """Split detect.py output into per-detector sections and count hits.

    A line containing a detector's header text opens (or re-opens, with
    counters reset) that detector's section; every line from there until
    the next header is attributed to it.

    Returns:
        ``{detector_id: {"findings": int, "warnings": int, "lines": [str]}}``
        for every detector whose header appeared.
    """
    results = {}
    current_id = None
    for line in detect_output.split("\n"):
        # Check if this line starts a detector section
        for did, (_name, pattern) in DETECTORS.items():
            if pattern in line:
                current_id = did
                results[did] = {"findings": 0, "warnings": 0, "lines": []}
                break

        # Count findings/warnings within current section
        if current_id is not None:
            section = results[current_id]
            if "FINDING" in line:
                section["findings"] += 1
            if "WARNING" in line:
                section["warnings"] += 1
            section["lines"].append(line)
    return results


def _summary_count(label, text):
    """Return the integer following ``<label>:`` in *text*, or 0 if absent."""
    m = re.search(rf"{label}:\s+(\d+)", text)
    return int(m.group(1)) if m else 0


def parse_and_verify(detect_output):
    """Print the proof table and report whether every detector fired.

    A detector passes when its section of *detect_output* contains at
    least one line with ``FINDING`` or ``WARNING``.

    Args:
        detect_output: Full stdout captured from detect.py.

    Returns:
        True if all detectors in ``DETECTORS`` passed, otherwise False.
    """
    banner("Step 4: Verification — does detect catch every reproduced vulnerability?")

    results = _count_by_detector(detect_output)

    # Also parse the summary line
    total_findings = _summary_count("Findings", detect_output)
    total_warnings = _summary_count("Warnings", detect_output)

    # ── Print proof table ──
    rule = f" {'─'*3} {'─'*30} {'─'*8} {'─'*8} {'─'*8}"
    print()
    print(f" {'#':>3} {'Detector':<30} {'Findings':>8} {'Warnings':>8} {'Status'}")
    print(rule)

    failed = []
    for did in sorted(DETECTORS):
        name = DETECTORS[did][0]
        r = results.get(did, {"findings": 0, "warnings": 0})
        f_count = r["findings"]
        w_count = r["warnings"]
        detected = f_count > 0 or w_count > 0
        status = f"{G}PASS ✓{RST}" if detected else f"{R}FAIL ✗{RST}"
        if not detected:
            failed.append(did)
        print(f" {did:>3} {name:<30} {f_count:>8} {w_count:>8} {status}")

    print(rule)
    print(f" {'':>3} {'TOTAL':<30} {total_findings:>8} {total_warnings:>8}")
    print()

    if not failed:
        print(f" {G}{B}═══ ALL {len(DETECTORS)} DETECTORS FIRED — PROOF COMPLETE ═══{RST}")
        print(f" {G}Every reproduced vulnerability was caught by detect.py on regtest.{RST}")
        return True

    print(f" {R}{B}═══ FAILURE — {len(failed)} detector(s) did not fire ═══{RST}")
    for did in failed:
        print(f" {R} Detector {did}: {DETECTORS[did][0]}{RST}")
    return False


# ─────────────────────────────────────────────────────────────────────────────
# Main
# ─────────────────────────────────────────────────────────────────────────────
def main():
    """Run all steps and exit 0 when every detector fired, 1 otherwise."""
    print(f"\n{B}{'═' * 70}{RST}")
    print(f"{B}{C} VERIFY: reproduce → detect end-to-end proof on REGTEST{RST}")
    print(f"{B}{'═' * 70}{RST}")

    setup_regtest()
    run_reproduce()
    detect_output = run_detect()
    passed = parse_and_verify(detect_output)

    print()
    sys.exit(0 if passed else 1)


if __name__ == "__main__":
    main()