Files
stealth/backend/script/detect.py
LORDBABUINO 78d335b571 feat: structured JSON output for detect.py
Replace flat string findings with typed objects following a unified
{ type, severity, description, details } envelope across all 12
detectors. Full TXIDs and addresses, no truncation.
2026-02-27 02:06:31 -03:00

1198 lines
45 KiB
Python

#!/usr/bin/env python3
"""
detect.py
=========
Blockchain privacy vulnerability detector.
INPUT: One or more output descriptors (or --wallet <name> to read them).
OUTPUT: Every privacy vulnerability found for that descriptor's address set.
The detector creates a temporary watch-only wallet, imports descriptors with
a full rescan, then analyses all historical transactions touching any derived
address. It never scans the entire chain — only transactions the wallet knows.
Usage:
python3 detect.py --wallet alice
python3 detect.py "wpkh([fp/84h/1h/0h]tpub.../0/*)#checksum" "wpkh([fp/84h/1h/0h]tpub.../1/*)#checksum"
python3 detect.py --wallet alice --known-risky-wallets risky --known-exchange-wallets exchange
"""
import sys
import os
import json
import time
import hashlib
import argparse
from collections import defaultdict
from math import log2
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from bitcoin_rpc import cli, get_tx
FINDINGS = []
WARNINGS = []
def section(title):
    """Write a bracketed section header to stderr (stdout is reserved for JSON)."""
    sys.stderr.write(f"[{title}]\n")
def finding(msg):
    """Record one finding (a structured dict) in the global FINDINGS list for the final JSON report."""
    FINDINGS.append(msg)
def warn(msg):
    """Record one warning (a structured dict) in the global WARNINGS list for the final JSON report."""
    WARNINGS.append(msg)
def ok(msg):
    """Write a passing-check notice to stderr (stdout is reserved for JSON)."""
    sys.stderr.write(f"ok: {msg}\n")
def info(msg):
    """Write an indented informational line to stderr (stdout is reserved for JSON)."""
    sys.stderr.write(f" {msg}\n")
# ═══════════════════════════════════════════════════════════════════════════════
# 1. WALLET + ADDRESS RESOLUTION
# ═══════════════════════════════════════════════════════════════════════════════
def resolve_descriptors(args):
    """Collect descriptor metadata from --wallet or positional descriptor args.

    Returns a list of dicts: {"desc", "internal", "active", "range_end"}.
    For positional descriptors, the sibling chain is added automatically
    (/0/* receive gets a /1/* change twin and vice versa).

    BUGFIX: descriptors are now deduplicated by their normalized form —
    previously, passing both /0/* and /1/* explicitly (as the module usage
    example does) expanded each raw descriptor into BOTH chains, so every
    descriptor was emitted twice.
    """
    descs = []
    if args.wallet:
        # Pull the authoritative descriptor set straight from the wallet.
        result = cli("listdescriptors", wallet=args.wallet)
        for d in result["descriptors"]:
            rng = d.get("range")
            descs.append({
                "desc": d["desc"],
                "internal": d.get("internal", False),
                "active": d.get("active", True),
                # "range" may be [begin, end] or a bare end value; default 999.
                "range_end": rng[1] if isinstance(rng, list) else d.get("range", 999),
            })
    else:
        seen = set()  # normalized descriptor strings already emitted
        for raw in args.descriptors:
            base = raw.split("#")[0]  # strip checksum; getdescriptorinfo re-adds it
            # Expand to the (receive, change) descriptor pair when a chain
            # marker is present; otherwise keep the single descriptor.
            if "/0/*" in base:
                candidates = [(base, False), (base.replace("/0/*", "/1/*"), True)]
            elif "/1/*" in base:
                candidates = [(base.replace("/1/*", "/0/*"), False), (base, True)]
            else:
                candidates = [(base, False)]
            for desc, internal in candidates:
                try:
                    normalized = cli("getdescriptorinfo", desc)["descriptor"]
                except Exception:
                    normalized = desc  # node unreachable or invalid: keep raw form
                if normalized in seen:
                    continue  # already emitted via another raw arg's expansion
                seen.add(normalized)
                descs.append({
                    "desc": normalized,
                    "internal": internal,
                    "active": True,
                    "range_end": 999,
                })
    return descs
def derive_all_addresses(descriptors):
    """Expand every descriptor into concrete addresses.

    Returns {address: {"type", "internal", "index"}} covering derivation
    indices 0..min(range_end, 999) for each descriptor. Descriptors that
    fail to derive are reported via info() and skipped.
    """
    # Descriptor prefix -> script type (most specific prefix first).
    prefix_types = (
        ("sh(wpkh(", "p2sh-p2wpkh"),
        ("wpkh(", "p2wpkh"),
        ("tr(", "p2tr"),
        ("pkh(", "p2pkh"),
    )
    addr_map = {}
    for dinfo in descriptors:
        desc = dinfo["desc"]
        upper = min(dinfo["range_end"], 999)
        dtype = next((t for p, t in prefix_types if desc.startswith(p)), "unknown")
        try:
            derived = cli("deriveaddresses", desc, f"[0,{upper}]")
        except Exception as e:
            info(f"Could not derive from {desc[:40]}…: {e}")
            continue
        for idx, addr in enumerate(derived or []):
            addr_map[addr] = {
                "type": dtype,
                "internal": dinfo["internal"],
                "index": idx,
            }
    return addr_map
def build_scan_wallet(descriptors, wallet_name="_detect_scan"):
    """Create (or reload) a watch-only descriptor wallet and import with full rescan.

    Returns the wallet name. Import failures are reported via info() but do
    not abort the scan.
    """
    # Drop any stale instance of the scan wallet first.
    try:
        cli("unloadwallet", wallet_name)
    except Exception:
        pass
    # createwallet(name, disable_private_keys, blank, passphrase,
    #              avoid_reuse, descriptors); fall back to loading an
    # already-existing wallet if creation fails.
    try:
        cli("createwallet", wallet_name, "true", "true", "", "false", "true")
    except Exception:
        try:
            cli("loadwallet", wallet_name)
        except Exception:
            pass
    batch = [
        {
            "desc": d["desc"],
            "timestamp": 0,  # full rescan
            "internal": d["internal"],
            "active": d["active"],
            "range": [0, d["range_end"]],
        }
        for d in descriptors
    ]
    result = cli("importdescriptors", json.dumps(batch), wallet=wallet_name)
    # Surface any per-descriptor import failures.
    for entry in (result or []):
        if not entry.get("success"):
            info(f"Import warning: {entry.get('error', {}).get('message', 'unknown')}")
    return wallet_name
def get_all_transactions(wallet_name, count=10000):
    """Return up to `count` wallet history entries (empty list when RPC yields none)."""
    history = cli("listtransactions", "*", count, 0, "true", wallet=wallet_name)
    return history if history else []
def get_all_utxos(wallet_name):
    """Return every UTXO the wallet knows, confirmed or not (empty list on None)."""
    unspent = cli("listunspent", 0, 9999999, wallet=wallet_name)
    return unspent if unspent else []
# ═══════════════════════════════════════════════════════════════════════════════
# 2. TRANSACTION GRAPH BUILDER
# ═══════════════════════════════════════════════════════════════════════════════
class TxGraph:
    """Indexed view of all transactions touching our address set."""

    def __init__(self, addr_map, wallet_txs, utxos):
        # Static address knowledge.
        self.addr_map = addr_map            # {address -> metadata}
        self.our_addrs = set(addr_map)      # fast membership for is_ours()
        self.utxos = utxos                  # current UTXO snapshot
        # Lazily-filled decode cache for fetch_tx().
        self.tx_cache = {}                  # txid -> decoded tx
        # Indexes built from the wallet's history entries.
        self.our_txids = set()              # every txid we participate in
        self.addr_txs = defaultdict(list)   # address -> [{txid, category, amount, ...}]
        self.tx_addrs = defaultdict(set)    # txid -> set of our addresses involved
        for entry in wallet_txs:
            txid = entry.get("txid", "")
            if not txid:
                continue
            self.our_txids.add(txid)
            address = entry.get("address", "")
            if not address:
                continue
            self.addr_txs[address].append({
                "txid": txid,
                "category": entry.get("category", ""),
                "amount": entry.get("amount", 0),
                "confirmations": entry.get("confirmations", 0),
                "blockheight": entry.get("blockheight", 0),
            })
            self.tx_addrs[txid].add(address)

    def fetch_tx(self, txid):
        """Return the decoded transaction, caching results; None when lookup fails."""
        if txid in self.tx_cache:
            return self.tx_cache[txid]
        try:
            self.tx_cache[txid] = get_tx(txid)
        except Exception:
            return None
        return self.tx_cache[txid]

    def get_input_addresses(self, txid):
        """Resolve each input's previous output: [{address, value, txid, vout}]."""
        tx = self.fetch_tx(txid)
        if not tx:
            return []
        resolved = []
        for vin in tx.get("vin", []):
            if vin.get("coinbase"):
                continue  # coinbase inputs have no previous output to resolve
            parent = self.fetch_tx(vin["txid"])
            if not parent:
                continue
            prev_out = parent["vout"][vin["vout"]]
            resolved.append({
                "address": prev_out.get("scriptPubKey", {}).get("address", ""),
                "value": prev_out.get("value", 0),
                "txid": vin["txid"],
                "vout": vin["vout"],
            })
        return resolved

    def get_output_addresses(self, txid):
        """List every output of the transaction: [{address, value, n, type}]."""
        tx = self.fetch_tx(txid)
        if not tx:
            return []
        return [
            {
                "address": vout.get("scriptPubKey", {}).get("address", ""),
                "value": vout["value"],
                "n": vout["n"],
                "type": vout.get("scriptPubKey", {}).get("type", "unknown"),
            }
            for vout in tx.get("vout", [])
        ]

    def is_ours(self, address):
        """True when the address was derived from one of our descriptors."""
        return address in self.our_addrs

    def get_script_type(self, address):
        """Script type for an address: descriptor metadata first, else a prefix heuristic."""
        meta = self.addr_map.get(address)
        if meta:
            return meta["type"]
        # Prefix heuristics cover mainnet, testnet/signet and regtest encodings.
        for prefixes, stype in (
            (("tb1q", "bc1q", "bcrt1q"), "p2wpkh"),
            (("tb1p", "bc1p", "bcrt1p"), "p2tr"),
            (("2", "3"), "p2sh-p2wpkh"),
        ):
            if address.startswith(prefixes):
                return stype
        return "unknown"
# ═══════════════════════════════════════════════════════════════════════════════
# 3. VULNERABILITY DETECTORS
#
# Each detector receives the TxGraph and reports findings.
# ═══════════════════════════════════════════════════════════════════════════════
def detect_01_address_reuse(g: TxGraph):
    """Flag any of our addresses credited by two or more distinct transactions."""
    section("1 · Address Reuse")
    reused = {}
    for addr in g.our_addrs:
        # Distinct TXIDs in which this address was on the receiving side.
        rx_txids = {e["txid"] for e in g.addr_txs.get(addr, []) if e["category"] == "receive"}
        if len(rx_txids) > 1:
            reused[addr] = rx_txids
    if not reused:
        ok("No address reuse detected.")
        return
    for addr, txids in reused.items():
        meta = g.addr_map.get(addr, {})
        role = "change" if meta.get("internal") else "receive"
        tx_list = []
        for txid in sorted(txids):
            tx = g.fetch_tx(txid)
            confs = tx.get("confirmations", 0) if tx else 0
            tx_list.append({"txid": txid, "confirmations": confs})
        finding({
            "type": "ADDRESS_REUSE",
            "severity": "HIGH",
            "description": f"Address {addr} ({role}) reused across {len(txids)} transactions",
            "details": {
                "address": addr,
                "role": role,
                "tx_count": len(txids),
                "txids": tx_list,
            },
        })
def detect_02_cioh(g: TxGraph):
    """Flag transactions whose inputs merge two or more of our addresses (CIOH)."""
    section("2 · Common Input Ownership Heuristic (CIOH)")
    hits = 0
    for txid in g.our_txids:
        tx = g.fetch_tx(txid)
        if not tx or len(tx.get("vin", [])) < 2:
            continue
        input_addrs = g.get_input_addresses(txid)
        if len(input_addrs) < 2:
            continue
        # Split inputs into ours vs external.
        ours, theirs = [], []
        for ia in input_addrs:
            (ours if g.is_ours(ia["address"]) else theirs).append(ia)
        if len(ours) < 2:
            continue  # a single input of ours does not link our addresses
        hits += 1
        total = len(input_addrs)
        pct = len(ours) / total * 100
        # CRITICAL when every input is ours (full cluster exposure).
        finding({
            "type": "CIOH",
            "severity": "CRITICAL" if len(ours) == total else "HIGH",
            "description": f"TX {txid} merges {len(ours)}/{total} of your inputs ({round(pct)}% ownership)",
            "details": {
                "txid": txid,
                "total_inputs": total,
                "our_inputs": len(ours),
                "external_inputs": len(theirs),
                "ownership_pct": round(pct),
                "our_addresses": [
                    {
                        "address": ia["address"],
                        "role": "change" if g.addr_map.get(ia["address"], {}).get("internal") else "receive",
                        "amount_btc": round(ia["value"], 8),
                    }
                    for ia in ours
                ],
            },
        })
    if not hits:
        ok("No multi-input transactions with ≥2 of your addresses detected.")
def detect_03_dust(g: TxGraph):
    """Detect dust UTXOs (current and historical).

    Two passes: (1) current UTXOs at or below the dust-class ceiling,
    reported HIGH when under the classic 546-sat limit, else MEDIUM;
    (2) historical dust outputs to our addresses that are no longer in the
    UTXO set (already spent), reported LOW.
    """
    section("3 · Dust UTXO Detection")
    DUST_SATS = 1000      # dust-class ceiling used by this detector
    STRICT_DUST = 546     # classic dust threshold
    found = []
    # Pass 1: currently-unspent dust belonging to us.
    for utxo in g.utxos:
        sats = int(round(utxo["amount"] * 1e8))
        if sats <= DUST_SATS and g.is_ours(utxo.get("address", "")):
            found.append(utxo)
    # Also check historical: any tx that sent dust to our addresses
    hist_dust = []
    for txid in g.our_txids:
        outputs = g.get_output_addresses(txid)
        for out in outputs:
            sats = int(round(out["value"] * 1e8))
            if sats <= DUST_SATS and g.is_ours(out["address"]):
                hist_dust.append({"txid": txid, "address": out["address"], "sats": sats})
    if not found and not hist_dust:
        ok("No dust UTXOs detected.")
        return
    if found:
        for u in found:
            sats = int(round(u["amount"] * 1e8))
            label = "STRICT_DUST" if sats <= STRICT_DUST else "dust-class"
            finding({
                "type": "DUST",
                "severity": "HIGH" if label == "STRICT_DUST" else "MEDIUM",
                "description": f"Dust UTXO at {u['address']} ({sats} sats, {label}, unspent)",
                "details": {
                    "status": "unspent",
                    "address": u["address"],
                    "sats": sats,
                    "label": label,
                    "txid": u["txid"],
                    "vout": u["vout"],
                },
            })
    # Deduplicate historical entries by (txid, address) — a tx can pay the
    # same address in several outputs.
    seen = set()
    unique_hist = []
    for h in hist_dust:
        key = (h["txid"], h["address"])
        if key not in seen:
            seen.add(key)
            unique_hist.append(h)
    if unique_hist:
        # Skip entries already reported above as unspent dust.
        current_keys = {(u["txid"], u.get("address", "")) for u in found}
        for h in unique_hist:
            if (h["txid"], h["address"]) not in current_keys:
                finding({
                    "type": "DUST",
                    "severity": "LOW",
                    "description": f"Historical dust output at {h['address']} ({h['sats']} sats, already spent)",
                    "details": {
                        "status": "spent",
                        "address": h["address"],
                        "sats": h["sats"],
                        "txid": h["txid"],
                    },
                })
def detect_04_dust_spending(g: TxGraph):
    """Flag TXs where our dust inputs were co-spent with clearly normal inputs."""
    section("4 · Dust Spent with Normal Inputs")
    DUST_SATS = 1000
    reported = False
    for txid in g.our_txids:
        input_addrs = g.get_input_addresses(txid)
        if not input_addrs or len(input_addrs) < 2:
            continue
        dust, normal = [], []
        for ia in input_addrs:
            if not g.is_ours(ia["address"]):
                continue
            sats = int(round(ia["value"] * 1e8))
            if sats <= DUST_SATS:
                dust.append(ia)
            elif sats > 10000:  # >10k sats: unambiguously a normal-sized input
                normal.append(ia)
        if not (dust and normal):
            continue
        reported = True
        finding({
            "type": "DUST_SPENDING",
            "severity": "HIGH",
            "description": f"TX {txid} spends {len(dust)} dust input(s) alongside {len(normal)} normal input(s)",
            "details": {
                "txid": txid,
                "dust_inputs": [{"address": d["address"], "sats": int(round(d["value"] * 1e8))} for d in dust],
                "normal_inputs": [{"address": n["address"], "amount_btc": round(n["value"], 8)} for n in normal],
            },
        })
    if not reported:
        ok("No dust spending mixed with normal inputs detected.")
def detect_05_change_detection(g: TxGraph):
    """Detect transactions where change output is easily distinguishable.

    Looks at our send TXs (≥1 input ours) that have both our own outputs
    (change candidates) and external outputs (payments), then applies three
    observer heuristics:
      1. payment amount is round while change is not,
      2. change script type matches the inputs while the payment's differs,
      3. change goes to an internal (/1/*) derivation path.
    Emits one MEDIUM finding per affected TX, keeping the first 6 reasons.
    """
    section("5 · Probable Change Output Detection")
    found_any = False
    for txid in g.our_txids:
        tx = g.fetch_tx(txid)
        if not tx:
            continue
        outputs = g.get_output_addresses(txid)
        input_addrs = g.get_input_addresses(txid)
        if not outputs or len(outputs) < 2:
            continue
        # We only care about sends (where at least 1 input is ours)
        our_in = [ia for ia in input_addrs if g.is_ours(ia["address"])]
        if not our_in:
            continue
        # Identify which outputs are ours (change) vs external (payment)
        our_outs = [o for o in outputs if g.is_ours(o["address"])]
        ext_outs = [o for o in outputs if not g.is_ours(o["address"])]
        if not our_outs or not ext_outs:
            continue  # can't distinguish change if all outputs are ours or all external
        # Check change-detection heuristics
        problems = []
        for change in our_outs:
            ch_sats = int(round(change["value"] * 1e8))
            # "Round" means an exact multiple of 100k or 1M sats.
            ch_round = ch_sats % 100000 == 0 or ch_sats % 1000000 == 0
            for payment in ext_outs:
                pay_sats = int(round(payment["value"] * 1e8))
                pay_round = pay_sats % 100000 == 0 or pay_sats % 1000000 == 0
                # Heuristic 1: payment is round, change is not
                if pay_round and not ch_round:
                    problems.append(f"Round payment ({pay_sats} sats) vs non-round change ({ch_sats} sats)")
                # Heuristic 2: change has same script type as input
                # (NOTE: heuristics 1–2 can append once per payment output.)
                in_types = set(g.get_script_type(ia["address"]) for ia in our_in)
                ch_type = g.get_script_type(change["address"])
                if ch_type in in_types and change["type"] != payment["type"]:
                    problems.append(
                        f"Change script type ({change['type']}) matches input type — different from payment ({payment['type']})"
                    )
            # Heuristic 3: change address is internal (derivation /1/*)
            ch_meta = g.addr_map.get(change["address"], {})
            if ch_meta.get("internal"):
                problems.append("Change uses an internal (BIP-44 /1/*) derivation path — standard wallet change pattern")
        if problems:
            found_any = True
            finding({
                "type": "CHANGE_DETECTION",
                "severity": "MEDIUM",
                "description": f"TX {txid} has identifiable change output(s) ({len(problems)} heuristic(s) matched)",
                "details": {
                    "txid": txid,
                    # Cap the reason list — heuristics may fire per output pair.
                    "reasons": problems[:6],
                    "change_outputs": [{"address": co["address"], "amount_btc": round(co["value"], 8)} for co in our_outs],
                },
            })
    if not found_any:
        ok("No easily identifiable change outputs detected.")
def detect_06_consolidation_origin(g: TxGraph):
    """Flag current UTXOs whose parent transaction looks like a consolidation."""
    section("6 · UTXOs from Prior Consolidation")
    CONSOLIDATION_THRESHOLD = 3  # ≥3 inputs feeding ≤2 outputs = consolidation
    reported = False
    for utxo in g.utxos:
        if not g.is_ours(utxo.get("address", "")):
            continue
        parent = g.fetch_tx(utxo["txid"])
        if not parent:
            continue
        n_in = len(parent.get("vin", []))
        n_out = len(parent.get("vout", []))
        if n_in < CONSOLIDATION_THRESHOLD or n_out > 2:
            continue
        reported = True
        # How many of the consolidated inputs belonged to us?
        ours_in_parent = [
            ia for ia in g.get_input_addresses(utxo["txid"]) if g.is_ours(ia["address"])
        ]
        finding({
            "type": "CONSOLIDATION",
            "severity": "MEDIUM",
            "description": f"UTXO {utxo['txid']}:{utxo['vout']} ({utxo['amount']:.8f} BTC) born from a {n_in}-input consolidation",
            "details": {
                "txid": utxo["txid"],
                "vout": utxo["vout"],
                "amount_btc": round(utxo["amount"], 8),
                "consolidation_inputs": n_in,
                "consolidation_outputs": n_out,
                "our_inputs_in_consolidation": len(ours_in_parent),
            },
        })
    if not reported:
        ok("No UTXOs from prior consolidation detected.")
def detect_07_script_type_mixing(g: TxGraph):
    """Flag TXs with ≥2 of our inputs whose inputs span multiple script types."""
    section("7 · Script Type Mixing in Inputs")
    reported = False
    for txid in g.our_txids:
        input_addrs = g.get_input_addresses(txid)
        if len(input_addrs) < 2:
            continue
        if sum(1 for ia in input_addrs if g.is_ours(ia["address"])) < 2:
            continue
        # Distinct known script types across ALL inputs (ours and external).
        types = {g.get_script_type(ia["address"]) for ia in input_addrs} - {"unknown"}
        if len(types) < 2:
            continue
        reported = True
        finding({
            "type": "SCRIPT_TYPE_MIXING",
            "severity": "HIGH",
            "description": f"TX {txid} mixes input script types: {sorted(types)}",
            "details": {
                "txid": txid,
                "script_types": sorted(types),
                "inputs": [
                    {"address": ia["address"], "script_type": g.get_script_type(ia["address"]), "ours": g.is_ours(ia["address"])}
                    for ia in input_addrs
                ],
            },
        })
    if not reported:
        ok("No script type mixing detected.")
def detect_08_cluster_merge(g: TxGraph):
    """Detect transactions that merge UTXOs from different funding sources (clusters).

    For every TX spending ≥2 of our inputs, each input is traced one hop back
    to the set of grandparent txids that funded it. If any two inputs have
    fully disjoint funding sets, the TX merged previously-unlinked clusters.

    BUGFIX: txids in the report were truncated to 16 hex chars, violating the
    module's output contract of full TXIDs with no truncation; full txids are
    now emitted (this also removes any theoretical prefix-collision in the
    disjointness test).
    """
    section("8 · Cluster Merge (Cross-Origin Input Mixing)")
    found_any = False
    for txid in g.our_txids:
        input_addrs = g.get_input_addresses(txid)
        if len(input_addrs) < 2:
            continue
        our_in = [ia for ia in input_addrs if g.is_ours(ia["address"])]
        if len(our_in) < 2:
            continue
        # our input outpoint ("txid:vout") -> set of grandparent funding txids
        funding_sources = {}
        for ia in our_in:
            parent_tx = g.fetch_tx(ia["txid"])
            if not parent_tx:
                continue
            gp_sources = set()
            for p_vin in parent_tx.get("vin", []):
                if p_vin.get("coinbase"):
                    gp_sources.add("coinbase")
                else:
                    gp_sources.add(p_vin["txid"])  # full txid, no truncation
            funding_sources[f"{ia['txid']}:{ia['vout']}"] = gp_sources
        all_sources = list(funding_sources.values())
        if len(all_sources) < 2:
            continue
        # Any pair of disjoint source sets means inputs came from unrelated
        # funding chains (different clusters).
        merged_clusters = any(
            all_sources[i].isdisjoint(all_sources[j])
            for i in range(len(all_sources))
            for j in range(i + 1, len(all_sources))
        )
        if merged_clusters:
            found_any = True
            finding({
                "type": "CLUSTER_MERGE",
                "severity": "HIGH",
                "description": f"TX {txid} merges UTXOs from {len(funding_sources)} different funding chains",
                "details": {
                    "txid": txid,
                    "funding_sources": {k: sorted(v) for k, v in funding_sources.items()},
                },
            })
    if not found_any:
        ok("No cross-origin cluster merges detected.")
def detect_09_lookback_depth(g: TxGraph):
    """Report the age spread between our oldest and newest UTXOs (dormancy signal)."""
    section("9 · UTXO Age / Lookback Depth")
    if not g.utxos:
        ok("No UTXOs to analyze.")
        return
    our_utxos = [u for u in g.utxos if g.is_ours(u.get("address", ""))]
    if not our_utxos:
        ok("No UTXOs belonging to the descriptor.")
        return
    aged = [{"utxo": u, "confirmations": u.get("confirmations", 0)} for u in our_utxos]
    if len(aged) < 2:
        ok("Only one UTXO, no age comparison possible.")
        return
    aged.sort(key=lambda entry: entry["confirmations"], reverse=True)
    oldest, newest = aged[0], aged[-1]
    spread = oldest["confirmations"] - newest["confirmations"]
    if spread < 10:
        ok(f"UTXO age spread is small ({spread} blocks). No dormancy pattern.")
        return
    finding({
        "type": "UTXO_AGE_SPREAD",
        "severity": "LOW",
        "description": f"UTXO age spread of {spread} blocks between oldest and newest",
        "details": {
            "spread_blocks": spread,
            "oldest": {"txid": oldest["utxo"]["txid"], "confirmations": oldest["confirmations"], "amount_btc": round(oldest["utxo"]["amount"], 8)},
            "newest": {"txid": newest["utxo"]["txid"], "confirmations": newest["confirmations"], "amount_btc": round(newest["utxo"]["amount"], 8)},
        },
    })
    OLD_THRESHOLD = 100  # blocks; at/past this a coin counts as dormant
    dormant = [a for a in aged if a["confirmations"] >= OLD_THRESHOLD]
    if dormant:
        warn({
            "type": "DORMANT_UTXOS",
            "severity": "LOW",
            "description": f"{len(dormant)} UTXO(s) have ≥{OLD_THRESHOLD} confirmations (dormant/hoarded coins pattern)",
            "details": {
                "count": len(dormant),
                "threshold_blocks": OLD_THRESHOLD,
            },
        })
def detect_10_exchange_origin(g: TxGraph, known_exchange_wallets=None):
    """Detect UTXOs that likely originated from exchange batch withdrawals.

    Suspects a TX when we only RECEIVE in it (none of its inputs are ours)
    and at least two signals fire: high output count, many unique recipient
    addresses, txid present in a known exchange wallet's history, or total
    input value more than 10× the median output (hot-wallet pattern).

    known_exchange_wallets: optional list of wallet names whose transaction
    histories mark txids as exchange-originated.
    """
    section("10 · Probable Exchange Origin")
    # Build set of known exchange txids if wallet names provided
    exchange_txids = set()
    if known_exchange_wallets:
        for ew in known_exchange_wallets:
            try:
                etxs = cli("listtransactions", "*", 10000, 0, "true", wallet=ew)
                for etx in (etxs or []):
                    if etx.get("txid"):
                        exchange_txids.add(etx["txid"])
            except Exception:
                pass  # wallet missing or unloadable: contribute no txids
    BATCH_THRESHOLD = 5  # ≥5 outputs = likely batch withdrawal
    found_any = False
    for txid in g.our_txids:
        tx = g.fetch_tx(txid)
        if not tx:
            continue
        n_out = len(tx.get("vout", []))
        if n_out < BATCH_THRESHOLD:
            continue
        # Check: do we RECEIVE in this tx? (we're a recipient, not sender)
        our_inputs = [ia for ia in g.get_input_addresses(txid) if g.is_ours(ia["address"])]
        our_outputs = [o for o in g.get_output_addresses(txid) if g.is_ours(o["address"])]
        if our_inputs:
            # We're a sender in a many-output TX — that's OUR batch, not exchange
            continue
        if not our_outputs:
            continue
        # Heuristics for exchange batch
        signals = []
        # 1. High output count
        signals.append(f"High output count: {n_out}")
        # 2. Many unique addresses
        unique_addrs = set()
        for vout in tx["vout"]:
            a = vout.get("scriptPubKey", {}).get("address", "")
            if a:
                unique_addrs.add(a)
        if len(unique_addrs) >= BATCH_THRESHOLD:
            signals.append(f"{len(unique_addrs)} unique recipient addresses")
        # 3. Known exchange wallet
        if txid in exchange_txids:
            signals.append("TX matches known exchange wallet history")
        # 4. Large input relative to individual outputs
        input_addrs = g.get_input_addresses(txid)
        input_total = sum(ia["value"] for ia in input_addrs)
        output_vals = sorted(v.get("value", 0) for v in tx["vout"])
        if output_vals:
            # Upper median of the sorted output values.
            median_out = output_vals[len(output_vals) // 2]
            if median_out > 0:
                ratio = input_total / median_out
                if ratio > 10:
                    signals.append(f"Input/median-output ratio: {ratio:.0f}x (hot wallet pattern)")
        # Require at least two independent signals before reporting.
        if len(signals) >= 2:
            found_any = True
            finding({
                "type": "EXCHANGE_ORIGIN",
                "severity": "MEDIUM",
                "description": f"TX {txid} looks like an exchange batch withdrawal ({len(signals)} signal(s))",
                "details": {
                    "txid": txid,
                    "signals": signals,
                    "received_outputs": [{"address": o["address"], "amount_btc": round(o["value"], 8)} for o in our_outputs],
                },
            })
    if not found_any:
        ok("No exchange-origin batch patterns detected.")
def detect_11_tainted_utxos(g: TxGraph, known_risky_wallets=None):
    """Detect UTXOs that have taint from known risky sources.

    Requires known_risky_wallets (list of wallet names): every txid in those
    wallets' histories is treated as tainted. Reports (a) our TXs that merge
    tainted and clean inputs as HIGH findings, and (b) TXs we received
    directly from a risky source as HIGH warnings. Skips entirely when no
    risky wallets are supplied or none of them yields transactions.
    """
    section("11 · Tainted UTXOs / Risky Source Exposure")
    if not known_risky_wallets:
        info("No --known-risky-wallets provided. Skipping taint analysis.")
        info("(Provide wallet names to enable: --known-risky-wallets risky)")
        ok("Taint detection requires known-risky wallet metadata.")
        return
    # Build set of risky TXIDs
    risky_txids = set()
    for rw in known_risky_wallets:
        try:
            rtxs = cli("listtransactions", "*", 10000, 0, "true", wallet=rw)
            for rtx in (rtxs or []):
                if rtx.get("txid"):
                    risky_txids.add(rtx["txid"])
        except Exception:
            info(f"Could not read wallet '{rw}'")
    if not risky_txids:
        info("No transactions found in risky wallets.")
        return
    found_any = False
    for txid in g.our_txids:
        input_addrs = g.get_input_addresses(txid)
        our_in = [ia for ia in input_addrs if g.is_ours(ia["address"])]
        if not our_in or len(input_addrs) < 2:
            continue
        tainted = []
        clean = []
        for ia in input_addrs:
            # An input is tainted if its funding TX is in a risky wallet's history
            if ia["txid"] in risky_txids:
                tainted.append(ia)
            else:
                clean.append(ia)
        # Only a MIX of tainted and clean inputs spreads taint onto clean coins.
        if tainted and clean:
            found_any = True
            taint_pct = len(tainted) / len(input_addrs) * 100
            finding({
                "type": "TAINTED_UTXO_MERGE",
                "severity": "HIGH",
                "description": f"TX {txid} merges {len(tainted)} tainted + {len(clean)} clean inputs ({round(taint_pct)}% taint)",
                "details": {
                    "txid": txid,
                    "tainted_inputs": [{"address": t["address"], "amount_btc": round(t["value"], 8), "source_txid": t["txid"]} for t in tainted],
                    "clean_inputs": [{"address": c["address"], "amount_btc": round(c["value"], 8)} for c in clean],
                    "taint_pct": round(taint_pct),
                },
            })
    # Also check: did we receive directly from a risky source?
    for txid in g.our_txids:
        if txid in risky_txids:
            our_outs = [o for o in g.get_output_addresses(txid) if g.is_ours(o["address"])]
            if our_outs:
                found_any = True
                warn({
                    "type": "DIRECT_TAINT",
                    "severity": "HIGH",
                    "description": f"TX {txid} is directly from a known risky source",
                    "details": {
                        "txid": txid,
                        "received_outputs": [{"address": o["address"], "amount_btc": round(o["value"], 8)} for o in our_outs],
                    },
                })
    if not found_any:
        ok("No tainted UTXO merges detected.")
def detect_12_behavioral_fingerprint(g: TxGraph):
    """
    Analyze the descriptor's transaction set for patterns that make the user
    identifiable through behavioral consistency.
    We evaluate OBJECTIVE, measurable features that chain analysis firms
    actually use to cluster and fingerprint wallets.

    Extracted per send TX (a TX with ≥1 input ours): input/output counts,
    version, locktime, per-input RBF sequence signalling, script types of
    our inputs and of change/payment outputs, round-amount payment usage,
    and fee rate in sat/vB (inputs minus outputs over vsize). Requires at
    least 3 send TXs; emits at most one MEDIUM finding listing all matched
    patterns.
    """
    section("12 · Behavioral Fingerprint Analysis")
    # Collect send transactions (where we have inputs)
    send_txids = []
    for txid in g.our_txids:
        input_addrs = g.get_input_addresses(txid)
        our_in = [ia for ia in input_addrs if g.is_ours(ia["address"])]
        if our_in:
            send_txids.append(txid)
    if len(send_txids) < 3:
        ok(f"Only {len(send_txids)} send transactions — not enough data for fingerprinting.")
        return
    # ── Feature extraction ──
    output_counts = []
    payment_amounts_sats = []
    change_amounts_sats = []
    input_script_types = []
    output_script_types = []
    rbf_signals = []        # one bool per input: sequence opts into RBF?
    locktime_values = []
    fee_rates = []  # sat/vB
    n_inputs_list = []
    uses_round_amounts = 0
    total_payments = 0
    change_address_types_used = set()
    payment_address_types_used = set()
    version_numbers = set()
    for txid in send_txids:
        tx = g.fetch_tx(txid)
        if not tx:
            continue
        n_in = len(tx.get("vin", []))
        n_out = len(tx.get("vout", []))
        n_inputs_list.append(n_in)
        output_counts.append(n_out)
        # Version
        version_numbers.add(tx.get("version", 2))
        # Locktime
        locktime_values.append(tx.get("locktime", 0))
        # RBF signalling: any sequence below 0xfffffffe opts the input in.
        for vin in tx.get("vin", []):
            seq = vin.get("sequence", 0xffffffff)
            rbf_signals.append(seq < 0xfffffffe)
        # Input script types
        for ia in g.get_input_addresses(txid):
            if g.is_ours(ia["address"]):
                input_script_types.append(g.get_script_type(ia["address"]))
        # Output analysis: ours = change, external = payment.
        outputs = g.get_output_addresses(txid)
        for out in outputs:
            sats = int(round(out["value"] * 1e8))
            if g.is_ours(out["address"]):
                # Change output
                change_amounts_sats.append(sats)
                change_address_types_used.add(out["type"])
            else:
                # Payment output
                payment_amounts_sats.append(sats)
                output_script_types.append(out["type"])
                payment_address_types_used.add(out["type"])
                total_payments += 1
                # "Round" = exact multiple of 100k or 1M sats.
                if sats > 0 and (sats % 100000 == 0 or sats % 1000000 == 0):
                    uses_round_amounts += 1
        # Fee rate
        if "vsize" in tx and tx["vsize"] > 0:
            # Compute fee from inputs - outputs
            in_total = sum(ia["value"] for ia in g.get_input_addresses(txid))
            out_total = sum(v.get("value", 0) for v in tx["vout"])
            fee_sats = int(round((in_total - out_total) * 1e8))
            if fee_sats > 0:
                fee_rates.append(fee_sats / tx["vsize"])
    # ── Analysis ──
    problems = []
    # 1. Round amount usage pattern
    if total_payments > 0:
        round_pct = uses_round_amounts / total_payments * 100
        if round_pct > 60:
            problems.append(
                f"Round payment amounts: {round_pct:.0f}% of payments are round numbers. "
                "This is a distinctive behavioral pattern that aids clustering."
            )
    # 2. Consistent output count (always 2 outputs = simple spend pattern)
    if output_counts:
        # NOTE(review): avg_outs is computed but never used below.
        avg_outs = sum(output_counts) / len(output_counts)
        if all(c == output_counts[0] for c in output_counts) and len(output_counts) >= 3:
            problems.append(
                f"Uniform output count: all {len(output_counts)} send TXs have exactly "
                f"{output_counts[0]} outputs. Consistent structure aids fingerprinting."
            )
    # 3. Script type consistency or mixing
    input_types_set = set(input_script_types)
    if len(input_types_set) > 1:
        problems.append(
            f"Mixed input script types used across TXs: {input_types_set}. "
            "Mixing address families is rare and highly identifying."
        )
    elif len(input_types_set) == 1 and input_script_types:
        t = input_types_set.pop()
        if t == "p2pkh":
            problems.append(
                f"All inputs use legacy P2PKH — a very uncommon script type today. "
                "This alone narrows your anonymity set significantly."
            )
    # 4. RBF signaling consistency
    if rbf_signals:
        rbf_pct = sum(rbf_signals) / len(rbf_signals) * 100
        if rbf_pct == 100:
            problems.append(
                f"RBF always enabled: 100% of inputs signal replace-by-fee. "
                "While increasingly common, it's a distinguishing feature vs non-RBF wallets."
            )
        elif rbf_pct == 0:
            problems.append(
                "RBF never enabled: 0% of inputs signal replace-by-fee. "
                "This is uncommon in modern wallets and distinguishes your software."
            )
    # 5. Locktime pattern
    if locktime_values:
        nonzero_lt = [lt for lt in locktime_values if lt > 0]
        if len(nonzero_lt) == len(locktime_values) and len(locktime_values) >= 3:
            problems.append(
                "Anti-fee-sniping locktime always set — consistent with Bitcoin Core / Electrum. "
                "Absence or presence of this reveals your wallet software."
            )
        elif not nonzero_lt and len(locktime_values) >= 3:
            problems.append(
                "Locktime always 0 — no anti-fee-sniping. "
                "This distinguishes your wallet from Bitcoin Core / Electrum defaults."
            )
    # 6. Fee rate consistency
    if len(fee_rates) >= 3:
        avg_fee = sum(fee_rates) / len(fee_rates)
        if avg_fee > 0:
            variance = sum((f - avg_fee) ** 2 for f in fee_rates) / len(fee_rates)
            stddev = variance ** 0.5
            cv = stddev / avg_fee  # coefficient of variation
            if cv < 0.15:
                problems.append(
                    f"Very consistent fee rate: avg {avg_fee:.1f} sat/vB ± {stddev:.1f} "
                    f"(CV={cv:.2f}). Low variance suggests fixed-fee-rate wallet configuration."
                )
    # 7. Change address type pattern
    if change_address_types_used and payment_address_types_used:
        if change_address_types_used != payment_address_types_used:
            # This leaks which outputs are change
            problems.append(
                f"Change uses different script type ({change_address_types_used}) "
                f"than payments ({payment_address_types_used}) — trivially identifies change outputs."
            )
    # 8. Input count pattern (always 1 input = no consolidation; always many = distinctive)
    if n_inputs_list and len(n_inputs_list) >= 3:
        if all(n == 1 for n in n_inputs_list):
            pass  # normal, not distinctive
        elif all(n == n_inputs_list[0] for n in n_inputs_list) and n_inputs_list[0] > 1:
            problems.append(
                f"Always uses exactly {n_inputs_list[0]} inputs per TX — unusual and identifying."
            )
    # ── Report ──
    if not problems:
        ok(f"Analyzed {len(send_txids)} transactions. No strong behavioral fingerprints detected.")
        return
    finding({
        "type": "BEHAVIORAL_FINGERPRINT",
        "severity": "MEDIUM",
        "description": f"Behavioral fingerprint detected across {len(send_txids)} send transactions ({len(problems)} pattern(s))",
        "details": {
            "send_tx_count": len(send_txids),
            "patterns": problems,
        },
    })
# ═══════════════════════════════════════════════════════════════════════════════
# 4. MAIN
# ═══════════════════════════════════════════════════════════════════════════════
def main():
    """CLI entry point: resolve descriptors, build a scan view, run all detectors.

    Progress and per-detector notes go to stderr; the final JSON report
    (stats, findings, warnings, summary) is printed to stdout.
    """
    parser = argparse.ArgumentParser(
        description="Detect Bitcoin privacy vulnerabilities from output descriptors.",
        epilog="Examples:\n"
        " python3 detect.py --wallet alice\n"
        ' python3 detect.py --wallet alice --known-risky-wallets risky\n'
        ' python3 detect.py "wpkh(tpub.../0/*)#chk" "wpkh(tpub.../1/*)#chk"\n',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("descriptors", nargs="*", help="Output descriptors to scan")
    parser.add_argument("--wallet", "-w", help="Read descriptors from an existing wallet")
    parser.add_argument("--known-risky-wallets", nargs="*", default=None,
        help="Wallet names whose TXIDs are considered tainted")
    parser.add_argument("--known-exchange-wallets", nargs="*", default=None,
        help="Wallet names whose TXIDs are considered exchange-origin")
    parser.add_argument("--keep-scan-wallet", action="store_true",
        help="Don't delete the temporary scan wallet after running")
    args = parser.parse_args()
    if not args.wallet and not args.descriptors:
        parser.error("Provide either --wallet <name> or one or more descriptors.")
    # ── Step 1: Resolve descriptors ──
    section("Setup: Resolving Descriptors")
    descriptors = resolve_descriptors(args)
    info(f"Found {len(descriptors)} descriptors")
    for d in descriptors:
        dtype = d["desc"].split("(")[0]
        role = "internal/change" if d["internal"] else "external/receive"
        info(f" {dtype:15} {role:20} range [0..{d['range_end']}]")
    # ── Step 2: Derive all addresses ──
    section("Setup: Deriving Addresses")
    addr_map = derive_all_addresses(descriptors)
    info(f"Derived {len(addr_map)} addresses across all descriptor types")
    # Count by type
    type_counts = defaultdict(int)
    for meta in addr_map.values():
        type_counts[meta["type"]] += 1
    for t, c in sorted(type_counts.items()):
        info(f" {t}: {c} addresses")
    # ── Step 3: Build watch-only wallet ──
    section("Setup: Building Scan Wallet")
    scan_wallet = "_detect_scan"
    if args.wallet:
        # If they gave us a wallet, just use it directly — faster, no rescan needed
        scan_wallet = args.wallet
        info(f"Using existing wallet '{scan_wallet}' directly (no rescan needed)")
    else:
        scan_wallet = build_scan_wallet(descriptors)
        info(f"Created temporary watch-only wallet '{scan_wallet}' with full rescan")
    # ── Step 4: Gather transaction history ──
    section("Setup: Loading Transaction History")
    wallet_txs = get_all_transactions(scan_wallet)
    utxos = get_all_utxos(scan_wallet)
    info(f"Transaction history: {len(wallet_txs)} entries")
    info(f"Current UTXOs: {len(utxos)}")
    if not wallet_txs:
        # Still emit valid JSON on stdout so callers can parse the failure.
        print(json.dumps({"error": "No transactions found for these descriptors."}))
        return
    # ── Step 5: Build transaction graph ──
    g = TxGraph(addr_map, wallet_txs, utxos)
    info(f"Unique transaction IDs: {len(g.our_txids)}")
    # ── Step 6: Run all detectors (each appends to FINDINGS/WARNINGS) ──
    detect_01_address_reuse(g)
    detect_02_cioh(g)
    detect_03_dust(g)
    detect_04_dust_spending(g)
    detect_05_change_detection(g)
    detect_06_consolidation_origin(g)
    detect_07_script_type_mixing(g)
    detect_08_cluster_merge(g)
    detect_09_lookback_depth(g)
    detect_10_exchange_origin(g, args.known_exchange_wallets)
    detect_11_tainted_utxos(g, args.known_risky_wallets)
    detect_12_behavioral_fingerprint(g)
    # ── JSON output ──
    report = {
        "stats": {
            "transactions_analyzed": len(g.our_txids),
            "addresses_derived": len(addr_map),
        },
        "findings": FINDINGS,
        "warnings": WARNINGS,
        "summary": {
            "findings": len(FINDINGS),
            "warnings": len(WARNINGS),
            "clean": len(FINDINGS) == 0 and len(WARNINGS) == 0,
        },
    }
    print(json.dumps(report, indent=2))
    # Cleanup: drop the temporary scan wallet unless the user asked to keep it.
    if not args.wallet and not args.keep_scan_wallet:
        try:
            cli("unloadwallet", "_detect_scan")
        except Exception:
            pass
if __name__ == "__main__":
main()