brk/crates/brk_oracle/examples/report_from.rs

//! Generate detailed oracle accuracy report for README / documentation.
//!
//! Run with: cargo run -p brk_oracle --example report_from --release

use std::path::PathBuf;

use brk_indexer::Indexer;
use brk_oracle::{
    Config, HistogramEma, HistogramRaw, NUM_BINS, PRICES, START_HEIGHT_FAST, bin_to_cents,
    cents_to_bin, eligible_bin,
};
use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
use vecdb::{AnyVec, ReadableVec, VecIndex};

/// Day1 1 = Jan 9, 2009 (block 1). For dates after genesis week:
/// day1 = floor(timestamp / 86400) - 14252.
///
/// The subtraction is done with `saturating_sub` where block timestamps are
/// mapped to day indices, so any earlier timestamp collapses to day1 0.
const GENESIS_DAY: u32 = 14252;

// Error thresholds expressed in bins for a 5% / 10% / 20% price deviation.
// Each value is 200 * log10(1 + pct), i.e. the inverse of `bins_to_pct`,
// rounded to two decimals. Used by `YearStats::update` to count outlier blocks.
const BINS_5PCT: f64 = 4.24;
const BINS_10PCT: f64 = 8.28;
const BINS_20PCT: f64 = 15.84;

/// Local copy of the oracle's 19 round-USD stencil offsets (private in lib.rs),
/// used here only for per-block alias diagnostics.
///
/// Offsets are in bins relative to the stencil center, listed in ascending
/// order; index 10 (offset 0) is the center arm. With the 200-bins-per-decade
/// scale used by `bins_to_pct`, +-200 is one decade and +-400 two decades.
const STENCIL_OFFSETS: [i32; 19] = [
    -400, -340, -305, -260, -200, -165, -140, -120, -105, -60, 0, 35, 60, 95, 140, 200, 260, 340,
    400,
];
/// Number of stencil arms (19), derived from `STENCIL_OFFSETS` so the two
/// cannot drift apart.
const N_ARMS: usize = STENCIL_OFFSETS.len();

/// Canonical L1-normalized payment shape across the 19 stencil arms, estimated
/// from true-center arm vectors over a validated block range (~$1.8k era).
/// The real price center reproduces this profile; a ½×/2× alias distorts it
/// (dark holes at no-ladder-partner arms, spurious mass from between-rung
/// payments), so correlation against it discriminates octaves the raw stencil
/// sum cannot. Order matches STENCIL_OFFSETS / the $1..$10k ladder.
///
/// The literals are rounded to three decimals, so they sum to ~1.002 rather
/// than exactly 1.
const ARM_PROFILE: [f64; N_ARMS] = [
    0.022, 0.029, 0.021, 0.045, 0.060, 0.053, 0.092, 0.066, 0.077, 0.075, 0.105, 0.052, 0.075,
    0.049, 0.059, 0.043, 0.044, 0.021, 0.014,
];

/// Samples the EMA under each of the 19 stencil arms with the stencil placed
/// at `center`. An arm whose bin lands outside `0..NUM_BINS` contributes 0.0.
fn arms_at(ema: &HistogramEma, center: i64) -> [f64; N_ARMS] {
    let valid_bins = 0..NUM_BINS as i64;
    let mut sampled = [0.0f64; N_ARMS];
    for (slot, &offset) in sampled.iter_mut().zip(STENCIL_OFFSETS.iter()) {
        let bin = center + i64::from(offset);
        if valid_bins.contains(&bin) {
            *slot = ema[bin as usize];
        }
    }
    sampled
}

/// Pearson correlation coefficient between the raw EMA arm vector sampled at
/// `center` and the reference payment shape `profile`.
///
/// Close to 1 when the local arm shape matches `profile`, lower when holes or
/// spurious arms (as at a ½×/2× alias) distort it. Returns 0.0 when either
/// vector has no positive variance, so the ratio is never taken over zero.
fn arm_profile_corr(ema: &HistogramEma, center: i64, profile: &[f64; N_ARMS]) -> f64 {
    let arms = arms_at(ema, center);
    let count = N_ARMS as f64;
    let mean_arms = arms.iter().sum::<f64>() / count;
    let mean_profile = profile.iter().sum::<f64>() / count;

    // One pass, arm by arm: covariance numerator plus both sums of squares.
    let (cov, ss_arms, ss_profile) = arms.iter().zip(profile.iter()).fold(
        (0.0f64, 0.0f64, 0.0f64),
        |(cov, ssa, ssp), (&a, &p)| {
            let (dev_a, dev_p) = (a - mean_arms, p - mean_profile);
            (cov + dev_a * dev_p, ssa + dev_a * dev_a, ssp + dev_p * dev_p)
        },
    );

    let well_defined = ss_arms > 0.0 && ss_profile > 0.0;
    if !well_defined {
        return 0.0;
    }
    cov / (ss_arms * ss_profile).sqrt()
}

/// Shape score `1 - L1 distance` between the candidate's arm vector (rescaled
/// to sum to 1) and `profile`, which is used as given.
///
/// 1.0 means the two shapes coincide; the score drops as they diverge. Returns
/// 0.0 when the arms carry no positive mass. A covariance-free alternative to
/// `arm_profile_corr`.
fn arm_profile_l1(ema: &HistogramEma, center: i64, profile: &[f64; N_ARMS]) -> f64 {
    let arms = arms_at(ema, center);
    let mass: f64 = arms.iter().sum();
    if mass <= 0.0 {
        return 0.0;
    }
    let distance = arms
        .iter()
        .zip(profile.iter())
        .fold(0.0f64, |acc, (&a, &p)| acc + (a / mass - p).abs());
    1.0 - distance
}

/// Shape score as the dot product of the candidate's arm vector (rescaled to
/// sum to 1) with `profile`, which is used as given.
///
/// This is the stencil sum's multiply-accumulate with profile weights instead
/// of uniform ones: no covariance, no abs. It rewards mass on profile-heavy
/// arms but, unlike the L1 and Pearson scores, does not penalize mass missing
/// elsewhere. Returns 0.0 when the arms carry no positive mass.
fn arm_profile_dot(ema: &HistogramEma, center: i64, profile: &[f64; N_ARMS]) -> f64 {
    let arms = arms_at(ema, center);
    let mass: f64 = arms.iter().sum();
    if mass <= 0.0 {
        return 0.0;
    }
    arms.iter()
        .zip(profile.iter())
        .fold(0.0f64, |acc, (&a, &p)| acc + (a / mass) * p)
}

/// Stencil-arm indices whose value v has 2v NOT on the round-USD ladder
/// ($2 $3 $20 $30 $200 $300 $2000 $10000). A half-price hypothesis shifts the
/// center +60 bins; an arm is lit there only if 2v is itself a round-USD amount
/// people pay, so these eight are the only arms that fall dark at the ½x alias.
/// They carry the entire octave discrimination; the other eleven alias cleanly.
///
/// Values are indices into `STENCIL_OFFSETS` (and thus into any `[f64; N_ARMS]`
/// arm vector), listed in the same order as the dollar amounts above.
const DISC_ARMS: [usize; 8] = [1, 2, 6, 8, 12, 13, 16, 18];

/// The four "decade-anchor" arms ($10 $50 $100 $1000) whose value has BOTH 2v
/// and v/2 on the round-USD ladder, so they alias across the octave in either
/// direction and carry zero up/down information. Down-weighting them is the
/// symmetric counterpart to up-weighting the half-only DISC_ARMS, meant to
/// resist the 2x climb as well as the 1/2x slide.
///
/// Values are indices into `STENCIL_OFFSETS`, in the same order as the dollar
/// amounts above. The set is disjoint from `DISC_ARMS`.
const ALIAS_ARMS: [usize; 4] = [4, 9, 10, 15];

/// Total EMA mass under the stencil arms selected by `arms` (indices into
/// `STENCIL_OFFSETS`) with the stencil placed at `center`.
///
/// Arms whose bin falls outside `0..NUM_BINS` add 0.0. Panics if an entry of
/// `arms` is not a valid `STENCIL_OFFSETS` index.
fn arm_subset_sum(ema: &HistogramEma, center: i64, arms: &[usize]) -> f64 {
    let valid_bins = 0..NUM_BINS as i64;
    arms.iter()
        .map(|&arm| center + i64::from(STENCIL_OFFSETS[arm]))
        .map(|bin| {
            if valid_bins.contains(&bin) {
                ema[bin as usize]
            } else {
                0.0
            }
        })
        .sum()
}

/// Unweighted total of the EMA mass under all 19 stencil arms with the stencil
/// placed at `center`. Arms that fall outside the histogram add 0.0.
fn ema_stencil_sum(ema: &HistogramEma, center: i64) -> f64 {
    let limit = brk_oracle::NUM_BINS as i64;
    // Mass at an absolute bin index, treating everything off the histogram as empty.
    let mass_at = |bin: i64| -> f64 {
        if bin < 0 || bin >= limit {
            0.0
        } else {
            ema[bin as usize]
        }
    };
    STENCIL_OFFSETS
        .iter()
        .map(|&offset| mass_at(center + i64::from(offset)))
        .sum()
}

/// log10(2) * 200 = one price octave (½× / 2×) in bins.
/// The exact product is ~60.2; it is rounded to the nearest whole bin here.
const OCTAVE_BINS: i64 = 60;

/// Tunable octave-guard thresholds (env-overridable for sweeping).
///
/// Built by `GuardCfg::from_env`, which reads OCTAVE_GUARD, GUARD_TAU,
/// GUARD_RAW, GUARD_QMARGIN, GUARD_QMIN, GLOBAL_REACQUIRE and GLOBAL_RADIUS.
/// Consumed by `guarded_best_bin`; `tau` is also the lit-arm threshold passed
/// to `arm_count`.
struct GuardCfg {
    // Master switch: when false, `guarded_best_bin` never snaps away from the
    // in-window pick.
    enabled: bool,
    tau: f64,        // arm "lit" if >= tau * peak arm
    raw_margin: f64, // octave neighbor raw mass must be >= raw_margin * current
    q_margin: usize, // neighbor must have >= q_margin MORE lit arms than current
    q_min: usize,    // neighbor must have at least this many lit arms (looks full)
    // Lever 2: global re-acquire. Instead of only checking the +-60 octave
    // neighbors, scan a wide band beyond the local search window for the
    // strongest true-price peak (most lit arms, raw mass as tiebreak) and snap
    // to it when it clearly beats the locally-trapped pick. Escapes any
    // local-max trap, not just the octave alias.
    global: bool,
    global_radius: i64, // bins scanned on each side of the local pick
}

impl GuardCfg {
    /// Builds the guard configuration from environment variables.
    ///
    /// Switches (`OCTAVE_GUARD`, `GLOBAL_REACQUIRE`) are off when the variable
    /// is unset or unreadable and on for any value other than `"0"`. Numeric
    /// knobs are parsed as `f64` and fall back to their default when unset or
    /// unparsable; the integer-typed ones are then cast from that `f64`.
    fn from_env() -> Self {
        let number = |key: &str, default: f64| -> f64 {
            match std::env::var(key) {
                Ok(raw) => raw.parse().unwrap_or(default),
                Err(_) => default,
            }
        };
        let switch = |key: &str| -> bool { matches!(std::env::var(key), Ok(v) if v != "0") };

        let enabled = switch("OCTAVE_GUARD");
        let tau = number("GUARD_TAU", 0.15);
        let raw_margin = number("GUARD_RAW", 1.0);
        let q_margin = number("GUARD_QMARGIN", 4.0) as usize;
        let q_min = number("GUARD_QMIN", 14.0) as usize;
        let global = switch("GLOBAL_REACQUIRE");
        let global_radius = number("GLOBAL_RADIUS", 600.0) as i64;

        Self {
            enabled,
            tau,
            raw_margin,
            q_margin,
            q_min,
            global,
            global_radius,
        }
    }
}

/// Counts the stencil arms that are "lit" at `center`: those whose EMA mass is
/// at least `tau` times the strongest arm's mass.
///
/// The true price lights up ~all 19 arms, while a ½×/2× alias leaves ~8
/// structural holes (amounts with no ladder partner one octave away), so the
/// count separates truth from alias even when the normalized score-sum cannot.
/// Returns 0 when no arm carries positive mass.
fn arm_count(ema: &HistogramEma, center: i64, tau: f64) -> usize {
    let limit = brk_oracle::NUM_BINS as i64;
    let mut arms = [0.0f64; N_ARMS];
    for (slot, &offset) in arms.iter_mut().zip(STENCIL_OFFSETS.iter()) {
        let bin = center + i64::from(offset);
        *slot = if (0..limit).contains(&bin) {
            ema[bin as usize]
        } else {
            0.0
        };
    }

    let peak = arms
        .iter()
        .fold(0.0f64, |top, &v| if v > top { v } else { top });
    if peak <= 0.0 {
        return 0;
    }

    let threshold = tau * peak;
    arms.iter().filter(|&&v| v >= threshold).count()
}

/// Renders the stencil arms at `center` as a 19-character string: `L` for an
/// arm whose EMA mass is >= tau * the peak arm, `.` otherwise (all `.` when no
/// arm has positive mass). Order: $1 $2 $3 $5 $10 $15 $20 $25 $30 $50 $100
/// $150 $200 $300 $500 $1k $2k $5k $10k. Shows WHICH amounts are present.
fn arm_pattern(ema: &HistogramEma, center: i64, tau: f64) -> String {
    let limit = brk_oracle::NUM_BINS as i64;
    let mut arms = [0.0f64; N_ARMS];
    for (slot, &offset) in arms.iter_mut().zip(STENCIL_OFFSETS.iter()) {
        let bin = center + i64::from(offset);
        *slot = if (0..limit).contains(&bin) {
            ema[bin as usize]
        } else {
            0.0
        };
    }
    let peak = arms
        .iter()
        .fold(0.0f64, |top, &v| if v > top { v } else { top });

    let mut pattern = String::with_capacity(N_ARMS);
    for &v in &arms {
        let lit = peak > 0.0 && v >= tau * peak;
        pattern.push(if lit { 'L' } else { '.' });
    }
    pattern
}

/// In-window stencil search (mirrors `Oracle::find_best_bin`) plus an octave
/// guard: if the half- or double-price bin lights up strictly more stencil arms
/// and carries comparable mass, snap to it. This escapes a ½×/2× alias lock that
/// the ±window can never climb the 60 bins out of on its own.
///
/// Parameters:
/// - `ema`: smoothed histogram being scored.
/// - `prev_bin`: previous pick; rounded to get the center of the search window.
/// - `search_below` / `search_above`: number of bins searched on each side of
///   the center (the window is clipped to `0..NUM_BINS`).
/// - `guard`: octave-guard / global re-acquire settings; ignored unless
///   `guard.enabled`.
/// - `arm_weights`: per-arm multipliers applied inside the stencil term.
/// - `corr_weight`: multiplier on the shape term; 0.0 skips it entirely.
/// - `profile`: reference arm shape handed to the shape metric.
/// - `metric`: 1 = `arm_profile_l1`, 2 = `arm_profile_dot`, anything else =
///   `arm_profile_corr`.
/// - `stencil_weight`: multiplier on the stencil term; 0.0 skips it entirely.
///
/// Returns a fractional bin: `prev_bin` unchanged when the window is empty, the
/// guard's integer target when it snaps, otherwise the best in-window bin plus
/// a parabolic sub-bin offset clamped to +-0.5.
#[allow(clippy::too_many_arguments)]
fn guarded_best_bin(
    ema: &HistogramEma,
    prev_bin: f64,
    search_below: usize,
    search_above: usize,
    guard: &GuardCfg,
    arm_weights: &[f64; N_ARMS],
    corr_weight: f64,
    profile: &[f64; N_ARMS],
    metric: u8,
    stencil_weight: f64,
) -> f64 {
    // Half-open candidate window [search_start, search_end), clipped to the histogram.
    let center = prev_bin.round() as usize;
    let search_start = center.saturating_sub(search_below);
    let search_end = (center + search_above + 1).min(brk_oracle::NUM_BINS);
    if search_start >= search_end {
        return prev_bin;
    }

    // Per-arm normalizer: the largest EMA value that arm sees at any candidate
    // bin in the window. Dividing by it puts every arm's contribution in [0, 1].
    let mut track_norm = [0.0f64; N_ARMS];
    for (i, &off) in STENCIL_OFFSETS.iter().enumerate() {
        for bin in search_start..search_end {
            let idx = bin as i32 + off;
            if idx >= 0 && (idx as usize) < brk_oracle::NUM_BINS {
                track_norm[i] = track_norm[i].max(ema[idx as usize]);
            }
        }
    }
    // Candidate score = weighted, per-arm-normalized stencil sum plus
    // corr_weight * shape metric. Either term is skipped when its weight is 0.
    let score = |bin: usize| -> f64 {
        let mut total = 0.0;
        if stencil_weight != 0.0 {
            for (i, &off) in STENCIL_OFFSETS.iter().enumerate() {
                let idx = bin as i32 + off;
                if idx >= 0 && (idx as usize) < brk_oracle::NUM_BINS && track_norm[i] > 0.0 {
                    total += stencil_weight * arm_weights[i] * ema[idx as usize] / track_norm[i];
                }
            }
        }
        if corr_weight != 0.0 {
            let shape = match metric {
                1 => arm_profile_l1(ema, bin as i64, profile),
                2 => arm_profile_dot(ema, bin as i64, profile),
                _ => arm_profile_corr(ema, bin as i64, profile),
            };
            total += corr_weight * shape;
        }
        total
    };

    // Arg-max over the window; the strict `>` keeps the lowest bin on ties.
    let mut best_bin = search_start;
    let mut best_score = score(search_start);
    for bin in (search_start + 1)..search_end {
        let c = score(bin);
        if c > best_score {
            best_score = c;
            best_bin = bin;
        }
    }

    if guard.enabled {
        let b = best_bin as i64;
        // Lit-arm count and raw stencil mass of the local pick: the baseline
        // any snap target has to beat.
        let qb = arm_count(ema, b, guard.tau);
        let raw_b = ema_stencil_sum(ema, b);
        let mut target = b;
        if guard.global {
            // Scan beyond the local window for the strongest peak by lit-arm
            // count (raw mass as tiebreak), considering only bins carrying at
            // least the local pick's raw mass. Snap to it when it lights up
            // q_margin more arms and looks full (>= q_min), regardless of how
            // many bins away it sits.
            let lo = (b - guard.global_radius).max(0);
            let hi = (b + guard.global_radius).min(brk_oracle::NUM_BINS as i64 - 1);
            let mut best: Option<(i64, usize, f64)> = None;
            for n in lo..=hi {
                if n >= search_start as i64 && n < search_end as i64 {
                    continue; // window interior is owned by the local search
                }
                let raw_n = ema_stencil_sum(ema, n);
                if raw_n < guard.raw_margin * raw_b {
                    continue;
                }
                let qn = arm_count(ema, n, guard.tau);
                let better = best.is_none_or(|(_, sq, sr)| qn > sq || (qn == sq && raw_n > sr));
                if better {
                    best = Some((n, qn, raw_n));
                }
            }
            if let Some((n, qn, _)) = best
                && qn >= qb + guard.q_margin
                && qn >= guard.q_min
            {
                target = n;
            }
        } else {
            // Octave-only guard: test just the two bins one octave below and
            // above the pick; of those passing all three thresholds keep the
            // one with more lit arms (raw mass as tiebreak).
            let mut best: Option<(usize, f64)> = None;
            for &delta in &[-OCTAVE_BINS, OCTAVE_BINS] {
                let n = b + delta;
                if n < 0 || n as usize >= brk_oracle::NUM_BINS {
                    continue;
                }
                let qn = arm_count(ema, n, guard.tau);
                let raw_n = ema_stencil_sum(ema, n);
                if qn >= qb + guard.q_margin
                    && qn >= guard.q_min
                    && raw_n >= guard.raw_margin * raw_b
                {
                    let better = best.is_none_or(|(sq, sr)| qn > sq || (qn == sq && raw_n > sr));
                    if better {
                        best = Some((qn, raw_n));
                        target = n;
                    }
                }
            }
        }
        // A snap returns the integer target directly, with no sub-bin refinement.
        if target != b {
            return target as f64;
        }
    }

    // Parabolic sub-bin refinement through the best bin and its two neighbors.
    // At a window edge the missing neighbor reuses the center score.
    let score_center = best_score;
    let score_left = if best_bin > search_start {
        score(best_bin - 1)
    } else {
        score_center
    };
    let score_right = if best_bin + 1 < search_end {
        score(best_bin + 1)
    } else {
        score_center
    };
    // denom is the discrete second difference; a near-zero value means the
    // three points are (almost) collinear, so no offset is applied.
    let denom = score_left - 2.0 * score_center + score_right;
    let sub_bin = if denom.abs() > 1e-10 {
        (0.5 * (score_left - score_right) / denom).clamp(-0.5, 0.5)
    } else {
        0.0
    };
    best_bin as f64 + sub_bin
}

/// Converts a distance in log-price bins into a percentage price change.
///
/// The bin scale is 200 bins per decade, so `bins` maps to a price ratio of
/// `10^(bins / 200)`; the result is that ratio minus one, times 100.
fn bins_to_pct(bins: f64) -> f64 {
    let decades = bins / 200.0;
    let ratio = 10.0_f64.powf(decades);
    (ratio - 1.0) * 100.0
}

/// How a block's histogram is weighted before it is folded into the EMA.
///
/// - `Off`: raw counts are kept, so a flood block dominates the window.
/// - `Unit`: every block is rescaled to the same total mass (one block = one vote).
/// - `Cap`: only blocks above a ceiling are scaled down; smaller ones are untouched.
#[derive(Clone, Copy, PartialEq)]
enum NormMode {
    Off,
    Unit,
    Cap,
}

/// Multiplier applied to a block's bin counts before folding into the EMA.
///
/// `total` is the block's summed bin count. An empty block (`total == 0`)
/// always yields 0.0. Otherwise: `Off` gives 1.0, `Unit` gives
/// `target / total`, and `Cap` gives `cap / total` limited to at most 1.0.
fn norm_scale(total: u64, mode: NormMode, cap: f64, target: f64) -> f64 {
    let mass = total as f64;
    match (total, mode) {
        (0, _) => 0.0,
        (_, NormMode::Off) => 1.0,
        (_, NormMode::Unit) => target / mass,
        (_, NormMode::Cap) => f64::min(cap / mass, 1.0),
    }
}

/// Gregorian calendar year (UTC) of a Unix timestamp in seconds.
///
/// Uses exact civil-from-days arithmetic rather than dividing by an average
/// 365.25-day year: the average-year shortcut drifts by up to 18 hours around
/// New Year, which puts blocks mined near midnight on Jan 1 / Dec 31 into the
/// wrong year's statistics.
fn timestamp_to_year(ts: u32) -> u16 {
    // Days since 0000-03-01: shifting the epoch to March 1 makes the leap day
    // the last day of the shifted year, which keeps the formulas branch-free.
    let shifted_days = ts / 86_400 + 719_468;
    let era = shifted_days / 146_097; // 400-year cycles
    let day_of_era = shifted_days % 146_097; // [0, 146_096]
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365; // [0, 399]
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // [0, 365], March-based
    let month_from_march = (5 * day_of_year + 2) / 153; // 0 = March .. 11 = February
    // January and February belong to the next civil year in the March-based calendar.
    let year = era * 400 + year_of_era + u32::from(month_from_march >= 10);
    // A u32 timestamp ends in 2106, so the year always fits in u16.
    year as u16
}

/// Running error statistics for one bucket of blocks (a calendar year, or the
/// whole run). Filled one block at a time by `YearStats::update`.
struct YearStats {
    // Label for the bucket.
    year: u16,
    // Sum of squared signed errors, in bins (feeds `rmse_pct`).
    total_sq_err: f64,
    // Largest absolute error seen, in bins.
    max_err: f64,
    // Number of blocks folded in.
    total_blocks: u64,
    // Blocks whose absolute error exceeded BINS_5PCT / BINS_10PCT / BINS_20PCT.
    gt_5pct: u64,
    gt_10pct: u64,
    gt_20pct: u64,
    // Smallest positive `exchange_low` seen; stays at f64::MAX until one arrives.
    min_price: f64,
    // Largest `exchange_high` seen.
    max_price: f64,
    // Per-block absolute error converted to percent; sorted in place by `median_pct`.
    errors: Vec<f64>,
}

impl YearStats {
    /// Creates an empty accumulator labelled with `year`.
    fn new(year: u16) -> Self {
        Self {
            year,
            total_sq_err: 0.0,
            max_err: 0.0,
            total_blocks: 0,
            gt_5pct: 0,
            gt_10pct: 0,
            gt_20pct: 0,
            // Sentinel: any positive exchange low will be smaller.
            min_price: f64::MAX,
            max_price: 0.0,
            errors: Vec::new(),
        }
    }

    /// Folds one block into the statistics.
    ///
    /// `err` is the signed error in bins. `exchange_high` / `exchange_low`
    /// widen the tracked price range; a non-positive low is ignored.
    fn update(&mut self, err: f64, exchange_high: f64, exchange_low: f64) {
        let abs_err = err.abs();
        self.total_sq_err += err * err;
        self.total_blocks += 1;
        self.errors.push(bins_to_pct(abs_err));
        if abs_err > self.max_err {
            self.max_err = abs_err;
        }
        if abs_err > BINS_5PCT {
            self.gt_5pct += 1;
        }
        if abs_err > BINS_10PCT {
            self.gt_10pct += 1;
        }
        if abs_err > BINS_20PCT {
            self.gt_20pct += 1;
        }
        if exchange_high > self.max_price {
            self.max_price = exchange_high;
        }
        if exchange_low > 0.0 && exchange_low < self.min_price {
            self.min_price = exchange_low;
        }
    }

    /// Root-mean-square error, converted from bins to percent.
    /// Returns 0.0 for an empty accumulator instead of dividing by zero.
    fn rmse_pct(&self) -> f64 {
        if self.total_blocks == 0 {
            return 0.0;
        }
        bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
    }

    /// Largest absolute error, converted from bins to percent.
    fn max_pct(&self) -> f64 {
        bins_to_pct(self.max_err)
    }

    /// Median per-block error in percent (upper median for an even count),
    /// or 0.0 when empty. Sorts `errors` in place as a side effect.
    fn median_pct(&mut self) -> f64 {
        // total_cmp gives a total order, so a stray NaN sorts last rather than
        // panicking the whole report.
        self.errors.sort_unstable_by(f64::total_cmp);
        let n = self.errors.len();
        if n == 0 { 0.0 } else { self.errors[n / 2] }
    }

    /// Nearest-rank percentile `p` (0..=100) of the per-block errors in
    /// percent, or 0.0 when empty.
    ///
    /// Reads `errors` directly when it is already sorted (e.g. after
    /// `median_pct`); otherwise ranks a sorted copy, so the result does not
    /// depend on call order.
    fn percentile(&self, p: f64) -> f64 {
        let n = self.errors.len();
        if n == 0 {
            return 0.0;
        }
        // Out-of-range `p` is clamped: the float-to-usize cast saturates at 0
        // and the `min` caps the top end.
        let idx = (((p / 100.0) * (n - 1) as f64).round() as usize).min(n - 1);
        let already_sorted = self
            .errors
            .windows(2)
            .all(|pair| pair[0].total_cmp(&pair[1]).is_le());
        if already_sorted {
            self.errors[idx]
        } else {
            let mut sorted = self.errors.clone();
            sorted.sort_unstable_by(f64::total_cmp);
            sorted[idx]
        }
    }
}

/// Oracle OHLC for a single day, built from per-block prices.
struct DayCandle {
    // Day index of the candle; presumably the same day1 index computed for
    // `height_day1s` in `main` (timestamp / 86400 - GENESIS_DAY) — confirm at
    // the construction site.
    day1: usize,
    // Open / high / low / close of the oracle price over that day.
    open: f64,
    high: f64,
    low: f64,
    close: f64,
}

/// Per-block error record; `main` collects these in `worst_blocks`.
/// NOTE(review): field meanings below are read off the names — the code that
/// fills them is outside this view, confirm there.
struct BlockError {
    // Block height the record refers to.
    height: usize,
    // Price the oracle produced for the block.
    oracle_price: f64,
    // Exchange low / high for the block, for comparison.
    exchange_low: f64,
    exchange_high: f64,
    // Error expressed in percent.
    error_pct: f64,
}

fn main() {
    let data_dir = std::env::var("BRK_DIR")
        .map(PathBuf::from)
        .unwrap_or_else(|_| {
            let home = std::env::var("HOME").unwrap();
            PathBuf::from(home).join(".brk")
        });

    let start = std::env::var("ORACLE_START")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(START_HEIGHT_FAST);
    let end_override = std::env::var("ORACLE_END")
        .ok()
        .and_then(|s| s.parse::<usize>().ok());
    let trace_every: usize = std::env::var("TRACE_EVERY")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(5000);

    let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
    let total_heights = indexer.vecs.blocks.timestamp.len();
    let manifest_dir = env!("CARGO_MANIFEST_DIR");

    let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
        &std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
            .expect("Failed to read height_price_ohlc.json"),
    )
    .expect("Failed to parse height OHLC");

    let daily_ohlc: Vec<[f64; 4]> = serde_json::from_str(
        &std::fs::read_to_string(format!("{manifest_dir}/examples/date_price_ohlc.json"))
            .expect("Failed to read date_price_ohlc.json"),
    )
    .expect("Failed to parse daily OHLC");

    let height_bands: Vec<(f64, f64)> = height_ohlc
        .iter()
        .map(|ohlc| {
            let high = ohlc[1];
            let low = ohlc[2];
            if high > 0.0 && low > 0.0 {
                (cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
            } else {
                (0.0, 0.0)
            }
        })
        .collect();

    // Read block timestamps for year + day1 mapping.
    let timestamps: Vec<brk_types::Timestamp> = indexer.vecs.blocks.timestamp.collect();
    let height_years: Vec<u16> = timestamps
        .iter()
        .map(|ts| timestamp_to_year(**ts))
        .collect();
    let height_day1s: Vec<usize> = timestamps
        .iter()
        .map(|ts| (**ts / 86400).saturating_sub(GENESIS_DAY) as usize)
        .collect();

    // Seed price at height `start - 1`. The baked prices.txt only covers up to
    // 508k (the cold-start seed); past it we warm-start from the exchange close
    // so any later start height gets a primed ref_bin without the cold-start
    // alias zone. start <= 508k stays bit-identical to the old baseline.
    let start_price: f64 = PRICES
        .lines()
        .nth(start - 1)
        .and_then(|l| l.parse().ok())
        .unwrap_or_else(|| {
            let o = height_ohlc.get(start - 1).copied().unwrap_or([0.0; 4]);
            if o[3] > 0.0 {
                o[3]
            } else {
                (o[1] + o[2]) / 2.0
            }
        });
    // Exact seed override (reproduce the committed prices.txt seed at a start the
    // truncated working-tree prices.txt no longer covers).
    let start_price = std::env::var("SEED")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(start_price);

    let mut config = Config::default();
    if let Some(w) = std::env::var("EMA_WINDOW")
        .ok()
        .and_then(|s| s.parse().ok())
    {
        config.window_size = w;
    }
    if let Some(a) = std::env::var("EMA_ALPHA").ok().and_then(|s| s.parse().ok()) {
        config.alpha = a;
    }
    // Investigation default: widened up-reach (9 -> 12) to survive fast rallies
    // like the 2018-04-12 candle. Kept here only; config.rs is untouched.
    config.search_below = std::env::var("SEARCH_BELOW")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(12);
    if let Some(sa) = std::env::var("SEARCH_ABOVE")
        .ok()
        .and_then(|s| s.parse().ok())
    {
        config.search_above = sa;
    }
    let guard = GuardCfg::from_env();
    // Lever 3: up-weight the 8 octave-discriminating arms (2v not on the ladder)
    // in the stencil score. They alone separate a center from its half-price
    // alias; the other 11 alias cleanly and only dilute the up/down decision.
    let disc_weight: f64 = std::env::var("DISC_WEIGHT")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(1.0);
    let alias_weight: f64 = std::env::var("ALIAS_WEIGHT")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(1.0);
    // Shape-correlation restoring force: add corr_weight * Pearson(arms, profile)
    // to each candidate bin's stencil score. Pulls the ±window pick toward the
    // octave whose arm-shape matches real payments, resisting the ½×/2× slide
    // without a hard continuity clamp. 0 = off (bit-identical to baseline).
    let corr_weight: f64 = std::env::var("CORR_WEIGHT")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(0.0);
    // EMA rate for the adaptive shape template. The profile tracks the current
    // price regime (which arms are tall) so correlation stays meaningful as the
    // price moves an octave over months, while remaining slow enough to ride
    // through a transient ½×/2× slide (tens of blocks) without adapting to it.
    let corr_beta: f64 = std::env::var("CORR_BETA")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(0.002);
    // Apply the corr term only below this height. Lets the pre-X (slow) leg use
    // corr while the post-X (fast) leg stays bit-identical to the no-corr baseline.
    // Default = always on (global corr).
    let corr_until: usize = std::env::var("CORR_UNTIL")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(usize::MAX);
    // Shape-match metric: "l1" = negative L1 distance, "dot" = matched-filter dot
    // product (both covariance-free), else Pearson.
    let metric: u8 = match std::env::var("PROFILE_METRIC").as_deref() {
        Ok("l1") => 1,
        Ok("dot") => 2,
        _ => 0,
    };
    let metric_name = ["pearson", "l1", "dot"][metric as usize];
    // Profile seed: "bootstrap" = seed from the first warm-up pick's shape (no magic
    // constant), "uniform"/"flat" = every arm equal (1/N_ARMS), else the static
    // ARM_PROFILE.
    let profile_seed = std::env::var("PROFILE_SEED").ok();
    let bootstrap_profile = profile_seed.as_deref() == Some("bootstrap");
    let uniform_profile = matches!(profile_seed.as_deref(), Some("uniform") | Some("flat"));
    // Stencil-sum weight (default 1). Set 0 for SHAPE-ONLY scoring: the shape match
    // does both within-octave localization and octave discrimination, no stencil
    // term and no cw balance to tune.
    let stencil_weight: f64 = std::env::var("STENCIL_WEIGHT")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(1.0);
    eprintln!(
        "  shape: metric={} seed={} stencil_weight={}",
        metric_name,
        if bootstrap_profile {
            "bootstrap"
        } else if uniform_profile {
            "uniform"
        } else {
            "static"
        },
        stencil_weight,
    );
    // Mid-run regime switch, mirrors production Oracle::reconfigure at START_HEIGHT_FAST:
    // at SWITCH_AT rebuild the EMA to SWITCH_WINDOW/SWITCH_ALPHA and warm-start fresh
    // (ring reset, ref_bin kept) - the same state as a fresh warm-up. Search window
    // is unchanged (both regimes share it). 0 = no switch (single-config baseline).
    let switch_at: usize = std::env::var("SWITCH_AT")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(0);
    let switch_window: usize = std::env::var("SWITCH_WINDOW")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(12);
    let switch_alpha: f64 = std::env::var("SWITCH_ALPHA")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(2.0 / 7.0);
    let mut arm_weights = [1.0f64; N_ARMS];
    for &i in &DISC_ARMS {
        arm_weights[i] = disc_weight;
    }
    for &i in &ALIAS_ARMS {
        arm_weights[i] = alias_weight;
    }
    eprintln!(
        "  disc_weight={disc_weight} on {DISC_ARMS:?}; alias_weight={alias_weight} on {ALIAS_ARMS:?}; corr_weight={corr_weight}"
    );
    let anom_thresh: f64 = std::env::var("ANOM_THRESH")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(0.0);
    let norm_mode = match std::env::var("NORM_MODE").as_deref() {
        Ok("unit") => NormMode::Unit,
        Ok("cap") => NormMode::Cap,
        _ => NormMode::Off,
    };
    let norm_cap: f64 = std::env::var("NORM_CAP")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(8000.0);
    let norm_target: f64 = std::env::var("NORM_TARGET")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(4000.0);
    // Drop batch-payout txs (UTXOracle uses exactly-2-output; we cap instead).
    // 0 = disabled. A flood block's 591-output txs are dropped at 100.
    let max_outputs: usize = std::env::var("MAX_OUTPUTS")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(100);
    // Apply the output-count filter only below this height (it helps the thin
    // 2018-2020 era, mildly hurts high-volume years). Default = always on.
    let max_outputs_until: usize = std::env::var("MAX_OUTPUTS_UNTIL")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(usize::MAX);
    eprintln!(
        "  norm: mode={} cap={} target={} max_outputs={}",
        match norm_mode {
            NormMode::Off => "off",
            NormMode::Unit => "unit",
            NormMode::Cap => "cap",
        },
        norm_cap,
        norm_target,
        max_outputs,
    );
    eprintln!(
        "  cfg: window_size={} alpha={:.5} (~{:.0}-block span) search -{}/+{} guard={} (tau={} raw={} qm={} qmin={}) global={} radius={}",
        config.window_size,
        config.alpha,
        2.0 / config.alpha - 1.0,
        config.search_below,
        config.search_above,
        guard.enabled,
        guard.tau,
        guard.raw_margin,
        guard.q_margin,
        guard.q_min,
        guard.global,
        guard.global_radius,
    );
    if switch_at != 0 {
        eprintln!(
            "  switch: at height {switch_at} -> window={switch_window} alpha={switch_alpha:.5}"
        );
    }
    let (sb, sa) = (config.search_below, config.search_above);
    let mut window_size = config.window_size;
    let alpha = config.alpha;
    let mut weights: Vec<f64> = (0..window_size)
        .map(|i| alpha * (1.0 - alpha).powi(i as i32))
        .collect();
    let mut ring: Vec<Vec<f64>> = vec![vec![0.0; NUM_BINS]; window_size];
    let mut ring_cursor = 0usize;
    let mut filled = 0usize;
    let mut ema = HistogramEma::zeros();
    let mut ref_bin = cents_to_bin(start_price * 100.0);
    // Adaptive shape template, re-estimated each block from the L1-normalized arm
    // vector at the pick. Static seed = ARM_PROFILE; bootstrap = filled from the
    // first warm-up pick (zeros until then, so corr contributes nothing yet).
    let mut profile = if bootstrap_profile {
        [0.0f64; N_ARMS]
    } else if uniform_profile {
        [1.0 / N_ARMS as f64; N_ARMS]
    } else {
        ARM_PROFILE
    };
    let mut profile_seeded = !bootstrap_profile;

    // Parity check (VERIFY_PROD=1): drive the PRODUCTION Oracle (lib.rs) over the
    // same per-block histograms and confirm its ref_bin matches this harness pick
    // bit-for-bit. Only meaningful under the shipped slow config (EMA_ALPHA=0.10
    // EMA_WINDOW=40 search 12/11, metric=l1, cw=8, norm off, ORACLE_END<=508000 so
    // corr stays on the whole run).
    let verify_prod = std::env::var("VERIFY_PROD").as_deref() == Ok("1");
    let mut prod_oracle = brk_oracle::Oracle::new(ref_bin, brk_oracle::Config::slow());
    let mut prod_max_diff = 0.0f64;
    let mut prod_diff_blocks = 0usize;

    // Lever 4: a parallel "sharp" detection EMA (fast span, short window) folded
    // from the same per-block hists. The slow EMA above still sets the price; this
    // is diagnostic only, used to check whether the true-price stencil holes (the
    // arm-count contrast that the smeared slow EMA flattens during a crash) survive
    // when the histogram is not smoothed.
    let sharp_span: f64 = std::env::var("SHARP_SPAN")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(3.0);
    let sharp_window: usize = std::env::var("SHARP_WINDOW")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(6);
    let sharp_alpha = 2.0 / (sharp_span + 1.0);
    let sharp_weights: Vec<f64> = (0..sharp_window)
        .map(|i| sharp_alpha * (1.0 - sharp_alpha).powi(i as i32))
        .collect();
    let mut sharp_ring: Vec<Vec<f64>> = vec![vec![0.0; NUM_BINS]; sharp_window];
    let mut sharp_cursor = 0usize;
    let mut sharp_filled = 0usize;
    let mut sharp_ema = HistogramEma::zeros();
    eprintln!("  sharp: span={sharp_span:.0} window={sharp_window} alpha={sharp_alpha:.5}");

    let total_txs = indexer.vecs.transactions.txid.len();
    let total_outputs = indexer.vecs.outputs.value.len();

    // Pre-collect height-indexed vecs (small). Transaction-indexed vecs are too
    // large, so the tx-indexed first_txout_index is read through a forward cursor.
    let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
    let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
    let mut txout_cursor = indexer.vecs.transactions.first_txout_index.cursor();
    let mut tx_starts: Vec<usize> = Vec::new();

    let mut year_stats: Vec<YearStats> = Vec::new();
    let mut overall = YearStats::new(0);
    let mut worst_blocks: Vec<BlockError> = Vec::new();
    let mut total_bias = 0.0f64;

    // Track oracle daily candles.
    let mut oracle_candles: Vec<DayCandle> = Vec::new();
    let mut current_di: Option<usize> = None;

    let loop_end = end_override.unwrap_or(total_heights).min(total_heights);
    for h in start..loop_end {
        if switch_at != 0 && h == switch_at {
            window_size = switch_window;
            weights = (0..window_size)
                .map(|i| switch_alpha * (1.0 - switch_alpha).powi(i as i32))
                .collect();
            ring = vec![vec![0.0; NUM_BINS]; window_size];
            ring_cursor = 0;
            filled = 0;
        }
        let ft = first_tx_index[h];
        let next_ft = first_tx_index
            .get(h + 1)
            .copied()
            .unwrap_or(TxIndex::from(total_txs));

        let block_first_tx = ft.to_usize() + 1;
        let tx_count = next_ft.to_usize() - block_first_tx;
        let out_end = out_first
            .get(h + 1)
            .copied()
            .unwrap_or(TxOutIndex::from(total_outputs))
            .to_usize();

        // First txout index of each non-coinbase tx, for per-tx grouping.
        txout_cursor.advance(block_first_tx - txout_cursor.position());
        tx_starts.clear();
        for _ in 0..tx_count {
            tx_starts.push(txout_cursor.next().unwrap().to_usize());
        }
        let out_start = tx_starts.first().copied().unwrap_or(out_end);

        let values: Vec<Sats> = indexer
            .vecs
            .outputs
            .value
            .collect_range_at(out_start, out_end);
        let output_types: Vec<OutputType> = indexer
            .vecs
            .outputs
            .output_type
            .collect_range_at(out_start, out_end);

        // Drop every output of a tx carrying an OP_RETURN (protocol machinery).
        let mut hist = HistogramRaw::zeros();
        for tx in 0..tx_count {
            let lo = tx_starts[tx] - out_start;
            let hi = tx_starts
                .get(tx + 1)
                .map(|s| s - out_start)
                .unwrap_or(out_end - out_start);
            if output_types[lo..hi].contains(&OutputType::OpReturn) {
                continue;
            }
            if max_outputs > 0 && h < max_outputs_until && (hi - lo) > max_outputs {
                continue;
            }
            for i in lo..hi {
                if let Some(bin) = eligible_bin(values[i], output_types[i]) {
                    hist.increment(bin as usize);
                }
            }
        }

        let total: u64 = (0..NUM_BINS).map(|b| hist[b] as u64).sum();
        let scale = norm_scale(total, norm_mode, norm_cap, norm_target);
        {
            let slot = &mut ring[ring_cursor];
            for b in 0..NUM_BINS {
                slot[b] = hist[b] as f64 * scale;
            }
        }
        ring_cursor = (ring_cursor + 1) % window_size;
        if filled < window_size {
            filled += 1;
        }
        ema.fill(0.0);
        (0..filled).for_each(|age| {
            let idx = (ring_cursor + window_size - 1 - age) % window_size;
            let w = weights[age];
            let block = &ring[idx];
            for b in 0..NUM_BINS {
                ema[b] += w * block[b];
            }
        });
        // Sharp detection EMA (diagnostic only - does not drive the price).
        {
            let slot = &mut sharp_ring[sharp_cursor];
            for b in 0..NUM_BINS {
                slot[b] = hist[b] as f64 * scale;
            }
        }
        sharp_cursor = (sharp_cursor + 1) % sharp_window;
        if sharp_filled < sharp_window {
            sharp_filled += 1;
        }
        sharp_ema.fill(0.0);
        (0..sharp_filled).for_each(|age| {
            let idx = (sharp_cursor + sharp_window - 1 - age) % sharp_window;
            let w = sharp_weights[age];
            let block = &sharp_ring[idx];
            for b in 0..NUM_BINS {
                sharp_ema[b] += w * block[b];
            }
        });
        let cw = if h < corr_until { corr_weight } else { 0.0 };
        ref_bin = guarded_best_bin(
            &ema,
            ref_bin,
            sb,
            sa,
            &guard,
            &arm_weights,
            cw,
            &profile,
            metric,
            stencil_weight,
        );
        let oracle_price = bin_to_cents(ref_bin) as f64 / 100.0;

        if verify_prod {
            let prod_bin = prod_oracle.process_histogram(&hist);
            let d = (prod_bin - ref_bin).abs();
            prod_max_diff = prod_max_diff.max(d);
            if prod_bin != ref_bin {
                prod_diff_blocks += 1;
            }
        }

        // Re-estimate the shape template from the L1-normalized arm vector at the
        // new pick, blended in slowly so a transient octave slide cannot corrupt it.
        if cw != 0.0 {
            let arms = arms_at(&ema, ref_bin.round() as i64);
            let s: f64 = arms.iter().sum();
            if s > 0.0 {
                if !profile_seeded {
                    for i in 0..N_ARMS {
                        profile[i] = arms[i] / s;
                    }
                    profile_seeded = true;
                } else {
                    for i in 0..N_ARMS {
                        profile[i] = (1.0 - corr_beta) * profile[i] + corr_beta * (arms[i] / s);
                    }
                }
            }
        }

        let o = height_ohlc.get(h).copied().unwrap_or([0.0; 4]);
        let (ex_high, ex_low, ex_close) = (o[1], o[2], o[3]);
        let band_err = if ex_high > 0.0 && ex_low > 0.0 {
            if oracle_price > ex_high {
                (oracle_price - ex_high) / ex_high * 100.0
            } else if oracle_price < ex_low {
                (oracle_price - ex_low) / ex_low * 100.0
            } else {
                0.0
            }
        } else {
            0.0
        };
        let do_print = h % trace_every == 0 || (anom_thresh > 0.0 && band_err.abs() >= anom_thresh);
        if do_print {
            let eligible: u32 = (0..brk_oracle::NUM_BINS).map(|b| hist[b]).sum();
            // true_bin centered on exchange close; +60 bins = half price, -60 = double.
            let true_bin = if ex_close > 0.0 {
                cents_to_bin(ex_close * 100.0).round() as i64
            } else {
                ref_bin.round() as i64
            };
            let s_true = ema_stencil_sum(&ema, true_bin);
            let s_half = ema_stencil_sum(&ema, true_bin + 60);
            let s_dbl = ema_stencil_sum(&ema, true_bin - 60);
            let qt = arm_count(&ema, true_bin, guard.tau);
            let qh = arm_count(&ema, true_bin + 60, guard.tau);
            let qd = arm_count(&ema, true_bin - 60, guard.tau);
            let pat = arm_pattern(&ema, true_bin, guard.tau);
            // Octave-discriminating subset only: mass at true vs half center.
            let dt = arm_subset_sum(&ema, true_bin, &DISC_ARMS);
            let dh = arm_subset_sum(&ema, true_bin + 60, &DISC_ARMS);
            // Same arm-count contrast measured on the sharp detection EMA.
            let qst = arm_count(&sharp_ema, true_bin, guard.tau);
            let qsh = arm_count(&sharp_ema, true_bin + 60, guard.tau);
            let qsd = arm_count(&sharp_ema, true_bin - 60, guard.tau);
            let spat = arm_pattern(&sharp_ema, true_bin, guard.tau);
            let ts_secs: u32 = *timestamps[h];
            eprintln!(
                "{h}\t{ts_secs}\t{oracle_price:.0}\t{ex_close:.0}\t{band_err:+.2}\t{eligible}\tT={s_true:.1}\tH={s_half:.1}\tD={s_dbl:.1}\tQt={qt}\tQh={qh}\tQd={qd}\tDt={dt:.1}\tDh={dh:.1}\t{pat}\t|sharp Qt={qst} Qh={qsh} Qd={qsd}\t{spat}"
            );
        }

        // Build oracle daily candle.
        let di = height_day1s[h];
        if current_di != Some(di) {
            current_di = Some(di);
            oracle_candles.push(DayCandle {
                day1: di,
                open: oracle_price,
                high: oracle_price,
                low: oracle_price,
                close: oracle_price,
            });
        } else {
            let candle = oracle_candles.last_mut().unwrap();
            if oracle_price > candle.high {
                candle.high = oracle_price;
            }
            if oracle_price < candle.low {
                candle.low = oracle_price;
            }
            candle.close = oracle_price;
        }

        // Per-block error stats.
        if h < height_bands.len() {
            let (high_bin, low_bin) = height_bands[h];
            if high_bin > 0.0 && low_bin > 0.0 {
                let err = if ref_bin < high_bin {
                    ref_bin - high_bin
                } else if ref_bin > low_bin {
                    ref_bin - low_bin
                } else {
                    0.0
                };

                let exchange_high = height_ohlc[h][1];
                let exchange_low = height_ohlc[h][2];

                overall.update(err, exchange_high, exchange_low);
                total_bias += err;

                let year = height_years[h];
                if year_stats.is_empty() || year_stats.last().unwrap().year != year {
                    year_stats.push(YearStats::new(year));
                }
                year_stats
                    .last_mut()
                    .unwrap()
                    .update(err, exchange_high, exchange_low);

                if err.abs() > BINS_5PCT {
                    worst_blocks.push(BlockError {
                        height: h,
                        oracle_price,
                        exchange_low,
                        exchange_high,
                        error_pct: if err < 0.0 {
                            -bins_to_pct(err.abs())
                        } else {
                            bins_to_pct(err.abs())
                        },
                    });
                }
            }
        }
    }

    if verify_prod {
        eprintln!(
            "  VERIFY_PROD: production Oracle vs harness - max ref_bin diff {prod_max_diff:.6}, {prod_diff_blocks} blocks differ"
        );
    }

    worst_blocks.sort_by(|a, b| b.error_pct.abs().partial_cmp(&a.error_pct.abs()).unwrap());
    overall.errors.sort_by(|a, b| a.partial_cmp(b).unwrap());

    // Daily candle comparison: oracle OHLC vs exchange OHLC.
    let mut daily_open_errors: Vec<f64> = Vec::new();
    let mut daily_high_errors: Vec<f64> = Vec::new();
    let mut daily_low_errors: Vec<f64> = Vec::new();
    let mut daily_close_errors: Vec<f64> = Vec::new();
    let mut daily_days = 0u64;

    for candle in &oracle_candles {
        let di = candle.day1;
        if di >= daily_ohlc.len() {
            continue;
        }
        let ex = &daily_ohlc[di];
        if ex[0] <= 0.0 || ex[3] <= 0.0 {
            continue;
        }
        let ex_open = ex[0];
        let ex_high = ex[1];
        let ex_low = ex[2];
        let ex_close = ex[3];

        // Error as percentage: (oracle - exchange) / exchange * 100
        daily_open_errors.push((candle.open - ex_open) / ex_open * 100.0);
        daily_high_errors.push((candle.high - ex_high) / ex_high * 100.0);
        daily_low_errors.push((candle.low - ex_low) / ex_low * 100.0);
        daily_close_errors.push((candle.close - ex_close) / ex_close * 100.0);
        daily_days += 1;
    }

    /// Summarizes signed percentage errors as `(median, rmse, max)`.
    ///
    /// * `median` - absolute value of the element at index `len / 2` once the
    ///   slice is ordered by magnitude (the upper median for even lengths).
    /// * `rmse` - root of the mean of the squared errors.
    /// * `max` - largest error magnitude.
    ///
    /// `errors` is reordered in place (ascending by absolute value) as a side
    /// effect. An empty slice yields `(0.0, 0.0, 0.0)` so a run in which no day
    /// had comparable exchange data still prints a report instead of panicking
    /// on the median lookup.
    fn daily_stats(errors: &mut [f64]) -> (f64, f64, f64) {
        if errors.is_empty() {
            return (0.0, 0.0, 0.0);
        }
        let n = errors.len() as f64;
        // Computed before sorting so the summation order matches the input order.
        let rmse = (errors.iter().map(|e| e * e).sum::<f64>() / n).sqrt();
        // `total_cmp` gives a total order, so a stray NaN sorts last rather than
        // aborting the whole report through a failed `partial_cmp`.
        errors.sort_unstable_by(|a, b| a.abs().total_cmp(&b.abs()));
        let max = errors[errors.len() - 1].abs();
        let median = errors[errors.len() / 2].abs();
        (median, rmse, max)
    }

    let (open_med, open_rmse, open_max) = daily_stats(&mut daily_open_errors);
    let (high_med, high_rmse, high_max) = daily_stats(&mut daily_high_errors);
    let (low_med, low_rmse, low_max) = daily_stats(&mut daily_low_errors);
    let (close_med, close_rmse, close_max) = daily_stats(&mut daily_close_errors);

    // Print report.
    println!();
    println!("  brk_oracle accuracy report");
    println!("  ══════════════════════════");
    println!();
    println!("  Config:       w12, alpha=2/7, search -9/+11, noisy/dust/round-btc filtered");
    println!(
        "  Test range:   height {} .. {} ({} blocks), seed ${:.2}",
        start,
        loop_end - 1,
        overall.total_blocks,
        start_price,
    );
    println!(
        "  Price range:  ${:.0} .. ${:.0}",
        overall.min_price, overall.max_price
    );

    println!();
    println!("  Per-block accuracy (vs per-height exchange OHLC):");
    println!("    Median:      {:.3}%", overall.percentile(50.0));
    println!("    95th pct:    {:.3}%", overall.percentile(95.0));
    println!("    99th pct:    {:.3}%", overall.percentile(99.0));
    println!("    99.9th pct:  {:.3}%", overall.percentile(99.9));
    println!("    RMSE:        {:.3}%", overall.rmse_pct());
    println!("    Max:         {:.1}%", overall.max_pct());
    println!(
        "    Bias:        {:+.2} bins",
        total_bias / overall.total_blocks as f64
    );
    println!(
        "    > 5%:        {} blocks ({:.3}%)",
        overall.gt_5pct,
        overall.gt_5pct as f64 / overall.total_blocks as f64 * 100.0
    );
    println!("    > 10%:       {} blocks", overall.gt_10pct);
    println!("    > 20%:       {} blocks", overall.gt_20pct);

    println!();
    println!(
        "  Daily candle accuracy ({} days, vs exchange daily OHLC):",
        daily_days
    );
    println!(
        "    {:>8} {:>10} {:>10} {:>10}",
        "", "Median", "RMSE", "Max"
    );
    println!(
        "    {:>8} {:>9.2}% {:>9.2}% {:>9.1}%",
        "Open", open_med, open_rmse, open_max
    );
    println!(
        "    {:>8} {:>9.2}% {:>9.2}% {:>9.1}%",
        "High", high_med, high_rmse, high_max
    );
    println!(
        "    {:>8} {:>9.2}% {:>9.2}% {:>9.1}%",
        "Low", low_med, low_rmse, low_max
    );
    println!(
        "    {:>8} {:>9.2}% {:>9.2}% {:>9.1}%",
        "Close", close_med, close_rmse, close_max
    );

    println!();
    println!("  By year:");
    println!(
        "    {:<6} {:>7} {:>9} {:>9} {:>9} {:>6} {:>5} {:>5} {:>14}",
        "Year", "Blocks", "Median", "RMSE", "Max", ">5%", ">10%", ">20%", "Price range"
    );
    println!("    {}", "-".repeat(80));
    for ys in &mut year_stats {
        let median = ys.median_pct();
        println!(
            "    {:<6} {:>7} {:>8.3}% {:>8.3}% {:>8.1}% {:>6} {:>5} {:>5}   ${:.0}..${:.0}",
            ys.year,
            ys.total_blocks,
            median,
            ys.rmse_pct(),
            ys.max_pct(),
            ys.gt_5pct,
            ys.gt_10pct,
            ys.gt_20pct,
            ys.min_price,
            ys.max_price,
        );
    }

    if !worst_blocks.is_empty() {
        println!();
        println!("  Worst blocks:");
        let show = worst_blocks.len().min(10);
        for wb in &worst_blocks[..show] {
            let dir = if wb.error_pct < 0.0 { "above" } else { "below" };
            println!(
                "    height {:>7}: oracle ${:>9.0}, exchange ${:.0}..${:.0} ({:+.1}%, {})",
                wb.height, wb.oracle_price, wb.exchange_low, wb.exchange_high, wb.error_pct, dir
            );
        }
        if worst_blocks.len() > show {
            println!("    ... and {} more", worst_blocks.len() - show);
        }
    }

    println!();
}