oracle: cleanup

2026-07-23 17:08:10 -07:00 · 2026-05-21 11:02:56 +02:00
parent 56e8103178
commit bf8de73541
9 changed files with 137 additions and 106858 deletions
@@ -124,7 +124,7 @@ The oracle consumes one pre-built histogram per block via `process_histogram(&hi

 The caller does the filtering when it builds the histogram. For each block it skips the coinbase, drops every output of a transaction carrying an `OP_RETURN`, then bins the rest. `default_eligible_bin(sats, output_type)` (or `Oracle::output_to_bin` for a non-default `Config`) applies the per-output rules: excluded script types, dust, and round-BTC values. It returns the bin index, or `None` for a filtered output.

-The initial seed must be close to the real price at the starting height. The crate includes a `PRICES` constant with exchange prices for every height up to 630,000 to derive a seed from.
+The initial seed must be close to the real price at the starting height. The crate includes a `PRICES` constant with exchange prices for heights 0 through 524,999; the last entry (height 524,999) seeds the oracle's first on-chain computation at `START_HEIGHT`.

 ## Configuration

@@ -145,15 +145,16 @@ All parameters via `Config` with sensible defaults:

 | | brk_oracle | UTXOracle |
 |---|---|---|
-| Resolution | Per-block (~10 min) + daily candles | Per-run consensus price + per-output intraday scatter |
+| Resolution | Per-block (~10 min); daily OHLC built downstream | Per-run consensus price + per-output intraday scatter |
 | Operation | Rolling: EMA over ring buffer, updates each block | Batch: processes a full day from scratch, stateless |
 | Algorithm | Single-pass stencil scoring with per-offset normalization | Multi-step: dual stencil → rough estimate → output-to-USD mapping → iterative convergence |
+| Steps to compute price | 7 (filter+bin → ring insert → EMA → per-offset peaks → score → argmax+parabolic → bin→price) | 10 (filter+bin → clip → smooth round BTC → sum → normalize → cap extremes → dual-stencil slide → neighbor weight-avg → output-to-USD map → iterative central price) |
 | Stencil | 19 round-USD offsets ($1 to $10k), each normalized to its own peak | 803-point Gaussian + weighted spike template targeting 17 round-USD amounts |
 | Round BTC handling | Excluded from histogram entirely | Histogram bins smoothed by averaging neighbors |
-| Output filtering | Per-tx OP_RETURN drop, then per-output: script type, dust threshold, round BTC | Per-tx: exactly 2 outputs, ≤5 inputs, no same-day inputs, ≤500-byte witness |
-| Validated from | Height 525,000 (May 2018) | December 2023 |
+| Output filtering | Per-tx OP_RETURN drop, then per-output: script type, dust threshold, round BTC | Per-tx: not coinbase, no OP_RETURN, exactly 2 outputs, ≤5 inputs, no same-day inputs, ≤500-byte witness |
+| Validated from | Height 525,000 (May 2018) | Dec 15, 2023 |
 | Language | Rust | Python |
-| Dependencies | None (pure computation, caller provides block data) | Bitcoin Core RPC |
+| Dependencies | None (pure computation, caller provides block data) | bitcoin-cli + direct blk file reads |
 | Bins per decade | 200 | 200 |

 ## Accuracy
@@ -1,295 +0,0 @@
-//! Compare specific digit filter configurations across multiple start heights.
-//!
-//! Run with: cargo run -p brk_oracle --example compare_digits --release
-
-use std::path::PathBuf;
-use std::time::Instant;
-
-use brk_indexer::Indexer;
-use brk_oracle::{Config, Histogram, NUM_BINS, Oracle, PRICES, cents_to_bin, sats_to_bin};
-use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
-use vecdb::{AnyVec, ReadableVec, VecIndex};
-
-const BINS_5PCT: f64 = 4.24;
-const BINS_10PCT: f64 = 8.28;
-const BINS_20PCT: f64 = 15.84;
-
-fn bins_to_pct(bins: f64) -> f64 {
-    (10.0_f64.powf(bins / 200.0) - 1.0) * 100.0
-}
-
-fn seed_bin(start_height: usize) -> f64 {
-    let price: f64 = PRICES
-        .lines()
-        .nth(start_height - 1)
-        .expect("prices.txt too short")
-        .parse()
-        .expect("Failed to parse seed price");
-    cents_to_bin(price * 100.0)
-}
-
-fn leading_digit(sats: u64) -> u8 {
-    let log = (sats as f64).log10();
-    let magnitude = 10.0_f64.powf(log.floor());
-    let d = (sats as f64 / magnitude).round() as u8;
-    if d >= 10 { 1 } else { d }
-}
-
-fn is_round(sats: u64) -> bool {
-    let log = (sats as f64).log10();
-    let magnitude = 10.0_f64.powf(log.floor());
-    let leading = (sats as f64 / magnitude).round();
-    let round_val = leading * magnitude;
-    (sats as f64 - round_val).abs() <= round_val * 0.001
-}
-
-struct Stats {
-    total_sq_err: f64,
-    total_bias: f64,
-    max_err: f64,
-    total_blocks: u64,
-    gt_5pct: u64,
-    gt_10pct: u64,
-    gt_20pct: u64,
-}
-
-impl Stats {
-    fn new() -> Self {
-        Self {
-            total_sq_err: 0.0,
-            total_bias: 0.0,
-            max_err: 0.0,
-            total_blocks: 0,
-            gt_5pct: 0,
-            gt_10pct: 0,
-            gt_20pct: 0,
-        }
-    }
-
-    fn update(&mut self, err: f64) {
-        self.total_sq_err += err * err;
-        self.total_bias += err;
-        self.total_blocks += 1;
-        let abs_err = err.abs();
-        if abs_err > self.max_err {
-            self.max_err = abs_err;
-        }
-        if abs_err > BINS_5PCT {
-            self.gt_5pct += 1;
-        }
-        if abs_err > BINS_10PCT {
-            self.gt_10pct += 1;
-        }
-        if abs_err > BINS_20PCT {
-            self.gt_20pct += 1;
-        }
-    }
-
-    fn rmse_pct(&self) -> f64 {
-        bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
-    }
-
-    fn max_pct(&self) -> f64 {
-        bins_to_pct(self.max_err)
-    }
-
-    fn bias(&self) -> f64 {
-        self.total_bias / self.total_blocks as f64
-    }
-}
-
-fn main() {
-    let t0 = Instant::now();
-
-    let data_dir = std::env::var("BRK_DIR")
-        .map(PathBuf::from)
-        .unwrap_or_else(|_| {
-            let home = std::env::var("HOME").unwrap();
-            PathBuf::from(home).join(".brk")
-        });
-
-    let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
-    let total_heights = indexer.vecs.blocks.timestamp.len();
-
-    let manifest_dir = env!("CARGO_MANIFEST_DIR");
-
-    let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
-        &std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
-            .expect("Failed to read height_price_ohlc.json"),
-    )
-    .expect("Failed to parse height OHLC");
-
-    let height_bands: Vec<(f64, f64)> = height_ohlc
-        .iter()
-        .map(|ohlc| {
-            let high = ohlc[1];
-            let low = ohlc[2];
-            if high > 0.0 && low > 0.0 {
-                (cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
-            } else {
-                (0.0, 0.0)
-            }
-        })
-        .collect();
-
-    // Configs to compare.
-    //                              987654321
-    let masks: &[(u16, &str)] = &[
-        (0b0_0111_0111, "{1,2,3,5,6,7}"),
-        (0b0_0011_0111, "{1,2,3,5,6}"),
-        (0b0_0001_1111, "{1,2,3,4,5}"),
-        (0b0_0001_0111, "{1,2,3,5}"),
-    ];
-
-    let start_heights: &[usize] = &[575_000, 600_000, 630_000];
-
-    // (mask_idx, start_idx) -> (Oracle, Stats)
-    let n = masks.len() * start_heights.len();
-    let mut oracles: Vec<Option<Oracle>> = (0..n).map(|_| None).collect();
-    let mut stats: Vec<Stats> = (0..n).map(|_| Stats::new()).collect();
-
-    let idx = |m: usize, s: usize| -> usize { m * start_heights.len() + s };
-
-    let total_txs = indexer.vecs.transactions.txid.len();
-    let total_outputs = indexer.vecs.outputs.value.len();
-
-    let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
-    let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
-
-    let ref_config = Config::default();
-    let earliest_start = *start_heights.iter().min().unwrap();
-
-    for h in earliest_start..total_heights {
-        let ft = first_tx_index[h];
-        let next_ft = first_tx_index
-            .get(h + 1)
-            .copied()
-            .unwrap_or(TxIndex::from(total_txs));
-
-        let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
-            indexer
-                .vecs
-                .transactions
-                .first_txout_index
-                .collect_one(ft + 1)
-                .unwrap()
-                .to_usize()
-        } else {
-            out_first
-                .get(h + 1)
-                .copied()
-                .unwrap_or(TxOutIndex::from(total_outputs))
-                .to_usize()
-        };
-        let out_end = out_first
-            .get(h + 1)
-            .copied()
-            .unwrap_or(TxOutIndex::from(total_outputs))
-            .to_usize();
-
-        let values: Vec<Sats> = indexer
-            .vecs
-            .outputs
-            .value
-            .collect_range_at(out_start, out_end);
-        let output_types: Vec<OutputType> = indexer
-            .vecs
-            .outputs
-            .output_type
-            .collect_range_at(out_start, out_end);
-
-        // Build full histogram and per-digit histograms.
-        let mut full_hist = Histogram::zeros();
-        let mut digit_hist: [Histogram; 9] = std::array::from_fn(|_| Histogram::zeros());
-
-        for (sats, output_type) in values.into_iter().zip(output_types) {
-            if ref_config.excluded_output_types.contains(&output_type) {
-                continue;
-            }
-            if *sats < ref_config.min_sats {
-                continue;
-            }
-            if let Some(bin) = sats_to_bin(sats) {
-                full_hist.increment(bin);
-                if is_round(*sats) {
-                    let d = leading_digit(*sats);
-                    if (1..=9).contains(&d) {
-                        digit_hist[(d - 1) as usize].increment(bin);
-                    }
-                }
-            }
-        }
-
-        // Feed each (mask, start_height) combo.
-        for (mi, &(mask, _)) in masks.iter().enumerate() {
-            // Build filtered histogram for this mask.
-            let mut hist = full_hist.clone();
-            (0..9usize).for_each(|d| {
-                if mask & (1 << d) != 0 {
-                    for bin in 0..NUM_BINS {
-                        hist[bin] -= digit_hist[d][bin];
-                    }
-                }
-            });
-
-            for (si, &sh) in start_heights.iter().enumerate() {
-                if h < sh {
-                    continue;
-                }
-                let i = idx(mi, si);
-                if oracles[i].is_none() {
-                    oracles[i] = Some(Oracle::new(
-                        seed_bin(sh),
-                        Config {
-                            exclude_common_round_values: false,
-                            ..Default::default()
-                        },
-                    ));
-                }
-
-                let ref_bin = oracles[i].as_mut().unwrap().process_histogram(&hist);
-
-                if h < height_bands.len() {
-                    let (high_bin, low_bin) = height_bands[h];
-                    if high_bin > 0.0 && low_bin > 0.0 {
-                        let err = if ref_bin < high_bin {
-                            ref_bin - high_bin
-                        } else if ref_bin > low_bin {
-                            ref_bin - low_bin
-                        } else {
-                            0.0
-                        };
-                        stats[i].update(err);
-                    }
-                }
-            }
-        }
-    }
-
-    // Print results grouped by start height.
-    for (si, &sh) in start_heights.iter().enumerate() {
-        println!();
-        println!("@ {}k:", sh / 1000);
-        println!(
-            "  {:<16} {:>8} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
-            "Digits", "Blocks", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
-        );
-        println!("  {}", "-".repeat(72));
-        for (mi, &(_, label)) in masks.iter().enumerate() {
-            let s = &stats[idx(mi, si)];
-            println!(
-                "  {:<16} {:>8}   {:>7.3}%   {:>7.1}% {:>6} {:>6} {:>6} {:>+8.2}",
-                label,
-                s.total_blocks,
-                s.rmse_pct(),
-                s.max_pct(),
-                s.gt_5pct,
-                s.gt_10pct,
-                s.gt_20pct,
-                s.bias()
-            );
-        }
-    }
-
-    println!("\nDone in {:.1}s", t0.elapsed().as_secs_f64());
-}
@@ -1,10 +1,12 @@
-//! Verify oracle determinism: oracles started from different heights converge
-//! to identical ref_bin values after the ring buffer fills.
+//! Verify the production restart property: an oracle restored via
+//! `from_checkpoint` (seeded from the previous block's stored cents price,
+//! replayed over the last `window_size` blocks) produces bit-exact `ref_bin`
+//! values matching a continuously-running oracle from the restart height
+//! onward.
 //!
-//! Creates a reference oracle at height 575k and test oracles every 1000 blocks
-//! up to 630k. After window_size blocks, each test oracle should produce the
-//! same ref_bin as the reference, proving the truncated EMA provides
-//! start-point independence.
+//! Mirrors the production filter exactly (per-tx OP_RETURN drop + per-output
+//! `default_eligible_bin`), so it exercises the same code path
+//! `brk_computer::prices::compute::feed_blocks` uses at runtime.
 //!
 //! Run with: cargo run -p brk_oracle --example determinism --release

@@ -12,26 +14,49 @@ use std::path::PathBuf;

 use brk_indexer::Indexer;
 use brk_oracle::{
-    Config, Histogram, Oracle, PRICES, START_HEIGHT, cents_to_bin, default_eligible_bin,
+    Config, Histogram, Oracle, PRICES, START_HEIGHT, bin_to_cents, cents_to_bin,
+    default_eligible_bin,
 };
 use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
 use vecdb::{AnyVec, ReadableVec, VecIndex};

-fn seed_bin(height: usize) -> f64 {
+fn seed_bin_for_start_height() -> f64 {
    let price: f64 = PRICES
        .lines()
-        .nth(height - 1)
-        .expect("prices.txt too short")
+        .nth(START_HEIGHT - 1)
+        .expect("prices.txt too short for START_HEIGHT")
        .parse()
        .expect("Failed to parse seed price");
    cents_to_bin(price * 100.0)
 }

-struct TestRun {
-    start_height: usize,
-    oracle: Option<Oracle>,
-    converged_at: Option<usize>,
-    diverged_after: bool,
+struct Block {
+    values: Vec<Sats>,
+    output_types: Vec<OutputType>,
+    tx_starts: Vec<usize>,
+    out_start: usize,
+    out_end: usize,
+}
+
+fn build_histogram(block: &Block) -> Histogram {
+    let mut hist = Histogram::zeros();
+    for tx in 0..block.tx_starts.len() {
+        let lo = block.tx_starts[tx] - block.out_start;
+        let hi = block
+            .tx_starts
+            .get(tx + 1)
+            .map(|s| s - block.out_start)
+            .unwrap_or(block.out_end - block.out_start);
+        if block.output_types[lo..hi].contains(&OutputType::OpReturn) {
+            continue;
+        }
+        for i in lo..hi {
+            if let Some(bin) = default_eligible_bin(block.values[i], block.output_types[i]) {
+                hist.increment(bin as usize);
+            }
+        }
+    }
+    hist
 }

 fn main() {
@@ -48,59 +73,50 @@ fn main() {
    let config = Config::default();
    let window_size = config.window_size;

+    let restart_offset = 1000;
+    let end_offset = restart_offset + window_size * 4;
+    let end_height = (START_HEIGHT + end_offset).min(total_heights);
+    let restart_at = START_HEIGHT + restart_offset;
+    let warmup_start = restart_at - window_size;
+
+    assert!(
+        end_height > restart_at,
+        "indexer has {total_heights} blocks; need at least {} to test restart at {restart_at}",
+        restart_at + 1
+    );
+
+    println!(
+        "Loading {} blocks ({START_HEIGHT}..{end_height})...",
+        end_height - START_HEIGHT
+    );
    let total_txs = indexer.vecs.transactions.txid.len();
    let total_outputs = indexer.vecs.outputs.value.len();
-
    let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
    let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
+    let mut txout_cursor = indexer.vecs.transactions.first_txout_index.cursor();

-    // Reference oracle at 575k.
-    let ref_start = START_HEIGHT;
-    let mut ref_oracle = Oracle::new(seed_bin(ref_start), Config::default());
-
-    // Test oracles every 1000 blocks from 576k to 630k.
-    let mut runs: Vec<TestRun> = (576_000..=630_000)
-        .step_by(1000)
-        .map(|h| TestRun {
-            start_height: h,
-            oracle: None,
-            converged_at: None,
-            diverged_after: false,
-        })
-        .collect();
-
-    let last_start = runs.last().map(|r| r.start_height).unwrap_or(ref_start);
-    // Process enough blocks for all oracles to converge + verification margin.
-    let end_height = (last_start + window_size + 100).min(total_heights);
-
+    let mut blocks: Vec<Block> = Vec::with_capacity(end_height - START_HEIGHT);
    for h in START_HEIGHT..end_height {
        let ft = first_tx_index[h];
        let next_ft = first_tx_index
            .get(h + 1)
            .copied()
            .unwrap_or(TxIndex::from(total_txs));
-
-        let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
-            indexer
-                .vecs
-                .transactions
-                .first_txout_index
-                .collect_one(ft + 1)
-                .unwrap()
-                .to_usize()
-        } else {
-            out_first
-                .get(h + 1)
-                .copied()
-                .unwrap_or(TxOutIndex::from(total_outputs))
-                .to_usize()
-        };
+        let block_first_tx = ft.to_usize() + 1;
+        let tx_count = next_ft.to_usize() - block_first_tx;
        let out_end = out_first
            .get(h + 1)
            .copied()
            .unwrap_or(TxOutIndex::from(total_outputs))
            .to_usize();

+        txout_cursor.advance(block_first_tx - txout_cursor.position());
+        let mut tx_starts: Vec<usize> = Vec::with_capacity(tx_count);
+        for _ in 0..tx_count {
+            tx_starts.push(txout_cursor.next().unwrap().to_usize());
+        }
+        let out_start = tx_starts.first().copied().unwrap_or(out_end);
+
        let values: Vec<Sats> = indexer
            .vecs
            .outputs
@@ -112,95 +128,76 @@ fn main() {
            .output_type
            .collect_range_at(out_start, out_end);

-        let mut hist = Histogram::zeros();
-        for (sats, output_type) in values.into_iter().zip(output_types) {
-            if let Some(bin) = default_eligible_bin(sats, output_type) {
-                hist.increment(bin as usize);
-            }
-        }
-
-        let ref_bin = ref_oracle.process_histogram(&hist);
-
-        for run in &mut runs {
-            if h < run.start_height {
-                continue;
-            }
-            if run.oracle.is_none() {
-                run.oracle = Some(Oracle::new(seed_bin(run.start_height), Config::default()));
-            }
-            let test_bin = run.oracle.as_mut().unwrap().process_histogram(&hist);
-
-            if run.converged_at.is_some() {
-                if test_bin != ref_bin {
-                    run.diverged_after = true;
-                }
-            } else if test_bin == ref_bin {
-                run.converged_at = Some(h);
-            }
-        }
+        blocks.push(Block {
+            values,
+            output_types,
+            tx_starts,
+            out_start,
+            out_end,
+        });
    }

-    // Print results.
-    println!();
-    println!("{:<12} {:>16} {:>8}", "Start", "Converged at", "Blocks");
-    println!("{}", "-".repeat(40));
+    let mut continuous = Oracle::new(seed_bin_for_start_height(), config.clone());
+    let continuous_bins: Vec<f64> = blocks
+        .iter()
+        .map(|b| continuous.process_histogram(&build_histogram(b)))
+        .collect();
+    println!("Continuous oracle: {} blocks processed", continuous_bins.len());

-    let mut max_blocks = 0usize;
-    let mut failed = Vec::new();
-    let mut diverged = Vec::new();
-
-    for run in &runs {
-        if let Some(converged) = run.converged_at {
-            let blocks = converged - run.start_height;
-            if blocks > max_blocks {
-                max_blocks = blocks;
-            }
-            println!("{:<12} {:>16} {:>8}", run.start_height, converged, blocks);
-            if run.diverged_after {
-                diverged.push(run.start_height);
-            }
-        } else {
-            println!("{:<12} {:>16} {:>8}", run.start_height, "NEVER", "-");
-            failed.push(run.start_height);
-        }
-    }
-
-    println!();
+    let prev_bin = continuous_bins[restart_at - START_HEIGHT - 1];
+    let seed_bin = cents_to_bin(bin_to_cents(prev_bin) as f64);
    println!(
-        "{}/{} converged, max {} blocks to converge (window_size={})",
-        runs.len() - failed.len(),
-        runs.len(),
-        max_blocks,
-        window_size,
+        "Restart at {restart_at}: prev_bin={prev_bin:.4} -> cents -> seed_bin={seed_bin:.4} (delta {:.6})",
+        seed_bin - prev_bin
    );

-    if !diverged.is_empty() {
-        println!("DIVERGED after convergence: {:?}", diverged);
-    }
-    if !failed.is_empty() {
-        println!("NEVER converged: {:?}", failed);
+    let warmup_slice = &blocks[warmup_start - START_HEIGHT..restart_at - START_HEIGHT];
+    let mut restored = Oracle::from_checkpoint(seed_bin, config.clone(), |o| {
+        for b in warmup_slice {
+            o.process_histogram(&build_histogram(b));
+        }
+    });
+
+    let restored_bins: Vec<f64> = blocks[restart_at - START_HEIGHT..]
+        .iter()
+        .map(|b| restored.process_histogram(&build_histogram(b)))
+        .collect();
+    println!("Restored oracle: {} blocks processed", restored_bins.len());
+
+    let mut mismatches: Vec<(usize, f64, f64)> = Vec::new();
+    for (i, &r) in restored_bins.iter().enumerate() {
+        let c = continuous_bins[restart_at - START_HEIGHT + i];
+        if r != c {
+            mismatches.push((restart_at + i, c, r));
+        }
    }

-    // Assertions.
-    assert!(
-        failed.is_empty(),
-        "{} oracles never converged: {:?}",
-        failed.len(),
-        failed
-    );
-    assert!(
-        diverged.is_empty(),
-        "{} oracles diverged after convergence: {:?}",
-        diverged.len(),
-        diverged
-    );
-    assert!(
-        max_blocks <= window_size * 2,
-        "Convergence took {} blocks, expected <= {} (2 * window_size)",
-        max_blocks,
-        window_size * 2
+    println!();
+    if mismatches.is_empty() {
+        println!(
+            "All {} blocks from {restart_at} onward match exactly.",
+            restored_bins.len()
+        );
+    } else {
+        println!(
+            "{} of {} blocks differ (showing up to 5):",
+            mismatches.len(),
+            restored_bins.len()
+        );
+        for (h, c, r) in mismatches.iter().take(5) {
+            println!(
+                "  h={h}: continuous={c:.6}, restored={r:.6}, delta={:.6}",
+                r - c
+            );
+        }
+    }
+
+    assert_eq!(
+        mismatches.len(),
+        0,
+        "restored oracle diverged from continuous oracle"
    );

    println!();
-    println!("All assertions passed!");
+    println!("Assertion passed: from_checkpoint restart is bit-exact.");
 }
@@ -1,272 +0,0 @@
-//! Diagnostic: sweep oracle start heights and clamp-top-N strategies.
-//!
-//! Run with: cargo run -p brk_oracle --example noise --release
-
-use std::path::PathBuf;
-use std::time::Instant;
-
-use brk_indexer::Indexer;
-use brk_oracle::{Config, Histogram, Oracle, PRICES, cents_to_bin, default_eligible_bin};
-use brk_types::{Sats, TxIndex, TxOutIndex};
-use vecdb::{AnyVec, ReadableVec, VecIndex};
-
-const BINS_5PCT: f64 = 4.24;
-const BINS_10PCT: f64 = 8.28;
-const BINS_20PCT: f64 = 15.84;
-const BPD: f64 = 200.0;
-
-fn bins_to_pct(bins: f64) -> f64 {
-    (10.0_f64.powf(bins / BPD) - 1.0) * 100.0
-}
-
-fn seed_bin(start_height: usize) -> f64 {
-    let price: f64 = PRICES
-        .lines()
-        .nth(start_height - 1)
-        .expect("prices.txt too short")
-        .parse()
-        .expect("Failed to parse seed price");
-    cents_to_bin(price * 100.0)
-}
-
-/// Clamp the top N bins in `src` down to the (N+1)th highest value, writing into `dst`.
-fn clamp_top_n(src: &Histogram, dst: &mut Histogram, n: usize) {
-    let mut top: Vec<u32> = src.iter().copied().filter(|&v| v > 0).collect();
-    top.sort_unstable_by(|a, b| b.cmp(a));
-    let clamp_to = if top.len() > n { top[n] } else { 0 };
-
-    for (i, &v) in src.iter().enumerate() {
-        dst[i] = v.min(clamp_to.max(v.min(clamp_to)));
-    }
-}
-
-fn main() {
-    let t0 = Instant::now();
-
-    let data_dir = std::env::var("BRK_DIR")
-        .map(PathBuf::from)
-        .unwrap_or_else(|_| {
-            let home = std::env::var("HOME").unwrap();
-            PathBuf::from(home).join(".brk")
-        });
-
-    let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
-    let total_heights = indexer.vecs.blocks.timestamp.len();
-
-    let manifest_dir = env!("CARGO_MANIFEST_DIR");
-
-    let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
-        &std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
-            .expect("Failed to read height_price_ohlc.json"),
-    )
-    .expect("Failed to parse height OHLC");
-
-    let height_bands: Vec<(f64, f64)> = height_ohlc
-        .iter()
-        .map(|ohlc| {
-            let high = ohlc[1];
-            let low = ohlc[2];
-            if high > 0.0 && low > 0.0 {
-                (cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
-            } else {
-                (0.0, 0.0)
-            }
-        })
-        .collect();
-
-    // Start heights: 630k, 600k, 575k, then 570k down to 500k by 5k.
-    let mut start_heights: Vec<usize> = vec![630_000, 600_000, 575_000];
-    let mut h = 570_000;
-    while h >= 500_000 {
-        start_heights.push(h);
-        h -= 5_000;
-    }
-    let lowest = *start_heights.iter().min().unwrap();
-
-    // Clamp-top-N values to test: 0 (no clamp), 2, 3, 5, 10.
-    let clamp_values: Vec<usize> = vec![0, 2, 3, 5, 10];
-
-    // Build per-block RAW histograms from the lowest start height.
-    eprintln!("Building histograms from height {}...", lowest);
-
-    let total_txs = indexer.vecs.transactions.txid.len();
-    let total_outputs = indexer.vecs.outputs.value.len();
-
-    let first_txout_index_reader = indexer.vecs.transactions.first_txout_index.reader();
-    let value_reader = indexer.vecs.outputs.value.reader();
-    let output_type_reader = indexer.vecs.outputs.output_type.reader();
-
-    let config = Config::default();
-    let total_blocks = total_heights - lowest;
-
-    struct BlockData {
-        hist: Histogram,
-        high_bin: f64,
-        low_bin: f64,
-    }
-
-    let mut blocks: Vec<BlockData> = Vec::with_capacity(total_blocks);
-
-    for h in lowest..total_heights {
-        let first_tx_index: TxIndex = indexer
-            .vecs
-            .transactions
-            .first_tx_index
-            .collect_one_at(h)
-            .unwrap();
-        let next_first_tx_index: TxIndex = indexer
-            .vecs
-            .transactions
-            .first_tx_index
-            .collect_one_at(h + 1)
-            .unwrap_or(TxIndex::from(total_txs));
-
-        let out_start = if first_tx_index.to_usize() + 1 < next_first_tx_index.to_usize() {
-            first_txout_index_reader
-                .get(first_tx_index.to_usize() + 1)
-                .to_usize()
-        } else {
-            indexer
-                .vecs
-                .outputs
-                .first_txout_index
-                .collect_one_at(h + 1)
-                .unwrap_or(TxOutIndex::from(total_outputs))
-                .to_usize()
-        };
-        let out_end: usize = indexer
-            .vecs
-            .outputs
-            .first_txout_index
-            .collect_one_at(h + 1)
-            .unwrap_or(TxOutIndex::from(total_outputs))
-            .to_usize();
-
-        let mut hist = Histogram::zeros();
-        for i in out_start..out_end {
-            let sats: Sats = value_reader.get(i);
-            let output_type = output_type_reader.get(i);
-            if let Some(bin) = default_eligible_bin(sats, output_type) {
-                hist.increment(bin as usize);
-            }
-        }
-
-        let (high_bin, low_bin) = if h < height_bands.len() {
-            height_bands[h]
-        } else {
-            (0.0, 0.0)
-        };
-
-        blocks.push(BlockData {
-            hist,
-            high_bin,
-            low_bin,
-        });
-
-        if (h - lowest).is_multiple_of(50_000) {
-            eprint!(
-                "\r  {}/{} ({:.0}%)",
-                h - lowest,
-                total_blocks,
-                (h - lowest) as f64 / total_blocks as f64 * 100.0
-            );
-        }
-    }
-
-    eprintln!(
-        "\r  {} blocks built in {:.1}s",
-        blocks.len(),
-        t0.elapsed().as_secs_f64()
-    );
-
-    // For each clamp value, run all start heights.
-    for &clamp_n in &clamp_values {
-        println!();
-        let label = if clamp_n == 0 {
-            "no clamp".to_string()
-        } else {
-            format!("clamp top {}", clamp_n)
-        };
-        println!("=== {} ===", label);
-        println!(
-            "{:>8} {:>8} {:>8} {:>8} {:>6} {:>6} {:>6} {:>8}",
-            "Start", "Blocks", "RMSE%", "Worst%", ">5%", ">10%", ">20%", "Worst@"
-        );
-        println!("{}", "-".repeat(72));
-
-        for &start_height in &start_heights {
-            let mut oracle = Oracle::new(seed_bin(start_height), config.clone());
-            let block_offset = start_height - lowest;
-
-            let mut worst_err: f64 = 0.0;
-            let mut worst_height: usize = 0;
-            let mut gt_5: u64 = 0;
-            let mut gt_10: u64 = 0;
-            let mut gt_20: u64 = 0;
-            let mut total_sq_err: f64 = 0.0;
-            let mut total_measured: u64 = 0;
-
-            let mut clamped_hist = Histogram::zeros();
-            for (i, bd) in blocks[block_offset..].iter().enumerate() {
-                if clamp_n > 0 {
-                    clamp_top_n(&bd.hist, &mut clamped_hist, clamp_n);
-                    oracle.process_histogram(&clamped_hist);
-                } else {
-                    oracle.process_histogram(&bd.hist);
-                }
-
-                let height = start_height + i;
-                let ref_bin = oracle.ref_bin();
-
-                if bd.high_bin <= 0.0 || bd.low_bin <= 0.0 {
-                    continue;
-                }
-
-                let err = if ref_bin < bd.high_bin {
-                    ref_bin - bd.high_bin
-                } else if ref_bin > bd.low_bin {
-                    ref_bin - bd.low_bin
-                } else {
-                    0.0
-                };
-
-                total_measured += 1;
-                total_sq_err += err * err;
-                let abs_err = err.abs();
-                if abs_err > BINS_5PCT {
-                    gt_5 += 1;
-                }
-                if abs_err > BINS_10PCT {
-                    gt_10 += 1;
-                }
-                if abs_err > BINS_20PCT {
-                    gt_20 += 1;
-                }
-                if abs_err > worst_err {
-                    worst_err = abs_err;
-                    worst_height = height;
-                }
-            }
-
-            let rmse = if total_measured > 0 {
-                bins_to_pct((total_sq_err / total_measured as f64).sqrt())
-            } else {
-                0.0
-            };
-
-            println!(
-                "{:>8} {:>8} {:>7.3}% {:>7.1}% {:>6} {:>6} {:>6}   {}",
-                format!("{}k", start_height / 1000),
-                total_measured,
-                rmse,
-                bins_to_pct(worst_err),
-                gt_5,
-                gt_10,
-                gt_20,
-                worst_height,
-            );
-        }
-    }
-
-    println!("\nTotal time: {:.1}s", t0.elapsed().as_secs_f64());
-}
@@ -1,416 +0,0 @@
-//! Sweep round-value digit filter to find optimal configuration.
-//!
-//! Tests all 256 subsets of leading digits {2,...,9} (digit 1 is always
-//! filtered) to find which digits to filter out for best oracle accuracy.
-//!
-//! Phase 1: single pass over indexer, precompute per-block histograms.
-//! Phase 2: run 256 configs in parallel across CPU cores.
-//!
-//! Run with: cargo run -p brk_oracle --example sweep_digits --release
-
-use std::path::PathBuf;
-use std::time::Instant;
-
-use brk_indexer::Indexer;
-use brk_oracle::{Config, Histogram, Oracle, PRICES, cents_to_bin, sats_to_bin};
-use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
-use vecdb::{AnyVec, ReadableVec, VecIndex};
-
-const BINS_5PCT: f64 = 4.24; // error threshold in bins; ~5% via `bins_to_pct` (200 bins per factor of ten)
-const BINS_10PCT: f64 = 8.28; // error threshold in bins; ~10% via `bins_to_pct`
-const BINS_20PCT: f64 = 15.84; // error threshold in bins; ~20% via `bins_to_pct`
-
-/// Converts a distance measured in bins into a percentage change, on the
-/// scale where 200 bins correspond to one factor of ten.
-fn bins_to_pct(bins: f64) -> f64 {
-    let ratio = 10.0_f64.powf(bins / 200.0);
-    (ratio - 1.0) * 100.0
-}
-
-/// Seed bin for an oracle whose first processed block is `start_height`.
-///
-/// Reads the line at index `start_height - 1` of `PRICES`, multiplies the
-/// parsed value by 100 and passes it to `cents_to_bin`.
-/// NOTE(review): presumably `PRICES` holds one dollar price per height —
-/// confirm against the crate.
-///
-/// # Panics
-///
-/// Panics if `start_height` is 0 (there is no preceding entry), if `PRICES`
-/// has fewer than `start_height` lines, or if the selected line does not
-/// parse as an `f64`.
-fn seed_bin(start_height: usize) -> f64 {
-    // `checked_sub` keeps height 0 from overflowing (debug builds) or wrapping
-    // to `usize::MAX` (release builds) before the lookup.
-    let prev_height = start_height
-        .checked_sub(1)
-        .expect("start_height must be at least 1 to have a seed price");
-    let price: f64 = PRICES
-        .lines()
-        .nth(prev_height)
-        .expect("prices.txt too short")
-        .parse()
-        .expect("Failed to parse seed price");
-    cents_to_bin(price * 100.0)
-}
-
-/// Rounded leading digit of `sats`: the value divided by its power-of-ten
-/// magnitude, rounded to the nearest integer. A quotient that rounds up to
-/// 10 (or more) is reported as 1.
-fn leading_digit(sats: u64) -> u8 {
-    let value = sats as f64;
-    let scale = 10.0_f64.powf(value.log10().floor());
-    match (value / scale).round() as u8 {
-        digit if digit >= 10 => 1,
-        digit => digit,
-    }
-}
-
-/// Returns `true` when `sats` lies within 0.1% of its nearest `d × 10^n`
-/// value, where `d` is the rounded leading quotient of `sats`.
-fn is_round(sats: u64) -> bool {
-    let value = sats as f64;
-    let scale = 10.0_f64.powf(value.log10().floor());
-    let target = (value / scale).round() * scale;
-    let deviation = (value - target).abs();
-    deviation <= target * 0.001
-}
-
-/// Renders a digit mask as text. Bit `d - 1` set means digit `d` (1 through 9)
-/// is part of the mask; the digits are listed in ascending order, and a mask
-/// with none of those bits set is rendered as "none".
-fn mask_label(mask: u16) -> String {
-    let mut label = String::new();
-    for d in 1..=9u8 {
-        if mask & (1 << (d - 1)) != 0 {
-            label.push(char::from_digit(d as u32, 10).unwrap());
-        }
-    }
-    if label.is_empty() {
-        return "none".to_string();
-    }
-    label
-}
-
-struct Stats { // running error statistics for one sweep configuration
-    total_sq_err: f64, // sum of squared errors passed to `update`
-    total_bias: f64, // sum of signed errors passed to `update`
-    max_err: f64, // largest absolute error seen so far
-    total_blocks: u64, // number of `update` calls
-    gt_5pct: u64, // updates whose absolute error exceeded BINS_5PCT
-    gt_10pct: u64, // updates whose absolute error exceeded BINS_10PCT
-    gt_20pct: u64, // updates whose absolute error exceeded BINS_20PCT
-}
-
-impl Stats {
-    /// Creates an accumulator with every sum and counter at zero.
-    fn new() -> Self {
-        Self {
-            total_sq_err: 0.0,
-            total_bias: 0.0,
-            max_err: 0.0,
-            total_blocks: 0,
-            gt_5pct: 0,
-            gt_10pct: 0,
-            gt_20pct: 0,
-        }
-    }
-
-    /// Records one signed error: adds it to the squared and signed sums,
-    /// bumps the sample count, tracks the largest absolute error and
-    /// increments each threshold counter the absolute error exceeds.
-    fn update(&mut self, err: f64) {
-        self.total_sq_err += err * err;
-        self.total_bias += err;
-        self.total_blocks += 1;
-        let abs_err = err.abs();
-        if abs_err > self.max_err {
-            self.max_err = abs_err;
-        }
-        if abs_err > BINS_5PCT {
-            self.gt_5pct += 1;
-        }
-        if abs_err > BINS_10PCT {
-            self.gt_10pct += 1;
-        }
-        if abs_err > BINS_20PCT {
-            self.gt_20pct += 1;
-        }
-    }
-
-    /// Root-mean-square error converted to a percentage with `bins_to_pct`.
-    ///
-    /// Returns 0.0 when no sample has been recorded, so that an empty
-    /// accumulator never yields NaN (which cannot be ordered by
-    /// `partial_cmp`).
-    fn rmse_pct(&self) -> f64 {
-        if self.total_blocks == 0 {
-            return 0.0;
-        }
-        bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
-    }
-
-    /// Largest absolute error converted to a percentage with `bins_to_pct`.
-    fn max_pct(&self) -> f64 {
-        bins_to_pct(self.max_err)
-    }
-
-    /// Mean signed error; 0.0 when no sample has been recorded.
-    fn bias(&self) -> f64 {
-        if self.total_blocks == 0 {
-            return 0.0;
-        }
-        self.total_bias / self.total_blocks as f64
-    }
-}
-
-struct BlockData { // per-block data precomputed in Phase 1 of `main`
-    full_hist: Histogram, // bins of every output that passed the type and min_sats filters
-    /// (bin_index, leading_digit) for outputs that are round values.
-    round_outputs: Vec<(u16, u8)>,
-    high_bin: f64, // bin of the reference high price; 0.0 when no reference exists
-    low_bin: f64, // bin of the reference low price; 0.0 when no reference exists
-}
-
-/// Runs the digit-filter sweep.
-///
-/// Phase 1 walks the indexer once from `sweep_start` and stores, per block,
-/// the filtered histogram plus the round-valued outputs with their leading
-/// digit. Phase 2 replays those blocks through a fresh `Oracle` for each of
-/// the 256 digit masks (digit 1 always set), spread over the available
-/// threads, and prints the masks ranked by RMSE.
-///
-/// # Panics
-///
-/// Panics if the indexer or the reference OHLC file cannot be loaded, or if
-/// the indexer holds no block at or above `sweep_start`.
-fn main() {
-    let t0 = Instant::now();
-
-    let data_dir = std::env::var("BRK_DIR")
-        .map(PathBuf::from)
-        .unwrap_or_else(|_| {
-            let home = std::env::var("HOME").expect("neither BRK_DIR nor HOME is set");
-            PathBuf::from(home).join(".brk")
-        });
-
-    let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
-    let total_heights = indexer.vecs.blocks.timestamp.len();
-
-    let manifest_dir = env!("CARGO_MANIFEST_DIR");
-
-    let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
-        &std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
-            .expect("Failed to read height_price_ohlc.json"),
-    )
-    .expect("Failed to parse height OHLC");
-
-    // Per-height (high_bin, low_bin) reference band; (0.0, 0.0) marks a height
-    // without a usable reference price.
-    let height_bands: Vec<(f64, f64)> = height_ohlc
-        .iter()
-        .map(|ohlc| {
-            let high = ohlc[1];
-            let low = ohlc[2];
-            if high > 0.0 && low > 0.0 {
-                (cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
-            } else {
-                (0.0, 0.0)
-            }
-        })
-        .collect();
-
-    let sweep_start: usize = 575_000;
-    // Fail with a clear message instead of an integer underflow below.
-    assert!(
-        total_heights > sweep_start,
-        "indexer has {total_heights} heights; the sweep needs more than {sweep_start}"
-    );
-
-    // Phase 1: precompute per-block data in a single pass over the indexer.
-    eprintln!("Phase 1: precomputing block data...");
-
-    let total_txs = indexer.vecs.transactions.txid.len();
-    let total_outputs = indexer.vecs.outputs.value.len();
-
-    let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
-    let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
-
-    let ref_config = Config::default();
-    let total_blocks = total_heights - sweep_start;
-    let mut blocks: Vec<BlockData> = Vec::with_capacity(total_blocks);
-
-    for h in sweep_start..total_heights {
-        let ft = first_tx_index[h];
-        let next_ft = first_tx_index
-            .get(h + 1)
-            .copied()
-            .unwrap_or(TxIndex::from(total_txs));
-
-        // Outputs are read starting at the block's second transaction (the
-        // first transaction is skipped); a block with a single transaction
-        // yields an empty range.
-        let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
-            indexer
-                .vecs
-                .transactions
-                .first_txout_index
-                .collect_one(ft + 1)
-                .unwrap()
-                .to_usize()
-        } else {
-            out_first
-                .get(h + 1)
-                .copied()
-                .unwrap_or(TxOutIndex::from(total_outputs))
-                .to_usize()
-        };
-        let out_end = out_first
-            .get(h + 1)
-            .copied()
-            .unwrap_or(TxOutIndex::from(total_outputs))
-            .to_usize();
-
-        let values: Vec<Sats> = indexer
-            .vecs
-            .outputs
-            .value
-            .collect_range_at(out_start, out_end);
-        let output_types: Vec<OutputType> = indexer
-            .vecs
-            .outputs
-            .output_type
-            .collect_range_at(out_start, out_end);
-
-        let mut full_hist = Histogram::zeros();
-        let mut round_outputs = Vec::new();
-
-        for (sats, output_type) in values.into_iter().zip(output_types) {
-            if ref_config.excluded_output_types.contains(&output_type) {
-                continue;
-            }
-            if *sats < ref_config.min_sats {
-                continue;
-            }
-            if let Some(bin) = sats_to_bin(sats) {
-                full_hist.increment(bin);
-                // Round outputs stay in the full histogram; Phase 2 subtracts
-                // them again for the digits selected by each mask.
-                if is_round(*sats) {
-                    let d = leading_digit(*sats);
-                    if (1..=9).contains(&d) {
-                        round_outputs.push((bin as u16, d));
-                    }
-                }
-            }
-        }
-
-        let (high_bin, low_bin) = if h < height_bands.len() {
-            height_bands[h]
-        } else {
-            (0.0, 0.0)
-        };
-
-        blocks.push(BlockData {
-            full_hist,
-            round_outputs,
-            high_bin,
-            low_bin,
-        });
-
-        if (h - sweep_start).is_multiple_of(50_000) {
-            eprint!(
-                "\r  {}/{} ({:.0}%)",
-                h - sweep_start,
-                total_blocks,
-                (h - sweep_start) as f64 / total_blocks as f64 * 100.0
-            );
-        }
-    }
-
-    let mem_hists = blocks.len() * std::mem::size_of::<Histogram>();
-    // Use the real element size of the stored tuples rather than a hand count.
-    let mem_rounds: usize = blocks
-        .iter()
-        .map(|b| b.round_outputs.len() * std::mem::size_of::<(u16, u8)>())
-        .sum();
-    eprintln!(
-        "\r  {} blocks precomputed ({:.1} GB hists + {:.0} MB rounds) in {:.1}s",
-        blocks.len(),
-        mem_hists as f64 / 1e9,
-        mem_rounds as f64 / 1e6,
-        t0.elapsed().as_secs_f64()
-    );
-
-    // Phase 2: sweep digit masks in parallel.
-    // Always filter digit 1 (powers of 10), sweep digits 2-9.
-    let base_mask: u16 = 1 << 0; // digit 1 always on
-    let num_masks: usize = 256; // 2^8 subsets of {2,...,9}
-    let num_threads = std::thread::available_parallelism()
-        .map(|n| n.get())
-        .unwrap_or(8);
-    eprintln!(
-        "Phase 2: sweeping {} masks across {} threads...",
-        num_masks, num_threads
-    );
-
-    let t1 = Instant::now();
-    let blocks = &blocks; // shared reference for threads
-
-    let all_results: Vec<(u16, Stats)> = std::thread::scope(|s| {
-        let masks_per_thread = num_masks.div_ceil(num_threads);
-
-        let handles: Vec<_> = (0..num_threads)
-            .map(|t| {
-                s.spawn(move || {
-                    let mask_start = t * masks_per_thread;
-                    let mask_end = ((t + 1) * masks_per_thread).min(num_masks);
-                    // When the thread count does not divide the mask count
-                    // evenly, trailing threads can be left without work;
-                    // bail out before `mask_end - mask_start` underflows.
-                    if mask_start >= mask_end {
-                        return Vec::new();
-                    }
-                    let mut results = Vec::with_capacity(mask_end - mask_start);
-
-                    for idx in mask_start..mask_end {
-                        // Shift idx bits into positions 1-8 (digits 2-9) and add base_mask (digit 1).
-                        let mask = base_mask | ((idx as u16) << 1);
-                        let mut oracle = Oracle::new(
-                            seed_bin(sweep_start),
-                            Config {
-                                exclude_common_round_values: false,
-                                ..Default::default()
-                            },
-                        );
-                        let mut stats = Stats::new();
-
-                        for bd in blocks.iter() {
-                            // Remove the round outputs whose digit is in the mask.
-                            let mut hist = bd.full_hist.clone();
-                            for &(bin, digit) in &bd.round_outputs {
-                                if mask & (1 << (digit - 1)) != 0 {
-                                    hist[bin as usize] -= 1;
-                                }
-                            }
-
-                            let ref_bin = oracle.process_histogram(&hist);
-
-                            // Error is the signed distance outside the
-                            // [high_bin, low_bin] band and zero inside it.
-                            if bd.high_bin > 0.0 && bd.low_bin > 0.0 {
-                                let err = if ref_bin < bd.high_bin {
-                                    ref_bin - bd.high_bin
-                                } else if ref_bin > bd.low_bin {
-                                    ref_bin - bd.low_bin
-                                } else {
-                                    0.0
-                                };
-                                stats.update(err);
-                            }
-                        }
-
-                        results.push((mask, stats));
-                    }
-
-                    results
-                })
-            })
-            .collect();
-
-        handles
-            .into_iter()
-            .flat_map(|h| h.join().unwrap())
-            .collect()
-    });
-
-    eprintln!("  Done in {:.1}s.", t1.elapsed().as_secs_f64());
-
-    // Sort by RMSE. `total_cmp` gives a total order, so a NaN RMSE (no
-    // measured block) cannot panic the sort.
-    let mut results: Vec<&(u16, Stats)> = all_results.iter().collect();
-    results.sort_by(|a, b| a.1.rmse_pct().total_cmp(&b.1.rmse_pct()));
-
-    // Print top 20.
-    println!();
-    println!("Top 20 (by RMSE):");
-    println!(
-        "{:>4} {:>12} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
-        "#", "Digits", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
-    );
-    println!("{}", "-".repeat(70));
-    for (rank, (mask, s)) in results.iter().take(20).enumerate() {
-        println!(
-            "{:>4} {:>12} {:>8.3}% {:>8.1}% {:>6} {:>6} {:>6} {:>+8.2}",
-            rank + 1,
-            mask_label(*mask),
-            s.rmse_pct(),
-            s.max_pct(),
-            s.gt_5pct,
-            s.gt_10pct,
-            s.gt_20pct,
-            s.bias()
-        );
-    }
-
-    // Print bottom 5.
-    println!();
-    println!("Bottom 5 (worst):");
-    println!(
-        "{:>4} {:>12} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
-        "#", "Digits", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
-    );
-    println!("{}", "-".repeat(70));
-    for (mask, s) in results.iter().rev().take(5) {
-        println!(
-            "{:>4} {:>12} {:>8.3}% {:>8.1}% {:>6} {:>6} {:>6} {:>+8.2}",
-            "",
-            mask_label(*mask),
-            s.rmse_pct(),
-            s.max_pct(),
-            s.gt_5pct,
-            s.gt_10pct,
-            s.gt_20pct,
-            s.bias()
-        );
-    }
-
-    // Print current config {1,2,3,5} for reference.
-    let current_mask: u16 = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 4); // digits 1,2,3,5
-    let current_stats = all_results
-        .iter()
-        .find(|(m, _)| *m == current_mask)
-        .map(|(_, s)| s)
-        .unwrap();
-    let current_rank = results
-        .iter()
-        .position(|(m, _)| *m == current_mask)
-        .unwrap();
-    println!();
-    println!(
-        "Current {{1,2,3,5}} = rank {}/{}: RMSE {:.3}%, Max {:.1}%, >5%: {}, >10%: {}, >20%: {}",
-        current_rank + 1,
-        num_masks,
-        current_stats.rmse_pct(),
-        current_stats.max_pct(),
-        current_stats.gt_5pct,
-        current_stats.gt_10pct,
-        current_stats.gt_20pct,
-    );
-
-    println!("\nTotal time: {:.1}s", t0.elapsed().as_secs_f64());
-}
@@ -1,452 +0,0 @@
-//! Sweep round-value tolerance to find optimal rounding threshold.
-//!
-//! Tests different tolerance percentages (0%, 0.01%, 0.1%, 1%, etc.) for
-//! detecting round BTC amounts, combined with several digit filter masks.
-//!
-//! Phase 1: single pass over indexer, store per-output relative errors.
-//! Phase 2: sweep tolerance × mask combos across CPU cores.
-//!
-//! Run with: cargo run -p brk_oracle --example sweep_tolerance --release
-
-use std::path::PathBuf;
-use std::time::Instant;
-
-use brk_indexer::Indexer;
-use brk_oracle::{Config, Histogram, Oracle, PRICES, cents_to_bin, sats_to_bin};
-use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
-use vecdb::{AnyVec, ReadableVec, VecIndex};
-
-const BINS_5PCT: f64 = 4.24; // error threshold in bins; ~5% via `bins_to_pct` (200 bins per factor of ten)
-const BINS_10PCT: f64 = 8.28; // error threshold in bins; ~10% via `bins_to_pct`
-const BINS_20PCT: f64 = 15.84; // error threshold in bins; ~20% via `bins_to_pct`
-
-/// Converts a distance measured in bins into a percentage change, on the
-/// scale where 200 bins correspond to one factor of ten.
-fn bins_to_pct(bins: f64) -> f64 {
-    let ratio = 10.0_f64.powf(bins / 200.0);
-    (ratio - 1.0) * 100.0
-}
-
-/// Seed bin for an oracle whose first processed block is `start_height`.
-///
-/// Reads the line at index `start_height - 1` of `PRICES`, multiplies the
-/// parsed value by 100 and passes it to `cents_to_bin`.
-/// NOTE(review): presumably `PRICES` holds one dollar price per height —
-/// confirm against the crate.
-///
-/// # Panics
-///
-/// Panics if `start_height` is 0 (there is no preceding entry), if `PRICES`
-/// has fewer than `start_height` lines, or if the selected line does not
-/// parse as an `f64`.
-fn seed_bin(start_height: usize) -> f64 {
-    // `checked_sub` keeps height 0 from overflowing (debug builds) or wrapping
-    // to `usize::MAX` (release builds) before the lookup.
-    let prev_height = start_height
-        .checked_sub(1)
-        .expect("start_height must be at least 1 to have a seed price");
-    let price: f64 = PRICES
-        .lines()
-        .nth(prev_height)
-        .expect("prices.txt too short")
-        .parse()
-        .expect("Failed to parse seed price");
-    cents_to_bin(price * 100.0)
-}
-
-/// Rounded leading digit of `sats`: the value divided by its power-of-ten
-/// magnitude, rounded to the nearest integer. A quotient that rounds up to
-/// 10 (or more) is reported as 1.
-fn leading_digit(sats: u64) -> u8 {
-    let value = sats as f64;
-    let scale = 10.0_f64.powf(value.log10().floor());
-    match (value / scale).round() as u8 {
-        digit if digit >= 10 => 1,
-        digit => digit,
-    }
-}
-
-/// Relative distance between `sats` and its nearest round value `d × 10^n`,
-/// where `d` is the rounded leading quotient of `sats`.
-/// Example: 10_050 has round value 10_000, so the result is 50 / 10_000 = 0.005.
-fn relative_roundness(sats: u64) -> f64 {
-    let value = sats as f64;
-    let scale = 10.0_f64.powf(value.log10().floor());
-    let target = (value / scale).round() * scale;
-    (value - target).abs() / target
-}
-
-struct Stats { // running error statistics for one sweep configuration
-    total_sq_err: f64, // sum of squared errors passed to `update`
-    total_bias: f64, // sum of signed errors passed to `update`
-    max_err: f64, // largest absolute error seen so far
-    total_blocks: u64, // number of `update` calls
-    gt_5pct: u64, // updates whose absolute error exceeded BINS_5PCT
-    gt_10pct: u64, // updates whose absolute error exceeded BINS_10PCT
-    gt_20pct: u64, // updates whose absolute error exceeded BINS_20PCT
-}
-
-impl Stats {
-    /// Creates an accumulator with every sum and counter at zero.
-    fn new() -> Self {
-        Self {
-            total_sq_err: 0.0,
-            total_bias: 0.0,
-            max_err: 0.0,
-            total_blocks: 0,
-            gt_5pct: 0,
-            gt_10pct: 0,
-            gt_20pct: 0,
-        }
-    }
-
-    /// Records one signed error: adds it to the squared and signed sums,
-    /// bumps the sample count, tracks the largest absolute error and
-    /// increments each threshold counter the absolute error exceeds.
-    fn update(&mut self, err: f64) {
-        self.total_sq_err += err * err;
-        self.total_bias += err;
-        self.total_blocks += 1;
-        let abs_err = err.abs();
-        if abs_err > self.max_err {
-            self.max_err = abs_err;
-        }
-        if abs_err > BINS_5PCT {
-            self.gt_5pct += 1;
-        }
-        if abs_err > BINS_10PCT {
-            self.gt_10pct += 1;
-        }
-        if abs_err > BINS_20PCT {
-            self.gt_20pct += 1;
-        }
-    }
-
-    /// Root-mean-square error converted to a percentage with `bins_to_pct`.
-    ///
-    /// Returns 0.0 when no sample has been recorded, so that an empty
-    /// accumulator never yields NaN (which cannot be ordered by
-    /// `partial_cmp`).
-    fn rmse_pct(&self) -> f64 {
-        if self.total_blocks == 0 {
-            return 0.0;
-        }
-        bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
-    }
-
-    /// Largest absolute error converted to a percentage with `bins_to_pct`.
-    fn max_pct(&self) -> f64 {
-        bins_to_pct(self.max_err)
-    }
-
-    /// Mean signed error; 0.0 when no sample has been recorded.
-    fn bias(&self) -> f64 {
-        if self.total_blocks == 0 {
-            return 0.0;
-        }
-        self.total_bias / self.total_blocks as f64
-    }
-}
-
-/// Per-output data: bin index, leading digit, relative error from round value.
-struct RoundOutput {
-    bin: u16, // histogram bin of the output, narrowed from the `sats_to_bin` result
-    digit: u8, // `leading_digit` of the output value; only 1..=9 is stored
-    rel_err: f32, // f32 is plenty of precision, saves memory
-}
-
-struct BlockData { // per-block data precomputed in Phase 1 of `main`
-    full_hist: Histogram, // bins of every output that passed the type and min_sats filters
-    round_outputs: Vec<RoundOutput>, // outputs within the widest tested tolerance of a round value
-    high_bin: f64, // bin of the reference high price; 0.0 when no reference exists
-    low_bin: f64, // bin of the reference low price; 0.0 when no reference exists
-}
-
-/// Runs the round-value tolerance sweep.
-///
-/// Phase 1 walks the indexer once from `sweep_start` and stores, per block,
-/// the filtered histogram plus every output within the widest tested
-/// tolerance of a round value. Phase 2 replays those blocks through a fresh
-/// `Oracle` for each tolerance × digit-mask combination, spread over the
-/// available threads, and prints one row of error statistics per combination.
-///
-/// # Panics
-///
-/// Panics if the indexer or the reference OHLC file cannot be loaded, or if
-/// the indexer holds no block at or above `sweep_start`.
-fn main() {
-    let t0 = Instant::now();
-
-    let data_dir = std::env::var("BRK_DIR")
-        .map(PathBuf::from)
-        .unwrap_or_else(|_| {
-            let home = std::env::var("HOME").expect("neither BRK_DIR nor HOME is set");
-            PathBuf::from(home).join(".brk")
-        });
-
-    let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
-    let total_heights = indexer.vecs.blocks.timestamp.len();
-
-    let manifest_dir = env!("CARGO_MANIFEST_DIR");
-
-    let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
-        &std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
-            .expect("Failed to read height_price_ohlc.json"),
-    )
-    .expect("Failed to parse height OHLC");
-
-    // Per-height (high_bin, low_bin) reference band; (0.0, 0.0) marks a height
-    // without a usable reference price.
-    let height_bands: Vec<(f64, f64)> = height_ohlc
-        .iter()
-        .map(|ohlc| {
-            let high = ohlc[1];
-            let low = ohlc[2];
-            if high > 0.0 && low > 0.0 {
-                (cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
-            } else {
-                (0.0, 0.0)
-            }
-        })
-        .collect();
-
-    let sweep_start: usize = 575_000;
-    // Fail with a clear message instead of an integer underflow below.
-    assert!(
-        total_heights > sweep_start,
-        "indexer has {total_heights} heights; the sweep needs more than {sweep_start}"
-    );
-
-    // Phase 1: precompute per-block data.
-    // Store all potentially-round outputs with their relative error so we can
-    // filter at different tolerance thresholds in Phase 2.
-    eprintln!("Phase 1: precomputing block data...");
-
-    let total_txs = indexer.vecs.transactions.txid.len();
-    let total_outputs = indexer.vecs.outputs.value.len();
-
-    let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
-    let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
-
-    let ref_config = Config::default();
-    let total_blocks = total_heights - sweep_start;
-    let mut blocks: Vec<BlockData> = Vec::with_capacity(total_blocks);
-
-    // Use the widest tolerance we'll test (5%) to decide what to store.
-    // Outputs beyond 5% relative error will never be filtered at any tolerance.
-    let max_tolerance: f64 = 0.05;
-
-    for h in sweep_start..total_heights {
-        let ft = first_tx_index[h];
-        let next_ft = first_tx_index
-            .get(h + 1)
-            .copied()
-            .unwrap_or(TxIndex::from(total_txs));
-
-        // Outputs are read starting at the block's second transaction (the
-        // first transaction is skipped); a block with a single transaction
-        // yields an empty range.
-        let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
-            indexer
-                .vecs
-                .transactions
-                .first_txout_index
-                .collect_one(ft + 1)
-                .unwrap()
-                .to_usize()
-        } else {
-            out_first
-                .get(h + 1)
-                .copied()
-                .unwrap_or(TxOutIndex::from(total_outputs))
-                .to_usize()
-        };
-        let out_end = out_first
-            .get(h + 1)
-            .copied()
-            .unwrap_or(TxOutIndex::from(total_outputs))
-            .to_usize();
-
-        let values: Vec<Sats> = indexer
-            .vecs
-            .outputs
-            .value
-            .collect_range_at(out_start, out_end);
-        let output_types: Vec<OutputType> = indexer
-            .vecs
-            .outputs
-            .output_type
-            .collect_range_at(out_start, out_end);
-
-        let mut full_hist = Histogram::zeros();
-        let mut round_outputs = Vec::new();
-
-        for (sats, output_type) in values.into_iter().zip(output_types) {
-            if ref_config.excluded_output_types.contains(&output_type) {
-                continue;
-            }
-            if *sats < ref_config.min_sats {
-                continue;
-            }
-            if let Some(bin) = sats_to_bin(sats) {
-                full_hist.increment(bin);
-                let d = leading_digit(*sats);
-                if (1..=9).contains(&d) {
-                    let rel_err = relative_roundness(*sats);
-                    if rel_err <= max_tolerance {
-                        round_outputs.push(RoundOutput {
-                            bin: bin as u16,
-                            digit: d,
-                            rel_err: rel_err as f32,
-                        });
-                    }
-                }
-            }
-        }
-
-        let (high_bin, low_bin) = if h < height_bands.len() {
-            height_bands[h]
-        } else {
-            (0.0, 0.0)
-        };
-
-        blocks.push(BlockData {
-            full_hist,
-            round_outputs,
-            high_bin,
-            low_bin,
-        });
-
-        if (h - sweep_start).is_multiple_of(50_000) {
-            eprint!(
-                "\r  {}/{} ({:.0}%)",
-                h - sweep_start,
-                total_blocks,
-                (h - sweep_start) as f64 / total_blocks as f64 * 100.0
-            );
-        }
-    }
-
-    let mem_hists = blocks.len() * std::mem::size_of::<Histogram>();
-    let mem_rounds: usize = blocks
-        .iter()
-        .map(|b| b.round_outputs.len() * std::mem::size_of::<RoundOutput>())
-        .sum();
-    eprintln!(
-        "\r  {} blocks precomputed ({:.1} GB hists + {:.0} MB rounds) in {:.1}s",
-        blocks.len(),
-        mem_hists as f64 / 1e9,
-        mem_rounds as f64 / 1e6,
-        t0.elapsed().as_secs_f64()
-    );
-
-    // Phase 2: sweep tolerance × mask combos.
-    // Tolerances as fractions (not percentages).
-    let tolerances: &[(f64, &str)] = &[
-        (0.0, "0%"),
-        (0.0001, "0.01%"),
-        (0.0005, "0.05%"),
-        (0.001, "0.1%"),
-        (0.002, "0.2%"),
-        (0.005, "0.5%"),
-        (0.01, "1%"),
-        (0.02, "2%"),
-        (0.05, "5%"),
-    ];
-
-    //                              987654321
-    let masks: &[(u16, &str)] = &[
-        (0b0_0000_0000, "none"),
-        (0b0_0001_0111, "{1,2,3,5}"),
-        (0b0_0001_1111, "{1,2,3,4,5}"),
-        (0b0_0011_0111, "{1,2,3,5,6}"),
-        (0b0_0111_0111, "{1,2,3,5,6,7}"),
-        (0b1_1111_1111, "{1-9}"),
-    ];
-
-    let num_configs = tolerances.len() * masks.len();
-    let num_threads = std::thread::available_parallelism()
-        .map(|n| n.get())
-        .unwrap_or(8);
-    eprintln!(
-        "Phase 2: sweeping {} configs ({} tolerances × {} masks) across {} threads...",
-        num_configs,
-        tolerances.len(),
-        masks.len(),
-        num_threads
-    );
-
-    let t1 = Instant::now();
-    let blocks = &blocks;
-    let tolerances_ref = tolerances;
-    let masks_ref = masks;
-
-    let all_results: Vec<(usize, usize, Stats)> = std::thread::scope(|s| {
-        let configs_per_thread = num_configs.div_ceil(num_threads);
-
-        let handles: Vec<_> = (0..num_threads)
-            .map(|t| {
-                s.spawn(move || {
-                    let cfg_start = t * configs_per_thread;
-                    let cfg_end = ((t + 1) * configs_per_thread).min(num_configs);
-                    // Threads beyond the last config have nothing to do.
-                    if cfg_start >= cfg_end {
-                        return vec![];
-                    }
-                    let mut results = Vec::with_capacity(cfg_end - cfg_start);
-
-                    for cfg_idx in cfg_start..cfg_end {
-                        // Config index is tolerance-major: ti selects the
-                        // tolerance, mi the mask.
-                        let ti = cfg_idx / masks_ref.len();
-                        let mi = cfg_idx % masks_ref.len();
-                        let (tolerance, _) = tolerances_ref[ti];
-                        let (mask, _) = masks_ref[mi];
-                        // Stored relative errors are f32, so compare in f32;
-                        // computed once per config rather than once per block.
-                        let tol_f32 = tolerance as f32;
-
-                        let mut oracle = Oracle::new(
-                            seed_bin(sweep_start),
-                            Config {
-                                exclude_common_round_values: false,
-                                ..Default::default()
-                            },
-                        );
-                        let mut stats = Stats::new();
-
-                        for bd in blocks.iter() {
-                            let mut hist = bd.full_hist.clone();
-
-                            // Remove outputs matching this tolerance + mask.
-                            for ro in &bd.round_outputs {
-                                if mask & (1 << (ro.digit - 1)) != 0 && ro.rel_err <= tol_f32 {
-                                    hist[ro.bin as usize] -= 1;
-                                }
-                            }
-
-                            let ref_bin = oracle.process_histogram(&hist);
-
-                            // Error is the signed distance outside the
-                            // [high_bin, low_bin] band and zero inside it.
-                            if bd.high_bin > 0.0 && bd.low_bin > 0.0 {
-                                let err = if ref_bin < bd.high_bin {
-                                    ref_bin - bd.high_bin
-                                } else if ref_bin > bd.low_bin {
-                                    ref_bin - bd.low_bin
-                                } else {
-                                    0.0
-                                };
-                                stats.update(err);
-                            }
-                        }
-
-                        results.push((ti, mi, stats));
-                    }
-
-                    results
-                })
-            })
-            .collect();
-
-        handles
-            .into_iter()
-            .flat_map(|h| h.join().unwrap())
-            .collect()
-    });
-
-    eprintln!("  Done in {:.1}s.", t1.elapsed().as_secs_f64());
-
-    // Print results grouped by tolerance.
-    println!();
-    println!(
-        "{:>8} {:>16} {:>8} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
-        "Tol", "Digits", "Blocks", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
-    );
-    println!("{}", "-".repeat(88));
-
-    for (ti, &(_, tol_label)) in tolerances.iter().enumerate() {
-        for (mi, &(_, mask_label)) in masks.iter().enumerate() {
-            let (_, _, stats) = all_results
-                .iter()
-                .find(|(t, m, _)| *t == ti && *m == mi)
-                .unwrap();
-            println!(
-                "{:>8} {:>16} {:>8} {:>8.3}% {:>8.1}% {:>6} {:>6} {:>6} {:>+8.2}",
-                tol_label,
-                mask_label,
-                stats.total_blocks,
-                stats.rmse_pct(),
-                stats.max_pct(),
-                stats.gt_5pct,
-                stats.gt_10pct,
-                stats.gt_20pct,
-                stats.bias()
-            );
-        }
-        println!();
-    }
-
-    // Find overall best config by RMSE. `total_cmp` gives a total order, so a
-    // NaN RMSE (no measured block) cannot panic the comparison.
-    let best = all_results
-        .iter()
-        .min_by(|a, b| a.2.rmse_pct().total_cmp(&b.2.rmse_pct()))
-        .unwrap();
-    let (bti, bmi, bs) = best;
-    println!(
-        "Best: tolerance={}, digits={} → RMSE {:.3}%, Max {:.1}%, >5%: {}, >10%: {}, >20%: {}",
-        tolerances[*bti].1,
-        masks[*bmi].1,
-        bs.rmse_pct(),
-        bs.max_pct(),
-        bs.gt_5pct,
-        bs.gt_10pct,
-        bs.gt_20pct,
-    );
-
-    // Show current config for reference.
-    let current = all_results
-        .iter()
-        .find(|(t, m, _)| tolerances[*t].0 == 0.001 && masks[*m].0 == 0b0_0011_0111)
-        .unwrap();
-    let (_, _, cs) = current;
-    println!(
-        "Current: tolerance=0.1%, digits={{1,2,3,5,6}} → RMSE {:.3}%, Max {:.1}%, >5%: {}, >10%: {}, >20%: {}",
-        cs.rmse_pct(),
-        cs.max_pct(),
-        cs.gt_5pct,
-        cs.gt_10pct,
-        cs.gt_20pct,
-    );
-
-    println!("\nTotal time: {:.1}s", t0.elapsed().as_secs_f64());
-}
@@ -1,286 +0,0 @@
-//! Validate oracle accuracy against exchange reference prices.
-//!
-//! Run with: cargo run -p brk_oracle --example validate --release
-//!
-//! Requires:
-//! - ~/.brk indexed blockchain data (brk_indexer)
-//! - examples/height_price_ohlc.json (per-height [open, high, low, close] in dollars)
-
-use std::path::PathBuf;
-
-use brk_indexer::Indexer;
-use brk_oracle::{
-    Config, Histogram, Oracle, PRICES, START_HEIGHT, cents_to_bin, default_eligible_bin,
-};
-use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
-use vecdb::{AnyVec, ReadableVec, VecIndex};
-
-const BINS_5PCT: f64 = 4.24; // error threshold in bins; ~5% via `bins_to_pct` (200 bins per factor of ten)
-const BINS_10PCT: f64 = 8.28; // error threshold in bins; ~10% via `bins_to_pct`
-const BINS_20PCT: f64 = 15.84; // error threshold in bins; ~20% via `bins_to_pct`
-
-/// Converts a distance measured in bins into a percentage change, on the
-/// scale where 200 bins correspond to one factor of ten.
-fn bins_to_pct(bins: f64) -> f64 {
-    let ratio = 10.0_f64.powf(bins / 200.0);
-    (ratio - 1.0) * 100.0
-}
-
-/// Seed bin for an oracle whose first processed block is `start_height`.
-///
-/// Reads the line at index `start_height - 1` of `PRICES`, multiplies the
-/// parsed value by 100 and passes it to `cents_to_bin`.
-/// NOTE(review): presumably `PRICES` holds one dollar price per height —
-/// confirm against the crate.
-///
-/// # Panics
-///
-/// Panics if `start_height` is 0 (there is no preceding entry), if `PRICES`
-/// has fewer than `start_height` lines, or if the selected line does not
-/// parse as an `f64`.
-fn seed_bin(start_height: usize) -> f64 {
-    // `checked_sub` keeps height 0 from overflowing (debug builds) or wrapping
-    // to `usize::MAX` (release builds) before the lookup.
-    let prev_height = start_height
-        .checked_sub(1)
-        .expect("start_height must be at least 1 to have a seed price");
-    let price: f64 = PRICES
-        .lines()
-        .nth(prev_height)
-        .expect("prices.txt too short")
-        .parse()
-        .expect("Failed to parse seed price");
-    cents_to_bin(price * 100.0)
-}
-
-struct Stats { // running error statistics for one validation run
-    total_sq_err: f64, // sum of squared errors passed to `update`
-    total_bias: f64, // sum of signed errors passed to `update`
-    max_err: f64, // largest absolute error seen so far
-    total_blocks: u64, // number of `update` calls
-    gt_5pct: u64, // updates whose absolute error exceeded BINS_5PCT
-    gt_10pct: u64, // updates whose absolute error exceeded BINS_10PCT
-    gt_20pct: u64, // updates whose absolute error exceeded BINS_20PCT
-}
-
-impl Stats {
-    /// Creates an accumulator with every sum and counter at zero.
-    fn new() -> Self {
-        Self {
-            total_sq_err: 0.0,
-            total_bias: 0.0,
-            max_err: 0.0,
-            total_blocks: 0,
-            gt_5pct: 0,
-            gt_10pct: 0,
-            gt_20pct: 0,
-        }
-    }
-
-    /// Records one signed error: adds it to the squared and signed sums,
-    /// bumps the sample count, tracks the largest absolute error and
-    /// increments each threshold counter the absolute error exceeds.
-    fn update(&mut self, err: f64) {
-        self.total_sq_err += err * err;
-        self.total_bias += err;
-        self.total_blocks += 1;
-        let abs_err = err.abs();
-        if abs_err > self.max_err {
-            self.max_err = abs_err;
-        }
-        if abs_err > BINS_5PCT {
-            self.gt_5pct += 1;
-        }
-        if abs_err > BINS_10PCT {
-            self.gt_10pct += 1;
-        }
-        if abs_err > BINS_20PCT {
-            self.gt_20pct += 1;
-        }
-    }
-
-    /// Root-mean-square error converted to a percentage with `bins_to_pct`.
-    ///
-    /// Returns 0.0 when no sample has been recorded, so that an empty
-    /// accumulator never yields NaN.
-    fn rmse_pct(&self) -> f64 {
-        if self.total_blocks == 0 {
-            return 0.0;
-        }
-        bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
-    }
-
-    /// Largest absolute error converted to a percentage with `bins_to_pct`.
-    fn max_pct(&self) -> f64 {
-        bins_to_pct(self.max_err)
-    }
-
-    /// Mean signed error; 0.0 when no sample has been recorded.
-    fn bias(&self) -> f64 {
-        if self.total_blocks == 0 {
-            return 0.0;
-        }
-        self.total_bias / self.total_blocks as f64
-    }
-}
-
-struct Run { // one validation run: an oracle started at a given height plus its error statistics
-    label: &'static str, // display name of the run, e.g. "w12 @ 575k"
-    start_height: usize, // height associated with this run's oracle start
-    oracle: Option<Oracle>, // `None` when the run is built; NOTE(review): presumably set once processing reaches `start_height` — confirm in `main`
-    stats: Stats, // error statistics accumulated for this run
-}
-
-fn main() {
-    let data_dir = std::env::var("BRK_DIR")
-        .map(PathBuf::from)
-        .unwrap_or_else(|_| {
-            let home = std::env::var("HOME").unwrap();
-            PathBuf::from(home).join(".brk")
-        });
-
-    let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
-    let total_heights = indexer.vecs.blocks.timestamp.len();
-
-    let manifest_dir = env!("CARGO_MANIFEST_DIR");
-
-    let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
-        &std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
-            .expect("Failed to read height_price_ohlc.json"),
-    )
-    .expect("Failed to parse height OHLC");
-
-    // Pre-compute per-height (high_bin, low_bin) tolerance band.
-    let height_bands: Vec<(f64, f64)> = height_ohlc
-        .iter()
-        .map(|ohlc| {
-            let high = ohlc[1];
-            let low = ohlc[2];
-            if high > 0.0 && low > 0.0 {
-                (cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
-            } else {
-                (0.0, 0.0)
-            }
-        })
-        .collect();
-
-    let mut runs = vec![
-        Run {
-            label: "w12 @ 575k",
-            start_height: 575_000,
-            oracle: None,
-            stats: Stats::new(),
-        },
-        Run {
-            label: "w12 @ 600k",
-            start_height: 600_000,
-            oracle: None,
-            stats: Stats::new(),
-        },
-        Run {
-            label: "w12 @ 630k",
-            start_height: 630_000,
-            oracle: None,
-            stats: Stats::new(),
-        },
-    ];
-
-    // Build per-block filtered histograms from the indexer, feeding all oracles in one pass.
-    let total_txs = indexer.vecs.transactions.txid.len();
-    let total_outputs = indexer.vecs.outputs.value.len();
-
-    // Pre-collect height-indexed vecs (small). Transaction-indexed vecs are too large.
-    let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
-    let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
-
-    for h in START_HEIGHT..total_heights {
-        let ft = first_tx_index[h];
-        let next_ft = first_tx_index
-            .get(h + 1)
-            .copied()
-            .unwrap_or(TxIndex::from(total_txs));
-
-        let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
-            indexer
-                .vecs
-                .transactions
-                .first_txout_index
-                .collect_one(ft + 1)
-                .unwrap()
-                .to_usize()
-        } else {
-            out_first
-                .get(h + 1)
-                .copied()
-                .unwrap_or(TxOutIndex::from(total_outputs))
-                .to_usize()
-        };
-        let out_end = out_first
-            .get(h + 1)
-            .copied()
-            .unwrap_or(TxOutIndex::from(total_outputs))
-            .to_usize();
-
-        // Build filtered histogram once for all oracles.
-        let values: Vec<Sats> = indexer
-            .vecs
-            .outputs
-            .value
-            .collect_range_at(out_start, out_end);
-        let output_types: Vec<OutputType> = indexer
-            .vecs
-            .outputs
-            .output_type
-            .collect_range_at(out_start, out_end);
-
-        let mut hist = Histogram::zeros();
-        for (sats, output_type) in values.into_iter().zip(output_types) {
-            if let Some(bin) = default_eligible_bin(sats, output_type) {
-                hist.increment(bin as usize);
-            }
-        }
-
-        for run in &mut runs {
-            if h < run.start_height {
-                continue;
-            }
-            if run.oracle.is_none() {
-                let config = Config::default();
-                run.oracle = Some(Oracle::new(seed_bin(run.start_height), config));
-            }
-            let ref_bin = run.oracle.as_mut().unwrap().process_histogram(&hist);
-
-            if h < height_bands.len() {
-                let (high_bin, low_bin) = height_bands[h];
-                if high_bin > 0.0 && low_bin > 0.0 {
-                    let err = if ref_bin < high_bin {
-                        ref_bin - high_bin
-                    } else if ref_bin > low_bin {
-                        ref_bin - low_bin
-                    } else {
-                        0.0
-                    };
-                    run.stats.update(err);
-                }
-            }
-        }
-    }
-
-    // Print results.
-    println!();
-    println!(
-        "{:<14} {:>8} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
-        "Config", "Blocks", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
-    );
-    println!("{}", "-".repeat(72));
-    for run in &runs {
-        let s = &run.stats;
-        println!(
-            "{:<14} {:>8}   {:>7.2}%   {:>7.1}% {:>6} {:>6} {:>6} {:>+8.2}",
-            run.label,
-            s.total_blocks,
-            s.rmse_pct(),
-            s.max_pct(),
-            s.gt_5pct,
-            s.gt_10pct,
-            s.gt_20pct,
-            s.bias()
-        );
-    }
-    println!();
-
-    // Verify exact counts against reference.
-    // Reference: trunc w12 @ 575k: 261 >5%, 40 >10%, 0 >20%
-    //            trunc w12 @ 600k: 174 >5%, 31 >10%, 0 >20%
-    //            trunc w12 @ 630k:  84 >5%,  9 >10%, 0 >20%
-    let expected: &[(&str, u64, u64, u64)] = &[
-        ("w12 @ 575k", 237, 22, 0),
-        ("w12 @ 600k", 152, 15, 0),
-        ("w12 @ 630k", 84, 9, 0),
-    ];
-
-    for (run, &(label, exp_5, exp_10, exp_20)) in runs.iter().zip(expected) {
-        let s = &run.stats;
-        assert_eq!(
-            s.gt_20pct, exp_20,
-            "{label}: expected {exp_20} blocks >20%, got {}",
-            s.gt_20pct
-        );
-        assert_eq!(
-            s.gt_10pct, exp_10,
-            "{label}: expected {exp_10} blocks >10%, got {}",
-            s.gt_10pct
-        );
-        assert_eq!(
-            s.gt_5pct, exp_5,
-            "{label}: expected {exp_5} blocks >5%, got {}",
-            s.gt_5pct
-        );
-    }
-
-    println!("All assertions passed!");
-}
@@ -16,7 +16,9 @@ pub use histogram::Histogram;
 /// so downstream consumers can invalidate cached results.
 pub const VERSION: u32 = 2;

-/// Pre-oracle dollar prices, one per line, heights 0..630_000.
+/// Pre-oracle dollar prices, one per line, heights 0..525_000. The last
+/// entry (height 524_999) seeds the oracle's first on-chain computation
+/// at `START_HEIGHT`.
 pub const PRICES: &str = include_str!("prices.txt");

 /// First height where the oracle computes from on-chain data.