From ee20175cbfa3a87a4be6710a225ee2471353dc33 Mon Sep 17 00:00:00 2001 From: nym21 Date: Sun, 24 May 2026 18:40:35 +0200 Subject: [PATCH] oracle: cleanup + split lib.rs --- crates/brk_computer/src/prices/compute.rs | 14 +- crates/brk_oracle/README.md | 56 +-- crates/brk_oracle/examples/determinism.rs | 28 +- crates/brk_oracle/examples/report.rs | 12 +- crates/brk_oracle/examples/report_from.rs | 10 +- crates/brk_oracle/src/config.rs | 44 +-- crates/brk_oracle/src/filter.rs | 75 ++++ crates/brk_oracle/src/lib.rs | 439 +++------------------- crates/brk_oracle/src/scale.rs | 72 ++++ crates/brk_oracle/src/shape.rs | 68 ++++ crates/brk_oracle/src/stencil.rs | 129 +++++++ crates/brk_oracle/src/window.rs | 74 ++++ 12 files changed, 545 insertions(+), 476 deletions(-) create mode 100644 crates/brk_oracle/src/filter.rs create mode 100644 crates/brk_oracle/src/scale.rs create mode 100644 crates/brk_oracle/src/shape.rs create mode 100644 crates/brk_oracle/src/stencil.rs create mode 100644 crates/brk_oracle/src/window.rs diff --git a/crates/brk_computer/src/prices/compute.rs b/crates/brk_computer/src/prices/compute.rs index 617a983ad..f62ca4447 100644 --- a/crates/brk_computer/src/prices/compute.rs +++ b/crates/brk_computer/src/prices/compute.rs @@ -3,7 +3,7 @@ use std::ops::Range; use brk_error::Result; use brk_indexer::{Indexer, Lengths}; use brk_oracle::{ - Config, HistogramRaw, Oracle, START_HEIGHT, START_HEIGHT_SLOW, bin_to_cents, cents_to_bin, + Config, HistogramRaw, Oracle, START_HEIGHT_FAST, START_HEIGHT_SLOW, bin_to_cents, cents_to_bin, for_each_round_dollar_bin, }; use brk_types::{Cents, OutputType, Sats, TxIndex, TxOutIndex}; @@ -124,18 +124,18 @@ impl Vecs { committed, total_heights ); - // Slow cold-start EMA up to START_HEIGHT, then switch to the fast - // mature-market EMA. Steady-state runs start past START_HEIGHT and skip + // Slow cold-start EMA up to START_HEIGHT_FAST, then switch to the fast + // mature-market EMA. 
Steady-state runs start past START_HEIGHT_FAST and skip // the slow segment entirely. let mut ref_bins = Vec::with_capacity(num_new); - if committed < START_HEIGHT { - let slow_end = START_HEIGHT.min(total_heights); + if committed < START_HEIGHT_FAST { + let slow_end = START_HEIGHT_FAST.min(total_heights); ref_bins.extend(Self::feed_blocks(&mut oracle, indexer, committed..slow_end, None)); - if slow_end == START_HEIGHT { + if slow_end == START_HEIGHT_FAST { oracle.reconfigure(Config::default()); } } - let fast_start = committed.max(START_HEIGHT); + let fast_start = committed.max(START_HEIGHT_FAST); if fast_start < total_heights { ref_bins.extend(Self::feed_blocks( &mut oracle, diff --git a/crates/brk_oracle/README.md b/crates/brk_oracle/README.md index b82120c5f..a8327c9a6 100644 --- a/crates/brk_oracle/README.md +++ b/crates/brk_oracle/README.md @@ -2,7 +2,7 @@ **Version 3** -Pure on-chain BTC/USD price oracle. No exchange feeds, no external APIs. Derives the bitcoin price from transaction data alone. Tracks block by block from height 470,000 (June 2017) onward. +Pure on-chain BTC/USD price oracle. No exchange feeds, no external APIs. Derives the bitcoin price from transaction data alone. Tracks block by block from height 340,000 (January 2015) onward. Inspired by [UTXOracle](https://utxo.live/oracle/) by [@SteveSimple](https://x.com/SteveSimple), which proved the concept. brk_oracle takes the same core insight and redesigns the algorithm for per-block resolution and rolling operation. See [comparison](#comparison-with-utxoracle) below. @@ -122,9 +122,9 @@ Parabolic interpolation between the best bin and its two neighbors refines the e The oracle consumes one pre-built histogram per block via `process_histogram(&hist)`, a `[u32; 2400]` bin-count array, and returns the updated reference bin. -The caller does the filtering when it builds the histogram. 
For each block it skips the coinbase, drops every output of a transaction carrying an `OP_RETURN` (and, below height 630,000, every output of a transaction with more than 100 outputs), then bins the rest. `default_eligible_bin(sats, output_type)` (or `Oracle::output_to_bin` for a non-default `Config`) applies the per-output rules: excluded script types, dust, and round-BTC values. It returns the bin index, or `None` for a filtered output. +The caller filters as it builds the histogram, applying the [step 1](#1-filter-outputs) rules. Two helpers are exported for this: `eligible_bin(sats, output_type)` returns an output's bin index, or `None` if filtered, and `for_each_round_dollar_bin` wraps it with the per-transaction drops (OP_RETURN, the >100-output cap below height 630,000) for callers holding a whole transaction's outputs. Skipping the coinbase is left to the caller. -The initial seed must be close to the real price at the starting height. The crate includes a `PRICES` constant with exchange prices for heights 0..470,000. Its last entry, height 469,999 (one below `START_HEIGHT_SLOW`), seeds the oracle's first on-chain computation at height 470,000. +The initial seed must be close to the real price at the starting height. The crate includes a `PRICES` constant with exchange prices for heights 0..340,000. Its last entry, height 339,999 (one below `START_HEIGHT_SLOW`), seeds the oracle's first on-chain computation at height 340,000. 
## Configuration @@ -135,11 +135,11 @@ All parameters via `Config` with sensible defaults: | `alpha` | 2/7 | EMA decay rate (~6-block span) | | `window_size` | 12 | Ring buffer depth in blocks | | `search_below` / `search_above` | 12 / 11 | Search window around previous estimate (bins) | -| `min_sats` | 1,000 | Dust threshold | -| `exclude_common_round_values` | true | Filter d × 10ⁿ (d ∈ {1,2,3,5,6}) to prevent false stencil matches | -| `excluded_output_types` | P2TR | Script types dominated by protocol activity | +| `shape_weight` | 0 | Shape-anchoring restoring-force weight. 0 disables it. `Config::slow()` sets 8 for the cold-start | -Between height 470,000 and 508,000 the oracle runs a slower cold-start configuration (`Config::slow()`: `alpha` = 0.10, ~19-block span, `window_size` = 40). The thinner pre-2018 output mix lets the fast default octave-lock onto the round-dollar half-price pattern, and the slow EMA resists that drift. At height 508,000 `Oracle::reconfigure` switches to the defaults above. `Config::for_height` returns the right one for any height. +The output-filtering rules (1,000-sat dust floor, excluded P2TR, round-BTC exclusion) are not `Config` parameters: they are constants in the `filter` module so the indexer, per-request reconstruction, and mempool all bin identically. See [Input](#input). + +Between heights 340,000 and 508,000 the oracle runs a slower cold-start configuration (`Config::slow()`: `alpha` = 0.10, ~19-block span, `window_size` = 40, `shape_weight` = 8). In the thin pre-2018 output mix the fast default octave-locks onto the round-dollar half-price pattern, so the slow EMA and the shape-anchoring restoring force resist that drift. At 508,000 `Oracle::reconfigure` switches to the defaults above (`shape_weight` back to 0), and `Config::for_height` returns the right one for any height. 
## Comparison with UTXOracle @@ -154,29 +154,29 @@ Between height 470,000 and 508,000 the oracle runs a slower cold-start configura | Stencil | 19 round-USD offsets ($1 to $10k), each normalized to its own peak | 803-point Gaussian + weighted spike template targeting 17 round-USD amounts | | Round BTC handling | Excluded from histogram entirely | Histogram bins smoothed by averaging neighbors | | Output filtering | Per-tx OP_RETURN drop, then per-output: script type, dust threshold, round BTC | Per-tx: not coinbase, no OP_RETURN, exactly 2 outputs, ≤5 inputs, no same-day inputs, ≤500-byte witness | -| Validated from | Height 470,000 (June 2017) | Dec 15, 2023 | +| Validated from | Height 340,000 (January 2015) | Dec 15, 2023 | | Language | Rust | Python | | Dependencies | None (pure computation, caller provides block data) | bitcoin-cli + direct blk file reads | | Bins per decade | 200 | 200 | ## Accuracy -Tested over 466,251 blocks (heights 470,000 to 950,694, as of May 2026) against exchange OHLC data. Error is measured per block as distance from the oracle estimate to the exchange high/low range at that height. If the oracle falls within the range, the error is zero. +Tested over 596,251 blocks (heights 340,000 to 950,800, as of May 2026) against exchange OHLC data. Error is measured per block as distance from the oracle estimate to the exchange high/low range at that height. If the oracle falls within the range, the error is zero. 
### Per-block | Metric | Value | |--------|-------| -| Median error | 0.12% | -| 95th percentile | 0.91% | -| 99th percentile | 2.6% | -| 99.9th percentile | 12.0% | -| RMSE | 0.85% | -| Max error | 47.7% | -| Bias | +0.03 bins (essentially zero) | -| Blocks > 5% error | 1,605 (0.344%) | -| Blocks > 10% error | 609 | -| Blocks > 20% error | 189 | +| Median error | 0.15% | +| 95th percentile | 1.2% | +| 99th percentile | 3.4% | +| 99.9th percentile | 15.6% | +| RMSE | 0.97% | +| Max error | 33.8% | +| Bias | +0.05 bins (essentially zero) | +| Blocks > 5% error | 3,235 (0.543%) | +| Blocks > 10% error | 1,324 | +| Blocks > 20% error | 154 | ### Daily candles @@ -184,17 +184,19 @@ Oracle daily OHLC built from per-block prices vs exchange daily OHLC: | | Median | RMSE | Max | |-------|--------|------|-----| -| Open | 0.22% | 0.90% | 21.1% | -| High | 0.55% | 1.08% | 15.4% | -| Low | 0.54% | 1.65% | 20.5% | -| Close | 0.26% | 0.98% | 21.1% | +| Open | 0.24% | 1.07% | 29.1% | +| High | 0.58% | 1.48% | 27.3% | +| Low | 0.53% | 1.95% | 55.1% | +| Close | 0.27% | 1.18% | 29.2% | ### By year | Year | Blocks | Median | RMSE | Max | >5% | >10% | >20% | Price range | |------|--------|--------|------|-----|-----|------|------|-------------| -| 2017 | 31,961 | 0.39% | 2.37% | 47.7% | 980 | 373 | 116 | $1,758–$19,892 | -| 2018 | 54,531 | 0.18% | 1.35% | 32.2% | 394 | 207 | 73 | $3,129–$17,178 | +| 2015 | 51,249 | 0.26% | 1.67% | 33.8% | 916 | 449 | 25 | $198–$500 | +| 2016 | 54,753 | 0.33% | 0.80% | 16.9% | 150 | 33 | 0 | $351–$989 | +| 2017 | 55,959 | 0.45% | 2.05% | 28.6% | 1,527 | 606 | 67 | $0–$19,892 | +| 2018 | 54,531 | 0.18% | 1.31% | 31.6% | 411 | 207 | 62 | $3,129–$17,178 | | 2019 | 54,272 | 0.16% | 0.59% | 17.4% | 100 | 16 | 0 | $3,338–$13,868 | | 2020 | 53,102 | 0.10% | 0.42% | 11.6% | 61 | 3 | 0 | $3,858–$29,322 | | 2021 | 52,733 | 0.07% | 0.47% | 14.4% | 43 | 10 | 0 | $27,678–$69,000 | @@ -204,7 +206,7 @@ Oracle daily OHLC built from per-block prices vs exchange daily 
OHLC: | 2025 | 53,113 | 0.11% | 0.25% | 5.8% | 4 | 0 | 0 | $74,409–$126,198 | | 2026 | 5,910 | 0.10% | 0.27% | 3.2% | 0 | 0 | 0 | $60,000–$97,900 | -The oracle is only as good as the signal it reads. The largest errors cluster in late 2017: the parabolic December run-up toward $20,000 rose faster than the slow cold-start EMA could follow, so the oracle lagged low (47.7% max error, at height 498,246, oracle ~$11,100 vs exchange ~$16,400). The thinner pre-2018 on-chain volume also weakens the round-dollar pattern, so 2017 and 2018 carry the bulk of the error (2.37% and 1.35% RMSE). From 2019 the signal strengthens: by 2020 the oracle reaches 0.1% median accuracy, and since 2022 no block exceeds 10% error. +The oracle is only as good as the signal it reads. The largest errors cluster in the early cold-start, where thin 2015 on-chain volume gives a weaker round-dollar pattern: the 33.8% max error sits at height 341,498 (oracle ~$287 vs exchange ~$213) during the first weeks of warm-up. A second cluster sits just below the 508,000 regime switch, where the slow EMA lagged the fast early-2018 rally (~31.6% at height 507,278, oracle ~$6,685 vs exchange ~$8,800) before handing off to the fast default. The thin pre-2018 mix means 2015, 2017, and 2018 carry the bulk of the error (1.67%, 2.05%, and 1.31% RMSE). From 2019 the signal strengthens: by 2020 the oracle reaches 0.1% median accuracy, and since 2022 no block exceeds 10% error. ### Why no outlier smoothing? @@ -219,7 +221,7 @@ Post-hoc smoothing, for example correcting any block whose price deviates more t Changes from v2: -- **Earlier start with a cold-start regime**: on-chain tracking begins at height 470,000 (June 2017) instead of 525,000, adding about 55,000 blocks of history. 
Below height 508,000 the oracle runs a slower EMA (`Config::slow()`, ~19-block span, window 40) that resists the round-dollar half-price drift the fast default octave-locks onto in the thinner pre-2018 output mix, then switches to the fast default at 508,000 via `Oracle::reconfigure`. +- **Earlier start with a cold-start regime**: on-chain tracking begins at height 340,000 (January 2015) instead of 525,000, adding about 185,000 blocks of history. Below height 508,000 the oracle runs a slower EMA (`Config::slow()`, ~19-block span, window 40) paired with a shape-anchoring restoring force (`shape_weight` 8) that pulls candidate scores toward a slowly-adapted profile of the round-dollar arm shape, resisting the half-price octave drift the fast default locks onto in the thinner pre-2018 output mix. At height 508,000 it switches to the fast default via `Oracle::reconfigure`, which restores `shape_weight` to 0 and turns the force off. - **Max-outputs filter**: a transaction with more than 100 outputs is dropped from the histogram below height 630,000. Large fan-outs (exchange sweeps, mixer payouts) are batch machinery, not round-dollar payments, and the thin 2018-2020 signal needs them removed to stay locked onto the pattern. Above 630,000 on-chain volume is dense enough that the cap removes more genuine signal than noise, so it is lifted. - **Wider up-reach**: `search_below` raised from 9 to 12 bins. The sharp 2018 reversal candles need extra room to follow a fast move upward in price. diff --git a/crates/brk_oracle/examples/determinism.rs b/crates/brk_oracle/examples/determinism.rs index af6fd9895..6e6961384 100644 --- a/crates/brk_oracle/examples/determinism.rs +++ b/crates/brk_oracle/examples/determinism.rs @@ -5,7 +5,7 @@ //! onward. //! //! Mirrors the production filter exactly (per-tx OP_RETURN drop + per-output -//! `default_eligible_bin`), so it exercises the same code path +//! `eligible_bin`), so it exercises the same code path //! 
`brk_computer::prices::compute::feed_blocks` uses at runtime. //! //! Run with: cargo run -p brk_oracle --example determinism --release @@ -14,7 +14,7 @@ use std::path::PathBuf; use brk_indexer::Indexer; use brk_oracle::{ - Config, HistogramRaw, Oracle, PRICES, START_HEIGHT, bin_to_cents, cents_to_bin, + Config, HistogramRaw, Oracle, PRICES, START_HEIGHT_FAST, bin_to_cents, cents_to_bin, for_each_round_dollar_bin, }; use brk_types::{OutputType, Sats, TxIndex, TxOutIndex}; @@ -23,8 +23,8 @@ use vecdb::{AnyVec, ReadableVec, VecIndex}; fn seed_bin_for_start_height() -> f64 { let price: f64 = PRICES .lines() - .nth(START_HEIGHT - 1) - .expect("prices.txt too short for START_HEIGHT") + .nth(START_HEIGHT_FAST - 1) + .expect("prices.txt too short for START_HEIGHT_FAST") .parse() .expect("Failed to parse seed price"); cents_to_bin(price * 100.0) @@ -73,8 +73,8 @@ fn main() { let restart_offset = 1000; let end_offset = restart_offset + window_size * 4; - let end_height = (START_HEIGHT + end_offset).min(total_heights); - let restart_at = START_HEIGHT + restart_offset; + let end_height = (START_HEIGHT_FAST + end_offset).min(total_heights); + let restart_at = START_HEIGHT_FAST + restart_offset; let warmup_start = restart_at - window_size; assert!( @@ -84,8 +84,8 @@ fn main() { ); println!( - "Loading {} blocks ({START_HEIGHT}..{end_height})...", - end_height - START_HEIGHT + "Loading {} blocks ({START_HEIGHT_FAST}..{end_height})...", + end_height - START_HEIGHT_FAST ); let total_txs = indexer.vecs.transactions.txid.len(); let total_outputs = indexer.vecs.outputs.value.len(); @@ -93,8 +93,8 @@ fn main() { let out_first: Vec = indexer.vecs.outputs.first_txout_index.collect(); let mut txout_cursor = indexer.vecs.transactions.first_txout_index.cursor(); - let mut blocks: Vec = Vec::with_capacity(end_height - START_HEIGHT); - for h in START_HEIGHT..end_height { + let mut blocks: Vec = Vec::with_capacity(end_height - START_HEIGHT_FAST); + for h in START_HEIGHT_FAST..end_height { let 
ft = first_tx_index[h]; let next_ft = first_tx_index .get(h + 1) @@ -146,21 +146,21 @@ fn main() { continuous_bins.len() ); - let prev_bin = continuous_bins[restart_at - START_HEIGHT - 1]; + let prev_bin = continuous_bins[restart_at - START_HEIGHT_FAST - 1]; let seed_bin = cents_to_bin(bin_to_cents(prev_bin) as f64); println!( "Restart at {restart_at}: prev_bin={prev_bin:.4} -> cents -> seed_bin={seed_bin:.4} (delta {:.6})", seed_bin - prev_bin ); - let warmup_slice = &blocks[warmup_start - START_HEIGHT..restart_at - START_HEIGHT]; + let warmup_slice = &blocks[warmup_start - START_HEIGHT_FAST..restart_at - START_HEIGHT_FAST]; let mut restored = Oracle::from_checkpoint(seed_bin, config.clone(), |o| { for b in warmup_slice { o.process_histogram(&build_histogram(b)); } }); - let restored_bins: Vec = blocks[restart_at - START_HEIGHT..] + let restored_bins: Vec = blocks[restart_at - START_HEIGHT_FAST..] .iter() .map(|b| restored.process_histogram(&build_histogram(b))) .collect(); @@ -168,7 +168,7 @@ fn main() { let mut mismatches: Vec<(usize, f64, f64)> = Vec::new(); for (i, &r) in restored_bins.iter().enumerate() { - let c = continuous_bins[restart_at - START_HEIGHT + i]; + let c = continuous_bins[restart_at - START_HEIGHT_FAST + i]; if r != c { mismatches.push((restart_at + i, c, r)); } diff --git a/crates/brk_oracle/examples/report.rs b/crates/brk_oracle/examples/report.rs index 44db6b59c..8298aea80 100644 --- a/crates/brk_oracle/examples/report.rs +++ b/crates/brk_oracle/examples/report.rs @@ -6,8 +6,8 @@ use std::path::PathBuf; use brk_indexer::Indexer; use brk_oracle::{ - Config, Oracle, PRICES, HistogramRaw, START_HEIGHT, bin_to_cents, cents_to_bin, - default_eligible_bin, + Config, Oracle, PRICES, HistogramRaw, START_HEIGHT_FAST, bin_to_cents, cents_to_bin, + eligible_bin, }; use brk_types::{OutputType, Sats, TxIndex, TxOutIndex}; use vecdb::{AnyVec, ReadableVec, VecIndex}; @@ -174,7 +174,7 @@ fn main() { let start_price: f64 = PRICES .lines() - 
.nth(START_HEIGHT - 1) + .nth(START_HEIGHT_FAST - 1) .expect("prices.txt too short") .parse() .expect("Failed to parse seed price"); @@ -201,7 +201,7 @@ fn main() { let mut oracle_candles: Vec = Vec::new(); let mut current_di: Option = None; - for h in START_HEIGHT..total_heights { + for h in START_HEIGHT_FAST..total_heights { let ft = first_tx_index[h]; let next_ft = first_tx_index .get(h + 1) @@ -247,7 +247,7 @@ fn main() { continue; } for i in lo..hi { - if let Some(bin) = default_eligible_bin(values[i], output_types[i]) { + if let Some(bin) = eligible_bin(values[i], output_types[i]) { hist.increment(bin as usize); } } @@ -376,7 +376,7 @@ fn main() { println!(" Config: w12, alpha=2/7, search -9/+11, noisy/dust/round-btc filtered"); println!( " Test range: height {} .. {} ({} blocks)", - START_HEIGHT, + START_HEIGHT_FAST, total_heights - 1, overall.total_blocks ); diff --git a/crates/brk_oracle/examples/report_from.rs b/crates/brk_oracle/examples/report_from.rs index fad3b4547..9d3ae091d 100644 --- a/crates/brk_oracle/examples/report_from.rs +++ b/crates/brk_oracle/examples/report_from.rs @@ -6,8 +6,8 @@ use std::path::PathBuf; use brk_indexer::Indexer; use brk_oracle::{ - Config, HistogramEma, HistogramRaw, NUM_BINS, PRICES, START_HEIGHT, bin_to_cents, cents_to_bin, - default_eligible_bin, + Config, HistogramEma, HistogramRaw, NUM_BINS, PRICES, START_HEIGHT_FAST, bin_to_cents, + cents_to_bin, eligible_bin, }; use brk_types::{OutputType, Sats, TxIndex, TxOutIndex}; use vecdb::{AnyVec, ReadableVec, VecIndex}; @@ -528,7 +528,7 @@ fn main() { let start = std::env::var("ORACLE_START") .ok() .and_then(|s| s.parse().ok()) - .unwrap_or(START_HEIGHT); + .unwrap_or(START_HEIGHT_FAST); let end_override = std::env::var("ORACLE_END") .ok() .and_then(|s| s.parse::().ok()); @@ -685,7 +685,7 @@ fn main() { }, stencil_weight, ); - // Mid-run regime switch, mirrors production Oracle::reconfigure at START_HEIGHT: + // Mid-run regime switch, mirrors production Oracle::reconfigure 
at START_HEIGHT_FAST: // at SWITCH_AT rebuild the EMA to SWITCH_WINDOW/SWITCH_ALPHA and warm-start fresh // (ring reset, ref_bin kept) - the same state as a fresh warm-up. Search window // is unchanged (both regimes share it). 0 = no switch (single-config baseline). @@ -903,7 +903,7 @@ fn main() { continue; } for i in lo..hi { - if let Some(bin) = default_eligible_bin(values[i], output_types[i]) { + if let Some(bin) = eligible_bin(values[i], output_types[i]) { hist.increment(bin as usize); } } diff --git a/crates/brk_oracle/src/config.rs b/crates/brk_oracle/src/config.rs index 8da2056f1..e8d799f09 100644 --- a/crates/brk_oracle/src/config.rs +++ b/crates/brk_oracle/src/config.rs @@ -1,11 +1,12 @@ -use brk_types::OutputType; +/// First height the oracle computes on-chain, with the slow cold-start EMA +/// ([`slow`](Config::slow)). Below it, prices come from [`PRICES`](crate::PRICES). +pub const START_HEIGHT_SLOW: usize = 340_000; -/// Dust floor used by `Config::default()` and `default_eligible_bin`. -pub(crate) const DEFAULT_MIN_SATS: u64 = 1000; - -/// Output types skipped by `Config::default()` (protocol-dominated) and the -/// source of truth for `default_eligible_bin`'s precomputed exclusion mask. -pub(crate) const DEFAULT_EXCLUDED_OUTPUT_TYPES: &[OutputType] = &[OutputType::P2TR]; +/// Height where the oracle switches slow -> fast EMA ([`default`](Config::default)). +/// The regimes are complementary: slow resists the round-USD half-price drift +/// that locks fast below here, while fast tracks the 2018-2019 crashes that lock +/// slow. +pub const START_HEIGHT_FAST: usize = 508_000; #[derive(Clone)] pub struct Config { @@ -16,18 +17,12 @@ pub struct Config { /// Search window bins below/above previous estimate. Asymmetric for log-scale. pub search_below: usize, pub search_above: usize, - /// Weight of the adaptive shape-correlation restoring force added to the + /// Weight of the adaptive shape-anchoring restoring force added to the /// stencil score. 
`0.0` disables it (mature regime, where the fast EMA - /// tracks real moves the shape term would resist); the slow cold-start uses + /// tracks real moves the shape term would resist). The slow cold-start uses /// a positive weight to resist round-USD octave aliasing in the thin early /// output mix. - pub corr_weight: f64, - /// Minimum output value in sats (dust filter). - pub min_sats: u64, - /// Exclude round BTC amounts that create false stencil matches. - pub exclude_common_round_values: bool, - /// Output types to ignore (e.g. P2TR, P2WSH are noisy). - pub excluded_output_types: Vec, + pub shape_weight: f64, } impl Default for Config { @@ -37,33 +32,30 @@ impl Default for Config { window_size: 12, search_below: 12, search_above: 11, - corr_weight: 0.0, - min_sats: DEFAULT_MIN_SATS, - exclude_common_round_values: true, - excluded_output_types: DEFAULT_EXCLUDED_OUTPUT_TYPES.to_vec(), + shape_weight: 0.0, } } } impl Config { - /// Cold-start config below [`START_HEIGHT`](crate::START_HEIGHT): a slow EMA + /// Cold-start config below [`START_HEIGHT_FAST`]: a slow EMA /// (span ~19) that resists the round-USD half-price drift the fast default /// octave-locks onto in the thin pre-2018 output mix. Window grows to 40 to - /// hold the decay, and a shape-correlation restoring force (`corr_weight`) + /// hold the decay, and a shape-anchoring restoring force (`shape_weight`) /// pulls the pick toward the octave whose arm-shape looks like real payments. pub fn slow() -> Self { Self { alpha: 0.10, window_size: 40, - corr_weight: 8.0, + shape_weight: 8.0, ..Self::default() } } - /// Config for `height`: [`slow`](Self::slow) below - /// [`START_HEIGHT`](crate::START_HEIGHT), else [`default`](Self::default). + /// Config for `height`: [`slow`](Self::slow) below [`START_HEIGHT_FAST`], else + /// [`default`](Self::default). 
pub fn for_height(height: usize) -> Self { - if height < crate::START_HEIGHT { + if height < START_HEIGHT_FAST { Self::slow() } else { Self::default() diff --git a/crates/brk_oracle/src/filter.rs b/crates/brk_oracle/src/filter.rs new file mode 100644 index 000000000..771a45071 --- /dev/null +++ b/crates/brk_oracle/src/filter.rs @@ -0,0 +1,75 @@ +use brk_types::{OutputType, Sats}; + +use crate::scale::sats_to_bin; + +/// Dust floor: outputs below this many sats are too small to be payments. +const MIN_SATS: u64 = 1000; + +/// Output types skipped entirely (protocol-dominated, too noisy to carry the +/// round-dollar signal). +const EXCLUDED_OUTPUT_TYPES: &[OutputType] = &[OutputType::P2TR]; + +/// Bitmask form of [`EXCLUDED_OUTPUT_TYPES`], folded at compile time so +/// [`eligible_bin`] checks membership with a single AND. +const EXCLUDED_MASK: u16 = { + let mut mask = 0u16; + let mut i = 0; + while i < EXCLUDED_OUTPUT_TYPES.len() { + mask |= 1u16 << EXCLUDED_OUTPUT_TYPES[i] as u8; + i += 1; + } + mask +}; + +/// A transaction with more than this many outputs is a batch payout (exchange +/// sweep, mixer fan-out), not a round-dollar payment, so it is dropped below +/// [`MAX_OUTPUTS_UNTIL_HEIGHT`]. +pub const MAX_OUTPUTS: usize = 100; + +/// Height below which the [`MAX_OUTPUTS`] cap applies. The thin 2018-2020 +/// signal needs batch payouts removed to stay locked onto the round-dollar +/// pattern. Above this height on-chain volume is dense enough that the cap +/// removes more genuine signal than noise, so it is lifted. +pub const MAX_OUTPUTS_UNTIL_HEIGHT: usize = 630_000; + +/// Bin index for `(sats, output_type)`, or `None` for an excluded type (P2TR), +/// dust, a round-BTC value, or an out-of-range bin. The per-output half of the +/// round-dollar payment filter. 
+#[inline(always)] +pub fn eligible_bin(sats: Sats, output_type: OutputType) -> Option { + if EXCLUDED_MASK & (1u16 << output_type as u8) != 0 { + return None; + } + if *sats < MIN_SATS || sats.is_common_round_value() { + return None; + } + sats_to_bin(sats).map(|b| b as u16) +} + +/// The on-chain round-dollar payment filter, shared by the indexer warm-up, +/// per-request reconstruction, and the mempool's live histogram so every path +/// bins identically. Calls `emit(bin)` for each eligible output, in order. +/// +/// A whole transaction is dropped when it carries any OP_RETURN output (data +/// carriers, not payments) or, below [`MAX_OUTPUTS_UNTIL_HEIGHT`], when it has +/// more than [`MAX_OUTPUTS`] outputs (batch payouts). `height` is the block these +/// outputs belong to. The mempool, always past the cap window, passes +/// `usize::MAX`. +#[inline] +pub fn for_each_round_dollar_bin( + height: usize, + outputs: impl ExactSizeIterator + Clone, + mut emit: impl FnMut(u16), +) { + if height < MAX_OUTPUTS_UNTIL_HEIGHT && outputs.len() > MAX_OUTPUTS { + return; + } + if outputs.clone().any(|(_, ty)| ty == OutputType::OpReturn) { + return; + } + for (sats, ty) in outputs { + if let Some(bin) = eligible_bin(sats, ty) { + emit(bin); + } + } +} diff --git a/crates/brk_oracle/src/lib.rs b/crates/brk_oracle/src/lib.rs index 03b5d705e..ac88e5e31 100644 --- a/crates/brk_oracle/src/lib.rs +++ b/crates/brk_oracle/src/lib.rs @@ -2,340 +2,71 @@ //! //! Detects round-dollar transaction patterns ($1, $5, $10, ... $10,000) in Bitcoin //! block outputs to derive the current price without any exchange data. +//! +//! Behavior changes by height along two independent axes, each in its own module: +//! +//! - EMA regime (`config`): below [`START_HEIGHT_SLOW`] prices come from the baked +//! [`PRICES`]. From there to [`START_HEIGHT_FAST`] a slow cold-start EMA runs with +//! a shape-anchoring restoring force. At [`START_HEIGHT_FAST`] it switches to a +//! 
fast EMA that tracks mature-market volatility. +//! - Output filter (`filter`): below [`MAX_OUTPUTS_UNTIL_HEIGHT`] batch-payout +//! transactions are dropped from the histogram. Above it the cap is lifted. +//! +//! The two boundaries differ on purpose. The EMA must hand off to fast before the +//! 2020 crash, while the output cap helps the thin pre-2020 mix for longer. -use brk_types::{Cents, Dollars, Histogram, OutputType, Sats}; +use brk_types::{Cents, Dollars}; mod config; +mod filter; +mod scale; +mod shape; +mod stencil; +mod window; -pub use config::Config; -use config::{DEFAULT_EXCLUDED_OUTPUT_TYPES, DEFAULT_MIN_SATS}; +pub use config::{Config, START_HEIGHT_FAST, START_HEIGHT_SLOW}; +pub use filter::{MAX_OUTPUTS, MAX_OUTPUTS_UNTIL_HEIGHT, eligible_bin, for_each_round_dollar_bin}; +pub use scale::{ + BINS_PER_DECADE, HistogramEma, HistogramEmaCompact, HistogramRaw, NUM_BINS, bin_to_cents, + cents_to_bin, sats_to_bin, +}; + +use shape::ShapeAnchor; +use stencil::find_best_bin; +use window::EmaWindow; /// Oracle algorithm version. Bump on any change that alters computed prices /// so downstream consumers can invalidate cached results. pub const VERSION: u32 = 3; /// Pre-oracle dollar prices, one per line, heights 0..340_000. The last entry -/// seeds the oracle's first on-chain computation at `START_HEIGHT_SLOW`. +/// seeds the oracle's first on-chain computation at [`START_HEIGHT_SLOW`]. pub const PRICES: &str = include_str!("prices.txt"); -/// First height the oracle computes on-chain, with the slow cold-start EMA -/// ([`Config::slow`]). Below it, prices come from [`PRICES`]. -pub const START_HEIGHT_SLOW: usize = 340_000; - -/// Height where the oracle switches slow -> fast EMA ([`Config::default`]). -/// The regimes are complementary: slow resists the round-USD half-price drift -/// that locks fast below here; fast tracks the 2018-2019 crashes that lock slow. 
-pub const START_HEIGHT: usize = 508_000; - -/// A transaction with more than this many outputs is a batch payout (exchange -/// sweep, mixer fan-out), not a round-dollar payment, so it is dropped below -/// [`MAX_OUTPUTS_UNTIL_HEIGHT`]. -pub const MAX_OUTPUTS: usize = 100; - -/// Height below which the [`MAX_OUTPUTS`] cap applies. The thin 2018-2020 -/// signal needs batch payouts removed to stay locked onto the round-dollar -/// pattern. Above this height on-chain volume is dense enough that the cap -/// removes more genuine signal than noise, so it is lifted. -pub const MAX_OUTPUTS_UNTIL_HEIGHT: usize = 630_000; - -pub const BINS_PER_DECADE: usize = 200; -const MIN_LOG_BTC: i32 = -8; -const MAX_LOG_BTC: i32 = 4; -pub const NUM_BINS: usize = BINS_PER_DECADE * (MAX_LOG_BTC - MIN_LOG_BTC) as usize; - -/// Per-block round-dollar payment counts, one `u32` per log-scale bin: the -/// oracle's ring-buffer element and the `histogram/raw/*` wire payload. -pub type HistogramRaw = Histogram; - -/// Smoothed EMA over the window, one `f64` per bin. The stencil search reads it, -/// never serialized (projected to [`HistogramEmaCompact`] for the wire). -pub type HistogramEma = Histogram; - -/// Quantized `u16` projection of [`HistogramEma`] for the `histogram/ema/*` wire. -pub type HistogramEmaCompact = Histogram; - -/// Bin offsets for 19 round-USD amounts relative to the $100 reference (offset 0). -/// Each offset = log10(amount / 100) * BINS_PER_DECADE. -const STENCIL_OFFSETS: [i32; 19] = [ - -400, // $1 - -340, // $2 - -305, // $3 - -260, // $5 - -200, // $10 - -165, // $15 - -140, // $20 - -120, // $25 - -105, // $30 - -60, // $50 - 0, // $100 - 35, // $150 - 60, // $200 - 95, // $300 - 140, // $500 - 200, // $1000 - 260, // $2000 - 340, // $5000 - 400, // $10000 -]; - -/// Number of round-USD stencil arms. 
-const N_ARMS: usize = STENCIL_OFFSETS.len(); - -/// EMA rate for the adaptive shape template (~250-block time constant), slow -/// enough that a transient octave slide can't corrupt the profile before the -/// pick recovers. -const CORR_BETA: f64 = 0.004; - -/// Maps a satoshi value to its log-scale bin index. -/// bin = round(log10(sats) * BINS_PER_DECADE). -#[inline(always)] -pub fn sats_to_bin(sats: Sats) -> Option { - if sats.is_zero() { - return None; - } - let bin = ((*sats as f64).log10() * BINS_PER_DECADE as f64).round() as i64; - if bin >= 0 && (bin as usize) < NUM_BINS { - Some(bin as usize) - } else { - None - } -} - -/// Bitmask form of `DEFAULT_EXCLUDED_OUTPUT_TYPES`, evaluated at compile -/// time so `default_eligible_bin` checks membership with a single AND. -const DEFAULT_EXCLUDED_MASK: u16 = { - let mut mask = 0u16; - let mut i = 0; - while i < DEFAULT_EXCLUDED_OUTPUT_TYPES.len() { - mask |= 1u16 << DEFAULT_EXCLUDED_OUTPUT_TYPES[i] as u8; - i += 1; - } - mask -}; - -/// Bin index for `(sats, output_type)` under `Config::default()` rules. -/// Returns `None` for excluded types (P2TR/P2WSH), dust, round-BTC values, -/// or out-of-range bins. Mirror of `Oracle::output_to_bin` for callers that -/// can pre-bin outputs at write time and don't have an `Oracle` handle. -#[inline(always)] -pub fn default_eligible_bin(sats: Sats, output_type: OutputType) -> Option { - if DEFAULT_EXCLUDED_MASK & (1u16 << output_type as u8) != 0 { - return None; - } - if *sats < DEFAULT_MIN_SATS || sats.is_common_round_value() { - return None; - } - sats_to_bin(sats).map(|b| b as u16) -} - -/// The single definition of the on-chain round-dollar payment filter, shared by -/// the indexer warm-up, per-request reconstruction, and the mempool's live -/// histogram so every path bins identically. Calls `emit(bin)` for each eligible -/// output, in order. 
-/// -/// A whole transaction is dropped when it carries any OP_RETURN output (data -/// carriers like consolidations and inscriptions aren't payments and would -/// pollute the signal) or, below [`MAX_OUTPUTS_UNTIL_HEIGHT`], when it has more -/// than [`MAX_OUTPUTS`] outputs (batch payouts). `height` is the block these -/// outputs belong to. The mempool, always past the cap window, passes -/// `usize::MAX`. -#[inline] -pub fn for_each_round_dollar_bin( - height: usize, - outputs: impl ExactSizeIterator + Clone, - mut emit: impl FnMut(u16), -) { - if height < MAX_OUTPUTS_UNTIL_HEIGHT && outputs.len() > MAX_OUTPUTS { - return; - } - if outputs.clone().any(|(_, ty)| ty == OutputType::OpReturn) { - return; - } - for (sats, ty) in outputs { - if let Some(bin) = default_eligible_bin(sats, ty) { - emit(bin); - } - } -} - -/// Converts a fractional bin to a USD price in cents. -/// For a $D output at price P: sats = D * 1e8 / P, so P = 10^(10 - bin/200) dollars, -/// where 10 = log10($100 reference * 1e8 sats/BTC). -#[inline] -pub fn bin_to_cents(bin: f64) -> u64 { - let dollars = 10.0_f64.powf(10.0 - bin / BINS_PER_DECADE as f64); - (dollars * 100.0).round() as u64 -} - -/// Converts a USD price in cents to a fractional bin (inverse of bin_to_cents). -#[inline] -pub fn cents_to_bin(cents: f64) -> f64 { - (10.0 - (cents / 100.0).log10()) * BINS_PER_DECADE as f64 -} - -/// Raw EMA mass on each of the 19 stencil arms at `center`. -fn arms_at(ema: &HistogramEma, center: i64) -> [f64; N_ARMS] { - let mut arms = [0.0; N_ARMS]; - for (i, &offset) in STENCIL_OFFSETS.iter().enumerate() { - let idx = center + offset as i64; - if idx >= 0 && (idx as usize) < NUM_BINS { - arms[i] = ema[idx as usize]; - } - } - arms -} - -/// [`arms_at`] L1-normalized to sum 1, or `None` when the center carries no mass. 
-fn normalized_arms_at(ema: &HistogramEma, center: i64) -> Option<[f64; N_ARMS]> { - let mut arms = arms_at(ema, center); - let sum: f64 = arms.iter().sum(); - if sum <= 0.0 { - return None; - } - for arm in &mut arms { - *arm /= sum; - } - Some(arms) -} - -/// Shape match `1 - L1distance` between the candidate's L1-normalized arm vector -/// and the L1-normalized `profile`. 1.0 is an identical shape and it falls as -/// mass shifts off the round-USD ladder, so it pulls the pick toward the octave -/// whose payment shape looks real. Returns 0 for an empty (no-mass) center. -fn arm_profile_match(ema: &HistogramEma, center: i64, profile: &[f64; N_ARMS]) -> f64 { - match normalized_arms_at(ema, center) { - Some(arms) => { - 1.0 - (0..N_ARMS) - .map(|i| (arms[i] - profile[i]).abs()) - .sum::() - } - None => 0.0, - } -} - -/// Scores each candidate bin in the search window by summing normalized stencil -/// matches across the EMA histogram, then refines with parabolic interpolation. -/// When `corr_weight` is non-zero the [`arm_profile_match`] shape term is added -/// to each candidate's score as an octave-discriminating restoring force. -fn find_best_bin( - ema: &HistogramEma, - prev_bin: f64, - search_below: usize, - search_above: usize, - corr_weight: f64, - profile: &[f64; N_ARMS], -) -> f64 { - let center = prev_bin.round() as usize; - let search_start = center.saturating_sub(search_below); - let search_end = (center + search_above + 1).min(NUM_BINS); - - if search_start >= search_end { - return prev_bin; - } - - // Per-offset peak within the search window (for normalization). 
- let mut track_norm = [0.0f64; 19]; - for (i, &offset) in STENCIL_OFFSETS.iter().enumerate() { - for bin in search_start..search_end { - let idx = bin as i32 + offset; - if idx >= 0 && (idx as usize) < NUM_BINS { - track_norm[i] = track_norm[i].max(ema[idx as usize]); - } - } - } - - let score = |bin: usize| -> f64 { - let mut total = 0.0; - for (i, &offset) in STENCIL_OFFSETS.iter().enumerate() { - let idx = bin as i32 + offset; - if idx >= 0 && (idx as usize) < NUM_BINS && track_norm[i] > 0.0 { - total += ema[idx as usize] / track_norm[i]; - } - } - if corr_weight != 0.0 { - total += corr_weight * arm_profile_match(ema, bin as i64, profile); - } - total - }; - - let mut best_bin = search_start; - let mut best_score = score(search_start); - for bin in (search_start + 1)..search_end { - let candidate = score(bin); - if candidate > best_score { - best_score = candidate; - best_bin = bin; - } - } - - // Parabolic sub-bin interpolation for fractional precision. - let score_center = best_score; - let score_left = if best_bin > search_start { - score(best_bin - 1) - } else { - score_center - }; - let score_right = if best_bin + 1 < search_end { - score(best_bin + 1) - } else { - score_center - }; - let denom = score_left - 2.0 * score_center + score_right; - let sub_bin = if denom.abs() > 1e-10 { - (0.5 * (score_left - score_right) / denom).clamp(-0.5, 0.5) - } else { - 0.0 - }; - - best_bin as f64 + sub_bin -} - #[derive(Clone)] pub struct Oracle { - histograms: Vec, - ema: Box, - cursor: usize, - filled: usize, + window: EmaWindow, ref_bin: f64, config: Config, - weights: Vec, - excluded_mask: u16, warmup: bool, - /// Adaptive round-USD shape template, re-estimated each non-warmup block from - /// the arm vector at the pick. 
Seeded flat (every arm equal) and only - /// read/updated when `config.corr_weight` is non-zero (the slow cold-start - /// regime), so the EMA learns the real payment shape within a few hundred - /// blocks without a hand-tuned starting guess biasing acquisition. - profile: [f64; N_ARMS], + /// Shape-anchoring restoring force, inert outside the slow cold-start + /// regime (zero weight). See [`ShapeAnchor`](shape::ShapeAnchor). + shape: ShapeAnchor, } impl Oracle { pub fn new(start_bin: f64, config: Config) -> Self { - let window_size = config.window_size; - let decay = 1.0 - config.alpha; - let weights: Vec = (0..window_size) - .map(|i| config.alpha * decay.powi(i as i32)) - .collect(); - let excluded_mask = config - .excluded_output_types - .iter() - .fold(0u16, |mask, ot| mask | (1 << *ot as u8)); Self { - histograms: vec![HistogramRaw::zeros(); window_size], - ema: Box::new(HistogramEma::zeros()), - cursor: 0, - filled: 0, + window: EmaWindow::new(config.window_size, config.alpha), ref_bin: start_bin, - weights, - excluded_mask, warmup: false, + shape: ShapeAnchor::new(config.shape_weight), config, - profile: [1.0 / N_ARMS as f64; N_ARMS], } } /// Create an oracle restored from a known price. `fill` should call - /// `process_histogram` for the warmup blocks; during warmup the ring + /// `process_histogram` for the warmup blocks. During warmup the ring /// fills without recomputing EMA or searching, then we recompute once /// at the end so the first non-warmup call has a primed EMA. 
pub fn from_checkpoint(ref_bin: f64, config: Config, fill: impl FnOnce(&mut Self)) -> Self { @@ -343,57 +74,34 @@ impl Oracle { oracle.warmup = true; fill(&mut oracle); oracle.warmup = false; - oracle.recompute_ema(); + oracle.window.recompute(); oracle } pub fn process_histogram(&mut self, hist: &HistogramRaw) -> f64 { - self.histograms[self.cursor] = hist.clone(); - self.cursor = (self.cursor + 1) % self.config.window_size; - if self.filled < self.config.window_size { - self.filled += 1; - } + self.window.push(hist); if !self.warmup { - self.recompute_ema(); + self.window.recompute(); self.ref_bin = find_best_bin( - &self.ema, + self.window.ema(), self.ref_bin, self.config.search_below, self.config.search_above, - self.config.corr_weight, - &self.profile, + &self.shape, ); - if self.config.corr_weight != 0.0 { - self.update_profile(); - } + self.shape.update(self.window.ema(), self.ref_bin.round() as i64); } self.ref_bin } - /// Blend the L1-normalized arm shape at the current pick into the adaptive - /// `profile` (slow EMA, [`CORR_BETA`]). The slow rate lets the template ride - /// through a transient octave dip without locking onto it. No-op when the - /// pick carries no mass. - fn update_profile(&mut self) { - if let Some(arms) = normalized_arms_at(&self.ema, self.ref_bin.round() as i64) { - (0..N_ARMS).for_each(|i| { - self.profile[i] = (1.0 - CORR_BETA) * self.profile[i] + CORR_BETA * arms[i]; - }); - } - } - - /// Switch EMA regime mid-stream (slow -> fast at [`START_HEIGHT`]) by + /// Switch EMA regime mid-stream (slow -> fast at [`START_HEIGHT_FAST`]) by /// re-warming under `config` over the most recent `config.window_size` raw /// histograms, so a continuous build and an incremental warm-up reach the - /// same state; `ref_bin` carries over. + /// same state. `ref_bin` carries over. 
pub fn reconfigure(&mut self, config: Config) { - let window = self.config.window_size; - let kept: Vec = (0..self.filled.min(config.window_size)) - .rev() - .map(|age| self.histograms[(self.cursor + window - 1 - age) % window].clone()) - .collect(); + let kept = self.window.recent(config.window_size); *self = Self::from_checkpoint(self.ref_bin, config, |o| { kept.iter().for_each(|h| { o.process_histogram(h); @@ -406,10 +114,9 @@ impl Oracle { } /// The current weighted EMA over the window, one value per log-scale bin. - /// `ema()[i]` is bin `i` (see `sats_to_bin`); callers transporting it - /// round/clamp to a smaller type. + /// `ema()[i]` is bin `i` (see `sats_to_bin`). pub fn ema(&self) -> &HistogramEma { - &self.ema + self.window.ema() } pub fn price_cents(&self) -> Cents { @@ -419,62 +126,12 @@ impl Oracle { pub fn price_dollars(&self) -> Dollars { self.price_cents().into() } - - /// Config-aware bin index for `(sats, output_type)`. Returns `None` - /// for excluded types, dust, round-BTC values, or out-of-range bins. - /// Callers under `Config::default()` should use `default_eligible_bin` - /// (free function) to skip the `&self` indirection. 
- #[inline(always)] - pub fn output_to_bin(&self, sats: Sats, output_type: OutputType) -> Option { - if self.excluded_mask & (1 << output_type as u8) != 0 { - return None; - } - if *sats < self.config.min_sats - || (self.config.exclude_common_round_values && sats.is_common_round_value()) - { - return None; - } - sats_to_bin(sats) - } - - fn recompute_ema(&mut self) { - self.ema.fill(0.0); - for age in 0..self.filled { - let idx = (self.cursor + self.config.window_size - 1 - age) % self.config.window_size; - let weight = self.weights[age]; - let h = &self.histograms[idx]; - self.ema - .iter_mut() - .zip(h.iter()) - .for_each(|(e, &c)| *e += weight * c as f64); - } - } } #[cfg(test)] mod tests { use super::*; - #[test] - fn sats_to_bin_round_trip() { - assert_eq!(sats_to_bin(Sats::new(100_000_000)), Some(1600)); - assert_eq!(sats_to_bin(Sats::new(1)), Some(0)); - assert_eq!(sats_to_bin(Sats::ZERO), None); - } - - #[test] - fn bin_to_cents_known_values() { - assert_eq!(bin_to_cents(1600.0), 10000); - assert_eq!(bin_to_cents(1800.0), 1000); - } - - #[test] - fn sats_to_bin_boundary() { - assert_eq!(sats_to_bin(Sats::new(1_000_000_000_000)), None); - let sats = 10.0_f64.powf(11.995) as u64; - assert!(sats_to_bin(Sats::new(sats)).is_some()); - } - #[test] fn oracle_basic() { let oracle = Oracle::new(1600.0, Config::default()); @@ -483,7 +140,7 @@ mod tests { } // reconfigure must leave the oracle in the same state as a fresh warm-up - // over the most recent window of raw histograms; the continuous build and + // over the most recent window of raw histograms. The continuous build and // the incremental resume rely on this agreeing at the slow -> fast seam. 
#[test] fn reconfigure_matches_fresh_warmup() { diff --git a/crates/brk_oracle/src/scale.rs b/crates/brk_oracle/src/scale.rs new file mode 100644 index 000000000..85b225ccd --- /dev/null +++ b/crates/brk_oracle/src/scale.rs @@ -0,0 +1,72 @@ +use brk_types::{Histogram, Sats}; + +pub const BINS_PER_DECADE: usize = 200; +const MIN_LOG_BTC: i32 = -8; +const MAX_LOG_BTC: i32 = 4; +pub const NUM_BINS: usize = BINS_PER_DECADE * (MAX_LOG_BTC - MIN_LOG_BTC) as usize; + +/// Per-block round-dollar payment counts, one `u32` per log-scale bin: the +/// oracle's ring-buffer element and the `histogram/raw/*` wire payload. +pub type HistogramRaw = Histogram; + +/// Smoothed EMA over the window, one `f64` per bin. The stencil search reads it, +/// never serialized (projected to [`HistogramEmaCompact`] for the wire). +pub type HistogramEma = Histogram; + +/// Quantized `u16` projection of [`HistogramEma`] for the `histogram/ema/*` wire. +pub type HistogramEmaCompact = Histogram; + +/// Maps a satoshi value to its log-scale bin index. +/// bin = round(log10(sats) * BINS_PER_DECADE). +#[inline(always)] +pub fn sats_to_bin(sats: Sats) -> Option { + if sats.is_zero() { + return None; + } + let bin = ((*sats as f64).log10() * BINS_PER_DECADE as f64).round() as i64; + if bin >= 0 && (bin as usize) < NUM_BINS { + Some(bin as usize) + } else { + None + } +} + +/// Converts a fractional bin to a USD price in cents. +/// For a $D output at price P: sats = D * 1e8 / P, so P = 10^(10 - bin/200) dollars, +/// where 10 = log10($100 reference * 1e8 sats/BTC). +#[inline] +pub fn bin_to_cents(bin: f64) -> u64 { + let dollars = 10.0_f64.powf(10.0 - bin / BINS_PER_DECADE as f64); + (dollars * 100.0).round() as u64 +} + +/// Converts a USD price in cents to a fractional bin (inverse of bin_to_cents). 
+#[inline] +pub fn cents_to_bin(cents: f64) -> f64 { + (10.0 - (cents / 100.0).log10()) * BINS_PER_DECADE as f64 +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sats_to_bin_round_trip() { + assert_eq!(sats_to_bin(Sats::new(100_000_000)), Some(1600)); + assert_eq!(sats_to_bin(Sats::new(1)), Some(0)); + assert_eq!(sats_to_bin(Sats::ZERO), None); + } + + #[test] + fn bin_to_cents_known_values() { + assert_eq!(bin_to_cents(1600.0), 10000); + assert_eq!(bin_to_cents(1800.0), 1000); + } + + #[test] + fn sats_to_bin_boundary() { + assert_eq!(sats_to_bin(Sats::new(1_000_000_000_000)), None); + let sats = 10.0_f64.powf(11.995) as u64; + assert!(sats_to_bin(Sats::new(sats)).is_some()); + } +} diff --git a/crates/brk_oracle/src/shape.rs b/crates/brk_oracle/src/shape.rs new file mode 100644 index 000000000..078eed223 --- /dev/null +++ b/crates/brk_oracle/src/shape.rs @@ -0,0 +1,68 @@ +use crate::{ + scale::HistogramEma, + stencil::{N_ARMS, normalized_arms_at}, +}; + +/// EMA rate for the adaptive shape template (~250-block time constant), slow +/// enough that a transient octave slide can't corrupt the profile before the +/// pick recovers. +const SHAPE_BETA: f64 = 0.004; + +/// Adaptive shape-anchoring restoring force for the slow cold-start regime. +/// +/// Holds a round-USD shape template (`profile`), re-estimated each block from the +/// arm vector at the pick, and adds a per-candidate score pulling the search +/// toward the octave whose payment shape looks real. This lets the slow EMA +/// resist round-USD octave aliasing in the thin pre-2018 output mix. +/// +/// A zero `weight` makes it inert ([`score`](Self::score) returns 0, +/// [`update`](Self::update) is a no-op), so the fast regime carries it for free +/// without call sites special-casing the disabled path. +#[derive(Clone)] +pub(crate) struct ShapeAnchor { + weight: f64, + /// Seeded flat (every arm equal). 
The slow EMA learns the real payment shape + /// within a few hundred blocks, so no hand-tuned starting guess is needed. + profile: [f64; N_ARMS], +} + +impl ShapeAnchor { + pub(crate) fn new(weight: f64) -> Self { + Self { + weight, + profile: [1.0 / N_ARMS as f64; N_ARMS], + } + } + + /// Restoring-force contribution to a candidate bin's score: `weight` times the + /// shape match against the learned profile. 0 when inert or the bin is empty. + pub(crate) fn score(&self, ema: &HistogramEma, bin: i64) -> f64 { + if self.weight == 0.0 { + return 0.0; + } + self.weight * self.shape_match(ema, bin) + } + + /// Blend the L1-normalized arm shape at `pick` into the profile (slow EMA, + /// [`SHAPE_BETA`]). No-op when inert or the pick is empty. + pub(crate) fn update(&mut self, ema: &HistogramEma, pick: i64) { + if self.weight == 0.0 { + return; + } + if let Some(arms) = normalized_arms_at(ema, pick) { + (0..N_ARMS).for_each(|i| { + self.profile[i] = (1.0 - SHAPE_BETA) * self.profile[i] + SHAPE_BETA * arms[i]; + }); + } + } + + /// Shape match `1 - L1distance` between the candidate's L1-normalized arm + /// vector and the profile. 1.0 is an identical shape and it falls as mass + /// shifts off the round-USD ladder. 0 for an empty (no-mass) center. + fn shape_match(&self, ema: &HistogramEma, center: i64) -> f64 { + match normalized_arms_at(ema, center) { + Some(arms) => 1.0 - (0..N_ARMS).map(|i| (arms[i] - self.profile[i]).abs()).sum::(), + None => 0.0, + } + } +} diff --git a/crates/brk_oracle/src/stencil.rs b/crates/brk_oracle/src/stencil.rs new file mode 100644 index 000000000..70e193081 --- /dev/null +++ b/crates/brk_oracle/src/stencil.rs @@ -0,0 +1,129 @@ +use crate::{ + scale::{HistogramEma, NUM_BINS}, + shape::ShapeAnchor, +}; + +/// Bin offsets for 19 round-USD amounts relative to the $100 reference (offset 0). +/// Each offset = log10(amount / 100) * BINS_PER_DECADE. 
+const STENCIL_OFFSETS: [i32; 19] = [ + -400, // $1 + -340, // $2 + -305, // $3 + -260, // $5 + -200, // $10 + -165, // $15 + -140, // $20 + -120, // $25 + -105, // $30 + -60, // $50 + 0, // $100 + 35, // $150 + 60, // $200 + 95, // $300 + 140, // $500 + 200, // $1000 + 260, // $2000 + 340, // $5000 + 400, // $10000 +]; + +/// Number of round-USD stencil arms. +pub(crate) const N_ARMS: usize = STENCIL_OFFSETS.len(); + +/// EMA mass at `idx`, or 0.0 when the index falls outside the histogram. +#[inline(always)] +fn bin_value(ema: &HistogramEma, idx: i64) -> f64 { + if idx >= 0 && (idx as usize) < NUM_BINS { + ema[idx as usize] + } else { + 0.0 + } +} + +/// Raw EMA mass on each of the 19 stencil arms at `center`. +fn arms_at(ema: &HistogramEma, center: i64) -> [f64; N_ARMS] { + STENCIL_OFFSETS.map(|offset| bin_value(ema, center + offset as i64)) +} + +/// [`arms_at`] L1-normalized to sum 1, or `None` when the center carries no mass. +pub(crate) fn normalized_arms_at(ema: &HistogramEma, center: i64) -> Option<[f64; N_ARMS]> { + let mut arms = arms_at(ema, center); + let sum: f64 = arms.iter().sum(); + if sum <= 0.0 { + return None; + } + for arm in &mut arms { + *arm /= sum; + } + Some(arms) +} + +/// Scores each candidate bin in the search window by summing normalized stencil +/// matches across the EMA histogram, then refines with parabolic interpolation. +/// Each candidate also picks up `shape`'s shape-anchoring restoring force, which +/// is inert (adds 0) outside the slow cold-start regime. +pub(crate) fn find_best_bin( + ema: &HistogramEma, + prev_bin: f64, + search_below: usize, + search_above: usize, + shape: &ShapeAnchor, +) -> f64 { + let center = prev_bin.round() as usize; + let search_start = center.saturating_sub(search_below); + let search_end = (center + search_above + 1).min(NUM_BINS); + + if search_start >= search_end { + return prev_bin; + } + + // Per-offset peak within the search window (for normalization). 
+ let mut arm_peaks = [0.0f64; N_ARMS]; + for (i, &offset) in STENCIL_OFFSETS.iter().enumerate() { + for bin in search_start..search_end { + arm_peaks[i] = arm_peaks[i].max(bin_value(ema, bin as i64 + offset as i64)); + } + } + + let score = |bin: usize| -> f64 { + let mut total = 0.0; + for (i, &offset) in STENCIL_OFFSETS.iter().enumerate() { + if arm_peaks[i] > 0.0 { + total += bin_value(ema, bin as i64 + offset as i64) / arm_peaks[i]; + } + } + total += shape.score(ema, bin as i64); + total + }; + + let mut best_bin = search_start; + let mut best_score = score(search_start); + for bin in (search_start + 1)..search_end { + let candidate = score(bin); + if candidate > best_score { + best_score = candidate; + best_bin = bin; + } + } + + // Parabolic sub-bin interpolation for fractional precision. + let score_center = best_score; + let score_left = if best_bin > search_start { + score(best_bin - 1) + } else { + score_center + }; + let score_right = if best_bin + 1 < search_end { + score(best_bin + 1) + } else { + score_center + }; + let denom = score_left - 2.0 * score_center + score_right; + let sub_bin = if denom.abs() > 1e-10 { + (0.5 * (score_left - score_right) / denom).clamp(-0.5, 0.5) + } else { + 0.0 + }; + + best_bin as f64 + sub_bin +} diff --git a/crates/brk_oracle/src/window.rs b/crates/brk_oracle/src/window.rs new file mode 100644 index 000000000..f10e28fb6 --- /dev/null +++ b/crates/brk_oracle/src/window.rs @@ -0,0 +1,74 @@ +use crate::scale::{HistogramEma, HistogramRaw}; + +/// A sliding window of the most recent raw block histograms and their weighted +/// exponential moving average. +/// +/// [`push`](Self::push) records a block into a fixed-size ring. +/// [`recompute`](Self::recompute) folds the ring into [`ema`](Self::ema) with +/// per-age weights `alpha * (1 - alpha)^age` (newest block is age 0). Recording +/// and recomputing are separate steps so warm-up can fill the ring without +/// paying for the EMA until the first real query. 
+#[derive(Clone)] +pub(crate) struct EmaWindow { + histograms: Vec, + weights: Vec, + ema: Box, + cursor: usize, + filled: usize, +} + +impl EmaWindow { + pub(crate) fn new(window_size: usize, alpha: f64) -> Self { + let decay = 1.0 - alpha; + let weights = (0..window_size) + .map(|i| alpha * decay.powi(i as i32)) + .collect(); + Self { + histograms: vec![HistogramRaw::zeros(); window_size], + weights, + ema: Box::new(HistogramEma::zeros()), + cursor: 0, + filled: 0, + } + } + + /// Record `hist` as the newest block, evicting the oldest once full. + pub(crate) fn push(&mut self, hist: &HistogramRaw) { + let window = self.histograms.len(); + self.histograms[self.cursor] = hist.clone(); + self.cursor = (self.cursor + 1) % window; + self.filled = (self.filled + 1).min(window); + } + + /// Ring index of the block `age` steps back from the newest (age 0). + fn index_at_age(&self, age: usize) -> usize { + let window = self.histograms.len(); + (self.cursor + window - 1 - age) % window + } + + /// Fold the ring into the weighted EMA, newest block weighted `weights[0]`. + pub(crate) fn recompute(&mut self) { + self.ema.fill(0.0); + for age in 0..self.filled { + let weight = self.weights[age]; + let h = &self.histograms[self.index_at_age(age)]; + self.ema + .iter_mut() + .zip(h.iter()) + .for_each(|(e, &c)| *e += weight * c as f64); + } + } + + pub(crate) fn ema(&self) -> &HistogramEma { + &self.ema + } + + /// The most recent `min(filled, n)` raw histograms, oldest first - the + /// hand-off a regime switch replays into a fresh window of size `n`. + pub(crate) fn recent(&self, n: usize) -> Vec { + (0..self.filled.min(n)) + .rev() + .map(|age| self.histograms[self.index_at_age(age)].clone()) + .collect() + } +}