oracle: v2

2026-05-19 06:14:47 -07:00 · 2026-05-17 22:13:03 +02:00
parent e5819769e8
commit 20c4a113c9
10 changed files with 190 additions and 96 deletions
--- a/crates/brk_computer/src/distribution/vecs.rs
+++ b/crates/brk_computer/src/distribution/vecs.rs
@@ -41,7 +41,7 @@ use super::{
    metrics::AvgAmountMetrics,
 };

-const VERSION: Version = Version::new(24);
+const VERSION: Version = Version::new(24 + brk_oracle::VERSION);

 #[derive(Traversable)]
 pub struct AddrMetricsVecs<M: StorageMode = Rw> {
--- a/crates/brk_computer/src/prices/compute.rs
+++ b/crates/brk_computer/src/prices/compute.rs
@@ -61,8 +61,8 @@ impl Vecs {
    fn compute_prices(&mut self, indexer: &Indexer, exit: &Exit) -> Result<()> {
        let starting_height = indexer.safe_lengths().height;

-        let source_version =
-            indexer.vecs.outputs.value.version() + indexer.vecs.outputs.output_type.version();
+        let source_version = indexer.vecs.outputs.value.version()
+            + indexer.vecs.outputs.output_type.version();
        self.spot
            .cents
            .height
@@ -153,6 +153,10 @@ impl Vecs {
    /// Feed a range of blocks from the indexer into an Oracle (skipping coinbase),
    /// returning per-block ref_bin values.
    ///
+    /// A transaction carrying an `OP_RETURN` output is protocol machinery, not a
+    /// dollar-denominated payment, so all of its outputs are dropped from the
+    /// histogram. This needs per-transaction grouping of a block's outputs.
+    ///
    /// Pass `cap = None` from compute paths, when the indexer is quiescent and
    /// raw vec lengths are authoritative. Pass `cap = Some(&safe_lengths)` from
    /// reader paths so concurrent writer pushes past the cap are invisible.
@@ -193,33 +197,36 @@ impl Vecs {

        // Cursor avoids per-block PcoVec page decompression for the
        // tx-indexed first_txout_index lookup. Accessed tx_index values
-        // (first_tx_index + 1) are strictly increasing across blocks,
-        // so the cursor only advances forward.
+        // are strictly increasing across blocks, so it only advances forward.
        let mut txout_cursor = indexer.vecs.transactions.first_txout_index.cursor();

-        // Reusable buffers: avoid per-block allocation
+        // Reusable buffers: avoid per-block allocation. `tx_starts` holds the
+        // first txout index of each non-coinbase tx in the current block.
        let mut values: Vec<Sats> = Vec::new();
        let mut output_types: Vec<OutputType> = Vec::new();
+        let mut tx_starts: Vec<usize> = Vec::new();

        for idx in 0..range.len() {
-            let first_tx_index = first_tx_indexes[idx];
            let next_first_tx_index = first_tx_indexes
                .get(idx + 1)
                .copied()
-                .unwrap_or(TxIndex::from(total_txs));
+                .unwrap_or(TxIndex::from(total_txs))
+                .to_usize();
+            let block_first_tx = first_tx_indexes[idx].to_usize() + 1;
+            let tx_count = next_first_tx_index - block_first_tx;

            let out_end = out_firsts
                .get(idx + 1)
                .copied()
                .unwrap_or(TxOutIndex::from(total_outputs))
                .to_usize();
-            let out_start = if first_tx_index.to_usize() + 1 < next_first_tx_index.to_usize() {
-                let target = first_tx_index.to_usize() + 1;
-                txout_cursor.advance(target - txout_cursor.position());
-                txout_cursor.next().unwrap().to_usize()
-            } else {
-                out_end
-            };
+
+            txout_cursor.advance(block_first_tx - txout_cursor.position());
+            tx_starts.clear();
+            for _ in 0..tx_count {
+                tx_starts.push(txout_cursor.next().unwrap().to_usize());
+            }
+            let out_start = tx_starts.first().copied().unwrap_or(out_end);

            indexer
                .vecs
@@ -233,9 +240,19 @@ impl Vecs {
            );

            let mut hist = Histogram::zeros();
-            for (sats, output_type) in values.iter().zip(&output_types) {
-                if let Some(bin) = oracle.output_to_bin(*sats, *output_type) {
-                    hist.increment(bin);
+            for tx in 0..tx_count {
+                let lo = tx_starts[tx] - out_start;
+                let hi = tx_starts
+                    .get(tx + 1)
+                    .map(|s| s - out_start)
+                    .unwrap_or(out_end - out_start);
+                if output_types[lo..hi].contains(&OutputType::OpReturn) {
+                    continue;
+                }
+                for i in lo..hi {
+                    if let Some(bin) = oracle.output_to_bin(values[i], output_types[i]) {
+                        hist.increment(bin);
+                    }
                }
            }

--- a/crates/brk_computer/src/prices/mod.rs
+++ b/crates/brk_computer/src/prices/mod.rs
@@ -4,6 +4,7 @@ pub(crate) mod ohlcs;

 use std::path::Path;

+use brk_oracle::VERSION as ORACLE_VERSION;
 use brk_traversable::Traversable;
 use brk_types::Version;
 use vecdb::{Database, ReadOnlyClone, Rw, StorageMode};
@@ -49,7 +50,9 @@ impl Vecs {
        version: Version,
        indexes: &indexes::Vecs,
    ) -> brk_error::Result<Self> {
-        let version = version + Version::new(11);
+        // Adding `ORACLE_VERSION` folds in the on-chain oracle algorithm version
+        // so every price-derived module invalidates when computed prices change.
+        let version = version + Version::new(11 + ORACLE_VERSION);

        let price_cents = CachedPerBlock::forced_import(db, "price_cents", version, indexes)?;

--- a/crates/brk_oracle/README.md
+++ b/crates/brk_oracle/README.md
@@ -1,6 +1,8 @@
 # brk_oracle

-Pure on-chain BTC/USD price oracle. No exchange feeds, no external APIs. Derives the bitcoin price from transaction data alone. Tracks block by block from height 550,000 (November 2018) onward.
+**Version 2**
+
+Pure on-chain BTC/USD price oracle. No exchange feeds, no external APIs. Derives the bitcoin price from transaction data alone. Tracks block by block from height 525,000 (May 2018) onward.

 Inspired by [UTXOracle](https://utxo.live/oracle/) by [@SteveSimple](https://x.com/SteveSimple), which proved the concept. brk_oracle takes the same core insight and redesigns the algorithm for per-block resolution and rolling operation. See [comparison](#comparison-with-utxoracle) below.

@@ -46,7 +48,7 @@ For each new block:

 ### 1. Filter outputs

-Skip the coinbase transaction, then exclude noisy outputs: script types dominated by protocol activity (P2TR, P2WSH by default), dust below 1,000 sats, and round BTC amounts (0.01, 0.1, 1.0 BTC, etc.) that create false spikes unrelated to dollar purchases.
+Skip the coinbase transaction, and skip every output of a transaction carrying an `OP_RETURN`: that transaction is protocol machinery, not a dollar-denominated payment, so its payout amounts are not price signal. Then exclude noisy outputs: script types dominated by protocol activity (P2TR by default), dust below 1,000 sats, and round BTC amounts (0.01, 0.1, 1.0 BTC, etc.) that create false spikes unrelated to dollar purchases.

 ### 2. Build a log-scale histogram

@@ -116,13 +118,11 @@ Parabolic interpolation between the best bin and its two neighbors refines the e
                          log-scale                             scoring   interpolation
 ```

-## Input formats
+## Input

-The oracle accepts three input formats:
+The oracle consumes one pre-built histogram per block, a `[u32; 2400]` bin-count array passed to `process_histogram(&hist)`, and returns the updated reference bin.

- **Raw block**: `process_block(&block)` — filters and bins internally
- **Output pairs**: `process_outputs(iter)` — `(sats, output_type)` pairs, still applies configured filters
- **Histogram**: `process_histogram(&hist)` — pre-built `[u32; 2400]` array
+The caller does the filtering when it builds the histogram. For each block it skips the coinbase, drops every output of a transaction carrying an `OP_RETURN`, then bins the rest. `default_eligible_bin(sats, output_type)` (or `Oracle::output_to_bin` for a non-default `Config`) applies the per-output rules: excluded script types, dust, and round-BTC values. It returns the bin index, or `None` for a filtered output.

 The initial seed must be close to the real price at the starting height. The crate includes a `PRICES` constant with exchange prices for every height up to 630,000 to derive a seed from.

@@ -137,7 +137,7 @@ All parameters via `Config` with sensible defaults:
 | `search_below` / `search_above` | 9 / 11 | Search window around previous estimate (bins) |
 | `min_sats` | 1,000 | Dust threshold |
 | `exclude_common_round_values` | true | Filter d × 10ⁿ (d ∈ {1,2,3,5,6}) to prevent false stencil matches |
-| `excluded_output_types` | P2TR, P2WSH | Script types dominated by protocol activity |
+| `excluded_output_types` | P2TR | Script types dominated by protocol activity |

 ## Comparison with UTXOracle

@@ -150,30 +150,30 @@ All parameters via `Config` with sensible defaults:
 | Algorithm | Single-pass stencil scoring with per-offset normalization | Multi-step: dual stencil → rough estimate → output-to-USD mapping → iterative convergence |
 | Stencil | 19 round-USD offsets ($1 to $10k), each normalized to its own peak | 803-point Gaussian + weighted spike template targeting 17 round-USD amounts |
 | Round BTC handling | Excluded from histogram entirely | Histogram bins smoothed by averaging neighbors |
-| Output filtering | Per-output: script type, dust threshold, round BTC | Per-tx: exactly 2 outputs, ≤5 inputs, no same-day inputs, ≤500-byte witness |
-| Validated from | Height 550,000 (November 2018) | December 2023 |
+| Output filtering | Per-tx OP_RETURN drop, then per-output: script type, dust threshold, round BTC | Per-tx: exactly 2 outputs, ≤5 inputs, no same-day inputs, ≤500-byte witness |
+| Validated from | Height 525,000 (May 2018) | December 2023 |
 | Language | Rust | Python |
 | Dependencies | None (pure computation, caller provides block data) | Bitcoin Core RPC |
 | Bins per decade | 200 | 200 |

 ## Accuracy

-Tested over 386,251 blocks (heights 550,000 to 937,447, as of February 2026) against exchange OHLC data. Error is measured per block as distance from the oracle estimate to the exchange high/low range at that height. If the oracle falls within the range, the error is zero.
+Tested over 411,251 blocks (heights 525,000 to 949,800, as of May 2026) against exchange OHLC data. Error is measured per block as distance from the oracle estimate to the exchange high/low range at that height. If the oracle falls within the range, the error is zero.

 ### Per-block

 | Metric | Value |
 |--------|-------|
 | Median error | 0.11% |
-| 95th percentile | 0.66% |
-| 99th percentile | 1.6% |
-| 99.9th percentile | 6.2% |
-| RMSE | 0.52% |
+| 95th percentile | 0.67% |
+| 99th percentile | 1.7% |
+| 99.9th percentile | 5.4% |
+| RMSE | 0.50% |
 | Max error | 33.4% |
-| Bias | +0.01 bins (essentially zero) |
-| Blocks > 5% error | 519 (0.13%) |
-| Blocks > 10% error | 203 |
-| Blocks > 20% error | 5 |
+| Bias | +0.00 bins (essentially zero) |
+| Blocks > 5% error | 472 (0.11%) |
+| Blocks > 10% error | 177 |
+| Blocks > 20% error | 3 |

 ### Daily candles

@@ -181,26 +181,26 @@ Oracle daily OHLC built from per-block prices vs exchange daily OHLC:

 | | Median | RMSE | Max |
 |-------|--------|------|-----|
-| Open | 0.21% | 0.59% | 15.4% |
-| High | 0.53% | 1.18% | 28.0% |
-| Low | 0.50% | 1.52% | 19.6% |
-| Close | 0.24% | 0.74% | 15.5% |
+| Open | 0.21% | 0.65% | 15.3% |
+| High | 0.53% | 1.12% | 28.0% |
+| Low | 0.51% | 1.38% | 19.7% |
+| Close | 0.24% | 0.73% | 15.4% |

 ### By year

 | Year | Blocks | Median | RMSE | Max | >5% | >10% | >20% | Price range |
 |------|--------|--------|------|-----|-----|------|------|-------------|
-| 2018 | 6,492 | 0.69% | 2.34% | 33.4% | 183 | 122 | 5 | $3,129–$6,293 |
-| 2019 | 54,272 | 0.16% | 0.74% | 17.4% | 195 | 69 | 0 | $3,338–$13,868 |
-| 2020 | 53,102 | 0.10% | 0.43% | 18.1% | 68 | 3 | 0 | $3,858–$29,322 |
-| 2021 | 52,733 | 0.07% | 0.47% | 14.4% | 38 | 9 | 0 | $27,678–$69,000 |
+| 2018 | 31,492 | 0.21% | 1.11% | 33.4% | 169 | 109 | 3 | $3,129–$8,488 |
+| 2019 | 54,272 | 0.16% | 0.69% | 17.4% | 165 | 53 | 0 | $3,338–$13,868 |
+| 2020 | 53,102 | 0.10% | 0.44% | 12.6% | 70 | 6 | 0 | $3,858–$29,322 |
+| 2021 | 52,733 | 0.07% | 0.47% | 14.4% | 42 | 9 | 0 | $27,678–$69,000 |
 | 2022 | 53,230 | 0.07% | 0.32% | 6.8% | 10 | 0 | 0 | $15,460–$48,240 |
-| 2023 | 54,032 | 0.10% | 0.25% | 6.7% | 5 | 0 | 0 | $16,490–$44,700 |
-| 2024 | 53,367 | 0.11% | 0.31% | 9.7% | 16 | 0 | 0 | $38,555–$108,298 |
+| 2023 | 54,032 | 0.10% | 0.25% | 6.6% | 5 | 0 | 0 | $16,490–$44,700 |
+| 2024 | 53,367 | 0.10% | 0.28% | 6.7% | 7 | 0 | 0 | $38,555–$108,298 |
 | 2025 | 53,113 | 0.11% | 0.25% | 5.8% | 4 | 0 | 0 | $74,409–$126,198 |
-| 2026 | 5,910 | 0.10% | 0.27% | 3.3% | 0 | 0 | 0 | $60,000–$97,900 |
+| 2026 | 5,910 | 0.11% | 0.27% | 3.2% | 0 | 0 | 0 | $60,000–$97,900 |

-The oracle is only as good as the signal it reads. In late 2018 on-chain transaction volume was low and the round-dollar pattern was weak, so the first few thousand blocks are noisy (33% max error, 2.3% RMSE). By 2020 the signal is strong enough for 0.1% median accuracy. Since 2022, zero blocks exceed 10% error.
+The oracle is only as good as the signal it reads. The largest errors cluster in late 2018: during the November crash the price fell faster than the narrow search window could follow (33% max error), and on-chain volume was lower then, so the round-dollar pattern was weaker (1.1% RMSE for the year). By 2020 the signal is strong enough for 0.1% median accuracy, and since 2022 no block exceeds 10% error.

 ### Why no outlier smoothing?

@@ -208,3 +208,15 @@ Post-hoc smoothing — for example, correcting any block whose price deviates mo

 1. **Simplicity**: The oracle is a single forward pass with no lookback corrections. Adding smoothing means defining thresholds, neighbor windows, and replacement strategies, all of which add complexity for marginal gain.
 2. **Finality**: Each block's price is produced once and never revised (unless the block itself is reorged). Downstream consumers can treat the oracle output as append-only. Smoothing would require retroactively changing already-published prices, breaking that property.
+
+## Changelog
+
+### v2
+
+Changes from v1:
+
+- **OP_RETURN filter**: every output of a transaction carrying an `OP_RETURN` is now dropped from the histogram. Such transactions are protocol machinery (cross-chain swaps, anchoring) whose payout amounts can form false round-dollar patterns. These false patterns triggered the worst price glitches in v1.
+- **P2WSH reactivated**: once the OP_RETURN filter removes the protocol noise, P2WSH outputs are usable round-dollar signal again, so they are no longer excluded. P2TR stays excluded.
+- **Earlier start**: on-chain tracking begins at height 525,000 (May 2018) instead of 550,000, adding about 25,000 blocks of history.
+
+`VERSION` is exposed as a crate constant so downstream consumers can invalidate prices computed by an earlier algorithm.
--- a/crates/brk_oracle/examples/report.rs
+++ b/crates/brk_oracle/examples/report.rs
@@ -185,9 +185,12 @@ fn main() {
    let total_txs = indexer.vecs.transactions.txid.len();
    let total_outputs = indexer.vecs.outputs.value.len();

-    // Pre-collect height-indexed vecs (small). Transaction-indexed vecs are too large.
+    // Pre-collect height-indexed vecs (small). Transaction-indexed vecs are too
+    // large, so the tx-indexed first_txout_index is read through a forward cursor.
    let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
    let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
+    let mut txout_cursor = indexer.vecs.transactions.first_txout_index.cursor();
+    let mut tx_starts: Vec<usize> = Vec::new();

    let mut year_stats: Vec<YearStats> = Vec::new();
    let mut overall = YearStats::new(0);
@@ -205,27 +208,22 @@ fn main() {
            .copied()
            .unwrap_or(TxIndex::from(total_txs));

-        let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
-            indexer
-                .vecs
-                .transactions
-                .first_txout_index
-                .collect_one(ft + 1)
-                .unwrap()
-                .to_usize()
-        } else {
-            out_first
-                .get(h + 1)
-                .copied()
-                .unwrap_or(TxOutIndex::from(total_outputs))
-                .to_usize()
-        };
+        let block_first_tx = ft.to_usize() + 1;
+        let tx_count = next_ft.to_usize() - block_first_tx;
        let out_end = out_first
            .get(h + 1)
            .copied()
            .unwrap_or(TxOutIndex::from(total_outputs))
            .to_usize();

+        // First txout index of each non-coinbase tx, for per-tx grouping.
+        txout_cursor.advance(block_first_tx - txout_cursor.position());
+        tx_starts.clear();
+        for _ in 0..tx_count {
+            tx_starts.push(txout_cursor.next().unwrap().to_usize());
+        }
+        let out_start = tx_starts.first().copied().unwrap_or(out_end);
+
        let values: Vec<Sats> = indexer
            .vecs
            .outputs
@@ -237,10 +235,21 @@ fn main() {
            .output_type
            .collect_range_at(out_start, out_end);

+        // Drop every output of a tx carrying an OP_RETURN (protocol machinery).
        let mut hist = Histogram::zeros();
-        for (sats, output_type) in values.into_iter().zip(output_types) {
-            if let Some(bin) = default_eligible_bin(sats, output_type) {
-                hist.increment(bin as usize);
+        for tx in 0..tx_count {
+            let lo = tx_starts[tx] - out_start;
+            let hi = tx_starts
+                .get(tx + 1)
+                .map(|s| s - out_start)
+                .unwrap_or(out_end - out_start);
+            if output_types[lo..hi].contains(&OutputType::OpReturn) {
+                continue;
+            }
+            for i in lo..hi {
+                if let Some(bin) = default_eligible_bin(values[i], output_types[i]) {
+                    hist.increment(bin as usize);
+                }
            }
        }

--- a/crates/brk_oracle/src/config.rs
+++ b/crates/brk_oracle/src/config.rs
@@ -3,10 +3,9 @@ use brk_types::OutputType;
 /// Dust floor used by `Config::default()` and `default_eligible_bin`.
 pub(crate) const DEFAULT_MIN_SATS: u64 = 1000;

-/// Output types skipped by `Config::default()` (noisy) and the source of
-/// truth for `default_eligible_bin`'s precomputed exclusion mask.
-pub(crate) const DEFAULT_EXCLUDED_OUTPUT_TYPES: &[OutputType] =
-    &[OutputType::P2TR, OutputType::P2WSH];
+/// Output types skipped by `Config::default()` (protocol-dominated) and the
+/// source of truth for `default_eligible_bin`'s precomputed exclusion mask.
+pub(crate) const DEFAULT_EXCLUDED_OUTPUT_TYPES: &[OutputType] = &[OutputType::P2TR];

 #[derive(Clone)]
 pub struct Config {
--- a/crates/brk_oracle/src/lib.rs
+++ b/crates/brk_oracle/src/lib.rs
@@ -12,11 +12,15 @@ use config::{DEFAULT_EXCLUDED_OUTPUT_TYPES, DEFAULT_MIN_SATS};
 pub use config::Config;
 pub use histogram::Histogram;

+/// Oracle algorithm version. Bump on any change that alters computed prices
+/// so downstream consumers can invalidate cached results.
+pub const VERSION: u32 = 2;
+
 /// Pre-oracle dollar prices, one per line, heights 0..630_000.
 pub const PRICES: &str = include_str!("prices.txt");

 /// First height where the oracle computes from on-chain data.
-pub const START_HEIGHT: usize = 550_000;
+pub const START_HEIGHT: usize = 525_000;

 pub const BINS_PER_DECADE: usize = 200;
 const MIN_LOG_BTC: i32 = -8;
--- a/crates/brk_query/src/impl/mempool.rs
+++ b/crates/brk_query/src/impl/mempool.rs
@@ -29,12 +29,17 @@ impl Query {

    pub fn mempool_blocks(&self) -> Result<Vec<MempoolBlock>> {
        let mempool = self.require_mempool()?;
-        Ok(mempool.block_stats().iter().map(MempoolBlock::from).collect())
+        Ok(mempool
+            .block_stats()
+            .iter()
+            .map(MempoolBlock::from)
+            .collect())
    }

    /// Indexer-backed resolver for confirmed-parent prevouts. Boxed so
    /// the caller (typically [`Mempool::start_with`]) can stash one
    /// resolver behind a stable type for the lifetime of the loop.
+    #[allow(clippy::type_complexity)]
    pub fn indexer_prevout_resolver(
        &self,
    ) -> Box<dyn Fn(&[(Txid, Vout)]) -> FxHashMap<(Txid, Vout), TxOut> + Send + Sync> {