oracle: cleanup

This commit is contained in:
nym21
2026-05-21 11:02:56 +02:00
parent 56e8103178
commit bf8de73541
9 changed files with 137 additions and 106858 deletions
+6 -5
View File
@@ -124,7 +124,7 @@ The oracle consumes one pre-built histogram per block via `process_histogram(&hi
The caller does the filtering when it builds the histogram. For each block it skips the coinbase, drops every output of a transaction carrying an `OP_RETURN`, then bins the rest. `default_eligible_bin(sats, output_type)` (or `Oracle::output_to_bin` for a non-default `Config`) applies the per-output rules: excluded script types, dust, and round-BTC values. It returns the bin index, or `None` for a filtered output.
The initial seed must be close to the real price at the starting height. The crate includes a `PRICES` constant with exchange prices for every height up to 630,000 to derive a seed from.
The initial seed must be close to the real price at the starting height. The crate includes a `PRICES` constant with exchange prices for heights 0..525,000; the last entry (height 524,999) seeds the oracle's first on-chain computation at `START_HEIGHT`.
## Configuration
@@ -145,15 +145,16 @@ All parameters via `Config` with sensible defaults:
| | brk_oracle | UTXOracle |
|---|---|---|
| Resolution | Per-block (~10 min) + daily candles | Per-run consensus price + per-output intraday scatter |
| Resolution | Per-block (~10 min); daily OHLC built downstream | Per-run consensus price + per-output intraday scatter |
| Operation | Rolling: EMA over ring buffer, updates each block | Batch: processes a full day from scratch, stateless |
| Algorithm | Single-pass stencil scoring with per-offset normalization | Multi-step: dual stencil → rough estimate → output-to-USD mapping → iterative convergence |
| Steps to compute price | 7 (filter+bin → ring insert → EMA → per-offset peaks → score → argmax+parabolic → bin→price) | 10 (filter+bin → clip → smooth round BTC → sum → normalize → cap extremes → dual-stencil slide → neighbor weight-avg → output-to-USD map → iterative central price) |
| Stencil | 19 round-USD offsets ($1 to $10k), each normalized to its own peak | 803-point Gaussian + weighted spike template targeting 17 round-USD amounts |
| Round BTC handling | Excluded from histogram entirely | Histogram bins smoothed by averaging neighbors |
| Output filtering | Per-tx OP_RETURN drop, then per-output: script type, dust threshold, round BTC | Per-tx: exactly 2 outputs, ≤5 inputs, no same-day inputs, ≤500-byte witness |
| Validated from | Height 525,000 (May 2018) | December 2023 |
| Output filtering | Per-tx OP_RETURN drop, then per-output: script type, dust threshold, round BTC | Per-tx: not coinbase, no OP_RETURN, exactly 2 outputs, ≤5 inputs, no same-day inputs, ≤500-byte witness |
| Validated from | Height 525,000 (May 2018) | Dec 15, 2023 |
| Language | Rust | Python |
| Dependencies | None (pure computation, caller provides block data) | Bitcoin Core RPC |
| Dependencies | None (pure computation, caller provides block data) | bitcoin-cli + direct blk file reads |
| Bins per decade | 200 | 200 |
## Accuracy
@@ -1,295 +0,0 @@
//! Compare specific digit filter configurations across multiple start heights.
//!
//! Run with: cargo run -p brk_oracle --example compare_digits --release
use std::path::PathBuf;
use std::time::Instant;
use brk_indexer::Indexer;
use brk_oracle::{Config, Histogram, NUM_BINS, Oracle, PRICES, cents_to_bin, sats_to_bin};
use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
use vecdb::{AnyVec, ReadableVec, VecIndex};
const BINS_5PCT: f64 = 4.24;
const BINS_10PCT: f64 = 8.28;
const BINS_20PCT: f64 = 15.84;
fn bins_to_pct(bins: f64) -> f64 {
(10.0_f64.powf(bins / 200.0) - 1.0) * 100.0
}
fn seed_bin(start_height: usize) -> f64 {
let price: f64 = PRICES
.lines()
.nth(start_height - 1)
.expect("prices.txt too short")
.parse()
.expect("Failed to parse seed price");
cents_to_bin(price * 100.0)
}
fn leading_digit(sats: u64) -> u8 {
let log = (sats as f64).log10();
let magnitude = 10.0_f64.powf(log.floor());
let d = (sats as f64 / magnitude).round() as u8;
if d >= 10 { 1 } else { d }
}
fn is_round(sats: u64) -> bool {
let log = (sats as f64).log10();
let magnitude = 10.0_f64.powf(log.floor());
let leading = (sats as f64 / magnitude).round();
let round_val = leading * magnitude;
(sats as f64 - round_val).abs() <= round_val * 0.001
}
struct Stats {
total_sq_err: f64,
total_bias: f64,
max_err: f64,
total_blocks: u64,
gt_5pct: u64,
gt_10pct: u64,
gt_20pct: u64,
}
impl Stats {
fn new() -> Self {
Self {
total_sq_err: 0.0,
total_bias: 0.0,
max_err: 0.0,
total_blocks: 0,
gt_5pct: 0,
gt_10pct: 0,
gt_20pct: 0,
}
}
fn update(&mut self, err: f64) {
self.total_sq_err += err * err;
self.total_bias += err;
self.total_blocks += 1;
let abs_err = err.abs();
if abs_err > self.max_err {
self.max_err = abs_err;
}
if abs_err > BINS_5PCT {
self.gt_5pct += 1;
}
if abs_err > BINS_10PCT {
self.gt_10pct += 1;
}
if abs_err > BINS_20PCT {
self.gt_20pct += 1;
}
}
fn rmse_pct(&self) -> f64 {
bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
}
fn max_pct(&self) -> f64 {
bins_to_pct(self.max_err)
}
fn bias(&self) -> f64 {
self.total_bias / self.total_blocks as f64
}
}
fn main() {
let t0 = Instant::now();
let data_dir = std::env::var("BRK_DIR")
.map(PathBuf::from)
.unwrap_or_else(|_| {
let home = std::env::var("HOME").unwrap();
PathBuf::from(home).join(".brk")
});
let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
let total_heights = indexer.vecs.blocks.timestamp.len();
let manifest_dir = env!("CARGO_MANIFEST_DIR");
let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
&std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
.expect("Failed to read height_price_ohlc.json"),
)
.expect("Failed to parse height OHLC");
let height_bands: Vec<(f64, f64)> = height_ohlc
.iter()
.map(|ohlc| {
let high = ohlc[1];
let low = ohlc[2];
if high > 0.0 && low > 0.0 {
(cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
} else {
(0.0, 0.0)
}
})
.collect();
// Configs to compare.
// 987654321
let masks: &[(u16, &str)] = &[
(0b0_0111_0111, "{1,2,3,5,6,7}"),
(0b0_0011_0111, "{1,2,3,5,6}"),
(0b0_0001_1111, "{1,2,3,4,5}"),
(0b0_0001_0111, "{1,2,3,5}"),
];
let start_heights: &[usize] = &[575_000, 600_000, 630_000];
// (mask_idx, start_idx) -> (Oracle, Stats)
let n = masks.len() * start_heights.len();
let mut oracles: Vec<Option<Oracle>> = (0..n).map(|_| None).collect();
let mut stats: Vec<Stats> = (0..n).map(|_| Stats::new()).collect();
let idx = |m: usize, s: usize| -> usize { m * start_heights.len() + s };
let total_txs = indexer.vecs.transactions.txid.len();
let total_outputs = indexer.vecs.outputs.value.len();
let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
let ref_config = Config::default();
let earliest_start = *start_heights.iter().min().unwrap();
for h in earliest_start..total_heights {
let ft = first_tx_index[h];
let next_ft = first_tx_index
.get(h + 1)
.copied()
.unwrap_or(TxIndex::from(total_txs));
let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
indexer
.vecs
.transactions
.first_txout_index
.collect_one(ft + 1)
.unwrap()
.to_usize()
} else {
out_first
.get(h + 1)
.copied()
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize()
};
let out_end = out_first
.get(h + 1)
.copied()
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize();
let values: Vec<Sats> = indexer
.vecs
.outputs
.value
.collect_range_at(out_start, out_end);
let output_types: Vec<OutputType> = indexer
.vecs
.outputs
.output_type
.collect_range_at(out_start, out_end);
// Build full histogram and per-digit histograms.
let mut full_hist = Histogram::zeros();
let mut digit_hist: [Histogram; 9] = std::array::from_fn(|_| Histogram::zeros());
for (sats, output_type) in values.into_iter().zip(output_types) {
if ref_config.excluded_output_types.contains(&output_type) {
continue;
}
if *sats < ref_config.min_sats {
continue;
}
if let Some(bin) = sats_to_bin(sats) {
full_hist.increment(bin);
if is_round(*sats) {
let d = leading_digit(*sats);
if (1..=9).contains(&d) {
digit_hist[(d - 1) as usize].increment(bin);
}
}
}
}
// Feed each (mask, start_height) combo.
for (mi, &(mask, _)) in masks.iter().enumerate() {
// Build filtered histogram for this mask.
let mut hist = full_hist.clone();
(0..9usize).for_each(|d| {
if mask & (1 << d) != 0 {
for bin in 0..NUM_BINS {
hist[bin] -= digit_hist[d][bin];
}
}
});
for (si, &sh) in start_heights.iter().enumerate() {
if h < sh {
continue;
}
let i = idx(mi, si);
if oracles[i].is_none() {
oracles[i] = Some(Oracle::new(
seed_bin(sh),
Config {
exclude_common_round_values: false,
..Default::default()
},
));
}
let ref_bin = oracles[i].as_mut().unwrap().process_histogram(&hist);
if h < height_bands.len() {
let (high_bin, low_bin) = height_bands[h];
if high_bin > 0.0 && low_bin > 0.0 {
let err = if ref_bin < high_bin {
ref_bin - high_bin
} else if ref_bin > low_bin {
ref_bin - low_bin
} else {
0.0
};
stats[i].update(err);
}
}
}
}
}
// Print results grouped by start height.
for (si, &sh) in start_heights.iter().enumerate() {
println!();
println!("@ {}k:", sh / 1000);
println!(
" {:<16} {:>8} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
"Digits", "Blocks", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
);
println!(" {}", "-".repeat(72));
for (mi, &(_, label)) in masks.iter().enumerate() {
let s = &stats[idx(mi, si)];
println!(
" {:<16} {:>8} {:>7.3}% {:>7.1}% {:>6} {:>6} {:>6} {:>+8.2}",
label,
s.total_blocks,
s.rmse_pct(),
s.max_pct(),
s.gt_5pct,
s.gt_10pct,
s.gt_20pct,
s.bias()
);
}
}
println!("\nDone in {:.1}s", t0.elapsed().as_secs_f64());
}
+128 -131
View File
@@ -1,10 +1,12 @@
//! Verify oracle determinism: oracles started from different heights converge
//! to identical ref_bin values after the ring buffer fills.
//! Verify the production restart property: an oracle restored via
//! `from_checkpoint` (seeded from the previous block's stored cents price,
//! replayed over the last `window_size` blocks) produces bit-exact `ref_bin`
//! values matching a continuously-running oracle from the restart height
//! onward.
//!
//! Creates a reference oracle at height 575k and test oracles every 1000 blocks
//! up to 630k. After window_size blocks, each test oracle should produce the
//! same ref_bin as the reference, proving the truncated EMA provides
//! start-point independence.
//! Mirrors the production filter exactly (per-tx OP_RETURN drop + per-output
//! `default_eligible_bin`), so it exercises the same code path
//! `brk_computer::prices::compute::feed_blocks` uses at runtime.
//!
//! Run with: cargo run -p brk_oracle --example determinism --release
@@ -12,26 +14,49 @@ use std::path::PathBuf;
use brk_indexer::Indexer;
use brk_oracle::{
Config, Histogram, Oracle, PRICES, START_HEIGHT, cents_to_bin, default_eligible_bin,
Config, Histogram, Oracle, PRICES, START_HEIGHT, bin_to_cents, cents_to_bin,
default_eligible_bin,
};
use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
use vecdb::{AnyVec, ReadableVec, VecIndex};
fn seed_bin(height: usize) -> f64 {
fn seed_bin_for_start_height() -> f64 {
let price: f64 = PRICES
.lines()
.nth(height - 1)
.expect("prices.txt too short")
.nth(START_HEIGHT - 1)
.expect("prices.txt too short for START_HEIGHT")
.parse()
.expect("Failed to parse seed price");
cents_to_bin(price * 100.0)
}
struct TestRun {
start_height: usize,
oracle: Option<Oracle>,
converged_at: Option<usize>,
diverged_after: bool,
struct Block {
values: Vec<Sats>,
output_types: Vec<OutputType>,
tx_starts: Vec<usize>,
out_start: usize,
out_end: usize,
}
fn build_histogram(block: &Block) -> Histogram {
let mut hist = Histogram::zeros();
for tx in 0..block.tx_starts.len() {
let lo = block.tx_starts[tx] - block.out_start;
let hi = block
.tx_starts
.get(tx + 1)
.map(|s| s - block.out_start)
.unwrap_or(block.out_end - block.out_start);
if block.output_types[lo..hi].contains(&OutputType::OpReturn) {
continue;
}
for i in lo..hi {
if let Some(bin) = default_eligible_bin(block.values[i], block.output_types[i]) {
hist.increment(bin as usize);
}
}
}
hist
}
fn main() {
@@ -48,59 +73,50 @@ fn main() {
let config = Config::default();
let window_size = config.window_size;
let restart_offset = 1000;
let end_offset = restart_offset + window_size * 4;
let end_height = (START_HEIGHT + end_offset).min(total_heights);
let restart_at = START_HEIGHT + restart_offset;
let warmup_start = restart_at - window_size;
assert!(
end_height > restart_at,
"indexer has {total_heights} blocks; need at least {} to test restart at {restart_at}",
restart_at + 1
);
println!(
"Loading {} blocks ({START_HEIGHT}..{end_height})...",
end_height - START_HEIGHT
);
let total_txs = indexer.vecs.transactions.txid.len();
let total_outputs = indexer.vecs.outputs.value.len();
let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
let mut txout_cursor = indexer.vecs.transactions.first_txout_index.cursor();
// Reference oracle at 575k.
let ref_start = START_HEIGHT;
let mut ref_oracle = Oracle::new(seed_bin(ref_start), Config::default());
// Test oracles every 1000 blocks from 576k to 630k.
let mut runs: Vec<TestRun> = (576_000..=630_000)
.step_by(1000)
.map(|h| TestRun {
start_height: h,
oracle: None,
converged_at: None,
diverged_after: false,
})
.collect();
let last_start = runs.last().map(|r| r.start_height).unwrap_or(ref_start);
// Process enough blocks for all oracles to converge + verification margin.
let end_height = (last_start + window_size + 100).min(total_heights);
let mut blocks: Vec<Block> = Vec::with_capacity(end_height - START_HEIGHT);
for h in START_HEIGHT..end_height {
let ft = first_tx_index[h];
let next_ft = first_tx_index
.get(h + 1)
.copied()
.unwrap_or(TxIndex::from(total_txs));
let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
indexer
.vecs
.transactions
.first_txout_index
.collect_one(ft + 1)
.unwrap()
.to_usize()
} else {
out_first
.get(h + 1)
.copied()
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize()
};
let block_first_tx = ft.to_usize() + 1;
let tx_count = next_ft.to_usize() - block_first_tx;
let out_end = out_first
.get(h + 1)
.copied()
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize();
txout_cursor.advance(block_first_tx - txout_cursor.position());
let mut tx_starts: Vec<usize> = Vec::with_capacity(tx_count);
for _ in 0..tx_count {
tx_starts.push(txout_cursor.next().unwrap().to_usize());
}
let out_start = tx_starts.first().copied().unwrap_or(out_end);
let values: Vec<Sats> = indexer
.vecs
.outputs
@@ -112,95 +128,76 @@ fn main() {
.output_type
.collect_range_at(out_start, out_end);
let mut hist = Histogram::zeros();
for (sats, output_type) in values.into_iter().zip(output_types) {
if let Some(bin) = default_eligible_bin(sats, output_type) {
hist.increment(bin as usize);
}
}
let ref_bin = ref_oracle.process_histogram(&hist);
for run in &mut runs {
if h < run.start_height {
continue;
}
if run.oracle.is_none() {
run.oracle = Some(Oracle::new(seed_bin(run.start_height), Config::default()));
}
let test_bin = run.oracle.as_mut().unwrap().process_histogram(&hist);
if run.converged_at.is_some() {
if test_bin != ref_bin {
run.diverged_after = true;
}
} else if test_bin == ref_bin {
run.converged_at = Some(h);
}
}
blocks.push(Block {
values,
output_types,
tx_starts,
out_start,
out_end,
});
}
// Print results.
println!();
println!("{:<12} {:>16} {:>8}", "Start", "Converged at", "Blocks");
println!("{}", "-".repeat(40));
let mut continuous = Oracle::new(seed_bin_for_start_height(), config.clone());
let continuous_bins: Vec<f64> = blocks
.iter()
.map(|b| continuous.process_histogram(&build_histogram(b)))
.collect();
println!("Continuous oracle: {} blocks processed", continuous_bins.len());
let mut max_blocks = 0usize;
let mut failed = Vec::new();
let mut diverged = Vec::new();
for run in &runs {
if let Some(converged) = run.converged_at {
let blocks = converged - run.start_height;
if blocks > max_blocks {
max_blocks = blocks;
}
println!("{:<12} {:>16} {:>8}", run.start_height, converged, blocks);
if run.diverged_after {
diverged.push(run.start_height);
}
} else {
println!("{:<12} {:>16} {:>8}", run.start_height, "NEVER", "-");
failed.push(run.start_height);
}
}
println!();
let prev_bin = continuous_bins[restart_at - START_HEIGHT - 1];
let seed_bin = cents_to_bin(bin_to_cents(prev_bin) as f64);
println!(
"{}/{} converged, max {} blocks to converge (window_size={})",
runs.len() - failed.len(),
runs.len(),
max_blocks,
window_size,
"Restart at {restart_at}: prev_bin={prev_bin:.4} -> cents -> seed_bin={seed_bin:.4} (delta {:.6})",
seed_bin - prev_bin
);
if !diverged.is_empty() {
println!("DIVERGED after convergence: {:?}", diverged);
}
if !failed.is_empty() {
println!("NEVER converged: {:?}", failed);
let warmup_slice = &blocks[warmup_start - START_HEIGHT..restart_at - START_HEIGHT];
let mut restored = Oracle::from_checkpoint(seed_bin, config.clone(), |o| {
for b in warmup_slice {
o.process_histogram(&build_histogram(b));
}
});
let restored_bins: Vec<f64> = blocks[restart_at - START_HEIGHT..]
.iter()
.map(|b| restored.process_histogram(&build_histogram(b)))
.collect();
println!("Restored oracle: {} blocks processed", restored_bins.len());
let mut mismatches: Vec<(usize, f64, f64)> = Vec::new();
for (i, &r) in restored_bins.iter().enumerate() {
let c = continuous_bins[restart_at - START_HEIGHT + i];
if r != c {
mismatches.push((restart_at + i, c, r));
}
}
// Assertions.
assert!(
failed.is_empty(),
"{} oracles never converged: {:?}",
failed.len(),
failed
);
assert!(
diverged.is_empty(),
"{} oracles diverged after convergence: {:?}",
diverged.len(),
diverged
);
assert!(
max_blocks <= window_size * 2,
"Convergence took {} blocks, expected <= {} (2 * window_size)",
max_blocks,
window_size * 2
println!();
if mismatches.is_empty() {
println!(
"All {} blocks from {restart_at} onward match exactly.",
restored_bins.len()
);
} else {
println!(
"{} of {} blocks differ (showing up to 5):",
mismatches.len(),
restored_bins.len()
);
for (h, c, r) in mismatches.iter().take(5) {
println!(
" h={h}: continuous={c:.6}, restored={r:.6}, delta={:.6}",
r - c
);
}
}
assert_eq!(
mismatches.len(),
0,
"restored oracle diverged from continuous oracle"
);
println!();
println!("All assertions passed!");
println!("Assertion passed: from_checkpoint restart is bit-exact.");
}
-272
View File
@@ -1,272 +0,0 @@
//! Diagnostic: sweep oracle start heights and clamp-top-N strategies.
//!
//! Run with: cargo run -p brk_oracle --example noise --release
use std::path::PathBuf;
use std::time::Instant;
use brk_indexer::Indexer;
use brk_oracle::{Config, Histogram, Oracle, PRICES, cents_to_bin, default_eligible_bin};
use brk_types::{Sats, TxIndex, TxOutIndex};
use vecdb::{AnyVec, ReadableVec, VecIndex};
const BINS_5PCT: f64 = 4.24;
const BINS_10PCT: f64 = 8.28;
const BINS_20PCT: f64 = 15.84;
const BPD: f64 = 200.0;
fn bins_to_pct(bins: f64) -> f64 {
(10.0_f64.powf(bins / BPD) - 1.0) * 100.0
}
fn seed_bin(start_height: usize) -> f64 {
let price: f64 = PRICES
.lines()
.nth(start_height - 1)
.expect("prices.txt too short")
.parse()
.expect("Failed to parse seed price");
cents_to_bin(price * 100.0)
}
/// Clamp the top N bins in `src` down to the (N+1)th highest value, writing into `dst`.
fn clamp_top_n(src: &Histogram, dst: &mut Histogram, n: usize) {
let mut top: Vec<u32> = src.iter().copied().filter(|&v| v > 0).collect();
top.sort_unstable_by(|a, b| b.cmp(a));
let clamp_to = if top.len() > n { top[n] } else { 0 };
for (i, &v) in src.iter().enumerate() {
dst[i] = v.min(clamp_to.max(v.min(clamp_to)));
}
}
fn main() {
let t0 = Instant::now();
let data_dir = std::env::var("BRK_DIR")
.map(PathBuf::from)
.unwrap_or_else(|_| {
let home = std::env::var("HOME").unwrap();
PathBuf::from(home).join(".brk")
});
let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
let total_heights = indexer.vecs.blocks.timestamp.len();
let manifest_dir = env!("CARGO_MANIFEST_DIR");
let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
&std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
.expect("Failed to read height_price_ohlc.json"),
)
.expect("Failed to parse height OHLC");
let height_bands: Vec<(f64, f64)> = height_ohlc
.iter()
.map(|ohlc| {
let high = ohlc[1];
let low = ohlc[2];
if high > 0.0 && low > 0.0 {
(cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
} else {
(0.0, 0.0)
}
})
.collect();
// Start heights: 630k, 600k, 575k, then 570k down to 500k by 5k.
let mut start_heights: Vec<usize> = vec![630_000, 600_000, 575_000];
let mut h = 570_000;
while h >= 500_000 {
start_heights.push(h);
h -= 5_000;
}
let lowest = *start_heights.iter().min().unwrap();
// Clamp-top-N values to test: 0 (no clamp), 2, 3, 5, 10.
let clamp_values: Vec<usize> = vec![0, 2, 3, 5, 10];
// Build per-block RAW histograms from the lowest start height.
eprintln!("Building histograms from height {}...", lowest);
let total_txs = indexer.vecs.transactions.txid.len();
let total_outputs = indexer.vecs.outputs.value.len();
let first_txout_index_reader = indexer.vecs.transactions.first_txout_index.reader();
let value_reader = indexer.vecs.outputs.value.reader();
let output_type_reader = indexer.vecs.outputs.output_type.reader();
let config = Config::default();
let total_blocks = total_heights - lowest;
struct BlockData {
hist: Histogram,
high_bin: f64,
low_bin: f64,
}
let mut blocks: Vec<BlockData> = Vec::with_capacity(total_blocks);
for h in lowest..total_heights {
let first_tx_index: TxIndex = indexer
.vecs
.transactions
.first_tx_index
.collect_one_at(h)
.unwrap();
let next_first_tx_index: TxIndex = indexer
.vecs
.transactions
.first_tx_index
.collect_one_at(h + 1)
.unwrap_or(TxIndex::from(total_txs));
let out_start = if first_tx_index.to_usize() + 1 < next_first_tx_index.to_usize() {
first_txout_index_reader
.get(first_tx_index.to_usize() + 1)
.to_usize()
} else {
indexer
.vecs
.outputs
.first_txout_index
.collect_one_at(h + 1)
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize()
};
let out_end: usize = indexer
.vecs
.outputs
.first_txout_index
.collect_one_at(h + 1)
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize();
let mut hist = Histogram::zeros();
for i in out_start..out_end {
let sats: Sats = value_reader.get(i);
let output_type = output_type_reader.get(i);
if let Some(bin) = default_eligible_bin(sats, output_type) {
hist.increment(bin as usize);
}
}
let (high_bin, low_bin) = if h < height_bands.len() {
height_bands[h]
} else {
(0.0, 0.0)
};
blocks.push(BlockData {
hist,
high_bin,
low_bin,
});
if (h - lowest).is_multiple_of(50_000) {
eprint!(
"\r {}/{} ({:.0}%)",
h - lowest,
total_blocks,
(h - lowest) as f64 / total_blocks as f64 * 100.0
);
}
}
eprintln!(
"\r {} blocks built in {:.1}s",
blocks.len(),
t0.elapsed().as_secs_f64()
);
// For each clamp value, run all start heights.
for &clamp_n in &clamp_values {
println!();
let label = if clamp_n == 0 {
"no clamp".to_string()
} else {
format!("clamp top {}", clamp_n)
};
println!("=== {} ===", label);
println!(
"{:>8} {:>8} {:>8} {:>8} {:>6} {:>6} {:>6} {:>8}",
"Start", "Blocks", "RMSE%", "Worst%", ">5%", ">10%", ">20%", "Worst@"
);
println!("{}", "-".repeat(72));
for &start_height in &start_heights {
let mut oracle = Oracle::new(seed_bin(start_height), config.clone());
let block_offset = start_height - lowest;
let mut worst_err: f64 = 0.0;
let mut worst_height: usize = 0;
let mut gt_5: u64 = 0;
let mut gt_10: u64 = 0;
let mut gt_20: u64 = 0;
let mut total_sq_err: f64 = 0.0;
let mut total_measured: u64 = 0;
let mut clamped_hist = Histogram::zeros();
for (i, bd) in blocks[block_offset..].iter().enumerate() {
if clamp_n > 0 {
clamp_top_n(&bd.hist, &mut clamped_hist, clamp_n);
oracle.process_histogram(&clamped_hist);
} else {
oracle.process_histogram(&bd.hist);
}
let height = start_height + i;
let ref_bin = oracle.ref_bin();
if bd.high_bin <= 0.0 || bd.low_bin <= 0.0 {
continue;
}
let err = if ref_bin < bd.high_bin {
ref_bin - bd.high_bin
} else if ref_bin > bd.low_bin {
ref_bin - bd.low_bin
} else {
0.0
};
total_measured += 1;
total_sq_err += err * err;
let abs_err = err.abs();
if abs_err > BINS_5PCT {
gt_5 += 1;
}
if abs_err > BINS_10PCT {
gt_10 += 1;
}
if abs_err > BINS_20PCT {
gt_20 += 1;
}
if abs_err > worst_err {
worst_err = abs_err;
worst_height = height;
}
}
let rmse = if total_measured > 0 {
bins_to_pct((total_sq_err / total_measured as f64).sqrt())
} else {
0.0
};
println!(
"{:>8} {:>8} {:>7.3}% {:>7.1}% {:>6} {:>6} {:>6} {}",
format!("{}k", start_height / 1000),
total_measured,
rmse,
bins_to_pct(worst_err),
gt_5,
gt_10,
gt_20,
worst_height,
);
}
}
println!("\nTotal time: {:.1}s", t0.elapsed().as_secs_f64());
}
-416
View File
@@ -1,416 +0,0 @@
//! Sweep round-value digit filter to find optimal configuration.
//!
//! Tests all 512 subsets of leading digits {1,...,9} to find which
//! digits to filter out for best oracle accuracy.
//!
//! Phase 1: single pass over indexer, precompute per-block histograms.
//! Phase 2: run 512 configs in parallel across CPU cores.
//!
//! Run with: cargo run -p brk_oracle --example sweep_digits --release
use std::path::PathBuf;
use std::time::Instant;
use brk_indexer::Indexer;
use brk_oracle::{Config, Histogram, Oracle, PRICES, cents_to_bin, sats_to_bin};
use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
use vecdb::{AnyVec, ReadableVec, VecIndex};
const BINS_5PCT: f64 = 4.24;
const BINS_10PCT: f64 = 8.28;
const BINS_20PCT: f64 = 15.84;
fn bins_to_pct(bins: f64) -> f64 {
(10.0_f64.powf(bins / 200.0) - 1.0) * 100.0
}
fn seed_bin(start_height: usize) -> f64 {
let price: f64 = PRICES
.lines()
.nth(start_height - 1)
.expect("prices.txt too short")
.parse()
.expect("Failed to parse seed price");
cents_to_bin(price * 100.0)
}
fn leading_digit(sats: u64) -> u8 {
let log = (sats as f64).log10();
let magnitude = 10.0_f64.powf(log.floor());
let d = (sats as f64 / magnitude).round() as u8;
if d >= 10 { 1 } else { d }
}
fn is_round(sats: u64) -> bool {
let log = (sats as f64).log10();
let magnitude = 10.0_f64.powf(log.floor());
let leading = (sats as f64 / magnitude).round();
let round_val = leading * magnitude;
(sats as f64 - round_val).abs() <= round_val * 0.001
}
fn mask_label(mask: u16) -> String {
let digits: String = (1..=9u8)
.filter(|&d| mask & (1 << (d - 1)) != 0)
.map(|d| char::from_digit(d as u32, 10).unwrap())
.collect();
if digits.is_empty() {
"none".to_string()
} else {
digits
}
}
struct Stats {
total_sq_err: f64,
total_bias: f64,
max_err: f64,
total_blocks: u64,
gt_5pct: u64,
gt_10pct: u64,
gt_20pct: u64,
}
impl Stats {
fn new() -> Self {
Self {
total_sq_err: 0.0,
total_bias: 0.0,
max_err: 0.0,
total_blocks: 0,
gt_5pct: 0,
gt_10pct: 0,
gt_20pct: 0,
}
}
fn update(&mut self, err: f64) {
self.total_sq_err += err * err;
self.total_bias += err;
self.total_blocks += 1;
let abs_err = err.abs();
if abs_err > self.max_err {
self.max_err = abs_err;
}
if abs_err > BINS_5PCT {
self.gt_5pct += 1;
}
if abs_err > BINS_10PCT {
self.gt_10pct += 1;
}
if abs_err > BINS_20PCT {
self.gt_20pct += 1;
}
}
fn rmse_pct(&self) -> f64 {
bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
}
fn max_pct(&self) -> f64 {
bins_to_pct(self.max_err)
}
fn bias(&self) -> f64 {
self.total_bias / self.total_blocks as f64
}
}
struct BlockData {
full_hist: Histogram,
/// (bin_index, leading_digit) for outputs that are round values.
round_outputs: Vec<(u16, u8)>,
high_bin: f64,
low_bin: f64,
}
fn main() {
let t0 = Instant::now();
let data_dir = std::env::var("BRK_DIR")
.map(PathBuf::from)
.unwrap_or_else(|_| {
let home = std::env::var("HOME").unwrap();
PathBuf::from(home).join(".brk")
});
let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
let total_heights = indexer.vecs.blocks.timestamp.len();
let manifest_dir = env!("CARGO_MANIFEST_DIR");
let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
&std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
.expect("Failed to read height_price_ohlc.json"),
)
.expect("Failed to parse height OHLC");
let height_bands: Vec<(f64, f64)> = height_ohlc
.iter()
.map(|ohlc| {
let high = ohlc[1];
let low = ohlc[2];
if high > 0.0 && low > 0.0 {
(cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
} else {
(0.0, 0.0)
}
})
.collect();
let sweep_start: usize = 575_000;
// Phase 1: precompute per-block data in a single pass over the indexer.
eprintln!("Phase 1: precomputing block data...");
let total_txs = indexer.vecs.transactions.txid.len();
let total_outputs = indexer.vecs.outputs.value.len();
let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
let ref_config = Config::default();
let total_blocks = total_heights - sweep_start;
let mut blocks: Vec<BlockData> = Vec::with_capacity(total_blocks);
for h in sweep_start..total_heights {
let ft = first_tx_index[h];
let next_ft = first_tx_index
.get(h + 1)
.copied()
.unwrap_or(TxIndex::from(total_txs));
let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
indexer
.vecs
.transactions
.first_txout_index
.collect_one(ft + 1)
.unwrap()
.to_usize()
} else {
out_first
.get(h + 1)
.copied()
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize()
};
let out_end = out_first
.get(h + 1)
.copied()
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize();
let values: Vec<Sats> = indexer
.vecs
.outputs
.value
.collect_range_at(out_start, out_end);
let output_types: Vec<OutputType> = indexer
.vecs
.outputs
.output_type
.collect_range_at(out_start, out_end);
let mut full_hist = Histogram::zeros();
let mut round_outputs = Vec::new();
for (sats, output_type) in values.into_iter().zip(output_types) {
if ref_config.excluded_output_types.contains(&output_type) {
continue;
}
if *sats < ref_config.min_sats {
continue;
}
if let Some(bin) = sats_to_bin(sats) {
full_hist.increment(bin);
if is_round(*sats) {
let d = leading_digit(*sats);
if (1..=9).contains(&d) {
round_outputs.push((bin as u16, d));
}
}
}
}
let (high_bin, low_bin) = if h < height_bands.len() {
height_bands[h]
} else {
(0.0, 0.0)
};
blocks.push(BlockData {
full_hist,
round_outputs,
high_bin,
low_bin,
});
if (h - sweep_start).is_multiple_of(50_000) {
eprint!(
"\r {}/{} ({:.0}%)",
h - sweep_start,
total_blocks,
(h - sweep_start) as f64 / total_blocks as f64 * 100.0
);
}
}
let mem_hists = blocks.len() * std::mem::size_of::<Histogram>();
let mem_rounds: usize = blocks.iter().map(|b| b.round_outputs.len() * 3).sum();
eprintln!(
"\r {} blocks precomputed ({:.1} GB hists + {:.0} MB rounds) in {:.1}s",
blocks.len(),
mem_hists as f64 / 1e9,
mem_rounds as f64 / 1e6,
t0.elapsed().as_secs_f64()
);
// Phase 2: sweep digit masks in parallel.
// Always filter digit 1 (powers of 10), sweep digits 2-9.
let base_mask: u16 = 1 << 0; // digit 1 always on
let num_masks: usize = 256; // 2^8 subsets of {2,...,9}
let num_threads = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(8);
eprintln!(
"Phase 2: sweeping {} masks across {} threads...",
num_masks, num_threads
);
let t1 = Instant::now();
let blocks = &blocks; // shared reference for threads
let all_results: Vec<(u16, Stats)> = std::thread::scope(|s| {
let masks_per_thread = num_masks.div_ceil(num_threads);
let handles: Vec<_> = (0..num_threads)
.map(|t| {
s.spawn(move || {
let mask_start = t * masks_per_thread;
let mask_end = ((t + 1) * masks_per_thread).min(num_masks);
let mut results = Vec::with_capacity(mask_end - mask_start);
for idx in mask_start..mask_end {
// Shift idx bits into positions 1-8 (digits 2-9) and add base_mask (digit 1).
let mask = base_mask | ((idx as u16) << 1);
let mut oracle = Oracle::new(
seed_bin(sweep_start),
Config {
exclude_common_round_values: false,
..Default::default()
},
);
let mut stats = Stats::new();
for bd in blocks.iter() {
let mut hist = bd.full_hist.clone();
for &(bin, digit) in &bd.round_outputs {
if mask & (1 << (digit - 1)) != 0 {
hist[bin as usize] -= 1;
}
}
let ref_bin = oracle.process_histogram(&hist);
if bd.high_bin > 0.0 && bd.low_bin > 0.0 {
let err = if ref_bin < bd.high_bin {
ref_bin - bd.high_bin
} else if ref_bin > bd.low_bin {
ref_bin - bd.low_bin
} else {
0.0
};
stats.update(err);
}
}
results.push((mask, stats));
}
results
})
})
.collect();
handles
.into_iter()
.flat_map(|h| h.join().unwrap())
.collect()
});
eprintln!(" Done in {:.1}s.", t1.elapsed().as_secs_f64());
// Sort by RMSE.
let mut results: Vec<&(u16, Stats)> = all_results.iter().collect();
results.sort_by(|a, b| a.1.rmse_pct().partial_cmp(&b.1.rmse_pct()).unwrap());
// Print top 20.
println!();
println!("Top 20 (by RMSE):");
println!(
"{:>4} {:>12} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
"#", "Digits", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
);
println!("{}", "-".repeat(70));
for (rank, (mask, s)) in results.iter().take(20).enumerate() {
println!(
"{:>4} {:>12} {:>8.3}% {:>8.1}% {:>6} {:>6} {:>6} {:>+8.2}",
rank + 1,
mask_label(*mask),
s.rmse_pct(),
s.max_pct(),
s.gt_5pct,
s.gt_10pct,
s.gt_20pct,
s.bias()
);
}
// Print bottom 5.
println!();
println!("Bottom 5 (worst):");
println!(
"{:>4} {:>12} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
"#", "Digits", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
);
println!("{}", "-".repeat(70));
for (mask, s) in results.iter().rev().take(5) {
println!(
"{:>4} {:>12} {:>8.3}% {:>8.1}% {:>6} {:>6} {:>6} {:>+8.2}",
"",
mask_label(*mask),
s.rmse_pct(),
s.max_pct(),
s.gt_5pct,
s.gt_10pct,
s.gt_20pct,
s.bias()
);
}
// Print current config {1,2,3,5} for reference.
let current_mask: u16 = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 4); // digits 1,2,3,5
let current_stats = all_results
.iter()
.find(|(m, _)| *m == current_mask)
.map(|(_, s)| s)
.unwrap();
let current_rank = results
.iter()
.position(|(m, _)| *m == current_mask)
.unwrap();
println!();
println!(
"Current {{1,2,3,5}} = rank {}/{}: RMSE {:.3}%, Max {:.1}%, >5%: {}, >10%: {}, >20%: {}",
current_rank + 1,
num_masks,
current_stats.rmse_pct(),
current_stats.max_pct(),
current_stats.gt_5pct,
current_stats.gt_10pct,
current_stats.gt_20pct,
);
println!("\nTotal time: {:.1}s", t0.elapsed().as_secs_f64());
}
@@ -1,452 +0,0 @@
//! Sweep round-value tolerance to find optimal rounding threshold.
//!
//! Tests different tolerance percentages (0%, 0.01%, 0.1%, 1%, etc.) for
//! detecting round BTC amounts, combined with several digit filter masks.
//!
//! Phase 1: single pass over indexer, store per-output relative errors.
//! Phase 2: sweep tolerance × mask combos across CPU cores.
//!
//! Run with: cargo run -p brk_oracle --example sweep_tolerance --release
use std::path::PathBuf;
use std::time::Instant;
use brk_indexer::Indexer;
use brk_oracle::{Config, Histogram, Oracle, PRICES, cents_to_bin, sats_to_bin};
use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
use vecdb::{AnyVec, ReadableVec, VecIndex};
const BINS_5PCT: f64 = 4.24;
const BINS_10PCT: f64 = 8.28;
const BINS_20PCT: f64 = 15.84;
fn bins_to_pct(bins: f64) -> f64 {
(10.0_f64.powf(bins / 200.0) - 1.0) * 100.0
}
fn seed_bin(start_height: usize) -> f64 {
let price: f64 = PRICES
.lines()
.nth(start_height - 1)
.expect("prices.txt too short")
.parse()
.expect("Failed to parse seed price");
cents_to_bin(price * 100.0)
}
fn leading_digit(sats: u64) -> u8 {
let log = (sats as f64).log10();
let magnitude = 10.0_f64.powf(log.floor());
let d = (sats as f64 / magnitude).round() as u8;
if d >= 10 { 1 } else { d }
}
/// Returns the relative error of `sats` from its nearest round value (d × 10^n).
/// e.g. 10_050 → leading=1, round_val=10_000, rel_err = 50/10000 = 0.005
fn relative_roundness(sats: u64) -> f64 {
let log = (sats as f64).log10();
let magnitude = 10.0_f64.powf(log.floor());
let leading = (sats as f64 / magnitude).round();
let round_val = leading * magnitude;
(sats as f64 - round_val).abs() / round_val
}
struct Stats {
total_sq_err: f64,
total_bias: f64,
max_err: f64,
total_blocks: u64,
gt_5pct: u64,
gt_10pct: u64,
gt_20pct: u64,
}
impl Stats {
fn new() -> Self {
Self {
total_sq_err: 0.0,
total_bias: 0.0,
max_err: 0.0,
total_blocks: 0,
gt_5pct: 0,
gt_10pct: 0,
gt_20pct: 0,
}
}
fn update(&mut self, err: f64) {
self.total_sq_err += err * err;
self.total_bias += err;
self.total_blocks += 1;
let abs_err = err.abs();
if abs_err > self.max_err {
self.max_err = abs_err;
}
if abs_err > BINS_5PCT {
self.gt_5pct += 1;
}
if abs_err > BINS_10PCT {
self.gt_10pct += 1;
}
if abs_err > BINS_20PCT {
self.gt_20pct += 1;
}
}
fn rmse_pct(&self) -> f64 {
bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
}
fn max_pct(&self) -> f64 {
bins_to_pct(self.max_err)
}
fn bias(&self) -> f64 {
self.total_bias / self.total_blocks as f64
}
}
/// Per-output data: bin index, leading digit, relative error from round value.
struct RoundOutput {
bin: u16,
digit: u8,
rel_err: f32, // f32 is plenty of precision, saves memory
}
struct BlockData {
full_hist: Histogram,
round_outputs: Vec<RoundOutput>,
high_bin: f64,
low_bin: f64,
}
fn main() {
let t0 = Instant::now();
let data_dir = std::env::var("BRK_DIR")
.map(PathBuf::from)
.unwrap_or_else(|_| {
let home = std::env::var("HOME").unwrap();
PathBuf::from(home).join(".brk")
});
let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
let total_heights = indexer.vecs.blocks.timestamp.len();
let manifest_dir = env!("CARGO_MANIFEST_DIR");
let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
&std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
.expect("Failed to read height_price_ohlc.json"),
)
.expect("Failed to parse height OHLC");
let height_bands: Vec<(f64, f64)> = height_ohlc
.iter()
.map(|ohlc| {
let high = ohlc[1];
let low = ohlc[2];
if high > 0.0 && low > 0.0 {
(cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
} else {
(0.0, 0.0)
}
})
.collect();
let sweep_start: usize = 575_000;
// Phase 1: precompute per-block data.
// Store all potentially-round outputs with their relative error so we can
// filter at different tolerance thresholds in Phase 2.
eprintln!("Phase 1: precomputing block data...");
let total_txs = indexer.vecs.transactions.txid.len();
let total_outputs = indexer.vecs.outputs.value.len();
let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
let ref_config = Config::default();
let total_blocks = total_heights - sweep_start;
let mut blocks: Vec<BlockData> = Vec::with_capacity(total_blocks);
// Use the widest tolerance we'll test (5%) to decide what to store.
// Outputs beyond 5% relative error will never be filtered at any tolerance.
let max_tolerance: f64 = 0.05;
for h in sweep_start..total_heights {
let ft = first_tx_index[h];
let next_ft = first_tx_index
.get(h + 1)
.copied()
.unwrap_or(TxIndex::from(total_txs));
let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
indexer
.vecs
.transactions
.first_txout_index
.collect_one(ft + 1)
.unwrap()
.to_usize()
} else {
out_first
.get(h + 1)
.copied()
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize()
};
let out_end = out_first
.get(h + 1)
.copied()
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize();
let values: Vec<Sats> = indexer
.vecs
.outputs
.value
.collect_range_at(out_start, out_end);
let output_types: Vec<OutputType> = indexer
.vecs
.outputs
.output_type
.collect_range_at(out_start, out_end);
let mut full_hist = Histogram::zeros();
let mut round_outputs = Vec::new();
for (sats, output_type) in values.into_iter().zip(output_types) {
if ref_config.excluded_output_types.contains(&output_type) {
continue;
}
if *sats < ref_config.min_sats {
continue;
}
if let Some(bin) = sats_to_bin(sats) {
full_hist.increment(bin);
let d = leading_digit(*sats);
if (1..=9).contains(&d) {
let rel_err = relative_roundness(*sats);
if rel_err <= max_tolerance {
round_outputs.push(RoundOutput {
bin: bin as u16,
digit: d,
rel_err: rel_err as f32,
});
}
}
}
}
let (high_bin, low_bin) = if h < height_bands.len() {
height_bands[h]
} else {
(0.0, 0.0)
};
blocks.push(BlockData {
full_hist,
round_outputs,
high_bin,
low_bin,
});
if (h - sweep_start).is_multiple_of(50_000) {
eprint!(
"\r {}/{} ({:.0}%)",
h - sweep_start,
total_blocks,
(h - sweep_start) as f64 / total_blocks as f64 * 100.0
);
}
}
let mem_hists = blocks.len() * std::mem::size_of::<Histogram>();
let mem_rounds: usize = blocks
.iter()
.map(|b| b.round_outputs.len() * std::mem::size_of::<RoundOutput>())
.sum();
eprintln!(
"\r {} blocks precomputed ({:.1} GB hists + {:.0} MB rounds) in {:.1}s",
blocks.len(),
mem_hists as f64 / 1e9,
mem_rounds as f64 / 1e6,
t0.elapsed().as_secs_f64()
);
// Phase 2: sweep tolerance × mask combos.
// Tolerances as fractions (not percentages).
let tolerances: &[(f64, &str)] = &[
(0.0, "0%"),
(0.0001, "0.01%"),
(0.0005, "0.05%"),
(0.001, "0.1%"),
(0.002, "0.2%"),
(0.005, "0.5%"),
(0.01, "1%"),
(0.02, "2%"),
(0.05, "5%"),
];
// 987654321
let masks: &[(u16, &str)] = &[
(0b0_0000_0000, "none"),
(0b0_0001_0111, "{1,2,3,5}"),
(0b0_0001_1111, "{1,2,3,4,5}"),
(0b0_0011_0111, "{1,2,3,5,6}"),
(0b0_0111_0111, "{1,2,3,5,6,7}"),
(0b1_1111_1111, "{1-9}"),
];
let num_configs = tolerances.len() * masks.len();
let num_threads = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(8);
eprintln!(
"Phase 2: sweeping {} configs ({} tolerances × {} masks) across {} threads...",
num_configs,
tolerances.len(),
masks.len(),
num_threads
);
let t1 = Instant::now();
let blocks = &blocks;
let tolerances_ref = tolerances;
let masks_ref = masks;
let all_results: Vec<(usize, usize, Stats)> = std::thread::scope(|s| {
let configs_per_thread = num_configs.div_ceil(num_threads);
let handles: Vec<_> = (0..num_threads)
.map(|t| {
s.spawn(move || {
let cfg_start = t * configs_per_thread;
let cfg_end = ((t + 1) * configs_per_thread).min(num_configs);
if cfg_start >= cfg_end {
return vec![];
}
let mut results = Vec::with_capacity(cfg_end - cfg_start);
for cfg_idx in cfg_start..cfg_end {
let ti = cfg_idx / masks_ref.len();
let mi = cfg_idx % masks_ref.len();
let (tolerance, _) = tolerances_ref[ti];
let (mask, _) = masks_ref[mi];
let mut oracle = Oracle::new(
seed_bin(sweep_start),
Config {
exclude_common_round_values: false,
..Default::default()
},
);
let mut stats = Stats::new();
for bd in blocks.iter() {
let mut hist = bd.full_hist.clone();
// Remove outputs matching this tolerance + mask.
let tol_f32 = tolerance as f32;
for ro in &bd.round_outputs {
if mask & (1 << (ro.digit - 1)) != 0 && ro.rel_err <= tol_f32 {
hist[ro.bin as usize] -= 1;
}
}
let ref_bin = oracle.process_histogram(&hist);
if bd.high_bin > 0.0 && bd.low_bin > 0.0 {
let err = if ref_bin < bd.high_bin {
ref_bin - bd.high_bin
} else if ref_bin > bd.low_bin {
ref_bin - bd.low_bin
} else {
0.0
};
stats.update(err);
}
}
results.push((ti, mi, stats));
}
results
})
})
.collect();
handles
.into_iter()
.flat_map(|h| h.join().unwrap())
.collect()
});
eprintln!(" Done in {:.1}s.", t1.elapsed().as_secs_f64());
// Print results grouped by tolerance.
println!();
println!(
"{:>8} {:>16} {:>8} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
"Tol", "Digits", "Blocks", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
);
println!("{}", "-".repeat(88));
for (ti, &(_, tol_label)) in tolerances.iter().enumerate() {
for (mi, &(_, mask_label)) in masks.iter().enumerate() {
let (_, _, stats) = all_results
.iter()
.find(|(t, m, _)| *t == ti && *m == mi)
.unwrap();
println!(
"{:>8} {:>16} {:>8} {:>8.3}% {:>8.1}% {:>6} {:>6} {:>6} {:>+8.2}",
tol_label,
mask_label,
stats.total_blocks,
stats.rmse_pct(),
stats.max_pct(),
stats.gt_5pct,
stats.gt_10pct,
stats.gt_20pct,
stats.bias()
);
}
println!();
}
// Find overall best config by RMSE.
let best = all_results
.iter()
.min_by(|a, b| a.2.rmse_pct().partial_cmp(&b.2.rmse_pct()).unwrap())
.unwrap();
let (bti, bmi, bs) = best;
println!(
"Best: tolerance={}, digits={} → RMSE {:.3}%, Max {:.1}%, >5%: {}, >10%: {}, >20%: {}",
tolerances[*bti].1,
masks[*bmi].1,
bs.rmse_pct(),
bs.max_pct(),
bs.gt_5pct,
bs.gt_10pct,
bs.gt_20pct,
);
// Show current config for reference.
let current = all_results
.iter()
.find(|(t, m, _)| tolerances[*t].0 == 0.001 && masks[*m].0 == 0b0_0011_0111)
.unwrap();
let (_, _, cs) = current;
println!(
"Current: tolerance=0.1%, digits={{1,2,3,5,6}} → RMSE {:.3}%, Max {:.1}%, >5%: {}, >10%: {}, >20%: {}",
cs.rmse_pct(),
cs.max_pct(),
cs.gt_5pct,
cs.gt_10pct,
cs.gt_20pct,
);
println!("\nTotal time: {:.1}s", t0.elapsed().as_secs_f64());
}
-286
View File
@@ -1,286 +0,0 @@
//! Validate oracle accuracy against exchange reference prices.
//!
//! Run with: cargo run -p brk_oracle --example validate --release
//!
//! Requires:
//! - ~/.brk indexed blockchain data (brk_indexer)
//! - examples/height_price_ohlc.json (per-height [open, high, low, close] in dollars)
use std::path::PathBuf;
use brk_indexer::Indexer;
use brk_oracle::{
Config, Histogram, Oracle, PRICES, START_HEIGHT, cents_to_bin, default_eligible_bin,
};
use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
use vecdb::{AnyVec, ReadableVec, VecIndex};
const BINS_5PCT: f64 = 4.24;
const BINS_10PCT: f64 = 8.28;
const BINS_20PCT: f64 = 15.84;
fn bins_to_pct(bins: f64) -> f64 {
(10.0_f64.powf(bins / 200.0) - 1.0) * 100.0
}
fn seed_bin(start_height: usize) -> f64 {
let price: f64 = PRICES
.lines()
.nth(start_height - 1)
.expect("prices.txt too short")
.parse()
.expect("Failed to parse seed price");
cents_to_bin(price * 100.0)
}
struct Stats {
total_sq_err: f64,
total_bias: f64,
max_err: f64,
total_blocks: u64,
gt_5pct: u64,
gt_10pct: u64,
gt_20pct: u64,
}
impl Stats {
fn new() -> Self {
Self {
total_sq_err: 0.0,
total_bias: 0.0,
max_err: 0.0,
total_blocks: 0,
gt_5pct: 0,
gt_10pct: 0,
gt_20pct: 0,
}
}
fn update(&mut self, err: f64) {
self.total_sq_err += err * err;
self.total_bias += err;
self.total_blocks += 1;
let abs_err = err.abs();
if abs_err > self.max_err {
self.max_err = abs_err;
}
if abs_err > BINS_5PCT {
self.gt_5pct += 1;
}
if abs_err > BINS_10PCT {
self.gt_10pct += 1;
}
if abs_err > BINS_20PCT {
self.gt_20pct += 1;
}
}
fn rmse_pct(&self) -> f64 {
bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
}
fn max_pct(&self) -> f64 {
bins_to_pct(self.max_err)
}
fn bias(&self) -> f64 {
self.total_bias / self.total_blocks as f64
}
}
struct Run {
label: &'static str,
start_height: usize,
oracle: Option<Oracle>,
stats: Stats,
}
fn main() {
let data_dir = std::env::var("BRK_DIR")
.map(PathBuf::from)
.unwrap_or_else(|_| {
let home = std::env::var("HOME").unwrap();
PathBuf::from(home).join(".brk")
});
let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
let total_heights = indexer.vecs.blocks.timestamp.len();
let manifest_dir = env!("CARGO_MANIFEST_DIR");
let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
&std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
.expect("Failed to read height_price_ohlc.json"),
)
.expect("Failed to parse height OHLC");
// Pre-compute per-height (high_bin, low_bin) tolerance band.
let height_bands: Vec<(f64, f64)> = height_ohlc
.iter()
.map(|ohlc| {
let high = ohlc[1];
let low = ohlc[2];
if high > 0.0 && low > 0.0 {
(cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
} else {
(0.0, 0.0)
}
})
.collect();
let mut runs = vec![
Run {
label: "w12 @ 575k",
start_height: 575_000,
oracle: None,
stats: Stats::new(),
},
Run {
label: "w12 @ 600k",
start_height: 600_000,
oracle: None,
stats: Stats::new(),
},
Run {
label: "w12 @ 630k",
start_height: 630_000,
oracle: None,
stats: Stats::new(),
},
];
// Build per-block filtered histograms from the indexer, feeding all oracles in one pass.
let total_txs = indexer.vecs.transactions.txid.len();
let total_outputs = indexer.vecs.outputs.value.len();
// Pre-collect height-indexed vecs (small). Transaction-indexed vecs are too large.
let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
for h in START_HEIGHT..total_heights {
let ft = first_tx_index[h];
let next_ft = first_tx_index
.get(h + 1)
.copied()
.unwrap_or(TxIndex::from(total_txs));
let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
indexer
.vecs
.transactions
.first_txout_index
.collect_one(ft + 1)
.unwrap()
.to_usize()
} else {
out_first
.get(h + 1)
.copied()
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize()
};
let out_end = out_first
.get(h + 1)
.copied()
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize();
// Build filtered histogram once for all oracles.
let values: Vec<Sats> = indexer
.vecs
.outputs
.value
.collect_range_at(out_start, out_end);
let output_types: Vec<OutputType> = indexer
.vecs
.outputs
.output_type
.collect_range_at(out_start, out_end);
let mut hist = Histogram::zeros();
for (sats, output_type) in values.into_iter().zip(output_types) {
if let Some(bin) = default_eligible_bin(sats, output_type) {
hist.increment(bin as usize);
}
}
for run in &mut runs {
if h < run.start_height {
continue;
}
if run.oracle.is_none() {
let config = Config::default();
run.oracle = Some(Oracle::new(seed_bin(run.start_height), config));
}
let ref_bin = run.oracle.as_mut().unwrap().process_histogram(&hist);
if h < height_bands.len() {
let (high_bin, low_bin) = height_bands[h];
if high_bin > 0.0 && low_bin > 0.0 {
let err = if ref_bin < high_bin {
ref_bin - high_bin
} else if ref_bin > low_bin {
ref_bin - low_bin
} else {
0.0
};
run.stats.update(err);
}
}
}
}
// Print results.
println!();
println!(
"{:<14} {:>8} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
"Config", "Blocks", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
);
println!("{}", "-".repeat(72));
for run in &runs {
let s = &run.stats;
println!(
"{:<14} {:>8} {:>7.2}% {:>7.1}% {:>6} {:>6} {:>6} {:>+8.2}",
run.label,
s.total_blocks,
s.rmse_pct(),
s.max_pct(),
s.gt_5pct,
s.gt_10pct,
s.gt_20pct,
s.bias()
);
}
println!();
// Verify exact counts against reference.
// Reference: trunc w12 @ 575k: 261 >5%, 40 >10%, 0 >20%
// trunc w12 @ 600k: 174 >5%, 31 >10%, 0 >20%
// trunc w12 @ 630k: 84 >5%, 9 >10%, 0 >20%
let expected: &[(&str, u64, u64, u64)] = &[
("w12 @ 575k", 237, 22, 0),
("w12 @ 600k", 152, 15, 0),
("w12 @ 630k", 84, 9, 0),
];
for (run, &(label, exp_5, exp_10, exp_20)) in runs.iter().zip(expected) {
let s = &run.stats;
assert_eq!(
s.gt_20pct, exp_20,
"{label}: expected {exp_20} blocks >20%, got {}",
s.gt_20pct
);
assert_eq!(
s.gt_10pct, exp_10,
"{label}: expected {exp_10} blocks >10%, got {}",
s.gt_10pct
);
assert_eq!(
s.gt_5pct, exp_5,
"{label}: expected {exp_5} blocks >5%, got {}",
s.gt_5pct
);
}
println!("All assertions passed!");
}
+3 -1
View File
@@ -16,7 +16,9 @@ pub use histogram::Histogram;
/// so downstream consumers can invalidate cached results.
pub const VERSION: u32 = 2;
/// Pre-oracle dollar prices, one per line, heights 0..630_000.
/// Pre-oracle dollar prices, one per line, heights 0..525_000. The last
/// entry (height 524_999) seeds the oracle's first on-chain computation
/// at `START_HEIGHT`.
pub const PRICES: &str = include_str!("prices.txt");
/// First height where the oracle computes from on-chain data.
File diff suppressed because it is too large Load Diff