mirror of
https://github.com/bitcoinresearchkit/brk.git
synced 2026-06-08 14:11:56 -07:00
oracle: cleanup
This commit is contained in:
@@ -124,7 +124,7 @@ The oracle consumes one pre-built histogram per block via `process_histogram(&hi
|
||||
|
||||
The caller does the filtering when it builds the histogram. For each block it skips the coinbase, drops every output of a transaction carrying an `OP_RETURN`, then bins the rest. `default_eligible_bin(sats, output_type)` (or `Oracle::output_to_bin` for a non-default `Config`) applies the per-output rules: excluded script types, dust, and round-BTC values. It returns the bin index, or `None` for a filtered output.
|
||||
|
||||
The initial seed must be close to the real price at the starting height. The crate includes a `PRICES` constant with exchange prices for every height up to 630,000 to derive a seed from.
|
||||
The initial seed must be close to the real price at the starting height. The crate includes a `PRICES` constant with exchange prices for heights 0..525,000; the last entry (height 524,999) seeds the oracle's first on-chain computation at `START_HEIGHT`.
|
||||
|
||||
## Configuration
|
||||
|
||||
@@ -145,15 +145,16 @@ All parameters via `Config` with sensible defaults:
|
||||
|
||||
| | brk_oracle | UTXOracle |
|
||||
|---|---|---|
|
||||
| Resolution | Per-block (~10 min) + daily candles | Per-run consensus price + per-output intraday scatter |
|
||||
| Resolution | Per-block (~10 min); daily OHLC built downstream | Per-run consensus price + per-output intraday scatter |
|
||||
| Operation | Rolling: EMA over ring buffer, updates each block | Batch: processes a full day from scratch, stateless |
|
||||
| Algorithm | Single-pass stencil scoring with per-offset normalization | Multi-step: dual stencil → rough estimate → output-to-USD mapping → iterative convergence |
|
||||
| Steps to compute price | 7 (filter+bin → ring insert → EMA → per-offset peaks → score → argmax+parabolic → bin→price) | 10 (filter+bin → clip → smooth round BTC → sum → normalize → cap extremes → dual-stencil slide → neighbor weight-avg → output-to-USD map → iterative central price) |
|
||||
| Stencil | 19 round-USD offsets ($1 to $10k), each normalized to its own peak | 803-point Gaussian + weighted spike template targeting 17 round-USD amounts |
|
||||
| Round BTC handling | Excluded from histogram entirely | Histogram bins smoothed by averaging neighbors |
|
||||
| Output filtering | Per-tx OP_RETURN drop, then per-output: script type, dust threshold, round BTC | Per-tx: exactly 2 outputs, ≤5 inputs, no same-day inputs, ≤500-byte witness |
|
||||
| Validated from | Height 525,000 (May 2018) | December 2023 |
|
||||
| Output filtering | Per-tx OP_RETURN drop, then per-output: script type, dust threshold, round BTC | Per-tx: not coinbase, no OP_RETURN, exactly 2 outputs, ≤5 inputs, no same-day inputs, ≤500-byte witness |
|
||||
| Validated from | Height 525,000 (May 2018) | Dec 15, 2023 |
|
||||
| Language | Rust | Python |
|
||||
| Dependencies | None (pure computation, caller provides block data) | Bitcoin Core RPC |
|
||||
| Dependencies | None (pure computation, caller provides block data) | bitcoin-cli + direct blk file reads |
|
||||
| Bins per decade | 200 | 200 |
|
||||
|
||||
## Accuracy
|
||||
|
||||
@@ -1,295 +0,0 @@
|
||||
//! Compare specific digit filter configurations across multiple start heights.
|
||||
//!
|
||||
//! Run with: cargo run -p brk_oracle --example compare_digits --release
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use brk_indexer::Indexer;
|
||||
use brk_oracle::{Config, Histogram, NUM_BINS, Oracle, PRICES, cents_to_bin, sats_to_bin};
|
||||
use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
|
||||
use vecdb::{AnyVec, ReadableVec, VecIndex};
|
||||
|
||||
/// Error threshold in bins equivalent to a ~5% price move
/// (`bins_to_pct(4.24)` is about 5, with 200 bins per decade).
const BINS_5PCT: f64 = 4.24;
|
||||
/// Error threshold in bins equivalent to a ~10% price move.
const BINS_10PCT: f64 = 8.28;
|
||||
/// Error threshold in bins equivalent to a ~20% price move.
const BINS_20PCT: f64 = 15.84;
|
||||
|
||||
/// Converts a distance expressed in histogram bins into a percentage change.
///
/// Bins are logarithmic with 200 bins per decade, so a distance of `bins`
/// corresponds to a ratio of `10^(bins / 200)`; the result is that ratio
/// expressed as a percentage above 1.
fn bins_to_pct(bins: f64) -> f64 {
    let decades = bins / 200.0;
    let ratio = 10.0_f64.powf(decades);
    (ratio - 1.0) * 100.0
}
|
||||
|
||||
fn seed_bin(start_height: usize) -> f64 {
|
||||
let price: f64 = PRICES
|
||||
.lines()
|
||||
.nth(start_height - 1)
|
||||
.expect("prices.txt too short")
|
||||
.parse()
|
||||
.expect("Failed to parse seed price");
|
||||
cents_to_bin(price * 100.0)
|
||||
}
|
||||
|
||||
/// Returns the leading decimal digit of `sats` after rounding the value to
/// one significant figure.
///
/// A value that rounds up to the next power of ten (e.g. 9500 -> 10 * 1000)
/// reports `1`. An input of 0 yields 0, because the NaN ratio casts to 0.
fn leading_digit(sats: u64) -> u8 {
    let value = sats as f64;
    let scale = 10.0_f64.powf(value.log10().floor());
    let digit = (value / scale).round() as u8;
    if digit < 10 {
        digit
    } else {
        1
    }
}
|
||||
|
||||
/// Reports whether `sats` lies within 0.1% of a "round" amount, i.e. a single
/// leading digit followed by zeros (`d * 10^k`).
///
/// An input of 0 is never round (the intermediate values are NaN).
fn is_round(sats: u64) -> bool {
    let value = sats as f64;
    let scale = 10.0_f64.powf(value.log10().floor());
    // Nearest one-significant-figure value to `sats`.
    let nearest = (value / scale).round() * scale;
    let tolerance = nearest * 0.001;
    (value - nearest).abs() <= tolerance
}
|
||||
|
||||
struct Stats {
|
||||
total_sq_err: f64,
|
||||
total_bias: f64,
|
||||
max_err: f64,
|
||||
total_blocks: u64,
|
||||
gt_5pct: u64,
|
||||
gt_10pct: u64,
|
||||
gt_20pct: u64,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
total_sq_err: 0.0,
|
||||
total_bias: 0.0,
|
||||
max_err: 0.0,
|
||||
total_blocks: 0,
|
||||
gt_5pct: 0,
|
||||
gt_10pct: 0,
|
||||
gt_20pct: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn update(&mut self, err: f64) {
|
||||
self.total_sq_err += err * err;
|
||||
self.total_bias += err;
|
||||
self.total_blocks += 1;
|
||||
let abs_err = err.abs();
|
||||
if abs_err > self.max_err {
|
||||
self.max_err = abs_err;
|
||||
}
|
||||
if abs_err > BINS_5PCT {
|
||||
self.gt_5pct += 1;
|
||||
}
|
||||
if abs_err > BINS_10PCT {
|
||||
self.gt_10pct += 1;
|
||||
}
|
||||
if abs_err > BINS_20PCT {
|
||||
self.gt_20pct += 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn rmse_pct(&self) -> f64 {
|
||||
bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
|
||||
}
|
||||
|
||||
fn max_pct(&self) -> f64 {
|
||||
bins_to_pct(self.max_err)
|
||||
}
|
||||
|
||||
fn bias(&self) -> f64 {
|
||||
self.total_bias / self.total_blocks as f64
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let t0 = Instant::now();
|
||||
|
||||
let data_dir = std::env::var("BRK_DIR")
|
||||
.map(PathBuf::from)
|
||||
.unwrap_or_else(|_| {
|
||||
let home = std::env::var("HOME").unwrap();
|
||||
PathBuf::from(home).join(".brk")
|
||||
});
|
||||
|
||||
let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
|
||||
let total_heights = indexer.vecs.blocks.timestamp.len();
|
||||
|
||||
let manifest_dir = env!("CARGO_MANIFEST_DIR");
|
||||
|
||||
let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
|
||||
&std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
|
||||
.expect("Failed to read height_price_ohlc.json"),
|
||||
)
|
||||
.expect("Failed to parse height OHLC");
|
||||
|
||||
let height_bands: Vec<(f64, f64)> = height_ohlc
|
||||
.iter()
|
||||
.map(|ohlc| {
|
||||
let high = ohlc[1];
|
||||
let low = ohlc[2];
|
||||
if high > 0.0 && low > 0.0 {
|
||||
(cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
|
||||
} else {
|
||||
(0.0, 0.0)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Configs to compare.
|
||||
// 987654321
|
||||
let masks: &[(u16, &str)] = &[
|
||||
(0b0_0111_0111, "{1,2,3,5,6,7}"),
|
||||
(0b0_0011_0111, "{1,2,3,5,6}"),
|
||||
(0b0_0001_1111, "{1,2,3,4,5}"),
|
||||
(0b0_0001_0111, "{1,2,3,5}"),
|
||||
];
|
||||
|
||||
let start_heights: &[usize] = &[575_000, 600_000, 630_000];
|
||||
|
||||
// (mask_idx, start_idx) -> (Oracle, Stats)
|
||||
let n = masks.len() * start_heights.len();
|
||||
let mut oracles: Vec<Option<Oracle>> = (0..n).map(|_| None).collect();
|
||||
let mut stats: Vec<Stats> = (0..n).map(|_| Stats::new()).collect();
|
||||
|
||||
let idx = |m: usize, s: usize| -> usize { m * start_heights.len() + s };
|
||||
|
||||
let total_txs = indexer.vecs.transactions.txid.len();
|
||||
let total_outputs = indexer.vecs.outputs.value.len();
|
||||
|
||||
let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
|
||||
let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
|
||||
|
||||
let ref_config = Config::default();
|
||||
let earliest_start = *start_heights.iter().min().unwrap();
|
||||
|
||||
for h in earliest_start..total_heights {
|
||||
let ft = first_tx_index[h];
|
||||
let next_ft = first_tx_index
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxIndex::from(total_txs));
|
||||
|
||||
let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
|
||||
indexer
|
||||
.vecs
|
||||
.transactions
|
||||
.first_txout_index
|
||||
.collect_one(ft + 1)
|
||||
.unwrap()
|
||||
.to_usize()
|
||||
} else {
|
||||
out_first
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxOutIndex::from(total_outputs))
|
||||
.to_usize()
|
||||
};
|
||||
let out_end = out_first
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxOutIndex::from(total_outputs))
|
||||
.to_usize();
|
||||
|
||||
let values: Vec<Sats> = indexer
|
||||
.vecs
|
||||
.outputs
|
||||
.value
|
||||
.collect_range_at(out_start, out_end);
|
||||
let output_types: Vec<OutputType> = indexer
|
||||
.vecs
|
||||
.outputs
|
||||
.output_type
|
||||
.collect_range_at(out_start, out_end);
|
||||
|
||||
// Build full histogram and per-digit histograms.
|
||||
let mut full_hist = Histogram::zeros();
|
||||
let mut digit_hist: [Histogram; 9] = std::array::from_fn(|_| Histogram::zeros());
|
||||
|
||||
for (sats, output_type) in values.into_iter().zip(output_types) {
|
||||
if ref_config.excluded_output_types.contains(&output_type) {
|
||||
continue;
|
||||
}
|
||||
if *sats < ref_config.min_sats {
|
||||
continue;
|
||||
}
|
||||
if let Some(bin) = sats_to_bin(sats) {
|
||||
full_hist.increment(bin);
|
||||
if is_round(*sats) {
|
||||
let d = leading_digit(*sats);
|
||||
if (1..=9).contains(&d) {
|
||||
digit_hist[(d - 1) as usize].increment(bin);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Feed each (mask, start_height) combo.
|
||||
for (mi, &(mask, _)) in masks.iter().enumerate() {
|
||||
// Build filtered histogram for this mask.
|
||||
let mut hist = full_hist.clone();
|
||||
(0..9usize).for_each(|d| {
|
||||
if mask & (1 << d) != 0 {
|
||||
for bin in 0..NUM_BINS {
|
||||
hist[bin] -= digit_hist[d][bin];
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
for (si, &sh) in start_heights.iter().enumerate() {
|
||||
if h < sh {
|
||||
continue;
|
||||
}
|
||||
let i = idx(mi, si);
|
||||
if oracles[i].is_none() {
|
||||
oracles[i] = Some(Oracle::new(
|
||||
seed_bin(sh),
|
||||
Config {
|
||||
exclude_common_round_values: false,
|
||||
..Default::default()
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
let ref_bin = oracles[i].as_mut().unwrap().process_histogram(&hist);
|
||||
|
||||
if h < height_bands.len() {
|
||||
let (high_bin, low_bin) = height_bands[h];
|
||||
if high_bin > 0.0 && low_bin > 0.0 {
|
||||
let err = if ref_bin < high_bin {
|
||||
ref_bin - high_bin
|
||||
} else if ref_bin > low_bin {
|
||||
ref_bin - low_bin
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
stats[i].update(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Print results grouped by start height.
|
||||
for (si, &sh) in start_heights.iter().enumerate() {
|
||||
println!();
|
||||
println!("@ {}k:", sh / 1000);
|
||||
println!(
|
||||
" {:<16} {:>8} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
|
||||
"Digits", "Blocks", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
|
||||
);
|
||||
println!(" {}", "-".repeat(72));
|
||||
for (mi, &(_, label)) in masks.iter().enumerate() {
|
||||
let s = &stats[idx(mi, si)];
|
||||
println!(
|
||||
" {:<16} {:>8} {:>7.3}% {:>7.1}% {:>6} {:>6} {:>6} {:>+8.2}",
|
||||
label,
|
||||
s.total_blocks,
|
||||
s.rmse_pct(),
|
||||
s.max_pct(),
|
||||
s.gt_5pct,
|
||||
s.gt_10pct,
|
||||
s.gt_20pct,
|
||||
s.bias()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
println!("\nDone in {:.1}s", t0.elapsed().as_secs_f64());
|
||||
}
|
||||
@@ -1,10 +1,12 @@
|
||||
//! Verify oracle determinism: oracles started from different heights converge
|
||||
//! to identical ref_bin values after the ring buffer fills.
|
||||
//! Verify the production restart property: an oracle restored via
|
||||
//! `from_checkpoint` (seeded from the previous block's stored cents price,
|
||||
//! replayed over the last `window_size` blocks) produces bit-exact `ref_bin`
|
||||
//! values matching a continuously-running oracle from the restart height
|
||||
//! onward.
|
||||
//!
|
||||
//! Creates a reference oracle at height 575k and test oracles every 1000 blocks
|
||||
//! up to 630k. After window_size blocks, each test oracle should produce the
|
||||
//! same ref_bin as the reference, proving the truncated EMA provides
|
||||
//! start-point independence.
|
||||
//! Mirrors the production filter exactly (per-tx OP_RETURN drop + per-output
|
||||
//! `default_eligible_bin`), so it exercises the same code path
|
||||
//! `brk_computer::prices::compute::feed_blocks` uses at runtime.
|
||||
//!
|
||||
//! Run with: cargo run -p brk_oracle --example determinism --release
|
||||
|
||||
@@ -12,26 +14,49 @@ use std::path::PathBuf;
|
||||
|
||||
use brk_indexer::Indexer;
|
||||
use brk_oracle::{
|
||||
Config, Histogram, Oracle, PRICES, START_HEIGHT, cents_to_bin, default_eligible_bin,
|
||||
Config, Histogram, Oracle, PRICES, START_HEIGHT, bin_to_cents, cents_to_bin,
|
||||
default_eligible_bin,
|
||||
};
|
||||
use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
|
||||
use vecdb::{AnyVec, ReadableVec, VecIndex};
|
||||
|
||||
fn seed_bin(height: usize) -> f64 {
|
||||
fn seed_bin_for_start_height() -> f64 {
|
||||
let price: f64 = PRICES
|
||||
.lines()
|
||||
.nth(height - 1)
|
||||
.expect("prices.txt too short")
|
||||
.nth(START_HEIGHT - 1)
|
||||
.expect("prices.txt too short for START_HEIGHT")
|
||||
.parse()
|
||||
.expect("Failed to parse seed price");
|
||||
cents_to_bin(price * 100.0)
|
||||
}
|
||||
|
||||
struct TestRun {
|
||||
start_height: usize,
|
||||
oracle: Option<Oracle>,
|
||||
converged_at: Option<usize>,
|
||||
diverged_after: bool,
|
||||
/// Raw per-block output data kept in memory so a histogram can be rebuilt
/// for the same block more than once.
///
/// `build_histogram` treats `tx_starts`, `out_start` and `out_end` as absolute
/// output indices and `values` / `output_types` as parallel slices covering
/// `out_start..out_end`.
struct Block {
|
||||
// Output amounts for indices `out_start..out_end`.
values: Vec<Sats>,
|
||||
// Script type of each output, parallel to `values`.
output_types: Vec<OutputType>,
|
||||
// Absolute index of the first output of each transaction in the block.
// NOTE(review): presumably excludes the coinbase - confirm against the loader.
tx_starts: Vec<usize>,
|
||||
// Absolute index of the first output held in `values`.
out_start: usize,
|
||||
// Absolute index one past the last output held in `values`.
out_end: usize,
|
||||
}
|
||||
|
||||
fn build_histogram(block: &Block) -> Histogram {
|
||||
let mut hist = Histogram::zeros();
|
||||
for tx in 0..block.tx_starts.len() {
|
||||
let lo = block.tx_starts[tx] - block.out_start;
|
||||
let hi = block
|
||||
.tx_starts
|
||||
.get(tx + 1)
|
||||
.map(|s| s - block.out_start)
|
||||
.unwrap_or(block.out_end - block.out_start);
|
||||
if block.output_types[lo..hi].contains(&OutputType::OpReturn) {
|
||||
continue;
|
||||
}
|
||||
for i in lo..hi {
|
||||
if let Some(bin) = default_eligible_bin(block.values[i], block.output_types[i]) {
|
||||
hist.increment(bin as usize);
|
||||
}
|
||||
}
|
||||
}
|
||||
hist
|
||||
}
|
||||
|
||||
fn main() {
|
||||
@@ -48,59 +73,50 @@ fn main() {
|
||||
let config = Config::default();
|
||||
let window_size = config.window_size;
|
||||
|
||||
let restart_offset = 1000;
|
||||
let end_offset = restart_offset + window_size * 4;
|
||||
let end_height = (START_HEIGHT + end_offset).min(total_heights);
|
||||
let restart_at = START_HEIGHT + restart_offset;
|
||||
let warmup_start = restart_at - window_size;
|
||||
|
||||
assert!(
|
||||
end_height > restart_at,
|
||||
"indexer has {total_heights} blocks; need at least {} to test restart at {restart_at}",
|
||||
restart_at + 1
|
||||
);
|
||||
|
||||
println!(
|
||||
"Loading {} blocks ({START_HEIGHT}..{end_height})...",
|
||||
end_height - START_HEIGHT
|
||||
);
|
||||
let total_txs = indexer.vecs.transactions.txid.len();
|
||||
let total_outputs = indexer.vecs.outputs.value.len();
|
||||
|
||||
let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
|
||||
let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
|
||||
let mut txout_cursor = indexer.vecs.transactions.first_txout_index.cursor();
|
||||
|
||||
// Reference oracle at 575k.
|
||||
let ref_start = START_HEIGHT;
|
||||
let mut ref_oracle = Oracle::new(seed_bin(ref_start), Config::default());
|
||||
|
||||
// Test oracles every 1000 blocks from 576k to 630k.
|
||||
let mut runs: Vec<TestRun> = (576_000..=630_000)
|
||||
.step_by(1000)
|
||||
.map(|h| TestRun {
|
||||
start_height: h,
|
||||
oracle: None,
|
||||
converged_at: None,
|
||||
diverged_after: false,
|
||||
})
|
||||
.collect();
|
||||
|
||||
let last_start = runs.last().map(|r| r.start_height).unwrap_or(ref_start);
|
||||
// Process enough blocks for all oracles to converge + verification margin.
|
||||
let end_height = (last_start + window_size + 100).min(total_heights);
|
||||
|
||||
let mut blocks: Vec<Block> = Vec::with_capacity(end_height - START_HEIGHT);
|
||||
for h in START_HEIGHT..end_height {
|
||||
let ft = first_tx_index[h];
|
||||
let next_ft = first_tx_index
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxIndex::from(total_txs));
|
||||
|
||||
let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
|
||||
indexer
|
||||
.vecs
|
||||
.transactions
|
||||
.first_txout_index
|
||||
.collect_one(ft + 1)
|
||||
.unwrap()
|
||||
.to_usize()
|
||||
} else {
|
||||
out_first
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxOutIndex::from(total_outputs))
|
||||
.to_usize()
|
||||
};
|
||||
let block_first_tx = ft.to_usize() + 1;
|
||||
let tx_count = next_ft.to_usize() - block_first_tx;
|
||||
let out_end = out_first
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxOutIndex::from(total_outputs))
|
||||
.to_usize();
|
||||
|
||||
txout_cursor.advance(block_first_tx - txout_cursor.position());
|
||||
let mut tx_starts: Vec<usize> = Vec::with_capacity(tx_count);
|
||||
for _ in 0..tx_count {
|
||||
tx_starts.push(txout_cursor.next().unwrap().to_usize());
|
||||
}
|
||||
let out_start = tx_starts.first().copied().unwrap_or(out_end);
|
||||
|
||||
let values: Vec<Sats> = indexer
|
||||
.vecs
|
||||
.outputs
|
||||
@@ -112,95 +128,76 @@ fn main() {
|
||||
.output_type
|
||||
.collect_range_at(out_start, out_end);
|
||||
|
||||
let mut hist = Histogram::zeros();
|
||||
for (sats, output_type) in values.into_iter().zip(output_types) {
|
||||
if let Some(bin) = default_eligible_bin(sats, output_type) {
|
||||
hist.increment(bin as usize);
|
||||
}
|
||||
}
|
||||
|
||||
let ref_bin = ref_oracle.process_histogram(&hist);
|
||||
|
||||
for run in &mut runs {
|
||||
if h < run.start_height {
|
||||
continue;
|
||||
}
|
||||
if run.oracle.is_none() {
|
||||
run.oracle = Some(Oracle::new(seed_bin(run.start_height), Config::default()));
|
||||
}
|
||||
let test_bin = run.oracle.as_mut().unwrap().process_histogram(&hist);
|
||||
|
||||
if run.converged_at.is_some() {
|
||||
if test_bin != ref_bin {
|
||||
run.diverged_after = true;
|
||||
}
|
||||
} else if test_bin == ref_bin {
|
||||
run.converged_at = Some(h);
|
||||
}
|
||||
}
|
||||
blocks.push(Block {
|
||||
values,
|
||||
output_types,
|
||||
tx_starts,
|
||||
out_start,
|
||||
out_end,
|
||||
});
|
||||
}
|
||||
|
||||
// Print results.
|
||||
println!();
|
||||
println!("{:<12} {:>16} {:>8}", "Start", "Converged at", "Blocks");
|
||||
println!("{}", "-".repeat(40));
|
||||
let mut continuous = Oracle::new(seed_bin_for_start_height(), config.clone());
|
||||
let continuous_bins: Vec<f64> = blocks
|
||||
.iter()
|
||||
.map(|b| continuous.process_histogram(&build_histogram(b)))
|
||||
.collect();
|
||||
println!("Continuous oracle: {} blocks processed", continuous_bins.len());
|
||||
|
||||
let mut max_blocks = 0usize;
|
||||
let mut failed = Vec::new();
|
||||
let mut diverged = Vec::new();
|
||||
|
||||
for run in &runs {
|
||||
if let Some(converged) = run.converged_at {
|
||||
let blocks = converged - run.start_height;
|
||||
if blocks > max_blocks {
|
||||
max_blocks = blocks;
|
||||
}
|
||||
println!("{:<12} {:>16} {:>8}", run.start_height, converged, blocks);
|
||||
if run.diverged_after {
|
||||
diverged.push(run.start_height);
|
||||
}
|
||||
} else {
|
||||
println!("{:<12} {:>16} {:>8}", run.start_height, "NEVER", "-");
|
||||
failed.push(run.start_height);
|
||||
}
|
||||
}
|
||||
|
||||
println!();
|
||||
let prev_bin = continuous_bins[restart_at - START_HEIGHT - 1];
|
||||
let seed_bin = cents_to_bin(bin_to_cents(prev_bin) as f64);
|
||||
println!(
|
||||
"{}/{} converged, max {} blocks to converge (window_size={})",
|
||||
runs.len() - failed.len(),
|
||||
runs.len(),
|
||||
max_blocks,
|
||||
window_size,
|
||||
"Restart at {restart_at}: prev_bin={prev_bin:.4} -> cents -> seed_bin={seed_bin:.4} (delta {:.6})",
|
||||
seed_bin - prev_bin
|
||||
);
|
||||
|
||||
if !diverged.is_empty() {
|
||||
println!("DIVERGED after convergence: {:?}", diverged);
|
||||
}
|
||||
if !failed.is_empty() {
|
||||
println!("NEVER converged: {:?}", failed);
|
||||
let warmup_slice = &blocks[warmup_start - START_HEIGHT..restart_at - START_HEIGHT];
|
||||
let mut restored = Oracle::from_checkpoint(seed_bin, config.clone(), |o| {
|
||||
for b in warmup_slice {
|
||||
o.process_histogram(&build_histogram(b));
|
||||
}
|
||||
});
|
||||
|
||||
let restored_bins: Vec<f64> = blocks[restart_at - START_HEIGHT..]
|
||||
.iter()
|
||||
.map(|b| restored.process_histogram(&build_histogram(b)))
|
||||
.collect();
|
||||
println!("Restored oracle: {} blocks processed", restored_bins.len());
|
||||
|
||||
let mut mismatches: Vec<(usize, f64, f64)> = Vec::new();
|
||||
for (i, &r) in restored_bins.iter().enumerate() {
|
||||
let c = continuous_bins[restart_at - START_HEIGHT + i];
|
||||
if r != c {
|
||||
mismatches.push((restart_at + i, c, r));
|
||||
}
|
||||
}
|
||||
|
||||
// Assertions.
|
||||
assert!(
|
||||
failed.is_empty(),
|
||||
"{} oracles never converged: {:?}",
|
||||
failed.len(),
|
||||
failed
|
||||
);
|
||||
assert!(
|
||||
diverged.is_empty(),
|
||||
"{} oracles diverged after convergence: {:?}",
|
||||
diverged.len(),
|
||||
diverged
|
||||
);
|
||||
assert!(
|
||||
max_blocks <= window_size * 2,
|
||||
"Convergence took {} blocks, expected <= {} (2 * window_size)",
|
||||
max_blocks,
|
||||
window_size * 2
|
||||
println!();
|
||||
if mismatches.is_empty() {
|
||||
println!(
|
||||
"All {} blocks from {restart_at} onward match exactly.",
|
||||
restored_bins.len()
|
||||
);
|
||||
} else {
|
||||
println!(
|
||||
"{} of {} blocks differ (showing up to 5):",
|
||||
mismatches.len(),
|
||||
restored_bins.len()
|
||||
);
|
||||
for (h, c, r) in mismatches.iter().take(5) {
|
||||
println!(
|
||||
" h={h}: continuous={c:.6}, restored={r:.6}, delta={:.6}",
|
||||
r - c
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
mismatches.len(),
|
||||
0,
|
||||
"restored oracle diverged from continuous oracle"
|
||||
);
|
||||
|
||||
println!();
|
||||
println!("All assertions passed!");
|
||||
println!("Assertion passed: from_checkpoint restart is bit-exact.");
|
||||
}
|
||||
|
||||
@@ -1,272 +0,0 @@
|
||||
//! Diagnostic: sweep oracle start heights and clamp-top-N strategies.
|
||||
//!
|
||||
//! Run with: cargo run -p brk_oracle --example noise --release
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use brk_indexer::Indexer;
|
||||
use brk_oracle::{Config, Histogram, Oracle, PRICES, cents_to_bin, default_eligible_bin};
|
||||
use brk_types::{Sats, TxIndex, TxOutIndex};
|
||||
use vecdb::{AnyVec, ReadableVec, VecIndex};
|
||||
|
||||
/// Error threshold in bins equivalent to a ~5% price move
/// (`bins_to_pct(4.24)` is about 5).
const BINS_5PCT: f64 = 4.24;
|
||||
/// Error threshold in bins equivalent to a ~10% price move.
const BINS_10PCT: f64 = 8.28;
|
||||
/// Error threshold in bins equivalent to a ~20% price move.
const BINS_20PCT: f64 = 15.84;
|
||||
/// Bins per decade; the divisor `bins_to_pct` uses to turn bins into a power of ten.
const BPD: f64 = 200.0;
|
||||
|
||||
fn bins_to_pct(bins: f64) -> f64 {
|
||||
(10.0_f64.powf(bins / BPD) - 1.0) * 100.0
|
||||
}
|
||||
|
||||
fn seed_bin(start_height: usize) -> f64 {
|
||||
let price: f64 = PRICES
|
||||
.lines()
|
||||
.nth(start_height - 1)
|
||||
.expect("prices.txt too short")
|
||||
.parse()
|
||||
.expect("Failed to parse seed price");
|
||||
cents_to_bin(price * 100.0)
|
||||
}
|
||||
|
||||
/// Clamp the top N bins in `src` down to the (N+1)th highest value, writing into `dst`.
|
||||
fn clamp_top_n(src: &Histogram, dst: &mut Histogram, n: usize) {
|
||||
let mut top: Vec<u32> = src.iter().copied().filter(|&v| v > 0).collect();
|
||||
top.sort_unstable_by(|a, b| b.cmp(a));
|
||||
let clamp_to = if top.len() > n { top[n] } else { 0 };
|
||||
|
||||
for (i, &v) in src.iter().enumerate() {
|
||||
dst[i] = v.min(clamp_to.max(v.min(clamp_to)));
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let t0 = Instant::now();
|
||||
|
||||
let data_dir = std::env::var("BRK_DIR")
|
||||
.map(PathBuf::from)
|
||||
.unwrap_or_else(|_| {
|
||||
let home = std::env::var("HOME").unwrap();
|
||||
PathBuf::from(home).join(".brk")
|
||||
});
|
||||
|
||||
let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
|
||||
let total_heights = indexer.vecs.blocks.timestamp.len();
|
||||
|
||||
let manifest_dir = env!("CARGO_MANIFEST_DIR");
|
||||
|
||||
let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
|
||||
&std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
|
||||
.expect("Failed to read height_price_ohlc.json"),
|
||||
)
|
||||
.expect("Failed to parse height OHLC");
|
||||
|
||||
let height_bands: Vec<(f64, f64)> = height_ohlc
|
||||
.iter()
|
||||
.map(|ohlc| {
|
||||
let high = ohlc[1];
|
||||
let low = ohlc[2];
|
||||
if high > 0.0 && low > 0.0 {
|
||||
(cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
|
||||
} else {
|
||||
(0.0, 0.0)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Start heights: 630k, 600k, 575k, then 570k down to 500k by 5k.
|
||||
let mut start_heights: Vec<usize> = vec![630_000, 600_000, 575_000];
|
||||
let mut h = 570_000;
|
||||
while h >= 500_000 {
|
||||
start_heights.push(h);
|
||||
h -= 5_000;
|
||||
}
|
||||
let lowest = *start_heights.iter().min().unwrap();
|
||||
|
||||
// Clamp-top-N values to test: 0 (no clamp), 2, 3, 5, 10.
|
||||
let clamp_values: Vec<usize> = vec![0, 2, 3, 5, 10];
|
||||
|
||||
// Build per-block RAW histograms from the lowest start height.
|
||||
eprintln!("Building histograms from height {}...", lowest);
|
||||
|
||||
let total_txs = indexer.vecs.transactions.txid.len();
|
||||
let total_outputs = indexer.vecs.outputs.value.len();
|
||||
|
||||
let first_txout_index_reader = indexer.vecs.transactions.first_txout_index.reader();
|
||||
let value_reader = indexer.vecs.outputs.value.reader();
|
||||
let output_type_reader = indexer.vecs.outputs.output_type.reader();
|
||||
|
||||
let config = Config::default();
|
||||
let total_blocks = total_heights - lowest;
|
||||
|
||||
struct BlockData {
|
||||
hist: Histogram,
|
||||
high_bin: f64,
|
||||
low_bin: f64,
|
||||
}
|
||||
|
||||
let mut blocks: Vec<BlockData> = Vec::with_capacity(total_blocks);
|
||||
|
||||
for h in lowest..total_heights {
|
||||
let first_tx_index: TxIndex = indexer
|
||||
.vecs
|
||||
.transactions
|
||||
.first_tx_index
|
||||
.collect_one_at(h)
|
||||
.unwrap();
|
||||
let next_first_tx_index: TxIndex = indexer
|
||||
.vecs
|
||||
.transactions
|
||||
.first_tx_index
|
||||
.collect_one_at(h + 1)
|
||||
.unwrap_or(TxIndex::from(total_txs));
|
||||
|
||||
let out_start = if first_tx_index.to_usize() + 1 < next_first_tx_index.to_usize() {
|
||||
first_txout_index_reader
|
||||
.get(first_tx_index.to_usize() + 1)
|
||||
.to_usize()
|
||||
} else {
|
||||
indexer
|
||||
.vecs
|
||||
.outputs
|
||||
.first_txout_index
|
||||
.collect_one_at(h + 1)
|
||||
.unwrap_or(TxOutIndex::from(total_outputs))
|
||||
.to_usize()
|
||||
};
|
||||
let out_end: usize = indexer
|
||||
.vecs
|
||||
.outputs
|
||||
.first_txout_index
|
||||
.collect_one_at(h + 1)
|
||||
.unwrap_or(TxOutIndex::from(total_outputs))
|
||||
.to_usize();
|
||||
|
||||
let mut hist = Histogram::zeros();
|
||||
for i in out_start..out_end {
|
||||
let sats: Sats = value_reader.get(i);
|
||||
let output_type = output_type_reader.get(i);
|
||||
if let Some(bin) = default_eligible_bin(sats, output_type) {
|
||||
hist.increment(bin as usize);
|
||||
}
|
||||
}
|
||||
|
||||
let (high_bin, low_bin) = if h < height_bands.len() {
|
||||
height_bands[h]
|
||||
} else {
|
||||
(0.0, 0.0)
|
||||
};
|
||||
|
||||
blocks.push(BlockData {
|
||||
hist,
|
||||
high_bin,
|
||||
low_bin,
|
||||
});
|
||||
|
||||
if (h - lowest).is_multiple_of(50_000) {
|
||||
eprint!(
|
||||
"\r {}/{} ({:.0}%)",
|
||||
h - lowest,
|
||||
total_blocks,
|
||||
(h - lowest) as f64 / total_blocks as f64 * 100.0
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!(
|
||||
"\r {} blocks built in {:.1}s",
|
||||
blocks.len(),
|
||||
t0.elapsed().as_secs_f64()
|
||||
);
|
||||
|
||||
// For each clamp value, run all start heights.
|
||||
for &clamp_n in &clamp_values {
|
||||
println!();
|
||||
let label = if clamp_n == 0 {
|
||||
"no clamp".to_string()
|
||||
} else {
|
||||
format!("clamp top {}", clamp_n)
|
||||
};
|
||||
println!("=== {} ===", label);
|
||||
println!(
|
||||
"{:>8} {:>8} {:>8} {:>8} {:>6} {:>6} {:>6} {:>8}",
|
||||
"Start", "Blocks", "RMSE%", "Worst%", ">5%", ">10%", ">20%", "Worst@"
|
||||
);
|
||||
println!("{}", "-".repeat(72));
|
||||
|
||||
for &start_height in &start_heights {
|
||||
let mut oracle = Oracle::new(seed_bin(start_height), config.clone());
|
||||
let block_offset = start_height - lowest;
|
||||
|
||||
let mut worst_err: f64 = 0.0;
|
||||
let mut worst_height: usize = 0;
|
||||
let mut gt_5: u64 = 0;
|
||||
let mut gt_10: u64 = 0;
|
||||
let mut gt_20: u64 = 0;
|
||||
let mut total_sq_err: f64 = 0.0;
|
||||
let mut total_measured: u64 = 0;
|
||||
|
||||
let mut clamped_hist = Histogram::zeros();
|
||||
for (i, bd) in blocks[block_offset..].iter().enumerate() {
|
||||
if clamp_n > 0 {
|
||||
clamp_top_n(&bd.hist, &mut clamped_hist, clamp_n);
|
||||
oracle.process_histogram(&clamped_hist);
|
||||
} else {
|
||||
oracle.process_histogram(&bd.hist);
|
||||
}
|
||||
|
||||
let height = start_height + i;
|
||||
let ref_bin = oracle.ref_bin();
|
||||
|
||||
if bd.high_bin <= 0.0 || bd.low_bin <= 0.0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let err = if ref_bin < bd.high_bin {
|
||||
ref_bin - bd.high_bin
|
||||
} else if ref_bin > bd.low_bin {
|
||||
ref_bin - bd.low_bin
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
total_measured += 1;
|
||||
total_sq_err += err * err;
|
||||
let abs_err = err.abs();
|
||||
if abs_err > BINS_5PCT {
|
||||
gt_5 += 1;
|
||||
}
|
||||
if abs_err > BINS_10PCT {
|
||||
gt_10 += 1;
|
||||
}
|
||||
if abs_err > BINS_20PCT {
|
||||
gt_20 += 1;
|
||||
}
|
||||
if abs_err > worst_err {
|
||||
worst_err = abs_err;
|
||||
worst_height = height;
|
||||
}
|
||||
}
|
||||
|
||||
let rmse = if total_measured > 0 {
|
||||
bins_to_pct((total_sq_err / total_measured as f64).sqrt())
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
println!(
|
||||
"{:>8} {:>8} {:>7.3}% {:>7.1}% {:>6} {:>6} {:>6} {}",
|
||||
format!("{}k", start_height / 1000),
|
||||
total_measured,
|
||||
rmse,
|
||||
bins_to_pct(worst_err),
|
||||
gt_5,
|
||||
gt_10,
|
||||
gt_20,
|
||||
worst_height,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
println!("\nTotal time: {:.1}s", t0.elapsed().as_secs_f64());
|
||||
}
|
||||
@@ -1,416 +0,0 @@
|
||||
//! Sweep round-value digit filter to find optimal configuration.
|
||||
//!
|
||||
//! Tests all 512 subsets of leading digits {1,...,9} to find which
|
||||
//! digits to filter out for best oracle accuracy.
|
||||
//!
|
||||
//! Phase 1: single pass over indexer, precompute per-block histograms.
|
||||
//! Phase 2: run 512 configs in parallel across CPU cores.
|
||||
//!
|
||||
//! Run with: cargo run -p brk_oracle --example sweep_digits --release
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use brk_indexer::Indexer;
|
||||
use brk_oracle::{Config, Histogram, Oracle, PRICES, cents_to_bin, sats_to_bin};
|
||||
use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
|
||||
use vecdb::{AnyVec, ReadableVec, VecIndex};
|
||||
|
||||
// Error thresholds expressed in histogram bins. Through `bins_to_pct`
// (200 bins per factor of 10) they map to roughly 5%, 10% and 20%.
const BINS_5PCT: f64 = 4.24;
const BINS_10PCT: f64 = 8.28;
const BINS_20PCT: f64 = 15.84;

/// Converts a distance measured in bins into a percentage.
///
/// A distance of `bins` corresponds to a ratio of `10^(bins / 200)`; the
/// return value is how far that ratio lies above 1, in percent.
fn bins_to_pct(bins: f64) -> f64 {
    let ratio = 10.0_f64.powf(bins / 200.0);
    (ratio - 1.0) * 100.0
}
|
||||
|
||||
fn seed_bin(start_height: usize) -> f64 {
|
||||
let price: f64 = PRICES
|
||||
.lines()
|
||||
.nth(start_height - 1)
|
||||
.expect("prices.txt too short")
|
||||
.parse()
|
||||
.expect("Failed to parse seed price");
|
||||
cents_to_bin(price * 100.0)
|
||||
}
|
||||
|
||||
/// Returns the leading digit of the round value nearest to `sats`.
///
/// The amount is divided by its power-of-ten magnitude and rounded to the
/// nearest integer; a result of 10 (e.g. 9_600 -> 9.6 -> 10) is reported
/// as 1, since it is the next power of ten.
fn leading_digit(sats: u64) -> u8 {
    let value = sats as f64;
    let scale = 10.0_f64.powf(value.log10().floor());
    match (value / scale).round() as u8 {
        digit if digit >= 10 => 1,
        digit => digit,
    }
}
|
||||
|
||||
/// Reports whether `sats` lies within 0.1% of its nearest round value
/// (a single leading digit times a power of ten).
fn is_round(sats: u64) -> bool {
    let value = sats as f64;
    let scale = 10.0_f64.powf(value.log10().floor());
    let nearest = (value / scale).round() * scale;
    let distance = (value - nearest).abs();
    distance <= nearest * 0.001
}
|
||||
|
||||
/// Renders a digit mask as text: bit `d - 1` set means digit `d` is listed.
///
/// Digits appear in ascending order with no separator; an empty mask is
/// rendered as `"none"`.
fn mask_label(mask: u16) -> String {
    let mut label = String::new();
    for digit in 1..=9u32 {
        if (mask >> (digit - 1)) & 1 == 1 {
            label.push(char::from_digit(digit, 10).unwrap());
        }
    }
    if label.is_empty() {
        label.push_str("none");
    }
    label
}
|
||||
|
||||
struct Stats {
|
||||
total_sq_err: f64,
|
||||
total_bias: f64,
|
||||
max_err: f64,
|
||||
total_blocks: u64,
|
||||
gt_5pct: u64,
|
||||
gt_10pct: u64,
|
||||
gt_20pct: u64,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
total_sq_err: 0.0,
|
||||
total_bias: 0.0,
|
||||
max_err: 0.0,
|
||||
total_blocks: 0,
|
||||
gt_5pct: 0,
|
||||
gt_10pct: 0,
|
||||
gt_20pct: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn update(&mut self, err: f64) {
|
||||
self.total_sq_err += err * err;
|
||||
self.total_bias += err;
|
||||
self.total_blocks += 1;
|
||||
let abs_err = err.abs();
|
||||
if abs_err > self.max_err {
|
||||
self.max_err = abs_err;
|
||||
}
|
||||
if abs_err > BINS_5PCT {
|
||||
self.gt_5pct += 1;
|
||||
}
|
||||
if abs_err > BINS_10PCT {
|
||||
self.gt_10pct += 1;
|
||||
}
|
||||
if abs_err > BINS_20PCT {
|
||||
self.gt_20pct += 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn rmse_pct(&self) -> f64 {
|
||||
bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
|
||||
}
|
||||
|
||||
fn max_pct(&self) -> f64 {
|
||||
bins_to_pct(self.max_err)
|
||||
}
|
||||
|
||||
fn bias(&self) -> f64 {
|
||||
self.total_bias / self.total_blocks as f64
|
||||
}
|
||||
}
|
||||
|
||||
/// Precomputed per-block inputs shared by every mask in the sweep.
struct BlockData {
    /// Histogram of every output that passed the output-type and minimum-value
    /// filters in `main`, round values included.
    full_hist: Histogram,
    /// (bin_index, leading_digit) for outputs that are round values.
    round_outputs: Vec<(u16, u8)>,
    /// Bin of the reference high price for this height; 0.0 when no usable
    /// reference exists. The error computation treats it as the lower edge of
    /// the accepted band.
    high_bin: f64,
    /// Bin of the reference low price for this height; 0.0 when no usable
    /// reference exists. The error computation treats it as the upper edge of
    /// the accepted band.
    low_bin: f64,
}
|
||||
|
||||
fn main() {
|
||||
let t0 = Instant::now();
|
||||
|
||||
let data_dir = std::env::var("BRK_DIR")
|
||||
.map(PathBuf::from)
|
||||
.unwrap_or_else(|_| {
|
||||
let home = std::env::var("HOME").unwrap();
|
||||
PathBuf::from(home).join(".brk")
|
||||
});
|
||||
|
||||
let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
|
||||
let total_heights = indexer.vecs.blocks.timestamp.len();
|
||||
|
||||
let manifest_dir = env!("CARGO_MANIFEST_DIR");
|
||||
|
||||
let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
|
||||
&std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
|
||||
.expect("Failed to read height_price_ohlc.json"),
|
||||
)
|
||||
.expect("Failed to parse height OHLC");
|
||||
|
||||
let height_bands: Vec<(f64, f64)> = height_ohlc
|
||||
.iter()
|
||||
.map(|ohlc| {
|
||||
let high = ohlc[1];
|
||||
let low = ohlc[2];
|
||||
if high > 0.0 && low > 0.0 {
|
||||
(cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
|
||||
} else {
|
||||
(0.0, 0.0)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let sweep_start: usize = 575_000;
|
||||
|
||||
// Phase 1: precompute per-block data in a single pass over the indexer.
|
||||
eprintln!("Phase 1: precomputing block data...");
|
||||
|
||||
let total_txs = indexer.vecs.transactions.txid.len();
|
||||
let total_outputs = indexer.vecs.outputs.value.len();
|
||||
|
||||
let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
|
||||
let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
|
||||
|
||||
let ref_config = Config::default();
|
||||
let total_blocks = total_heights - sweep_start;
|
||||
let mut blocks: Vec<BlockData> = Vec::with_capacity(total_blocks);
|
||||
|
||||
for h in sweep_start..total_heights {
|
||||
let ft = first_tx_index[h];
|
||||
let next_ft = first_tx_index
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxIndex::from(total_txs));
|
||||
|
||||
let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
|
||||
indexer
|
||||
.vecs
|
||||
.transactions
|
||||
.first_txout_index
|
||||
.collect_one(ft + 1)
|
||||
.unwrap()
|
||||
.to_usize()
|
||||
} else {
|
||||
out_first
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxOutIndex::from(total_outputs))
|
||||
.to_usize()
|
||||
};
|
||||
let out_end = out_first
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxOutIndex::from(total_outputs))
|
||||
.to_usize();
|
||||
|
||||
let values: Vec<Sats> = indexer
|
||||
.vecs
|
||||
.outputs
|
||||
.value
|
||||
.collect_range_at(out_start, out_end);
|
||||
let output_types: Vec<OutputType> = indexer
|
||||
.vecs
|
||||
.outputs
|
||||
.output_type
|
||||
.collect_range_at(out_start, out_end);
|
||||
|
||||
let mut full_hist = Histogram::zeros();
|
||||
let mut round_outputs = Vec::new();
|
||||
|
||||
for (sats, output_type) in values.into_iter().zip(output_types) {
|
||||
if ref_config.excluded_output_types.contains(&output_type) {
|
||||
continue;
|
||||
}
|
||||
if *sats < ref_config.min_sats {
|
||||
continue;
|
||||
}
|
||||
if let Some(bin) = sats_to_bin(sats) {
|
||||
full_hist.increment(bin);
|
||||
if is_round(*sats) {
|
||||
let d = leading_digit(*sats);
|
||||
if (1..=9).contains(&d) {
|
||||
round_outputs.push((bin as u16, d));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let (high_bin, low_bin) = if h < height_bands.len() {
|
||||
height_bands[h]
|
||||
} else {
|
||||
(0.0, 0.0)
|
||||
};
|
||||
|
||||
blocks.push(BlockData {
|
||||
full_hist,
|
||||
round_outputs,
|
||||
high_bin,
|
||||
low_bin,
|
||||
});
|
||||
|
||||
if (h - sweep_start).is_multiple_of(50_000) {
|
||||
eprint!(
|
||||
"\r {}/{} ({:.0}%)",
|
||||
h - sweep_start,
|
||||
total_blocks,
|
||||
(h - sweep_start) as f64 / total_blocks as f64 * 100.0
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let mem_hists = blocks.len() * std::mem::size_of::<Histogram>();
|
||||
let mem_rounds: usize = blocks.iter().map(|b| b.round_outputs.len() * 3).sum();
|
||||
eprintln!(
|
||||
"\r {} blocks precomputed ({:.1} GB hists + {:.0} MB rounds) in {:.1}s",
|
||||
blocks.len(),
|
||||
mem_hists as f64 / 1e9,
|
||||
mem_rounds as f64 / 1e6,
|
||||
t0.elapsed().as_secs_f64()
|
||||
);
|
||||
|
||||
// Phase 2: sweep digit masks in parallel.
|
||||
// Always filter digit 1 (powers of 10), sweep digits 2-9.
|
||||
let base_mask: u16 = 1 << 0; // digit 1 always on
|
||||
let num_masks: usize = 256; // 2^8 subsets of {2,...,9}
|
||||
let num_threads = std::thread::available_parallelism()
|
||||
.map(|n| n.get())
|
||||
.unwrap_or(8);
|
||||
eprintln!(
|
||||
"Phase 2: sweeping {} masks across {} threads...",
|
||||
num_masks, num_threads
|
||||
);
|
||||
|
||||
let t1 = Instant::now();
|
||||
let blocks = &blocks; // shared reference for threads
|
||||
|
||||
let all_results: Vec<(u16, Stats)> = std::thread::scope(|s| {
|
||||
let masks_per_thread = num_masks.div_ceil(num_threads);
|
||||
|
||||
let handles: Vec<_> = (0..num_threads)
|
||||
.map(|t| {
|
||||
s.spawn(move || {
|
||||
let mask_start = t * masks_per_thread;
|
||||
let mask_end = ((t + 1) * masks_per_thread).min(num_masks);
|
||||
let mut results = Vec::with_capacity(mask_end - mask_start);
|
||||
|
||||
for idx in mask_start..mask_end {
|
||||
// Shift idx bits into positions 1-8 (digits 2-9) and add base_mask (digit 1).
|
||||
let mask = base_mask | ((idx as u16) << 1);
|
||||
let mut oracle = Oracle::new(
|
||||
seed_bin(sweep_start),
|
||||
Config {
|
||||
exclude_common_round_values: false,
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
let mut stats = Stats::new();
|
||||
|
||||
for bd in blocks.iter() {
|
||||
let mut hist = bd.full_hist.clone();
|
||||
for &(bin, digit) in &bd.round_outputs {
|
||||
if mask & (1 << (digit - 1)) != 0 {
|
||||
hist[bin as usize] -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
let ref_bin = oracle.process_histogram(&hist);
|
||||
|
||||
if bd.high_bin > 0.0 && bd.low_bin > 0.0 {
|
||||
let err = if ref_bin < bd.high_bin {
|
||||
ref_bin - bd.high_bin
|
||||
} else if ref_bin > bd.low_bin {
|
||||
ref_bin - bd.low_bin
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
stats.update(err);
|
||||
}
|
||||
}
|
||||
|
||||
results.push((mask, stats));
|
||||
}
|
||||
|
||||
results
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
handles
|
||||
.into_iter()
|
||||
.flat_map(|h| h.join().unwrap())
|
||||
.collect()
|
||||
});
|
||||
|
||||
eprintln!(" Done in {:.1}s.", t1.elapsed().as_secs_f64());
|
||||
|
||||
// Sort by RMSE.
|
||||
let mut results: Vec<&(u16, Stats)> = all_results.iter().collect();
|
||||
results.sort_by(|a, b| a.1.rmse_pct().partial_cmp(&b.1.rmse_pct()).unwrap());
|
||||
|
||||
// Print top 20.
|
||||
println!();
|
||||
println!("Top 20 (by RMSE):");
|
||||
println!(
|
||||
"{:>4} {:>12} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
|
||||
"#", "Digits", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
|
||||
);
|
||||
println!("{}", "-".repeat(70));
|
||||
for (rank, (mask, s)) in results.iter().take(20).enumerate() {
|
||||
println!(
|
||||
"{:>4} {:>12} {:>8.3}% {:>8.1}% {:>6} {:>6} {:>6} {:>+8.2}",
|
||||
rank + 1,
|
||||
mask_label(*mask),
|
||||
s.rmse_pct(),
|
||||
s.max_pct(),
|
||||
s.gt_5pct,
|
||||
s.gt_10pct,
|
||||
s.gt_20pct,
|
||||
s.bias()
|
||||
);
|
||||
}
|
||||
|
||||
// Print bottom 5.
|
||||
println!();
|
||||
println!("Bottom 5 (worst):");
|
||||
println!(
|
||||
"{:>4} {:>12} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
|
||||
"#", "Digits", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
|
||||
);
|
||||
println!("{}", "-".repeat(70));
|
||||
for (mask, s) in results.iter().rev().take(5) {
|
||||
println!(
|
||||
"{:>4} {:>12} {:>8.3}% {:>8.1}% {:>6} {:>6} {:>6} {:>+8.2}",
|
||||
"",
|
||||
mask_label(*mask),
|
||||
s.rmse_pct(),
|
||||
s.max_pct(),
|
||||
s.gt_5pct,
|
||||
s.gt_10pct,
|
||||
s.gt_20pct,
|
||||
s.bias()
|
||||
);
|
||||
}
|
||||
|
||||
// Print current config {1,2,3,5} for reference.
|
||||
let current_mask: u16 = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 4); // digits 1,2,3,5
|
||||
let current_stats = all_results
|
||||
.iter()
|
||||
.find(|(m, _)| *m == current_mask)
|
||||
.map(|(_, s)| s)
|
||||
.unwrap();
|
||||
let current_rank = results
|
||||
.iter()
|
||||
.position(|(m, _)| *m == current_mask)
|
||||
.unwrap();
|
||||
println!();
|
||||
println!(
|
||||
"Current {{1,2,3,5}} = rank {}/{}: RMSE {:.3}%, Max {:.1}%, >5%: {}, >10%: {}, >20%: {}",
|
||||
current_rank + 1,
|
||||
num_masks,
|
||||
current_stats.rmse_pct(),
|
||||
current_stats.max_pct(),
|
||||
current_stats.gt_5pct,
|
||||
current_stats.gt_10pct,
|
||||
current_stats.gt_20pct,
|
||||
);
|
||||
|
||||
println!("\nTotal time: {:.1}s", t0.elapsed().as_secs_f64());
|
||||
}
|
||||
@@ -1,452 +0,0 @@
|
||||
//! Sweep round-value tolerance to find optimal rounding threshold.
|
||||
//!
|
||||
//! Tests different tolerance percentages (0%, 0.01%, 0.1%, 1%, etc.) for
|
||||
//! detecting round BTC amounts, combined with several digit filter masks.
|
||||
//!
|
||||
//! Phase 1: single pass over indexer, store per-output relative errors.
|
||||
//! Phase 2: sweep tolerance × mask combos across CPU cores.
|
||||
//!
|
||||
//! Run with: cargo run -p brk_oracle --example sweep_tolerance --release
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use brk_indexer::Indexer;
|
||||
use brk_oracle::{Config, Histogram, Oracle, PRICES, cents_to_bin, sats_to_bin};
|
||||
use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
|
||||
use vecdb::{AnyVec, ReadableVec, VecIndex};
|
||||
|
||||
// Error thresholds expressed in histogram bins. Through `bins_to_pct`
// (200 bins per factor of 10) they map to roughly 5%, 10% and 20%.
const BINS_5PCT: f64 = 4.24;
const BINS_10PCT: f64 = 8.28;
const BINS_20PCT: f64 = 15.84;

/// Converts a distance measured in bins into a percentage.
///
/// A distance of `bins` corresponds to a ratio of `10^(bins / 200)`; the
/// return value is how far that ratio lies above 1, in percent.
fn bins_to_pct(bins: f64) -> f64 {
    let ratio = 10.0_f64.powf(bins / 200.0);
    (ratio - 1.0) * 100.0
}
|
||||
|
||||
fn seed_bin(start_height: usize) -> f64 {
|
||||
let price: f64 = PRICES
|
||||
.lines()
|
||||
.nth(start_height - 1)
|
||||
.expect("prices.txt too short")
|
||||
.parse()
|
||||
.expect("Failed to parse seed price");
|
||||
cents_to_bin(price * 100.0)
|
||||
}
|
||||
|
||||
/// Returns the leading digit of the round value nearest to `sats`.
///
/// The amount is divided by its power-of-ten magnitude and rounded to the
/// nearest integer; a result of 10 (e.g. 9_600 -> 9.6 -> 10) is reported
/// as 1, since it is the next power of ten.
fn leading_digit(sats: u64) -> u8 {
    let value = sats as f64;
    let scale = 10.0_f64.powf(value.log10().floor());
    match (value / scale).round() as u8 {
        digit if digit >= 10 => 1,
        digit => digit,
    }
}
|
||||
|
||||
/// Measures how far `sats` is from its nearest round value (d × 10^n),
/// as a fraction of that round value.
///
/// Example: 10_050 has nearest round value 10_000, so the result is
/// 50 / 10_000 = 0.005.
fn relative_roundness(sats: u64) -> f64 {
    let value = sats as f64;
    let scale = 10.0_f64.powf(value.log10().floor());
    let nearest = (value / scale).round() * scale;
    (value - nearest).abs() / nearest
}
|
||||
|
||||
struct Stats {
|
||||
total_sq_err: f64,
|
||||
total_bias: f64,
|
||||
max_err: f64,
|
||||
total_blocks: u64,
|
||||
gt_5pct: u64,
|
||||
gt_10pct: u64,
|
||||
gt_20pct: u64,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
total_sq_err: 0.0,
|
||||
total_bias: 0.0,
|
||||
max_err: 0.0,
|
||||
total_blocks: 0,
|
||||
gt_5pct: 0,
|
||||
gt_10pct: 0,
|
||||
gt_20pct: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn update(&mut self, err: f64) {
|
||||
self.total_sq_err += err * err;
|
||||
self.total_bias += err;
|
||||
self.total_blocks += 1;
|
||||
let abs_err = err.abs();
|
||||
if abs_err > self.max_err {
|
||||
self.max_err = abs_err;
|
||||
}
|
||||
if abs_err > BINS_5PCT {
|
||||
self.gt_5pct += 1;
|
||||
}
|
||||
if abs_err > BINS_10PCT {
|
||||
self.gt_10pct += 1;
|
||||
}
|
||||
if abs_err > BINS_20PCT {
|
||||
self.gt_20pct += 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn rmse_pct(&self) -> f64 {
|
||||
bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
|
||||
}
|
||||
|
||||
fn max_pct(&self) -> f64 {
|
||||
bins_to_pct(self.max_err)
|
||||
}
|
||||
|
||||
fn bias(&self) -> f64 {
|
||||
self.total_bias / self.total_blocks as f64
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-output data: bin index, leading digit, relative error from round value.
///
/// Only outputs whose relative error is within the widest tolerance tested
/// are stored, so Phase 2 can re-filter them at any narrower tolerance.
struct RoundOutput {
    /// Histogram bin the output was counted in.
    bin: u16,
    /// Leading digit (1-9) of the nearest round value.
    digit: u8,
    /// Distance from the nearest round value as a fraction of that value.
    rel_err: f32, // f32 is plenty of precision, saves memory
}
|
||||
|
||||
/// Precomputed per-block inputs shared by every tolerance/mask combination.
struct BlockData {
    /// Histogram of every output that passed the output-type and minimum-value
    /// filters in `main`, near-round values included.
    full_hist: Histogram,
    /// Outputs close enough to a round value to be removable in Phase 2.
    round_outputs: Vec<RoundOutput>,
    /// Bin of the reference high price for this height; 0.0 when no usable
    /// reference exists. The error computation treats it as the lower edge of
    /// the accepted band.
    high_bin: f64,
    /// Bin of the reference low price for this height; 0.0 when no usable
    /// reference exists. The error computation treats it as the upper edge of
    /// the accepted band.
    low_bin: f64,
}
|
||||
|
||||
fn main() {
|
||||
let t0 = Instant::now();
|
||||
|
||||
let data_dir = std::env::var("BRK_DIR")
|
||||
.map(PathBuf::from)
|
||||
.unwrap_or_else(|_| {
|
||||
let home = std::env::var("HOME").unwrap();
|
||||
PathBuf::from(home).join(".brk")
|
||||
});
|
||||
|
||||
let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
|
||||
let total_heights = indexer.vecs.blocks.timestamp.len();
|
||||
|
||||
let manifest_dir = env!("CARGO_MANIFEST_DIR");
|
||||
|
||||
let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
|
||||
&std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
|
||||
.expect("Failed to read height_price_ohlc.json"),
|
||||
)
|
||||
.expect("Failed to parse height OHLC");
|
||||
|
||||
let height_bands: Vec<(f64, f64)> = height_ohlc
|
||||
.iter()
|
||||
.map(|ohlc| {
|
||||
let high = ohlc[1];
|
||||
let low = ohlc[2];
|
||||
if high > 0.0 && low > 0.0 {
|
||||
(cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
|
||||
} else {
|
||||
(0.0, 0.0)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let sweep_start: usize = 575_000;
|
||||
|
||||
// Phase 1: precompute per-block data.
|
||||
// Store all potentially-round outputs with their relative error so we can
|
||||
// filter at different tolerance thresholds in Phase 2.
|
||||
eprintln!("Phase 1: precomputing block data...");
|
||||
|
||||
let total_txs = indexer.vecs.transactions.txid.len();
|
||||
let total_outputs = indexer.vecs.outputs.value.len();
|
||||
|
||||
let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
|
||||
let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
|
||||
|
||||
let ref_config = Config::default();
|
||||
let total_blocks = total_heights - sweep_start;
|
||||
let mut blocks: Vec<BlockData> = Vec::with_capacity(total_blocks);
|
||||
|
||||
// Use the widest tolerance we'll test (5%) to decide what to store.
|
||||
// Outputs beyond 5% relative error will never be filtered at any tolerance.
|
||||
let max_tolerance: f64 = 0.05;
|
||||
|
||||
for h in sweep_start..total_heights {
|
||||
let ft = first_tx_index[h];
|
||||
let next_ft = first_tx_index
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxIndex::from(total_txs));
|
||||
|
||||
let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
|
||||
indexer
|
||||
.vecs
|
||||
.transactions
|
||||
.first_txout_index
|
||||
.collect_one(ft + 1)
|
||||
.unwrap()
|
||||
.to_usize()
|
||||
} else {
|
||||
out_first
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxOutIndex::from(total_outputs))
|
||||
.to_usize()
|
||||
};
|
||||
let out_end = out_first
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxOutIndex::from(total_outputs))
|
||||
.to_usize();
|
||||
|
||||
let values: Vec<Sats> = indexer
|
||||
.vecs
|
||||
.outputs
|
||||
.value
|
||||
.collect_range_at(out_start, out_end);
|
||||
let output_types: Vec<OutputType> = indexer
|
||||
.vecs
|
||||
.outputs
|
||||
.output_type
|
||||
.collect_range_at(out_start, out_end);
|
||||
|
||||
let mut full_hist = Histogram::zeros();
|
||||
let mut round_outputs = Vec::new();
|
||||
|
||||
for (sats, output_type) in values.into_iter().zip(output_types) {
|
||||
if ref_config.excluded_output_types.contains(&output_type) {
|
||||
continue;
|
||||
}
|
||||
if *sats < ref_config.min_sats {
|
||||
continue;
|
||||
}
|
||||
if let Some(bin) = sats_to_bin(sats) {
|
||||
full_hist.increment(bin);
|
||||
let d = leading_digit(*sats);
|
||||
if (1..=9).contains(&d) {
|
||||
let rel_err = relative_roundness(*sats);
|
||||
if rel_err <= max_tolerance {
|
||||
round_outputs.push(RoundOutput {
|
||||
bin: bin as u16,
|
||||
digit: d,
|
||||
rel_err: rel_err as f32,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let (high_bin, low_bin) = if h < height_bands.len() {
|
||||
height_bands[h]
|
||||
} else {
|
||||
(0.0, 0.0)
|
||||
};
|
||||
|
||||
blocks.push(BlockData {
|
||||
full_hist,
|
||||
round_outputs,
|
||||
high_bin,
|
||||
low_bin,
|
||||
});
|
||||
|
||||
if (h - sweep_start).is_multiple_of(50_000) {
|
||||
eprint!(
|
||||
"\r {}/{} ({:.0}%)",
|
||||
h - sweep_start,
|
||||
total_blocks,
|
||||
(h - sweep_start) as f64 / total_blocks as f64 * 100.0
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let mem_hists = blocks.len() * std::mem::size_of::<Histogram>();
|
||||
let mem_rounds: usize = blocks
|
||||
.iter()
|
||||
.map(|b| b.round_outputs.len() * std::mem::size_of::<RoundOutput>())
|
||||
.sum();
|
||||
eprintln!(
|
||||
"\r {} blocks precomputed ({:.1} GB hists + {:.0} MB rounds) in {:.1}s",
|
||||
blocks.len(),
|
||||
mem_hists as f64 / 1e9,
|
||||
mem_rounds as f64 / 1e6,
|
||||
t0.elapsed().as_secs_f64()
|
||||
);
|
||||
|
||||
// Phase 2: sweep tolerance × mask combos.
|
||||
// Tolerances as fractions (not percentages).
|
||||
let tolerances: &[(f64, &str)] = &[
|
||||
(0.0, "0%"),
|
||||
(0.0001, "0.01%"),
|
||||
(0.0005, "0.05%"),
|
||||
(0.001, "0.1%"),
|
||||
(0.002, "0.2%"),
|
||||
(0.005, "0.5%"),
|
||||
(0.01, "1%"),
|
||||
(0.02, "2%"),
|
||||
(0.05, "5%"),
|
||||
];
|
||||
|
||||
// 987654321
|
||||
let masks: &[(u16, &str)] = &[
|
||||
(0b0_0000_0000, "none"),
|
||||
(0b0_0001_0111, "{1,2,3,5}"),
|
||||
(0b0_0001_1111, "{1,2,3,4,5}"),
|
||||
(0b0_0011_0111, "{1,2,3,5,6}"),
|
||||
(0b0_0111_0111, "{1,2,3,5,6,7}"),
|
||||
(0b1_1111_1111, "{1-9}"),
|
||||
];
|
||||
|
||||
let num_configs = tolerances.len() * masks.len();
|
||||
let num_threads = std::thread::available_parallelism()
|
||||
.map(|n| n.get())
|
||||
.unwrap_or(8);
|
||||
eprintln!(
|
||||
"Phase 2: sweeping {} configs ({} tolerances × {} masks) across {} threads...",
|
||||
num_configs,
|
||||
tolerances.len(),
|
||||
masks.len(),
|
||||
num_threads
|
||||
);
|
||||
|
||||
let t1 = Instant::now();
|
||||
let blocks = &blocks;
|
||||
let tolerances_ref = tolerances;
|
||||
let masks_ref = masks;
|
||||
|
||||
let all_results: Vec<(usize, usize, Stats)> = std::thread::scope(|s| {
|
||||
let configs_per_thread = num_configs.div_ceil(num_threads);
|
||||
|
||||
let handles: Vec<_> = (0..num_threads)
|
||||
.map(|t| {
|
||||
s.spawn(move || {
|
||||
let cfg_start = t * configs_per_thread;
|
||||
let cfg_end = ((t + 1) * configs_per_thread).min(num_configs);
|
||||
if cfg_start >= cfg_end {
|
||||
return vec![];
|
||||
}
|
||||
let mut results = Vec::with_capacity(cfg_end - cfg_start);
|
||||
|
||||
for cfg_idx in cfg_start..cfg_end {
|
||||
let ti = cfg_idx / masks_ref.len();
|
||||
let mi = cfg_idx % masks_ref.len();
|
||||
let (tolerance, _) = tolerances_ref[ti];
|
||||
let (mask, _) = masks_ref[mi];
|
||||
|
||||
let mut oracle = Oracle::new(
|
||||
seed_bin(sweep_start),
|
||||
Config {
|
||||
exclude_common_round_values: false,
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
let mut stats = Stats::new();
|
||||
|
||||
for bd in blocks.iter() {
|
||||
let mut hist = bd.full_hist.clone();
|
||||
|
||||
// Remove outputs matching this tolerance + mask.
|
||||
let tol_f32 = tolerance as f32;
|
||||
for ro in &bd.round_outputs {
|
||||
if mask & (1 << (ro.digit - 1)) != 0 && ro.rel_err <= tol_f32 {
|
||||
hist[ro.bin as usize] -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
let ref_bin = oracle.process_histogram(&hist);
|
||||
|
||||
if bd.high_bin > 0.0 && bd.low_bin > 0.0 {
|
||||
let err = if ref_bin < bd.high_bin {
|
||||
ref_bin - bd.high_bin
|
||||
} else if ref_bin > bd.low_bin {
|
||||
ref_bin - bd.low_bin
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
stats.update(err);
|
||||
}
|
||||
}
|
||||
|
||||
results.push((ti, mi, stats));
|
||||
}
|
||||
|
||||
results
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
handles
|
||||
.into_iter()
|
||||
.flat_map(|h| h.join().unwrap())
|
||||
.collect()
|
||||
});
|
||||
|
||||
eprintln!(" Done in {:.1}s.", t1.elapsed().as_secs_f64());
|
||||
|
||||
// Print results grouped by tolerance.
|
||||
println!();
|
||||
println!(
|
||||
"{:>8} {:>16} {:>8} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
|
||||
"Tol", "Digits", "Blocks", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
|
||||
);
|
||||
println!("{}", "-".repeat(88));
|
||||
|
||||
for (ti, &(_, tol_label)) in tolerances.iter().enumerate() {
|
||||
for (mi, &(_, mask_label)) in masks.iter().enumerate() {
|
||||
let (_, _, stats) = all_results
|
||||
.iter()
|
||||
.find(|(t, m, _)| *t == ti && *m == mi)
|
||||
.unwrap();
|
||||
println!(
|
||||
"{:>8} {:>16} {:>8} {:>8.3}% {:>8.1}% {:>6} {:>6} {:>6} {:>+8.2}",
|
||||
tol_label,
|
||||
mask_label,
|
||||
stats.total_blocks,
|
||||
stats.rmse_pct(),
|
||||
stats.max_pct(),
|
||||
stats.gt_5pct,
|
||||
stats.gt_10pct,
|
||||
stats.gt_20pct,
|
||||
stats.bias()
|
||||
);
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
// Find overall best config by RMSE.
|
||||
let best = all_results
|
||||
.iter()
|
||||
.min_by(|a, b| a.2.rmse_pct().partial_cmp(&b.2.rmse_pct()).unwrap())
|
||||
.unwrap();
|
||||
let (bti, bmi, bs) = best;
|
||||
println!(
|
||||
"Best: tolerance={}, digits={} → RMSE {:.3}%, Max {:.1}%, >5%: {}, >10%: {}, >20%: {}",
|
||||
tolerances[*bti].1,
|
||||
masks[*bmi].1,
|
||||
bs.rmse_pct(),
|
||||
bs.max_pct(),
|
||||
bs.gt_5pct,
|
||||
bs.gt_10pct,
|
||||
bs.gt_20pct,
|
||||
);
|
||||
|
||||
// Show current config for reference.
|
||||
let current = all_results
|
||||
.iter()
|
||||
.find(|(t, m, _)| tolerances[*t].0 == 0.001 && masks[*m].0 == 0b0_0011_0111)
|
||||
.unwrap();
|
||||
let (_, _, cs) = current;
|
||||
println!(
|
||||
"Current: tolerance=0.1%, digits={{1,2,3,5,6}} → RMSE {:.3}%, Max {:.1}%, >5%: {}, >10%: {}, >20%: {}",
|
||||
cs.rmse_pct(),
|
||||
cs.max_pct(),
|
||||
cs.gt_5pct,
|
||||
cs.gt_10pct,
|
||||
cs.gt_20pct,
|
||||
);
|
||||
|
||||
println!("\nTotal time: {:.1}s", t0.elapsed().as_secs_f64());
|
||||
}
|
||||
@@ -1,286 +0,0 @@
|
||||
//! Validate oracle accuracy against exchange reference prices.
|
||||
//!
|
||||
//! Run with: cargo run -p brk_oracle --example validate --release
|
||||
//!
|
||||
//! Requires:
|
||||
//! - ~/.brk indexed blockchain data (brk_indexer)
|
||||
//! - examples/height_price_ohlc.json (per-height [open, high, low, close] in dollars)
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use brk_indexer::Indexer;
|
||||
use brk_oracle::{
|
||||
Config, Histogram, Oracle, PRICES, START_HEIGHT, cents_to_bin, default_eligible_bin,
|
||||
};
|
||||
use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
|
||||
use vecdb::{AnyVec, ReadableVec, VecIndex};
|
||||
|
||||
// Error thresholds expressed in histogram bins. Through `bins_to_pct`
// (200 bins per factor of 10) they map to roughly 5%, 10% and 20%.
const BINS_5PCT: f64 = 4.24;
const BINS_10PCT: f64 = 8.28;
const BINS_20PCT: f64 = 15.84;

/// Converts a distance measured in bins into a percentage.
///
/// A distance of `bins` corresponds to a ratio of `10^(bins / 200)`; the
/// return value is how far that ratio lies above 1, in percent.
fn bins_to_pct(bins: f64) -> f64 {
    let ratio = 10.0_f64.powf(bins / 200.0);
    (ratio - 1.0) * 100.0
}
|
||||
|
||||
fn seed_bin(start_height: usize) -> f64 {
|
||||
let price: f64 = PRICES
|
||||
.lines()
|
||||
.nth(start_height - 1)
|
||||
.expect("prices.txt too short")
|
||||
.parse()
|
||||
.expect("Failed to parse seed price");
|
||||
cents_to_bin(price * 100.0)
|
||||
}
|
||||
|
||||
struct Stats {
|
||||
total_sq_err: f64,
|
||||
total_bias: f64,
|
||||
max_err: f64,
|
||||
total_blocks: u64,
|
||||
gt_5pct: u64,
|
||||
gt_10pct: u64,
|
||||
gt_20pct: u64,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
total_sq_err: 0.0,
|
||||
total_bias: 0.0,
|
||||
max_err: 0.0,
|
||||
total_blocks: 0,
|
||||
gt_5pct: 0,
|
||||
gt_10pct: 0,
|
||||
gt_20pct: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn update(&mut self, err: f64) {
|
||||
self.total_sq_err += err * err;
|
||||
self.total_bias += err;
|
||||
self.total_blocks += 1;
|
||||
let abs_err = err.abs();
|
||||
if abs_err > self.max_err {
|
||||
self.max_err = abs_err;
|
||||
}
|
||||
if abs_err > BINS_5PCT {
|
||||
self.gt_5pct += 1;
|
||||
}
|
||||
if abs_err > BINS_10PCT {
|
||||
self.gt_10pct += 1;
|
||||
}
|
||||
if abs_err > BINS_20PCT {
|
||||
self.gt_20pct += 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn rmse_pct(&self) -> f64 {
|
||||
bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
|
||||
}
|
||||
|
||||
fn max_pct(&self) -> f64 {
|
||||
bins_to_pct(self.max_err)
|
||||
}
|
||||
|
||||
fn bias(&self) -> f64 {
|
||||
self.total_bias / self.total_blocks as f64
|
||||
}
|
||||
}
|
||||
|
||||
/// One validation run: an oracle started at a given height plus the error
/// statistics it accumulates.
struct Run {
    /// Name shown in the results table and in assertion messages.
    label: &'static str,
    /// First height fed to this run's oracle; also selects the seed price.
    start_height: usize,
    /// `None` until the main loop reaches `start_height`, then the live oracle.
    oracle: Option<Oracle>,
    /// Errors measured against the reference price band.
    stats: Stats,
}
|
||||
|
||||
fn main() {
|
||||
let data_dir = std::env::var("BRK_DIR")
|
||||
.map(PathBuf::from)
|
||||
.unwrap_or_else(|_| {
|
||||
let home = std::env::var("HOME").unwrap();
|
||||
PathBuf::from(home).join(".brk")
|
||||
});
|
||||
|
||||
let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
|
||||
let total_heights = indexer.vecs.blocks.timestamp.len();
|
||||
|
||||
let manifest_dir = env!("CARGO_MANIFEST_DIR");
|
||||
|
||||
let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
|
||||
&std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
|
||||
.expect("Failed to read height_price_ohlc.json"),
|
||||
)
|
||||
.expect("Failed to parse height OHLC");
|
||||
|
||||
// Pre-compute per-height (high_bin, low_bin) tolerance band.
|
||||
let height_bands: Vec<(f64, f64)> = height_ohlc
|
||||
.iter()
|
||||
.map(|ohlc| {
|
||||
let high = ohlc[1];
|
||||
let low = ohlc[2];
|
||||
if high > 0.0 && low > 0.0 {
|
||||
(cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
|
||||
} else {
|
||||
(0.0, 0.0)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut runs = vec![
|
||||
Run {
|
||||
label: "w12 @ 575k",
|
||||
start_height: 575_000,
|
||||
oracle: None,
|
||||
stats: Stats::new(),
|
||||
},
|
||||
Run {
|
||||
label: "w12 @ 600k",
|
||||
start_height: 600_000,
|
||||
oracle: None,
|
||||
stats: Stats::new(),
|
||||
},
|
||||
Run {
|
||||
label: "w12 @ 630k",
|
||||
start_height: 630_000,
|
||||
oracle: None,
|
||||
stats: Stats::new(),
|
||||
},
|
||||
];
|
||||
|
||||
// Build per-block filtered histograms from the indexer, feeding all oracles in one pass.
|
||||
let total_txs = indexer.vecs.transactions.txid.len();
|
||||
let total_outputs = indexer.vecs.outputs.value.len();
|
||||
|
||||
// Pre-collect height-indexed vecs (small). Transaction-indexed vecs are too large.
|
||||
let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
|
||||
let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
|
||||
|
||||
for h in START_HEIGHT..total_heights {
|
||||
let ft = first_tx_index[h];
|
||||
let next_ft = first_tx_index
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxIndex::from(total_txs));
|
||||
|
||||
let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
|
||||
indexer
|
||||
.vecs
|
||||
.transactions
|
||||
.first_txout_index
|
||||
.collect_one(ft + 1)
|
||||
.unwrap()
|
||||
.to_usize()
|
||||
} else {
|
||||
out_first
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxOutIndex::from(total_outputs))
|
||||
.to_usize()
|
||||
};
|
||||
let out_end = out_first
|
||||
.get(h + 1)
|
||||
.copied()
|
||||
.unwrap_or(TxOutIndex::from(total_outputs))
|
||||
.to_usize();
|
||||
|
||||
// Build filtered histogram once for all oracles.
|
||||
let values: Vec<Sats> = indexer
|
||||
.vecs
|
||||
.outputs
|
||||
.value
|
||||
.collect_range_at(out_start, out_end);
|
||||
let output_types: Vec<OutputType> = indexer
|
||||
.vecs
|
||||
.outputs
|
||||
.output_type
|
||||
.collect_range_at(out_start, out_end);
|
||||
|
||||
let mut hist = Histogram::zeros();
|
||||
for (sats, output_type) in values.into_iter().zip(output_types) {
|
||||
if let Some(bin) = default_eligible_bin(sats, output_type) {
|
||||
hist.increment(bin as usize);
|
||||
}
|
||||
}
|
||||
|
||||
for run in &mut runs {
|
||||
if h < run.start_height {
|
||||
continue;
|
||||
}
|
||||
if run.oracle.is_none() {
|
||||
let config = Config::default();
|
||||
run.oracle = Some(Oracle::new(seed_bin(run.start_height), config));
|
||||
}
|
||||
let ref_bin = run.oracle.as_mut().unwrap().process_histogram(&hist);
|
||||
|
||||
if h < height_bands.len() {
|
||||
let (high_bin, low_bin) = height_bands[h];
|
||||
if high_bin > 0.0 && low_bin > 0.0 {
|
||||
let err = if ref_bin < high_bin {
|
||||
ref_bin - high_bin
|
||||
} else if ref_bin > low_bin {
|
||||
ref_bin - low_bin
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
run.stats.update(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Print results.
|
||||
println!();
|
||||
println!(
|
||||
"{:<14} {:>8} {:>10} {:>10} {:>6} {:>6} {:>6} {:>8}",
|
||||
"Config", "Blocks", "RMSE%", "Max%", ">5%", ">10%", ">20%", "Bias"
|
||||
);
|
||||
println!("{}", "-".repeat(72));
|
||||
for run in &runs {
|
||||
let s = &run.stats;
|
||||
println!(
|
||||
"{:<14} {:>8} {:>7.2}% {:>7.1}% {:>6} {:>6} {:>6} {:>+8.2}",
|
||||
run.label,
|
||||
s.total_blocks,
|
||||
s.rmse_pct(),
|
||||
s.max_pct(),
|
||||
s.gt_5pct,
|
||||
s.gt_10pct,
|
||||
s.gt_20pct,
|
||||
s.bias()
|
||||
);
|
||||
}
|
||||
println!();
|
||||
|
||||
// Verify exact counts against reference.
|
||||
// Reference: trunc w12 @ 575k: 261 >5%, 40 >10%, 0 >20%
|
||||
// trunc w12 @ 600k: 174 >5%, 31 >10%, 0 >20%
|
||||
// trunc w12 @ 630k: 84 >5%, 9 >10%, 0 >20%
|
||||
let expected: &[(&str, u64, u64, u64)] = &[
|
||||
("w12 @ 575k", 237, 22, 0),
|
||||
("w12 @ 600k", 152, 15, 0),
|
||||
("w12 @ 630k", 84, 9, 0),
|
||||
];
|
||||
|
||||
for (run, &(label, exp_5, exp_10, exp_20)) in runs.iter().zip(expected) {
|
||||
let s = &run.stats;
|
||||
assert_eq!(
|
||||
s.gt_20pct, exp_20,
|
||||
"{label}: expected {exp_20} blocks >20%, got {}",
|
||||
s.gt_20pct
|
||||
);
|
||||
assert_eq!(
|
||||
s.gt_10pct, exp_10,
|
||||
"{label}: expected {exp_10} blocks >10%, got {}",
|
||||
s.gt_10pct
|
||||
);
|
||||
assert_eq!(
|
||||
s.gt_5pct, exp_5,
|
||||
"{label}: expected {exp_5} blocks >5%, got {}",
|
||||
s.gt_5pct
|
||||
);
|
||||
}
|
||||
|
||||
println!("All assertions passed!");
|
||||
}
|
||||
@@ -16,7 +16,9 @@ pub use histogram::Histogram;
|
||||
/// so downstream consumers can invalidate cached results.
|
||||
pub const VERSION: u32 = 2;
|
||||
|
||||
/// Pre-oracle dollar prices, one per line, heights 0..630_000.
|
||||
/// Pre-oracle dollar prices, one per line, heights 0..525_000. The last
|
||||
/// entry (height 524_999) seeds the oracle's first on-chain computation
|
||||
/// at `START_HEIGHT`.
|
||||
pub const PRICES: &str = include_str!("prices.txt");
|
||||
|
||||
/// First height where the oracle computes from on-chain data.
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user