Files
brk/crates/brk_oracle/examples/noise.rs
2026-03-13 22:42:43 +01:00

284 lines
8.8 KiB
Rust

//! Diagnostic: sweep oracle start heights and clamp-top-N strategies.
//!
//! Run with: cargo run -p brk_oracle --example noise --release
use std::path::PathBuf;
use std::time::Instant;
use brk_indexer::Indexer;
use brk_oracle::{Config, NUM_BINS, Oracle, PRICES, cents_to_bin, sats_to_bin};
use brk_types::{Sats, TxIndex, TxOutIndex};
use vecdb::{AnyVec, ReadableVec, VecIndex};
const BINS_5PCT: f64 = 4.24;
const BINS_10PCT: f64 = 8.28;
const BINS_20PCT: f64 = 15.84;
const BPD: f64 = 200.0;
fn bins_to_pct(bins: f64) -> f64 {
(10.0_f64.powf(bins / BPD) - 1.0) * 100.0
}
fn price_seed_bin(start_height: usize) -> f64 {
let price: f64 = PRICES
.lines()
.nth(start_height - 1)
.expect("prices.txt too short")
.parse()
.expect("Failed to parse seed price");
cents_to_bin(price * 100.0)
}
/// Clamp the top N bins in `src` down to the (N+1)th highest value, writing into `dst`.
fn clamp_top_n(src: &[u32; NUM_BINS], dst: &mut [u32; NUM_BINS], n: usize) {
// Find the (n+1)th largest value.
// Collect non-zero counts, sort descending, take the (n+1)th.
let mut top: Vec<u32> = src.iter().copied().filter(|&v| v > 0).collect();
top.sort_unstable_by(|a, b| b.cmp(a));
let clamp_to = if top.len() > n { top[n] } else { 0 };
for (i, &v) in src.iter().enumerate() {
dst[i] = v.min(clamp_to.max(v.min(clamp_to)));
}
}
fn main() {
let t0 = Instant::now();
let data_dir = std::env::var("BRK_DIR")
.map(PathBuf::from)
.unwrap_or_else(|_| {
let home = std::env::var("HOME").unwrap();
PathBuf::from(home).join(".brk")
});
let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
let total_heights = indexer.vecs.blocks.timestamp.len();
let manifest_dir = env!("CARGO_MANIFEST_DIR");
let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
&std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
.expect("Failed to read height_price_ohlc.json"),
)
.expect("Failed to parse height OHLC");
let height_bands: Vec<(f64, f64)> = height_ohlc
.iter()
.map(|ohlc| {
let high = ohlc[1];
let low = ohlc[2];
if high > 0.0 && low > 0.0 {
(cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
} else {
(0.0, 0.0)
}
})
.collect();
// Start heights: 630k, 600k, 575k, then 570k down to 500k by 5k.
let mut start_heights: Vec<usize> = vec![630_000, 600_000, 575_000];
let mut h = 570_000;
while h >= 500_000 {
start_heights.push(h);
h -= 5_000;
}
let lowest = *start_heights.iter().min().unwrap();
// Clamp-top-N values to test: 0 (no clamp), 2, 3, 5, 10.
let clamp_values: Vec<usize> = vec![0, 2, 3, 5, 10];
// Build per-block RAW histograms from the lowest start height.
eprintln!("Building histograms from height {}...", lowest);
let total_txs = indexer.vecs.transactions.height.len();
let total_outputs = indexer.vecs.outputs.value.len();
let first_txout_index_reader = indexer.vecs.transactions.first_txout_index.reader();
let value_reader = indexer.vecs.outputs.value.reader();
let output_type_reader = indexer.vecs.outputs.output_type.reader();
let config = Config::default();
let total_blocks = total_heights - lowest;
struct BlockData {
hist: Box<[u32; NUM_BINS]>,
high_bin: f64,
low_bin: f64,
}
let mut blocks: Vec<BlockData> = Vec::with_capacity(total_blocks);
for h in lowest..total_heights {
let first_tx_index: TxIndex = indexer
.vecs
.transactions
.first_tx_index
.collect_one_at(h)
.unwrap();
let next_first_tx_index: TxIndex = indexer
.vecs
.transactions
.first_tx_index
.collect_one_at(h + 1)
.unwrap_or(TxIndex::from(total_txs));
let out_start = if first_tx_index.to_usize() + 1 < next_first_tx_index.to_usize() {
first_txout_index_reader
.get(first_tx_index.to_usize() + 1)
.to_usize()
} else {
indexer
.vecs
.outputs
.first_txout_index
.collect_one_at(h + 1)
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize()
};
let out_end: usize = indexer
.vecs
.outputs
.first_txout_index
.collect_one_at(h + 1)
.unwrap_or(TxOutIndex::from(total_outputs))
.to_usize();
let mut hist = Box::new([0u32; NUM_BINS]);
for i in out_start..out_end {
let sats: Sats = value_reader.get(i);
let output_type = output_type_reader.get(i);
if config.excluded_output_types.contains(&output_type) {
continue;
}
if *sats < config.min_sats {
continue;
}
if config.exclude_common_round_values && sats.is_common_round_value() {
continue;
}
if let Some(bin) = sats_to_bin(sats) {
hist[bin] += 1;
}
}
let (high_bin, low_bin) = if h < height_bands.len() {
height_bands[h]
} else {
(0.0, 0.0)
};
blocks.push(BlockData {
hist,
high_bin,
low_bin,
});
if (h - lowest).is_multiple_of(50_000) {
eprint!(
"\r {}/{} ({:.0}%)",
h - lowest,
total_blocks,
(h - lowest) as f64 / total_blocks as f64 * 100.0
);
}
}
eprintln!(
"\r {} blocks built in {:.1}s",
blocks.len(),
t0.elapsed().as_secs_f64()
);
// For each clamp value, run all start heights.
for &clamp_n in &clamp_values {
println!();
let label = if clamp_n == 0 {
"no clamp".to_string()
} else {
format!("clamp top {}", clamp_n)
};
println!("=== {} ===", label);
println!(
"{:>8} {:>8} {:>8} {:>8} {:>6} {:>6} {:>6} {:>8}",
"Start", "Blocks", "RMSE%", "Worst%", ">5%", ">10%", ">20%", "Worst@"
);
println!("{}", "-".repeat(72));
for &start_height in &start_heights {
let mut oracle = Oracle::new(price_seed_bin(start_height), config.clone());
let block_offset = start_height - lowest;
let mut worst_err: f64 = 0.0;
let mut worst_height: usize = 0;
let mut gt_5: u64 = 0;
let mut gt_10: u64 = 0;
let mut gt_20: u64 = 0;
let mut total_sq_err: f64 = 0.0;
let mut total_measured: u64 = 0;
let mut clamped_hist = [0u32; NUM_BINS];
for (i, bd) in blocks[block_offset..].iter().enumerate() {
if clamp_n > 0 {
clamp_top_n(&bd.hist, &mut clamped_hist, clamp_n);
oracle.process_histogram(&clamped_hist);
} else {
oracle.process_histogram(&bd.hist);
}
let height = start_height + i;
let ref_bin = oracle.ref_bin();
if bd.high_bin <= 0.0 || bd.low_bin <= 0.0 {
continue;
}
let err = if ref_bin < bd.high_bin {
ref_bin - bd.high_bin
} else if ref_bin > bd.low_bin {
ref_bin - bd.low_bin
} else {
0.0
};
total_measured += 1;
total_sq_err += err * err;
let abs_err = err.abs();
if abs_err > BINS_5PCT {
gt_5 += 1;
}
if abs_err > BINS_10PCT {
gt_10 += 1;
}
if abs_err > BINS_20PCT {
gt_20 += 1;
}
if abs_err > worst_err {
worst_err = abs_err;
worst_height = height;
}
}
let rmse = if total_measured > 0 {
bins_to_pct((total_sq_err / total_measured as f64).sqrt())
} else {
0.0
};
println!(
"{:>8} {:>8} {:>7.3}% {:>7.1}% {:>6} {:>6} {:>6} {}",
format!("{}k", start_height / 1000),
total_measured,
rmse,
bins_to_pct(worst_err),
gt_5,
gt_10,
gt_20,
worst_height,
);
}
}
println!("\nTotal time: {:.1}s", t0.elapsed().as_secs_f64());
}