mirror of
https://github.com/bitcoinresearchkit/brk.git
synced 2026-04-25 15:19:58 -07:00
476 lines
15 KiB
Rust
476 lines
15 KiB
Rust
//! Generate detailed oracle accuracy report for README / documentation.
|
|
//!
|
|
//! Run with: cargo run -p brk_oracle --example report --release
|
|
|
|
use std::path::PathBuf;
|
|
|
|
use brk_indexer::Indexer;
|
|
use brk_oracle::{
|
|
Config, NUM_BINS, Oracle, PRICES, START_HEIGHT, bin_to_cents, cents_to_bin, sats_to_bin,
|
|
};
|
|
use brk_types::{OutputType, Sats, TxIndex, TxOutIndex};
|
|
use vecdb::{AnyVec, ReadableVec, VecIndex};
|
|
|
|
/// Offset between Unix-epoch day numbers and the report's day1 index.
///
/// For dates after genesis week, `day1 = floor(timestamp / 86400) - 14252`,
/// which makes day1 1 fall on Jan 9, 2009 (block 1).
const GENESIS_DAY: u32 = 14252;
|
|
|
|
// Error thresholds expressed in bins. With the conversion used by
// `bins_to_pct` (200 bins per factor of ten), a relative error of p percent
// equals 200 * log10(1 + p / 100) bins.

/// Bin distance equivalent to a 5% price error.
const BINS_5PCT: f64 = 4.24;
/// Bin distance equivalent to a 10% price error.
const BINS_10PCT: f64 = 8.28;
/// Bin distance equivalent to a 20% price error.
const BINS_20PCT: f64 = 15.84;
|
|
|
|
/// Converts a distance measured in bins into a percentage.
///
/// The conversion treats 200 bins as one factor of ten: `bins` bins map to
/// the ratio `10^(bins / 200)`, and the result is how far that ratio lies
/// above 1, in percent.
fn bins_to_pct(bins: f64) -> f64 {
    let ratio = 10.0_f64.powf(bins / 200.0);
    (ratio - 1.0) * 100.0
}
|
|
|
|
/// Returns the UTC calendar year that contains the Unix timestamp `ts`.
///
/// The year is derived with exact proleptic-Gregorian arithmetic (the
/// "civil from days" algorithm) instead of dividing by an average year
/// length, which misplaces timestamps within hours of New Year: for example
/// 2024-01-01T00:00:00Z would otherwise be reported as 2023.
fn timestamp_to_year(ts: u32) -> u16 {
    // Whole days since 1970-01-01, shifted so that day 0 is 0000-03-01.
    // Starting the year in March puts the leap day at the very end.
    let shifted = ts / 86_400 + 719_468;

    // Split into 400-year eras of exactly 146_097 days each.
    let era = shifted / 146_097;
    let day_of_era = shifted % 146_097; // [0, 146_096]

    // Year within the era, correcting for 4/100/400-year leap rules.
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;

    // Day within the March-based year, then the March-based month index
    // (0 = March, ..., 10 = January, 11 = February).
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    let march_month = (5 * day_of_year + 2) / 153;

    // January and February belong to the following civil year.
    let year = era * 400 + year_of_era + u32::from(march_month >= 10);

    // A u32 timestamp cannot reach past the year 2106, so this always fits.
    year as u16
}
|
|
|
|
struct YearStats {
|
|
year: u16,
|
|
total_sq_err: f64,
|
|
max_err: f64,
|
|
total_blocks: u64,
|
|
gt_5pct: u64,
|
|
gt_10pct: u64,
|
|
gt_20pct: u64,
|
|
min_price: f64,
|
|
max_price: f64,
|
|
errors: Vec<f64>,
|
|
}
|
|
|
|
impl YearStats {
|
|
fn new(year: u16) -> Self {
|
|
Self {
|
|
year,
|
|
total_sq_err: 0.0,
|
|
max_err: 0.0,
|
|
total_blocks: 0,
|
|
gt_5pct: 0,
|
|
gt_10pct: 0,
|
|
gt_20pct: 0,
|
|
min_price: f64::MAX,
|
|
max_price: 0.0,
|
|
errors: Vec::new(),
|
|
}
|
|
}
|
|
|
|
fn update(&mut self, err: f64, exchange_high: f64, exchange_low: f64) {
|
|
let abs_err = err.abs();
|
|
self.total_sq_err += err * err;
|
|
self.total_blocks += 1;
|
|
self.errors.push(bins_to_pct(abs_err));
|
|
if abs_err > self.max_err {
|
|
self.max_err = abs_err;
|
|
}
|
|
if abs_err > BINS_5PCT {
|
|
self.gt_5pct += 1;
|
|
}
|
|
if abs_err > BINS_10PCT {
|
|
self.gt_10pct += 1;
|
|
}
|
|
if abs_err > BINS_20PCT {
|
|
self.gt_20pct += 1;
|
|
}
|
|
if exchange_high > self.max_price {
|
|
self.max_price = exchange_high;
|
|
}
|
|
if exchange_low > 0.0 && exchange_low < self.min_price {
|
|
self.min_price = exchange_low;
|
|
}
|
|
}
|
|
|
|
fn rmse_pct(&self) -> f64 {
|
|
bins_to_pct((self.total_sq_err / self.total_blocks as f64).sqrt())
|
|
}
|
|
|
|
fn max_pct(&self) -> f64 {
|
|
bins_to_pct(self.max_err)
|
|
}
|
|
|
|
fn median_pct(&mut self) -> f64 {
|
|
self.errors.sort_by(|a, b| a.partial_cmp(b).unwrap());
|
|
let n = self.errors.len();
|
|
if n == 0 { 0.0 } else { self.errors[n / 2] }
|
|
}
|
|
|
|
fn percentile(&self, p: f64) -> f64 {
|
|
let n = self.errors.len();
|
|
if n == 0 {
|
|
return 0.0;
|
|
}
|
|
let idx = ((p / 100.0) * (n - 1) as f64).round() as usize;
|
|
self.errors[idx.min(n - 1)]
|
|
}
|
|
}
|
|
|
|
/// One day of oracle prices summarised as an open/high/low/close candle,
/// assembled from the per-block oracle prices of that day.
struct DayCandle {
    /// Day index the candle belongs to (also the row index into the daily exchange data).
    day1: usize,
    /// Oracle price of the first block assigned to this day.
    open: f64,
    /// Highest oracle price among the day's blocks.
    high: f64,
    /// Lowest oracle price among the day's blocks.
    low: f64,
    /// Oracle price of the most recent block assigned to this day.
    close: f64,
}
|
|
|
|
/// A single block whose oracle price fell outside the exchange range by more
/// than the reporting threshold; collected for the "worst blocks" listing.
struct BlockError {
    /// Block height.
    height: usize,
    /// Price the oracle produced for this block.
    oracle_price: f64,
    /// Exchange low for this block's height.
    exchange_low: f64,
    /// Exchange high for this block's height.
    exchange_high: f64,
    /// Signed error in percent; the sign follows the sign of the bin error.
    error_pct: f64,
}
|
|
|
|
fn main() {
|
|
let data_dir = std::env::var("BRK_DIR")
|
|
.map(PathBuf::from)
|
|
.unwrap_or_else(|_| {
|
|
let home = std::env::var("HOME").unwrap();
|
|
PathBuf::from(home).join(".brk")
|
|
});
|
|
|
|
let indexer = Indexer::forced_import(&data_dir).expect("Failed to load indexer");
|
|
let total_heights = indexer.vecs.blocks.timestamp.len();
|
|
let manifest_dir = env!("CARGO_MANIFEST_DIR");
|
|
|
|
let height_ohlc: Vec<[f64; 4]> = serde_json::from_str(
|
|
&std::fs::read_to_string(format!("{manifest_dir}/examples/height_price_ohlc.json"))
|
|
.expect("Failed to read height_price_ohlc.json"),
|
|
)
|
|
.expect("Failed to parse height OHLC");
|
|
|
|
let daily_ohlc: Vec<[f64; 4]> = serde_json::from_str(
|
|
&std::fs::read_to_string(format!("{manifest_dir}/examples/date_price_ohlc.json"))
|
|
.expect("Failed to read date_price_ohlc.json"),
|
|
)
|
|
.expect("Failed to parse daily OHLC");
|
|
|
|
let height_bands: Vec<(f64, f64)> = height_ohlc
|
|
.iter()
|
|
.map(|ohlc| {
|
|
let high = ohlc[1];
|
|
let low = ohlc[2];
|
|
if high > 0.0 && low > 0.0 {
|
|
(cents_to_bin(high * 100.0), cents_to_bin(low * 100.0))
|
|
} else {
|
|
(0.0, 0.0)
|
|
}
|
|
})
|
|
.collect();
|
|
|
|
// Read block timestamps for year + day1 mapping.
|
|
let timestamps: Vec<brk_types::Timestamp> = indexer.vecs.blocks.timestamp.collect();
|
|
let height_years: Vec<u16> = timestamps
|
|
.iter()
|
|
.map(|ts| timestamp_to_year(**ts))
|
|
.collect();
|
|
let height_day1s: Vec<usize> = timestamps
|
|
.iter()
|
|
.map(|ts| (**ts / 86400).saturating_sub(GENESIS_DAY) as usize)
|
|
.collect();
|
|
|
|
let start_price: f64 = PRICES
|
|
.lines()
|
|
.nth(START_HEIGHT - 1)
|
|
.expect("prices.txt too short")
|
|
.parse()
|
|
.expect("Failed to parse seed price");
|
|
|
|
let config = Config::default();
|
|
let mut oracle = Oracle::new(cents_to_bin(start_price * 100.0), config);
|
|
|
|
let total_txs = indexer.vecs.transactions.height.len();
|
|
let total_outputs = indexer.vecs.outputs.value.len();
|
|
|
|
// Pre-collect height-indexed vecs (small). Transaction-indexed vecs are too large.
|
|
let first_tx_index: Vec<TxIndex> = indexer.vecs.transactions.first_tx_index.collect();
|
|
let out_first: Vec<TxOutIndex> = indexer.vecs.outputs.first_txout_index.collect();
|
|
|
|
let ref_config = Config::default();
|
|
|
|
let mut year_stats: Vec<YearStats> = Vec::new();
|
|
let mut overall = YearStats::new(0);
|
|
let mut worst_blocks: Vec<BlockError> = Vec::new();
|
|
let mut total_bias = 0.0f64;
|
|
|
|
// Track oracle daily candles.
|
|
let mut oracle_candles: Vec<DayCandle> = Vec::new();
|
|
let mut current_di: Option<usize> = None;
|
|
|
|
for h in START_HEIGHT..total_heights {
|
|
let ft = first_tx_index[h];
|
|
let next_ft = first_tx_index
|
|
.get(h + 1)
|
|
.copied()
|
|
.unwrap_or(TxIndex::from(total_txs));
|
|
|
|
let out_start = if ft.to_usize() + 1 < next_ft.to_usize() {
|
|
indexer
|
|
.vecs
|
|
.transactions
|
|
.first_txout_index
|
|
.collect_one(ft + 1)
|
|
.unwrap()
|
|
.to_usize()
|
|
} else {
|
|
out_first
|
|
.get(h + 1)
|
|
.copied()
|
|
.unwrap_or(TxOutIndex::from(total_outputs))
|
|
.to_usize()
|
|
};
|
|
let out_end = out_first
|
|
.get(h + 1)
|
|
.copied()
|
|
.unwrap_or(TxOutIndex::from(total_outputs))
|
|
.to_usize();
|
|
|
|
let values: Vec<Sats> = indexer
|
|
.vecs
|
|
.outputs
|
|
.value
|
|
.collect_range_at(out_start, out_end);
|
|
let output_types: Vec<OutputType> = indexer
|
|
.vecs
|
|
.outputs
|
|
.output_type
|
|
.collect_range_at(out_start, out_end);
|
|
|
|
let mut hist = [0u32; NUM_BINS];
|
|
for (sats, output_type) in values.into_iter().zip(output_types) {
|
|
if ref_config.excluded_output_types.contains(&output_type) {
|
|
continue;
|
|
}
|
|
if *sats < ref_config.min_sats
|
|
|| (ref_config.exclude_common_round_values && sats.is_common_round_value())
|
|
{
|
|
continue;
|
|
}
|
|
if let Some(bin) = sats_to_bin(sats) {
|
|
hist[bin] += 1;
|
|
}
|
|
}
|
|
|
|
let ref_bin = oracle.process_histogram(&hist);
|
|
let oracle_price = bin_to_cents(ref_bin) as f64 / 100.0;
|
|
|
|
// Build oracle daily candle.
|
|
let di = height_day1s[h];
|
|
if current_di != Some(di) {
|
|
current_di = Some(di);
|
|
oracle_candles.push(DayCandle {
|
|
day1: di,
|
|
open: oracle_price,
|
|
high: oracle_price,
|
|
low: oracle_price,
|
|
close: oracle_price,
|
|
});
|
|
} else {
|
|
let candle = oracle_candles.last_mut().unwrap();
|
|
if oracle_price > candle.high {
|
|
candle.high = oracle_price;
|
|
}
|
|
if oracle_price < candle.low {
|
|
candle.low = oracle_price;
|
|
}
|
|
candle.close = oracle_price;
|
|
}
|
|
|
|
// Per-block error stats.
|
|
if h < height_bands.len() {
|
|
let (high_bin, low_bin) = height_bands[h];
|
|
if high_bin > 0.0 && low_bin > 0.0 {
|
|
let err = if ref_bin < high_bin {
|
|
ref_bin - high_bin
|
|
} else if ref_bin > low_bin {
|
|
ref_bin - low_bin
|
|
} else {
|
|
0.0
|
|
};
|
|
|
|
let exchange_high = height_ohlc[h][1];
|
|
let exchange_low = height_ohlc[h][2];
|
|
|
|
overall.update(err, exchange_high, exchange_low);
|
|
total_bias += err;
|
|
|
|
let year = height_years[h];
|
|
if year_stats.is_empty() || year_stats.last().unwrap().year != year {
|
|
year_stats.push(YearStats::new(year));
|
|
}
|
|
year_stats
|
|
.last_mut()
|
|
.unwrap()
|
|
.update(err, exchange_high, exchange_low);
|
|
|
|
if err.abs() > BINS_5PCT {
|
|
worst_blocks.push(BlockError {
|
|
height: h,
|
|
oracle_price,
|
|
exchange_low,
|
|
exchange_high,
|
|
error_pct: if err < 0.0 {
|
|
-bins_to_pct(err.abs())
|
|
} else {
|
|
bins_to_pct(err.abs())
|
|
},
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
worst_blocks.sort_by(|a, b| b.error_pct.abs().partial_cmp(&a.error_pct.abs()).unwrap());
|
|
overall.errors.sort_by(|a, b| a.partial_cmp(b).unwrap());
|
|
|
|
// Daily candle comparison: oracle OHLC vs exchange OHLC.
|
|
let mut daily_open_errors: Vec<f64> = Vec::new();
|
|
let mut daily_high_errors: Vec<f64> = Vec::new();
|
|
let mut daily_low_errors: Vec<f64> = Vec::new();
|
|
let mut daily_close_errors: Vec<f64> = Vec::new();
|
|
let mut daily_days = 0u64;
|
|
|
|
for candle in &oracle_candles {
|
|
let di = candle.day1;
|
|
if di >= daily_ohlc.len() {
|
|
continue;
|
|
}
|
|
let ex = &daily_ohlc[di];
|
|
if ex[0] <= 0.0 || ex[3] <= 0.0 {
|
|
continue;
|
|
}
|
|
let ex_open = ex[0];
|
|
let ex_high = ex[1];
|
|
let ex_low = ex[2];
|
|
let ex_close = ex[3];
|
|
|
|
// Error as percentage: (oracle - exchange) / exchange * 100
|
|
daily_open_errors.push((candle.open - ex_open) / ex_open * 100.0);
|
|
daily_high_errors.push((candle.high - ex_high) / ex_high * 100.0);
|
|
daily_low_errors.push((candle.low - ex_low) / ex_low * 100.0);
|
|
daily_close_errors.push((candle.close - ex_close) / ex_close * 100.0);
|
|
daily_days += 1;
|
|
}
|
|
|
|
fn daily_stats(errors: &mut [f64]) -> (f64, f64, f64) {
|
|
let n = errors.len() as f64;
|
|
let rmse = (errors.iter().map(|e| e * e).sum::<f64>() / n).sqrt();
|
|
errors.sort_by(|a, b| a.abs().partial_cmp(&b.abs()).unwrap());
|
|
let max = errors.last().map(|e| e.abs()).unwrap_or(0.0);
|
|
let median = errors[errors.len() / 2].abs();
|
|
(median, rmse, max)
|
|
}
|
|
|
|
let (open_med, open_rmse, open_max) = daily_stats(&mut daily_open_errors);
|
|
let (high_med, high_rmse, high_max) = daily_stats(&mut daily_high_errors);
|
|
let (low_med, low_rmse, low_max) = daily_stats(&mut daily_low_errors);
|
|
let (close_med, close_rmse, close_max) = daily_stats(&mut daily_close_errors);
|
|
|
|
// Print report.
|
|
println!();
|
|
println!(" brk_oracle accuracy report");
|
|
println!(" ══════════════════════════");
|
|
println!();
|
|
println!(" Config: w12, alpha=2/7, search -9/+11, noisy/dust/round-btc filtered");
|
|
println!(
|
|
" Test range: height {} .. {} ({} blocks)",
|
|
START_HEIGHT,
|
|
total_heights - 1,
|
|
overall.total_blocks
|
|
);
|
|
println!(
|
|
" Price range: ${:.0} .. ${:.0}",
|
|
overall.min_price, overall.max_price
|
|
);
|
|
|
|
println!();
|
|
println!(" Per-block accuracy (vs per-height exchange OHLC):");
|
|
println!(" Median: {:.3}%", overall.percentile(50.0));
|
|
println!(" 95th pct: {:.3}%", overall.percentile(95.0));
|
|
println!(" 99th pct: {:.3}%", overall.percentile(99.0));
|
|
println!(" 99.9th pct: {:.3}%", overall.percentile(99.9));
|
|
println!(" RMSE: {:.3}%", overall.rmse_pct());
|
|
println!(" Max: {:.1}%", overall.max_pct());
|
|
println!(
|
|
" Bias: {:+.2} bins",
|
|
total_bias / overall.total_blocks as f64
|
|
);
|
|
println!(
|
|
" > 5%: {} blocks ({:.3}%)",
|
|
overall.gt_5pct,
|
|
overall.gt_5pct as f64 / overall.total_blocks as f64 * 100.0
|
|
);
|
|
println!(" > 10%: {} blocks", overall.gt_10pct);
|
|
println!(" > 20%: {} blocks", overall.gt_20pct);
|
|
|
|
println!();
|
|
println!(
|
|
" Daily candle accuracy ({} days, vs exchange daily OHLC):",
|
|
daily_days
|
|
);
|
|
println!(
|
|
" {:>8} {:>10} {:>10} {:>10}",
|
|
"", "Median", "RMSE", "Max"
|
|
);
|
|
println!(
|
|
" {:>8} {:>9.2}% {:>9.2}% {:>9.1}%",
|
|
"Open", open_med, open_rmse, open_max
|
|
);
|
|
println!(
|
|
" {:>8} {:>9.2}% {:>9.2}% {:>9.1}%",
|
|
"High", high_med, high_rmse, high_max
|
|
);
|
|
println!(
|
|
" {:>8} {:>9.2}% {:>9.2}% {:>9.1}%",
|
|
"Low", low_med, low_rmse, low_max
|
|
);
|
|
println!(
|
|
" {:>8} {:>9.2}% {:>9.2}% {:>9.1}%",
|
|
"Close", close_med, close_rmse, close_max
|
|
);
|
|
|
|
println!();
|
|
println!(" By year:");
|
|
println!(
|
|
" {:<6} {:>7} {:>9} {:>9} {:>9} {:>6} {:>5} {:>5} {:>14}",
|
|
"Year", "Blocks", "Median", "RMSE", "Max", ">5%", ">10%", ">20%", "Price range"
|
|
);
|
|
println!(" {}", "-".repeat(80));
|
|
for ys in &mut year_stats {
|
|
let median = ys.median_pct();
|
|
println!(
|
|
" {:<6} {:>7} {:>8.3}% {:>8.3}% {:>8.1}% {:>6} {:>5} {:>5} ${:.0}..${:.0}",
|
|
ys.year,
|
|
ys.total_blocks,
|
|
median,
|
|
ys.rmse_pct(),
|
|
ys.max_pct(),
|
|
ys.gt_5pct,
|
|
ys.gt_10pct,
|
|
ys.gt_20pct,
|
|
ys.min_price,
|
|
ys.max_price,
|
|
);
|
|
}
|
|
|
|
if !worst_blocks.is_empty() {
|
|
println!();
|
|
println!(" Worst blocks:");
|
|
let show = worst_blocks.len().min(10);
|
|
for wb in &worst_blocks[..show] {
|
|
let dir = if wb.error_pct < 0.0 { "above" } else { "below" };
|
|
println!(
|
|
" height {:>7}: oracle ${:>9.0}, exchange ${:.0}..${:.0} ({:+.1}%, {})",
|
|
wb.height, wb.oracle_price, wb.exchange_low, wb.exchange_high, wb.error_pct, dir
|
|
);
|
|
}
|
|
if worst_blocks.len() > show {
|
|
println!(" ... and {} more", worst_blocks.len() - show);
|
|
}
|
|
}
|
|
|
|
println!();
|
|
}
|