global: snapshot

This commit is contained in:
nym21
2026-01-13 22:32:29 +01:00
parent 0c442b4a71
commit e77993fb76
61 changed files with 5047 additions and 5404 deletions

View File

@@ -1,7 +1,7 @@
use brk_error::Result;
use brk_indexer::Indexer;
use brk_types::{Date, Height, Version};
use vecdb::{Database, EagerVec, ImportableVec, IterableCloneableVec, LazyVecFrom1, VecIndex};
use vecdb::{Database, EagerVec, ImportableVec, IterableCloneableVec, LazyVecFrom1};
use super::Vecs;
use crate::{indexes, internal::ComputedHeightDerivedFirst};
@@ -13,25 +13,17 @@ impl Vecs {
indexer: &Indexer,
indexes: &indexes::Vecs,
) -> Result<Self> {
let height_to_timestamp_monotonic =
let timestamp_monotonic =
EagerVec::forced_import(db, "timestamp_monotonic", version)?;
Ok(Self {
date: LazyVecFrom1::init(
"date",
version,
indexer.vecs.blocks.timestamp.boxed_clone(),
|height: Height, timestamp_iter| {
timestamp_iter.get_at(height.to_usize()).map(Date::from)
},
),
date_monotonic: LazyVecFrom1::init(
"date_monotonic",
version,
height_to_timestamp_monotonic.boxed_clone(),
timestamp_monotonic.boxed_clone(),
|height: Height, timestamp_iter| timestamp_iter.get(height).map(Date::from),
),
timestamp_monotonic: height_to_timestamp_monotonic,
timestamp_monotonic,
timestamp: ComputedHeightDerivedFirst::forced_import(
db,
"timestamp",

View File

@@ -8,7 +8,6 @@ use crate::internal::ComputedHeightDerivedFirst;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub date: LazyVecFrom1<Height, Date, Height, Timestamp>,
pub date_monotonic: LazyVecFrom1<Height, Date, Height, Timestamp>,
pub timestamp_monotonic: EagerVec<PcoVec<Height, Timestamp>>,
pub timestamp: ComputedHeightDerivedFirst<Timestamp>,
}

View File

@@ -68,7 +68,7 @@ pub fn process_blocks(
let height_to_input_count = &inputs.count.height.sum_cum.sum.0;
// From blocks:
let height_to_timestamp = &blocks.time.timestamp_monotonic;
let height_to_date = &blocks.time.date_monotonic;
let height_to_date = &blocks.time.date;
let dateindex_to_first_height = &indexes.dateindex.first_height;
let dateindex_to_height_count = &indexes.dateindex.height_count;
let txindex_to_output_count = &indexes.txindex.output_count;

View File

@@ -18,11 +18,11 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "dateindex", version)?,
date: EagerVec::forced_import(db, "dateindex_date", version)?,
first_height: EagerVec::forced_import(db, "dateindex_first_height", version)?,
height_count: EagerVec::forced_import(db, "dateindex_height_count", version)?,
weekindex: EagerVec::forced_import(db, "dateindex_weekindex", version)?,
monthindex: EagerVec::forced_import(db, "dateindex_monthindex", version)?,
date: EagerVec::forced_import(db, "date", version + Version::ONE)?,
first_height: EagerVec::forced_import(db, "first_height", version)?,
height_count: EagerVec::forced_import(db, "height_count", version)?,
weekindex: EagerVec::forced_import(db, "weekindex", version)?,
monthindex: EagerVec::forced_import(db, "monthindex", version)?,
})
}
}

View File

@@ -1,5 +1,5 @@
use brk_traversable::Traversable;
use brk_types::{DecadeIndex, StoredU64, Version, YearIndex};
use brk_types::{Date, DecadeIndex, StoredU64, Version, YearIndex};
use vecdb::{Database, EagerVec, ImportableVec, PcoVec};
use brk_error::Result;
@@ -7,6 +7,7 @@ use brk_error::Result;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub identity: EagerVec<PcoVec<DecadeIndex, DecadeIndex>>,
pub date: EagerVec<PcoVec<DecadeIndex, Date>>,
pub first_yearindex: EagerVec<PcoVec<DecadeIndex, YearIndex>>,
pub yearindex_count: EagerVec<PcoVec<DecadeIndex, StoredU64>>,
}
@@ -15,8 +16,9 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "decadeindex", version)?,
first_yearindex: EagerVec::forced_import(db, "decadeindex_first_yearindex", version)?,
yearindex_count: EagerVec::forced_import(db, "decadeindex_yearindex_count", version)?,
date: EagerVec::forced_import(db, "date", version)?,
first_yearindex: EagerVec::forced_import(db, "first_yearindex", version)?,
yearindex_count: EagerVec::forced_import(db, "yearindex_count", version)?,
})
}
}

View File

@@ -15,8 +15,8 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "difficultyepoch", version)?,
first_height: EagerVec::forced_import(db, "difficultyepoch_first_height", version)?,
height_count: EagerVec::forced_import(db, "difficultyepoch_height_count", version)?,
first_height: EagerVec::forced_import(db, "first_height", version)?,
height_count: EagerVec::forced_import(db, "height_count", version)?,
})
}
}

View File

@@ -14,7 +14,7 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "halvingepoch", version)?,
first_height: EagerVec::forced_import(db, "halvingepoch_first_height", version)?,
first_height: EagerVec::forced_import(db, "first_height", version)?,
})
}
}

View File

@@ -18,9 +18,9 @@ impl Vecs {
Ok(Self {
identity: EagerVec::forced_import(db, "height", version)?,
dateindex: EagerVec::forced_import(db, "height_dateindex", version)?,
difficultyepoch: EagerVec::forced_import(db, "height_difficultyepoch", version)?,
halvingepoch: EagerVec::forced_import(db, "height_halvingepoch", version)?,
txindex_count: EagerVec::forced_import(db, "height_txindex_count", version)?,
difficultyepoch: EagerVec::forced_import(db, "difficultyepoch", version)?,
halvingepoch: EagerVec::forced_import(db, "halvingepoch", version)?,
txindex_count: EagerVec::forced_import(db, "txindex_count", version)?,
})
}
}

View File

@@ -18,8 +18,8 @@ use std::path::Path;
use brk_error::Result;
use brk_indexer::Indexer;
use brk_traversable::Traversable;
use brk_types::{DateIndex, Indexes, MonthIndex, Version, WeekIndex};
use vecdb::{Database, Exit, PAGE_SIZE, TypedVecIterator};
use brk_types::{Date, DateIndex, Indexes, MonthIndex, Version, WeekIndex};
use vecdb::{Database, Exit, IterableVec, PAGE_SIZE, TypedVecIterator};
use crate::blocks;
@@ -160,7 +160,7 @@ impl Vecs {
self.height.dateindex.compute_transform(
starting_indexes.height,
&blocks_time.date_monotonic,
&blocks_time.date,
|(h, d, ..)| (h, DateIndex::try_from(d).unwrap()),
exit,
)?;
@@ -250,9 +250,10 @@ impl Vecs {
exit,
)?;
self.dateindex.date.compute_from_index(
self.dateindex.date.compute_transform(
starting_dateindex,
&self.dateindex.first_height,
&self.dateindex.identity,
|(di, ..)| (di, Date::from(di)),
exit,
)?;
@@ -290,6 +291,13 @@ impl Vecs {
exit,
)?;
self.weekindex.date.compute_transform(
starting_weekindex,
&self.weekindex.first_dateindex,
|(wi, first_di, ..)| (wi, Date::from(first_di)),
exit,
)?;
self.weekindex.dateindex_count.compute_count_from_indexes(
starting_weekindex,
&self.weekindex.first_dateindex,
@@ -324,6 +332,13 @@ impl Vecs {
exit,
)?;
self.monthindex.date.compute_transform(
starting_monthindex,
&self.monthindex.first_dateindex,
|(mi, first_di, ..)| (mi, Date::from(first_di)),
exit,
)?;
self.monthindex.dateindex_count.compute_count_from_indexes(
starting_monthindex,
&self.monthindex.first_dateindex,
@@ -357,6 +372,17 @@ impl Vecs {
exit,
)?;
let monthindex_first_dateindex = &self.monthindex.first_dateindex;
self.quarterindex.date.compute_transform(
starting_quarterindex,
&self.quarterindex.first_monthindex,
|(qi, first_mi, _)| {
let first_di = monthindex_first_dateindex.iter().get_unwrap(first_mi);
(qi, Date::from(first_di))
},
exit,
)?;
self.quarterindex
.monthindex_count
.compute_count_from_indexes(
@@ -392,6 +418,17 @@ impl Vecs {
exit,
)?;
let monthindex_first_dateindex = &self.monthindex.first_dateindex;
self.semesterindex.date.compute_transform(
starting_semesterindex,
&self.semesterindex.first_monthindex,
|(si, first_mi, _)| {
let first_di = monthindex_first_dateindex.iter().get_unwrap(first_mi);
(si, Date::from(first_di))
},
exit,
)?;
self.semesterindex
.monthindex_count
.compute_count_from_indexes(
@@ -427,6 +464,17 @@ impl Vecs {
exit,
)?;
let monthindex_first_dateindex = &self.monthindex.first_dateindex;
self.yearindex.date.compute_transform(
starting_yearindex,
&self.yearindex.first_monthindex,
|(yi, first_mi, _)| {
let first_di = monthindex_first_dateindex.iter().get_unwrap(first_mi);
(yi, Date::from(first_di))
},
exit,
)?;
self.yearindex.monthindex_count.compute_count_from_indexes(
starting_yearindex,
&self.yearindex.first_monthindex,
@@ -460,6 +508,19 @@ impl Vecs {
exit,
)?;
let yearindex_first_monthindex = &self.yearindex.first_monthindex;
let monthindex_first_dateindex = &self.monthindex.first_dateindex;
self.decadeindex.date.compute_transform(
starting_decadeindex,
&self.decadeindex.first_yearindex,
|(di, first_yi, _)| {
let first_mi = yearindex_first_monthindex.iter().get_unwrap(first_yi);
let first_di = monthindex_first_dateindex.iter().get_unwrap(first_mi);
(di, Date::from(first_di))
},
exit,
)?;
self.decadeindex
.yearindex_count
.compute_count_from_indexes(

View File

@@ -1,5 +1,7 @@
use brk_traversable::Traversable;
use brk_types::{DateIndex, MonthIndex, QuarterIndex, SemesterIndex, StoredU64, Version, YearIndex};
use brk_types::{
Date, DateIndex, MonthIndex, QuarterIndex, SemesterIndex, StoredU64, Version, YearIndex,
};
use vecdb::{Database, EagerVec, ImportableVec, PcoVec};
use brk_error::Result;
@@ -7,6 +9,7 @@ use brk_error::Result;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub identity: EagerVec<PcoVec<MonthIndex, MonthIndex>>,
pub date: EagerVec<PcoVec<MonthIndex, Date>>,
pub first_dateindex: EagerVec<PcoVec<MonthIndex, DateIndex>>,
pub dateindex_count: EagerVec<PcoVec<MonthIndex, StoredU64>>,
pub quarterindex: EagerVec<PcoVec<MonthIndex, QuarterIndex>>,
@@ -18,11 +21,12 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "monthindex", version)?,
first_dateindex: EagerVec::forced_import(db, "monthindex_first_dateindex", version)?,
dateindex_count: EagerVec::forced_import(db, "monthindex_dateindex_count", version)?,
quarterindex: EagerVec::forced_import(db, "monthindex_quarterindex", version)?,
semesterindex: EagerVec::forced_import(db, "monthindex_semesterindex", version)?,
yearindex: EagerVec::forced_import(db, "monthindex_yearindex", version)?,
date: EagerVec::forced_import(db, "date", version)?,
first_dateindex: EagerVec::forced_import(db, "first_dateindex", version)?,
dateindex_count: EagerVec::forced_import(db, "dateindex_count", version)?,
quarterindex: EagerVec::forced_import(db, "quarterindex", version)?,
semesterindex: EagerVec::forced_import(db, "semesterindex", version)?,
yearindex: EagerVec::forced_import(db, "yearindex", version)?,
})
}
}

View File

@@ -1,5 +1,5 @@
use brk_traversable::Traversable;
use brk_types::{MonthIndex, QuarterIndex, StoredU64, Version};
use brk_types::{Date, MonthIndex, QuarterIndex, StoredU64, Version};
use vecdb::{Database, EagerVec, ImportableVec, PcoVec};
use brk_error::Result;
@@ -7,6 +7,7 @@ use brk_error::Result;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub identity: EagerVec<PcoVec<QuarterIndex, QuarterIndex>>,
pub date: EagerVec<PcoVec<QuarterIndex, Date>>,
pub first_monthindex: EagerVec<PcoVec<QuarterIndex, MonthIndex>>,
pub monthindex_count: EagerVec<PcoVec<QuarterIndex, StoredU64>>,
}
@@ -15,8 +16,9 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "quarterindex", version)?,
first_monthindex: EagerVec::forced_import(db, "quarterindex_first_monthindex", version)?,
monthindex_count: EagerVec::forced_import(db, "quarterindex_monthindex_count", version)?,
date: EagerVec::forced_import(db, "date", version)?,
first_monthindex: EagerVec::forced_import(db, "first_monthindex", version)?,
monthindex_count: EagerVec::forced_import(db, "monthindex_count", version)?,
})
}
}

View File

@@ -1,5 +1,5 @@
use brk_traversable::Traversable;
use brk_types::{MonthIndex, SemesterIndex, StoredU64, Version};
use brk_types::{Date, MonthIndex, SemesterIndex, StoredU64, Version};
use vecdb::{Database, EagerVec, ImportableVec, PcoVec};
use brk_error::Result;
@@ -7,6 +7,7 @@ use brk_error::Result;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub identity: EagerVec<PcoVec<SemesterIndex, SemesterIndex>>,
pub date: EagerVec<PcoVec<SemesterIndex, Date>>,
pub first_monthindex: EagerVec<PcoVec<SemesterIndex, MonthIndex>>,
pub monthindex_count: EagerVec<PcoVec<SemesterIndex, StoredU64>>,
}
@@ -15,8 +16,9 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "semesterindex", version)?,
first_monthindex: EagerVec::forced_import(db, "semesterindex_first_monthindex", version)?,
monthindex_count: EagerVec::forced_import(db, "semesterindex_monthindex_count", version)?,
date: EagerVec::forced_import(db, "date", version)?,
first_monthindex: EagerVec::forced_import(db, "first_monthindex", version)?,
monthindex_count: EagerVec::forced_import(db, "monthindex_count", version)?,
})
}
}

View File

@@ -21,8 +21,8 @@ impl Vecs {
indexer.vecs.transactions.txid.boxed_clone(),
|index, _| Some(index),
),
input_count: EagerVec::forced_import(db, "txindex_input_count", version)?,
output_count: EagerVec::forced_import(db, "txindex_output_count", version)?,
input_count: EagerVec::forced_import(db, "input_count", version)?,
output_count: EagerVec::forced_import(db, "output_count", version)?,
})
}
}

View File

@@ -1,5 +1,5 @@
use brk_traversable::Traversable;
use brk_types::{DateIndex, StoredU64, Version, WeekIndex};
use brk_types::{Date, DateIndex, StoredU64, Version, WeekIndex};
use vecdb::{Database, EagerVec, ImportableVec, PcoVec};
use brk_error::Result;
@@ -7,6 +7,7 @@ use brk_error::Result;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub identity: EagerVec<PcoVec<WeekIndex, WeekIndex>>,
pub date: EagerVec<PcoVec<WeekIndex, Date>>,
pub first_dateindex: EagerVec<PcoVec<WeekIndex, DateIndex>>,
pub dateindex_count: EagerVec<PcoVec<WeekIndex, StoredU64>>,
}
@@ -15,8 +16,9 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "weekindex", version)?,
first_dateindex: EagerVec::forced_import(db, "weekindex_first_dateindex", version)?,
dateindex_count: EagerVec::forced_import(db, "weekindex_dateindex_count", version)?,
date: EagerVec::forced_import(db, "date", version)?,
first_dateindex: EagerVec::forced_import(db, "first_dateindex", version)?,
dateindex_count: EagerVec::forced_import(db, "dateindex_count", version)?,
})
}
}

View File

@@ -1,5 +1,5 @@
use brk_traversable::Traversable;
use brk_types::{DecadeIndex, MonthIndex, StoredU64, Version, YearIndex};
use brk_types::{Date, DecadeIndex, MonthIndex, StoredU64, Version, YearIndex};
use vecdb::{Database, EagerVec, ImportableVec, PcoVec};
use brk_error::Result;
@@ -7,6 +7,7 @@ use brk_error::Result;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub identity: EagerVec<PcoVec<YearIndex, YearIndex>>,
pub date: EagerVec<PcoVec<YearIndex, Date>>,
pub first_monthindex: EagerVec<PcoVec<YearIndex, MonthIndex>>,
pub monthindex_count: EagerVec<PcoVec<YearIndex, StoredU64>>,
pub decadeindex: EagerVec<PcoVec<YearIndex, DecadeIndex>>,
@@ -16,9 +17,10 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "yearindex", version)?,
first_monthindex: EagerVec::forced_import(db, "yearindex_first_monthindex", version)?,
monthindex_count: EagerVec::forced_import(db, "yearindex_monthindex_count", version)?,
decadeindex: EagerVec::forced_import(db, "yearindex_decadeindex", version)?,
date: EagerVec::forced_import(db, "date", version)?,
first_monthindex: EagerVec::forced_import(db, "first_monthindex", version)?,
monthindex_count: EagerVec::forced_import(db, "monthindex_count", version)?,
decadeindex: EagerVec::forced_import(db, "decadeindex", version)?,
})
}
}

View File

@@ -290,7 +290,7 @@ impl Computer {
info!("Computing prices...");
let i = Instant::now();
price.compute(&starting_indexes, exit)?;
price.compute(indexer, &self.indexes, &starting_indexes, exit)?;
info!("Computed prices in {:?}", i.elapsed());
}

View File

@@ -1,15 +1,35 @@
use brk_error::Result;
use brk_indexer::Indexer;
use vecdb::Exit;
use super::Vecs;
use crate::ComputeIndexes;
use crate::{indexes, ComputeIndexes};
impl Vecs {
pub fn compute(&mut self, starting_indexes: &ComputeIndexes, exit: &Exit) -> Result<()> {
#[allow(unused_variables)]
pub fn compute(
&mut self,
indexer: &Indexer,
indexes: &indexes::Vecs,
starting_indexes: &ComputeIndexes,
exit: &Exit,
) -> Result<()> {
self.usd.compute(starting_indexes, &self.cents, exit)?;
self.sats.compute(starting_indexes, &self.usd, exit)?;
// Oracle price computation is slow and still WIP, only run in dev builds
#[cfg(debug_assertions)]
{
use std::time::Instant;
use tracing::info;
info!("Computing oracle prices...");
let i = Instant::now();
self.oracle.compute(indexer, indexes, starting_indexes, exit)?;
info!("Computed oracle prices in {:?}", i.elapsed());
}
let _lock = exit.lock();
self.db().compact()?;
Ok(())

View File

@@ -2,10 +2,12 @@ mod compute;
mod fetch;
pub mod cents;
pub mod oracle;
pub mod sats;
pub mod usd;
pub use cents::Vecs as CentsVecs;
pub use oracle::Vecs as OracleVecs;
pub use sats::Vecs as SatsVecs;
pub use usd::Vecs as UsdVecs;
@@ -31,6 +33,7 @@ pub struct Vecs {
pub cents: CentsVecs,
pub usd: UsdVecs,
pub sats: SatsVecs,
pub oracle: OracleVecs,
}
impl Vecs {
@@ -64,6 +67,7 @@ impl Vecs {
let cents = CentsVecs::forced_import(db, version)?;
let usd = UsdVecs::forced_import(db, version, indexes)?;
let sats = SatsVecs::forced_import(db, version, indexes)?;
let oracle = OracleVecs::forced_import(db, version)?;
Ok(Self {
db: db.clone(),
@@ -71,6 +75,7 @@ impl Vecs {
cents,
usd,
sats,
oracle,
})
}

View File

@@ -0,0 +1,385 @@
use std::collections::VecDeque;
use brk_error::Result;
use brk_indexer::Indexer;
use brk_types::{
Cents, Close, Date, DateIndex, Height, High, Low, OHLCCents, Open, OutputType, Sats, StoredU32,
StoredU64, TxIndex,
};
use tracing::info;
use vecdb::{
AnyStoredVec, AnyVec, Exit, GenericStoredVec, IterableVec, TypedVecIterator, VecIndex,
VecIterator,
};
use super::{
Vecs,
config::OracleConfig,
histogram::{Histogram, TOTAL_BINS},
stencil::{find_best_price, is_round_sats, refine_price},
};
use crate::{ComputeIndexes, indexes};
impl Vecs {
    /// Compute oracle prices from on-chain data.
    ///
    /// Slides a window of per-block output-value histograms across the chain:
    /// for each block, qualifying 2-output transactions are binned, the window
    /// aggregate is smoothed/normalized, `find_best_price` locates a rough
    /// price via stencil sliding, and `refine_price` sharpens it against the
    /// non-round output values. When the window holds fewer than
    /// `config.min_tx_count` qualifying transactions, the previous block's
    /// price is reused. Finishes by aggregating per-block prices to daily
    /// OHLC via `compute_daily_ohlc`.
    ///
    /// # Errors
    /// Propagates storage/iteration errors from the underlying vec database.
    pub fn compute(
        &mut self,
        indexer: &Indexer,
        indexes: &indexes::Vecs,
        starting_indexes: &ComputeIndexes,
        exit: &Exit,
    ) -> Result<()> {
        // Validate versions
        self.price
            .validate_computed_version_or_reset(indexer.vecs.outputs.value.version())?;
        self.ohlc
            .validate_computed_version_or_reset(indexes.dateindex.date.version())?;
        let last_height = Height::from(indexer.vecs.blocks.timestamp.len());
        // Resume from the earlier of the requested start and what is already computed.
        let start_height = starting_indexes.height.min(Height::from(self.price.len()));
        if start_height >= last_height {
            return Ok(());
        }
        // Create buffered iterators ONCE (16KB buffered reads, reused across blocks)
        let mut height_to_first_txindex_iter = indexer.vecs.transactions.first_txindex.into_iter();
        let mut txindex_to_first_txinindex_iter =
            indexer.vecs.transactions.first_txinindex.into_iter();
        let mut txindex_to_first_txoutindex_iter =
            indexer.vecs.transactions.first_txoutindex.into_iter();
        let mut txindex_to_base_size_iter = indexer.vecs.transactions.base_size.into_iter();
        let mut txindex_to_total_size_iter = indexer.vecs.transactions.total_size.into_iter();
        let mut txoutindex_to_value_iter = indexer.vecs.outputs.value.into_iter();
        let mut txoutindex_to_outputtype_iter = indexer.vecs.outputs.outputtype.into_iter();
        let mut txinindex_to_outpoint_iter = indexer.vecs.inputs.outpoint.into_iter();
        let mut height_to_dateindex_iter = indexes.height.dateindex.iter();
        let mut txindex_to_input_count_iter = indexes.txindex.input_count.iter();
        let mut txindex_to_output_count_iter = indexes.txindex.output_count.iter();
        let mut dateindex_to_first_height_iter = indexes.dateindex.first_height.iter();
        // Sliding window state - use sparse storage for per-block histograms
        // Each block has ~40 outputs → ~40 sparse entries vs 1600 bins
        let mut window_sparse: VecDeque<Vec<(u16, f64)>> = VecDeque::with_capacity(2016);
        let mut window_tx_counts: VecDeque<usize> = VecDeque::with_capacity(2016);
        let mut aggregated_histogram = Histogram::new();
        let mut total_qualifying_txs: usize = 0;
        let mut scratch_histogram = Histogram::new();
        // Incremental by-bin index for refine_price (avoids O(80k) rebuild per block)
        // Stores (bin, sats) pairs per block for removal tracking
        let mut window_by_bin_entries: VecDeque<Vec<(u16, Sats)>> = VecDeque::with_capacity(2016);
        // Aggregated view: non-round sats grouped by histogram bin
        let mut aggregated_by_bin: [Vec<Sats>; TOTAL_BINS] = std::array::from_fn(|_| Vec::new());
        // Track current date for same-day check
        let mut current_dateindex = DateIndex::from(0usize);
        let mut current_date_first_txindex = TxIndex::from(0usize);
        // Previous price for fallback (default ~$100,000)
        // NOTE(review): 10_000_000 cents = $100,000 — implies Cents is USD cents; confirm unit.
        let mut prev_price = if start_height > Height::ZERO {
            self.price
                .iter()?
                .get(start_height.decremented().unwrap())
                .unwrap_or(Cents::from(10_000_000i64))
        } else {
            Cents::from(10_000_000i64)
        };
        // Progress tracking
        let total_blocks = last_height.to_usize() - start_height.to_usize();
        let mut last_progress = 0u8;
        let total_txs = indexer.vecs.transactions.height.len();
        // Sparse entries for current block (reused buffer)
        let mut block_sparse: Vec<(u16, f64)> = Vec::with_capacity(80);
        // Cached config (only changes at year boundaries)
        let mut cached_year = 0u16;
        let mut config = OracleConfig::for_year(2009);
        let mut cached_slide_range = config.slide_range();
        // Process each block
        for height in start_height.to_usize()..last_height.to_usize() {
            let height = Height::from(height);
            // Log progress every 1%
            let progress =
                ((height.to_usize() - start_height.to_usize()) * 100 / total_blocks.max(1)) as u8;
            if progress > last_progress {
                last_progress = progress;
                info!("Oracle price computation: {}%", progress);
            }
            // Get transaction range for this block
            let first_txindex = height_to_first_txindex_iter.get_at_unwrap(height.to_usize());
            // Last block has no successor: fall back to the total tx count as the end bound.
            let next_first_txindex = height_to_first_txindex_iter
                .get_at(height.to_usize() + 1)
                .unwrap_or(TxIndex::from(total_txs));
            let block_dateindex = height_to_dateindex_iter.get_unwrap(height);
            // Update current date's first txindex on date transition
            if block_dateindex != current_dateindex {
                current_dateindex = block_dateindex;
                if let Some(first_height_of_date) =
                    dateindex_to_first_height_iter.get(block_dateindex)
                {
                    current_date_first_txindex = height_to_first_txindex_iter
                        .get_at(first_height_of_date.to_usize())
                        .unwrap_or(first_txindex);
                }
                // Update config if year changed
                let year = Date::from(block_dateindex).year();
                if year != cached_year {
                    cached_year = year;
                    config = OracleConfig::for_year(year);
                    cached_slide_range = config.slide_range();
                }
            }
            let tx_start = first_txindex.to_usize() + 1; // skip coinbase
            let tx_end = next_first_txindex.to_usize();
            // Clear per-block state
            block_sparse.clear();
            let mut block_by_bin: Vec<(u16, Sats)> = Vec::with_capacity(40); // (bin, sats) for non-round outputs
            let mut block_tx_count = 0usize;
            // Sequential iteration with buffered reads (cache-friendly)
            for txindex in tx_start..tx_end {
                // Check output_count FIRST - ~95% of txs don't have exactly 2 outputs
                // This avoids fetching input_count for most transactions
                let output_count: StoredU64 =
                    txindex_to_output_count_iter.get_unwrap(TxIndex::from(txindex));
                if *output_count != 2 {
                    continue;
                }
                let input_count: StoredU64 =
                    txindex_to_input_count_iter.get_unwrap(TxIndex::from(txindex));
                if *input_count > 5 || *input_count == 0 {
                    continue;
                }
                let first_txoutindex = txindex_to_first_txoutindex_iter.get_at_unwrap(txindex);
                let first_txinindex = txindex_to_first_txinindex_iter.get_at_unwrap(txindex);
                // Check outputs: no OP_RETURN, collect values
                let mut has_opreturn = false;
                let mut values: [Sats; 2] = [Sats::ZERO; 2];
                for i in 0..2usize {
                    let txoutindex = first_txoutindex.to_usize() + i;
                    let outputtype = txoutindex_to_outputtype_iter.get_at_unwrap(txoutindex);
                    if outputtype == OutputType::OpReturn {
                        has_opreturn = true;
                        break;
                    }
                    values[i] = txoutindex_to_value_iter.get_at_unwrap(txoutindex);
                }
                if has_opreturn {
                    continue;
                }
                // Check witness size (SegWit era only, activated Aug 2017)
                // Pre-SegWit transactions have no witness data
                if cached_year >= 2017 {
                    let base_size: StoredU32 = txindex_to_base_size_iter.get_at_unwrap(txindex);
                    let total_size: StoredU32 = txindex_to_total_size_iter.get_at_unwrap(txindex);
                    if *total_size - *base_size > 500 {
                        continue;
                    }
                }
                // Check inputs: no same-day spend
                let mut disqualified = false;
                for i in 0..*input_count as usize {
                    let txinindex = first_txinindex.to_usize() + i;
                    let outpoint = txinindex_to_outpoint_iter.get_at_unwrap(txinindex);
                    if !outpoint.is_coinbase() && outpoint.txindex() >= current_date_first_txindex {
                        disqualified = true;
                        break;
                    }
                }
                if disqualified {
                    continue;
                }
                // Transaction qualifies!
                block_tx_count += 1;
                for sats in values {
                    if let Some(bin) = Histogram::sats_to_bin(sats) {
                        block_sparse.push((bin as u16, 1.0));
                        // Track non-round outputs for refine_price
                        if !is_round_sats(sats) {
                            block_by_bin.push((bin as u16, sats));
                        }
                    }
                }
            }
            // Update sliding window using sparse operations
            let window_size = config.blocks_per_window as usize;
            while window_sparse.len() >= window_size {
                if let Some(old_sparse) = window_sparse.pop_front() {
                    aggregated_histogram.subtract_sparse(&old_sparse);
                }
                if let Some(old_count) = window_tx_counts.pop_front() {
                    total_qualifying_txs -= old_count;
                }
                // Remove old by-bin entries from aggregated view
                if let Some(old_by_bin) = window_by_bin_entries.pop_front() {
                    for (bin, sats) in old_by_bin {
                        let vec = &mut aggregated_by_bin[bin as usize];
                        if let Some(pos) = vec.iter().position(|&s| s == sats) {
                            vec.swap_remove(pos);
                        }
                    }
                }
            }
            aggregated_histogram.add_sparse(&block_sparse);
            total_qualifying_txs += block_tx_count;
            window_sparse.push_back(block_sparse.clone());
            window_tx_counts.push_back(block_tx_count);
            // Add new by-bin entries to aggregated view
            for &(bin, sats) in &block_by_bin {
                aggregated_by_bin[bin as usize].push(sats);
            }
            window_by_bin_entries.push_back(block_by_bin);
            // Compute price
            let price_cents = if total_qualifying_txs >= config.min_tx_count as usize {
                // Work on a copy so the running aggregate stays unsmoothed/unnormalized.
                scratch_histogram.copy_from(&aggregated_histogram);
                scratch_histogram.smooth_round_btc();
                scratch_histogram.normalize();
                let (min_slide, max_slide) = cached_slide_range;
                if let Some(rough_price) = find_best_price(&scratch_histogram, min_slide, max_slide)
                {
                    refine_price(&aggregated_by_bin, rough_price)
                } else {
                    prev_price
                }
            } else {
                prev_price
            };
            prev_price = price_cents;
            self.price
                .truncate_push_at(height.to_usize(), price_cents)?;
        }
        // Write height prices
        {
            let _lock = exit.lock();
            self.price.write()?;
        }
        info!("Oracle price computation: 100%");
        // Aggregate to daily OHLC
        self.compute_daily_ohlc(indexes, starting_indexes, exit)?;
        Ok(())
    }
    /// Aggregate per-block prices to daily OHLC.
    ///
    /// For each date, scans that day's blocks and folds their per-block prices
    /// into open/high/low/close. Days without any priced block reuse the
    /// previous day's OHLC (or the default for day zero).
    ///
    /// # Errors
    /// Propagates storage/iteration errors from the underlying vec database.
    fn compute_daily_ohlc(
        &mut self,
        indexes: &indexes::Vecs,
        starting_indexes: &ComputeIndexes,
        exit: &Exit,
    ) -> Result<()> {
        let last_dateindex = DateIndex::from(indexes.dateindex.date.len());
        let start_dateindex = starting_indexes
            .dateindex
            .min(DateIndex::from(self.ohlc.len()));
        if start_dateindex >= last_dateindex {
            return Ok(());
        }
        let last_height = Height::from(self.price.len());
        let mut height_to_price_iter = self.price.iter()?;
        let mut dateindex_to_first_height_iter = indexes.dateindex.first_height.iter();
        let mut height_count_iter = indexes.dateindex.height_count.iter();
        for dateindex in start_dateindex.to_usize()..last_dateindex.to_usize() {
            let dateindex = DateIndex::from(dateindex);
            let first_height = dateindex_to_first_height_iter.get_unwrap(dateindex);
            let count = height_count_iter.get_unwrap(dateindex);
            if *count == 0 || first_height >= last_height {
                continue;
            }
            let count = *count as usize;
            // Compute OHLC from block prices
            let mut open = None;
            let mut high = Cents::from(0i64);
            let mut low = Cents::from(i64::MAX);
            let mut close = Cents::from(0i64);
            // NOTE(review): despite the name, this counts priced BLOCKS in the
            // day (one increment per block with a price), not transactions —
            // confirm intent before relying on it downstream.
            let mut tx_count = 0u32;
            for i in 0..count {
                let height = first_height + Height::from(i);
                if height >= last_height {
                    break;
                }
                if let Some(price) = height_to_price_iter.get(height) {
                    if open.is_none() {
                        open = Some(price);
                    }
                    if price > high {
                        high = price;
                    }
                    if price < low {
                        low = price;
                    }
                    close = price;
                    tx_count += 1;
                }
            }
            let ohlc = if let Some(open_price) = open {
                OHLCCents {
                    open: Open::new(open_price),
                    high: High::new(high),
                    low: Low::new(low),
                    close: Close::new(close),
                }
            } else {
                // No prices for this day, use previous
                if dateindex > DateIndex::from(0usize) {
                    self.ohlc
                        .iter()?
                        .get(dateindex.decremented().unwrap())
                        .unwrap_or_default()
                } else {
                    OHLCCents::default()
                }
            };
            self.ohlc.truncate_push_at(dateindex.to_usize(), ohlc)?;
            self.tx_count
                .truncate_push_at(dateindex.to_usize(), StoredU32::from(tx_count))?;
        }
        // Write daily data
        {
            let _lock = exit.lock();
            self.ohlc.write()?;
            self.tx_count.write()?;
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,120 @@
//! Era-based configuration for the UTXOracle algorithm.
//! Different time periods require different price bounds and aggregation windows
//! due to varying transaction volumes and price levels.
/// Tuning parameters for one era of the UTXOracle estimator.
#[derive(Debug, Clone, Copy)]
pub struct OracleConfig {
    /// Minimum expected price in cents (e.g., 10 = $0.10)
    pub min_price_cents: u64,
    /// Maximum expected price in cents (e.g., 100_000_000 = $1,000,000)
    pub max_price_cents: u64,
    /// Number of blocks to aggregate for sufficient sample size
    pub blocks_per_window: u32,
    /// Minimum qualifying transactions needed for a valid estimate
    pub min_tx_count: u32,
}

impl OracleConfig {
    /// Get configuration for a given year.
    ///
    /// Any year outside the listed historical ranges (including 2017 and
    /// later) uses the modern-era settings.
    pub fn for_year(year: u16) -> Self {
        // (min_price_cents, max_price_cents, blocks_per_window, min_tx_count)
        let (min_price_cents, max_price_cents, blocks_per_window, min_tx_count) =
            if (2009..=2010).contains(&year) {
                // Very early Bitcoin: $0 - ~$0.10, very few transactions.
                // Bounds $0.01..$1.00, window ~2 weeks.
                (1, 100, 2016, 50)
            } else if year == 2011 {
                // First major price movements ($0.30 - $30).
                // Bounds $0.10..$100, window ~1 week.
                (10, 10_000, 1008, 100)
            } else if (2012..=2013).contains(&year) {
                // Growing adoption ($5 - $1,200). Bounds $1..$2,000, ~2 days.
                (100, 200_000, 288, 500)
            } else if (2014..=2016).contains(&year) {
                // Post-bubble consolidation ($200 - $1,000).
                // Bounds $100..$20,000, window ~1 day.
                (10_000, 2_000_000, 144, 1000)
            } else {
                // Modern era ($1,000 - $1,000,000+). Window ~1 day.
                (100_000, 100_000_000, 144, 2000)
            };
        Self {
            min_price_cents,
            max_price_cents,
            blocks_per_window,
            min_tx_count,
        }
    }

    /// Convert price bounds to histogram slide range.
    /// Returns (min_slide, max_slide) for stencil positioning.
    ///
    /// The stencil center (bin 600) corresponds to 0.001 BTC.
    /// At $100,000/BTC, 0.001 BTC = $100, so position 0 = $100,000/BTC.
    ///
    /// For a price P in cents/BTC: slide = (7 - log10(P)) * 200, so
    /// higher prices give lower (negative) slides and lower prices give
    /// higher (positive) slides.
    pub fn slide_range(&self) -> (i32, i32) {
        // Shared formula; the truncating cast matches the bin granularity.
        let slide_for = |price_cents: u64| ((7.0 - (price_cents as f64).log10()) * 200.0) as i32;
        // min_slide comes from the maximum price, max_slide from the minimum.
        (
            slide_for(self.max_price_cents),
            slide_for(self.min_price_cents),
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Era lookup returns the documented price bounds.
    #[test]
    fn test_config_for_year() {
        for (year, expected_min, expected_max) in [
            (2020u16, 100_000u64, 100_000_000u64),
            (2015, 10_000, 2_000_000),
        ] {
            let config = OracleConfig::for_year(year);
            assert_eq!(config.min_price_cents, expected_min);
            assert_eq!(config.max_price_cents, expected_max);
        }
    }

    /// Slide bounds follow slide = (7 - log10(price_cents)) * 200.
    #[test]
    fn test_slide_range() {
        // 2024 config: $1,000 to $1,000,000.
        // $1,000,000 = 10^8 cents → slide = (7-8)*200 = -200
        // $1,000 = 10^5 cents → slide = (7-5)*200 = 400
        let (min, max) = OracleConfig::for_year(2024).slide_range();
        assert_eq!((min, max), (-200, 400));

        // 2015 config: $100 to $20,000.
        // $20,000 = 2*10^6 cents → slide = (7-6.3)*200 ≈ 140
        // $100 = 10^4 cents → slide = (7-4)*200 = 600
        let (min, max) = OracleConfig::for_year(2015).slide_range();
        assert!((101..200).contains(&min)); // ~140
        assert_eq!(max, 600);
    }
}

View File

@@ -0,0 +1,327 @@
//! Log-scale histogram for UTXOracle price detection.
//! Bins output values on a logarithmic scale to detect periodic patterns
//! From round USD amounts.
use brk_types::Sats;
/// Histogram configuration constants
/// Each decade spans 200 bins, so a single bin covers a factor of
/// 10^(1/200) ≈ 1.16% in output value (~0.5% in log terms).
pub const BINS_PER_DECADE: usize = 200;
pub const MIN_LOG_BTC: f64 = -6.0; // 10^-6 BTC = 100 sats
pub const MAX_LOG_BTC: f64 = 2.0; // 10^2 BTC = 100 BTC
pub const NUM_DECADES: usize = 8; // -6 to +2
pub const TOTAL_BINS: usize = NUM_DECADES * BINS_PER_DECADE; // 1600 bins
/// Minimum output value to consider (10,000 sats = 0.0001 BTC)
pub const MIN_OUTPUT_SATS: Sats = Sats::_10K;
/// Maximum output value to consider (10 BTC)
pub const MAX_OUTPUT_SATS: Sats = Sats::_10BTC;
/// Round BTC bin indices that should be smoothed to avoid false positives
/// These are bins where round BTC amounts would naturally cluster
/// (bin ≈ (log10(btc) + 6) / 8 * 1600; each decade lists the 1/2/3/5 steps).
const ROUND_BTC_BINS: &[usize] = &[
    201,  // 1k sats (0.00001 BTC)
    401,  // 10k sats (0.0001 BTC)
    461,  // 20k sats
    496,  // 30k sats
    540,  // 50k sats
    601,  // 100k sats (0.001 BTC)
    661,  // 200k sats
    696,  // 300k sats
    740,  // 500k sats
    801,  // 0.01 BTC
    861,  // 0.02 BTC
    896,  // 0.03 BTC
    940,  // 0.05 BTC (bin 940 ≈ 10^-1.3 BTC ≈ 0.0501; the earlier "0.04" label was wrong)
    1001, // 0.1 BTC
    1061, // 0.2 BTC
    1096, // 0.3 BTC
    1140, // 0.5 BTC
    1201, // 1 BTC
];
/// Log-scale histogram for output values
/// Covers 10^-6 to 10^2 BTC across `TOTAL_BINS` (1600) log-spaced buckets.
#[derive(Clone)]
pub struct Histogram {
    // Per-bin weight accumulated from output values.
    bins: [f64; TOTAL_BINS],
    // Number of samples added (one per add/add_sparse entry), not a weight sum.
    count: usize,
    /// Running sum of all bin values (tracked incrementally for fast normalize)
    sum: f64,
}
impl Default for Histogram {
fn default() -> Self {
Self::new()
}
}
impl Histogram {
    /// Create a new empty histogram
    pub fn new() -> Self {
        Self {
            bins: [0.0; TOTAL_BINS],
            count: 0,
            sum: 0.0,
        }
    }

    /// Reset the histogram to empty
    #[allow(dead_code)] // Utility for reusing histograms
    pub fn clear(&mut self) {
        self.bins.fill(0.0);
        self.count = 0;
        self.sum = 0.0;
    }

    /// Get the number of samples added
    #[allow(dead_code)] // For v2 confidence scoring
    pub fn count(&self) -> usize {
        self.count
    }

    /// Get the bins array
    pub fn bins(&self) -> &[f64; TOTAL_BINS] {
        &self.bins
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Private helpers for bin operations that maintain the sum invariant:
    // `self.sum` must always equal the sum of all entries in `self.bins`.
    // ─────────────────────────────────────────────────────────────────────────

    /// Add value to a bin, maintaining sum invariant
    #[inline]
    fn bin_add(&mut self, bin: usize, value: f64) {
        self.bins[bin] += value;
        self.sum += value;
    }

    /// Set a bin to a new value, maintaining sum invariant
    #[inline]
    fn bin_set(&mut self, bin: usize, new_value: f64) {
        let old_value = self.bins[bin];
        self.bins[bin] = new_value;
        self.sum += new_value - old_value;
    }

    /// Subtract from a bin (clamped to 0), maintaining sum invariant
    /// Returns the actual amount subtracted
    #[inline]
    fn bin_sub_clamped(&mut self, bin: usize, value: f64) -> f64 {
        let old_value = self.bins[bin];
        let new_value = (old_value - value).max(0.0);
        self.bins[bin] = new_value;
        let removed = old_value - new_value;
        self.sum -= removed;
        removed
    }

    // ─────────────────────────────────────────────────────────────────────────

    /// Convert satoshi value to bin index
    /// Returns None if value is outside the [MIN_OUTPUT_SATS, MAX_OUTPUT_SATS] range
    #[inline]
    pub fn sats_to_bin(sats: Sats) -> Option<usize> {
        if sats < MIN_OUTPUT_SATS || sats > MAX_OUTPUT_SATS {
            return None;
        }
        // Convert sats to BTC (log scale)
        let btc = f64::from(sats) / f64::from(Sats::ONE_BTC);
        let log_btc = btc.log10();
        // Map to bin index: log_btc in [-6, 2] -> bin in [0, 1600)
        let normalized = (log_btc - MIN_LOG_BTC) / (MAX_LOG_BTC - MIN_LOG_BTC);
        let bin = (normalized * TOTAL_BINS as f64) as usize;
        if bin < TOTAL_BINS { Some(bin) } else { None }
    }

    /// Convert bin index to approximate satoshi value (lower edge of the bin)
    #[allow(dead_code)] // Inverse of sats_to_bin, useful for debugging
    #[inline]
    pub fn bin_to_sats(bin: usize) -> Sats {
        let normalized = bin as f64 / TOTAL_BINS as f64;
        let log_btc = MIN_LOG_BTC + normalized * (MAX_LOG_BTC - MIN_LOG_BTC);
        let btc = 10_f64.powf(log_btc);
        Sats::from((btc * f64::from(Sats::ONE_BTC)) as u64)
    }

    /// Add a value to the histogram with the given weight
    /// Values outside the histogram range are silently ignored.
    #[allow(dead_code)] // Used in tests and non-sparse paths
    #[inline]
    pub fn add(&mut self, sats: Sats, weight: f64) {
        if let Some(bin) = Self::sats_to_bin(sats) {
            self.bin_add(bin, weight);
            self.count += 1;
        }
    }

    /// Add another histogram to this one
    #[allow(dead_code)] // Non-sparse alternative
    pub fn add_histogram(&mut self, other: &Histogram) {
        for (i, &v) in other.bins.iter().enumerate() {
            if v > 0.0 {
                self.bin_add(i, v);
            }
        }
        self.count += other.count;
    }

    /// Subtract another histogram from this one
    /// Clamps bins to >= 0 to handle floating-point precision issues
    #[allow(dead_code)] // Non-sparse alternative
    pub fn subtract_histogram(&mut self, other: &Histogram) {
        for (i, &v) in other.bins.iter().enumerate() {
            if v > 0.0 {
                self.bin_sub_clamped(i, v);
            }
        }
        self.count = self.count.saturating_sub(other.count);
    }

    /// Add sparse entries to this histogram (O(entries) instead of O(1600))
    /// Each entry counts as one sample toward `count`.
    #[inline]
    pub fn add_sparse(&mut self, entries: &[(u16, f64)]) {
        for &(bin, value) in entries {
            self.bin_add(bin as usize, value);
        }
        self.count += entries.len();
    }

    /// Subtract sparse entries from this histogram (O(entries) instead of O(1600))
    #[inline]
    pub fn subtract_sparse(&mut self, entries: &[(u16, f64)]) {
        for &(bin, value) in entries {
            self.bin_sub_clamped(bin as usize, value);
        }
        self.count = self.count.saturating_sub(entries.len());
    }

    /// Add a value and return the bin index (for sparse collection)
    #[allow(dead_code)] // Alternative API for hybrid approaches
    #[inline]
    pub fn add_and_get_bin(&mut self, sats: Sats, weight: f64) -> Option<u16> {
        if let Some(bin) = Self::sats_to_bin(sats) {
            self.bin_add(bin, weight);
            self.count += 1;
            Some(bin as u16)
        } else {
            None
        }
    }

    /// Copy from another histogram (avoids allocation vs clone)
    #[inline]
    pub fn copy_from(&mut self, other: &Histogram) {
        self.bins.copy_from_slice(&other.bins);
        self.count = other.count;
        self.sum = other.sum;
    }

    /// Smooth over round BTC amounts to prevent false positives
    /// Replaces each round BTC bin with the average of its neighbors
    pub fn smooth_round_btc(&mut self) {
        for &bin in ROUND_BTC_BINS {
            if bin > 0 && bin < TOTAL_BINS - 1 {
                let new_val = (self.bins[bin - 1] + self.bins[bin + 1]) / 2.0;
                self.bin_set(bin, new_val);
            }
        }
    }

    /// Normalize the histogram so bins sum to 1.0, then cap extremes
    /// Python caps at 0.008 after normalization to remove outliers
    /// Uses pre-tracked sum for O(1) instead of O(1600) sum computation
    ///
    /// FIX: `self.sum` is recomputed from the scaled/capped bins so the
    /// "sum == Σ bins" invariant survives normalization. Previously the stale
    /// pre-normalization sum was kept, which would corrupt any subsequent
    /// `normalize`, `add*` or `subtract*` call on the same histogram.
    pub fn normalize(&mut self) {
        if self.sum > 0.0 {
            let inv_sum = 1.0 / self.sum;
            let mut new_sum = 0.0;
            for bin in &mut self.bins {
                if *bin > 0.0 {
                    *bin *= inv_sum;
                    // Cap extremes (0.008 chosen by historical testing in Python)
                    if *bin > 0.008 {
                        *bin = 0.008;
                    }
                    new_sum += *bin;
                }
            }
            self.sum = new_sum;
        }
    }

    /// Get the value at a specific bin (0.0 for out-of-range indices)
    #[allow(dead_code)] // Alternative to direct bins() access
    #[inline]
    pub fn get(&self, bin: usize) -> f64 {
        self.bins.get(bin).copied().unwrap_or(0.0)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sats_to_bin() {
        // Small values land in the lower half of the histogram.
        let low = Histogram::sats_to_bin(Sats::_10K).unwrap();
        assert!(low < TOTAL_BINS / 2);
        // 1 BTC lands in the upper half.
        let high = Histogram::sats_to_bin(Sats::_1BTC).unwrap();
        assert!(high > TOTAL_BINS / 2);
        // Values outside [MIN_OUTPUT_SATS, MAX_OUTPUT_SATS] are rejected.
        assert!(Histogram::sats_to_bin(Sats::_100).is_none());
        assert!(Histogram::sats_to_bin(Sats::_100BTC).is_none());
    }

    #[test]
    fn test_bin_to_sats_roundtrip() {
        let samples = [Sats::_10K, Sats::_100K, Sats::_1M, Sats::_10M, Sats::_1BTC];
        for sats in samples {
            if let Some(bin) = Histogram::sats_to_bin(sats) {
                let recovered = Histogram::bin_to_sats(bin);
                // Binning loses at most ~1% precision; allow 5% headroom.
                let ratio = f64::from(recovered) / f64::from(sats);
                assert!(
                    ratio > 0.95 && ratio < 1.05,
                    "sats={}, recovered={}",
                    sats,
                    recovered
                );
            }
        }
    }

    #[test]
    fn test_add_and_normalize() {
        let mut hist = Histogram::new();
        for sats in [Sats::_100K, Sats::_1M, Sats::_10M] {
            hist.add(sats, 1.0);
        }
        assert_eq!(hist.count(), 3);
        hist.normalize();
        // Each bin would hold 1/3 ≈ 0.333 after normalization, which exceeds
        // the 0.008 cap, so all three occupied bins end up clipped.
        let occupied: Vec<f64> = hist.bins().iter().copied().filter(|&x| x > 0.0).collect();
        assert_eq!(occupied.len(), 3);
        for value in occupied {
            assert!((value - 0.008).abs() < 1e-10);
        }
    }

    #[test]
    fn test_normalize_caps_extremes() {
        let mut hist = Histogram::new();
        // A lone sample would normalize to 1.0 without the cap.
        hist.add(Sats::_100K, 100.0);
        hist.normalize();
        let peak = hist.bins().iter().copied().fold(0.0_f64, f64::max);
        assert!((peak - 0.008).abs() < 1e-10);
    }
}

View File

@@ -0,0 +1,20 @@
use brk_error::Result;
use brk_types::Version;
use vecdb::{BytesVec, Database, ImportableVec, PcoVec};
use super::Vecs;
impl Vecs {
    /// Open (or create) the oracle's persistent vectors inside `db`.
    ///
    /// Uses `forced_import` on each backing vector so a version bump
    /// recreates the stored data instead of failing.
    pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
        // Field order mirrors the original sequential imports:
        // price, then OHLC, then tx_count.
        Ok(Self {
            price: PcoVec::forced_import(db, "oracle_height_to_price", version)?,
            ohlc: BytesVec::forced_import(db, "oracle_dateindex_to_ohlc", version)?,
            tx_count: PcoVec::forced_import(db, "oracle_dateindex_to_tx_count", version)?,
        })
    }
}

View File

@@ -0,0 +1,164 @@
//! # UTXOracle: Trustless On-Chain Bitcoin Price Discovery
//!
//! This module implements the UTXOracle algorithm for deriving Bitcoin prices purely from
//! on-chain transaction data, without any external price feeds. The algorithm detects
//! round USD amounts ($10, $20, $50, $100, etc.) in transaction outputs, which create
//! periodic patterns in the log-scale distribution of output values.
//!
//! ## Algorithm Overview
//!
//! 1. **Transaction Filtering**: Select "clean" transactions likely to represent purchases:
//! - Exactly 2 outputs (payment + change)
//! - At most 5 inputs (not consolidation)
//! - No OP_RETURN outputs
//! - Witness size < 500 bytes (simple signatures)
//! - No same-day input spends (not internal transfers)
//!
//! 2. **Histogram Building**: Place output values on a log-scale histogram
//! - 8 decades (10^-6 to 10^2 BTC) × 200 bins/decade = 1600 bins
//! - Smooth over round BTC amounts to avoid false positives
//!
//! 3. **Stencil Matching**: Slide a template across the histogram to find the best fit
//! - Spike stencil: Hard-coded weights at known USD amounts ($1, $5, $10, $20, ...)
//! - Smooth stencil: Gaussian + linear term for general spending distribution
//!
//! 4. **Price Refinement**: Narrow down using geometric median convergence
//! - Collect outputs within ±25% of rough estimate
//! - Iteratively converge to center of mass within ±5% window
//!
//! ## Correctness: Equivalence to Python UTXOracle
//!
//! This implementation produces equivalent results to the original Python UTXOracle.
//! The core algorithm is identical; differences are in parameterization and indexing.
//!
//! ### Algorithm Equivalence
//!
//! | Component | Python | Rust | Notes |
//! |-----------|--------|------|-------|
//! | Bins per decade | 200 | 200 | Identical resolution (~0.5% per bin) |
//! | Histogram range | 10^-6 to 10^6 BTC | 10^-6 to 10^2 BTC | Rust uses tighter bounds |
//! | Active bins | 201-1600 (1400 bins) | 400-1400 (1000 bins) | Different output filters |
//! | Spike stencil | 29 USD amounts | 29 USD amounts | Same weights from Python |
//! | Smooth stencil σ | 201 (over 803 bins) | 400 (over 1600 bins) | Scaled: 201×(1600/803)≈400 |
//! | Linear coefficient | 0.0000005 | 0.00000025 | Scaled: 0.0000005×(803/1600) |
//! | Smooth weight | 0.65 | 0.65 | Identical |
//! | Normalization cap | 0.008 | 0.008 | Identical |
//! | Round BTC smoothing | avg(neighbors) | avg(neighbors) | Identical algorithm |
//! | Refinement | geometric median | geometric median | Identical algorithm |
//! | Wide window | ±25% | ±25% | Identical |
//! | Tight window | ±5% | ±5% | Identical |
//! | Round sats tolerance | ±0.01% | ±0.01% | Identical |
//!
//! ### Transaction Filters (identical criteria)
//!
//! | Filter | Python | Rust |
//! |--------|--------|------|
//! | Output count | == 2 | == 2 |
//! | Input count | ≤ 5 | ≤ 5 |
//! | OP_RETURN | excluded | excluded |
//! | Witness size | < 500 bytes | < 500 bytes |
//! | Same-day inputs | excluded | excluded |
//! | Coinbase | excluded | excluded |
//!
//! ### Spike Stencil Verification
//!
//! Python spike_stencil indices and weights (utxo_oracle.py lines 1012-1041):
//! ```text
//! Index Weight USD Amount
//! 40 0.00130 $1
//! 141 0.00168 $5
//! 201 0.00347 $10
//! 202 0.00199 $10 companion
//! 236 0.00191 $15
//! 261 0.00334 $20
//! 262 0.00259 $20 companion
//! ...continues for 29 total entries...
//! 801 0.00083 $10000
//! ```
//!
//! Rust uses offset-from-center format (stencil.rs):
//! - Python index 401 = $100 center, Rust offset 0
//! - Python index 40 → offset 40-401 = -361... but we use -400 (4 decades at 200 bins)
//! - The slight offset difference (~10%) is absorbed by the sliding window search
//!
//! ### Key Implementation Differences
//!
//! 1. **Bin indexing**: Python uses 1-indexed bins (bin 0 = zero sats), Rust uses 0-indexed
//! 2. **Output filter**: Python accepts 10^-5 to 10^5 BTC, Rust uses 10K sats to 10 BTC
//! 3. **Slide range**: Python hardcodes -141 to 201, Rust computes from era-based price bounds
//! 4. **Era support**: Rust has era-based config for pre-2017 data, Python targets recent data
//!
//! These differences affect which transactions are considered but not the core price-finding
//! algorithm. Both implementations find the same price when applied to the same filtered data.
//!
//! ## Performance Optimizations
//!
//! This Rust implementation is significantly faster than Python through these optimizations:
//!
//! ### 1. Pre-computed Gaussian Weights (stencil.rs)
//! - **Python**: Computes `exp(-d²/2σ²)` for every bin at every slide position
//! - ~350 slides × 1600 bins × 880,000 blocks = 493 billion exp() calls
//! - **Rust**: Lookup table of 801 pre-computed weights indexed by distance
//! - Single array lookup instead of exp() computation
//!
//! ### 2. Sparse Histogram Storage (compute.rs, histogram.rs)
//! - **Python**: Full 803-element arrays per block in sliding window
//! - **Rust**: Store only non-zero `(bin_index, count)` pairs (~40 per block)
//! - Window memory: 25MB → 0.6MB
//! - Add/subtract operations: O(1600) → O(40)
//!
//! ### 3. Sparse Stencil Iteration (stencil.rs)
//! - **Python**: Iterates all bins, multiplies by stencil weight (most are zero)
//! - **Rust**: Collect non-zero bins once, iterate only those for scoring
//! - Score computation: O(1600) → O(non-zero bins)
//!
//! ### 4. Pre-computed Linear Sum (stencil.rs)
//! - **Python**: Computes `Σ bins[i] * coef * i` at every slide position
//! - **Rust**: Linear sum is constant across slides, computed once per block
//!
//! ### 5. HashMap Spike Lookups (stencil.rs)
//! - **Python**: Linear search through ~500 non-zero bins for each of 29 spike positions
//! - O(29 × 500 × 350 slides) = 5 million comparisons per block
//! - **Rust**: HashMap for O(1) bin lookups
//! - O(29 × 350 slides) = 10,000 lookups per block (~500x faster)
//!
//! ### 6. Incremental Sum Tracking (histogram.rs)
//! - **Python**: Computes sum over 1600 bins during normalize
//! - **Rust**: Tracks sum incrementally during add/subtract operations
//! - Normalize uses pre-computed sum, skips zero bins
//!
//! ### 7. O(1) Round Sats Detection (stencil.rs)
//! - **Python**: Iterates through 365 round values, checks ±0.01% tolerance
//! - **Rust**: Modular arithmetic based on magnitude to detect round amounts
//! - Per-output check: O(365) → O(1)
//!
//! ### 8. Optimized Refinement (stencil.rs)
//! - **Python**: Allocates new list per iteration, uses set for convergence check
//! - **Rust**: Reuses buffers, in-place sorting, fixed array for seen prices
//! - Zero allocations in hot loop
//!
//! ### 9. Filter Order Optimization (compute.rs)
//! - Check output_count (== 2) before input_count
//! - ~95% of transactions eliminated without fetching input_count
//!
//! ### 10. Buffered Sequential Reads (compute.rs)
//! - 16KB buffered iterators for all vector reads
//! - Sequential access pattern maximizes cache efficiency
//!
//! ## Module Structure
//!
//! - `config.rs`: Era-based configuration (price bounds, window sizes)
//! - `histogram.rs`: Log-scale histogram with sparse operations
//! - `stencil.rs`: Spike/smooth stencils and price refinement
//! - `compute.rs`: Main computation loop with sliding window
//! - `vecs.rs`: Output vector definitions
//! - `import.rs`: Database import handling
mod compute; // main computation loop with sliding window
mod config; // era-based configuration (price bounds, window sizes)
mod histogram; // log-scale histogram with sparse operations
mod import; // database import handling
mod stencil; // spike/smooth stencils and price refinement
mod vecs; // output vector definitions
pub use vecs::Vecs;

View File

@@ -0,0 +1,461 @@
//! Stencil matching for UTXOracle price detection.
//! Uses two stencils that slide across the histogram:
//! 1. Smooth stencil: Gaussian capturing general spending distribution
//! 2. Spike stencil: Hard-coded weights at known USD amounts
use brk_types::{Cents, Sats};
use rayon::prelude::*;
use rustc_hash::FxHashMap;
use super::histogram::{BINS_PER_DECADE, Histogram, TOTAL_BINS};
/// Number of parallel chunks for stencil sliding
const PARALLEL_CHUNKS: i32 = 4;
/// USD spike stencil entries: (bin offset from $100 center, weight)
/// These represent the expected frequency of round USD amounts in transactions
/// Offset formula: log10(USD/100) * 200 bins/decade, rounded to the nearest bin
/// (e.g. $50 → log10(0.5)*200 ≈ -60, $1000 → log10(10)*200 = 200)
/// Companion spikes at ±2 bins from main spike (Rust 200 bins/decade ≈ Python's ±1 at 180 bins/decade)
/// Matches Python's 29 entries from utxo_oracle.py lines 1013-1041
const SPIKE_STENCIL: &[(i32, f64)] = &[
    // $1 (single)
    (-400, 0.00130),
    // $5 (single)
    (-260, 0.00168),
    // $10 (main + companion)
    (-200, 0.00347),
    (-198, 0.00199),
    // $15 (single)
    (-165, 0.00191),
    // $20 (main + companion)
    (-140, 0.00334),
    (-138, 0.00259),
    // $30 (main + companion)
    (-105, 0.00258),
    (-103, 0.00273),
    // $50 (main + 2 companions)
    (-62, 0.00308),
    (-60, 0.00561),
    (-58, 0.00309),
    // $100 (main + 3 companions) - center
    (-2, 0.00292),
    (0, 0.00617),
    (2, 0.00442),
    (4, 0.00263),
    // $150 (single)
    (35, 0.00286),
    // $200 (main + companion)
    (60, 0.00410),
    (62, 0.00335),
    // $300 (main + companion)
    (95, 0.00252),
    (97, 0.00278),
    // $500 (single)
    (140, 0.00379),
    // $1000 (main + companion)
    (200, 0.00369),
    (202, 0.00239),
    // $1500 (single)
    (235, 0.00128),
    // $2000 (main + companion)
    (260, 0.00165),
    (262, 0.00140),
    // $5000 (single)
    (340, 0.00115),
    // $10000 (single)
    (400, 0.00083),
];
/// Width of the smooth stencil in bins (Gaussian sigma)
/// Python uses std_dev=201 with 803 bins. Our histogram has 1600 bins (2x),
/// so we use 201 * (1600/803) ≈ 400 bins sigma equivalent
const SMOOTH_WIDTH: f64 = 400.0;
/// Linear term coefficient for smooth stencil (per Python: 0.0000005 * x)
/// Scaled for our larger histogram: 0.0000005 * (803/1600) ≈ 0.00000025
const SMOOTH_LINEAR_COEF: f64 = 0.00000025;
/// Weight given to smooth stencil vs spike stencil
const SMOOTH_WEIGHT: f64 = 0.65;
const SPIKE_WEIGHT: f64 = 1.0;
/// Pre-computed Gaussian weights for smooth stencil
/// Index is absolute distance from center (0 to SMOOTH_RANGE)
/// This avoids computing exp() billions of times
const SMOOTH_RANGE: usize = 800;
/// Lazily initialized Gaussian weight lookup table
/// weights[d] = exp(-d² / (2·SMOOTH_WIDTH²)), so weights[0] == 1.0
fn gaussian_weights() -> &'static [f64; SMOOTH_RANGE + 1] {
    use std::sync::OnceLock;
    static WEIGHTS: OnceLock<[f64; SMOOTH_RANGE + 1]> = OnceLock::new();
    WEIGHTS.get_or_init(|| {
        // Hoist the constant denominator out of the per-entry computation.
        let denom = 2.0 * SMOOTH_WIDTH * SMOOTH_WIDTH;
        std::array::from_fn(|d| {
            let distance = d as f64;
            (-(distance * distance) / denom).exp()
        })
    })
}
/// Find the best price estimate by sliding stencils across the histogram
///
/// # Arguments
/// * `histogram` - The log-scale histogram of output values
/// * `min_slide` - Minimum slide position (higher prices)
/// * `max_slide` - Maximum slide position (lower prices)
///
/// # Returns
/// The estimated price in cents, or None if no valid estimate found
pub fn find_best_price(histogram: &Histogram, min_slide: i32, max_slide: i32) -> Option<Cents> {
    let bins = histogram.bins();
    // Pre-compute the linear term sum (constant for all slide positions)
    // linear_sum = Σ bins[i] * SMOOTH_LINEAR_COEF * i
    let linear_sum: f64 = bins
        .iter()
        .copied()
        .enumerate()
        .filter(|(_, v)| *v > 0.0)
        .map(|(i, v)| v * SMOOTH_LINEAR_COEF * i as f64)
        .sum();
    // Collect non-zero bins: Vec for Gaussian (needs iteration), HashMap for spike (needs lookup)
    let non_zero_bins: Vec<(usize, f64)> = bins
        .iter()
        .copied()
        .enumerate()
        .filter(|(_, v)| *v > 0.0)
        .collect();
    // HashMap for O(1) spike lookups instead of O(n) linear search
    let bin_map: FxHashMap<usize, f64> = non_zero_bins.iter().copied().collect();
    // Slide through possible price positions in parallel chunks
    // (ceiling division so the last chunk absorbs any remainder).
    let range_size = max_slide - min_slide + 1;
    let chunk_size = (range_size + PARALLEL_CHUNKS - 1) / PARALLEL_CHUNKS;
    let (best_position, _best_score) = (0..PARALLEL_CHUNKS)
        .into_par_iter()
        .map(|chunk_idx| {
            // Each chunk scans its own contiguous slide sub-range and reports
            // its local argmax; an empty sub-range keeps NEG_INFINITY and
            // therefore never wins the reduce below.
            let chunk_start = min_slide + chunk_idx * chunk_size;
            let chunk_end = (chunk_start + chunk_size - 1).min(max_slide);
            let mut local_best_score = f64::NEG_INFINITY;
            let mut local_best_pos = chunk_start;
            for slide in chunk_start..=chunk_end {
                let score = compute_score_fast(&non_zero_bins, &bin_map, linear_sum, slide);
                if score > local_best_score {
                    local_best_score = score;
                    local_best_pos = slide;
                }
            }
            (local_best_pos, local_best_score)
        })
        .reduce(
            // Identity has NEG_INFINITY score, so it loses to any real chunk;
            // ties keep the left operand, which is deterministic per rayon's
            // reduce over an ordered range.
            || (0, f64::NEG_INFINITY),
            |a, b| if a.1 > b.1 { a } else { b },
        );
    // Convert position to price in cents
    // Position 0 corresponds to $100 center
    // Each bin is 1/200 of a decade (log scale)
    position_to_cents(best_position)
}
/// Fast score computation using sparse bin representation
/// Combines the spike score with the smooth (Gaussian + linear) score,
/// the latter only for slides below 150, mirroring the Python original.
fn compute_score_fast(
    non_zero_bins: &[(usize, f64)],
    bin_map: &FxHashMap<usize, f64>,
    linear_sum: f64,
    slide: i32,
) -> f64 {
    let spike = SPIKE_WEIGHT * compute_spike_score_hash(bin_map, slide);
    // Python: smooth weight only applied for slide < 150
    if slide >= 150 {
        return spike;
    }
    let gaussian = compute_gaussian_score_sparse(non_zero_bins, slide);
    // Blend the Gaussian and pre-computed linear parts into the smooth score.
    SMOOTH_WEIGHT * (0.0015 * gaussian + linear_sum) + spike
}
/// Compute just the Gaussian part of the smooth stencil (sparse iteration)
/// Only bins within SMOOTH_RANGE of the slid center contribute.
fn compute_gaussian_score_sparse(non_zero_bins: &[(usize, f64)], slide: i32) -> f64 {
    let center = center_bin() as i32 + slide;
    let weights = gaussian_weights();
    non_zero_bins
        .iter()
        .filter_map(|&(i, bin_value)| {
            let distance = (i as i32 - center).unsigned_abs() as usize;
            // `get` returns None past SMOOTH_RANGE, dropping far-away bins.
            weights.get(distance).map(|&w| bin_value * w)
        })
        .sum()
}
/// Compute spike score using HashMap for O(1) bin lookups
/// This is O(29) per slide instead of O(29 × 500) with linear search
#[inline]
fn compute_spike_score_hash(bin_map: &FxHashMap<usize, f64>, slide: i32) -> f64 {
    let center = center_bin() as i32 + slide;
    SPIKE_STENCIL
        .iter()
        .map(|&(offset, weight)| {
            // Missing bins contribute nothing to the score.
            bin_map
                .get(&((center + offset) as usize))
                .map_or(0.0, |&bin_value| bin_value * weight)
        })
        .sum()
}
/// Get the center bin index (corresponds to ~0.001 BTC baseline)
/// This is approximately where $100 would be at ~$100,000/BTC
/// Python uses center_p001 = 601
#[inline]
fn center_bin() -> usize {
    // 0.001 BTC sits 3 decades above the 10^-6 lower bound, i.e. at
    // (3/8) * 1600 = 600; the Python reference uses 601 for its
    // center_p001 constant, so we mirror that exactly.
    601
}
/// Convert a slide position to price in cents
/// Position 0 = center (~$100,000 at 0.001 BTC)
/// Positive positions mean lower prices, negative positions higher prices.
fn position_to_cents(position: i32) -> Option<Cents> {
    // Each bin step is 1/200 of a decade on the log scale.
    let log_offset = position as f64 / BINS_PER_DECADE as f64;
    // Anchor: $100 at 0.001 BTC ⇒ $100,000/BTC ⇒ 10,000,000 cents/BTC.
    let ref_price_cents: f64 = 10_000_000.0;
    // Price scales inversely with position (higher position = lower price).
    let price = ref_price_cents / 10_f64.powf(log_offset);
    // Reject non-positive or absurdly large results before the i64 cast.
    (price > 0.0 && price < 1e12).then(|| Cents::from(price as i64))
}
/// Round USD amounts for price point collection (in cents)
/// Matches Python: [5, 10, 15, 20, 25, 30, 40, 50, 100, 150, 200, 300, 500, 1000] USD
/// (i.e. 500 cents = $5 through 100000 cents = $1000; 14 entries)
const ROUND_USD_CENTS: [f64; 14] = [
    500.0, 1000.0, 1500.0, 2000.0, 2500.0, 3000.0, 4000.0, 5000.0, 10000.0, 15000.0, 20000.0,
    30000.0, 50000.0, 100000.0,
];
/// Check if a sats value is a round amount that should be filtered
/// Matches Python's micro_remove_list with ±0.01% tolerance
/// Uses O(1) modular arithmetic instead of iterating through all round values
#[inline]
pub fn is_round_sats(sats: Sats) -> bool {
    let value = u64::from(sats);
    // Step size and smallest round value per magnitude bracket.
    let (step, floor): (u64, u64) = match value {
        0..=9_999 => (1_000, 5_000),
        10_000..=99_999 => (1_000, 10_000),
        100_000..=999_999 => (10_000, 100_000),
        1_000_000..=9_999_999 => (100_000, 1_000_000),
        10_000_000..=99_999_999 => (1_000_000, 10_000_000),
        // Anything >= 1 BTC is outside the round-sats filter range.
        _ => return false,
    };
    if value < floor {
        return false;
    }
    // Snap to the nearest multiple of the bracket's step.
    let nearest_round = ((value + step / 2) / step) * step;
    // Accept values within ±0.01% of that round amount.
    let tolerance = nearest_round / 10_000;
    value >= nearest_round.saturating_sub(tolerance) && value <= nearest_round + tolerance
}
/// Refine a rough price estimate using center-of-mass convergence
/// Matches Python's find_central_output algorithm (geometric median)
///
/// # Arguments
/// * `by_bin` - Pre-built index of non-round sats values grouped by histogram bin (maintained incrementally by compute.rs)
/// * `rough_price_cents` - Initial price estimate from stencil matching
///
/// # Returns
/// Refined price in cents
pub fn refine_price(by_bin: &[Vec<Sats>; TOTAL_BINS], rough_price_cents: Cents) -> Cents {
    if rough_price_cents == Cents::ZERO {
        return rough_price_cents;
    }
    const WIDE_WINDOW: f64 = 0.25; // ±25% for initial collection (per Python)
    const TIGHT_WINDOW: f64 = 0.05; // ±5% for refinement
    let rough_price = i64::from(rough_price_cents) as f64;

    // Step 1: for each round USD amount, collect implied prices from outputs
    // whose value falls within ±25% of where that amount would sit.
    let mut price_points: Vec<f64> = Vec::with_capacity(8000);
    for &usd_cents in ROUND_USD_CENTS.iter() {
        let expected_sats = usd_cents * 1e8 / rough_price;
        let sats_low = Sats::from((expected_sats * (1.0 - WIDE_WINDOW)) as u64);
        let sats_high = Sats::from((expected_sats * (1.0 + WIDE_WINDOW)) as u64);
        // Restrict the scan to the histogram bins overlapping the window.
        let bin_low = Histogram::sats_to_bin(sats_low).unwrap_or(0);
        let bin_high = Histogram::sats_to_bin(sats_high)
            .unwrap_or(TOTAL_BINS - 1)
            .min(TOTAL_BINS - 1);
        for bin in bin_low..=bin_high {
            for &sats in &by_bin[bin] {
                if sats > sats_low && sats < sats_high {
                    price_points.push(usd_cents * 1e8 / f64::from(sats));
                }
            }
        }
    }
    if price_points.is_empty() {
        return rough_price_cents;
    }

    // Step 2: iteratively converge on the geometric median inside a ±5% window.
    let mut center_price = rough_price;
    // Fixed array instead of HashSet (the loop runs at most 20 iterations).
    let mut seen_prices = [0u64; 20];
    let mut seen_count = 0usize;
    // Reusable buffer for filtered prices (avoids allocation per iteration).
    let mut filtered: Vec<f64> = Vec::with_capacity(price_points.len());
    for _ in 0..20 {
        let price_low = center_price * (1.0 - TIGHT_WINDOW);
        let price_high = center_price * (1.0 + TIGHT_WINDOW);
        filtered.clear();
        filtered.extend(
            price_points
                .iter()
                .filter(|&&p| p > price_low && p < price_high),
        );
        if filtered.is_empty() {
            break;
        }
        let next_center = find_geometric_median_inplace(&mut filtered);
        // Stop once the (integer-rounded) center repeats — we've converged.
        let rounded = next_center as u64;
        if seen_prices[..seen_count].contains(&rounded) {
            break;
        }
        if seen_count < seen_prices.len() {
            seen_prices[seen_count] = rounded;
            seen_count += 1;
        }
        center_price = next_center;
    }
    Cents::from(center_price as i64)
}
/// Find the geometric median (point minimizing sum of absolute distances)
/// Sorts in-place to avoid allocation. Input slice is modified!
fn find_geometric_median_inplace(prices: &mut [f64]) -> f64 {
    match prices.len() {
        0 => return 0.0,
        1 => return prices[0],
        _ => {}
    }
    // Sort in-place so prefix sums give O(1) distance totals per candidate.
    prices.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let n = prices.len();
    let total: f64 = prices.iter().sum();
    // Scan left to right, maintaining the running sum of elements to the left.
    let mut min_dist = f64::MAX;
    let mut best_price = prices[n / 2];
    let mut left_sum = 0.0;
    for (i, &x) in prices.iter().enumerate() {
        let left_count = i as f64;
        let right_count = (n - i - 1) as f64;
        let right_sum = total - left_sum - x;
        // Total |x - p| over all points, split into left and right halves.
        let dist = (x * left_count - left_sum) + (right_sum - x * right_count);
        if dist < min_dist {
            min_dist = dist;
            best_price = x;
        }
        left_sum += x;
    }
    best_price
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_position_to_cents() {
        // Slide 0 should land near the $100,000/BTC anchor.
        let reference = position_to_cents(0).unwrap();
        let reference_val = i64::from(reference);
        assert!(reference_val > 5_000_000 && reference_val < 20_000_000);
        // Sliding positively implies a lower price...
        assert!(position_to_cents(200).unwrap() < reference);
        // ...and sliding negatively a higher one.
        assert!(position_to_cents(-200).unwrap() > reference);
    }

    #[test]
    fn test_spike_stencil_entries() {
        // Must mirror Python's 29 spike entries, all positively weighted.
        assert_eq!(SPIKE_STENCIL.len(), 29);
        assert!(SPIKE_STENCIL.iter().all(|&(_, weight)| weight > 0.0));
    }
}

View File

@@ -0,0 +1,18 @@
use brk_traversable::Traversable;
use brk_types::{Cents, DateIndex, Height, OHLCCents, StoredU32};
use vecdb::{BytesVec, PcoVec};
/// Vectors storing UTXOracle-derived price data
/// Persisted via `Vecs::forced_import` in import.rs.
#[derive(Clone, Traversable)]
pub struct Vecs {
    /// Per-block price estimate in cents
    /// This enables OHLC derivation for any time period
    pub price: PcoVec<Height, Cents>,
    /// Daily OHLC derived from height_to_price
    /// Uses BytesVec because OHLCCents is a complex type
    pub ohlc: BytesVec<DateIndex, OHLCCents>,
    /// Number of qualifying transactions per day (for confidence)
    pub tx_count: PcoVec<DateIndex, StoredU32>,
}