mirror of
https://github.com/bitcoinresearchkit/brk.git
synced 2026-05-20 23:04:46 -07:00
global: snapshot
This commit is contained in:
375
crates/brk_computer/src/distribution/address/activity.rs
Normal file
375
crates/brk_computer/src/distribution/address/activity.rs
Normal file
@@ -0,0 +1,375 @@
|
||||
//! Address activity tracking - per-block counts of address behaviors.
|
||||
//!
|
||||
//! Tracks global and per-address-type activity metrics:
|
||||
//!
|
||||
//! | Metric | Description |
|
||||
//! |--------|-------------|
|
||||
//! | `receiving` | Unique addresses that received funds in this block |
|
||||
//! | `sending` | Unique addresses that sent funds in this block |
|
||||
//! | `reactivated` | Addresses that were empty and now have funds |
|
||||
//! | `both` | Addresses that both sent AND received in the same block |
|
||||
//! | `balance_increased` | Receive-only addresses (balance definitely increased) |
|
||||
//! | `balance_decreased` | Send-only addresses (balance definitely decreased) |
|
||||
//!
|
||||
//! Note: `balance_increased` and `balance_decreased` exclude "both" addresses
|
||||
//! since their net balance change requires more complex tracking.
|
||||
|
||||
use brk_cohort::ByAddressType;
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use brk_types::{Height, StoredU32, Version};
|
||||
use derive_more::{Deref, DerefMut};
|
||||
use rayon::prelude::*;
|
||||
use vecdb::{AnyStoredVec, AnyVec, Database, Exit, GenericStoredVec};
|
||||
|
||||
use crate::{ComputeIndexes, indexes, internal::ComputedFromHeightDistribution};
|
||||
|
||||
/// Per-block activity counters; cleared before each block is processed.
///
/// Only the four raw counters are stored. The two remaining metrics are
/// derived from them when a block's values are written:
/// - `balance_increased = receiving - both` (receive-only addresses)
/// - `balance_decreased = sending - both` (send-only addresses)
///
/// The struct is a plain bundle of `u32`s, so it is `Copy` and comparable
/// with `==`, which keeps snapshots and assertions cheap.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct BlockActivityCounts {
    /// Addresses that were empty and hold funds again after this block.
    pub reactivated: u32,
    /// Unique addresses that sent in this block.
    pub sending: u32,
    /// Unique addresses that received in this block.
    pub receiving: u32,
    /// Unique addresses that both sent and received in this block.
    pub both: u32,
}

impl BlockActivityCounts {
    /// Reset all counts to zero.
    #[inline]
    pub fn reset(&mut self) {
        *self = Self::default();
    }
}
|
||||
|
||||
/// Per-address-type activity counts - aggregated during block processing.
|
||||
#[derive(Debug, Default, Deref, DerefMut)]
|
||||
pub struct AddressTypeToActivityCounts(pub ByAddressType<BlockActivityCounts>);
|
||||
|
||||
impl AddressTypeToActivityCounts {
|
||||
/// Reset all per-type counts.
|
||||
pub fn reset(&mut self) {
|
||||
self.0.values_mut().for_each(|v| v.reset());
|
||||
}
|
||||
|
||||
/// Sum all types to get totals.
|
||||
pub fn totals(&self) -> BlockActivityCounts {
|
||||
let mut total = BlockActivityCounts::default();
|
||||
for counts in self.0.values() {
|
||||
total.reactivated += counts.reactivated;
|
||||
total.sending += counts.sending;
|
||||
total.receiving += counts.receiving;
|
||||
total.both += counts.both;
|
||||
}
|
||||
total
|
||||
}
|
||||
}
|
||||
|
||||
/// Activity count vectors for a single category (e.g., one address type or "all").
|
||||
#[derive(Clone, Traversable)]
|
||||
pub struct ActivityCountVecs {
|
||||
pub reactivated: ComputedFromHeightDistribution<StoredU32>,
|
||||
pub sending: ComputedFromHeightDistribution<StoredU32>,
|
||||
pub receiving: ComputedFromHeightDistribution<StoredU32>,
|
||||
pub balance_increased: ComputedFromHeightDistribution<StoredU32>,
|
||||
pub balance_decreased: ComputedFromHeightDistribution<StoredU32>,
|
||||
pub both: ComputedFromHeightDistribution<StoredU32>,
|
||||
}
|
||||
|
||||
impl ActivityCountVecs {
|
||||
pub fn forced_import(
|
||||
db: &Database,
|
||||
name: &str,
|
||||
version: Version,
|
||||
indexes: &indexes::Vecs,
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
reactivated: ComputedFromHeightDistribution::forced_import(
|
||||
db,
|
||||
&format!("{name}_reactivated"),
|
||||
version,
|
||||
indexes,
|
||||
)?,
|
||||
sending: ComputedFromHeightDistribution::forced_import(
|
||||
db,
|
||||
&format!("{name}_sending"),
|
||||
version,
|
||||
indexes,
|
||||
)?,
|
||||
receiving: ComputedFromHeightDistribution::forced_import(
|
||||
db,
|
||||
&format!("{name}_receiving"),
|
||||
version,
|
||||
indexes,
|
||||
)?,
|
||||
balance_increased: ComputedFromHeightDistribution::forced_import(
|
||||
db,
|
||||
&format!("{name}_balance_increased"),
|
||||
version,
|
||||
indexes,
|
||||
)?,
|
||||
balance_decreased: ComputedFromHeightDistribution::forced_import(
|
||||
db,
|
||||
&format!("{name}_balance_decreased"),
|
||||
version,
|
||||
indexes,
|
||||
)?,
|
||||
both: ComputedFromHeightDistribution::forced_import(
|
||||
db,
|
||||
&format!("{name}_both"),
|
||||
version,
|
||||
indexes,
|
||||
)?,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn min_stateful_height(&self) -> usize {
|
||||
self.reactivated
|
||||
.height
|
||||
.len()
|
||||
.min(self.sending.height.len())
|
||||
.min(self.receiving.height.len())
|
||||
.min(self.balance_increased.height.len())
|
||||
.min(self.balance_decreased.height.len())
|
||||
.min(self.both.height.len())
|
||||
}
|
||||
|
||||
pub fn par_iter_height_mut(&mut self) -> impl ParallelIterator<Item = &mut dyn AnyStoredVec> {
|
||||
[
|
||||
&mut self.reactivated.height as &mut dyn AnyStoredVec,
|
||||
&mut self.sending.height as &mut dyn AnyStoredVec,
|
||||
&mut self.receiving.height as &mut dyn AnyStoredVec,
|
||||
&mut self.balance_increased.height as &mut dyn AnyStoredVec,
|
||||
&mut self.balance_decreased.height as &mut dyn AnyStoredVec,
|
||||
&mut self.both.height as &mut dyn AnyStoredVec,
|
||||
]
|
||||
.into_par_iter()
|
||||
}
|
||||
|
||||
pub fn reset_height(&mut self) -> Result<()> {
|
||||
self.reactivated.height.reset()?;
|
||||
self.sending.height.reset()?;
|
||||
self.receiving.height.reset()?;
|
||||
self.balance_increased.height.reset()?;
|
||||
self.balance_decreased.height.reset()?;
|
||||
self.both.height.reset()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn truncate_push_height(
|
||||
&mut self,
|
||||
height: Height,
|
||||
counts: &BlockActivityCounts,
|
||||
) -> Result<()> {
|
||||
self.reactivated
|
||||
.height
|
||||
.truncate_push(height, counts.reactivated.into())?;
|
||||
self.sending
|
||||
.height
|
||||
.truncate_push(height, counts.sending.into())?;
|
||||
self.receiving
|
||||
.height
|
||||
.truncate_push(height, counts.receiving.into())?;
|
||||
// Derived: balance_increased = receiving - both (receive-only addresses)
|
||||
self.balance_increased
|
||||
.height
|
||||
.truncate_push(height, (counts.receiving - counts.both).into())?;
|
||||
// Derived: balance_decreased = sending - both (send-only addresses)
|
||||
self.balance_decreased
|
||||
.height
|
||||
.truncate_push(height, (counts.sending - counts.both).into())?;
|
||||
self.both
|
||||
.height
|
||||
.truncate_push(height, counts.both.into())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn compute_rest(
|
||||
&mut self,
|
||||
indexes: &indexes::Vecs,
|
||||
starting_indexes: &ComputeIndexes,
|
||||
exit: &Exit,
|
||||
) -> Result<()> {
|
||||
self.reactivated
|
||||
.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.sending
|
||||
.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.receiving
|
||||
.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.balance_increased
|
||||
.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.balance_decreased
|
||||
.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.both.compute_rest(indexes, starting_indexes, exit)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-address-type activity count vecs.
|
||||
#[derive(Clone, Deref, DerefMut, Traversable)]
|
||||
pub struct AddressTypeToActivityCountVecs(ByAddressType<ActivityCountVecs>);
|
||||
|
||||
impl From<ByAddressType<ActivityCountVecs>> for AddressTypeToActivityCountVecs {
|
||||
#[inline]
|
||||
fn from(value: ByAddressType<ActivityCountVecs>) -> Self {
|
||||
Self(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl AddressTypeToActivityCountVecs {
|
||||
pub fn forced_import(
|
||||
db: &Database,
|
||||
name: &str,
|
||||
version: Version,
|
||||
indexes: &indexes::Vecs,
|
||||
) -> Result<Self> {
|
||||
Ok(Self::from(
|
||||
ByAddressType::<ActivityCountVecs>::new_with_name(|type_name| {
|
||||
ActivityCountVecs::forced_import(db, &format!("{type_name}_{name}"), version, indexes)
|
||||
})?,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn min_stateful_height(&self) -> usize {
|
||||
self.0.values().map(|v| v.min_stateful_height()).min().unwrap_or(0)
|
||||
}
|
||||
|
||||
pub fn par_iter_height_mut(&mut self) -> impl ParallelIterator<Item = &mut dyn AnyStoredVec> {
|
||||
let inner = &mut self.0;
|
||||
let mut vecs: Vec<&mut dyn AnyStoredVec> = Vec::new();
|
||||
for type_vecs in [
|
||||
&mut inner.p2pk65,
|
||||
&mut inner.p2pk33,
|
||||
&mut inner.p2pkh,
|
||||
&mut inner.p2sh,
|
||||
&mut inner.p2wpkh,
|
||||
&mut inner.p2wsh,
|
||||
&mut inner.p2tr,
|
||||
&mut inner.p2a,
|
||||
] {
|
||||
vecs.push(&mut type_vecs.reactivated.height);
|
||||
vecs.push(&mut type_vecs.sending.height);
|
||||
vecs.push(&mut type_vecs.receiving.height);
|
||||
vecs.push(&mut type_vecs.balance_increased.height);
|
||||
vecs.push(&mut type_vecs.balance_decreased.height);
|
||||
vecs.push(&mut type_vecs.both.height);
|
||||
}
|
||||
vecs.into_par_iter()
|
||||
}
|
||||
|
||||
pub fn reset_height(&mut self) -> Result<()> {
|
||||
self.p2pk65.reset_height()?;
|
||||
self.p2pk33.reset_height()?;
|
||||
self.p2pkh.reset_height()?;
|
||||
self.p2sh.reset_height()?;
|
||||
self.p2wpkh.reset_height()?;
|
||||
self.p2wsh.reset_height()?;
|
||||
self.p2tr.reset_height()?;
|
||||
self.p2a.reset_height()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn truncate_push_height(
|
||||
&mut self,
|
||||
height: Height,
|
||||
counts: &AddressTypeToActivityCounts,
|
||||
) -> Result<()> {
|
||||
self.p2pk65
|
||||
.truncate_push_height(height, &counts.p2pk65)?;
|
||||
self.p2pk33
|
||||
.truncate_push_height(height, &counts.p2pk33)?;
|
||||
self.p2pkh
|
||||
.truncate_push_height(height, &counts.p2pkh)?;
|
||||
self.p2sh.truncate_push_height(height, &counts.p2sh)?;
|
||||
self.p2wpkh
|
||||
.truncate_push_height(height, &counts.p2wpkh)?;
|
||||
self.p2wsh
|
||||
.truncate_push_height(height, &counts.p2wsh)?;
|
||||
self.p2tr.truncate_push_height(height, &counts.p2tr)?;
|
||||
self.p2a.truncate_push_height(height, &counts.p2a)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn compute_rest(
|
||||
&mut self,
|
||||
indexes: &indexes::Vecs,
|
||||
starting_indexes: &ComputeIndexes,
|
||||
exit: &Exit,
|
||||
) -> Result<()> {
|
||||
self.p2pk65.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.p2pk33.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.p2pkh.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.p2sh.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.p2wpkh.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.p2wsh.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.p2tr.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.p2a.compute_rest(indexes, starting_indexes, exit)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Storage for activity metrics (global + per type).
|
||||
#[derive(Clone, Traversable)]
|
||||
pub struct AddressActivityVecs {
|
||||
pub all: ActivityCountVecs,
|
||||
#[traversable(flatten)]
|
||||
pub by_addresstype: AddressTypeToActivityCountVecs,
|
||||
}
|
||||
|
||||
impl AddressActivityVecs {
|
||||
pub fn forced_import(
|
||||
db: &Database,
|
||||
name: &str,
|
||||
version: Version,
|
||||
indexes: &indexes::Vecs,
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
all: ActivityCountVecs::forced_import(db, name, version, indexes)?,
|
||||
by_addresstype: AddressTypeToActivityCountVecs::forced_import(
|
||||
db, name, version, indexes,
|
||||
)?,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn min_stateful_height(&self) -> usize {
|
||||
self.all.min_stateful_height().min(self.by_addresstype.min_stateful_height())
|
||||
}
|
||||
|
||||
pub fn par_iter_height_mut(&mut self) -> impl ParallelIterator<Item = &mut dyn AnyStoredVec> {
|
||||
self.all
|
||||
.par_iter_height_mut()
|
||||
.chain(self.by_addresstype.par_iter_height_mut())
|
||||
}
|
||||
|
||||
pub fn reset_height(&mut self) -> Result<()> {
|
||||
self.all.reset_height()?;
|
||||
self.by_addresstype.reset_height()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn truncate_push_height(
|
||||
&mut self,
|
||||
height: Height,
|
||||
counts: &AddressTypeToActivityCounts,
|
||||
) -> Result<()> {
|
||||
let totals = counts.totals();
|
||||
self.all.truncate_push_height(height, &totals)?;
|
||||
self.by_addresstype.truncate_push_height(height, counts)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn compute_rest(
|
||||
&mut self,
|
||||
indexes: &indexes::Vecs,
|
||||
starting_indexes: &ComputeIndexes,
|
||||
exit: &Exit,
|
||||
) -> Result<()> {
|
||||
self.all.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.by_addresstype
|
||||
.compute_rest(indexes, starting_indexes, exit)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -106,7 +106,7 @@ impl AddressTypeToAddrCountVecs {
|
||||
))
|
||||
}
|
||||
|
||||
pub fn min_len(&self) -> usize {
|
||||
pub fn min_stateful_height(&self) -> usize {
|
||||
self.p2pk65
|
||||
.height
|
||||
.len()
|
||||
@@ -242,8 +242,8 @@ impl AddrCountVecs {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn min_len(&self) -> usize {
|
||||
self.all.height.len().min(self.by_addresstype.min_len())
|
||||
pub fn min_stateful_height(&self) -> usize {
|
||||
self.all.height.len().min(self.by_addresstype.min_stateful_height())
|
||||
}
|
||||
|
||||
pub fn par_iter_height_mut(&mut self) -> impl ParallelIterator<Item = &mut dyn AnyStoredVec> {
|
||||
|
||||
92
crates/brk_computer/src/distribution/address/growth_rate.rs
Normal file
92
crates/brk_computer/src/distribution/address/growth_rate.rs
Normal file
@@ -0,0 +1,92 @@
|
||||
//! Growth rate: new_addr_count / addr_count (global + per-type)
|
||||
|
||||
use brk_cohort::{ByAddressType, zip2_by_addresstype};
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use brk_types::{Height, StoredF32, StoredU64, Version};
|
||||
use vecdb::{Database, Exit, IterableCloneableVec};
|
||||
|
||||
use crate::{
|
||||
ComputeIndexes, indexes,
|
||||
internal::{LazyBinaryComputedFromHeightDistribution, RatioU64F32},
|
||||
};
|
||||
|
||||
use super::{AddrCountVecs, NewAddrCountVecs};
|
||||
|
||||
/// Growth rate by type - lazy ratio with distribution stats
|
||||
pub type GrowthRateByType =
|
||||
ByAddressType<LazyBinaryComputedFromHeightDistribution<StoredF32, StoredU64, StoredU64>>;
|
||||
|
||||
/// Growth rate: new_addr_count / addr_count (global + per-type)
|
||||
#[derive(Clone, Traversable)]
|
||||
pub struct GrowthRateVecs {
|
||||
pub all: LazyBinaryComputedFromHeightDistribution<StoredF32, StoredU64, StoredU64>,
|
||||
#[traversable(flatten)]
|
||||
pub by_addresstype: GrowthRateByType,
|
||||
}
|
||||
|
||||
impl GrowthRateVecs {
|
||||
pub fn forced_import(
|
||||
db: &Database,
|
||||
version: Version,
|
||||
indexes: &indexes::Vecs,
|
||||
new_addr_count: &NewAddrCountVecs,
|
||||
addr_count: &AddrCountVecs,
|
||||
) -> Result<Self> {
|
||||
let all = make_growth_rate(
|
||||
db,
|
||||
"growth_rate",
|
||||
version,
|
||||
indexes,
|
||||
&new_addr_count.all.height,
|
||||
&addr_count.all.height,
|
||||
)?;
|
||||
|
||||
let by_addresstype: GrowthRateByType = zip2_by_addresstype(
|
||||
&new_addr_count.by_addresstype,
|
||||
&addr_count.by_addresstype,
|
||||
|name, new, addr| {
|
||||
make_growth_rate(
|
||||
db,
|
||||
&format!("{name}_growth_rate"),
|
||||
version,
|
||||
indexes,
|
||||
&new.height,
|
||||
&addr.height,
|
||||
)
|
||||
},
|
||||
)?;
|
||||
|
||||
Ok(Self { all, by_addresstype })
|
||||
}
|
||||
|
||||
pub fn derive_from(
|
||||
&mut self,
|
||||
indexes: &indexes::Vecs,
|
||||
starting_indexes: &ComputeIndexes,
|
||||
exit: &Exit,
|
||||
) -> Result<()> {
|
||||
self.all.derive_from(indexes, starting_indexes, exit)?;
|
||||
for vecs in self.by_addresstype.values_mut() {
|
||||
vecs.derive_from(indexes, starting_indexes, exit)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn make_growth_rate<V1, V2>(
|
||||
db: &Database,
|
||||
name: &str,
|
||||
version: Version,
|
||||
indexes: &indexes::Vecs,
|
||||
new: &V1,
|
||||
addr: &V2,
|
||||
) -> Result<LazyBinaryComputedFromHeightDistribution<StoredF32, StoredU64, StoredU64>>
|
||||
where
|
||||
V1: IterableCloneableVec<Height, StoredU64>,
|
||||
V2: IterableCloneableVec<Height, StoredU64>,
|
||||
{
|
||||
LazyBinaryComputedFromHeightDistribution::<StoredF32, StoredU64, StoredU64>::forced_import::<
|
||||
RatioU64F32,
|
||||
>(db, name, version, new.boxed_clone(), addr.boxed_clone(), indexes)
|
||||
}
|
||||
@@ -1,9 +1,17 @@
|
||||
mod activity;
|
||||
mod address_count;
|
||||
mod data;
|
||||
mod growth_rate;
|
||||
mod indexes;
|
||||
mod new_addr_count;
|
||||
mod total_addr_count;
|
||||
mod type_map;
|
||||
|
||||
pub use activity::{AddressActivityVecs, AddressTypeToActivityCounts};
|
||||
pub use address_count::{AddrCountVecs, AddressTypeToAddressCount};
|
||||
pub use data::AddressesDataVecs;
|
||||
pub use growth_rate::GrowthRateVecs;
|
||||
pub use indexes::AnyAddressIndexesVecs;
|
||||
pub use new_addr_count::NewAddrCountVecs;
|
||||
pub use total_addr_count::TotalAddrCountVecs;
|
||||
pub use type_map::{AddressTypeToTypeIndexMap, AddressTypeToVec, HeightToAddressTypeToVec};
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
//! New address count: delta of total_addr_count (global + per-type)
|
||||
|
||||
use brk_cohort::{ByAddressType, zip_by_addresstype};
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use brk_types::{Height, StoredU64, Version};
|
||||
use vecdb::{Database, Exit, TypedVecIterator};
|
||||
|
||||
use crate::{ComputeIndexes, indexes, internal::LazyComputedFromHeightFull};
|
||||
|
||||
use super::TotalAddrCountVecs;
|
||||
|
||||
/// New addresses by type - lazy delta with stored dateindex stats
|
||||
pub type NewAddrCountByType = ByAddressType<LazyComputedFromHeightFull<StoredU64, StoredU64>>;
|
||||
|
||||
/// New address count per block (global + per-type)
|
||||
#[derive(Clone, Traversable)]
|
||||
pub struct NewAddrCountVecs {
|
||||
pub all: LazyComputedFromHeightFull<StoredU64, StoredU64>,
|
||||
#[traversable(flatten)]
|
||||
pub by_addresstype: NewAddrCountByType,
|
||||
}
|
||||
|
||||
impl NewAddrCountVecs {
|
||||
pub fn forced_import(
|
||||
db: &Database,
|
||||
version: Version,
|
||||
indexes: &indexes::Vecs,
|
||||
total_addr_count: &TotalAddrCountVecs,
|
||||
) -> Result<Self> {
|
||||
let all = LazyComputedFromHeightFull::forced_import_with_init(
|
||||
db,
|
||||
"new_addr_count",
|
||||
version,
|
||||
total_addr_count.all.height.clone(),
|
||||
indexes,
|
||||
delta_init_fn,
|
||||
)?;
|
||||
|
||||
let by_addresstype: NewAddrCountByType = zip_by_addresstype(
|
||||
&total_addr_count.by_addresstype,
|
||||
|name, total| {
|
||||
LazyComputedFromHeightFull::forced_import_with_init(
|
||||
db,
|
||||
&format!("{name}_new_addr_count"),
|
||||
version,
|
||||
total.height.clone(),
|
||||
indexes,
|
||||
delta_init_fn,
|
||||
)
|
||||
},
|
||||
)?;
|
||||
|
||||
Ok(Self { all, by_addresstype })
|
||||
}
|
||||
|
||||
pub fn derive_from(
|
||||
&mut self,
|
||||
indexes: &indexes::Vecs,
|
||||
starting_indexes: &ComputeIndexes,
|
||||
exit: &Exit,
|
||||
) -> Result<()> {
|
||||
self.all.derive_from(indexes, starting_indexes, exit)?;
|
||||
for vecs in self.by_addresstype.values_mut() {
|
||||
vecs.derive_from(indexes, starting_indexes, exit)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Delta init function: value[h] = source[h] - source[h-1]
|
||||
fn delta_init_fn(
|
||||
h: Height,
|
||||
total_iter: &mut dyn TypedVecIterator<I = Height, T = StoredU64, Item = StoredU64>,
|
||||
) -> Option<StoredU64> {
|
||||
let current: u64 = total_iter.get(h)?.into();
|
||||
let prev: u64 = h
|
||||
.decremented()
|
||||
.and_then(|prev_h| total_iter.get(prev_h))
|
||||
.map(|v: StoredU64| v.into())
|
||||
.unwrap_or(0);
|
||||
Some(StoredU64::from(current.saturating_sub(prev)))
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
//! Total address count: addr_count + empty_addr_count (global + per-type)
|
||||
|
||||
use brk_cohort::{ByAddressType, zip2_by_addresstype};
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use brk_types::{StoredU64, Version};
|
||||
use vecdb::{Database, Exit, IterableCloneableVec};
|
||||
|
||||
use crate::{ComputeIndexes, indexes, internal::{LazyBinaryComputedFromHeightLast, U64Plus}};
|
||||
|
||||
use super::AddrCountVecs;
|
||||
|
||||
/// Total addresses by type - lazy sum with all derived indexes
|
||||
pub type TotalAddrCountByType =
|
||||
ByAddressType<LazyBinaryComputedFromHeightLast<StoredU64, StoredU64, StoredU64>>;
|
||||
|
||||
/// Total address count (global + per-type) with all derived indexes
|
||||
#[derive(Clone, Traversable)]
|
||||
pub struct TotalAddrCountVecs {
|
||||
pub all: LazyBinaryComputedFromHeightLast<StoredU64, StoredU64, StoredU64>,
|
||||
#[traversable(flatten)]
|
||||
pub by_addresstype: TotalAddrCountByType,
|
||||
}
|
||||
|
||||
impl TotalAddrCountVecs {
|
||||
pub fn forced_import(
|
||||
db: &Database,
|
||||
version: Version,
|
||||
indexes: &indexes::Vecs,
|
||||
addr_count: &AddrCountVecs,
|
||||
empty_addr_count: &AddrCountVecs,
|
||||
) -> Result<Self> {
|
||||
let all = LazyBinaryComputedFromHeightLast::forced_import::<U64Plus>(
|
||||
db,
|
||||
"total_addr_count",
|
||||
version,
|
||||
addr_count.all.height.boxed_clone(),
|
||||
empty_addr_count.all.height.boxed_clone(),
|
||||
indexes,
|
||||
)?;
|
||||
|
||||
let by_addresstype: TotalAddrCountByType = zip2_by_addresstype(
|
||||
&addr_count.by_addresstype,
|
||||
&empty_addr_count.by_addresstype,
|
||||
|name, addr, empty| {
|
||||
LazyBinaryComputedFromHeightLast::forced_import::<U64Plus>(
|
||||
db,
|
||||
&format!("{name}_total_addr_count"),
|
||||
version,
|
||||
addr.height.boxed_clone(),
|
||||
empty.height.boxed_clone(),
|
||||
indexes,
|
||||
)
|
||||
},
|
||||
)?;
|
||||
|
||||
Ok(Self { all, by_addresstype })
|
||||
}
|
||||
|
||||
pub fn derive_from(
|
||||
&mut self,
|
||||
indexes: &indexes::Vecs,
|
||||
starting_indexes: &ComputeIndexes,
|
||||
exit: &Exit,
|
||||
) -> Result<()> {
|
||||
self.all.derive_from(indexes, starting_indexes, exit)?;
|
||||
for vecs in self.by_addresstype.values_mut() {
|
||||
vecs.derive_from(indexes, starting_indexes, exit)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -2,10 +2,14 @@ use brk_cohort::{AmountBucket, ByAddressType};
|
||||
use brk_types::{Dollars, Sats, TypeIndex};
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
use crate::distribution::{address::AddressTypeToVec, cohorts::AddressCohorts};
|
||||
use crate::distribution::{
|
||||
address::{AddressTypeToActivityCounts, AddressTypeToVec},
|
||||
cohorts::AddressCohorts,
|
||||
};
|
||||
|
||||
use super::super::cache::{AddressLookup, TrackingStatus};
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn process_received(
|
||||
received_data: AddressTypeToVec<(TypeIndex, Sats)>,
|
||||
cohorts: &mut AddressCohorts,
|
||||
@@ -13,6 +17,7 @@ pub fn process_received(
|
||||
price: Option<Dollars>,
|
||||
addr_count: &mut ByAddressType<u64>,
|
||||
empty_addr_count: &mut ByAddressType<u64>,
|
||||
activity_counts: &mut AddressTypeToActivityCounts,
|
||||
) {
|
||||
for (output_type, vec) in received_data.unwrap().into_iter() {
|
||||
if vec.is_empty() {
|
||||
@@ -22,6 +27,7 @@ pub fn process_received(
|
||||
// Cache mutable refs for this address type
|
||||
let type_addr_count = addr_count.get_mut(output_type).unwrap();
|
||||
let type_empty_count = empty_addr_count.get_mut(output_type).unwrap();
|
||||
let type_activity = activity_counts.get_mut_unwrap(output_type);
|
||||
|
||||
// Aggregate receives by address - each address processed exactly once
|
||||
// Track (total_value, output_count) for correct UTXO counting
|
||||
@@ -35,6 +41,9 @@ pub fn process_received(
|
||||
for (type_index, (total_value, output_count)) in aggregated {
|
||||
let (addr_data, status) = lookup.get_or_create_for_receive(output_type, type_index);
|
||||
|
||||
// Track receiving activity - each address in receive aggregation
|
||||
type_activity.receiving += 1;
|
||||
|
||||
match status {
|
||||
TrackingStatus::New => {
|
||||
*type_addr_count += 1;
|
||||
@@ -42,6 +51,8 @@ pub fn process_received(
|
||||
TrackingStatus::WasEmpty => {
|
||||
*type_addr_count += 1;
|
||||
*type_empty_count -= 1;
|
||||
// Reactivated - was empty, now has funds
|
||||
type_activity.reactivated += 1;
|
||||
}
|
||||
TrackingStatus::Tracked => {}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
use brk_cohort::{AmountBucket, ByAddressType};
|
||||
use brk_error::Result;
|
||||
use brk_types::{Age, CheckedSub, Dollars, Height, Sats, Timestamp, TypeIndex};
|
||||
use rustc_hash::FxHashSet;
|
||||
use vecdb::{unlikely, VecIndex};
|
||||
|
||||
use crate::distribution::{address::HeightToAddressTypeToVec, cohorts::AddressCohorts};
|
||||
use crate::distribution::{
|
||||
address::{AddressTypeToActivityCounts, HeightToAddressTypeToVec},
|
||||
cohorts::AddressCohorts,
|
||||
};
|
||||
|
||||
use super::super::cache::AddressLookup;
|
||||
|
||||
@@ -25,11 +29,16 @@ pub fn process_sent(
|
||||
current_price: Option<Dollars>,
|
||||
addr_count: &mut ByAddressType<u64>,
|
||||
empty_addr_count: &mut ByAddressType<u64>,
|
||||
activity_counts: &mut AddressTypeToActivityCounts,
|
||||
received_addresses: &ByAddressType<FxHashSet<TypeIndex>>,
|
||||
height_to_price: Option<&[Dollars]>,
|
||||
height_to_timestamp: &[Timestamp],
|
||||
current_height: Height,
|
||||
current_timestamp: Timestamp,
|
||||
) -> Result<()> {
|
||||
// Track unique senders per address type (simple set, no extra data needed)
|
||||
let mut seen_senders: ByAddressType<FxHashSet<TypeIndex>> = ByAddressType::default();
|
||||
|
||||
for (prev_height, by_type) in sent_data.into_iter() {
|
||||
let prev_price = height_to_price.map(|v| v[prev_height.to_usize()]);
|
||||
let prev_timestamp = height_to_timestamp[prev_height.to_usize()];
|
||||
@@ -40,12 +49,26 @@ pub fn process_sent(
|
||||
// Cache mutable refs for this address type
|
||||
let type_addr_count = addr_count.get_mut(output_type).unwrap();
|
||||
let type_empty_count = empty_addr_count.get_mut(output_type).unwrap();
|
||||
let type_activity = activity_counts.get_mut_unwrap(output_type);
|
||||
let type_received = received_addresses.get_unwrap(output_type);
|
||||
let type_seen = seen_senders.get_mut_unwrap(output_type);
|
||||
|
||||
for (type_index, value) in vec {
|
||||
let addr_data = lookup.get_for_send(output_type, type_index);
|
||||
|
||||
let prev_balance = addr_data.balance();
|
||||
let new_balance = prev_balance.checked_sub(value).unwrap();
|
||||
|
||||
// On first encounter of this address this block, track activity
|
||||
if type_seen.insert(type_index) {
|
||||
type_activity.sending += 1;
|
||||
|
||||
// Track "both" - addresses that sent AND received this block
|
||||
if type_received.contains(&type_index) {
|
||||
type_activity.both += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let will_be_empty = addr_data.has_1_utxos();
|
||||
|
||||
// Compute buckets once
|
||||
|
||||
@@ -5,13 +5,14 @@ use brk_error::Result;
|
||||
use brk_indexer::Indexer;
|
||||
use brk_types::{DateIndex, Height, OutputType, Sats, TxIndex, TypeIndex};
|
||||
use rayon::prelude::*;
|
||||
use rustc_hash::FxHashSet;
|
||||
use tracing::info;
|
||||
use vecdb::{Exit, IterableVec, TypedVecIterator, VecIndex};
|
||||
|
||||
use crate::{
|
||||
blocks,
|
||||
distribution::{
|
||||
address::AddressTypeToAddressCount,
|
||||
address::{AddressTypeToActivityCounts, AddressTypeToAddressCount},
|
||||
block::{
|
||||
AddressCache, InputsResult, process_inputs, process_outputs, process_received,
|
||||
process_sent,
|
||||
@@ -139,6 +140,9 @@ pub fn process_blocks(
|
||||
)
|
||||
};
|
||||
|
||||
// Track activity counts - reset each block
|
||||
let mut activity_counts = AddressTypeToActivityCounts::default();
|
||||
|
||||
let mut cache = AddressCache::new();
|
||||
|
||||
// Main block iteration
|
||||
@@ -184,6 +188,9 @@ pub fn process_blocks(
|
||||
// Reset per-block values for all separate cohorts
|
||||
reset_block_values(&mut vecs.utxo_cohorts, &mut vecs.address_cohorts);
|
||||
|
||||
// Reset per-block activity counts
|
||||
activity_counts.reset();
|
||||
|
||||
// Collect output/input data using reusable iterators (16KB buffered reads)
|
||||
// Must be done before thread::scope since iterators aren't Send
|
||||
let txoutdata_vec = txout_iters.collect_block_outputs(first_txoutindex, output_count);
|
||||
@@ -284,6 +291,18 @@ pub fn process_blocks(
|
||||
timestamp,
|
||||
});
|
||||
|
||||
// Build set of addresses that received this block (for detecting "both" in sent)
|
||||
let received_addresses: ByAddressType<FxHashSet<TypeIndex>> = {
|
||||
let mut sets = ByAddressType::<FxHashSet<TypeIndex>>::default();
|
||||
for (output_type, vec) in outputs_result.received_data.iter() {
|
||||
let set = sets.get_mut_unwrap(output_type);
|
||||
for (type_index, _) in vec {
|
||||
set.insert(*type_index);
|
||||
}
|
||||
}
|
||||
sets
|
||||
};
|
||||
|
||||
// Process UTXO cohorts and Address cohorts in parallel
|
||||
// - Main thread: UTXO cohorts receive/send
|
||||
// - Spawned thread: Address cohorts process_received/process_sent
|
||||
@@ -300,6 +319,7 @@ pub fn process_blocks(
|
||||
block_price,
|
||||
&mut addr_counts,
|
||||
&mut empty_addr_counts,
|
||||
&mut activity_counts,
|
||||
);
|
||||
|
||||
// Process sent inputs (addresses sending funds)
|
||||
@@ -311,6 +331,8 @@ pub fn process_blocks(
|
||||
block_price,
|
||||
&mut addr_counts,
|
||||
&mut empty_addr_counts,
|
||||
&mut activity_counts,
|
||||
&received_addresses,
|
||||
height_to_price_vec.as_deref(),
|
||||
height_to_timestamp_vec,
|
||||
height,
|
||||
@@ -333,6 +355,8 @@ pub fn process_blocks(
|
||||
empty_addr_counts.sum(),
|
||||
&empty_addr_counts,
|
||||
)?;
|
||||
vecs.address_activity
|
||||
.truncate_push_height(height, &activity_counts)?;
|
||||
|
||||
// Get date info for unrealized state computation
|
||||
let date = height_to_date_iter.get_unwrap(height);
|
||||
|
||||
@@ -76,6 +76,7 @@ pub fn write(
|
||||
.chain(vecs.addresses_data.par_iter_mut())
|
||||
.chain(vecs.addr_count.par_iter_height_mut())
|
||||
.chain(vecs.empty_addr_count.par_iter_height_mut())
|
||||
.chain(vecs.address_activity.par_iter_height_mut())
|
||||
.chain(rayon::iter::once(
|
||||
&mut vecs.chain_state as &mut dyn AnyStoredVec,
|
||||
))
|
||||
|
||||
@@ -23,7 +23,10 @@ use crate::{
|
||||
};
|
||||
|
||||
use super::{
|
||||
AddressCohorts, AddressesDataVecs, AnyAddressIndexesVecs, UTXOCohorts, address::AddrCountVecs,
|
||||
AddressCohorts, AddressesDataVecs, AnyAddressIndexesVecs, UTXOCohorts,
|
||||
address::{
|
||||
AddrCountVecs, AddressActivityVecs, GrowthRateVecs, NewAddrCountVecs, TotalAddrCountVecs,
|
||||
},
|
||||
compute::aggregates,
|
||||
};
|
||||
|
||||
@@ -43,6 +46,15 @@ pub struct Vecs {
|
||||
|
||||
pub addr_count: AddrCountVecs,
|
||||
pub empty_addr_count: AddrCountVecs,
|
||||
pub address_activity: AddressActivityVecs,
|
||||
|
||||
/// Total addresses ever seen (addr_count + empty_addr_count) - lazy, global + per-type
|
||||
pub total_addr_count: TotalAddrCountVecs,
|
||||
/// New addresses per block (delta of total) - lazy height, stored dateindex stats, global + per-type
|
||||
pub new_addr_count: NewAddrCountVecs,
|
||||
/// Growth rate (new / addr_count) - lazy ratio with distribution stats, global + per-type
|
||||
pub growth_rate: GrowthRateVecs,
|
||||
|
||||
pub loadedaddressindex:
|
||||
LazyVecFrom1<LoadedAddressIndex, LoadedAddressIndex, LoadedAddressIndex, LoadedAddressData>,
|
||||
pub emptyaddressindex:
|
||||
@@ -103,19 +115,41 @@ impl Vecs {
|
||||
|index, _| Some(index),
|
||||
);
|
||||
|
||||
let addr_count = AddrCountVecs::forced_import(&db, "addr_count", version, indexes)?;
|
||||
let empty_addr_count =
|
||||
AddrCountVecs::forced_import(&db, "empty_addr_count", version, indexes)?;
|
||||
let address_activity =
|
||||
AddressActivityVecs::forced_import(&db, "address_activity", version, indexes)?;
|
||||
|
||||
// Lazy total = addr_count + empty_addr_count (global + per-type, with all derived indexes)
|
||||
let total_addr_count = TotalAddrCountVecs::forced_import(
|
||||
&db,
|
||||
version,
|
||||
indexes,
|
||||
&addr_count,
|
||||
&empty_addr_count,
|
||||
)?;
|
||||
|
||||
// Lazy delta of total (global + per-type)
|
||||
let new_addr_count =
|
||||
NewAddrCountVecs::forced_import(&db, version, indexes, &total_addr_count)?;
|
||||
|
||||
// Growth rate: new / addr_count (global + per-type)
|
||||
let growth_rate =
|
||||
GrowthRateVecs::forced_import(&db, version, indexes, &new_addr_count, &addr_count)?;
|
||||
|
||||
let this = Self {
|
||||
chain_state: BytesVec::forced_import_with(
|
||||
vecdb::ImportOptions::new(&db, "chain", version)
|
||||
.with_saved_stamped_changes(SAVED_STAMPED_CHANGES),
|
||||
)?,
|
||||
|
||||
addr_count: AddrCountVecs::forced_import(&db, "addr_count", version, indexes)?,
|
||||
empty_addr_count: AddrCountVecs::forced_import(
|
||||
&db,
|
||||
"empty_addr_count",
|
||||
version,
|
||||
indexes,
|
||||
)?,
|
||||
addr_count,
|
||||
empty_addr_count,
|
||||
address_activity,
|
||||
total_addr_count,
|
||||
new_addr_count,
|
||||
growth_rate,
|
||||
|
||||
utxo_cohorts,
|
||||
address_cohorts,
|
||||
@@ -210,6 +244,7 @@ impl Vecs {
|
||||
self.chain_state.reset()?;
|
||||
self.addr_count.reset_height()?;
|
||||
self.empty_addr_count.reset_height()?;
|
||||
self.address_activity.reset_height()?;
|
||||
reset_state(
|
||||
&mut self.any_address_indexes,
|
||||
&mut self.addresses_data,
|
||||
@@ -306,6 +341,20 @@ impl Vecs {
|
||||
.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.empty_addr_count
|
||||
.compute_rest(indexes, starting_indexes, exit)?;
|
||||
self.address_activity
|
||||
.compute_rest(indexes, starting_indexes, exit)?;
|
||||
|
||||
// 6c. Derive total_addr_count dateindex stats (height is lazy sum)
|
||||
self.total_addr_count
|
||||
.derive_from(indexes, starting_indexes, exit)?;
|
||||
|
||||
// 6d. Derive new_addr_count dateindex stats (height is lazy delta)
|
||||
self.new_addr_count
|
||||
.derive_from(indexes, starting_indexes, exit)?;
|
||||
|
||||
// 6e. Derive growth_rate dateindex stats (height is lazy ratio)
|
||||
self.growth_rate
|
||||
.derive_from(indexes, starting_indexes, exit)?;
|
||||
|
||||
// 7. Compute rest part2 (relative metrics)
|
||||
let supply_metrics = &self.utxo_cohorts.all.metrics.supply;
|
||||
@@ -354,8 +403,9 @@ impl Vecs {
|
||||
.min(Height::from(self.chain_state.len()))
|
||||
.min(self.any_address_indexes.min_stamped_height())
|
||||
.min(self.addresses_data.min_stamped_height())
|
||||
.min(Height::from(self.addr_count.min_len()))
|
||||
.min(Height::from(self.empty_addr_count.min_len()))
|
||||
.min(Height::from(self.addr_count.min_stateful_height()))
|
||||
.min(Height::from(self.empty_addr_count.min_stateful_height()))
|
||||
.min(Height::from(self.address_activity.min_stateful_height()))
|
||||
}
|
||||
|
||||
/// Get minimum length across all dateindex-indexed stateful vectors.
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
//! ComputedFromHeight using Distribution aggregation (no sum/cumulative).
|
||||
//!
|
||||
//! Use for block-based metrics where sum/cumulative would be misleading
|
||||
//! (e.g., activity counts that can't be deduplicated across blocks).
|
||||
|
||||
use brk_error::Result;
|
||||
|
||||
use brk_traversable::Traversable;
|
||||
use brk_types::{Height, Version};
|
||||
use derive_more::{Deref, DerefMut};
|
||||
use schemars::JsonSchema;
|
||||
use vecdb::{Database, EagerVec, Exit, ImportableVec, IterableCloneableVec, PcoVec};
|
||||
|
||||
use crate::{ComputeIndexes, indexes};
|
||||
|
||||
use crate::internal::{ComputedHeightDerivedDistribution, ComputedVecValue, NumericValue};
|
||||
|
||||
#[derive(Clone, Deref, DerefMut, Traversable)]
|
||||
#[traversable(merge)]
|
||||
pub struct ComputedFromHeightDistribution<T>
|
||||
where
|
||||
T: ComputedVecValue + PartialOrd + JsonSchema,
|
||||
{
|
||||
#[traversable(rename = "base")]
|
||||
pub height: EagerVec<PcoVec<Height, T>>,
|
||||
#[deref]
|
||||
#[deref_mut]
|
||||
pub rest: ComputedHeightDerivedDistribution<T>,
|
||||
}
|
||||
|
||||
const VERSION: Version = Version::ZERO;
|
||||
|
||||
impl<T> ComputedFromHeightDistribution<T>
|
||||
where
|
||||
T: NumericValue + JsonSchema,
|
||||
{
|
||||
pub fn forced_import(
|
||||
db: &Database,
|
||||
name: &str,
|
||||
version: Version,
|
||||
indexes: &indexes::Vecs,
|
||||
) -> Result<Self> {
|
||||
let v = version + VERSION;
|
||||
|
||||
let height: EagerVec<PcoVec<Height, T>> = EagerVec::forced_import(db, name, v)?;
|
||||
|
||||
let rest = ComputedHeightDerivedDistribution::forced_import(
|
||||
db,
|
||||
name,
|
||||
height.boxed_clone(),
|
||||
v,
|
||||
indexes,
|
||||
)?;
|
||||
|
||||
Ok(Self { height, rest })
|
||||
}
|
||||
|
||||
pub fn compute_all<F>(
|
||||
&mut self,
|
||||
indexes: &indexes::Vecs,
|
||||
starting_indexes: &ComputeIndexes,
|
||||
exit: &Exit,
|
||||
mut compute: F,
|
||||
) -> Result<()>
|
||||
where
|
||||
F: FnMut(&mut EagerVec<PcoVec<Height, T>>) -> Result<()>,
|
||||
{
|
||||
compute(&mut self.height)?;
|
||||
self.compute_rest(indexes, starting_indexes, exit)
|
||||
}
|
||||
|
||||
/// Compute rest from self.height (for stateful computation patterns).
|
||||
pub fn compute_rest(
|
||||
&mut self,
|
||||
indexes: &indexes::Vecs,
|
||||
starting_indexes: &ComputeIndexes,
|
||||
exit: &Exit,
|
||||
) -> Result<()> {
|
||||
self.rest
|
||||
.derive_from(indexes, starting_indexes, &self.height, exit)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
//! LazyBinaryComputedFromHeightDistribution - lazy binary transform with distribution stats.
|
||||
//!
|
||||
//! Height-level values are lazy: `transform(source1[h], source2[h])`.
|
||||
//! Uses Distribution aggregation (no sum/cumulative) - appropriate for ratios.
|
||||
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use brk_types::{Height, Version};
|
||||
use derive_more::{Deref, DerefMut};
|
||||
use schemars::JsonSchema;
|
||||
use vecdb::{BinaryTransform, Database, Exit, IterableBoxedVec, IterableCloneableVec, LazyVecFrom2};
|
||||
|
||||
use crate::{
|
||||
ComputeIndexes, indexes,
|
||||
internal::{ComputedHeightDerivedDistribution, ComputedVecValue, NumericValue},
|
||||
};
|
||||
|
||||
const VERSION: Version = Version::ZERO;
|
||||
|
||||
/// Lazy binary transform at height with distribution stats (no sum/cumulative).
|
||||
#[derive(Clone, Deref, DerefMut, Traversable)]
|
||||
#[traversable(merge)]
|
||||
pub struct LazyBinaryComputedFromHeightDistribution<T, S1T = T, S2T = T>
|
||||
where
|
||||
T: ComputedVecValue + PartialOrd + JsonSchema,
|
||||
S1T: ComputedVecValue,
|
||||
S2T: ComputedVecValue,
|
||||
{
|
||||
#[traversable(rename = "base")]
|
||||
pub height: LazyVecFrom2<Height, T, Height, S1T, Height, S2T>,
|
||||
#[deref]
|
||||
#[deref_mut]
|
||||
pub rest: ComputedHeightDerivedDistribution<T>,
|
||||
}
|
||||
|
||||
impl<T, S1T, S2T> LazyBinaryComputedFromHeightDistribution<T, S1T, S2T>
|
||||
where
|
||||
T: NumericValue + JsonSchema,
|
||||
S1T: ComputedVecValue + JsonSchema,
|
||||
S2T: ComputedVecValue + JsonSchema,
|
||||
{
|
||||
pub fn forced_import<F: BinaryTransform<S1T, S2T, T>>(
|
||||
db: &Database,
|
||||
name: &str,
|
||||
version: Version,
|
||||
source1: IterableBoxedVec<Height, S1T>,
|
||||
source2: IterableBoxedVec<Height, S2T>,
|
||||
indexes: &indexes::Vecs,
|
||||
) -> Result<Self> {
|
||||
let v = version + VERSION;
|
||||
|
||||
let height = LazyVecFrom2::transformed::<F>(name, v, source1, source2);
|
||||
|
||||
let rest = ComputedHeightDerivedDistribution::forced_import(
|
||||
db,
|
||||
name,
|
||||
height.boxed_clone(),
|
||||
v,
|
||||
indexes,
|
||||
)?;
|
||||
|
||||
Ok(Self { height, rest })
|
||||
}
|
||||
|
||||
pub fn derive_from(
|
||||
&mut self,
|
||||
indexes: &indexes::Vecs,
|
||||
starting_indexes: &ComputeIndexes,
|
||||
exit: &Exit,
|
||||
) -> Result<()> {
|
||||
self.rest
|
||||
.derive_from(indexes, starting_indexes, &self.height, exit)
|
||||
}
|
||||
}
|
||||
@@ -2,11 +2,13 @@ mod binary_full;
|
||||
mod binary_last;
|
||||
mod binary_sum;
|
||||
mod binary_sum_cum;
|
||||
mod distribution;
|
||||
mod full;
|
||||
mod last;
|
||||
mod lazy_distribution;
|
||||
mod lazy_full;
|
||||
mod lazy_transform_distribution;
|
||||
mod lazy_binary_computed_distribution;
|
||||
mod lazy_binary_computed_full;
|
||||
mod lazy_binary_computed_last;
|
||||
mod lazy_binary_computed_sum;
|
||||
@@ -35,11 +37,13 @@ pub use binary_full::*;
|
||||
pub use binary_last::*;
|
||||
pub use binary_sum::*;
|
||||
pub use binary_sum_cum::*;
|
||||
pub use distribution::*;
|
||||
pub use full::*;
|
||||
pub use last::*;
|
||||
pub use lazy_distribution::*;
|
||||
pub use lazy_full::*;
|
||||
pub use lazy_transform_distribution::*;
|
||||
pub use lazy_binary_computed_distribution::*;
|
||||
pub use lazy_binary_computed_full::*;
|
||||
pub use lazy_binary_computed_last::*;
|
||||
pub use lazy_binary_computed_sum::*;
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
use brk_types::{Cents, Dollars};
|
||||
use vecdb::UnaryTransform;
|
||||
|
||||
pub struct CentsToDollars;
|
||||
|
||||
impl UnaryTransform<Cents, Dollars> for CentsToDollars {
|
||||
#[inline(always)]
|
||||
fn apply(cents: Cents) -> Dollars {
|
||||
Dollars::from(cents)
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,3 @@
|
||||
mod cents_to_dollars;
|
||||
mod close_price_times_ratio;
|
||||
mod close_price_times_sats;
|
||||
mod difference_f32;
|
||||
@@ -21,6 +20,7 @@ mod price_times_ratio;
|
||||
mod ratio32;
|
||||
mod ratio32_neg;
|
||||
mod ratio_f32;
|
||||
mod ratio_u64_f32;
|
||||
mod return_f32_tenths;
|
||||
mod return_i8;
|
||||
mod return_u16;
|
||||
@@ -34,12 +34,12 @@ mod sat_plus_to_bitcoin;
|
||||
mod sat_to_bitcoin;
|
||||
mod sats_times_close_price;
|
||||
mod u16_to_years;
|
||||
mod u64_plus;
|
||||
mod volatility_sqrt30;
|
||||
mod volatility_sqrt365;
|
||||
mod volatility_sqrt7;
|
||||
mod weight_to_fullness;
|
||||
|
||||
pub use cents_to_dollars::*;
|
||||
pub use close_price_times_ratio::*;
|
||||
pub use close_price_times_sats::*;
|
||||
pub use difference_f32::*;
|
||||
@@ -62,6 +62,7 @@ pub use price_times_ratio::*;
|
||||
pub use ratio32::*;
|
||||
pub use ratio32_neg::*;
|
||||
pub use ratio_f32::*;
|
||||
pub use ratio_u64_f32::*;
|
||||
pub use return_f32_tenths::*;
|
||||
pub use return_i8::*;
|
||||
pub use return_u16::*;
|
||||
@@ -75,6 +76,7 @@ pub use sat_plus_to_bitcoin::*;
|
||||
pub use sat_to_bitcoin::*;
|
||||
pub use sats_times_close_price::*;
|
||||
pub use u16_to_years::*;
|
||||
pub use u64_plus::*;
|
||||
pub use volatility_sqrt30::*;
|
||||
pub use volatility_sqrt365::*;
|
||||
pub use volatility_sqrt7::*;
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
//! (StoredU64, StoredU64) -> StoredF32 ratio
|
||||
|
||||
use brk_types::{StoredF32, StoredU64};
|
||||
use vecdb::BinaryTransform;
|
||||
|
||||
/// (StoredU64, StoredU64) -> StoredF32 ratio (a/b)
|
||||
pub struct RatioU64F32;
|
||||
|
||||
impl BinaryTransform<StoredU64, StoredU64, StoredF32> for RatioU64F32 {
|
||||
#[inline(always)]
|
||||
fn apply(numerator: StoredU64, denominator: StoredU64) -> StoredF32 {
|
||||
let num: f64 = (*numerator) as f64;
|
||||
let den: f64 = (*denominator) as f64;
|
||||
if den == 0.0 {
|
||||
StoredF32::from(0.0)
|
||||
} else {
|
||||
StoredF32::from(num / den)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
use brk_types::StoredU64;
|
||||
use vecdb::BinaryTransform;
|
||||
|
||||
/// (StoredU64, StoredU64) -> StoredU64 addition
|
||||
/// Used for computing total_addr_count = addr_count + empty_addr_count
|
||||
pub struct U64Plus;
|
||||
|
||||
impl BinaryTransform<StoredU64, StoredU64, StoredU64> for U64Plus {
|
||||
#[inline(always)]
|
||||
fn apply(lhs: StoredU64, rhs: StoredU64) -> StoredU64 {
|
||||
StoredU64::from(u64::from(lhs) + u64::from(rhs))
|
||||
}
|
||||
}
|
||||
1
crates/brk_computer/src/price/.gitignore
vendored
Normal file
1
crates/brk_computer/src/price/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
oracle
|
||||
@@ -19,17 +19,17 @@ impl Vecs {
|
||||
self.sats.compute(starting_indexes, &self.usd, exit)?;
|
||||
|
||||
// Oracle price computation is slow and still WIP, only run in dev builds
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
use std::time::Instant;
|
||||
use tracing::info;
|
||||
|
||||
info!("Computing oracle prices...");
|
||||
let i = Instant::now();
|
||||
self.oracle
|
||||
.compute(indexer, indexes, &self.cents, starting_indexes, exit)?;
|
||||
info!("Computed oracle prices in {:?}", i.elapsed());
|
||||
}
|
||||
// #[cfg(debug_assertions)]
|
||||
// {
|
||||
// use std::time::Instant;
|
||||
// use tracing::info;
|
||||
//
|
||||
// info!("Computing oracle prices...");
|
||||
// let i = Instant::now();
|
||||
// self.oracle
|
||||
// .compute(indexer, indexes, &self.cents, starting_indexes, exit)?;
|
||||
// info!("Computed oracle prices in {:?}", i.elapsed());
|
||||
// }
|
||||
|
||||
let _lock = exit.lock();
|
||||
self.db().compact()?;
|
||||
|
||||
@@ -2,12 +2,12 @@ mod compute;
|
||||
mod fetch;
|
||||
|
||||
pub mod cents;
|
||||
pub mod oracle;
|
||||
// pub mod oracle;
|
||||
pub mod sats;
|
||||
pub mod usd;
|
||||
|
||||
pub use cents::Vecs as CentsVecs;
|
||||
pub use oracle::Vecs as OracleVecs;
|
||||
// pub use oracle::Vecs as OracleVecs;
|
||||
pub use sats::Vecs as SatsVecs;
|
||||
pub use usd::Vecs as UsdVecs;
|
||||
|
||||
@@ -33,7 +33,7 @@ pub struct Vecs {
|
||||
pub cents: CentsVecs,
|
||||
pub usd: UsdVecs,
|
||||
pub sats: SatsVecs,
|
||||
pub oracle: OracleVecs,
|
||||
// pub oracle: OracleVecs,
|
||||
}
|
||||
|
||||
impl Vecs {
|
||||
@@ -67,7 +67,7 @@ impl Vecs {
|
||||
let cents = CentsVecs::forced_import(db, version)?;
|
||||
let usd = UsdVecs::forced_import(db, version, indexes)?;
|
||||
let sats = SatsVecs::forced_import(db, version, indexes)?;
|
||||
let oracle = OracleVecs::forced_import(db, version)?;
|
||||
// let oracle = OracleVecs::forced_import(db, version)?;
|
||||
|
||||
Ok(Self {
|
||||
db: db.clone(),
|
||||
@@ -75,7 +75,7 @@ impl Vecs {
|
||||
cents,
|
||||
usd,
|
||||
sats,
|
||||
oracle,
|
||||
// oracle,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,122 +0,0 @@
|
||||
//! Era-based configuration for the UTXOracle algorithm.
|
||||
//! Different time periods require different price bounds and aggregation windows
|
||||
//! due to varying transaction volumes and price levels.
|
||||
|
||||
/// Price bounds and sampling parameters used by the oracle for one era.
#[derive(Debug, Clone, Copy)]
pub struct OracleConfig {
    /// Minimum expected price in cents (e.g., 10 = $0.10)
    pub min_price_cents: u64,
    /// Maximum expected price in cents (e.g., 100_000_000 = $1,000,000)
    pub max_price_cents: u64,
    /// Number of blocks to aggregate for sufficient sample size
    pub blocks_per_window: u32,
    /// Minimum qualifying transactions needed for a valid estimate
    pub min_tx_count: u32,
}

impl OracleConfig {
    /// Returns the configuration for the era containing `year`.
    ///
    /// Any year not covered by an explicit range (including years before
    /// 2009) receives the 2017+ configuration.
    pub fn for_year(year: u16) -> Self {
        // (min price, max price, blocks per window, min tx count)
        let (min_price_cents, max_price_cents, blocks_per_window, min_tx_count) = match year {
            // 2009-2010: very early Bitcoin, extremely low volume and prices.
            // $0.01 .. $1.00, ~2 week window.
            2009..=2010 => (1, 100, 2016, 50),
            // 2011: first major price movements ($0.30 - $30).
            // $0.10 .. $100, ~1 week window.
            2011 => (10, 10_000, 1008, 100),
            // 2012-2013: growing adoption ($5 - $1,200).
            // $1 .. $2,000, ~2 day window.
            2012..=2013 => (100, 200_000, 288, 500),
            // 2014-2016: post-bubble consolidation ($200 - $1,000).
            // $100 .. $20,000, ~1 day window.
            2014..=2016 => (10_000, 2_000_000, 144, 1000),
            // 2017+: modern era, matching Python's slide range of -141 to 201.
            // $10,000 (max_slide = 200) .. $500,000 (min_slide ≈ -140), ~1 day window.
            _ => (1_000_000, 50_000_000, 144, 2000),
        };

        Self {
            min_price_cents,
            max_price_cents,
            blocks_per_window,
            min_tx_count,
        }
    }

    /// Converts the price bounds into a histogram slide range, returned as
    /// `(min_slide, max_slide)` for stencil positioning.
    ///
    /// The stencil center (bin 600) corresponds to 0.001 BTC. At
    /// $100,000/BTC, 0.001 BTC = $100, so position 0 = $100,000/BTC.
    ///
    /// For a price `P` in cents/BTC the slide is `(7 - log10(P)) * 200`,
    /// truncated toward zero. Higher prices give lower (more negative)
    /// slides, so `min_slide` comes from `max_price_cents` and `max_slide`
    /// from `min_price_cents`.
    pub fn slide_range(&self) -> (i32, i32) {
        let slide_for = |price_cents: u64| -> i32 {
            let log_price = (price_cents as f64).log10();
            ((7.0 - log_price) * 200.0) as i32
        };

        (
            slide_for(self.max_price_cents),
            slide_for(self.min_price_cents),
        )
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Era lookup returns the documented price bounds.
    #[test]
    fn test_config_for_year() {
        // 2017+ config matches Python: $10,000 to $500,000
        let modern = OracleConfig::for_year(2020);
        assert_eq!(
            (modern.min_price_cents, modern.max_price_cents),
            (1_000_000, 50_000_000)
        );

        // 2014-2016 config: $100 to $20,000
        let consolidation = OracleConfig::for_year(2015);
        assert_eq!(
            (consolidation.min_price_cents, consolidation.max_price_cents),
            (10_000, 2_000_000)
        );
    }

    /// Slide bounds follow `(7 - log10(price_cents)) * 200`.
    #[test]
    fn test_slide_range() {
        // 2024 config: $10,000 to $500,000 (matches Python's -141 to 201)
        // $500,000 = 5*10^7 cents → slide = (7-7.699)*200 ≈ -140
        // $10,000 = 10^6 cents → slide = (7-6)*200 = 200
        let (low, high) = OracleConfig::for_year(2024).slide_range();
        assert!(low >= -141 && low <= -139); // ~-140
        assert_eq!(high, 200);

        // 2015 config: $100 to $20,000
        // $20,000 = 2*10^6 cents → slide = (7-6.3)*200 ≈ 140
        // $100 = 10^4 cents → slide = (7-4)*200 = 600
        let (low, high) = OracleConfig::for_year(2015).slide_range();
        assert!((101..200).contains(&low)); // ~140
        assert_eq!(high, 600);
    }
}
|
||||
@@ -1,329 +0,0 @@
|
||||
//! Log-scale histogram for UTXOracle price detection.
|
||||
//! Bins output values on a logarithmic scale to detect periodic patterns
|
||||
//! from round USD amounts.
|
||||
|
||||
use brk_types::Sats;
|
||||
|
||||
/// Histogram configuration constants
|
||||
pub const BINS_PER_DECADE: usize = 200;
|
||||
pub const MIN_LOG_BTC: f64 = -6.0; // 10^-6 BTC = 100 sats
|
||||
pub const MAX_LOG_BTC: f64 = 2.0; // 10^2 BTC = 100 BTC
|
||||
pub const NUM_DECADES: usize = 8; // -6 to +2
|
||||
pub const TOTAL_BINS: usize = NUM_DECADES * BINS_PER_DECADE; // 1600 bins
|
||||
|
||||
/// Minimum output value to consider (~1,000 sats = 0.00001 BTC)
|
||||
/// Matches Python: zeros bins 0-200 which is 10^-5 BTC
|
||||
pub const MIN_OUTPUT_SATS: Sats = Sats::_1K;
|
||||
/// Maximum output value to consider (100 BTC)
|
||||
/// Matches Python: zeros bins 1601+ which is ~10^2 BTC
|
||||
pub const MAX_OUTPUT_SATS: Sats = Sats::_100BTC;
|
||||
|
||||
/// Round BTC bin indices that should be smoothed to avoid false positives
|
||||
/// These are bins where round BTC amounts would naturally cluster
|
||||
const ROUND_BTC_BINS: &[usize] = &[
|
||||
201, // 1k sats (0.00001 BTC)
|
||||
401, // 10k sats (0.0001 BTC)
|
||||
461, // 20k sats
|
||||
496, // 30k sats
|
||||
540, // 50k sats
|
||||
601, // 100k sats (0.001 BTC)
|
||||
661, // 200k sats
|
||||
696, // 300k sats
|
||||
740, // 500k sats
|
||||
801, // 0.01 BTC
|
||||
861, // 0.02 BTC
|
||||
896, // 0.03 BTC
|
||||
940, // 0.04 BTC
|
||||
1001, // 0.1 BTC
|
||||
1061, // 0.2 BTC
|
||||
1096, // 0.3 BTC
|
||||
1140, // 0.5 BTC
|
||||
1201, // 1 BTC
|
||||
];
|
||||
|
||||
/// Log-scale histogram for output values
|
||||
#[derive(Clone)]
|
||||
pub struct Histogram {
|
||||
bins: [f64; TOTAL_BINS],
|
||||
count: usize,
|
||||
/// Running sum of all bin values (tracked incrementally for fast normalize)
|
||||
sum: f64,
|
||||
}
|
||||
|
||||
impl Default for Histogram {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Histogram {
|
||||
/// Create a new empty histogram
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
bins: [0.0; TOTAL_BINS],
|
||||
count: 0,
|
||||
sum: 0.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset the histogram to empty
|
||||
#[allow(dead_code)] // Utility for reusing histograms
|
||||
pub fn clear(&mut self) {
|
||||
self.bins.fill(0.0);
|
||||
self.count = 0;
|
||||
self.sum = 0.0;
|
||||
}
|
||||
|
||||
/// Get the number of samples added
|
||||
#[allow(dead_code)] // For v2 confidence scoring
|
||||
pub fn count(&self) -> usize {
|
||||
self.count
|
||||
}
|
||||
|
||||
/// Get the bins array
|
||||
pub fn bins(&self) -> &[f64; TOTAL_BINS] {
|
||||
&self.bins
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// Private helpers for bin operations that maintain sum invariant
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Add value to a bin, maintaining sum invariant
|
||||
#[inline]
|
||||
fn bin_add(&mut self, bin: usize, value: f64) {
|
||||
self.bins[bin] += value;
|
||||
self.sum += value;
|
||||
}
|
||||
|
||||
/// Set a bin to a new value, maintaining sum invariant
|
||||
#[inline]
|
||||
fn bin_set(&mut self, bin: usize, new_value: f64) {
|
||||
let old_value = self.bins[bin];
|
||||
self.bins[bin] = new_value;
|
||||
self.sum += new_value - old_value;
|
||||
}
|
||||
|
||||
/// Subtract from a bin (clamped to 0), maintaining sum invariant
|
||||
/// Returns the actual amount subtracted
|
||||
#[inline]
|
||||
fn bin_sub_clamped(&mut self, bin: usize, value: f64) -> f64 {
|
||||
let old_value = self.bins[bin];
|
||||
let new_value = (old_value - value).max(0.0);
|
||||
self.bins[bin] = new_value;
|
||||
let removed = old_value - new_value;
|
||||
self.sum -= removed;
|
||||
removed
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Convert satoshi value to bin index
|
||||
/// Returns None if value is outside the histogram range
|
||||
#[inline]
|
||||
pub fn sats_to_bin(sats: Sats) -> Option<usize> {
|
||||
if sats < MIN_OUTPUT_SATS || sats > MAX_OUTPUT_SATS {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Convert sats to BTC (log scale)
|
||||
let btc = f64::from(sats) / f64::from(Sats::ONE_BTC);
|
||||
let log_btc = btc.log10();
|
||||
|
||||
// Map to bin index: log_btc in [-6, 2] -> bin in [0, 1600)
|
||||
let normalized = (log_btc - MIN_LOG_BTC) / (MAX_LOG_BTC - MIN_LOG_BTC);
|
||||
let bin = (normalized * TOTAL_BINS as f64) as usize;
|
||||
|
||||
if bin < TOTAL_BINS { Some(bin) } else { None }
|
||||
}
|
||||
|
||||
/// Convert bin index to approximate satoshi value
|
||||
#[allow(dead_code)] // Inverse of sats_to_bin, useful for debugging
|
||||
#[inline]
|
||||
pub fn bin_to_sats(bin: usize) -> Sats {
|
||||
let normalized = bin as f64 / TOTAL_BINS as f64;
|
||||
let log_btc = MIN_LOG_BTC + normalized * (MAX_LOG_BTC - MIN_LOG_BTC);
|
||||
let btc = 10_f64.powf(log_btc);
|
||||
Sats::from((btc * f64::from(Sats::ONE_BTC)) as u64)
|
||||
}
|
||||
|
||||
/// Add a value to the histogram with the given weight
|
||||
#[allow(dead_code)] // Used in tests and non-sparse paths
|
||||
#[inline]
|
||||
pub fn add(&mut self, sats: Sats, weight: f64) {
|
||||
if let Some(bin) = Self::sats_to_bin(sats) {
|
||||
self.bin_add(bin, weight);
|
||||
self.count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Add another histogram to this one
|
||||
#[allow(dead_code)] // Non-sparse alternative
|
||||
pub fn add_histogram(&mut self, other: &Histogram) {
|
||||
for (i, &v) in other.bins.iter().enumerate() {
|
||||
if v > 0.0 {
|
||||
self.bin_add(i, v);
|
||||
}
|
||||
}
|
||||
self.count += other.count;
|
||||
}
|
||||
|
||||
/// Subtract another histogram from this one
|
||||
/// Clamps bins to >= 0 to handle floating-point precision issues
|
||||
#[allow(dead_code)] // Non-sparse alternative
|
||||
pub fn subtract_histogram(&mut self, other: &Histogram) {
|
||||
for (i, &v) in other.bins.iter().enumerate() {
|
||||
if v > 0.0 {
|
||||
self.bin_sub_clamped(i, v);
|
||||
}
|
||||
}
|
||||
self.count = self.count.saturating_sub(other.count);
|
||||
}
|
||||
|
||||
/// Add sparse entries to this histogram (O(entries) instead of O(1600))
|
||||
#[inline]
|
||||
pub fn add_sparse(&mut self, entries: &[(u16, f64)]) {
|
||||
for &(bin, value) in entries {
|
||||
self.bin_add(bin as usize, value);
|
||||
}
|
||||
self.count += entries.len();
|
||||
}
|
||||
|
||||
/// Subtract sparse entries from this histogram (O(entries) instead of O(1600))
|
||||
#[inline]
|
||||
pub fn subtract_sparse(&mut self, entries: &[(u16, f64)]) {
|
||||
for &(bin, value) in entries {
|
||||
self.bin_sub_clamped(bin as usize, value);
|
||||
}
|
||||
self.count = self.count.saturating_sub(entries.len());
|
||||
}
|
||||
|
||||
/// Add a value and return the bin index (for sparse collection)
|
||||
#[allow(dead_code)] // Alternative API for hybrid approaches
|
||||
#[inline]
|
||||
pub fn add_and_get_bin(&mut self, sats: Sats, weight: f64) -> Option<u16> {
|
||||
if let Some(bin) = Self::sats_to_bin(sats) {
|
||||
self.bin_add(bin, weight);
|
||||
self.count += 1;
|
||||
Some(bin as u16)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Copy from another histogram (avoids allocation vs clone)
|
||||
#[inline]
|
||||
pub fn copy_from(&mut self, other: &Histogram) {
|
||||
self.bins.copy_from_slice(&other.bins);
|
||||
self.count = other.count;
|
||||
self.sum = other.sum;
|
||||
}
|
||||
|
||||
/// Smooth over round BTC amounts to prevent false positives
|
||||
/// Replaces each round BTC bin with the average of its neighbors
|
||||
pub fn smooth_round_btc(&mut self) {
|
||||
for &bin in ROUND_BTC_BINS {
|
||||
if bin > 0 && bin < TOTAL_BINS - 1 {
|
||||
let new_val = (self.bins[bin - 1] + self.bins[bin + 1]) / 2.0;
|
||||
self.bin_set(bin, new_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Normalize the histogram so bins sum to 1.0, then cap extremes
|
||||
/// Python caps at 0.008 after normalization to remove outliers
|
||||
/// Uses pre-tracked sum for O(1) instead of O(1600) sum computation
|
||||
pub fn normalize(&mut self) {
|
||||
if self.sum > 0.0 {
|
||||
let inv_sum = 1.0 / self.sum;
|
||||
for bin in &mut self.bins {
|
||||
if *bin > 0.0 {
|
||||
*bin *= inv_sum;
|
||||
// Cap extremes (0.008 chosen by historical testing in Python)
|
||||
if *bin > 0.008 {
|
||||
*bin = 0.008;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the value at a specific bin
|
||||
#[allow(dead_code)] // Alternative to direct bins() access
|
||||
#[inline]
|
||||
pub fn get(&self, bin: usize) -> f64 {
|
||||
self.bins.get(bin).copied().unwrap_or(0.0)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Bin mapping is monotone in value and rejects values outside the range.
    #[test]
    fn test_sats_to_bin() {
        let midpoint = TOTAL_BINS / 2;

        // 10k sats should map to early bins
        let small = Histogram::sats_to_bin(Sats::_10K).unwrap();
        assert!(small < midpoint);

        // 1 BTC should map to later bins
        let large = Histogram::sats_to_bin(Sats::_1BTC).unwrap();
        assert!(large > midpoint);

        // Below minimum should return None
        assert!(Histogram::sats_to_bin(Sats::_100).is_none());

        // Above maximum should return None
        assert!(Histogram::sats_to_bin(Sats::_100BTC).is_none());
    }

    /// Converting to a bin and back recovers approximately the same value.
    #[test]
    fn test_bin_to_sats_roundtrip() {
        let samples = [Sats::_10K, Sats::_100K, Sats::_1M, Sats::_10M, Sats::_1BTC];

        let binned = samples
            .into_iter()
            .filter_map(|sats| Histogram::sats_to_bin(sats).map(|bin| (sats, bin)));

        for (sats, bin) in binned {
            let recovered = Histogram::bin_to_sats(bin);
            // Binning loses at most one bin width (10^(1/200), about 1.2%);
            // the assertion allows 5% slack.
            let ratio = f64::from(recovered) / f64::from(sats);
            assert!(
                ratio > 0.95 && ratio < 1.05,
                "sats={}, recovered={}",
                sats,
                recovered
            );
        }
    }

    /// Three unit-weight samples in distinct bins all end up at the cap.
    #[test]
    fn test_add_and_normalize() {
        let mut hist = Histogram::new();
        for sats in [Sats::_100K, Sats::_1M, Sats::_10M] {
            hist.add(sats, 1.0);
        }

        assert_eq!(hist.count(), 3);

        hist.normalize();

        // After normalization, all non-zero bins should be capped at 0.008
        // because 1/3 ≈ 0.333 > 0.008
        let populated: Vec<f64> = hist
            .bins()
            .iter()
            .copied()
            .filter(|&value| value > 0.0)
            .collect();

        assert_eq!(populated.len(), 3);
        for value in populated {
            assert!((value - 0.008).abs() < 1e-10);
        }
    }

    /// A single dominant bin is clamped to the cap after normalization.
    #[test]
    fn test_normalize_caps_extremes() {
        let mut hist = Histogram::new();
        // Add a single large value - after normalization it would be 1.0
        hist.add(Sats::_100K, 100.0);

        hist.normalize();

        // Should be capped at 0.008
        let largest = hist
            .bins()
            .iter()
            .copied()
            .fold(0.0_f64, |acc, value| acc.max(value));
        assert!((largest - 0.008).abs() < 1e-10);
    }
}
|
||||
@@ -1,152 +0,0 @@
|
||||
use brk_error::Result;
|
||||
use brk_types::{DateIndex, OHLCCents, OHLCDollars, Version};
|
||||
use vecdb::{BytesVec, Database, ImportableVec, IterableCloneableVec, LazyVecFrom1, PcoVec};
|
||||
|
||||
use super::Vecs;
|
||||
use crate::internal::{CentsToDollars, Distribution, LazyTransformDistribution};
|
||||
|
||||
impl Vecs {
|
||||
pub fn forced_import(db: &Database, parent_version: Version) -> Result<Self> {
|
||||
// v12: Add both-outputs-round filter
|
||||
let version = parent_version + Version::new(12);
|
||||
|
||||
// Layer 1: Pair output index
|
||||
let pairoutputindex_to_txindex =
|
||||
PcoVec::forced_import(db, "pairoutputindex_to_txindex", version)?;
|
||||
let height_to_first_pairoutputindex =
|
||||
PcoVec::forced_import(db, "height_to_first_pairoutputindex", version)?;
|
||||
|
||||
// Layer 3: Output values
|
||||
let output0_value = PcoVec::forced_import(db, "pair_output0_value", version)?;
|
||||
let output1_value = PcoVec::forced_import(db, "pair_output1_value", version)?;
|
||||
|
||||
// Layer 4: Phase histograms (depends on Layer 1)
|
||||
let phase_histogram = BytesVec::forced_import(db, "phase_histogram", version)?;
|
||||
|
||||
// Layer 5: Phase Oracle prices
|
||||
// v45: Back to decades (10x) + anchor only
|
||||
let phase_version = version + Version::new(38);
|
||||
let phase_price_cents = PcoVec::forced_import(db, "phase_price_cents", phase_version)?;
|
||||
let phase_daily_cents = Distribution::forced_import(db, "phase_daily", phase_version)?;
|
||||
let phase_daily_dollars = LazyTransformDistribution::from_distribution::<CentsToDollars>(
|
||||
"phase_daily_dollars",
|
||||
phase_version,
|
||||
&phase_daily_cents,
|
||||
);
|
||||
|
||||
// UTXOracle (Python port)
|
||||
let price_cents = PcoVec::forced_import(db, "oracle_price_cents", version)?;
|
||||
let ohlc_cents = BytesVec::forced_import(db, "oracle_ohlc_cents", version)?;
|
||||
let tx_count = PcoVec::forced_import(db, "oracle_tx_count", version)?;
|
||||
|
||||
let ohlc_dollars = LazyVecFrom1::init(
|
||||
"oracle_ohlc",
|
||||
version,
|
||||
ohlc_cents.boxed_clone(),
|
||||
|di: DateIndex, iter| iter.get(di).map(|o: OHLCCents| OHLCDollars::from(o)),
|
||||
);
|
||||
|
||||
// Daily OHLC from height close only
|
||||
let close_ohlc_cents = BytesVec::forced_import(db, "close_ohlc_cents", version)?;
|
||||
let close_ohlc_dollars = LazyVecFrom1::init(
|
||||
"close_ohlc_dollars",
|
||||
version,
|
||||
close_ohlc_cents.boxed_clone(),
|
||||
|di: DateIndex, iter| iter.get(di).map(|o: OHLCCents| OHLCDollars::from(o)),
|
||||
);
|
||||
|
||||
// Daily OHLC from height mid price ((open+close)/2)
|
||||
let mid_ohlc_cents = BytesVec::forced_import(db, "mid_ohlc_cents", version)?;
|
||||
let mid_ohlc_dollars = LazyVecFrom1::init(
|
||||
"mid_ohlc_dollars",
|
||||
version,
|
||||
mid_ohlc_cents.boxed_clone(),
|
||||
|di: DateIndex, iter| iter.get(di).map(|o: OHLCCents| OHLCDollars::from(o)),
|
||||
);
|
||||
|
||||
// Phase Oracle V2 (round USD template matching)
|
||||
// v3: Peak prices use 100 bins (downsampled from 200)
|
||||
let phase_v2_version = version + Version::new(3);
|
||||
let phase_v2_histogram =
|
||||
BytesVec::forced_import(db, "phase_v2_histogram", phase_v2_version)?;
|
||||
let phase_v2_price_cents =
|
||||
PcoVec::forced_import(db, "phase_v2_price_cents", phase_v2_version)?;
|
||||
let phase_v2_peak_price_cents =
|
||||
PcoVec::forced_import(db, "phase_v2_peak_price_cents", phase_v2_version)?;
|
||||
let phase_v2_daily_cents =
|
||||
Distribution::forced_import(db, "phase_v2_daily", phase_v2_version)?;
|
||||
let phase_v2_daily_dollars =
|
||||
LazyTransformDistribution::from_distribution::<CentsToDollars>(
|
||||
"phase_v2_daily_dollars",
|
||||
phase_v2_version,
|
||||
&phase_v2_daily_cents,
|
||||
);
|
||||
let phase_v2_peak_daily_cents =
|
||||
Distribution::forced_import(db, "phase_v2_peak_daily", phase_v2_version)?;
|
||||
let phase_v2_peak_daily_dollars =
|
||||
LazyTransformDistribution::from_distribution::<CentsToDollars>(
|
||||
"phase_v2_peak_daily_dollars",
|
||||
phase_v2_version,
|
||||
&phase_v2_peak_daily_cents,
|
||||
);
|
||||
|
||||
// Phase Oracle V3 (BASE + noP2TR + uniqueVal filter)
|
||||
// v5: Added noP2TR filter to reduce inscription spam
|
||||
let phase_v3_version = version + Version::new(5);
|
||||
let phase_v3_histogram =
|
||||
BytesVec::forced_import(db, "phase_v3_histogram", phase_v3_version)?;
|
||||
let phase_v3_price_cents =
|
||||
PcoVec::forced_import(db, "phase_v3_price_cents", phase_v3_version)?;
|
||||
let phase_v3_peak_price_cents =
|
||||
PcoVec::forced_import(db, "phase_v3_peak_price_cents", phase_v3_version)?;
|
||||
let phase_v3_daily_cents =
|
||||
Distribution::forced_import(db, "phase_v3_daily", phase_v3_version)?;
|
||||
let phase_v3_daily_dollars =
|
||||
LazyTransformDistribution::from_distribution::<CentsToDollars>(
|
||||
"phase_v3_daily_dollars",
|
||||
phase_v3_version,
|
||||
&phase_v3_daily_cents,
|
||||
);
|
||||
let phase_v3_peak_daily_cents =
|
||||
Distribution::forced_import(db, "phase_v3_peak_daily", phase_v3_version)?;
|
||||
let phase_v3_peak_daily_dollars =
|
||||
LazyTransformDistribution::from_distribution::<CentsToDollars>(
|
||||
"phase_v3_peak_daily_dollars",
|
||||
phase_v3_version,
|
||||
&phase_v3_peak_daily_cents,
|
||||
);
|
||||
|
||||
Ok(Self {
|
||||
pairoutputindex_to_txindex,
|
||||
height_to_first_pairoutputindex,
|
||||
output0_value,
|
||||
output1_value,
|
||||
phase_histogram,
|
||||
phase_price_cents,
|
||||
phase_daily_cents,
|
||||
phase_daily_dollars,
|
||||
price_cents,
|
||||
ohlc_cents,
|
||||
ohlc_dollars,
|
||||
tx_count,
|
||||
close_ohlc_cents,
|
||||
close_ohlc_dollars,
|
||||
mid_ohlc_cents,
|
||||
mid_ohlc_dollars,
|
||||
phase_v2_histogram,
|
||||
phase_v2_price_cents,
|
||||
phase_v2_peak_price_cents,
|
||||
phase_v2_daily_cents,
|
||||
phase_v2_daily_dollars,
|
||||
phase_v2_peak_daily_cents,
|
||||
phase_v2_peak_daily_dollars,
|
||||
phase_v3_histogram,
|
||||
phase_v3_price_cents,
|
||||
phase_v3_peak_price_cents,
|
||||
phase_v3_daily_cents,
|
||||
phase_v3_daily_dollars,
|
||||
phase_v3_peak_daily_cents,
|
||||
phase_v3_peak_daily_dollars,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,165 +0,0 @@
|
||||
//! # UTXOracle: Trustless On-Chain Bitcoin Price Discovery
|
||||
//!
|
||||
//! This module implements the UTXOracle algorithm for deriving Bitcoin prices purely from
|
||||
//! on-chain transaction data, without any external price feeds. The algorithm detects
|
||||
//! round USD amounts ($10, $20, $50, $100, etc.) in transaction outputs, which create
|
||||
//! periodic patterns in the log-scale distribution of output values.
|
||||
//!
|
||||
//! ## Algorithm Overview
|
||||
//!
|
||||
//! 1. **Transaction Filtering**: Select "clean" transactions likely to represent purchases:
|
||||
//! - Exactly 2 outputs (payment + change)
|
||||
//! - At most 5 inputs (not consolidation)
|
||||
//! - No OP_RETURN outputs
|
||||
//! - Witness size < 500 bytes (simple signatures)
|
||||
//! - No same-day input spends (not internal transfers)
|
||||
//!
|
||||
//! 2. **Histogram Building**: Place output values on a log-scale histogram
|
||||
//! - 8 decades (10^-6 to 10^2 BTC) × 200 bins/decade = 1600 bins
|
||||
//! - Smooth over round BTC amounts to avoid false positives
|
||||
//!
|
||||
//! 3. **Stencil Matching**: Slide a template across the histogram to find the best fit
|
||||
//! - Spike stencil: Hard-coded weights at known USD amounts ($1, $5, $10, $20, ...)
|
||||
//! - Smooth stencil: Gaussian + linear term for general spending distribution
|
||||
//!
|
||||
//! 4. **Price Refinement**: Narrow down using geometric median convergence
|
||||
//! - Collect outputs within ±25% of rough estimate
|
||||
//! - Iteratively converge to center of mass within ±5% window
|
||||
//!
|
||||
//! ## Correctness: Equivalence to Python UTXOracle
|
||||
//!
|
||||
//! This implementation produces equivalent results to the original Python UTXOracle.
|
||||
//! The core algorithm is identical; differences are in parameterization and indexing.
|
||||
//!
|
||||
//! ### Algorithm Equivalence
|
||||
//!
|
||||
//! | Component | Python | Rust | Notes |
|
||||
//! |-----------|--------|------|-------|
|
||||
//! | Bins per decade | 200 | 200 | Identical resolution (~0.5% per bin) |
|
||||
//! | Histogram range | 10^-6 to 10^6 BTC | 10^-6 to 10^2 BTC | Rust uses tighter bounds |
|
||||
//! | Active bins | 201-1600 (1400 bins) | 400-1400 (1000 bins) | Different output filters |
|
||||
//! | Spike stencil | 29 USD amounts | 29 USD amounts | Same weights from Python |
|
||||
//! | Smooth stencil σ | 201 (over 803 bins) | 201 | Identical: both use 200 bins per decade |
|
||||
//! | Linear coefficient | 0.0000005 | 0.0000005 | Identical: applied to window position (0-802), not rescaled |
|
||||
//! | Smooth weight | 0.65 | 0.65 | Identical |
|
||||
//! | Normalization cap | 0.008 | 0.008 | Identical |
|
||||
//! | Round BTC smoothing | avg(neighbors) | avg(neighbors) | Identical algorithm |
|
||||
//! | Refinement | geometric median | geometric median | Identical algorithm |
|
||||
//! | Wide window | ±25% | ±25% | Identical |
|
||||
//! | Tight window | ±5% | ±5% | Identical |
|
||||
//! | Round sats tolerance | ±0.01% | ±0.01% | Identical |
|
||||
//!
|
||||
//! ### Transaction Filters (identical criteria)
|
||||
//!
|
||||
//! | Filter | Python | Rust |
|
||||
//! |--------|--------|------|
|
||||
//! | Output count | == 2 | == 2 |
|
||||
//! | Input count | ≤ 5 | ≤ 5 |
|
||||
//! | OP_RETURN | excluded | excluded |
|
||||
//! | Witness size | < 500 bytes | < 500 bytes |
|
||||
//! | Same-day inputs | excluded | excluded |
|
||||
//! | Coinbase | excluded | excluded |
|
||||
//!
|
||||
//! ### Spike Stencil Verification
|
||||
//!
|
||||
//! Python spike_stencil indices and weights (utxo_oracle.py lines 1012-1041):
|
||||
//! ```text
|
||||
//! Index Weight USD Amount
|
||||
//! 40 0.00130 $1
|
||||
//! 141 0.00168 $5
|
||||
//! 201 0.00347 $10
|
||||
//! 202 0.00199 $10 companion
|
||||
//! 236 0.00191 $15
|
||||
//! 261 0.00334 $20
|
||||
//! 262 0.00259 $20 companion
|
||||
//! ...continues for 29 total entries...
|
||||
//! 801 0.00083 $10000
|
||||
//! ```
|
||||
//!
|
||||
//! Rust uses offset-from-center format (stencil.rs):
|
||||
//! - Offset = Python stencil index - 402
//! - Python index 401 ($100 main spike) → offset 401 - 402 = -1
//! - Python index 40 ($1) → offset 40 - 402 = -362
|
||||
//!
|
||||
//! ### Key Implementation Differences
|
||||
//!
|
||||
//! 1. **Bin indexing**: Python uses 1-indexed bins (bin 0 = zero sats), Rust uses 0-indexed
|
||||
//! 2. **Output filter**: Python accepts 10^-5 to 10^5 BTC, Rust uses 10K sats to 10 BTC
|
||||
//! 3. **Slide range**: Python hardcodes -141 to 201, Rust computes from era-based price bounds
|
||||
//! 4. **Era support**: Rust has era-based config for pre-2017 data, Python targets recent data
|
||||
//!
|
||||
//! These differences affect which transactions are considered but not the core price-finding
|
||||
//! algorithm. Both implementations find the same price when applied to the same filtered data.
|
||||
//!
|
||||
//! ## Performance Optimizations
|
||||
//!
|
||||
//! This Rust implementation is significantly faster than Python through these optimizations:
|
||||
//!
|
||||
//! ### 1. Pre-computed Gaussian Weights (stencil.rs)
|
||||
//! - **Python**: Computes `exp(-d²/2σ²)` for every bin at every slide position
|
||||
//! - ~350 slides × 1600 bins × 880,000 blocks = 493 billion exp() calls
|
||||
//! - **Rust**: Lookup table of 801 pre-computed weights indexed by distance
|
||||
//! - Single array lookup instead of exp() computation
|
||||
//!
|
||||
//! ### 2. Sparse Histogram Storage (compute.rs, histogram.rs)
|
||||
//! - **Python**: Full 803-element arrays per block in sliding window
|
||||
//! - **Rust**: Store only non-zero `(bin_index, count)` pairs (~40 per block)
|
||||
//! - Window memory: 25MB → 0.6MB
|
||||
//! - Add/subtract operations: O(1600) → O(40)
|
||||
//!
|
||||
//! ### 3. Sparse Stencil Iteration (stencil.rs)
|
||||
//! - **Python**: Iterates all bins, multiplies by stencil weight (most are zero)
|
||||
//! - **Rust**: Collect non-zero bins once, iterate only those for scoring
|
||||
//! - Score computation: O(1600) → O(non-zero bins)
|
||||
//!
|
||||
//! ### 4. Sparse Linear Term (stencil.rs)
//! - **Python**: Computes `Σ bins[i] * coef * i` at every slide position
//! - **Rust**: Also evaluated per slide, but only over the non-zero bins inside the 803-bin window
|
||||
//!
|
||||
//! ### 5. HashMap Spike Lookups (stencil.rs)
|
||||
//! - **Python**: Linear search through ~500 non-zero bins for each of 29 spike positions
|
||||
//! - O(29 × 500 × 350 slides) = 5 million comparisons per block
|
||||
//! - **Rust**: HashMap for O(1) bin lookups
|
||||
//! - O(29 × 350 slides) = 10,000 lookups per block (~500x faster)
|
||||
//!
|
||||
//! ### 6. Incremental Sum Tracking (histogram.rs)
|
||||
//! - **Python**: Computes sum over 1600 bins during normalize
|
||||
//! - **Rust**: Tracks sum incrementally during add/subtract operations
|
||||
//! - Normalize uses pre-computed sum, skips zero bins
|
||||
//!
|
||||
//! ### 7. O(1) Round Sats Detection (stencil.rs)
|
||||
//! - **Python**: Iterates through 365 round values, checks ±0.01% tolerance
|
||||
//! - **Rust**: Modular arithmetic based on magnitude to detect round amounts
|
||||
//! - Per-output check: O(365) → O(1)
|
||||
//!
|
||||
//! ### 8. Optimized Refinement (stencil.rs)
|
||||
//! - **Python**: Allocates new list per iteration, uses set for convergence check
|
||||
//! - **Rust**: Reuses buffers, in-place sorting, fixed array for seen prices
|
||||
//! - Zero allocations in hot loop
|
||||
//!
|
||||
//! ### 9. Filter Order Optimization (compute.rs)
|
||||
//! - Check output_count (== 2) before input_count
|
||||
//! - ~95% of transactions eliminated without fetching input_count
|
||||
//!
|
||||
//! ### 10. Buffered Sequential Reads (compute.rs)
|
||||
//! - 16KB buffered iterators for all vector reads
|
||||
//! - Sequential access pattern maximizes cache efficiency
|
||||
//!
|
||||
//! ## Module Structure
|
||||
//!
|
||||
//! - `config.rs`: Era-based configuration (price bounds, window sizes)
|
||||
//! - `histogram.rs`: Log-scale histogram with sparse operations
|
||||
//! - `stencil.rs`: Spike/smooth stencils and price refinement
|
||||
//! - `compute.rs`: Main computation loop with sliding window
|
||||
//! - `vecs.rs`: Output vector definitions
|
||||
//! - `import.rs`: Database import handling
|
||||
|
||||
mod compute;
|
||||
mod config;
|
||||
mod histogram;
|
||||
mod import;
|
||||
mod phase_v2;
|
||||
mod stencil;
|
||||
mod vecs;
|
||||
|
||||
pub use vecs::Vecs;
|
||||
@@ -1,295 +0,0 @@
|
||||
//! Phase Oracle V2 - Round USD Template Cross-Correlation
|
||||
//!
|
||||
//! Detects Bitcoin prices by finding where round USD amounts ($1, $5, $10, etc.)
|
||||
//! cluster in the phase histogram. Uses weekly OHLC anchors to constrain search.
|
||||
//!
|
||||
//! ## Algorithm
|
||||
//!
|
||||
//! 1. Build 200-bin phase histogram: bin = frac(log10(sats)) * 200
|
||||
//! 2. Cross-correlate with weighted round USD template
|
||||
//! 3. Use weekly OHLC anchor to constrain phase search range
|
||||
//! 4. Return best-matching phase, convert to price
|
||||
//!
|
||||
//! ## Key Insight
|
||||
//!
|
||||
//! Round USD amounts create a fixed "fingerprint" pattern in phase space:
|
||||
//! - $1, $10, $100, $1000 → phase 0.00 (weight 10)
|
||||
//! - $5, $50, $500 → phase 0.70 (weight 9)
|
||||
//! - $2, $20, $200 → phase 0.30 (weight 7)
|
||||
//! - etc.
|
||||
//!
|
||||
//! The pattern shifts based on price: sats_phase = usd_phase - price_phase (mod 1)
|
||||
//! Finding the shift that best matches the template reveals the price phase.
|
||||
|
||||
use brk_types::Sats;
|
||||
|
||||
/// How many bins one decade of `log10(sats)` is divided into (0.5% resolution).
pub const PHASE_BINS_V2: usize = 200;

/// Fingerprint of round USD amounts, stored as `(phase, weight)` pairs.
///
/// * `phase`  - `frac(log10(usd_cents))` of a round USD value; amounts that
///   differ only by a power of ten share one entry.
/// * `weight` - expected popularity of that amount (higher = more common).
pub const ROUND_USD_TEMPLATE: [(f64, u32); 11] = [
    // phase, weight     amounts sharing this phase       popularity
    (0.00, 10), //       $1, $10, $100, $1000             very common
    (0.18, 3),  //       $1.50, $15, $150                 uncommon
    (0.30, 7),  //       $2, $20, $200                    common
    (0.40, 4),  //       $2.50, $25, $250                 moderate
    (0.48, 5),  //       $3, $30, $300                    moderate
    (0.60, 4),  //       $4, $40, $400                    moderate
    (0.70, 9),  //       $5, $50, $500                    very common
    (0.78, 2),  //       $6, $60, $600                    rare
    (0.85, 2),  //       $7, $70, $700                    rare
    (0.90, 2),  //       $8, $80, $800                    rare
    (0.95, 2),  //       $9, $90, $900                    rare
];
|
||||
|
||||
/// Pre-computed template bins: (bin_index, weight)
|
||||
pub fn template_bins() -> Vec<(usize, u32)> {
|
||||
ROUND_USD_TEMPLATE
|
||||
.iter()
|
||||
.map(|&(phase, weight)| {
|
||||
let bin = ((phase * PHASE_BINS_V2 as f64) as usize) % PHASE_BINS_V2;
|
||||
(bin, weight)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Phase histogram for V2 oracle (200 bins)
|
||||
#[derive(Clone)]
|
||||
pub struct PhaseHistogramV2 {
|
||||
bins: [u32; PHASE_BINS_V2],
|
||||
total: u32,
|
||||
}
|
||||
|
||||
impl Default for PhaseHistogramV2 {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
impl PhaseHistogramV2 {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
bins: [0; PHASE_BINS_V2],
|
||||
total: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert sats value to phase bin index
|
||||
/// Filters: min 1k sats, max 100k BTC
|
||||
#[inline]
|
||||
pub fn sats_to_bin(sats: Sats) -> Option<usize> {
|
||||
if sats < Sats::_1K || sats > Sats::_100K_BTC {
|
||||
return None;
|
||||
}
|
||||
let log_sats = f64::from(sats).log10();
|
||||
let phase = log_sats.fract();
|
||||
let phase = if phase < 0.0 { phase + 1.0 } else { phase };
|
||||
Some(((phase * PHASE_BINS_V2 as f64) as usize).min(PHASE_BINS_V2 - 1))
|
||||
}
|
||||
|
||||
/// Add a sats value to the histogram
|
||||
#[inline]
|
||||
pub fn add(&mut self, sats: Sats) {
|
||||
if let Some(bin) = Self::sats_to_bin(sats) {
|
||||
self.bins[bin] = self.bins[bin].saturating_add(1);
|
||||
self.total += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Add another histogram to this one
|
||||
pub fn add_histogram(&mut self, other: &PhaseHistogramV2) {
|
||||
for (i, &count) in other.bins.iter().enumerate() {
|
||||
self.bins[i] = self.bins[i].saturating_add(count);
|
||||
}
|
||||
self.total = self.total.saturating_add(other.total);
|
||||
}
|
||||
|
||||
/// Get total count
|
||||
pub fn total(&self) -> u32 {
|
||||
self.total
|
||||
}
|
||||
|
||||
/// Get bins array
|
||||
pub fn bins(&self) -> &[u32; PHASE_BINS_V2] {
|
||||
&self.bins
|
||||
}
|
||||
|
||||
/// Clear the histogram
|
||||
pub fn clear(&mut self) {
|
||||
self.bins.fill(0);
|
||||
self.total = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the best price phase using cross-correlation with weighted template
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `histogram` - Phase histogram to analyze
|
||||
/// * `tolerance_bins` - Number of bins tolerance for template matching (e.g., 4 = ±2%)
|
||||
/// * `phase_min` - Optional minimum phase from anchor (0.0-1.0)
|
||||
/// * `phase_max` - Optional maximum phase from anchor (0.0-1.0)
|
||||
///
|
||||
/// # Returns
|
||||
/// * `(best_phase, best_correlation)` - Best matching phase (0.0-1.0) and correlation score
|
||||
pub fn find_best_phase(
|
||||
histogram: &PhaseHistogramV2,
|
||||
tolerance_bins: usize,
|
||||
phase_min: Option<f64>,
|
||||
phase_max: Option<f64>,
|
||||
) -> (f64, u64) {
|
||||
let template = template_bins();
|
||||
let bins = histogram.bins();
|
||||
|
||||
let mut best_phase = 0.0;
|
||||
let mut best_corr: u64 = 0;
|
||||
|
||||
// Determine valid shifts based on anchor constraints
|
||||
let valid_shifts: Vec<usize> = if let (Some(p_min), Some(p_max)) = (phase_min, phase_max) {
|
||||
let min_bin = ((p_min * PHASE_BINS_V2 as f64) as usize) % PHASE_BINS_V2;
|
||||
let max_bin = ((p_max * PHASE_BINS_V2 as f64) as usize) % PHASE_BINS_V2;
|
||||
|
||||
if min_bin <= max_bin {
|
||||
(min_bin..=max_bin).collect()
|
||||
} else {
|
||||
// Wraps around
|
||||
(min_bin..PHASE_BINS_V2).chain(0..=max_bin).collect()
|
||||
}
|
||||
} else {
|
||||
(0..PHASE_BINS_V2).collect()
|
||||
};
|
||||
|
||||
// Cross-correlation: slide template across histogram
|
||||
for shift in valid_shifts {
|
||||
let mut corr: u64 = 0;
|
||||
|
||||
for &(template_bin, weight) in &template {
|
||||
// Where would this template bin appear at this price phase shift?
|
||||
let expected_bin = (template_bin + PHASE_BINS_V2 - shift) % PHASE_BINS_V2;
|
||||
|
||||
// Sum bins within tolerance, weighted
|
||||
for t in 0..=(2 * tolerance_bins) {
|
||||
let check_bin = (expected_bin + PHASE_BINS_V2 - tolerance_bins + t) % PHASE_BINS_V2;
|
||||
corr += bins[check_bin] as u64 * weight as u64;
|
||||
}
|
||||
}
|
||||
|
||||
if corr > best_corr {
|
||||
best_corr = corr;
|
||||
best_phase = shift as f64 / PHASE_BINS_V2 as f64;
|
||||
}
|
||||
}
|
||||
|
||||
(best_phase, best_corr)
|
||||
}
|
||||
|
||||
/// Derives the phase window that corresponds to an anchor price range.
///
/// The range is first widened by `tolerance_pct` (a fraction, e.g. `0.05`):
/// the low end is multiplied by `1 - tolerance_pct`, the high end by
/// `1 + tolerance_pct`. Each widened price is then reduced to
/// `frac(log10(price))`, with negative remainders shifted up by one.
///
/// Returns `(phase_min, phase_max)`. `phase_min` may exceed `phase_max` when
/// the widened range crosses a power of ten.
pub fn phase_range_from_anchor(price_low: f64, price_high: f64, tolerance_pct: f64) -> (f64, f64) {
    // Fractional part of log10, folded into the unit interval.
    let phase_of = |price: f64| -> f64 {
        let remainder = price.log10().fract();
        if remainder < 0.0 {
            remainder + 1.0
        } else {
            remainder
        }
    };

    let widened_low = price_low * (1.0 - tolerance_pct);
    let widened_high = price_high * (1.0 + tolerance_pct);

    (phase_of(widened_low), phase_of(widened_high))
}
|
||||
|
||||
/// Turns a detected phase into a price, using the anchor range to pick the decade.
///
/// A phase only fixes the leading digits ($6.3, $63, $630 and $6300 all share
/// one phase). The candidates `10^phase * 10^d` for `d` in `-2..=6` are
/// compared against the midpoint of `[anchor_low, anchor_high]` and the one
/// with the smallest absolute distance wins (the lowest decade on ties).
///
/// The result is clamped to `0.01..=10_000_000.0`.
pub fn phase_to_price(phase: f64, anchor_low: f64, anchor_high: f64) -> f64 {
    // Leading digits in decade 0, i.e. a value in 1..10 for a phase in 0..1.
    let mantissa = 10.0_f64.powf(phase);
    let target = (anchor_low + anchor_high) / 2.0;

    // Walk the decades, keeping the candidate closest to the anchor midpoint.
    // The seed mirrors "nothing found yet": the decade-0 value at maximal distance.
    let (closest, _) = (-2..=6).fold((mantissa, f64::MAX), |(price, gap), exponent| {
        let scaled = mantissa * 10.0_f64.powi(exponent);
        let scaled_gap = (scaled - target).abs();
        if scaled_gap < gap {
            (scaled, scaled_gap)
        } else {
            (price, gap)
        }
    });

    closest.clamp(0.01, 10_000_000.0)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// The template keeps its 11 entries and lands on the expected bins.
    #[test]
    fn test_template_bins() {
        let template = template_bins();
        assert_eq!(template.len(), 11);

        // $1/$10/$100: phase 0.00 → bin 0, weight 10
        assert_eq!(template[0], (0, 10));

        // $5/$50: phase 0.70 → bin 140 (0.70 * 200), weight 9
        assert_eq!(template[6], (140, 9));
    }

    /// Exact powers of ten fall into bin 0; 5M sats lands near bin 140.
    #[test]
    fn test_sats_to_bin() {
        // 1 BTC = 100M sats: log10 = 8.0, fractional part 0.0
        assert_eq!(PhaseHistogramV2::sats_to_bin(Sats::_1BTC), Some(0));

        // 10M sats: log10 = 7.0, fractional part 0.0
        assert_eq!(PhaseHistogramV2::sats_to_bin(Sats::_10M), Some(0));

        // 5M sats: log10 ≈ 6.699, fractional part ≈ 0.699 → around bin 140
        let bin = PhaseHistogramV2::sats_to_bin(Sats::from(5_000_000u64)).unwrap();
        assert!((138..=142).contains(&bin), "5M sats bin = {}", bin);
    }

    /// A $6000-$8000 anchor with 5% tolerance yields phases just under 0.8 and 0.95.
    #[test]
    fn test_phase_range_from_anchor() {
        let (p_min, p_max) = phase_range_from_anchor(6000.0, 8000.0, 0.05);

        // Low end:  $6000 * 0.95 = $5700 → log10 ≈ 3.756, phase ≈ 0.756
        // High end: $8000 * 1.05 = $8400 → log10 ≈ 3.924, phase ≈ 0.924
        assert!(p_min > 0.7 && p_min < 0.8, "p_min = {}", p_min);
        assert!(p_max > 0.85 && p_max < 0.95, "p_max = {}", p_max);
    }

    /// The anchor range selects the decade for a given phase.
    #[test]
    fn test_phase_to_price() {
        // Phase 0.0 anchored at $50-$150 resolves to roughly $100
        let around_100 = phase_to_price(0.0, 50.0, 150.0);
        assert!(around_100 > 80.0 && around_100 < 120.0, "price = {}", around_100);

        // Phase 0.70 anchored at $4000-$6000 resolves to roughly $5000
        let around_5000 = phase_to_price(0.70, 4000.0, 6000.0);
        assert!(
            around_5000 > 4000.0 && around_5000 < 6000.0,
            "price = {}",
            around_5000
        );
    }
}
|
||||
@@ -1,512 +0,0 @@
|
||||
//! Stencil matching for UTXOracle price detection.
|
||||
//! Uses two stencils that slide across the histogram:
|
||||
//! 1. Smooth stencil: Gaussian capturing general spending distribution
|
||||
//! 2. Spike stencil: Hard-coded weights at known USD amounts
|
||||
|
||||
use brk_types::{Cents, Sats};
|
||||
use rayon::prelude::*;
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
use super::histogram::{BINS_PER_DECADE, Histogram, TOTAL_BINS};
|
||||
|
||||
/// Number of parallel chunks for stencil sliding
|
||||
const PARALLEL_CHUNKS: i32 = 4;
|
||||
|
||||
/// USD spike stencil entries: (bin offset from center_bin, weight)
|
||||
/// These represent the expected frequency of round USD amounts in transactions
|
||||
/// Positions derived from Python's empirical data (utxo_oracle.py lines 1013-1041)
|
||||
/// Offset = python_stencil_index - 402 (since Python stencil starts at bin 199, center is 601)
|
||||
const SPIKE_STENCIL: &[(i32, f64)] = &[
|
||||
// $1 (single) - Python index 40
|
||||
(-362, 0.00130),
|
||||
// $5 (single) - Python index 141
|
||||
(-261, 0.00168),
|
||||
// $10 (main + companion) - Python indices 201-202
|
||||
(-201, 0.00347),
|
||||
(-200, 0.00199),
|
||||
// $15 (single) - Python index 236
|
||||
(-166, 0.00191),
|
||||
// $20 (main + companion) - Python indices 261-262
|
||||
(-141, 0.00334),
|
||||
(-140, 0.00259),
|
||||
// $30 (main + companion) - Python indices 296-297
|
||||
(-106, 0.00258),
|
||||
(-105, 0.00273),
|
||||
// $50 (main + 2 companions) - Python indices 340-342
|
||||
(-62, 0.00308),
|
||||
(-61, 0.00561),
|
||||
(-60, 0.00309),
|
||||
// $100 (main + 3 companions) - Python indices 400-403
|
||||
(-2, 0.00292),
|
||||
(-1, 0.00617),
|
||||
(0, 0.00442),
|
||||
(1, 0.00263),
|
||||
// $150 (single) - Python index 436
|
||||
(34, 0.00286),
|
||||
// $200 (main + companion) - Python indices 461-462
|
||||
(59, 0.00410),
|
||||
(60, 0.00335),
|
||||
// $300 (main + companion) - Python indices 496-497
|
||||
(94, 0.00252),
|
||||
(95, 0.00278),
|
||||
// $500 (single) - Python index 541
|
||||
(139, 0.00379),
|
||||
// $1000 (main + companion) - Python indices 601-602
|
||||
(199, 0.00369),
|
||||
(200, 0.00239),
|
||||
// $1500 (single) - Python index 636
|
||||
(234, 0.00128),
|
||||
// $2000 (main + companion) - Python indices 661-662
|
||||
(259, 0.00165),
|
||||
(260, 0.00140),
|
||||
// $5000 (single) - Python index 741
|
||||
(339, 0.00115),
|
||||
// $10000 (single) - Python index 801
|
||||
(399, 0.00083),
|
||||
];
|
||||
|
||||
/// Standard deviation, in bins, of the Gaussian part of the smooth stencil.
/// Python and Rust both use 200 bins per decade, so the value is shared as-is.
const SMOOTH_WIDTH: f64 = 201.0;

/// Slope of the linear part of the smooth stencil (Python: `0.0000005 * x`).
/// It multiplies the position inside the stencil window (0-802), exactly as in
/// Python, and is therefore NOT rescaled.
const SMOOTH_LINEAR_COEF: f64 = 5e-7;

/// Weight of the smooth stencil in the combined score.
const SMOOTH_WEIGHT: f64 = 0.65;
/// Weight of the spike stencil in the combined score.
const SPIKE_WEIGHT: f64 = 1.0;

/// Largest distance from the Gaussian centre, in bins, covered by the
/// pre-computed weight table. The table is indexed by absolute distance
/// (`0..=SMOOTH_RANGE`), which avoids calling `exp()` in the scoring loop.
const SMOOTH_RANGE: usize = 800;

/// Offset of the Gaussian centre from the spike centre, in bins.
/// Python's Gaussian has mean 411 inside its 803-element stencil; the stencil
/// starts at bin 199, so the Gaussian is centred on bin 199 + 411 = 610, while
/// the spike centre is bin 601: a difference of +9 bins.
const GAUSSIAN_CENTER_OFFSET: i32 = 9;
|
||||
|
||||
/// Lazily initialized Gaussian weight lookup table
|
||||
fn gaussian_weights() -> &'static [f64; SMOOTH_RANGE + 1] {
|
||||
use std::sync::OnceLock;
|
||||
static WEIGHTS: OnceLock<[f64; SMOOTH_RANGE + 1]> = OnceLock::new();
|
||||
WEIGHTS.get_or_init(|| {
|
||||
let mut weights = [0.0; SMOOTH_RANGE + 1];
|
||||
(0..=SMOOTH_RANGE).for_each(|d| {
|
||||
let distance = d as f64;
|
||||
weights[d] = (-distance * distance / (2.0 * SMOOTH_WIDTH * SMOOTH_WIDTH)).exp();
|
||||
});
|
||||
weights
|
||||
})
|
||||
}
|
||||
|
||||
/// Find the best price estimate by sliding stencils across the histogram
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `histogram` - The log-scale histogram of output values
|
||||
/// * `min_slide` - Minimum slide position (higher prices)
|
||||
/// * `max_slide` - Maximum slide position (lower prices)
|
||||
///
|
||||
/// # Returns
|
||||
/// The estimated price in cents, or None if no valid estimate found
|
||||
pub fn find_best_price(histogram: &Histogram, min_slide: i32, max_slide: i32) -> Option<Cents> {
|
||||
let bins = histogram.bins();
|
||||
|
||||
// Collect non-zero bins: Vec for Gaussian (needs iteration), HashMap for spike (needs lookup)
|
||||
let non_zero_bins: Vec<(usize, f64)> = bins
|
||||
.iter()
|
||||
.copied()
|
||||
.enumerate()
|
||||
.filter(|(_, v)| *v > 0.0)
|
||||
.collect();
|
||||
|
||||
// HashMap for O(1) spike lookups instead of O(n) linear search
|
||||
let bin_map: FxHashMap<usize, f64> = non_zero_bins.iter().copied().collect();
|
||||
|
||||
// Slide through possible price positions in parallel chunks
|
||||
let range_size = max_slide - min_slide + 1;
|
||||
let chunk_size = (range_size + PARALLEL_CHUNKS - 1) / PARALLEL_CHUNKS;
|
||||
|
||||
// Track total score for weighted average computation
|
||||
let (best_position, best_score, total_score) = (0..PARALLEL_CHUNKS)
|
||||
.into_par_iter()
|
||||
.map(|chunk_idx| {
|
||||
let chunk_start = min_slide + chunk_idx * chunk_size;
|
||||
let chunk_end = (chunk_start + chunk_size - 1).min(max_slide);
|
||||
|
||||
let mut local_best_score = f64::NEG_INFINITY;
|
||||
let mut local_best_pos = chunk_start;
|
||||
let mut local_total = 0.0;
|
||||
|
||||
for slide in chunk_start..=chunk_end {
|
||||
let score = compute_score_fast(&non_zero_bins, &bin_map, slide);
|
||||
local_total += score;
|
||||
if score > local_best_score {
|
||||
local_best_score = score;
|
||||
local_best_pos = slide;
|
||||
}
|
||||
}
|
||||
|
||||
(local_best_pos, local_best_score, local_total)
|
||||
})
|
||||
.reduce(
|
||||
|| (0, f64::NEG_INFINITY, 0.0),
|
||||
|a, b| {
|
||||
let total = a.2 + b.2;
|
||||
if a.1 > b.1 {
|
||||
(a.0, a.1, total)
|
||||
} else {
|
||||
(b.0, b.1, total)
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
// Compute neighbor scores for sub-bin interpolation (matches Python behavior)
|
||||
let neighbor_up_score = compute_score_fast(&non_zero_bins, &bin_map, best_position + 1);
|
||||
let neighbor_down_score = compute_score_fast(&non_zero_bins, &bin_map, best_position - 1);
|
||||
|
||||
// Find best neighbor
|
||||
let (best_neighbor_offset, neighbor_score) = if neighbor_up_score > neighbor_down_score {
|
||||
(1, neighbor_up_score)
|
||||
} else {
|
||||
(-1, neighbor_down_score)
|
||||
};
|
||||
|
||||
// Weighted average between best position and best neighbor (Python lines 1144-1149)
|
||||
// This provides sub-bin precision for the rough estimate
|
||||
let avg_score = total_score / range_size as f64;
|
||||
let a1 = best_score - avg_score;
|
||||
let a2 = (neighbor_score - avg_score).abs();
|
||||
|
||||
if a1 + a2 > 0.0 {
|
||||
let w1 = a1 / (a1 + a2);
|
||||
let w2 = a2 / (a1 + a2);
|
||||
|
||||
let price_best = i64::from(position_to_cents(best_position)?);
|
||||
let price_neighbor = i64::from(position_to_cents(best_position + best_neighbor_offset)?);
|
||||
|
||||
let weighted_price = Cents::from((w1 * price_best as f64 + w2 * price_neighbor as f64) as i64);
|
||||
Some(weighted_price)
|
||||
} else {
|
||||
position_to_cents(best_position)
|
||||
}
|
||||
}
|
||||
|
||||
/// Fast score computation using sparse bin representation
|
||||
fn compute_score_fast(
|
||||
non_zero_bins: &[(usize, f64)],
|
||||
bin_map: &FxHashMap<usize, f64>,
|
||||
slide: i32,
|
||||
) -> f64 {
|
||||
let spike_score = compute_spike_score_hash(bin_map, slide);
|
||||
|
||||
// Python: smooth weight only applied for slide < 150
|
||||
if slide < 150 {
|
||||
let gaussian_score = compute_gaussian_score_sparse(non_zero_bins, slide);
|
||||
let linear_score = compute_linear_score_sparse(non_zero_bins, slide);
|
||||
// Combine Gaussian and linear parts of smooth score
|
||||
let smooth_score = 0.0015 * gaussian_score + linear_score;
|
||||
SMOOTH_WEIGHT * smooth_score + SPIKE_WEIGHT * spike_score
|
||||
} else {
|
||||
SPIKE_WEIGHT * spike_score
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the linear part of the smooth stencil (per-slide, matches Python)
|
||||
/// Python: sum(shifted_curve[n] * 0.0000005 * n) where n is window position (0-802)
|
||||
fn compute_linear_score_sparse(non_zero_bins: &[(usize, f64)], slide: i32) -> f64 {
|
||||
// Window starts at left_p001 + slide = (center_bin - 402) + slide = 199 + slide
|
||||
// Python: left_p001 = center_p001 - int((803+1)/2) = 601 - 402 = 199
|
||||
let window_start = 199 + slide;
|
||||
let window_end = window_start + 803; // 803 elements like Python's stencil
|
||||
let mut score = 0.0;
|
||||
|
||||
for &(i, bin_value) in non_zero_bins {
|
||||
let bin_idx = i as i32;
|
||||
if bin_idx >= window_start && bin_idx < window_end {
|
||||
let window_pos = bin_idx - window_start;
|
||||
score += bin_value * SMOOTH_LINEAR_COEF * window_pos as f64;
|
||||
}
|
||||
}
|
||||
|
||||
score
|
||||
}
|
||||
|
||||
/// Compute just the Gaussian part of the smooth stencil (sparse iteration)
|
||||
/// Note: Gaussian center is offset from spike center by GAUSSIAN_CENTER_OFFSET
|
||||
fn compute_gaussian_score_sparse(non_zero_bins: &[(usize, f64)], slide: i32) -> f64 {
|
||||
// Python's Gaussian is centered at bin 610 (not 601), so we add the offset
|
||||
let center = center_bin() as i32 + GAUSSIAN_CENTER_OFFSET + slide;
|
||||
let weights = gaussian_weights();
|
||||
let mut score = 0.0;
|
||||
|
||||
for &(i, bin_value) in non_zero_bins {
|
||||
let distance = (i as i32 - center).unsigned_abs() as usize;
|
||||
if distance <= SMOOTH_RANGE {
|
||||
score += bin_value * weights[distance];
|
||||
}
|
||||
}
|
||||
|
||||
score
|
||||
}
|
||||
|
||||
/// Compute spike score using HashMap for O(1) bin lookups
|
||||
/// This is O(29) per slide instead of O(29 × 500) with linear search
|
||||
#[inline]
|
||||
fn compute_spike_score_hash(bin_map: &FxHashMap<usize, f64>, slide: i32) -> f64 {
|
||||
let center = center_bin() as i32 + slide;
|
||||
let mut score = 0.0;
|
||||
|
||||
for &(offset, weight) in SPIKE_STENCIL {
|
||||
let bin_idx = (center + offset) as usize;
|
||||
if let Some(&bin_value) = bin_map.get(&bin_idx) {
|
||||
score += bin_value * weight;
|
||||
}
|
||||
}
|
||||
|
||||
score
|
||||
}
|
||||
|
||||
/// Index of the center bin (the ~0.001 BTC baseline).
///
/// This is roughly where $100 would land at ~$100,000/BTC.
#[inline]
fn center_bin() -> usize {
    // 0.001 BTC = 10^-3 BTC. Within the [-6, 2] log range, -3 sits at
    // (3/8) * 1600 = 600; Python uses center_p001 = 601, so we match that.
    const CENTER_P001: usize = 601;
    CENTER_P001
}
|
||||
|
||||
/// Convert a slide position to price in cents
|
||||
/// Position 0 = center (~$100,000 at 0.001 BTC)
|
||||
fn position_to_cents(position: i32) -> Option<Cents> {
|
||||
// Each bin represents 1/200 of a decade in log scale
|
||||
// Moving the stencil by +1 means the price is lower (outputs are smaller for same USD)
|
||||
// Moving by -1 means the price is higher
|
||||
|
||||
// At position 0, we assume the center maps to some reference price
|
||||
// The reference: 0.001 BTC = $100 means price is $100,000/BTC
|
||||
|
||||
// Offset per bin in log10 terms: 1/200 decades
|
||||
let log_offset = position as f64 / BINS_PER_DECADE as f64;
|
||||
|
||||
// Reference price: $100 at 0.001 BTC = $100,000/BTC = 10,000,000 cents/BTC
|
||||
let ref_price_cents: f64 = 10_000_000.0;
|
||||
|
||||
// Price scales inversely with position (higher position = lower price)
|
||||
let price = ref_price_cents / 10_f64.powf(log_offset);
|
||||
|
||||
if price > 0.0 && price < 1e12 {
|
||||
Some(Cents::from(price as i64))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Round USD amounts, expressed in cents, used when collecting price points.
///
/// Matches Python's dollar list:
/// [5, 10, 15, 20, 25, 30, 40, 50, 100, 150, 200, 300, 500, 1000]
const ROUND_USD_CENTS: [f64; 14] = [
    500.0,
    1000.0,
    1500.0,
    2000.0,
    2500.0,
    3000.0,
    4000.0,
    5000.0,
    10000.0,
    15000.0,
    20000.0,
    30000.0,
    50000.0,
    100000.0,
];
|
||||
|
||||
/// Check if a sats value is a round amount that should be filtered
|
||||
/// Matches Python's micro_remove_list with ±0.01% tolerance
|
||||
/// Uses O(1) modular arithmetic instead of iterating through all round values
|
||||
#[inline]
|
||||
pub fn is_round_sats(sats: Sats) -> bool {
|
||||
let sats = u64::from(sats);
|
||||
|
||||
// Determine the step size based on the magnitude
|
||||
let (step, min_val) = if sats < 10_000 {
|
||||
(1_000u64, 5_000u64)
|
||||
} else if sats < 100_000 {
|
||||
(1_000, 10_000)
|
||||
} else if sats < 1_000_000 {
|
||||
(10_000, 100_000)
|
||||
} else if sats < 10_000_000 {
|
||||
(100_000, 1_000_000)
|
||||
} else if sats < 100_000_000 {
|
||||
(1_000_000, 10_000_000)
|
||||
} else {
|
||||
return false; // Outside range
|
||||
};
|
||||
|
||||
if sats < min_val {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Find the nearest round value
|
||||
let nearest_round = ((sats + step / 2) / step) * step;
|
||||
|
||||
// Check if within ±0.01% tolerance
|
||||
let tolerance = nearest_round / 10000;
|
||||
sats >= nearest_round.saturating_sub(tolerance) && sats <= nearest_round + tolerance
|
||||
}
|
||||
|
||||
/// Refine a rough price estimate using center-of-mass convergence
|
||||
/// Matches Python's find_central_output algorithm (geometric median)
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `by_bin` - Pre-built index of non-round sats values grouped by histogram bin (maintained incrementally by compute.rs)
|
||||
/// * `rough_price_cents` - Initial price estimate from stencil matching
|
||||
///
|
||||
/// # Returns
|
||||
/// Refined price in cents
|
||||
pub fn refine_price(by_bin: &[Vec<Sats>; TOTAL_BINS], rough_price_cents: Cents) -> Cents {
|
||||
if rough_price_cents == Cents::ZERO {
|
||||
return rough_price_cents;
|
||||
}
|
||||
|
||||
const WIDE_WINDOW: f64 = 0.25; // ±25% for initial collection (per Python)
|
||||
const TIGHT_WINDOW: f64 = 0.05; // ±5% for refinement
|
||||
|
||||
let rough_price = i64::from(rough_price_cents) as f64;
|
||||
|
||||
// For each USD amount, scan only the bins that overlap with ±25% window
|
||||
let mut price_points: Vec<f64> = Vec::with_capacity(8000);
|
||||
|
||||
(0..14).for_each(|i| {
|
||||
let usd_cents = ROUND_USD_CENTS[i];
|
||||
let expected_sats = usd_cents * 1e8 / rough_price;
|
||||
let sats_low = Sats::from((expected_sats * (1.0 - WIDE_WINDOW)) as u64);
|
||||
let sats_high = Sats::from((expected_sats * (1.0 + WIDE_WINDOW)) as u64);
|
||||
|
||||
// Convert bounds to bin range
|
||||
let bin_low = Histogram::sats_to_bin(sats_low).unwrap_or(0);
|
||||
let bin_high = Histogram::sats_to_bin(sats_high).unwrap_or(TOTAL_BINS - 1);
|
||||
|
||||
// Scan only bins in range
|
||||
(bin_low..=bin_high.min(TOTAL_BINS - 1)).for_each(|bin| {
|
||||
for &sats in &by_bin[bin] {
|
||||
if sats > sats_low && sats < sats_high {
|
||||
price_points.push(usd_cents * 1e8 / f64::from(sats));
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
if price_points.is_empty() {
|
||||
return rough_price_cents;
|
||||
}
|
||||
|
||||
// Step 2: Find geometric median using iterative refinement
|
||||
let mut center_price = rough_price;
|
||||
// Use fixed array instead of HashSet (max 20 iterations)
|
||||
let mut seen_prices = [0u64; 20];
|
||||
let mut seen_count = 0usize;
|
||||
|
||||
// Reusable buffer for filtered prices (avoids allocation per iteration)
|
||||
let mut filtered: Vec<f64> = Vec::with_capacity(price_points.len());
|
||||
|
||||
for _ in 0..20 {
|
||||
let price_low = center_price * (1.0 - TIGHT_WINDOW);
|
||||
let price_high = center_price * (1.0 + TIGHT_WINDOW);
|
||||
|
||||
// Reuse filtered buffer
|
||||
filtered.clear();
|
||||
filtered.extend(
|
||||
price_points
|
||||
.iter()
|
||||
.filter(|&&p| p > price_low && p < price_high),
|
||||
);
|
||||
|
||||
if filtered.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
let new_center = find_geometric_median_inplace(&mut filtered);
|
||||
|
||||
// Check for convergence using fixed array
|
||||
let new_center_rounded = new_center as u64;
|
||||
if seen_prices[..seen_count].contains(&new_center_rounded) {
|
||||
break;
|
||||
}
|
||||
if seen_count < 20 {
|
||||
seen_prices[seen_count] = new_center_rounded;
|
||||
seen_count += 1;
|
||||
}
|
||||
|
||||
center_price = new_center;
|
||||
}
|
||||
|
||||
Cents::from(center_price as i64)
|
||||
}
|
||||
|
||||
/// Find the element minimizing the sum of absolute distances to all others
/// (the one-dimensional geometric median).
///
/// The slice is sorted in place, so the caller's ordering is not preserved.
/// Sorting uses an unstable sort with `f64::total_cmp`, which needs no scratch
/// allocation and cannot panic on NaN.
///
/// # Returns
/// * `0.0` for an empty slice
/// * the single element for a one-element slice
/// * otherwise the lowest element achieving the minimal total distance
///
/// If any value is NaN the distance comparisons never succeed and the middle
/// element of the sorted slice is returned.
fn find_geometric_median_inplace(prices: &mut [f64]) -> f64 {
    if prices.len() < 2 {
        return prices.first().copied().unwrap_or(0.0);
    }

    // Elements that compare equal under `total_cmp` are bit-identical, so an
    // unstable sort cannot change the outcome.
    prices.sort_unstable_by(f64::total_cmp);

    let n = prices.len();

    // With the grand total known, the sum to the right of each element follows
    // from the running left sum, so no prefix-sum array is needed.
    let total: f64 = prices.iter().sum();

    let mut min_dist = f64::MAX;
    let mut best_price = prices[n / 2];
    let mut left_sum = 0.0;

    for (i, &x) in prices.iter().enumerate() {
        let left_count = i as f64;
        let right_count = (n - i - 1) as f64;
        let right_sum = total - left_sum - x;

        // Distance to everything on the left plus everything on the right.
        let dist = (x * left_count - left_sum) + (right_sum - x * right_count);

        // Strict comparison keeps the first (lowest) minimizer on ties.
        if dist < min_dist {
            min_dist = dist;
            best_price = x;
        }

        left_sum += x;
    }

    best_price
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Position 0 maps to the reference price and the mapping is decreasing.
    #[test]
    fn test_position_to_cents() {
        // Position 0 should land near the reference price (~$100,000).
        let reference = position_to_cents(0).unwrap();
        let reference_val = i64::from(reference);
        assert!(reference_val > 5_000_000);
        assert!(reference_val < 20_000_000);

        // Sliding up by 200 bins yields a lower price.
        let cheaper = position_to_cents(200).unwrap();
        assert!(cheaper < reference);

        // Sliding down by 200 bins yields a higher price.
        let pricier = position_to_cents(-200).unwrap();
        assert!(pricier > reference);
    }

    /// The spike stencil keeps its 29 strictly positive weights.
    #[test]
    fn test_spike_stencil_entries() {
        // The Python reference has 29 entries.
        assert_eq!(SPIKE_STENCIL.len(), 29);

        // Every weight must be positive.
        assert!(SPIKE_STENCIL.iter().all(|&(_, weight)| weight > 0.0));
    }
}
|
||||
@@ -1,117 +0,0 @@
|
||||
use brk_traversable::Traversable;
|
||||
use brk_types::{
|
||||
Cents, DateIndex, Dollars, Height, OHLCCents, OHLCDollars, OracleBins, OracleBinsV2,
|
||||
PairOutputIndex, Sats, StoredU32, TxIndex,
|
||||
};
|
||||
use vecdb::{BytesVec, LazyVecFrom1, PcoVec};
|
||||
|
||||
use crate::internal::{Distribution, LazyTransformDistribution};
|
||||
|
||||
/// Vectors storing oracle-derived price data
|
||||
#[derive(Clone, Traversable)]
|
||||
pub struct Vecs {
|
||||
// ========== Layer 1: Pair identification (requires chain scan) ==========
|
||||
/// Maps PairOutputIndex to TxIndex for all 2-output transactions
|
||||
/// This is the base index for oracle candidates (~400M entries)
|
||||
pub pairoutputindex_to_txindex: PcoVec<PairOutputIndex, TxIndex>,
|
||||
|
||||
/// Maps Height to first PairOutputIndex in that block
|
||||
/// Enables efficient per-block iteration over pairs
|
||||
pub height_to_first_pairoutputindex: PcoVec<Height, PairOutputIndex>,
|
||||
|
||||
// ========== Layer 3: Output values (enables any price algorithm) ==========
|
||||
/// First output value for each pair (index 0)
|
||||
pub output0_value: PcoVec<PairOutputIndex, Sats>,
|
||||
|
||||
/// Second output value for each pair (index 1)
|
||||
pub output1_value: PcoVec<PairOutputIndex, Sats>,
|
||||
|
||||
// ========== Layer 4: Phase histograms (per block) ==========
|
||||
/// Phase histogram per block: frac(log10(sats)) binned into 100 bins
|
||||
/// ~200 bytes per block, ~175 MB total
|
||||
pub phase_histogram: BytesVec<Height, OracleBins>,
|
||||
|
||||
// ========== Layer 5: Phase Oracle prices (derived from histograms) ==========
|
||||
/// Per-block price in cents from phase histogram analysis
|
||||
/// Calibrated at block 840,000 (~$63,000)
|
||||
/// TODO: Add interpolation for sub-bin precision
|
||||
pub phase_price_cents: PcoVec<Height, Cents>,
|
||||
|
||||
/// Daily distribution (min, max, average, percentiles) from phase oracle in cents
|
||||
pub phase_daily_cents: Distribution<DateIndex, Cents>,
|
||||
|
||||
/// Daily distribution in dollars (lazy conversion from cents)
|
||||
pub phase_daily_dollars: LazyTransformDistribution<DateIndex, Dollars, Cents>,
|
||||
|
||||
// ========== UTXOracle (Python port) ==========
|
||||
/// Per-block price estimate in cents (sliding window + stencil matching)
|
||||
pub price_cents: PcoVec<Height, Cents>,
|
||||
|
||||
/// Daily OHLC derived from price_cents
|
||||
pub ohlc_cents: BytesVec<DateIndex, OHLCCents>,
|
||||
|
||||
/// Daily OHLC in dollars (lazy conversion from cents)
|
||||
pub ohlc_dollars: LazyVecFrom1<DateIndex, OHLCDollars, DateIndex, OHLCCents>,
|
||||
|
||||
/// Number of qualifying transactions per day (for confidence)
|
||||
pub tx_count: PcoVec<DateIndex, StoredU32>,
|
||||
|
||||
// ========== Daily OHLC from height close only ==========
|
||||
/// Daily OHLC computed from height close prices only
|
||||
pub close_ohlc_cents: BytesVec<DateIndex, OHLCCents>,
|
||||
|
||||
/// Daily OHLC from close in dollars (lazy conversion)
|
||||
pub close_ohlc_dollars: LazyVecFrom1<DateIndex, OHLCDollars, DateIndex, OHLCCents>,
|
||||
|
||||
// ========== Daily OHLC from height mid price (open+close)/2 ==========
|
||||
/// Daily OHLC computed from height mid prices ((open+close)/2)
|
||||
pub mid_ohlc_cents: BytesVec<DateIndex, OHLCCents>,
|
||||
|
||||
/// Daily OHLC from mid in dollars (lazy conversion)
|
||||
pub mid_ohlc_dollars: LazyVecFrom1<DateIndex, OHLCDollars, DateIndex, OHLCCents>,
|
||||
|
||||
// ========== Phase Oracle V2 (round USD template matching) ==========
|
||||
/// Per-block 200-bin phase histogram
|
||||
pub phase_v2_histogram: BytesVec<Height, OracleBinsV2>,
|
||||
|
||||
/// Per-block price in cents from phase oracle V2 (cross-correlation with round USD template)
|
||||
pub phase_v2_price_cents: PcoVec<Height, Cents>,
|
||||
|
||||
/// Per-block price in cents using direct peak finding (like V1)
|
||||
pub phase_v2_peak_price_cents: PcoVec<Height, Cents>,
|
||||
|
||||
/// Daily distribution (min, max, average, percentiles) from phase oracle V2
|
||||
pub phase_v2_daily_cents: Distribution<DateIndex, Cents>,
|
||||
|
||||
/// Daily distribution in dollars (lazy conversion from cents)
|
||||
pub phase_v2_daily_dollars: LazyTransformDistribution<DateIndex, Dollars, Cents>,
|
||||
|
||||
/// Daily distribution from peak-based prices
|
||||
pub phase_v2_peak_daily_cents: Distribution<DateIndex, Cents>,
|
||||
|
||||
/// Daily distribution in dollars (lazy conversion from cents)
|
||||
pub phase_v2_peak_daily_dollars: LazyTransformDistribution<DateIndex, Dollars, Cents>,
|
||||
|
||||
// ========== Phase Oracle V3 (BASE + uniqueVal filter) ==========
|
||||
/// Per-block 200-bin phase histogram with uniqueVal filtering
|
||||
/// Only includes outputs with unique values within their transaction
|
||||
pub phase_v3_histogram: BytesVec<Height, OracleBinsV2>,
|
||||
|
||||
/// Per-block price in cents from phase oracle V3 (cross-correlation)
|
||||
pub phase_v3_price_cents: PcoVec<Height, Cents>,
|
||||
|
||||
/// Per-block price in cents using direct peak finding (like V1)
|
||||
pub phase_v3_peak_price_cents: PcoVec<Height, Cents>,
|
||||
|
||||
/// Daily distribution from phase oracle V3
|
||||
pub phase_v3_daily_cents: Distribution<DateIndex, Cents>,
|
||||
|
||||
/// Daily distribution in dollars (lazy conversion from cents)
|
||||
pub phase_v3_daily_dollars: LazyTransformDistribution<DateIndex, Dollars, Cents>,
|
||||
|
||||
/// Daily distribution from peak-based prices
|
||||
pub phase_v3_peak_daily_cents: Distribution<DateIndex, Cents>,
|
||||
|
||||
/// Daily distribution in dollars (lazy conversion from cents)
|
||||
pub phase_v3_peak_daily_dollars: LazyTransformDistribution<DateIndex, Dollars, Cents>,
|
||||
}
|
||||
Reference in New Issue
Block a user