global: snapshot

This commit is contained in:
nym21
2026-03-02 15:28:13 +01:00
parent 4d97cec869
commit 4e7cd9ab6f
21 changed files with 595 additions and 373 deletions

View File

@@ -3,7 +3,9 @@ use std::thread;
use brk_cohort::ByAddressType;
use brk_error::Result;
use brk_indexer::Indexer;
use brk_types::{Cents, Date, Day1, Height, OutputType, Sats, StoredU64, TxIndex, TypeIndex};
use brk_types::{
Cents, Date, Day1, Height, OutputType, Sats, StoredU64, Timestamp, TxIndex, TypeIndex,
};
use rayon::prelude::*;
use rustc_hash::FxHashSet;
use tracing::{debug, info};
@@ -20,7 +22,7 @@ use crate::{
compute::write::{process_address_updates, write},
state::{BlockState, Transacted},
},
indexes, inputs, outputs, prices, transactions,
indexes, inputs, outputs, transactions,
};
use super::{
@@ -30,8 +32,8 @@ use super::{
vecs::Vecs,
},
BIP30_DUPLICATE_HEIGHT_1, BIP30_DUPLICATE_HEIGHT_2, BIP30_ORIGINAL_HEIGHT_1,
BIP30_ORIGINAL_HEIGHT_2, ComputeContext, FLUSH_INTERVAL, IndexToTxIndexBuf, TxInReaders,
TxOutReaders, VecsReaders,
BIP30_ORIGINAL_HEIGHT_2, ComputeContext, FLUSH_INTERVAL, IndexToTxIndexBuf, PriceRangeMax,
TxInReaders, TxOutReaders, VecsReaders,
};
/// Process all blocks from starting_height to last_height.
@@ -44,52 +46,45 @@ pub(crate) fn process_blocks(
outputs: &outputs::Vecs,
transactions: &transactions::Vecs,
blocks: &blocks::Vecs,
prices: &prices::Vecs,
starting_height: Height,
last_height: Height,
chain_state: &mut Vec<BlockState>,
txindex_to_height: &mut RangeMap<TxIndex, Height>,
cached_prices: &[Cents],
cached_timestamps: &[Timestamp],
cached_price_range_max: &PriceRangeMax,
exit: &Exit,
) -> Result<()> {
// Create computation context with pre-computed vectors for thread-safe access
debug!("creating ComputeContext");
let ctx = ComputeContext::new(starting_height, last_height, blocks, prices);
debug!("ComputeContext created");
let ctx = ComputeContext {
starting_height,
last_height,
height_to_timestamp: cached_timestamps,
height_to_price: cached_prices,
price_range_max: cached_price_range_max,
};
if ctx.starting_height > ctx.last_height {
return Ok(());
}
// References to vectors using correct field paths
// From indexer.vecs:
let height_to_first_txindex = &indexer.vecs.transactions.first_txindex;
let height_to_first_txoutindex = &indexer.vecs.outputs.first_txoutindex;
let height_to_first_txinindex = &indexer.vecs.inputs.first_txinindex;
// From transactions and inputs/outputs (via .height or .height.sum_cumulative.sum patterns):
let height_to_tx_count = &transactions.count.tx_count.height;
let height_to_output_count = &outputs.count.total_count.full.sum_cumulative.sum.0;
let height_to_input_count = &inputs.count.full.sum_cumulative.sum.0;
// From blocks:
let height_to_timestamp = &blocks.time.timestamp_monotonic;
let height_to_date = &blocks.time.date;
let day1_to_first_height = &indexes.day1.first_height;
let day1_to_height_count = &indexes.day1.height_count;
let txindex_to_output_count = &indexes.txindex.output_count;
let txindex_to_input_count = &indexes.txindex.input_count;
// From price - use cents for computation:
let height_to_price = &prices.price.cents.height;
let height_to_price_vec = cached_prices;
let height_to_timestamp_vec = cached_timestamps;
// Access pre-computed vectors from context for thread-safe access
let height_to_price_vec = &ctx.height_to_price;
let height_to_timestamp_vec = &ctx.height_to_timestamp;
// Range for pre-collecting height-indexed vecs
let start_usize = starting_height.to_usize();
let end_usize = last_height.to_usize() + 1;
// Pre-collect height-indexed vecs for the block range (bulk read before hot loop)
let height_to_first_txindex_vec: Vec<TxIndex> =
height_to_first_txindex.collect_range_at(start_usize, end_usize);
let height_to_first_txoutindex_vec: Vec<_> =
@@ -102,10 +97,8 @@ pub(crate) fn process_blocks(
height_to_output_count.collect_range_at(start_usize, end_usize);
let height_to_input_count_vec: Vec<_> =
height_to_input_count.collect_range_at(start_usize, end_usize);
let height_to_timestamp_collected: Vec<_> =
height_to_timestamp.collect_range_at(start_usize, end_usize);
let height_to_price_collected: Vec<_> =
height_to_price.collect_range_at(start_usize, end_usize);
let height_to_timestamp_collected = &cached_timestamps[start_usize..end_usize];
let height_to_price_collected = &cached_prices[start_usize..end_usize];
debug!("creating VecsReaders");
let mut vr = VecsReaders::new(&vecs.any_address_indexes, &vecs.addresses_data);
@@ -115,7 +108,10 @@ pub(crate) fn process_blocks(
let target_len = indexer.vecs.transactions.first_txindex.len();
let current_len = txindex_to_height.len();
if current_len < target_len {
debug!("extending txindex_to_height RangeMap from {} to {}", current_len, target_len);
debug!(
"extending txindex_to_height RangeMap from {} to {}",
current_len, target_len
);
let new_entries: Vec<TxIndex> = indexer
.vecs
.transactions
@@ -125,10 +121,16 @@ pub(crate) fn process_blocks(
txindex_to_height.push(first_txindex);
}
} else if current_len > target_len {
debug!("truncating txindex_to_height RangeMap from {} to {}", current_len, target_len);
debug!(
"truncating txindex_to_height RangeMap from {} to {}",
current_len, target_len
);
txindex_to_height.truncate(target_len);
}
debug!("txindex_to_height RangeMap ready ({} entries)", txindex_to_height.len());
debug!(
"txindex_to_height RangeMap ready ({} entries)",
txindex_to_height.len()
);
// Create reusable iterators and buffers for per-block reads
let mut txout_iters = TxOutReaders::new(indexer);
@@ -395,7 +397,7 @@ pub(crate) fn process_blocks(
&mut vecs.address_cohorts,
&mut lookup,
block_price,
&ctx.price_range_max,
ctx.price_range_max,
&mut addr_counts,
&mut empty_addr_counts,
&mut activity_counts,
@@ -412,7 +414,7 @@ pub(crate) fn process_blocks(
vecs.utxo_cohorts
.receive(transacted, height, timestamp, block_price);
vecs.utxo_cohorts
.send(height_to_sent, chain_state, &ctx.price_range_max);
.send(height_to_sent, chain_state, ctx.price_range_max);
});
// Push to height-indexed vectors
@@ -468,7 +470,7 @@ pub(crate) fn process_blocks(
height,
timestamp,
block_price,
&ctx.price_range_max,
ctx.price_range_max,
)?;
}

View File

@@ -2,150 +2,123 @@ use std::time::Instant;
use brk_types::{Cents, Height, Timestamp};
use tracing::debug;
use vecdb::{ReadableVec, VecIndex};
use crate::{blocks, prices};
use vecdb::VecIndex;
/// Sparse table for O(1) range maximum queries on prices.
/// Uses O(n log n) space (~140MB for 880k blocks).
/// Vec<Vec> per level for incremental O(new_blocks * log n) extension.
#[derive(Debug, Clone, Default)]
pub struct PriceRangeMax {
/// Flattened table: table[k * n + i] = max of 2^k elements starting at index i
/// Using flat layout for better cache locality.
table: Vec<Cents>,
/// Number of elements
levels: Vec<Vec<Cents>>,
n: usize,
}
impl PriceRangeMax {
/// Build sparse table from high prices. O(n log n) time and space.
pub(crate) fn build(prices: &[Cents]) -> Self {
let start = Instant::now();
let n = prices.len();
if n == 0 {
return Self {
table: vec![],
n: 0,
};
pub(crate) fn extend(&mut self, prices: &[Cents]) {
let new_n = prices.len();
if new_n <= self.n || new_n == 0 {
return;
}
// levels = floor(log2(n)) + 1
let levels = (usize::BITS - n.leading_zeros()) as usize;
let start = Instant::now();
let old_n = self.n;
let new_levels_count = (usize::BITS - new_n.leading_zeros()) as usize;
// Allocate flat table: levels * n elements
let mut table = vec![Cents::ZERO; levels * n];
while self.levels.len() < new_levels_count {
self.levels.push(Vec::new());
}
// Base case: level 0 = original prices
table[..n].copy_from_slice(prices);
self.levels[0].extend_from_slice(&prices[old_n..new_n]);
// Build each level from the previous
// table[k][i] = max(table[k-1][i], table[k-1][i + 2^(k-1)])
for k in 1..levels {
let prev_offset = (k - 1) * n;
let curr_offset = k * n;
for k in 1..new_levels_count {
let half = 1 << (k - 1);
let end = n.saturating_sub(1 << k) + 1;
let new_end = if new_n >= (1 << k) {
new_n + 1 - (1 << k)
} else {
0
};
// Use split_at_mut to avoid bounds checks in the loop
let (prev_level, rest) = table.split_at_mut(curr_offset);
let prev = &prev_level[prev_offset..prev_offset + n];
let curr = &mut rest[..n];
for i in 0..end {
curr[i] = prev[i].max(prev[i + half]);
let old_end = self.levels[k].len();
if new_end > old_end {
let (prev_levels, curr_levels) = self.levels.split_at_mut(k);
let prev = &prev_levels[k - 1];
let curr = &mut curr_levels[0];
curr.reserve(new_end - old_end);
for i in old_end..new_end {
curr.push(prev[i].max(prev[i + half]));
}
}
}
self.n = new_n;
let elapsed = start.elapsed();
let total_entries: usize = self.levels.iter().map(|l| l.len()).sum();
debug!(
"PriceRangeMax built: {} heights, {} levels, {:.2}MB, {:.2}ms",
n,
levels,
(levels * n * std::mem::size_of::<Cents>()) as f64 / 1_000_000.0,
"PriceRangeMax extended: {} -> {} heights ({} new), {} levels, {:.2}MB, {:.2}ms",
old_n,
new_n,
new_n - old_n,
new_levels_count,
(total_entries * std::mem::size_of::<Cents>()) as f64 / 1_000_000.0,
elapsed.as_secs_f64() * 1000.0
);
Self { table, n }
}
/// Query maximum value in range [l, r] (inclusive). O(1) time.
pub(crate) fn truncate(&mut self, new_n: usize) {
if new_n >= self.n {
return;
}
if new_n == 0 {
self.levels.clear();
self.n = 0;
return;
}
let new_levels_count = (usize::BITS - new_n.leading_zeros()) as usize;
self.levels.truncate(new_levels_count);
for k in 0..new_levels_count {
let valid = if new_n >= (1 << k) {
new_n + 1 - (1 << k)
} else {
0
};
self.levels[k].truncate(valid);
}
self.n = new_n;
}
#[inline]
pub(crate) fn range_max(&self, l: usize, r: usize) -> Cents {
debug_assert!(l <= r && r < self.n);
let len = r - l + 1;
// k = floor(log2(len))
let k = (usize::BITS - len.leading_zeros() - 1) as usize;
let half = 1 << k;
// max of [l, l + 2^k) and [r - 2^k + 1, r + 1)
let offset = k * self.n;
let level = &self.levels[k];
unsafe {
let a = *self.table.get_unchecked(offset + l);
let b = *self.table.get_unchecked(offset + r + 1 - half);
let a = *level.get_unchecked(l);
let b = *level.get_unchecked(r + 1 - half);
a.max(b)
}
}
/// Query maximum value in height range. O(1) time.
#[inline]
pub(crate) fn max_between(&self, from: Height, to: Height) -> Cents {
self.range_max(from.to_usize(), to.to_usize())
}
}
/// Context shared across block processing.
pub struct ComputeContext {
/// Starting height for this computation run
pub struct ComputeContext<'a> {
pub starting_height: Height,
/// Last height to process
pub last_height: Height,
/// Pre-computed height -> timestamp mapping
pub height_to_timestamp: Vec<Timestamp>,
/// Pre-computed height -> price mapping
pub height_to_price: Vec<Cents>,
/// Sparse table for O(1) range max queries on high prices.
/// Used for computing max price during UTXO holding periods (peak regret).
pub price_range_max: PriceRangeMax,
pub height_to_timestamp: &'a [Timestamp],
pub height_to_price: &'a [Cents],
pub price_range_max: &'a PriceRangeMax,
}
impl ComputeContext {
/// Create a new computation context.
pub(crate) fn new(
starting_height: Height,
last_height: Height,
blocks: &blocks::Vecs,
prices: &prices::Vecs,
) -> Self {
let height_to_timestamp: Vec<Timestamp> =
blocks.time.timestamp_monotonic.collect();
let height_to_price: Vec<Cents> =
prices.price.cents.height.collect();
// Build sparse table for O(1) range max queries on prices
// Used for computing peak price during UTXO holding periods (peak regret)
let price_range_max = PriceRangeMax::build(&height_to_price);
Self {
starting_height,
last_height,
height_to_timestamp,
height_to_price,
price_range_max,
}
}
/// Get price at height.
impl<'a> ComputeContext<'a> {
pub(crate) fn price_at(&self, height: Height) -> Cents {
self.height_to_price[height.to_usize()]
}
/// Get timestamp at height.
pub(crate) fn timestamp_at(&self, height: Height) -> Timestamp {
self.height_to_timestamp[height.to_usize()]
}

View File

@@ -34,16 +34,6 @@ impl<I: Default + Copy, V: Default + Copy> Default for RangeMap<I, V> {
}
impl<I: Ord + Copy + Default, V: From<usize> + Copy + Default> RangeMap<I, V> {
/// Create with pre-allocated capacity.
pub(crate) fn with_capacity(capacity: usize) -> Self {
Self {
first_indexes: Vec::with_capacity(capacity),
cache: [(I::default(), I::default(), V::default(), 0); CACHE_SIZE],
cache_len: 0,
_phantom: PhantomData,
}
}
/// Number of ranges stored.
pub(crate) fn len(&self) -> usize {
self.first_indexes.len()

View File

@@ -4,8 +4,8 @@ use brk_error::Result;
use brk_indexer::Indexer;
use brk_traversable::Traversable;
use brk_types::{
Day1, EmptyAddressData, EmptyAddressIndex, FundedAddressData, FundedAddressIndex, Height,
SupplyState, TxIndex, Version,
Cents, Day1, EmptyAddressData, EmptyAddressIndex, FundedAddressData, FundedAddressIndex,
Height, SupplyState, Timestamp, TxIndex, Version,
};
use tracing::{debug, info};
use vecdb::{
@@ -16,7 +16,10 @@ use vecdb::{
use crate::{
ComputeIndexes, blocks,
distribution::{
compute::{StartMode, determine_start_mode, process_blocks, recover_state, reset_state},
compute::{
PriceRangeMax, StartMode, determine_start_mode, process_blocks, recover_state,
reset_state,
},
state::BlockState,
},
indexes, inputs, outputs, prices, transactions,
@@ -69,6 +72,16 @@ pub struct Vecs<M: StorageMode = Rw> {
/// In-memory txindex→height reverse lookup. Kept across compute() calls.
#[traversable(skip)]
txindex_to_height: RangeMap<TxIndex, Height>,
/// Cached height→price mapping. Incrementally extended, O(new_blocks) on resume.
#[traversable(skip)]
cached_prices: Vec<Cents>,
/// Cached height→timestamp mapping. Incrementally extended, O(new_blocks) on resume.
#[traversable(skip)]
cached_timestamps: Vec<Timestamp>,
/// Cached sparse table for O(1) range-max price queries. Incrementally extended.
#[traversable(skip)]
cached_price_range_max: PriceRangeMax,
}
const SAVED_STAMPED_CHANGES: u16 = 10;
@@ -159,6 +172,10 @@ impl Vecs {
chain_state: Vec::new(),
txindex_to_height: RangeMap::default(),
cached_prices: Vec::new(),
cached_timestamps: Vec::new(),
cached_price_range_max: PriceRangeMax::default(),
db,
states_path,
};
@@ -194,6 +211,32 @@ impl Vecs {
starting_indexes: &mut ComputeIndexes,
exit: &Exit,
) -> Result<()> {
let cache_target_len = prices
.price
.cents
.height
.len()
.min(blocks.time.timestamp_monotonic.len());
let cache_current_len = self.cached_prices.len();
if cache_target_len < cache_current_len {
self.cached_prices.truncate(cache_target_len);
self.cached_timestamps.truncate(cache_target_len);
self.cached_price_range_max.truncate(cache_target_len);
} else if cache_target_len > cache_current_len {
let new_prices = prices
.price
.cents
.height
.collect_range_at(cache_current_len, cache_target_len);
let new_timestamps = blocks
.time
.timestamp_monotonic
.collect_range_at(cache_current_len, cache_target_len);
self.cached_prices.extend(new_prices);
self.cached_timestamps.extend(new_timestamps);
}
self.cached_price_range_max.extend(&self.cached_prices);
// 1. Find minimum height we have data for across stateful vecs
let current_height = Height::from(self.supply_state.len());
let min_stateful = self.min_stateful_height_len();
@@ -268,15 +311,9 @@ impl Vecs {
debug!("reusing in-memory chain_state ({} entries)", chain_state.len());
recovered_height
} else {
// Rollback or first run after restart: rebuild from supply_state
debug!("rebuilding chain_state from stored values");
let height_to_timestamp = &blocks.time.timestamp_monotonic;
let height_to_price = &prices.price.cents.height;
let end = usize::from(recovered_height);
let timestamp_data: Vec<_> = height_to_timestamp.collect_range_at(0, end);
let price_data: Vec<_> = height_to_price.collect_range_at(0, end);
debug!("building supply_state vec for {} heights", recovered_height);
let supply_state_data: Vec<_> = self.supply_state.collect_range_at(0, end);
chain_state = supply_state_data
@@ -284,8 +321,8 @@ impl Vecs {
.enumerate()
.map(|(h, supply)| BlockState {
supply,
price: price_data[h],
timestamp: timestamp_data[h],
price: self.cached_prices[h],
timestamp: self.cached_timestamps[h],
})
.collect();
debug!("chain_state rebuilt");
@@ -329,6 +366,12 @@ impl Vecs {
// 4. Process blocks
if starting_height <= last_height {
debug!("calling process_blocks");
let cached_prices = std::mem::take(&mut self.cached_prices);
let cached_timestamps = std::mem::take(&mut self.cached_timestamps);
let cached_price_range_max =
std::mem::take(&mut self.cached_price_range_max);
process_blocks(
self,
indexer,
@@ -337,13 +380,19 @@ impl Vecs {
outputs,
transactions,
blocks,
prices,
starting_height,
last_height,
&mut chain_state,
&mut txindex_to_height,
&cached_prices,
&cached_timestamps,
&cached_price_range_max,
exit,
)?;
self.cached_prices = cached_prices;
self.cached_timestamps = cached_timestamps;
self.cached_price_range_max = cached_price_range_max;
}
// Put chain_state and txindex_to_height back

View File

@@ -5,7 +5,7 @@ use vecdb::{AnyStoredVec, AnyVec, Database, EagerVec, Exit, PcoVec, ReadableVec,
use crate::{
ComputeIndexes, blocks, indexes,
internal::{ComputedFromHeightStdDevExtended, Price},
internal::{ComputedFromHeightStdDevExtended, Price, TDigest},
};
use super::super::ComputedFromHeight;
@@ -31,9 +31,12 @@ pub struct ComputedFromHeightRatioExtension<M: StorageMode = Rw> {
pub ratio_4y_sd: ComputedFromHeightStdDevExtended<M>,
pub ratio_2y_sd: ComputedFromHeightStdDevExtended<M>,
pub ratio_1y_sd: ComputedFromHeightStdDevExtended<M>,
#[traversable(skip)]
tdigest: TDigest,
}
const VERSION: Version = Version::new(3);
const VERSION: Version = Version::new(4);
impl ComputedFromHeightRatioExtension {
pub(crate) fn forced_import(
@@ -92,6 +95,7 @@ impl ComputedFromHeightRatioExtension {
ratio_pct5_price: import_price!("ratio_pct5"),
ratio_pct2_price: import_price!("ratio_pct2"),
ratio_pct1_price: import_price!("ratio_pct1"),
tdigest: TDigest::default(),
})
}
@@ -118,8 +122,6 @@ impl ComputedFromHeightRatioExtension {
exit,
)?;
// Percentiles via order-statistic Fenwick tree with coordinate compression.
// O(n log n) total vs O(n²) for the naive sorted-insert approach.
let ratio_version = ratio_source.version();
self.mut_ratio_vecs()
.try_for_each(|v| -> Result<()> {
@@ -138,53 +140,19 @@ impl ComputedFromHeightRatioExtension {
let ratio_len = ratio_source.len();
if ratio_len > start {
let all_ratios = ratio_source.collect_range_at(0, ratio_len);
// Coordinate compression: unique sorted values → integer ranks
let coords = {
let mut c = all_ratios.clone();
c.sort_unstable();
c.dedup();
c
};
let m = coords.len();
// Build Fenwick tree (BIT) from elements [0, start) in O(m)
let mut bit = vec![0u32; m + 1]; // 1-indexed
for &v in &all_ratios[..start] {
bit[coords.binary_search(&v).unwrap() + 1] += 1;
}
for i in 1..=m {
let j = i + (i & i.wrapping_neg());
if j <= m {
bit[j] += bit[i];
}
}
// Highest power of 2 <= m (for binary-lifting kth query)
let log2 = {
let mut b = 1usize;
while b <= m {
b <<= 1;
}
b >> 1
};
// Find rank of k-th smallest element (k is 1-indexed) in O(log m)
let kth = |bit: &[u32], mut k: u32| -> usize {
let mut pos = 0;
let mut b = log2;
while b > 0 {
let next = pos + b;
if next <= m && bit[next] < k {
k -= bit[next];
pos = next;
let tdigest_count = self.tdigest.count() as usize;
if tdigest_count != start {
self.tdigest.reset();
if start > 0 {
let historical = ratio_source.collect_range_at(0, start);
for &v in &historical {
self.tdigest.add(*v as f64);
}
b >>= 1;
}
pos
};
}
// Process new blocks [start, ratio_len)
let new_ratios = ratio_source.collect_range_at(start, ratio_len);
let mut pct_vecs: [&mut EagerVec<PcoVec<Height, StoredF32>>; 6] = [
&mut self.ratio_pct1.height,
&mut self.ratio_pct2.height,
@@ -194,25 +162,14 @@ impl ComputedFromHeightRatioExtension {
&mut self.ratio_pct99.height,
];
const PCTS: [f64; 6] = [0.01, 0.02, 0.05, 0.95, 0.98, 0.99];
let mut out = [0.0f64; 6];
let mut count = start;
for (offset, &ratio) in all_ratios[start..].iter().enumerate() {
count += 1;
// Insert into Fenwick tree: O(log m)
let mut i = coords.binary_search(&ratio).unwrap() + 1;
while i <= m {
bit[i] += 1;
i += i & i.wrapping_neg();
}
// Nearest-rank percentile: one kth query each
for (offset, &ratio) in new_ratios.iter().enumerate() {
self.tdigest.add(*ratio as f64);
self.tdigest.quantiles(&PCTS, &mut out);
let idx = start + offset;
let cf = count as f64;
for (vec, &pct) in pct_vecs.iter_mut().zip(PCTS.iter()) {
let k = (cf * pct).ceil().max(1.0) as u32;
let val = coords[kth(&bit, k)];
vec.truncate_push_at(idx, val)?;
for (vec, &val) in pct_vecs.iter_mut().zip(out.iter()) {
vec.truncate_push_at(idx, StoredF32::from(val as f32))?;
}
}
}

View File

@@ -10,6 +10,7 @@ mod lazy_eager_indexes;
mod lazy_value;
mod rolling;
pub(crate) mod sliding_window;
mod tdigest;
mod traits;
mod transform;
mod tx_derived;
@@ -28,6 +29,7 @@ pub(crate) use indexes::*;
pub(crate) use lazy_eager_indexes::*;
pub(crate) use lazy_value::*;
pub(crate) use rolling::*;
pub(crate) use tdigest::*;
pub(crate) use traits::*;
pub use transform::*;
pub(crate) use tx_derived::*;

View File

@@ -0,0 +1,263 @@
/// Streaming t-digest for approximate quantile estimation.
///
/// Uses the merging algorithm with scale function k₂: `q * (1 - q)`.
/// Compression parameter δ controls accuracy vs memory (default 100 → ~200 centroids max).
#[derive(Clone)]
pub(crate) struct TDigest {
    /// Centroid clusters, kept sorted ascending by `mean`.
    centroids: Vec<Centroid>,
    /// Number of non-NaN values added so far (NaN inputs are dropped by `add`).
    count: u64,
    /// Smallest value seen; `f64::INFINITY` until the first `add`.
    min: f64,
    /// Largest value seen; `f64::NEG_INFINITY` until the first `add`.
    max: f64,
    /// δ compression parameter: larger values keep more centroids (better accuracy, more memory).
    compression: f64,
}
/// A weighted cluster of nearby samples: `weight` values summarized by their average `mean`.
#[derive(Clone, Copy)]
struct Centroid {
    mean: f64,
    weight: f64,
}
impl Default for TDigest {
fn default() -> Self {
Self::new(100.0)
}
}
impl TDigest {
    /// Create an empty digest with the given compression parameter δ.
    ///
    /// `min`/`max` start at ±infinity so the first `add` always overwrites them.
    pub fn new(compression: f64) -> Self {
        Self {
            centroids: Vec::new(),
            count: 0,
            min: f64::INFINITY,
            max: f64::NEG_INFINITY,
            compression,
        }
    }

    /// Number of non-NaN values added since construction or the last `reset`.
    pub fn count(&self) -> u64 {
        self.count
    }

    /// Discard all state, returning the digest to its freshly-constructed
    /// condition (compression parameter is kept).
    pub fn reset(&mut self) {
        self.centroids.clear();
        self.count = 0;
        self.min = f64::INFINITY;
        self.max = f64::NEG_INFINITY;
    }

    /// Add one value to the digest. NaN inputs are silently ignored.
    ///
    /// The value is merged into the nearest centroid if that centroid stays
    /// within the k₂ size limit `4·δ·q·(1-q)`; otherwise a new unit-weight
    /// centroid is inserted at its sorted position. When the centroid count
    /// exceeds `2·δ`, a compression pass re-merges adjacent centroids.
    pub fn add(&mut self, value: f64) {
        if value.is_nan() {
            return;
        }
        self.count += 1;
        if value < self.min {
            self.min = value;
        }
        if value > self.max {
            self.max = value;
        }
        if self.centroids.is_empty() {
            self.centroids.push(Centroid {
                mean: value,
                weight: 1.0,
            });
            return;
        }
        // Find nearest centroid by mean. On a miss, binary_search_by returns the
        // insertion point, clamped to the last valid index.
        let pos = self
            .centroids
            .binary_search_by(|c| c.mean.partial_cmp(&value).unwrap_or(std::cmp::Ordering::Equal))
            .unwrap_or_else(|i| i.min(self.centroids.len() - 1));
        // Check neighbors for the actual nearest: the insertion point is the first
        // centroid with mean >= value, but the one just before it may be closer.
        let nearest = if pos > 0
            && (value - self.centroids[pos - 1].mean).abs()
                < (value - self.centroids[pos].mean).abs()
        {
            pos - 1
        } else {
            pos
        };
        // Compute quantile of nearest centroid: cumulative weight up to its
        // midpoint, divided by the total count (which already includes `value`).
        let cum_weight: f64 = self.centroids[..nearest]
            .iter()
            .map(|c| c.weight)
            .sum::<f64>()
            + self.centroids[nearest].weight / 2.0;
        let q = cum_weight / self.count as f64;
        // k₂ size limit: centroids near the median may grow large, those near the
        // tails stay small (better tail-quantile accuracy). Floor of 1 so every
        // centroid may hold at least one sample.
        let limit = (4.0 * self.compression * q * (1.0 - q)).floor().max(1.0);
        if self.centroids[nearest].weight + 1.0 <= limit {
            // Merge into nearest centroid (weighted running mean).
            let c = &mut self.centroids[nearest];
            c.mean = (c.mean * c.weight + value) / (c.weight + 1.0);
            c.weight += 1.0;
        } else {
            // Insert new centroid at sorted position
            let insert_pos = self
                .centroids
                .binary_search_by(|c| {
                    c.mean
                        .partial_cmp(&value)
                        .unwrap_or(std::cmp::Ordering::Equal)
                })
                .unwrap_or_else(|i| i);
            self.centroids.insert(
                insert_pos,
                Centroid {
                    mean: value,
                    weight: 1.0,
                },
            );
        }
        // Compress if too many centroids
        let max_centroids = (2.0 * self.compression) as usize;
        if self.centroids.len() > max_centroids {
            self.compress();
        }
    }

    /// Single left-to-right pass merging adjacent centroids whose combined
    /// weight stays within the k₂ limit at that position. Preserves sorted
    /// order and total weight.
    fn compress(&mut self) {
        if self.centroids.len() <= 1 {
            return;
        }
        let total: f64 = self.centroids.iter().map(|c| c.weight).sum();
        let mut merged: Vec<Centroid> = Vec::with_capacity(self.centroids.len());
        let mut cum = 0.0;
        // `cum` tracks the weight strictly before the centroid currently being
        // grown at the tail of `merged`.
        for c in &self.centroids {
            if let Some(last) = merged.last_mut() {
                let q = (cum + last.weight / 2.0) / total;
                let limit = (4.0 * self.compression * q * (1.0 - q)).floor().max(1.0);
                if last.weight + c.weight <= limit {
                    // Absorb `c` into `last` via weighted mean.
                    let new_weight = last.weight + c.weight;
                    last.mean = (last.mean * last.weight + c.mean * c.weight) / new_weight;
                    last.weight = new_weight;
                    continue;
                }
                cum += last.weight;
            }
            merged.push(*c);
        }
        self.centroids = merged;
    }

    /// Estimate the value at quantile `q` ∈ [0, 1].
    ///
    /// Interpolates piecewise-linearly between centroid midpoints, anchored at
    /// the exact observed `min` and `max` at the extremes. Returns 0.0 for an
    /// empty digest.
    pub fn quantile(&self, q: f64) -> f64 {
        if self.centroids.is_empty() {
            return 0.0;
        }
        if q <= 0.0 {
            return self.min;
        }
        if q >= 1.0 {
            return self.max;
        }
        if self.centroids.len() == 1 {
            return self.centroids[0].mean;
        }
        let total: f64 = self.centroids.iter().map(|c| c.weight).sum();
        // Target rank in weight units; scan for the first centroid whose
        // cumulative midpoint exceeds it.
        let target = q * total;
        let mut cum = 0.0;
        for i in 0..self.centroids.len() {
            let c = &self.centroids[i];
            let mid = cum + c.weight / 2.0;
            if target < mid {
                // Interpolate between previous centroid (or min) and this one
                if i == 0 {
                    // Between min and first centroid center
                    let first_mid = c.weight / 2.0;
                    if first_mid == 0.0 {
                        return self.min;
                    }
                    return self.min + (c.mean - self.min) * (target / first_mid);
                }
                let prev = &self.centroids[i - 1];
                let prev_center = cum - prev.weight / 2.0;
                // Guard against zero-width segments (coincident midpoints).
                let frac = if mid == prev_center {
                    0.5
                } else {
                    (target - prev_center) / (mid - prev_center)
                };
                return prev.mean + (c.mean - prev.mean) * frac;
            }
            cum += c.weight;
        }
        // Between last centroid center and max
        let last = self.centroids.last().unwrap();
        let last_mid = total - last.weight / 2.0;
        let remaining = total - last_mid;
        if remaining == 0.0 {
            return self.max;
        }
        last.mean + (self.max - last.mean) * ((target - last_mid) / remaining)
    }

    /// Batch quantile query: `out[i] = quantile(qs[i])`.
    ///
    /// Each entry is evaluated independently, so `qs` need not be sorted.
    /// Panics if `out` is shorter than `qs`.
    pub fn quantiles(&self, qs: &[f64], out: &mut [f64]) {
        for (i, &q) in qs.iter().enumerate() {
            out[i] = self.quantile(q);
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Quantile estimates on 1..=1000 should land close to the exact ranks.
    #[test]
    fn basic_quantiles() {
        let mut digest = TDigest::default();
        (1..=1000).for_each(|i| digest.add(f64::from(i)));
        assert_eq!(digest.count(), 1000);

        let median = digest.quantile(0.5);
        assert!((median - 500.0).abs() < 10.0, "median was {median}");
        let p99 = digest.quantile(0.99);
        assert!((p99 - 990.0).abs() < 15.0, "p99 was {p99}");
        let p01 = digest.quantile(0.01);
        assert!((p01 - 10.0).abs() < 15.0, "p01 was {p01}");
    }

    /// An empty digest reports zero count and a 0.0 quantile.
    #[test]
    fn empty_digest() {
        let digest = TDigest::default();
        assert_eq!(digest.count(), 0);
        assert_eq!(digest.quantile(0.5), 0.0);
    }

    /// With one sample, every quantile is that sample.
    #[test]
    fn single_value() {
        let mut digest = TDigest::default();
        digest.add(42.0);
        for q in [0.0, 0.5, 1.0] {
            assert_eq!(digest.quantile(q), 42.0);
        }
    }

    /// `reset` returns the digest to its empty state.
    #[test]
    fn reset_works() {
        let mut digest = TDigest::default();
        (0..100).for_each(|i| digest.add(f64::from(i)));
        assert_eq!(digest.count(), 100);
        digest.reset();
        assert_eq!(digest.count(), 0);
        assert_eq!(digest.quantile(0.5), 0.0);
    }
}

View File

@@ -0,0 +1,11 @@
use brk_types::StoredF32;
use vecdb::UnaryTransform;
/// Unary transform converting a day count into years (division by 365).
pub struct DaysToYears;

impl UnaryTransform<StoredF32, StoredF32> for DaysToYears {
    #[inline(always)]
    fn apply(days: StoredF32) -> StoredF32 {
        // Plain calendar-agnostic conversion: 365 days per year.
        let years = *days / 365.0;
        StoredF32::from(years)
    }
}

View File

@@ -41,7 +41,7 @@ mod sat_halve_to_bitcoin;
mod sat_identity;
mod sat_mask;
mod sat_to_bitcoin;
mod u16_to_years;
mod days_to_years;
mod volatility_sqrt30;
mod volatility_sqrt365;
mod volatility_sqrt7;
@@ -89,7 +89,7 @@ pub use sat_halve_to_bitcoin::*;
pub use sat_identity::*;
pub use sat_mask::*;
pub use sat_to_bitcoin::*;
pub use u16_to_years::*;
pub use days_to_years::*;
pub use volatility_sqrt7::*;
pub use volatility_sqrt30::*;
pub use volatility_sqrt365::*;

View File

@@ -1,12 +0,0 @@
use brk_types::{StoredF32, StoredU16};
use vecdb::UnaryTransform;
/// StoredU16 / 365.0 -> StoredF32 (days to years conversion)
pub struct StoredU16ToYears;
impl UnaryTransform<StoredU16, StoredF32> for StoredU16ToYears {
#[inline(always)]
fn apply(v: StoredU16) -> StoredF32 {
StoredF32::from(*v as f64 / 365.0)
}
}

View File

@@ -1,15 +1,15 @@
use brk_error::Result;
use brk_types::{Day1, StoredU16};
use brk_types::{StoredF32, Timestamp};
use vecdb::{Exit, ReadableVec, VecIndex};
use super::Vecs;
use crate::{ComputeIndexes, indexes, prices, traits::ComputeDrawdown};
use crate::{blocks, ComputeIndexes, prices, traits::ComputeDrawdown};
impl Vecs {
pub(crate) fn compute(
&mut self,
prices: &prices::Vecs,
indexes: &indexes::Vecs,
blocks: &blocks::Vecs,
starting_indexes: &ComputeIndexes,
exit: &Exit,
) -> Result<()> {
@@ -19,28 +19,28 @@ impl Vecs {
exit,
)?;
let mut ath_day: Option<Day1> = None;
let mut ath_ts: Option<Timestamp> = None;
self.days_since_price_ath.height.compute_transform3(
starting_indexes.height,
&self.price_ath.cents.height,
&prices.price.cents.height,
&indexes.height.day1,
|(i, ath, price, day, slf)| {
if ath_day.is_none() {
&blocks.time.timestamp_monotonic,
|(i, ath, price, ts, slf)| {
if ath_ts.is_none() {
let idx = i.to_usize();
ath_day = Some(if idx > 0 {
let prev_days_since = slf.collect_one_at(idx - 1).unwrap();
Day1::from(day.to_usize().saturating_sub(usize::from(prev_days_since)))
ath_ts = Some(if idx > 0 {
let prev_days: StoredF32 = slf.collect_one_at(idx - 1).unwrap();
Timestamp::from((*ts as f64 - *prev_days as f64 * 86400.0) as u32)
} else {
day
ts
});
}
if price == ath {
ath_day = Some(day);
(i, StoredU16::default())
ath_ts = Some(ts);
(i, StoredF32::default())
} else {
let days_since = (day.to_usize() - ath_day.unwrap().to_usize()) as u16;
(i, StoredU16::from(days_since))
let days = ts.difference_in_days_between_float(ath_ts.unwrap());
(i, StoredF32::from(days as f32))
}
},
exit,
@@ -56,7 +56,7 @@ impl Vecs {
prev.replace(if i > 0 {
slf.collect_one_at(i - 1).unwrap()
} else {
StoredU16::ZERO
StoredF32::default()
});
}
let max = prev.unwrap().max(days);

View File

@@ -5,45 +5,42 @@ use vecdb::Database;
use super::Vecs;
use crate::{
indexes,
internal::{
ComputedFromHeight, LazyHeightDerived,
Price, StoredU16ToYears,
},
internal::{ComputedFromHeight, DaysToYears, LazyHeightDerived, Price},
};
const VERSION: Version = Version::ONE;
impl Vecs {
pub(crate) fn forced_import(
db: &Database,
version: Version,
indexes: &indexes::Vecs,
) -> Result<Self> {
let price_ath = Price::forced_import(db, "price_ath", version, indexes)?;
let v = version + VERSION;
let max_days_between_price_aths = ComputedFromHeight::forced_import(
db,
"max_days_between_price_aths",
version,
indexes,
)?;
let price_ath = Price::forced_import(db, "price_ath", v, indexes)?;
let max_days_between_price_aths =
ComputedFromHeight::forced_import(db, "max_days_between_price_aths", v, indexes)?;
let max_years_between_price_aths =
LazyHeightDerived::from_computed::<StoredU16ToYears>(
LazyHeightDerived::from_computed::<DaysToYears>(
"max_years_between_price_aths",
version,
v,
&max_days_between_price_aths,
);
let days_since_price_ath =
ComputedFromHeight::forced_import(db, "days_since_price_ath", version, indexes)?;
ComputedFromHeight::forced_import(db, "days_since_price_ath", v, indexes)?;
let years_since_price_ath = LazyHeightDerived::from_computed::<StoredU16ToYears>(
let years_since_price_ath = LazyHeightDerived::from_computed::<DaysToYears>(
"years_since_price_ath",
version,
v,
&days_since_price_ath,
);
let price_drawdown =
ComputedFromHeight::forced_import(db, "price_drawdown", version, indexes)?;
ComputedFromHeight::forced_import(db, "price_drawdown", v, indexes)?;
Ok(Self {
price_ath,

View File

@@ -1,16 +1,15 @@
use brk_traversable::Traversable;
use brk_types::{Cents, StoredF32, StoredU16};
use brk_types::{Cents, StoredF32};
use vecdb::{Rw, StorageMode};
use crate::internal::{ComputedFromHeight, LazyHeightDerived, Price};
/// All-time high related metrics
#[derive(Traversable)]
pub struct Vecs<M: StorageMode = Rw> {
pub price_ath: Price<ComputedFromHeight<Cents, M>>,
pub price_drawdown: ComputedFromHeight<StoredF32, M>,
pub days_since_price_ath: ComputedFromHeight<StoredU16, M>,
pub years_since_price_ath: LazyHeightDerived<StoredF32, StoredU16>,
pub max_days_between_price_aths: ComputedFromHeight<StoredU16, M>,
pub max_years_between_price_aths: LazyHeightDerived<StoredF32, StoredU16>,
pub days_since_price_ath: ComputedFromHeight<StoredF32, M>,
pub years_since_price_ath: LazyHeightDerived<StoredF32, StoredF32>,
pub max_days_between_price_aths: ComputedFromHeight<StoredF32, M>,
pub max_years_between_price_aths: LazyHeightDerived<StoredF32, StoredF32>,
}

View File

@@ -18,8 +18,7 @@ impl Vecs {
starting_indexes: &ComputeIndexes,
exit: &Exit,
) -> Result<()> {
// ATH metrics (independent)
self.ath.compute(prices, indexes, starting_indexes, exit)?;
self.ath.compute(prices, blocks, starting_indexes, exit)?;
// Lookback metrics (independent)
self.lookback
@@ -46,7 +45,6 @@ impl Vecs {
.compute(indexes, prices, blocks, &self.lookback, starting_indexes, exit)?;
self.indicators.compute(
indexes,
&mining.rewards,
&self.returns,
&self.range,

View File

@@ -4,10 +4,7 @@ use vecdb::{AnyVec, Exit, ReadableOptionVec, ReadableVec, VecIndex};
use super::Vecs;
use crate::{
ComputeIndexes, blocks, indexes,
internal::{ComputedFromHeight, PercentageDiffCents},
market::lookback,
prices,
ComputeIndexes, blocks, indexes, internal::PercentageDiffCents, market::lookback, prices,
};
const DCA_AMOUNT: Dollars = Dollars::mint(100.0);
@@ -25,9 +22,7 @@ impl Vecs {
let h2d = &indexes.height.day1;
let close = &prices.split.close.usd.day1;
let first_price_di = Day1::try_from(Date::new(2010, 7, 12))
.unwrap()
.to_usize();
let first_price_di = Day1::try_from(Date::new(2010, 7, 12)).unwrap().to_usize();
// Compute per-height DCA sats contribution once (reused by all periods).
// Value = sats_from_dca(close_price) on day-boundary blocks, Sats::ZERO otherwise.
@@ -42,7 +37,10 @@ impl Vecs {
if same_day {
(h, Sats::ZERO)
} else {
let s = close.collect_one_flat(di).map(sats_from_dca).unwrap_or(Sats::ZERO);
let s = close
.collect_one_flat(di)
.map(sats_from_dca)
.unwrap_or(Sats::ZERO);
(h, s)
}
},
@@ -68,7 +66,10 @@ impl Vecs {
.zip_mut_with_days(&self.period_stack)
{
let days = days as usize;
let stack_data = stack.sats.height.collect_range_at(sh, stack.sats.height.len());
let stack_data = stack
.sats
.height
.collect_range_at(sh, stack.sats.height.len());
average_price.cents.height.compute_transform(
starting_indexes.height,
h2d,
@@ -76,9 +77,7 @@ impl Vecs {
let di_usize = di.to_usize();
let stack_sats = stack_data[h.to_usize() - sh];
let avg = if di_usize > first_price_di {
let num_days = days
.min(di_usize + 1)
.min(di_usize + 1 - first_price_di);
let num_days = days.min(di_usize + 1).min(di_usize + 1 - first_price_di);
Cents::from(DCA_AMOUNT * num_days / Bitcoin::from(stack_sats))
} else {
Cents::ZERO
@@ -123,7 +122,10 @@ impl Vecs {
self.period_lump_sum_stack.zip_mut_with_days(&lookback_dca)
{
let total_invested = DCA_AMOUNT * days as usize;
let lookback_data = lookback_price.cents.height.collect_range_at(sh, lookback_price.cents.height.len());
let lookback_data = lookback_price
.cents
.height
.collect_range_at(sh, lookback_price.cents.height.len());
stack.sats.height.compute_transform(
starting_indexes.height,
h2d,
@@ -193,7 +195,10 @@ impl Vecs {
} else {
Sats::ZERO
};
let s = close.collect_one_flat(di).map(sats_from_dca).unwrap_or(Sats::ZERO);
let s = close
.collect_one_flat(di)
.map(sats_from_dca)
.unwrap_or(Sats::ZERO);
prev + s
};
prev_value = result;
@@ -212,7 +217,10 @@ impl Vecs {
.zip(start_days)
{
let from_usize = from.to_usize();
let stack_data = stack.sats.height.collect_range_at(sh, stack.sats.height.len());
let stack_data = stack
.sats
.height
.collect_range_at(sh, stack.sats.height.len());
average_price.cents.height.compute_transform(
starting_indexes.height,
h2d,

View File

@@ -1,10 +1,10 @@
use brk_error::Result;
use brk_types::{Day1, Dollars, StoredF32};
use vecdb::{Exit, ReadableVec};
use brk_types::{Dollars, StoredF32};
use vecdb::Exit;
use super::{super::range, Vecs};
use crate::{
ComputeIndexes, blocks, distribution, indexes,
ComputeIndexes, blocks, distribution,
internal::Ratio32,
mining, prices, transactions,
};
@@ -23,7 +23,6 @@ impl Vecs {
#[allow(clippy::too_many_arguments)]
pub(crate) fn compute(
&mut self,
indexes: &indexes::Vecs,
rewards: &mining::RewardsVecs,
returns: &super::super::returns::Vecs,
range: &range::Vecs,
@@ -106,14 +105,10 @@ impl Vecs {
)?;
}
// Gini (daily, expanded to Height)
let h2d: Vec<Day1> = indexes.height.day1.collect();
let total_heights = h2d.len();
// Gini (per height)
super::gini::compute(
&mut self.gini,
distribution,
&h2d,
total_heights,
starting_indexes,
exit,
)?;

View File

@@ -1,14 +1,12 @@
use brk_error::Result;
use brk_types::{Day1, StoredF32, Version};
use vecdb::{AnyStoredVec, AnyVec, Exit, ReadableOptionVec, VecIndex, WritableVec};
use brk_types::{Sats, StoredF32, StoredU64, Version};
use vecdb::{AnyStoredVec, AnyVec, Exit, ReadableVec, VecIndex, WritableVec};
use crate::{ComputeIndexes, distribution, internal::ComputedFromHeight};
pub(super) fn compute(
gini: &mut ComputedFromHeight<StoredF32>,
distribution: &distribution::Vecs,
h2d: &[Day1],
total_heights: usize,
starting_indexes: &ComputeIndexes,
exit: &Exit,
) -> Result<()> {
@@ -16,11 +14,11 @@ pub(super) fn compute(
let supply_vecs: Vec<&_> = amount_range
.iter()
.map(|c| &c.metrics.supply.total.sats.day1)
.map(|c| &c.metrics.supply.total.sats.height)
.collect();
let count_vecs: Vec<&_> = amount_range
.iter()
.map(|c| &c.metrics.outputs.utxo_count.day1)
.map(|c| &c.metrics.outputs.utxo_count.height)
.collect();
if supply_vecs.is_empty() || supply_vecs.len() != count_vecs.len() {
@@ -39,49 +37,40 @@ pub(super) fn compute(
gini.height
.truncate_if_needed_at(gini.height.len().min(starting_indexes.height.to_usize()))?;
let start_height = gini.height.len();
if start_height >= total_heights {
return Ok(());
}
let num_days = supply_vecs
let total_heights = supply_vecs
.iter()
.map(|v| v.len())
.min()
.unwrap_or(0)
.min(count_vecs.iter().map(|v| v.len()).min().unwrap_or(0));
// Only compute gini for new days (each day is independent)
let start_day = if start_height > 0 {
h2d[start_height].to_usize()
} else {
0
};
let mut gini_new: Vec<f32> = Vec::with_capacity(num_days.saturating_sub(start_day));
let mut buckets: Vec<(u64, u64)> = Vec::with_capacity(supply_vecs.len());
for di in start_day..num_days {
buckets.clear();
let day = Day1::from(di);
for (sv, cv) in supply_vecs.iter().zip(count_vecs.iter()) {
let supply: u64 = sv.collect_one_flat(day).unwrap_or_default().into();
let count: u64 = cv.collect_one_flat(day).unwrap_or_default().into();
buckets.push((count, supply));
}
gini_new.push(gini_from_lorenz(&buckets));
let start_height = gini.height.len();
if start_height >= total_heights {
return Ok(());
}
// Expand to Height
(start_height..total_heights).for_each(|h| {
let di = h2d[h].to_usize();
let offset = di.saturating_sub(start_day);
let val = if offset < gini_new.len() {
StoredF32::from(gini_new[offset])
} else {
StoredF32::NAN
};
gini.height.push(val);
});
// Batch-collect all cohort data for the range [start_height, total_heights)
let n_cohorts = supply_vecs.len();
let supply_data: Vec<Vec<Sats>> = supply_vecs
.iter()
.map(|v| v.collect_range_at(start_height, total_heights))
.collect();
let count_data: Vec<Vec<StoredU64>> = count_vecs
.iter()
.map(|v| v.collect_range_at(start_height, total_heights))
.collect();
let mut buckets: Vec<(u64, u64)> = Vec::with_capacity(n_cohorts);
for offset in 0..total_heights - start_height {
buckets.clear();
for c in 0..n_cohorts {
let supply: u64 = supply_data[c][offset].into();
let count: u64 = count_data[c][offset].into();
buckets.push((count, supply));
}
gini.height
.push(StoredF32::from(gini_from_lorenz(&buckets)));
}
{
let _lock = exit.lock();

View File

@@ -4,6 +4,7 @@ use vecdb::Exit;
use super::MacdChain;
use crate::{ComputeIndexes, blocks, prices};
#[allow(clippy::too_many_arguments)]
pub(super) fn compute(
chain: &mut MacdChain,
blocks: &blocks::Vecs,
@@ -19,19 +20,15 @@ pub(super) fn compute(
let ws_slow = blocks.count.start_vec(slow_days);
let ws_signal = blocks.count.start_vec(signal_days);
chain.ema_fast.height.compute_rolling_ema(
starting_indexes.height,
ws_fast,
close,
exit,
)?;
chain
.ema_fast
.height
.compute_rolling_ema(starting_indexes.height, ws_fast, close, exit)?;
chain.ema_slow.height.compute_rolling_ema(
starting_indexes.height,
ws_slow,
close,
exit,
)?;
chain
.ema_slow
.height
.compute_rolling_ema(starting_indexes.height, ws_slow, close, exit)?;
// MACD line = ema_fast - ema_slow
chain.line.height.compute_subtract(