mirror of
https://github.com/bitcoinresearchkit/brk.git
synced 2026-05-31 18:23:39 -07:00
global: snapshot
This commit is contained in:
@@ -5,7 +5,7 @@ use vecdb::{AnyStoredVec, AnyVec, Database, EagerVec, Exit, PcoVec, ReadableVec,
|
||||
|
||||
use crate::{
|
||||
ComputeIndexes, blocks, indexes,
|
||||
internal::{ComputedFromHeightStdDevExtended, Price},
|
||||
internal::{ComputedFromHeightStdDevExtended, Price, TDigest},
|
||||
};
|
||||
|
||||
use super::super::ComputedFromHeight;
|
||||
@@ -31,9 +31,12 @@ pub struct ComputedFromHeightRatioExtension<M: StorageMode = Rw> {
|
||||
pub ratio_4y_sd: ComputedFromHeightStdDevExtended<M>,
|
||||
pub ratio_2y_sd: ComputedFromHeightStdDevExtended<M>,
|
||||
pub ratio_1y_sd: ComputedFromHeightStdDevExtended<M>,
|
||||
|
||||
#[traversable(skip)]
|
||||
tdigest: TDigest,
|
||||
}
|
||||
|
||||
const VERSION: Version = Version::new(3);
|
||||
const VERSION: Version = Version::new(4);
|
||||
|
||||
impl ComputedFromHeightRatioExtension {
|
||||
pub(crate) fn forced_import(
|
||||
@@ -92,6 +95,7 @@ impl ComputedFromHeightRatioExtension {
|
||||
ratio_pct5_price: import_price!("ratio_pct5"),
|
||||
ratio_pct2_price: import_price!("ratio_pct2"),
|
||||
ratio_pct1_price: import_price!("ratio_pct1"),
|
||||
tdigest: TDigest::default(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -118,8 +122,6 @@ impl ComputedFromHeightRatioExtension {
|
||||
exit,
|
||||
)?;
|
||||
|
||||
// Percentiles via order-statistic Fenwick tree with coordinate compression.
|
||||
// O(n log n) total vs O(n²) for the naive sorted-insert approach.
|
||||
let ratio_version = ratio_source.version();
|
||||
self.mut_ratio_vecs()
|
||||
.try_for_each(|v| -> Result<()> {
|
||||
@@ -138,53 +140,19 @@ impl ComputedFromHeightRatioExtension {
|
||||
let ratio_len = ratio_source.len();
|
||||
|
||||
if ratio_len > start {
|
||||
let all_ratios = ratio_source.collect_range_at(0, ratio_len);
|
||||
|
||||
// Coordinate compression: unique sorted values → integer ranks
|
||||
let coords = {
|
||||
let mut c = all_ratios.clone();
|
||||
c.sort_unstable();
|
||||
c.dedup();
|
||||
c
|
||||
};
|
||||
let m = coords.len();
|
||||
|
||||
// Build Fenwick tree (BIT) from elements [0, start) in O(m)
|
||||
let mut bit = vec![0u32; m + 1]; // 1-indexed
|
||||
for &v in &all_ratios[..start] {
|
||||
bit[coords.binary_search(&v).unwrap() + 1] += 1;
|
||||
}
|
||||
for i in 1..=m {
|
||||
let j = i + (i & i.wrapping_neg());
|
||||
if j <= m {
|
||||
bit[j] += bit[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Highest power of 2 <= m (for binary-lifting kth query)
|
||||
let log2 = {
|
||||
let mut b = 1usize;
|
||||
while b <= m {
|
||||
b <<= 1;
|
||||
}
|
||||
b >> 1
|
||||
};
|
||||
|
||||
// Find rank of k-th smallest element (k is 1-indexed) in O(log m)
|
||||
let kth = |bit: &[u32], mut k: u32| -> usize {
|
||||
let mut pos = 0;
|
||||
let mut b = log2;
|
||||
while b > 0 {
|
||||
let next = pos + b;
|
||||
if next <= m && bit[next] < k {
|
||||
k -= bit[next];
|
||||
pos = next;
|
||||
let tdigest_count = self.tdigest.count() as usize;
|
||||
if tdigest_count != start {
|
||||
self.tdigest.reset();
|
||||
if start > 0 {
|
||||
let historical = ratio_source.collect_range_at(0, start);
|
||||
for &v in &historical {
|
||||
self.tdigest.add(*v as f64);
|
||||
}
|
||||
b >>= 1;
|
||||
}
|
||||
pos
|
||||
};
|
||||
}
|
||||
|
||||
// Process new blocks [start, ratio_len)
|
||||
let new_ratios = ratio_source.collect_range_at(start, ratio_len);
|
||||
let mut pct_vecs: [&mut EagerVec<PcoVec<Height, StoredF32>>; 6] = [
|
||||
&mut self.ratio_pct1.height,
|
||||
&mut self.ratio_pct2.height,
|
||||
@@ -194,25 +162,14 @@ impl ComputedFromHeightRatioExtension {
|
||||
&mut self.ratio_pct99.height,
|
||||
];
|
||||
const PCTS: [f64; 6] = [0.01, 0.02, 0.05, 0.95, 0.98, 0.99];
|
||||
let mut out = [0.0f64; 6];
|
||||
|
||||
let mut count = start;
|
||||
for (offset, &ratio) in all_ratios[start..].iter().enumerate() {
|
||||
count += 1;
|
||||
|
||||
// Insert into Fenwick tree: O(log m)
|
||||
let mut i = coords.binary_search(&ratio).unwrap() + 1;
|
||||
while i <= m {
|
||||
bit[i] += 1;
|
||||
i += i & i.wrapping_neg();
|
||||
}
|
||||
|
||||
// Nearest-rank percentile: one kth query each
|
||||
for (offset, &ratio) in new_ratios.iter().enumerate() {
|
||||
self.tdigest.add(*ratio as f64);
|
||||
self.tdigest.quantiles(&PCTS, &mut out);
|
||||
let idx = start + offset;
|
||||
let cf = count as f64;
|
||||
for (vec, &pct) in pct_vecs.iter_mut().zip(PCTS.iter()) {
|
||||
let k = (cf * pct).ceil().max(1.0) as u32;
|
||||
let val = coords[kth(&bit, k)];
|
||||
vec.truncate_push_at(idx, val)?;
|
||||
for (vec, &val) in pct_vecs.iter_mut().zip(out.iter()) {
|
||||
vec.truncate_push_at(idx, StoredF32::from(val as f32))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ mod lazy_eager_indexes;
|
||||
mod lazy_value;
|
||||
mod rolling;
|
||||
pub(crate) mod sliding_window;
|
||||
mod tdigest;
|
||||
mod traits;
|
||||
mod transform;
|
||||
mod tx_derived;
|
||||
@@ -28,6 +29,7 @@ pub(crate) use indexes::*;
|
||||
pub(crate) use lazy_eager_indexes::*;
|
||||
pub(crate) use lazy_value::*;
|
||||
pub(crate) use rolling::*;
|
||||
pub(crate) use tdigest::*;
|
||||
pub(crate) use traits::*;
|
||||
pub use transform::*;
|
||||
pub(crate) use tx_derived::*;
|
||||
|
||||
@@ -0,0 +1,263 @@
|
||||
/// Streaming t-digest for approximate quantile estimation.
///
/// Samples are clustered into weighted centroids that are kept sorted by mean.
/// A centroid located at quantile `q` may hold at most
/// `4 · n · q · (1 - q) / δ` samples (and never fewer than one), where `n` is
/// the number of samples seen so far and `δ` is the compression parameter.
/// Centroids are therefore tiny near the tails (high accuracy for extreme
/// percentiles) and larger around the median.
///
/// Because the size bound scales with `n`, the number of centroids is expected
/// to grow roughly like `δ · ln(n / δ)` instead of linearly with the input.
/// Larger `δ` means more centroids: better accuracy, more memory and slower
/// inserts. The default is `δ = 100`.
#[derive(Clone, Debug)]
pub(crate) struct TDigest {
    /// Centroids, sorted ascending by `mean`.
    centroids: Vec<Centroid>,
    /// Number of (non-NaN) samples added since construction or the last `reset`.
    count: u64,
    /// Smallest sample seen (`+∞` while empty).
    min: f64,
    /// Largest sample seen (`-∞` while empty).
    max: f64,
    /// Compression parameter δ, always `>= 1`.
    compression: f64,
}

/// A cluster of `weight` samples summarised by their arithmetic `mean`.
#[derive(Clone, Copy, Debug)]
struct Centroid {
    mean: f64,
    weight: f64,
}

impl Default for TDigest {
    /// Creates an empty digest with compression `δ = 100`.
    fn default() -> Self {
        Self::new(100.0)
    }
}

impl TDigest {
    /// Creates an empty digest.
    ///
    /// `compression` is clamped to at least `1.0`; NaN is treated as `1.0`.
    /// This keeps the size bound (which divides by the compression) finite.
    pub fn new(compression: f64) -> Self {
        Self {
            centroids: Vec::new(),
            count: 0,
            min: f64::INFINITY,
            max: f64::NEG_INFINITY,
            // `f64::max` returns the other operand when one is NaN.
            compression: compression.max(1.0),
        }
    }

    /// Number of samples added since construction or the last [`reset`](Self::reset).
    ///
    /// NaN samples are ignored by [`add`](Self::add) and are not counted.
    pub fn count(&self) -> u64 {
        self.count
    }

    /// Discards all samples, keeping the compression parameter.
    pub fn reset(&mut self) {
        self.centroids.clear();
        self.count = 0;
        self.min = f64::INFINITY;
        self.max = f64::NEG_INFINITY;
    }

    /// Maximum weight a centroid centred on quantile `q` may have when the
    /// digest holds `total` samples: `4 · total · q · (1 - q) / δ`, floored,
    /// but never below one so that a single sample always fits.
    fn size_limit(&self, q: f64, total: f64) -> f64 {
        (4.0 * total * q * (1.0 - q) / self.compression)
            .floor()
            .max(1.0)
    }

    /// Adds one sample. NaN is silently ignored.
    ///
    /// The sample is merged into the nearest centroid when that centroid has
    /// room under its size bound; otherwise it becomes a new single-sample
    /// centroid. Cost is linear in the current number of centroids.
    pub fn add(&mut self, value: f64) {
        if value.is_nan() {
            return;
        }

        self.count += 1;
        self.min = self.min.min(value);
        self.max = self.max.max(value);

        if self.centroids.is_empty() {
            self.centroids.push(Centroid {
                mean: value,
                weight: 1.0,
            });
            return;
        }

        // First centroid whose mean is >= value: both the sorted insertion
        // point and the right-hand candidate for "nearest centroid".
        let insert_at = self.centroids.partition_point(|c| c.mean < value);
        let right = insert_at.min(self.centroids.len() - 1);
        let nearest = if right > 0
            && (value - self.centroids[right - 1].mean).abs()
                < (value - self.centroids[right].mean).abs()
        {
            right - 1
        } else {
            right
        };

        // Quantile at the centre of the nearest centroid.
        let total = self.count as f64;
        let preceding: f64 = self.centroids[..nearest].iter().map(|c| c.weight).sum();
        let nearest_weight = self.centroids[nearest].weight;
        let q = (preceding + nearest_weight / 2.0) / total;
        let limit = self.size_limit(q, total);

        if nearest_weight + 1.0 <= limit {
            // Room left: fold the sample into the nearest centroid.
            let c = &mut self.centroids[nearest];
            c.mean = (c.mean * c.weight + value) / (c.weight + 1.0);
            c.weight += 1.0;
        } else {
            // Nearest centroid is full: keep the sample as its own centroid.
            self.centroids.insert(
                insert_at,
                Centroid {
                    mean: value,
                    weight: 1.0,
                },
            );
        }

        // Try to merge neighbours once the soft centroid budget is exceeded.
        let max_centroids = (2.0 * self.compression) as usize;
        if self.centroids.len() > max_centroids {
            self.compress();
        }
    }

    /// Greedily merges adjacent centroids, left to right, whenever the merged
    /// weight still satisfies the size bound of the left centroid.
    ///
    /// Works in place: `write` always trails `read`, so no scratch buffer is
    /// needed.
    fn compress(&mut self) {
        if self.centroids.len() <= 1 {
            return;
        }

        let total = self.count as f64;
        // Index of the centroid currently accumulating merges.
        let mut write = 0usize;
        // Total weight strictly before `centroids[write]`.
        let mut cum = 0.0;

        for read in 1..self.centroids.len() {
            let next = self.centroids[read];
            let last = self.centroids[write];
            let q = (cum + last.weight / 2.0) / total;
            let limit = self.size_limit(q, total);
            let merged_weight = last.weight + next.weight;

            if merged_weight <= limit {
                self.centroids[write] = Centroid {
                    mean: (last.mean * last.weight + next.mean * next.weight) / merged_weight,
                    weight: merged_weight,
                };
            } else {
                cum += last.weight;
                write += 1;
                self.centroids[write] = next;
            }
        }

        self.centroids.truncate(write + 1);
    }

    /// Estimates the value at quantile `q` (`0.0..=1.0`).
    ///
    /// Returns `0.0` for an empty digest, the exact minimum for `q <= 0` and
    /// the exact maximum for `q >= 1`. Otherwise the result is linearly
    /// interpolated between the centres of the two surrounding centroids (or
    /// between min/max and the outermost centroid).
    pub fn quantile(&self, q: f64) -> f64 {
        if self.centroids.is_empty() {
            return 0.0;
        }
        if q <= 0.0 {
            return self.min;
        }
        if q >= 1.0 {
            return self.max;
        }
        if self.centroids.len() == 1 {
            return self.centroids[0].mean;
        }

        // Centroid weights always sum to `count`, so no extra pass is needed.
        let total = self.count as f64;
        let target = q * total;
        let mut cum = 0.0;
        let mut prev: Option<&Centroid> = None;

        for c in &self.centroids {
            // Cumulative weight at the centre of `c`.
            let mid = cum + c.weight / 2.0;

            if target < mid {
                return match prev {
                    // Between the minimum and the centre of the first centroid.
                    None => {
                        if mid == 0.0 {
                            self.min
                        } else {
                            self.min + (c.mean - self.min) * (target / mid)
                        }
                    }
                    // Between the centres of two neighbouring centroids.
                    Some(p) => {
                        let prev_center = cum - p.weight / 2.0;
                        let frac = if mid == prev_center {
                            0.5
                        } else {
                            (target - prev_center) / (mid - prev_center)
                        };
                        p.mean + (c.mean - p.mean) * frac
                    }
                };
            }

            cum += c.weight;
            prev = Some(c);
        }

        // Between the centre of the last centroid and the maximum.
        let last = &self.centroids[self.centroids.len() - 1];
        let last_mid = total - last.weight / 2.0;
        let remaining = total - last_mid;
        if remaining == 0.0 {
            return self.max;
        }
        last.mean + (self.max - last.mean) * ((target - last_mid) / remaining)
    }

    /// Batch quantile query: writes `quantile(qs[i])` into `out[i]`.
    ///
    /// Callers are expected to pass `qs` sorted ascending; each entry is
    /// currently evaluated independently. Elements of `out` beyond `qs.len()`
    /// are left untouched.
    ///
    /// # Panics
    ///
    /// Panics if `out` is shorter than `qs`.
    pub fn quantiles(&self, qs: &[f64], out: &mut [f64]) {
        assert!(
            out.len() >= qs.len(),
            "output slice is shorter than the quantile list"
        );
        for (slot, &q) in out.iter_mut().zip(qs) {
            *slot = self.quantile(q);
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a default-compression digest pre-loaded with `values`, in order.
    fn digest_of(values: impl IntoIterator<Item = f64>) -> TDigest {
        let mut td = TDigest::default();
        values.into_iter().for_each(|v| td.add(v));
        td
    }

    /// Median and both 1% tails of 1..=1000 land within a loose tolerance.
    #[test]
    fn basic_quantiles() {
        let td = digest_of((1..=1000_i32).map(f64::from));
        assert_eq!(td.count(), 1000);

        let median = td.quantile(0.5);
        let p99 = td.quantile(0.99);
        let p01 = td.quantile(0.01);

        assert!((median - 500.0).abs() < 10.0, "median was {median}");
        assert!((p99 - 990.0).abs() < 15.0, "p99 was {p99}");
        assert!((p01 - 10.0).abs() < 15.0, "p01 was {p01}");
    }

    /// A digest with no samples reports zero count and a 0.0 quantile.
    #[test]
    fn empty_digest() {
        let td = digest_of(std::iter::empty());
        assert_eq!(td.count(), 0);
        assert_eq!(td.quantile(0.5), 0.0);
    }

    /// With a single sample every quantile is that sample.
    #[test]
    fn single_value() {
        let td = digest_of([42.0]);
        for q in [0.0, 0.5, 1.0] {
            assert_eq!(td.quantile(q), 42.0);
        }
    }

    /// `reset` returns a populated digest to the empty state.
    #[test]
    fn reset_works() {
        let mut td = digest_of((0..100_i32).map(f64::from));
        assert_eq!(td.count(), 100);

        td.reset();

        assert_eq!(td.count(), 0);
        assert_eq!(td.quantile(0.5), 0.0);
    }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
use brk_types::StoredF32;
|
||||
use vecdb::UnaryTransform;
|
||||
|
||||
pub struct DaysToYears;
|
||||
|
||||
impl UnaryTransform<StoredF32, StoredF32> for DaysToYears {
|
||||
#[inline(always)]
|
||||
fn apply(v: StoredF32) -> StoredF32 {
|
||||
StoredF32::from(*v / 365.0)
|
||||
}
|
||||
}
|
||||
@@ -41,7 +41,7 @@ mod sat_halve_to_bitcoin;
|
||||
mod sat_identity;
|
||||
mod sat_mask;
|
||||
mod sat_to_bitcoin;
|
||||
mod u16_to_years;
|
||||
mod days_to_years;
|
||||
mod volatility_sqrt30;
|
||||
mod volatility_sqrt365;
|
||||
mod volatility_sqrt7;
|
||||
@@ -89,7 +89,7 @@ pub use sat_halve_to_bitcoin::*;
|
||||
pub use sat_identity::*;
|
||||
pub use sat_mask::*;
|
||||
pub use sat_to_bitcoin::*;
|
||||
pub use u16_to_years::*;
|
||||
pub use days_to_years::*;
|
||||
pub use volatility_sqrt7::*;
|
||||
pub use volatility_sqrt30::*;
|
||||
pub use volatility_sqrt365::*;
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
use brk_types::{StoredF32, StoredU16};
|
||||
use vecdb::UnaryTransform;
|
||||
|
||||
/// StoredU16 / 365.0 -> StoredF32 (days to years conversion)
|
||||
pub struct StoredU16ToYears;
|
||||
|
||||
impl UnaryTransform<StoredU16, StoredF32> for StoredU16ToYears {
|
||||
#[inline(always)]
|
||||
fn apply(v: StoredU16) -> StoredF32 {
|
||||
StoredF32::from(*v as f64 / 365.0)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user