global: snapshot

This commit is contained in:
nym21
2026-03-02 15:28:13 +01:00
parent 4d97cec869
commit 4e7cd9ab6f
21 changed files with 595 additions and 373 deletions
@@ -5,7 +5,7 @@ use vecdb::{AnyStoredVec, AnyVec, Database, EagerVec, Exit, PcoVec, ReadableVec,
use crate::{
ComputeIndexes, blocks, indexes,
internal::{ComputedFromHeightStdDevExtended, Price},
internal::{ComputedFromHeightStdDevExtended, Price, TDigest},
};
use super::super::ComputedFromHeight;
@@ -31,9 +31,12 @@ pub struct ComputedFromHeightRatioExtension<M: StorageMode = Rw> {
pub ratio_4y_sd: ComputedFromHeightStdDevExtended<M>,
pub ratio_2y_sd: ComputedFromHeightStdDevExtended<M>,
pub ratio_1y_sd: ComputedFromHeightStdDevExtended<M>,
#[traversable(skip)]
tdigest: TDigest,
}
const VERSION: Version = Version::new(3);
const VERSION: Version = Version::new(4);
impl ComputedFromHeightRatioExtension {
pub(crate) fn forced_import(
@@ -92,6 +95,7 @@ impl ComputedFromHeightRatioExtension {
ratio_pct5_price: import_price!("ratio_pct5"),
ratio_pct2_price: import_price!("ratio_pct2"),
ratio_pct1_price: import_price!("ratio_pct1"),
tdigest: TDigest::default(),
})
}
@@ -118,8 +122,6 @@ impl ComputedFromHeightRatioExtension {
exit,
)?;
// Percentiles via order-statistic Fenwick tree with coordinate compression.
// O(n log n) total vs O(n²) for the naive sorted-insert approach.
let ratio_version = ratio_source.version();
self.mut_ratio_vecs()
.try_for_each(|v| -> Result<()> {
@@ -138,53 +140,19 @@ impl ComputedFromHeightRatioExtension {
let ratio_len = ratio_source.len();
if ratio_len > start {
let all_ratios = ratio_source.collect_range_at(0, ratio_len);
// Coordinate compression: unique sorted values → integer ranks
let coords = {
let mut c = all_ratios.clone();
c.sort_unstable();
c.dedup();
c
};
let m = coords.len();
// Build Fenwick tree (BIT) from elements [0, start) in O(m)
let mut bit = vec![0u32; m + 1]; // 1-indexed
for &v in &all_ratios[..start] {
bit[coords.binary_search(&v).unwrap() + 1] += 1;
}
for i in 1..=m {
let j = i + (i & i.wrapping_neg());
if j <= m {
bit[j] += bit[i];
}
}
// Highest power of 2 <= m (for binary-lifting kth query)
let log2 = {
let mut b = 1usize;
while b <= m {
b <<= 1;
}
b >> 1
};
// Find rank of k-th smallest element (k is 1-indexed) in O(log m)
let kth = |bit: &[u32], mut k: u32| -> usize {
let mut pos = 0;
let mut b = log2;
while b > 0 {
let next = pos + b;
if next <= m && bit[next] < k {
k -= bit[next];
pos = next;
let tdigest_count = self.tdigest.count() as usize;
if tdigest_count != start {
self.tdigest.reset();
if start > 0 {
let historical = ratio_source.collect_range_at(0, start);
for &v in &historical {
self.tdigest.add(*v as f64);
}
b >>= 1;
}
pos
};
}
// Process new blocks [start, ratio_len)
let new_ratios = ratio_source.collect_range_at(start, ratio_len);
let mut pct_vecs: [&mut EagerVec<PcoVec<Height, StoredF32>>; 6] = [
&mut self.ratio_pct1.height,
&mut self.ratio_pct2.height,
@@ -194,25 +162,14 @@ impl ComputedFromHeightRatioExtension {
&mut self.ratio_pct99.height,
];
const PCTS: [f64; 6] = [0.01, 0.02, 0.05, 0.95, 0.98, 0.99];
let mut out = [0.0f64; 6];
let mut count = start;
for (offset, &ratio) in all_ratios[start..].iter().enumerate() {
count += 1;
// Insert into Fenwick tree: O(log m)
let mut i = coords.binary_search(&ratio).unwrap() + 1;
while i <= m {
bit[i] += 1;
i += i & i.wrapping_neg();
}
// Nearest-rank percentile: one kth query each
for (offset, &ratio) in new_ratios.iter().enumerate() {
self.tdigest.add(*ratio as f64);
self.tdigest.quantiles(&PCTS, &mut out);
let idx = start + offset;
let cf = count as f64;
for (vec, &pct) in pct_vecs.iter_mut().zip(PCTS.iter()) {
let k = (cf * pct).ceil().max(1.0) as u32;
let val = coords[kth(&bit, k)];
vec.truncate_push_at(idx, val)?;
for (vec, &val) in pct_vecs.iter_mut().zip(out.iter()) {
vec.truncate_push_at(idx, StoredF32::from(val as f32))?;
}
}
}
+2
View File
@@ -10,6 +10,7 @@ mod lazy_eager_indexes;
mod lazy_value;
mod rolling;
pub(crate) mod sliding_window;
mod tdigest;
mod traits;
mod transform;
mod tx_derived;
@@ -28,6 +29,7 @@ pub(crate) use indexes::*;
pub(crate) use lazy_eager_indexes::*;
pub(crate) use lazy_value::*;
pub(crate) use rolling::*;
pub(crate) use tdigest::*;
pub(crate) use traits::*;
pub use transform::*;
pub(crate) use tx_derived::*;
+263
View File
@@ -0,0 +1,263 @@
/// Streaming t-digest for approximate quantile estimation.
///
/// Samples are absorbed one at a time into a list of weighted centroids kept
/// sorted by mean. A centroid located at quantile `q` may hold at most
/// `max(1, floor(4 * compression * q * (1 - q)))` samples, so centroids stay
/// small near the tails and are allowed to grow towards the median.
///
/// A compaction pass is attempted whenever more than `2 * compression`
/// centroids exist (200 for the default compression of 100).
///
/// NOTE(review): the per-centroid size limit has no dependency on the number
/// of samples seen, and compaction only merges neighbours whose combined
/// weight fits that limit. Once the existing centroids are full, the centroid
/// count therefore appears able to keep growing past `2 * compression`, with
/// a compaction attempt on every `add` — confirm whether the limit was meant
/// to scale with `count`.
#[derive(Clone, Debug)]
pub(crate) struct TDigest {
    /// Centroids ordered by ascending mean.
    centroids: Vec<Centroid>,
    /// Number of non-NaN samples added since construction or the last reset.
    count: u64,
    /// Smallest sample seen (`+inf` while empty).
    min: f64,
    /// Largest sample seen (`-inf` while empty).
    max: f64,
    /// Compression parameter used by the size limit and the compaction trigger.
    compression: f64,
}

/// A cluster of samples summarised by its mean and the number of samples in it.
#[derive(Clone, Copy, Debug)]
struct Centroid {
    mean: f64,
    weight: f64,
}

impl Default for TDigest {
    /// Creates an empty digest with a compression of 100.
    fn default() -> Self {
        Self::new(100.0)
    }
}

impl TDigest {
    /// Creates an empty digest with the given compression parameter.
    pub fn new(compression: f64) -> Self {
        Self {
            centroids: Vec::new(),
            count: 0,
            min: f64::INFINITY,
            max: f64::NEG_INFINITY,
            compression,
        }
    }

    /// Number of samples absorbed so far. NaN samples are ignored by `add`
    /// and are not counted.
    pub fn count(&self) -> u64 {
        self.count
    }

    /// Drops every sample while keeping the configured compression.
    pub fn reset(&mut self) {
        self.centroids.clear();
        self.count = 0;
        self.min = f64::INFINITY;
        self.max = f64::NEG_INFINITY;
    }

    /// Absorbs one sample. NaN is silently ignored and does not change `count`.
    pub fn add(&mut self, value: f64) {
        if value.is_nan() {
            return;
        }
        self.count += 1;
        if value < self.min {
            self.min = value;
        }
        if value > self.max {
            self.max = value;
        }
        if self.centroids.is_empty() {
            self.centroids.push(Centroid {
                mean: value,
                weight: 1.0,
            });
            return;
        }

        // One search serves both purposes: the sorted insertion slot and,
        // clamped to the last index, the starting point of the nearest lookup.
        let insert_pos = match self.centroids.binary_search_by(|c| {
            c.mean
                .partial_cmp(&value)
                .unwrap_or(std::cmp::Ordering::Equal)
        }) {
            Ok(i) | Err(i) => i,
        };
        let pos = insert_pos.min(self.centroids.len() - 1);

        // The left neighbour may be closer than the centroid found by the search.
        let nearest = if pos > 0
            && (value - self.centroids[pos - 1].mean).abs()
                < (value - self.centroids[pos].mean).abs()
        {
            pos - 1
        } else {
            pos
        };

        // Quantile at the centre of the nearest centroid.
        let cum_weight: f64 = self.centroids[..nearest]
            .iter()
            .map(|c| c.weight)
            .sum::<f64>()
            + self.centroids[nearest].weight / 2.0;
        let q = cum_weight / self.count as f64;
        let limit = self.size_limit(q);

        if self.centroids[nearest].weight + 1.0 <= limit {
            // Room left: fold the sample into the nearest centroid.
            let c = &mut self.centroids[nearest];
            c.mean = (c.mean * c.weight + value) / (c.weight + 1.0);
            c.weight += 1.0;
        } else {
            // Nearest centroid is full: the sample becomes its own centroid.
            self.centroids.insert(
                insert_pos,
                Centroid {
                    mean: value,
                    weight: 1.0,
                },
            );
        }

        // Try to compact once the centroid list grows past 2 * compression.
        let max_centroids = (2.0 * self.compression) as usize;
        if self.centroids.len() > max_centroids {
            self.compress();
        }
    }

    /// Maximum weight allowed for a centroid centred at quantile `q`.
    fn size_limit(&self, q: f64) -> f64 {
        (4.0 * self.compression * q * (1.0 - q)).floor().max(1.0)
    }

    /// Greedy left-to-right pass that merges each centroid into the previously
    /// kept one whenever their combined weight fits the size limit.
    fn compress(&mut self) {
        if self.centroids.len() <= 1 {
            return;
        }
        let total: f64 = self.centroids.iter().map(|c| c.weight).sum();
        let mut merged: Vec<Centroid> = Vec::with_capacity(self.centroids.len());
        // Weight of all kept centroids strictly before the last kept one.
        let mut cum = 0.0;
        for c in &self.centroids {
            if let Some(last) = merged.last_mut() {
                let q = (cum + last.weight / 2.0) / total;
                let limit = self.size_limit(q);
                if last.weight + c.weight <= limit {
                    let new_weight = last.weight + c.weight;
                    last.mean = (last.mean * last.weight + c.mean * c.weight) / new_weight;
                    last.weight = new_weight;
                    continue;
                }
                cum += last.weight;
            }
            merged.push(*c);
        }
        self.centroids = merged;
    }

    /// Estimates the value at quantile `q`.
    ///
    /// Returns `0.0` for an empty digest, the tracked minimum for `q <= 0` and
    /// the tracked maximum for `q >= 1`. Otherwise the result is linearly
    /// interpolated between neighbouring centroid centres, using the minimum
    /// and maximum as the outer anchors. A digest holding a single centroid
    /// goes through the same interpolation, so low and high quantiles still
    /// spread between minimum and maximum instead of collapsing to the mean.
    pub fn quantile(&self, q: f64) -> f64 {
        let last = match self.centroids.last() {
            Some(c) => c,
            None => return 0.0,
        };
        if q <= 0.0 {
            return self.min;
        }
        if q >= 1.0 {
            return self.max;
        }

        let total: f64 = self.centroids.iter().map(|c| c.weight).sum();
        let target = q * total;
        // Weight of all centroids strictly before the current one.
        let mut cum = 0.0;
        for (i, c) in self.centroids.iter().enumerate() {
            let mid = cum + c.weight / 2.0;
            if target < mid {
                if i == 0 {
                    // Between the minimum and the centre of the first centroid.
                    let first_mid = c.weight / 2.0;
                    if first_mid == 0.0 {
                        return self.min;
                    }
                    return self.min + (c.mean - self.min) * (target / first_mid);
                }
                // Between the centres of the previous and the current centroid.
                let prev = &self.centroids[i - 1];
                let prev_center = cum - prev.weight / 2.0;
                let frac = if mid == prev_center {
                    0.5
                } else {
                    (target - prev_center) / (mid - prev_center)
                };
                return prev.mean + (c.mean - prev.mean) * frac;
            }
            cum += c.weight;
        }

        // Between the centre of the last centroid and the maximum.
        let last_mid = total - last.weight / 2.0;
        let remaining = total - last_mid;
        if remaining == 0.0 {
            return self.max;
        }
        last.mean + (self.max - last.mean) * ((target - last_mid) / remaining)
    }

    /// Batch quantile query: writes `quantile(qs[i])` into `out[i]`.
    ///
    /// Each quantile is evaluated independently, so `qs` may be in any order.
    /// Slots of `out` beyond `qs.len()` are left untouched.
    ///
    /// # Panics
    ///
    /// Panics if `out` is shorter than `qs`.
    pub fn quantiles(&self, qs: &[f64], out: &mut [f64]) {
        assert!(
            out.len() >= qs.len(),
            "output buffer holds {} slots but {} quantiles were requested",
            out.len(),
            qs.len()
        );
        for (slot, &q) in out.iter_mut().zip(qs) {
            *slot = self.quantile(q);
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a default-compression digest fed with `samples` in order.
    fn digest_of(samples: impl IntoIterator<Item = f64>) -> TDigest {
        let mut digest = TDigest::default();
        samples.into_iter().for_each(|sample| digest.add(sample));
        digest
    }

    /// Median and both tails of 1..=1000 land close to their exact values.
    #[test]
    fn basic_quantiles() {
        let td = digest_of((1..=1000).map(|i| i as f64));
        assert_eq!(td.count(), 1000);

        let median = td.quantile(0.5);
        let p99 = td.quantile(0.99);
        let p01 = td.quantile(0.01);

        assert!((median - 500.0).abs() < 10.0, "median was {median}");
        assert!((p99 - 990.0).abs() < 15.0, "p99 was {p99}");
        assert!((p01 - 10.0).abs() < 15.0, "p01 was {p01}");
    }

    /// A digest without samples reports a zero count and a 0.0 quantile.
    #[test]
    fn empty_digest() {
        let td = TDigest::default();
        assert_eq!(td.count(), 0);
        assert_eq!(td.quantile(0.5), 0.0);
    }

    /// With one sample, every quantile is that sample.
    #[test]
    fn single_value() {
        let td = digest_of([42.0]);
        for q in [0.0, 0.5, 1.0] {
            assert_eq!(td.quantile(q), 42.0);
        }
    }

    /// `reset` brings a populated digest back to the empty state.
    #[test]
    fn reset_works() {
        let mut td = digest_of((0..100).map(|i| i as f64));
        assert_eq!(td.count(), 100);

        td.reset();

        assert_eq!(td.count(), 0);
        assert_eq!(td.quantile(0.5), 0.0);
    }
}
@@ -0,0 +1,11 @@
use brk_types::StoredF32;
use vecdb::UnaryTransform;
/// Unary transform that rescales a `StoredF32` by dividing it by 365.0
/// (days to years, as the name indicates).
pub struct DaysToYears;
impl UnaryTransform<StoredF32, StoredF32> for DaysToYears {
    /// Returns `v / 365.0`, wrapped back into a `StoredF32`.
    #[inline(always)]
    fn apply(v: StoredF32) -> StoredF32 {
        let days = *v;
        let years = days / 365.0;
        StoredF32::from(years)
    }
}
@@ -41,7 +41,7 @@ mod sat_halve_to_bitcoin;
mod sat_identity;
mod sat_mask;
mod sat_to_bitcoin;
mod u16_to_years;
mod days_to_years;
mod volatility_sqrt30;
mod volatility_sqrt365;
mod volatility_sqrt7;
@@ -89,7 +89,7 @@ pub use sat_halve_to_bitcoin::*;
pub use sat_identity::*;
pub use sat_mask::*;
pub use sat_to_bitcoin::*;
pub use u16_to_years::*;
pub use days_to_years::*;
pub use volatility_sqrt7::*;
pub use volatility_sqrt30::*;
pub use volatility_sqrt365::*;
@@ -1,12 +0,0 @@
use brk_types::{StoredF32, StoredU16};
use vecdb::UnaryTransform;
/// Days-to-years conversion: widens a `StoredU16` to `f64`, divides it by
/// 365.0 and stores the result as a `StoredF32`.
pub struct StoredU16ToYears;
impl UnaryTransform<StoredU16, StoredF32> for StoredU16ToYears {
    /// Returns `v / 365.0` computed in `f64`, then converted to `StoredF32`.
    #[inline(always)]
    fn apply(v: StoredU16) -> StoredF32 {
        let days = *v as f64;
        let years = days / 365.0;
        StoredF32::from(years)
    }
}