mirror of
https://github.com/bitcoinresearchkit/brk.git
synced 2026-04-25 07:09:59 -07:00
global: MASSIVE snapshot
This commit is contained in:
81
crates/brk_computer/src/internal/group/distribution.rs
Normal file
81
crates/brk_computer/src/internal/group/distribution.rs
Normal file
@@ -0,0 +1,81 @@
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use schemars::JsonSchema;
|
||||
use vecdb::{AnyVec, Database, Exit, IterableVec, VecIndex, VecValue, Version};
|
||||
|
||||
use crate::internal::ComputedVecValue;
|
||||
use crate::internal::vec::AverageVec;
|
||||
|
||||
use super::{MinMax, Percentiles};
|
||||
|
||||
/// Distribution stats (average + minmax + percentiles)
|
||||
#[derive(Clone, Traversable)]
|
||||
pub struct Distribution<I: VecIndex, T: ComputedVecValue + JsonSchema> {
|
||||
#[traversable(flatten)]
|
||||
pub average: AverageVec<I, T>,
|
||||
#[traversable(flatten)]
|
||||
pub minmax: MinMax<I, T>,
|
||||
pub percentiles: Percentiles<I, T>,
|
||||
}
|
||||
|
||||
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> Distribution<I, T> {
|
||||
pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
|
||||
Ok(Self {
|
||||
average: AverageVec::forced_import(db, name, version)?,
|
||||
minmax: MinMax::forced_import(db, name, version)?,
|
||||
percentiles: Percentiles::forced_import(db, name, version)?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Compute distribution stats from source data.
|
||||
///
|
||||
/// This computes: average, min, max, percentiles (pct10, pct25, median, pct75, pct90)
|
||||
pub fn compute<A>(
|
||||
&mut self,
|
||||
max_from: I,
|
||||
source: &impl IterableVec<A, T>,
|
||||
first_indexes: &impl IterableVec<I, A>,
|
||||
count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
|
||||
exit: &Exit,
|
||||
) -> Result<()>
|
||||
where
|
||||
A: VecIndex + VecValue + brk_types::CheckedSub<A>,
|
||||
{
|
||||
crate::internal::compute_aggregations(
|
||||
max_from,
|
||||
source,
|
||||
first_indexes,
|
||||
count_indexes,
|
||||
exit,
|
||||
None, // first
|
||||
None, // last
|
||||
Some(&mut self.minmax.min.0),
|
||||
Some(&mut self.minmax.max.0),
|
||||
Some(&mut self.average.0),
|
||||
None, // sum
|
||||
None, // cumulative
|
||||
Some(&mut self.percentiles.median.0),
|
||||
Some(&mut self.percentiles.pct10.0),
|
||||
Some(&mut self.percentiles.pct25.0),
|
||||
Some(&mut self.percentiles.pct75.0),
|
||||
Some(&mut self.percentiles.pct90.0),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.average
|
||||
.0
|
||||
.len()
|
||||
.min(self.minmax.min.0.len())
|
||||
.min(self.minmax.max.0.len())
|
||||
.min(self.percentiles.pct10.0.len())
|
||||
.min(self.percentiles.pct25.0.len())
|
||||
.min(self.percentiles.median.0.len())
|
||||
.min(self.percentiles.pct75.0.len())
|
||||
.min(self.percentiles.pct90.0.len())
|
||||
}
|
||||
|
||||
pub fn starting_index(&self, max_from: I) -> I {
|
||||
max_from.min(I::from(self.len()))
|
||||
}
|
||||
}
|
||||
106
crates/brk_computer/src/internal/group/full.rs
Normal file
106
crates/brk_computer/src/internal/group/full.rs
Normal file
@@ -0,0 +1,106 @@
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use schemars::JsonSchema;
|
||||
use vecdb::{Database, Exit, IterableVec, VecIndex, VecValue, Version};
|
||||
|
||||
use crate::internal::ComputedVecValue;
|
||||
|
||||
use super::{Distribution, SumCum};
|
||||
|
||||
/// Full stats aggregate: distribution + sum_cum
|
||||
/// Matches the common full_stats() pattern: average + minmax + percentiles + sum + cumulative
|
||||
#[derive(Clone, Traversable)]
|
||||
pub struct Full<I: VecIndex, T: ComputedVecValue + JsonSchema> {
|
||||
pub distribution: Distribution<I, T>,
|
||||
pub sum_cum: SumCum<I, T>,
|
||||
}
|
||||
|
||||
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> Full<I, T> {
|
||||
pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
|
||||
Ok(Self {
|
||||
distribution: Distribution::forced_import(db, name, version)?,
|
||||
sum_cum: SumCum::forced_import(db, name, version)?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Compute all stats from source data.
|
||||
///
|
||||
/// This computes: average, min, max, percentiles (pct10, pct25, median, pct75, pct90), sum, cumulative
|
||||
pub fn compute<A>(
|
||||
&mut self,
|
||||
max_from: I,
|
||||
source: &impl IterableVec<A, T>,
|
||||
first_indexes: &impl IterableVec<I, A>,
|
||||
count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
|
||||
exit: &Exit,
|
||||
) -> Result<()>
|
||||
where
|
||||
A: VecIndex + VecValue + brk_types::CheckedSub<A>,
|
||||
{
|
||||
crate::internal::compute_aggregations(
|
||||
max_from,
|
||||
source,
|
||||
first_indexes,
|
||||
count_indexes,
|
||||
exit,
|
||||
None, // first
|
||||
None, // last
|
||||
Some(&mut self.distribution.minmax.min.0),
|
||||
Some(&mut self.distribution.minmax.max.0),
|
||||
Some(&mut self.distribution.average.0),
|
||||
Some(&mut self.sum_cum.sum.0),
|
||||
Some(&mut self.sum_cum.cumulative.0),
|
||||
Some(&mut self.distribution.percentiles.median.0),
|
||||
Some(&mut self.distribution.percentiles.pct10.0),
|
||||
Some(&mut self.distribution.percentiles.pct25.0),
|
||||
Some(&mut self.distribution.percentiles.pct75.0),
|
||||
Some(&mut self.distribution.percentiles.pct90.0),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.distribution.len().min(self.sum_cum.len())
|
||||
}
|
||||
|
||||
pub fn starting_index(&self, max_from: I) -> I {
|
||||
max_from.min(I::from(self.len()))
|
||||
}
|
||||
|
||||
/// Compute from aligned source (for coarser time periods like week from dateindex).
|
||||
///
|
||||
/// NOTE: Percentiles cannot be derived from finer percentiles - they are skipped.
|
||||
pub fn compute_from_aligned<A>(
|
||||
&mut self,
|
||||
max_from: I,
|
||||
source: &Full<A, T>,
|
||||
first_indexes: &impl IterableVec<I, A>,
|
||||
count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
|
||||
exit: &Exit,
|
||||
) -> Result<()>
|
||||
where
|
||||
A: VecIndex + VecValue + brk_types::CheckedSub<A>,
|
||||
{
|
||||
// Note: Percentiles cannot be derived from finer percentiles, so we skip them
|
||||
crate::internal::compute_aggregations_from_aligned(
|
||||
max_from,
|
||||
first_indexes,
|
||||
count_indexes,
|
||||
exit,
|
||||
// Source vecs
|
||||
None, // first not in Full
|
||||
None, // last not in Full
|
||||
Some(&source.distribution.minmax.min.0),
|
||||
Some(&source.distribution.minmax.max.0),
|
||||
Some(&source.distribution.average.0),
|
||||
Some(&source.sum_cum.sum.0),
|
||||
// Target vecs
|
||||
None, // first
|
||||
None, // last
|
||||
Some(&mut self.distribution.minmax.min.0),
|
||||
Some(&mut self.distribution.minmax.max.0),
|
||||
Some(&mut self.distribution.average.0),
|
||||
Some(&mut self.sum_cum.sum.0),
|
||||
Some(&mut self.sum_cum.cumulative.0),
|
||||
)
|
||||
}
|
||||
}
|
||||
25
crates/brk_computer/src/internal/group/min_max.rs
Normal file
25
crates/brk_computer/src/internal/group/min_max.rs
Normal file
@@ -0,0 +1,25 @@
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use schemars::JsonSchema;
|
||||
use vecdb::{Database, VecIndex, Version};
|
||||
|
||||
use crate::internal::vec::{MaxVec, MinVec};
|
||||
use crate::internal::ComputedVecValue;
|
||||
|
||||
/// Min + Max
|
||||
#[derive(Clone, Traversable)]
|
||||
pub struct MinMax<I: VecIndex, T: ComputedVecValue + JsonSchema> {
|
||||
#[traversable(flatten)]
|
||||
pub min: MinVec<I, T>,
|
||||
#[traversable(flatten)]
|
||||
pub max: MaxVec<I, T>,
|
||||
}
|
||||
|
||||
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> MinMax<I, T> {
|
||||
pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
|
||||
Ok(Self {
|
||||
min: MinVec::forced_import(db, name, version)?,
|
||||
max: MaxVec::forced_import(db, name, version)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
106
crates/brk_computer/src/internal/group/min_max_average.rs
Normal file
106
crates/brk_computer/src/internal/group/min_max_average.rs
Normal file
@@ -0,0 +1,106 @@
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use schemars::JsonSchema;
|
||||
use vecdb::{AnyVec, Database, Exit, IterableVec, VecIndex, VecValue, Version};
|
||||
|
||||
use crate::internal::ComputedVecValue;
|
||||
use crate::internal::vec::AverageVec;
|
||||
|
||||
use super::MinMax;
|
||||
|
||||
/// Average + MinMax (for TxIndex dateindex aggregation - no percentiles)
|
||||
#[derive(Clone, Traversable)]
|
||||
pub struct MinMaxAverage<I: VecIndex, T: ComputedVecValue + JsonSchema> {
|
||||
pub average: AverageVec<I, T>,
|
||||
#[traversable(flatten)]
|
||||
pub minmax: MinMax<I, T>,
|
||||
}
|
||||
|
||||
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> MinMaxAverage<I, T> {
|
||||
pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
|
||||
Ok(Self {
|
||||
average: AverageVec::forced_import(db, name, version)?,
|
||||
minmax: MinMax::forced_import(db, name, version)?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Compute average and minmax from source data.
|
||||
pub fn compute<A>(
|
||||
&mut self,
|
||||
max_from: I,
|
||||
source: &impl IterableVec<A, T>,
|
||||
first_indexes: &impl IterableVec<I, A>,
|
||||
count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
|
||||
exit: &Exit,
|
||||
) -> Result<()>
|
||||
where
|
||||
A: VecIndex + VecValue + brk_types::CheckedSub<A>,
|
||||
{
|
||||
crate::internal::compute_aggregations(
|
||||
max_from,
|
||||
source,
|
||||
first_indexes,
|
||||
count_indexes,
|
||||
exit,
|
||||
None, // first
|
||||
None, // last
|
||||
Some(&mut self.minmax.min.0),
|
||||
Some(&mut self.minmax.max.0),
|
||||
Some(&mut self.average.0),
|
||||
None, // sum
|
||||
None, // cumulative
|
||||
None, // median
|
||||
None, // pct10
|
||||
None, // pct25
|
||||
None, // pct75
|
||||
None, // pct90
|
||||
)
|
||||
}
|
||||
|
||||
/// Compute from aligned source (for coarser time periods).
|
||||
pub fn compute_from_aligned<A>(
|
||||
&mut self,
|
||||
max_from: I,
|
||||
source: &MinMaxAverage<A, T>,
|
||||
first_indexes: &impl IterableVec<I, A>,
|
||||
count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
|
||||
exit: &Exit,
|
||||
) -> Result<()>
|
||||
where
|
||||
A: VecIndex + VecValue + brk_types::CheckedSub<A>,
|
||||
{
|
||||
crate::internal::compute_aggregations_from_aligned(
|
||||
max_from,
|
||||
first_indexes,
|
||||
count_indexes,
|
||||
exit,
|
||||
// Source vecs
|
||||
None, // first
|
||||
None, // last
|
||||
Some(&source.minmax.min.0),
|
||||
Some(&source.minmax.max.0),
|
||||
Some(&source.average.0),
|
||||
None, // sum
|
||||
// Target vecs
|
||||
None, // first
|
||||
None, // last
|
||||
Some(&mut self.minmax.min.0),
|
||||
Some(&mut self.minmax.max.0),
|
||||
Some(&mut self.average.0),
|
||||
None, // sum
|
||||
None, // cumulative
|
||||
)
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.average
|
||||
.0
|
||||
.len()
|
||||
.min(self.minmax.min.0.len())
|
||||
.min(self.minmax.max.0.len())
|
||||
}
|
||||
|
||||
pub fn starting_index(&self, max_from: I) -> I {
|
||||
max_from.min(I::from(self.len()))
|
||||
}
|
||||
}
|
||||
15
crates/brk_computer/src/internal/group/mod.rs
Normal file
15
crates/brk_computer/src/internal/group/mod.rs
Normal file
@@ -0,0 +1,15 @@
|
||||
mod distribution;
|
||||
mod full;
|
||||
mod min_max;
|
||||
mod min_max_average;
|
||||
mod percentiles;
|
||||
mod stats;
|
||||
mod sum_cum;
|
||||
|
||||
pub use distribution::*;
|
||||
pub use full::*;
|
||||
pub use min_max::*;
|
||||
pub use min_max_average::*;
|
||||
pub use percentiles::*;
|
||||
pub use stats::*;
|
||||
pub use sum_cum::*;
|
||||
29
crates/brk_computer/src/internal/group/percentiles.rs
Normal file
29
crates/brk_computer/src/internal/group/percentiles.rs
Normal file
@@ -0,0 +1,29 @@
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use schemars::JsonSchema;
|
||||
use vecdb::{Database, VecIndex, Version};
|
||||
|
||||
use crate::internal::vec::{MedianVec, Pct10Vec, Pct25Vec, Pct75Vec, Pct90Vec};
|
||||
use crate::internal::ComputedVecValue;
|
||||
|
||||
/// All percentiles (pct10, pct25, median, pct75, pct90)
|
||||
#[derive(Clone, Traversable)]
|
||||
pub struct Percentiles<I: VecIndex, T: ComputedVecValue + JsonSchema> {
|
||||
pub pct10: Pct10Vec<I, T>,
|
||||
pub pct25: Pct25Vec<I, T>,
|
||||
pub median: MedianVec<I, T>,
|
||||
pub pct75: Pct75Vec<I, T>,
|
||||
pub pct90: Pct90Vec<I, T>,
|
||||
}
|
||||
|
||||
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> Percentiles<I, T> {
|
||||
pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
|
||||
Ok(Self {
|
||||
pct10: Pct10Vec::forced_import(db, name, version)?,
|
||||
pct25: Pct25Vec::forced_import(db, name, version)?,
|
||||
median: MedianVec::forced_import(db, name, version)?,
|
||||
pct75: Pct75Vec::forced_import(db, name, version)?,
|
||||
pct90: Pct90Vec::forced_import(db, name, version)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
72
crates/brk_computer/src/internal/group/stats.rs
Normal file
72
crates/brk_computer/src/internal/group/stats.rs
Normal file
@@ -0,0 +1,72 @@
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use schemars::JsonSchema;
|
||||
use vecdb::{AnyVec, Database, Exit, IterableVec, VecIndex, VecValue, Version};
|
||||
|
||||
use crate::internal::vec::AverageVec;
|
||||
use crate::internal::ComputedVecValue;
|
||||
|
||||
use super::{MinMax, SumCum};
|
||||
|
||||
/// Sum + Cumulative + Average + Min + Max. Like `Full` but without percentiles.
|
||||
#[derive(Clone, Traversable)]
|
||||
pub struct Stats<I: VecIndex, T: ComputedVecValue + JsonSchema> {
|
||||
pub sum_cum: SumCum<I, T>,
|
||||
pub average: AverageVec<I, T>,
|
||||
pub minmax: MinMax<I, T>,
|
||||
}
|
||||
|
||||
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> Stats<I, T> {
|
||||
pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
|
||||
Ok(Self {
|
||||
sum_cum: SumCum::forced_import(db, name, version)?,
|
||||
average: AverageVec::forced_import(db, name, version)?,
|
||||
minmax: MinMax::forced_import(db, name, version)?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Compute sum, cumulative, average, and minmax from source data.
|
||||
pub fn compute<A>(
|
||||
&mut self,
|
||||
max_from: I,
|
||||
source: &impl IterableVec<A, T>,
|
||||
first_indexes: &impl IterableVec<I, A>,
|
||||
count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
|
||||
exit: &Exit,
|
||||
) -> Result<()>
|
||||
where
|
||||
A: VecIndex + VecValue + brk_types::CheckedSub<A>,
|
||||
{
|
||||
crate::internal::compute_aggregations(
|
||||
max_from,
|
||||
source,
|
||||
first_indexes,
|
||||
count_indexes,
|
||||
exit,
|
||||
None, // first
|
||||
None, // last
|
||||
Some(&mut self.minmax.min.0),
|
||||
Some(&mut self.minmax.max.0),
|
||||
Some(&mut self.average.0),
|
||||
Some(&mut self.sum_cum.sum.0),
|
||||
Some(&mut self.sum_cum.cumulative.0),
|
||||
None, // median
|
||||
None, // pct10
|
||||
None, // pct25
|
||||
None, // pct75
|
||||
None, // pct90
|
||||
)
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.sum_cum
|
||||
.len()
|
||||
.min(self.average.0.len())
|
||||
.min(self.minmax.min.0.len())
|
||||
.min(self.minmax.max.0.len())
|
||||
}
|
||||
|
||||
pub fn starting_index(&self, max_from: I) -> I {
|
||||
max_from.min(I::from(self.len()))
|
||||
}
|
||||
}
|
||||
111
crates/brk_computer/src/internal/group/sum_cum.rs
Normal file
111
crates/brk_computer/src/internal/group/sum_cum.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use schemars::JsonSchema;
|
||||
use vecdb::{AnyVec, Database, Exit, IterableVec, VecIndex, VecValue, Version};
|
||||
|
||||
use crate::internal::vec::{CumulativeVec, SumVec};
|
||||
use crate::internal::ComputedVecValue;
|
||||
|
||||
/// Sum + Cumulative (12% of usage)
|
||||
#[derive(Clone, Traversable)]
|
||||
pub struct SumCum<I: VecIndex, T: ComputedVecValue + JsonSchema> {
|
||||
#[traversable(flatten)]
|
||||
pub sum: SumVec<I, T>,
|
||||
#[traversable(flatten)]
|
||||
pub cumulative: CumulativeVec<I, T>,
|
||||
}
|
||||
|
||||
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> SumCum<I, T> {
|
||||
pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
|
||||
Ok(Self {
|
||||
sum: SumVec::forced_import(db, name, version)?,
|
||||
cumulative: CumulativeVec::forced_import(db, name, version)?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Compute sum and cumulative from source data.
|
||||
pub fn compute<A>(
|
||||
&mut self,
|
||||
max_from: I,
|
||||
source: &impl IterableVec<A, T>,
|
||||
first_indexes: &impl IterableVec<I, A>,
|
||||
count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
|
||||
exit: &Exit,
|
||||
) -> Result<()>
|
||||
where
|
||||
A: VecIndex + VecValue + brk_types::CheckedSub<A>,
|
||||
{
|
||||
crate::internal::compute_aggregations(
|
||||
max_from,
|
||||
source,
|
||||
first_indexes,
|
||||
count_indexes,
|
||||
exit,
|
||||
None, // first
|
||||
None, // last
|
||||
None, // min
|
||||
None, // max
|
||||
None, // average
|
||||
Some(&mut self.sum.0),
|
||||
Some(&mut self.cumulative.0),
|
||||
None, // median
|
||||
None, // pct10
|
||||
None, // pct25
|
||||
None, // pct75
|
||||
None, // pct90
|
||||
)
|
||||
}
|
||||
|
||||
/// Extend cumulative from an existing source vec.
|
||||
pub fn extend_cumulative(
|
||||
&mut self,
|
||||
max_from: I,
|
||||
source: &impl IterableVec<I, T>,
|
||||
exit: &Exit,
|
||||
) -> Result<()> {
|
||||
crate::internal::compute_cumulative_extend(max_from, source, &mut self.cumulative.0, exit)
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.sum.0.len().min(self.cumulative.0.len())
|
||||
}
|
||||
|
||||
pub fn starting_index(&self, max_from: I) -> I {
|
||||
max_from.min(I::from(self.len()))
|
||||
}
|
||||
|
||||
/// Compute from aligned source (for coarser time periods like week from dateindex).
|
||||
pub fn compute_from_aligned<A>(
|
||||
&mut self,
|
||||
max_from: I,
|
||||
source: &SumCum<A, T>,
|
||||
first_indexes: &impl IterableVec<I, A>,
|
||||
count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
|
||||
exit: &Exit,
|
||||
) -> Result<()>
|
||||
where
|
||||
A: VecIndex + VecValue + brk_types::CheckedSub<A>,
|
||||
{
|
||||
crate::internal::compute_aggregations_from_aligned(
|
||||
max_from,
|
||||
first_indexes,
|
||||
count_indexes,
|
||||
exit,
|
||||
// Source vecs
|
||||
None, // first
|
||||
None, // last
|
||||
None, // min
|
||||
None, // max
|
||||
None, // average
|
||||
Some(&source.sum.0),
|
||||
// Target vecs
|
||||
None, // first
|
||||
None, // last
|
||||
None, // min
|
||||
None, // max
|
||||
None, // average
|
||||
Some(&mut self.sum.0),
|
||||
Some(&mut self.cumulative.0),
|
||||
)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user