global: MASSIVE snapshot

This commit is contained in:
nym21
2026-01-07 01:16:37 +01:00
parent e832ffbe23
commit cb0abc324e
487 changed files with 21155 additions and 13627 deletions

View File

@@ -0,0 +1,81 @@
use brk_error::Result;
use brk_traversable::Traversable;
use schemars::JsonSchema;
use vecdb::{AnyVec, Database, Exit, IterableVec, VecIndex, VecValue, Version};
use crate::internal::ComputedVecValue;
use crate::internal::vec::AverageVec;
use super::{MinMax, Percentiles};
/// Distribution stats (average + minmax + percentiles)
///
/// Groups the per-index "shape" aggregates of a source series. Each field is a
/// separately stored vec family sharing one logical name/version.
#[derive(Clone, Traversable)]
pub struct Distribution<I: VecIndex, T: ComputedVecValue + JsonSchema> {
    // Flattened into this struct's traversal level (per the derive attribute).
    #[traversable(flatten)]
    pub average: AverageVec<I, T>,
    #[traversable(flatten)]
    pub minmax: MinMax<I, T>,
    // NOTE(review): percentiles is intentionally NOT flattened, unlike the two
    // fields above — confirm this asymmetry in traversal output is desired.
    pub percentiles: Percentiles<I, T>,
}
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> Distribution<I, T> {
    /// Import (or create) every component vec under the shared `name`/`version`.
    ///
    /// # Errors
    /// Propagates any error from the underlying `forced_import` calls.
    pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
        Ok(Self {
            average: AverageVec::forced_import(db, name, version)?,
            minmax: MinMax::forced_import(db, name, version)?,
            percentiles: Percentiles::forced_import(db, name, version)?,
        })
    }

    /// Compute distribution stats from source data.
    ///
    /// This computes: average, min, max, percentiles (pct10, pct25, median, pct75, pct90)
    pub fn compute<A>(
        &mut self,
        max_from: I,
        source: &impl IterableVec<A, T>,
        first_indexes: &impl IterableVec<I, A>,
        count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
        exit: &Exit,
    ) -> Result<()>
    where
        A: VecIndex + VecValue + brk_types::CheckedSub<A>,
    {
        // CAUTION: these are positional Option slots — the order must match
        // compute_aggregations' parameter list exactly. Slots this aggregate
        // does not store (first/last/sum/cumulative) are passed as None.
        crate::internal::compute_aggregations(
            max_from,
            source,
            first_indexes,
            count_indexes,
            exit,
            None, // first
            None, // last
            Some(&mut self.minmax.min.0),
            Some(&mut self.minmax.max.0),
            Some(&mut self.average.0),
            None, // sum
            None, // cumulative
            Some(&mut self.percentiles.median.0),
            Some(&mut self.percentiles.pct10.0),
            Some(&mut self.percentiles.pct25.0),
            Some(&mut self.percentiles.pct75.0),
            Some(&mut self.percentiles.pct90.0),
        )
    }

    /// Number of fully computed entries: the minimum length across every
    /// component vec (component lengths may diverge, e.g. after a partial run).
    pub fn len(&self) -> usize {
        self.average
            .0
            .len()
            .min(self.minmax.min.0.len())
            .min(self.minmax.max.0.len())
            .min(self.percentiles.pct10.0.len())
            .min(self.percentiles.pct25.0.len())
            .min(self.percentiles.median.0.len())
            .min(self.percentiles.pct75.0.len())
            .min(self.percentiles.pct90.0.len())
    }

    /// True when no entry has been fully computed yet.
    /// (Companion to `len`; satisfies clippy's `len_without_is_empty`.)
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// First index that still needs computing, capped at `max_from`.
    pub fn starting_index(&self, max_from: I) -> I {
        max_from.min(I::from(self.len()))
    }
}

View File

@@ -0,0 +1,106 @@
use brk_error::Result;
use brk_traversable::Traversable;
use schemars::JsonSchema;
use vecdb::{Database, Exit, IterableVec, VecIndex, VecValue, Version};
use crate::internal::ComputedVecValue;
use super::{Distribution, SumCum};
/// Full stats aggregate: distribution + sum_cum
/// Matches the common full_stats() pattern: average + minmax + percentiles + sum + cumulative
#[derive(Clone, Traversable)]
pub struct Full<I: VecIndex, T: ComputedVecValue + JsonSchema> {
    // Shape aggregates: average, min/max, percentiles.
    pub distribution: Distribution<I, T>,
    // Additive aggregates: per-index sum and running cumulative total.
    pub sum_cum: SumCum<I, T>,
}
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> Full<I, T> {
    /// Import (or create) both component aggregates under the shared `name`/`version`.
    ///
    /// # Errors
    /// Propagates any error from the underlying `forced_import` calls.
    pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
        Ok(Self {
            distribution: Distribution::forced_import(db, name, version)?,
            sum_cum: SumCum::forced_import(db, name, version)?,
        })
    }

    /// Compute all stats from source data.
    ///
    /// This computes: average, min, max, percentiles (pct10, pct25, median, pct75, pct90), sum, cumulative
    pub fn compute<A>(
        &mut self,
        max_from: I,
        source: &impl IterableVec<A, T>,
        first_indexes: &impl IterableVec<I, A>,
        count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
        exit: &Exit,
    ) -> Result<()>
    where
        A: VecIndex + VecValue + brk_types::CheckedSub<A>,
    {
        // CAUTION: positional Option slots — order must match
        // compute_aggregations' parameter list exactly.
        crate::internal::compute_aggregations(
            max_from,
            source,
            first_indexes,
            count_indexes,
            exit,
            None, // first
            None, // last
            Some(&mut self.distribution.minmax.min.0),
            Some(&mut self.distribution.minmax.max.0),
            Some(&mut self.distribution.average.0),
            Some(&mut self.sum_cum.sum.0),
            Some(&mut self.sum_cum.cumulative.0),
            Some(&mut self.distribution.percentiles.median.0),
            Some(&mut self.distribution.percentiles.pct10.0),
            Some(&mut self.distribution.percentiles.pct25.0),
            Some(&mut self.distribution.percentiles.pct75.0),
            Some(&mut self.distribution.percentiles.pct90.0),
        )
    }

    /// Number of fully computed entries: the minimum across both components.
    pub fn len(&self) -> usize {
        self.distribution.len().min(self.sum_cum.len())
    }

    /// True when no entry has been fully computed yet.
    /// (Companion to `len`; satisfies clippy's `len_without_is_empty`.)
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// First index that still needs computing, capped at `max_from`.
    pub fn starting_index(&self, max_from: I) -> I {
        max_from.min(I::from(self.len()))
    }

    /// Compute from aligned source (for coarser time periods like week from dateindex).
    ///
    /// NOTE: Percentiles cannot be derived from finer percentiles - they are skipped.
    pub fn compute_from_aligned<A>(
        &mut self,
        max_from: I,
        source: &Full<A, T>,
        first_indexes: &impl IterableVec<I, A>,
        count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
        exit: &Exit,
    ) -> Result<()>
    where
        A: VecIndex + VecValue + brk_types::CheckedSub<A>,
    {
        // Note: Percentiles cannot be derived from finer percentiles, so we skip them
        // CAUTION: positional Option slots — source group first, then targets.
        crate::internal::compute_aggregations_from_aligned(
            max_from,
            first_indexes,
            count_indexes,
            exit,
            // Source vecs
            None, // first not in Full
            None, // last not in Full
            Some(&source.distribution.minmax.min.0),
            Some(&source.distribution.minmax.max.0),
            Some(&source.distribution.average.0),
            Some(&source.sum_cum.sum.0),
            // Target vecs
            None, // first
            None, // last
            Some(&mut self.distribution.minmax.min.0),
            Some(&mut self.distribution.minmax.max.0),
            Some(&mut self.distribution.average.0),
            Some(&mut self.sum_cum.sum.0),
            Some(&mut self.sum_cum.cumulative.0),
        )
    }
}

View File

@@ -0,0 +1,25 @@
use brk_error::Result;
use brk_traversable::Traversable;
use schemars::JsonSchema;
use vecdb::{Database, VecIndex, Version};
use crate::internal::vec::{MaxVec, MinVec};
use crate::internal::ComputedVecValue;
/// Min + Max
///
/// Pair of per-index extrema vecs stored under one logical name/version.
#[derive(Clone, Traversable)]
pub struct MinMax<I: VecIndex, T: ComputedVecValue + JsonSchema> {
    // Both fields are flattened into this struct's traversal level.
    #[traversable(flatten)]
    pub min: MinVec<I, T>,
    #[traversable(flatten)]
    pub max: MaxVec<I, T>,
}
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> MinMax<I, T> {
    /// Import (or create) both extrema vecs under the shared `name`/`version`.
    ///
    /// # Errors
    /// Propagates any error from the underlying `forced_import` calls.
    pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
        let min = MinVec::forced_import(db, name, version)?;
        let max = MaxVec::forced_import(db, name, version)?;
        Ok(Self { min, max })
    }
}

View File

@@ -0,0 +1,106 @@
use brk_error::Result;
use brk_traversable::Traversable;
use schemars::JsonSchema;
use vecdb::{AnyVec, Database, Exit, IterableVec, VecIndex, VecValue, Version};
use crate::internal::ComputedVecValue;
use crate::internal::vec::AverageVec;
use super::MinMax;
/// Average + MinMax (for TxIndex dateindex aggregation - no percentiles)
#[derive(Clone, Traversable)]
pub struct MinMaxAverage<I: VecIndex, T: ComputedVecValue + JsonSchema> {
    // NOTE(review): `average` is not flattened here, while `Distribution`
    // flattens its `average` field — confirm the difference is intentional.
    pub average: AverageVec<I, T>,
    #[traversable(flatten)]
    pub minmax: MinMax<I, T>,
}
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> MinMaxAverage<I, T> {
    /// Import (or create) both component aggregates under the shared `name`/`version`.
    ///
    /// # Errors
    /// Propagates any error from the underlying `forced_import` calls.
    pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
        Ok(Self {
            average: AverageVec::forced_import(db, name, version)?,
            minmax: MinMax::forced_import(db, name, version)?,
        })
    }

    /// Compute average and minmax from source data.
    pub fn compute<A>(
        &mut self,
        max_from: I,
        source: &impl IterableVec<A, T>,
        first_indexes: &impl IterableVec<I, A>,
        count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
        exit: &Exit,
    ) -> Result<()>
    where
        A: VecIndex + VecValue + brk_types::CheckedSub<A>,
    {
        // CAUTION: positional Option slots — order must match
        // compute_aggregations' parameter list exactly.
        crate::internal::compute_aggregations(
            max_from,
            source,
            first_indexes,
            count_indexes,
            exit,
            None, // first
            None, // last
            Some(&mut self.minmax.min.0),
            Some(&mut self.minmax.max.0),
            Some(&mut self.average.0),
            None, // sum
            None, // cumulative
            None, // median
            None, // pct10
            None, // pct25
            None, // pct75
            None, // pct90
        )
    }

    /// Compute from aligned source (for coarser time periods).
    pub fn compute_from_aligned<A>(
        &mut self,
        max_from: I,
        source: &MinMaxAverage<A, T>,
        first_indexes: &impl IterableVec<I, A>,
        count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
        exit: &Exit,
    ) -> Result<()>
    where
        A: VecIndex + VecValue + brk_types::CheckedSub<A>,
    {
        // CAUTION: positional Option slots — source group first, then targets.
        crate::internal::compute_aggregations_from_aligned(
            max_from,
            first_indexes,
            count_indexes,
            exit,
            // Source vecs
            None, // first
            None, // last
            Some(&source.minmax.min.0),
            Some(&source.minmax.max.0),
            Some(&source.average.0),
            None, // sum
            // Target vecs
            None, // first
            None, // last
            Some(&mut self.minmax.min.0),
            Some(&mut self.minmax.max.0),
            Some(&mut self.average.0),
            None, // sum
            None, // cumulative
        )
    }

    /// Number of fully computed entries: the minimum length across the
    /// component vecs (lengths may diverge, e.g. after a partial run).
    pub fn len(&self) -> usize {
        self.average
            .0
            .len()
            .min(self.minmax.min.0.len())
            .min(self.minmax.max.0.len())
    }

    /// True when no entry has been fully computed yet.
    /// (Companion to `len`; satisfies clippy's `len_without_is_empty`.)
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// First index that still needs computing, capped at `max_from`.
    pub fn starting_index(&self, max_from: I) -> I {
        max_from.min(I::from(self.len()))
    }
}

View File

@@ -0,0 +1,15 @@
// Aggregate vec groupings, one module per combination of stored statistics.
mod distribution;
mod full;
mod min_max;
mod min_max_average;
mod percentiles;
mod stats;
mod sum_cum;

// Re-export everything flat so callers use `super::{Distribution, Full, ...}`.
pub use distribution::*;
pub use full::*;
pub use min_max::*;
pub use min_max_average::*;
pub use percentiles::*;
pub use stats::*;
pub use sum_cum::*;

View File

@@ -0,0 +1,29 @@
use brk_error::Result;
use brk_traversable::Traversable;
use schemars::JsonSchema;
use vecdb::{Database, VecIndex, Version};
use crate::internal::vec::{MedianVec, Pct10Vec, Pct25Vec, Pct75Vec, Pct90Vec};
use crate::internal::ComputedVecValue;
/// All percentiles (pct10, pct25, median, pct75, pct90)
///
/// One stored vec per percentile, sharing a logical name/version.
#[derive(Clone, Traversable)]
pub struct Percentiles<I: VecIndex, T: ComputedVecValue + JsonSchema> {
    pub pct10: Pct10Vec<I, T>,
    pub pct25: Pct25Vec<I, T>,
    // 50th percentile.
    pub median: MedianVec<I, T>,
    pub pct75: Pct75Vec<I, T>,
    pub pct90: Pct90Vec<I, T>,
}
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> Percentiles<I, T> {
    /// Import (or create) every percentile vec under the shared `name`/`version`.
    ///
    /// # Errors
    /// Propagates any error from the underlying `forced_import` calls.
    pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
        let pct10 = Pct10Vec::forced_import(db, name, version)?;
        let pct25 = Pct25Vec::forced_import(db, name, version)?;
        let median = MedianVec::forced_import(db, name, version)?;
        let pct75 = Pct75Vec::forced_import(db, name, version)?;
        let pct90 = Pct90Vec::forced_import(db, name, version)?;
        Ok(Self {
            pct10,
            pct25,
            median,
            pct75,
            pct90,
        })
    }
}

View File

@@ -0,0 +1,72 @@
use brk_error::Result;
use brk_traversable::Traversable;
use schemars::JsonSchema;
use vecdb::{AnyVec, Database, Exit, IterableVec, VecIndex, VecValue, Version};
use crate::internal::vec::AverageVec;
use crate::internal::ComputedVecValue;
use super::{MinMax, SumCum};
/// Sum + Cumulative + Average + Min + Max. Like `Full` but without percentiles.
#[derive(Clone, Traversable)]
pub struct Stats<I: VecIndex, T: ComputedVecValue + JsonSchema> {
    // Additive aggregates: per-index sum and running cumulative total.
    pub sum_cum: SumCum<I, T>,
    pub average: AverageVec<I, T>,
    pub minmax: MinMax<I, T>,
}
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> Stats<I, T> {
    /// Import (or create) all component aggregates under the shared `name`/`version`.
    ///
    /// # Errors
    /// Propagates any error from the underlying `forced_import` calls.
    pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
        Ok(Self {
            sum_cum: SumCum::forced_import(db, name, version)?,
            average: AverageVec::forced_import(db, name, version)?,
            minmax: MinMax::forced_import(db, name, version)?,
        })
    }

    /// Compute sum, cumulative, average, and minmax from source data.
    pub fn compute<A>(
        &mut self,
        max_from: I,
        source: &impl IterableVec<A, T>,
        first_indexes: &impl IterableVec<I, A>,
        count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
        exit: &Exit,
    ) -> Result<()>
    where
        A: VecIndex + VecValue + brk_types::CheckedSub<A>,
    {
        // CAUTION: positional Option slots — order must match
        // compute_aggregations' parameter list exactly.
        crate::internal::compute_aggregations(
            max_from,
            source,
            first_indexes,
            count_indexes,
            exit,
            None, // first
            None, // last
            Some(&mut self.minmax.min.0),
            Some(&mut self.minmax.max.0),
            Some(&mut self.average.0),
            Some(&mut self.sum_cum.sum.0),
            Some(&mut self.sum_cum.cumulative.0),
            None, // median
            None, // pct10
            None, // pct25
            None, // pct75
            None, // pct90
        )
    }

    /// Number of fully computed entries: the minimum length across all
    /// component vecs (`sum_cum.len()` already mins sum and cumulative).
    pub fn len(&self) -> usize {
        self.sum_cum
            .len()
            .min(self.average.0.len())
            .min(self.minmax.min.0.len())
            .min(self.minmax.max.0.len())
    }

    /// True when no entry has been fully computed yet.
    /// (Companion to `len`; satisfies clippy's `len_without_is_empty`.)
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// First index that still needs computing, capped at `max_from`.
    pub fn starting_index(&self, max_from: I) -> I {
        max_from.min(I::from(self.len()))
    }
}

View File

@@ -0,0 +1,111 @@
use brk_error::Result;
use brk_traversable::Traversable;
use schemars::JsonSchema;
use vecdb::{AnyVec, Database, Exit, IterableVec, VecIndex, VecValue, Version};
use crate::internal::vec::{CumulativeVec, SumVec};
use crate::internal::ComputedVecValue;
/// Sum + Cumulative (12% of usage)
///
/// Per-index sum plus a running cumulative total, stored as two vecs under
/// one logical name/version.
#[derive(Clone, Traversable)]
pub struct SumCum<I: VecIndex, T: ComputedVecValue + JsonSchema> {
    // Both fields are flattened into this struct's traversal level.
    #[traversable(flatten)]
    pub sum: SumVec<I, T>,
    #[traversable(flatten)]
    pub cumulative: CumulativeVec<I, T>,
}
impl<I: VecIndex, T: ComputedVecValue + JsonSchema> SumCum<I, T> {
    /// Import (or create) both vecs under the shared `name`/`version`.
    ///
    /// # Errors
    /// Propagates any error from the underlying `forced_import` calls.
    pub fn forced_import(db: &Database, name: &str, version: Version) -> Result<Self> {
        Ok(Self {
            sum: SumVec::forced_import(db, name, version)?,
            cumulative: CumulativeVec::forced_import(db, name, version)?,
        })
    }

    /// Compute sum and cumulative from source data.
    pub fn compute<A>(
        &mut self,
        max_from: I,
        source: &impl IterableVec<A, T>,
        first_indexes: &impl IterableVec<I, A>,
        count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
        exit: &Exit,
    ) -> Result<()>
    where
        A: VecIndex + VecValue + brk_types::CheckedSub<A>,
    {
        // CAUTION: positional Option slots — order must match
        // compute_aggregations' parameter list exactly.
        crate::internal::compute_aggregations(
            max_from,
            source,
            first_indexes,
            count_indexes,
            exit,
            None, // first
            None, // last
            None, // min
            None, // max
            None, // average
            Some(&mut self.sum.0),
            Some(&mut self.cumulative.0),
            None, // median
            None, // pct10
            None, // pct25
            None, // pct75
            None, // pct90
        )
    }

    /// Extend cumulative from an existing source vec.
    pub fn extend_cumulative(
        &mut self,
        max_from: I,
        source: &impl IterableVec<I, T>,
        exit: &Exit,
    ) -> Result<()> {
        crate::internal::compute_cumulative_extend(max_from, source, &mut self.cumulative.0, exit)
    }

    /// Number of fully computed entries: the minimum of the two component
    /// vec lengths (they may diverge, e.g. after a partial run).
    pub fn len(&self) -> usize {
        self.sum.0.len().min(self.cumulative.0.len())
    }

    /// True when no entry has been fully computed yet.
    /// (Companion to `len`; satisfies clippy's `len_without_is_empty`.)
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// First index that still needs computing, capped at `max_from`.
    pub fn starting_index(&self, max_from: I) -> I {
        max_from.min(I::from(self.len()))
    }

    /// Compute from aligned source (for coarser time periods like week from dateindex).
    pub fn compute_from_aligned<A>(
        &mut self,
        max_from: I,
        source: &SumCum<A, T>,
        first_indexes: &impl IterableVec<I, A>,
        count_indexes: &impl IterableVec<I, brk_types::StoredU64>,
        exit: &Exit,
    ) -> Result<()>
    where
        A: VecIndex + VecValue + brk_types::CheckedSub<A>,
    {
        // CAUTION: positional Option slots — source group first, then targets.
        crate::internal::compute_aggregations_from_aligned(
            max_from,
            first_indexes,
            count_indexes,
            exit,
            // Source vecs
            None, // first
            None, // last
            None, // min
            None, // max
            None, // average
            Some(&source.sum.0),
            // Target vecs
            None, // first
            None, // last
            None, // min
            None, // max
            None, // average
            Some(&mut self.sum.0),
            Some(&mut self.cumulative.0),
        )
    }
}