global: snapshot

This commit is contained in:
nym21
2026-01-13 22:32:29 +01:00
parent 0c442b4a71
commit e77993fb76
61 changed files with 5047 additions and 5404 deletions
Generated
+17 -17
View File
@@ -963,9 +963,9 @@ dependencies = [
[[package]]
name = "clap_lex"
version = "0.7.6"
version = "0.7.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32"
[[package]]
name = "color-eyre"
@@ -2684,7 +2684,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [
"rand_chacha 0.9.0",
"rand_core 0.9.4",
"rand_core 0.9.5",
]
[[package]]
@@ -2704,7 +2704,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
"rand_core 0.9.4",
"rand_core 0.9.5",
]
[[package]]
@@ -2718,9 +2718,9 @@ dependencies = [
[[package]]
name = "rand_core"
version = "0.9.4"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f1b3bc831f92381018fd9c6350b917c7b21f1eed35a65a51900e0e55a3d7afa"
checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
dependencies = [
"getrandom 0.3.4",
]
@@ -3334,30 +3334,30 @@ dependencies = [
[[package]]
name = "time"
version = "0.3.44"
version = "0.3.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d"
checksum = "f9e442fc33d7fdb45aa9bfeb312c095964abdf596f7567261062b2a7107aaabd"
dependencies = [
"deranged",
"itoa",
"num-conv",
"powerfmt",
"serde",
"serde_core",
"time-core",
"time-macros",
]
[[package]]
name = "time-core"
version = "0.1.6"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b"
checksum = "8b36ee98fd31ec7426d599183e8fe26932a8dc1fb76ddb6214d05493377d34ca"
[[package]]
name = "time-macros"
version = "0.2.24"
version = "0.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3"
checksum = "71e552d1249bf61ac2a52db88179fd0673def1e1ad8243a00d9ec9ed71fee3dd"
dependencies = [
"num-conv",
"time-core",
@@ -3476,9 +3476,9 @@ checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607"
[[package]]
name = "tower"
version = "0.5.2"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
dependencies = [
"futures-core",
"futures-util",
@@ -4228,9 +4228,9 @@ checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3"
[[package]]
name = "zmij"
version = "1.0.13"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac93432f5b761b22864c774aac244fa5c0fd877678a4c37ebf6cf42208f9c9ec"
checksum = "bd8f3f50b848df28f887acb68e41201b5aea6bc8a8dacc00fb40635ff9a72fea"
[[package]]
name = "zopfli"
+4
View File
@@ -1 +1,5 @@
clients/
/*.json
/*.js
/*.rs
/*.py
-296
View File
@@ -1,296 +0,0 @@
# brk_bindgen Design Document
## Goal
Generate typed API clients for **Rust, JavaScript, and Python** with:
- **Discoverability**: Full IDE autocomplete for 20k+ metrics
- **Ease of use**: Fluent API with `.fetch()` on each metric node
## Current State
### What's Working ✅
1. **JS + JSDoc generator**: Generates `client.js` with full JSDoc type annotations
2. **Python generator**: Generates `client.py` with type hints and httpx
3. **Rust generator**: Generates `client.rs` with strong typing and reqwest
4. **schemars integration**: JSON schemas embedded in `MetricLeafWithSchema` for type info
5. **Tree navigation**: `client.tree.blocks.difficulty.fetch()` pattern
6. **OpenAPI integration**: All GET endpoints generate typed methods
7. **Server integration**: brk_server calls brk_bindgen on startup (when clients/ dir exists)
### Generated Output
When `crates/brk_bindgen/clients/` directory exists, running the server generates:
```
crates/brk_bindgen/clients/
├── javascript/
│ └── client.js # JS + JSDoc with tree + API methods
├── python/
│ └── client.py # Python with type hints + httpx
└── rust/
└── client.rs # Rust with reqwest + strong typing
```
## Target Architecture
### Input Sources
```
┌─────────────────────────────────────────────────────────────┐
│ Input Sources │
├─────────────────────────────────────────────────────────────┤
│ 1. OpenAPI spec (from aide) - endpoint definitions │
│ 2. brk_query catalog - metric tree structure │
│ 3. brk_types - Rust types for responses (Rust client only) │
└─────────────────────────────────────────────────────────────┘
```
### Output: Fluent Client
```javascript
// JavaScript (with JSDoc for IDE support)
const client = new BrkClient("http://localhost:3000");
const data = await client.tree.supply.active.by_date.fetch();
// ^^^^ autocomplete all the way down
```
```python
# Python
client = BrkClient("http://localhost:3000")
data = client.tree.supply.active.by_date.fetch()
```
```rust
// Rust
let client = BrkClient::new("http://localhost:3000")?;
let data = client.tree().supply.active.by_date.fetch()?;
```
## Implementation Details
### Smart Metric Nodes
Each tree leaf becomes a "smart node" holding a client reference:
```javascript
// JavaScript + JSDoc
/**
* Metric node with fetch capability
* @template T
*/
class MetricNode {
constructor(client, path) {
this._client = client;
this._path = path;
}
async fetch() {
return this._client.get(this._path);
}
}
```
```python
# Python
class MetricNode(Generic[T]):
def __init__(self, client: BrkClientBase, path: str):
self._client = client
self._path = path
def fetch(self) -> T:
return self._client.get(self._path)
```
```rust
// Rust
pub struct MetricNode<'a, T> {
client: &'a BrkClientBase,
path: &'static str,
_marker: PhantomData<T>,
}
impl<'a, T: DeserializeOwned> MetricNode<'a, T> {
pub fn fetch(&self) -> Result<T> {
self.client.get(self.path)
}
}
```
### Pattern Reuse
To avoid 20k+ individual types, reuse structural patterns:
```rust
// Shared pattern for metrics with same index groupings
struct ByDateHeightMonth<T> {
by_date: MetricNode<T>,
by_height: MetricNode<T>,
by_month: MetricNode<T>,
}
// Composed into full tree
struct Supply {
active: ByDateHeightMonth<Vec<f64>>,
total: ByDateHeightMonth<Vec<f64>>,
}
```
## Type Discovery Solution ✅ IMPLEMENTED
### The Problem
Type information was erased at runtime because metrics are stored as `&dyn AnyExportableVec` trait objects.
### The Solution
Use `std::any::type_name::<T>()` with caching to extract short type names.
#### Implementation (vecdb)
Added `short_type_name<T>()` helper and `value_type_to_string()` to `AnyVec` trait.
### Result
`brk_query` now exposes:
```rust
for (metric_name, index_to_vec) in &vecs.metric_to_index_to_vec {
for (index, vec) in index_to_vec {
println!("{} @ {} -> {}",
metric_name, // "difficulty"
vec.index_type_to_string(), // "Height"
vec.value_type_to_string(), // "StoredF64"
);
}
}
```
## TreeNode Enhancement ✅ IMPLEMENTED
Changed `TreeNode::Leaf(String)` to `TreeNode::Leaf(MetricLeafWithSchema)` where:
```rust
#[derive(Debug, Clone, Serialize, JsonSchema)]
pub struct MetricLeafWithSchema {
#[serde(flatten)]
pub leaf: MetricLeaf,
#[serde(skip)]
pub schema: serde_json::Value, // JSON Schema from schemars
}
```
## OpenAPI Integration ✅ IMPLEMENTED
### Flow
1. brk_server creates OpenAPI spec via aide
2. On startup, serializes spec to JSON string
3. Passes JSON to `brk_bindgen::generate_clients()`
4. brk_bindgen parses with `oas3` crate (supports OpenAPI 3.1)
5. Generates typed methods for all GET endpoints
### Why oas3?
aide generates OpenAPI 3.1 specs. The `openapiv3` crate only supports 3.0.x.
The `oas3` crate supports OpenAPI 3.1.x parsing.
## Tasks
### Phase 0: Type Infrastructure ✅ COMPLETE
- [x] vecdb: Add `short_type_name<T>()` and `value_type_to_string()`
- [x] vecdb: Add optional `schemars` feature with `AnySchemaVec` trait
- [x] brk_types: Enhance `TreeNode::Leaf` to include `MetricLeafWithSchema`
- [x] brk_traversable: Update all `to_tree_node()` with schemars integration
- [x] brk_bindgen: Set up generator module structure
### Phase 1: JavaScript Client ✅ COMPLETE
- [x] Define `MetricNode` class with JSDoc generics
- [x] Define `BrkClient` with base HTTP functionality
- [x] Generate `client.js` with full JSDoc type annotations
- [x] Tree navigation: `client.tree.category.metric.fetch()`
- [x] API methods from OpenAPI endpoints
### Phase 2: OpenAPI Integration ✅ COMPLETE
- [x] Add `oas3` crate dependency (OpenAPI 3.1 support)
- [x] brk_server passes OpenAPI JSON to brk_bindgen on startup
- [x] Parse OpenAPI spec and extract endpoint definitions
- [x] Generate typed methods for each GET endpoint
### Phase 3: Python Client ✅ COMPLETE
- [x] Define `MetricNode` class with type hints
- [x] Define `BrkClient` with httpx
- [x] Generate typed methods from OpenAPI
- [x] Generate tree navigation
### Phase 4: Rust Client ✅ COMPLETE
- [x] Define `MetricNode<T>` struct with lifetimes
- [x] Define `BrkClient` with reqwest (blocking)
- [x] Generate tree navigation with proper lifetimes
- [x] Generate typed methods from OpenAPI
### Phase 5: Polish
- [x] Switch from `openapiv3` to `oas3` crate
- [ ] Error types per language
- [ ] Documentation generation
- [ ] Tests
- [ ] Example usage in each language
- [ ] Async Rust client variant
## File Structure
```
crates/brk_bindgen/
├── src/
│ ├── lib.rs
│ ├── js.rs # JS constants generation (existing)
│ └── generator/
│ ├── mod.rs # generate_clients() entry point
│ ├── types.rs # ClientMetadata, MetricInfo, IndexPattern
│ ├── openapi.rs # OpenAPI 3.1 spec parsing (oas3)
│ ├── javascript.rs # JavaScript + JSDoc client ✅
│ ├── python.rs # Python client ✅
│ └── rust.rs # Rust client ✅
├── clients/ # Generated output (gitignored)
│ ├── javascript/
│ ├── python/
│ └── rust/
├── Cargo.toml
├── README.md
└── DESIGN.md
crates/brk_server/
└── src/
├── lib.rs # Calls brk_bindgen::generate_clients() on startup
└── api/
└── openapi.rs # create_openapi() for aide
```
## Dependencies
```toml
[dependencies]
brk_query = { workspace = true }
brk_types = { workspace = true }
oas3 = "0.20" # OpenAPI 3.1 spec parsing
schemars = { workspace = true }
serde_json = { workspace = true }
```
## Usage
To generate clients:
```bash
# Create the output directory
mkdir -p crates/brk_bindgen/clients
# Run the server (generates clients on startup)
cargo run -p brk_server
```
+79 -386
View File
@@ -1,201 +1,17 @@
//! Vec name deconstruction and reconstruction logic.
//! Common prefix/suffix detection for metric names.
//!
//! This module analyzes vec names bottom-up to detect common denominators
//! (prefixes or suffixes) and field positions for pattern instances.
use std::collections::HashMap;
use crate::FieldNamePosition;
/// Common denominator found across children's effective names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommonDenominator {
/// Children share this prefix. Fields append their unique suffix.
/// Example: children are ["addrs_0sats", "addrs_1sats"], common = "addrs_"
Prefix(String),
/// Children share this suffix. Fields prepend their unique prefix.
/// Example: children are ["cumulative_supply", "net_supply"], common = "_supply"
Suffix(String),
/// No common part found. Fields use Identity (field = base).
None,
}
/// Result of analyzing a pattern level.
#[derive(Debug, Clone)]
pub struct PatternAnalysis {
/// The common prefix/suffix found across all children.
pub common: CommonDenominator,
/// What's left after stripping the common part (passed to parent).
pub base: String,
/// How each field modifies the accumulated name.
pub field_positions: HashMap<String, FieldNamePosition>,
}
/// Analyze a pattern level using child effective names.
///
/// This is the core algorithm that detects common prefix/suffix and
/// determines field positions for each child.
///
/// # Arguments
/// * `child_names` - Vec of (field_name, effective_name) pairs
/// where effective_name is either:
/// - For leaves: the leaf's vec name
/// - For branches: the base returned by analyzing that branch
pub fn analyze_pattern_level(child_names: &[(String, String)]) -> PatternAnalysis {
if child_names.is_empty() {
return PatternAnalysis {
common: CommonDenominator::None,
base: String::new(),
field_positions: HashMap::new(),
};
}
if child_names.len() == 1 {
let (field_name, effective) = &child_names[0];
let mut positions = HashMap::new();
// Try suffix match: effective ends with "_fieldname"
let suffix_pattern = format!("_{}", field_name);
if let Some(base) = effective.strip_suffix(&suffix_pattern) {
positions.insert(
field_name.clone(),
FieldNamePosition::Append(suffix_pattern),
);
return PatternAnalysis {
common: CommonDenominator::None,
base: base.to_string(),
field_positions: positions,
};
}
// Try prefix match: effective starts with "fieldname_"
let prefix_pattern = format!("{}_", field_name);
if let Some(base) = effective.strip_prefix(&prefix_pattern) {
positions.insert(
field_name.clone(),
FieldNamePosition::Prepend(prefix_pattern),
);
return PatternAnalysis {
common: CommonDenominator::None,
base: base.to_string(),
field_positions: positions,
};
}
// Field equals effective OR field doesn't appear → Identity
// Root-level instances where field == effective are handled by
// passing empty `acc` and conditional position expressions
positions.insert(field_name.clone(), FieldNamePosition::Identity);
return PatternAnalysis {
common: CommonDenominator::None,
base: effective.clone(),
field_positions: positions,
};
}
let effective_names: Vec<&str> = child_names.iter().map(|(_, n)| n.as_str()).collect();
// Try to find common prefix first
if let Some(prefix) = find_common_prefix(&effective_names)
&& !prefix.is_empty()
{
let base = prefix.trim_end_matches('_').to_string();
let mut positions = HashMap::new();
for (field_name, effective) in child_names {
// If effective equals the base (prefix without underscore), use Identity
if effective == &base {
positions.insert(field_name.clone(), FieldNamePosition::Identity);
} else if let Some(suffix) = effective.strip_prefix(&prefix) {
// Normal case: effective has the full prefix
let suffix_with_underscore = if suffix.starts_with('_') {
suffix.to_string()
} else {
format!("_{}", suffix)
};
positions.insert(
field_name.clone(),
FieldNamePosition::Append(suffix_with_underscore),
);
} else {
// Fallback: use Identity if strip_prefix fails unexpectedly
positions.insert(field_name.clone(), FieldNamePosition::Identity);
}
}
return PatternAnalysis {
common: CommonDenominator::Prefix(prefix),
base,
field_positions: positions,
};
}
// Try to find common suffix
if let Some(suffix) = find_common_suffix(&effective_names)
&& !suffix.is_empty()
{
let mut positions = HashMap::new();
for (field_name, effective) in child_names {
let prefix = effective
.strip_suffix(&suffix)
.unwrap_or(effective)
.to_string();
let prefix_with_underscore = if prefix.ends_with('_') {
prefix
} else {
format!("{}_", prefix)
};
positions.insert(
field_name.clone(),
FieldNamePosition::Prepend(prefix_with_underscore),
);
}
let base = suffix.trim_start_matches('_').to_string();
return PatternAnalysis {
common: CommonDenominator::Suffix(suffix),
base,
field_positions: positions,
};
}
// No common part - use Identity for all fields
let mut positions = HashMap::new();
for (field_name, _) in child_names {
positions.insert(field_name.clone(), FieldNamePosition::Identity);
}
// Check if all fields are "true Identity" (field_name == effective_name)
// In that case, the base should be empty since metrics are accessed directly by field name
let all_true_identity = child_names
.iter()
.all(|(field_name, effective)| field_name == effective);
let base = if all_true_identity {
String::new()
} else {
// Use the first name as base (they're all independent but have different names)
child_names
.first()
.map(|(_, n)| n.clone())
.unwrap_or_default()
};
PatternAnalysis {
common: CommonDenominator::None,
base,
field_positions: positions,
}
}
//! This module provides utilities to find common prefixes and suffixes
//! among metric names, which is used to detect pattern mode (suffix vs prefix).
/// Find the longest common prefix among all strings.
/// The prefix must end at an underscore boundary for semantic coherence.
fn find_common_prefix(names: &[&str]) -> Option<String> {
if names.is_empty() {
/// Returns the prefix WITH trailing underscore if found at word boundary.
/// Returns None if no common prefix exists.
pub fn find_common_prefix(names: &[&str]) -> Option<String> {
if names.is_empty() || names.iter().any(|n| n.is_empty()) {
return None;
}
let first = names[0];
if first.is_empty() {
return None;
}
// Find character-by-character common prefix
let mut prefix_len = 0;
@@ -213,48 +29,41 @@ fn find_common_prefix(names: &[&str]) -> Option<String> {
let raw_prefix = &first[..prefix_len];
// If raw_prefix exactly matches one of the names, it's a complete metric name.
// In this case, return it with trailing underscore to preserve the full name.
// Must end at underscore boundary for semantic coherence
if raw_prefix.ends_with('_') {
return Some(raw_prefix.to_string());
}
// If raw_prefix equals one of the full names (one name is a prefix of all others),
// return it with trailing underscore for proper base detection
if names.contains(&raw_prefix) {
return Some(format!("{}_", raw_prefix));
}
// Find the last underscore position to get a clean boundary
// Prefer ending at an underscore for semantic coherence
if let Some(last_underscore) = raw_prefix.rfind('_')
&& last_underscore > 0
{
// Find the last underscore position
if let Some(last_underscore) = raw_prefix.rfind('_') {
let clean_prefix = &first[..=last_underscore];
// Verify this still works for all names
if names.iter().all(|n| n.starts_with(clean_prefix)) {
return Some(clean_prefix.to_string());
}
}
// If no underscore boundary works, the full prefix must end at an underscore
if raw_prefix.ends_with('_') {
return Some(raw_prefix.to_string());
}
None
}
/// Find the longest common suffix among all strings.
/// The suffix must start at an underscore boundary for semantic coherence.
fn find_common_suffix(names: &[&str]) -> Option<String> {
if names.is_empty() {
/// Returns the suffix WITH leading underscore if found at word boundary.
/// Returns None if no common suffix exists.
pub fn find_common_suffix(names: &[&str]) -> Option<String> {
if names.is_empty() || names.iter().any(|n| n.is_empty()) {
return None;
}
let first = names[0];
if first.is_empty() {
return None;
}
let first_chars: Vec<char> = first.chars().collect();
// Find character-by-character common suffix (from the end)
let first_chars: Vec<char> = first.chars().collect();
let mut suffix_len = 0;
for i in 0..first_chars.len() {
let idx_from_end = first_chars.len() - 1 - i;
let ch = first_chars[idx_from_end];
@@ -280,22 +89,34 @@ fn find_common_suffix(names: &[&str]) -> Option<String> {
let raw_suffix = &first[first.len() - suffix_len..];
// Find the first underscore position to get a clean boundary
if let Some(first_underscore) = raw_suffix.find('_')
&& first_underscore < raw_suffix.len() - 1
{
// Must start at underscore boundary for semantic coherence
if raw_suffix.starts_with('_') {
return Some(raw_suffix.to_string());
}
// Check if preceded by underscore in all names (word boundary)
let at_word_boundary = names.iter().all(|n| {
if *n == raw_suffix {
true // Suffix is the whole string
} else if let Some(prefix) = n.strip_suffix(raw_suffix) {
prefix.ends_with('_')
} else {
false
}
});
if at_word_boundary {
return Some(format!("_{}", raw_suffix));
}
// Find the first underscore position in suffix
if let Some(first_underscore) = raw_suffix.find('_') {
let clean_suffix = &raw_suffix[first_underscore..];
// Verify this still works for all names
if names.iter().all(|n| n.ends_with(clean_suffix)) {
return Some(clean_suffix.to_string());
}
}
// If no underscore boundary works, the full suffix must start with underscore
if raw_suffix.starts_with('_') {
return Some(raw_suffix.to_string());
}
None
}
@@ -304,187 +125,59 @@ mod tests {
use super::*;
#[test]
fn test_common_prefix() {
fn test_common_prefix_basic() {
let names = vec!["addrs_0sats", "addrs_1sats", "addrs_2sats"];
assert_eq!(find_common_prefix(&names), Some("addrs_".to_string()));
}
#[test]
fn test_common_suffix() {
fn test_common_prefix_none() {
let names = vec!["foo", "bar", "baz"];
assert_eq!(find_common_prefix(&names), None);
}
#[test]
fn test_common_prefix_lth() {
let names = vec!["lth_cost_basis_max", "lth_cost_basis_min", "lth_cost_basis"];
assert_eq!(find_common_prefix(&names), Some("lth_cost_basis_".to_string()));
}
#[test]
fn test_common_suffix_basic() {
let names = vec!["cumulative_supply", "net_supply", "total_supply"];
assert_eq!(find_common_suffix(&names), Some("_supply".to_string()));
}
#[test]
fn test_no_common() {
fn test_common_prefix_cost_basis() {
// With suffix naming convention, cost_basis variants share a common prefix
let names = vec!["cost_basis_max", "cost_basis_min", "cost_basis"];
assert_eq!(find_common_prefix(&names), Some("cost_basis_".to_string()));
}
#[test]
fn test_common_suffix_none() {
let names = vec!["foo", "bar", "baz"];
assert_eq!(find_common_prefix(&names), None);
assert_eq!(find_common_suffix(&names), None);
}
#[test]
fn test_analyze_pattern_level_prefix() {
let children = vec![
("_0sats".to_string(), "addrs_0sats".to_string()),
("_1sats".to_string(), "addrs_1sats".to_string()),
fn test_common_prefix_one_is_prefix_of_other() {
// When one name is a prefix of another (block_count vs block_count_cumulative)
let names = vec!["block_count_cumulative", "block_count"];
assert_eq!(find_common_prefix(&names), Some("block_count_".to_string()));
}
#[test]
fn test_common_suffix_realized_loss() {
let names = vec![
"cumulative_realized_loss",
"net_realized_loss",
"realized_loss",
];
let analysis = analyze_pattern_level(&children);
assert!(matches!(analysis.common, CommonDenominator::Prefix(_)));
assert_eq!(analysis.base, "addrs");
assert!(matches!(
analysis.field_positions.get("_0sats"),
Some(FieldNamePosition::Append(_))
));
}
#[test]
fn test_analyze_pattern_level_suffix() {
let children = vec![
("cumulative".to_string(), "cumulative_supply".to_string()),
("net".to_string(), "net_supply".to_string()),
];
let analysis = analyze_pattern_level(&children);
assert!(matches!(analysis.common, CommonDenominator::Suffix(_)));
assert_eq!(analysis.base, "supply");
assert!(matches!(
analysis.field_positions.get("cumulative"),
Some(FieldNamePosition::Prepend(_))
));
}
#[test]
fn test_single_child_suffix() {
// Field "count" appears as suffix "_count" in "activity_count"
let children = vec![("count".to_string(), "activity_count".to_string())];
let analysis = analyze_pattern_level(&children);
assert!(matches!(analysis.common, CommonDenominator::None));
assert_eq!(analysis.base, "activity");
assert_eq!(
analysis.field_positions.get("count"),
Some(&FieldNamePosition::Append("_count".to_string()))
);
}
#[test]
fn test_single_child_prefix() {
// Field "cumulative" appears as prefix "cumulative_" in "cumulative_supply"
let children = vec![("cumulative".to_string(), "cumulative_supply".to_string())];
let analysis = analyze_pattern_level(&children);
assert!(matches!(analysis.common, CommonDenominator::None));
assert_eq!(analysis.base, "supply");
assert_eq!(
analysis.field_positions.get("cumulative"),
Some(&FieldNamePosition::Prepend("cumulative_".to_string()))
);
}
#[test]
fn test_single_child_identity_equal() {
// Field "supply" equals effective "supply" → Identity
// (root-level handling is done via empty acc and conditional expressions)
let children = vec![("supply".to_string(), "supply".to_string())];
let analysis = analyze_pattern_level(&children);
assert!(matches!(analysis.common, CommonDenominator::None));
assert_eq!(analysis.base, "supply");
assert_eq!(
analysis.field_positions.get("supply"),
Some(&FieldNamePosition::Identity)
);
}
#[test]
fn test_single_child_identity_structural() {
// Field "x" doesn't appear in "a_b" - it's structural grouping
let children = vec![("x".to_string(), "a_b".to_string())];
let analysis = analyze_pattern_level(&children);
assert!(matches!(analysis.common, CommonDenominator::None));
assert_eq!(analysis.base, "a_b"); // passes through unchanged
assert_eq!(
analysis.field_positions.get("x"),
Some(&FieldNamePosition::Identity)
);
}
#[test]
fn test_common_prefix_exact_match() {
// When one name exactly matches the common prefix, preserve the full name
// This fixes the realized_loss vs realized_count bug
let names = vec!["realized_loss", "realized_loss_cumulative"];
assert_eq!(
find_common_prefix(&names),
Some("realized_loss_".to_string())
);
}
#[test]
fn test_common_prefix_exact_match_multiple() {
// Multiple children with same base name
let names = vec!["realized_loss", "realized_loss", "realized_loss_cumulative"];
assert_eq!(
find_common_prefix(&names),
Some("realized_loss_".to_string())
);
}
#[test]
fn test_analyze_pattern_level_full_base() {
// When names are like [realized_loss, realized_loss_cumulative],
// base should be "realized_loss" not "realized"
let children = vec![
("sum".to_string(), "realized_loss".to_string()),
(
"cumulative".to_string(),
"realized_loss_cumulative".to_string(),
),
];
let analysis = analyze_pattern_level(&children);
assert!(matches!(analysis.common, CommonDenominator::Prefix(_)));
assert_eq!(analysis.base, "realized_loss");
// sum effective equals base, so position is Identity
assert_eq!(
analysis.field_positions.get("sum"),
Some(&FieldNamePosition::Identity)
);
// cumulative has suffix "_cumulative" after the base
assert_eq!(
analysis.field_positions.get("cumulative"),
Some(&FieldNamePosition::Append("_cumulative".to_string()))
);
}
#[test]
fn test_analyze_pattern_level_no_base_field() {
// When there's no base field (like block_weight which has no block_weight metric),
// only suffixed metrics like block_weight_average, block_weight_sum, etc.
// Base should still be "block_weight"
let children = vec![
("average".to_string(), "block_weight_average".to_string()),
("sum".to_string(), "block_weight_sum".to_string()),
(
"cumulative".to_string(),
"block_weight_cumulative".to_string(),
),
("max".to_string(), "block_weight_max".to_string()),
("min".to_string(), "block_weight_min".to_string()),
];
let analysis = analyze_pattern_level(&children);
assert!(matches!(analysis.common, CommonDenominator::Prefix(_)));
assert_eq!(analysis.base, "block_weight");
assert_eq!(
analysis.field_positions.get("average"),
Some(&FieldNamePosition::Append("_average".to_string()))
);
assert_eq!(
analysis.field_positions.get("sum"),
Some(&FieldNamePosition::Append("_sum".to_string()))
find_common_suffix(&names),
Some("_realized_loss".to_string())
);
}
}
+10 -6
View File
@@ -7,7 +7,7 @@ use std::collections::{BTreeSet, HashMap};
use brk_types::{TreeNode, extract_json_type};
use super::analyze_all_field_positions;
use super::analyze_pattern_modes;
use crate::{PatternField, StructuralPattern, to_pascal_case};
/// Context for pattern detection, holding all intermediate state.
@@ -39,6 +39,7 @@ impl PatternContext {
/// Detect structural patterns in the tree using a bottom-up approach.
///
/// Returns (patterns, concrete_to_pattern, concrete_to_type_param).
/// Each pattern has its `mode` set based on analysis of all instances.
pub fn detect_structural_patterns(
tree: &TreeNode,
) -> (
@@ -52,7 +53,9 @@ pub fn detect_structural_patterns(
let (generic_patterns, generic_mappings, type_mappings) =
detect_generic_patterns(&ctx.signature_to_pattern);
let mut patterns: Vec<StructuralPattern> = ctx.signature_to_pattern
// Only include patterns that appear 2+ times for the patterns list
let mut patterns: Vec<StructuralPattern> = ctx
.signature_to_pattern
.iter()
.filter(|(sig, _)| {
ctx.signature_counts.get(*sig).copied().unwrap_or(0) >= 2
@@ -76,7 +79,7 @@ pub fn detect_structural_patterns(
StructuralPattern {
name: name.clone(),
fields: fields_with_type_params,
field_positions: HashMap::new(),
mode: None, // Will be determined by analyze_pattern_modes
is_generic: false,
}
})
@@ -84,6 +87,7 @@ pub fn detect_structural_patterns(
patterns.extend(generic_patterns);
// Build pattern lookup for mode analysis (patterns appearing 2+ times)
let mut pattern_lookup: HashMap<Vec<PatternField>, String> = HashMap::new();
for (sig, name) in &ctx.signature_to_pattern {
if ctx.signature_counts.get(sig).copied().unwrap_or(0) >= 2 {
@@ -94,8 +98,8 @@ pub fn detect_structural_patterns(
let concrete_to_pattern = pattern_lookup.clone();
// Use the new bottom-up field position analysis
analyze_all_field_positions(tree, &mut patterns, &pattern_lookup);
// Analyze pattern modes (suffix vs prefix) from all instances
analyze_pattern_modes(tree, &mut patterns, &pattern_lookup);
patterns.sort_by(|a, b| b.fields.len().cmp(&a.fields.len()));
(patterns, concrete_to_pattern, type_mappings)
@@ -137,7 +141,7 @@ fn detect_generic_patterns(
patterns.push(StructuralPattern {
name: generic_name,
fields: normalized_fields,
field_positions: HashMap::new(),
mode: None, // Will be determined by analyze_pattern_modes
is_generic: true,
});
}
+381 -90
View File
@@ -1,149 +1,440 @@
//! Field position detection for pattern instances.
//! Pattern mode detection and field part extraction.
//!
//! This module bridges the name analysis with pattern field positions,
//! processing patterns bottom-up to determine how each field modifies
//! the accumulated metric name.
//! This module analyzes pattern instances to detect whether they use
//! suffix mode (fields append to acc) or prefix mode (fields prepend to acc),
//! and extracts the field parts (relatives or prefixes) for code generation.
use std::collections::HashMap;
use brk_types::TreeNode;
use super::{analyze_pattern_level, get_node_fields};
use crate::{FieldNamePosition, PatternField, StructuralPattern};
use super::{find_common_prefix, find_common_suffix, get_node_fields};
use crate::{PatternField, PatternMode, StructuralPattern};
/// Analyze field positions for all patterns using bottom-up tree traversal.
/// Result of analyzing a single pattern instance.
#[derive(Debug, Clone)]
struct InstanceAnalysis {
/// The base to return to parent (used for nesting)
base: String,
/// For suffix mode: field -> relative name
/// For prefix mode: field -> prefix
field_parts: HashMap<String, String>,
/// Whether this instance appears to be suffix mode
is_suffix_mode: bool,
}
/// Analyze all pattern instances and determine their modes.
///
/// This is the main entry point for field position detection. It processes
/// the tree bottom-up, analyzing each pattern instance and aggregating
/// the positions across all instances.
pub fn analyze_all_field_positions(
/// This is the main entry point for mode detection. It processes
/// the tree bottom-up, collecting analysis for each pattern instance,
/// then determines the consistent mode for each pattern.
pub fn analyze_pattern_modes(
tree: &TreeNode,
patterns: &mut [StructuralPattern],
pattern_lookup: &HashMap<Vec<PatternField>, String>,
) {
let mut all_positions: HashMap<String, HashMap<String, Vec<FieldNamePosition>>> =
HashMap::new();
// Collect analyses from all instances, keyed by pattern name
let mut all_analyses: HashMap<String, Vec<InstanceAnalysis>> = HashMap::new();
// Collect positions from all instances bottom-up
collect_positions_bottom_up(tree, pattern_lookup, &mut all_positions);
// Bottom-up traversal
collect_instance_analyses(tree, pattern_lookup, &mut all_analyses);
// Merge positions into patterns
// For each pattern, determine mode from collected instances
for pattern in patterns.iter_mut() {
if let Some(field_positions) = all_positions.get(&pattern.name) {
pattern.field_positions = merge_field_positions(field_positions);
if let Some(analyses) = all_analyses.get(&pattern.name) {
pattern.mode = determine_pattern_mode(analyses, &pattern.fields);
}
}
}
/// Recursively collect field positions bottom-up.
/// Returns the effective base for this node (used by parent level).
fn collect_positions_bottom_up(
/// Recursively collect instance analyses bottom-up.
/// Returns the "base" for this node (used by parent for its analysis).
fn collect_instance_analyses(
node: &TreeNode,
pattern_lookup: &HashMap<Vec<PatternField>, String>,
all_positions: &mut HashMap<String, HashMap<String, Vec<FieldNamePosition>>>,
all_analyses: &mut HashMap<String, Vec<InstanceAnalysis>>,
) -> Option<String> {
match node {
TreeNode::Leaf(leaf) => {
// Leaves return their vec name as the effective base
// Leaves return their metric name as the base
Some(leaf.name().to_string())
}
TreeNode::Branch(children) => {
// First, process all children recursively (bottom-up)
let mut child_bases: HashMap<String, String> = HashMap::new();
for (field_name, child_node) in children {
if let Some(base) = collect_positions_bottom_up(child_node, pattern_lookup, all_positions) {
if let Some(base) =
collect_instance_analyses(child_node, pattern_lookup, all_analyses)
{
child_bases.insert(field_name.clone(), base);
}
}
// Build child names for this level's analysis
let child_names: Vec<(String, String)> = children
.keys()
.filter_map(|field_name| {
child_bases
.get(field_name)
.map(|base| (field_name.clone(), base.clone()))
})
.collect();
if child_names.is_empty() {
if child_bases.is_empty() {
return None;
}
// Analyze this level
let analysis = analyze_pattern_level(&child_names);
// Analyze this instance
let analysis = analyze_instance(&child_bases);
// Get the pattern name for this node (if any)
let fields = get_node_fields(children, pattern_lookup);
if let Some(pattern_name) = pattern_lookup.get(&fields) {
// Record field positions for this pattern instance
for (field_name, position) in &analysis.field_positions {
all_positions
.entry(pattern_name.clone())
.or_default()
.entry(field_name.clone())
.or_default()
.push(position.clone());
}
all_analyses
.entry(pattern_name.clone())
.or_default()
.push(analysis.clone());
}
// Return our base for the parent level
// Return the base for parent
Some(analysis.base)
}
}
}
/// Check if a list of positions contains incompatible values.
///
/// Positions are incompatible if there are multiple different non-Identity positions,
/// meaning different pattern instances use different naming conventions.
fn has_incompatible_positions(positions: &[FieldNamePosition]) -> bool {
let non_identity: Vec<_> = positions
.iter()
.filter(|p| !matches!(p, FieldNamePosition::Identity))
.collect();
/// Analyze a single pattern instance from its child bases.
fn analyze_instance(child_bases: &HashMap<String, String>) -> InstanceAnalysis {
let bases: Vec<&str> = child_bases.values().map(|s| s.as_str()).collect();
if non_identity.len() <= 1 {
return false;
// Try suffix mode first: look for common prefix among children
if let Some(common_prefix) = find_common_prefix(&bases) {
let base = common_prefix.trim_end_matches('_').to_string();
let mut field_parts = HashMap::new();
for (field_name, child_base) in child_bases {
// Relative = child_base with common prefix stripped
// If child_base equals base, relative is empty (identity field)
let relative = if child_base == &base {
String::new()
} else {
child_base
.strip_prefix(&common_prefix)
.unwrap_or(child_base)
.to_string()
};
field_parts.insert(field_name.clone(), relative);
}
return InstanceAnalysis {
base,
field_parts,
is_suffix_mode: true,
};
}
// Check if all non-identity positions are the same
let first = &non_identity[0];
non_identity.iter().skip(1).any(|p| p != first)
// Try prefix mode: look for common suffix among children
if let Some(common_suffix) = find_common_suffix(&bases) {
let base = common_suffix.trim_start_matches('_').to_string();
let mut field_parts = HashMap::new();
for (field_name, child_base) in child_bases {
// Prefix = child_base with common suffix stripped
let prefix = child_base
.strip_suffix(&common_suffix)
.map(|s| {
// Ensure prefix ends with underscore if non-empty
if s.is_empty() {
String::new()
} else if s.ends_with('_') {
s.to_string()
} else {
format!("{}_", s)
}
})
.unwrap_or_default();
field_parts.insert(field_name.clone(), prefix);
}
return InstanceAnalysis {
base,
field_parts,
is_suffix_mode: false,
};
}
// No common prefix or suffix - use first child's base and treat as suffix mode
// with full metric names as relatives
let base = child_bases.values().next().cloned().unwrap_or_default();
let field_parts = child_bases
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
InstanceAnalysis {
base,
field_parts,
is_suffix_mode: true,
}
}
/// Merge multiple observed positions for each field into a single position.
///
/// Returns an empty map if any field has incompatible positions across instances,
/// which will cause `is_parameterizable()` to return false for the pattern.
fn merge_field_positions(
field_positions: &HashMap<String, Vec<FieldNamePosition>>,
) -> HashMap<String, FieldNamePosition> {
// First check for incompatible positions
for positions in field_positions.values() {
if has_incompatible_positions(positions) {
// Incompatible positions found - pattern cannot be parameterized
return HashMap::new();
/// Determine the consistent mode for a pattern from all its instances.
/// Uses majority voting: if most instances agree on mode and field_parts,
/// use those. Minority instances will be inlined at usage sites.
fn determine_pattern_mode(
analyses: &[InstanceAnalysis],
fields: &[PatternField],
) -> Option<PatternMode> {
if analyses.is_empty() {
return None;
}
// Group instances by (mode, field_parts) signature
let suffix_instances: Vec<_> = analyses.iter().filter(|a| a.is_suffix_mode).collect();
let prefix_instances: Vec<_> = analyses.iter().filter(|a| !a.is_suffix_mode).collect();
// Pick the majority mode group
let (majority_instances, is_suffix) = if suffix_instances.len() >= prefix_instances.len() {
(suffix_instances, true)
} else {
(prefix_instances, false)
};
if majority_instances.is_empty() {
return None;
}
// Find the most common field_parts within the majority group
// Convert to sorted Vec for comparison since HashMap isn't hashable
let mut parts_counts: HashMap<Vec<(String, String)>, usize> = HashMap::new();
for analysis in &majority_instances {
let mut sorted: Vec<_> = analysis.field_parts.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
sorted.sort();
*parts_counts.entry(sorted).or_insert(0) += 1;
}
let (best_parts_vec, _count) = parts_counts.into_iter().max_by_key(|(_, count)| *count)?;
let best_parts: HashMap<String, String> = best_parts_vec.into_iter().collect();
// Verify all required fields have parts
for field in fields {
if !best_parts.contains_key(&field.name) {
return None;
}
}
// All positions are compatible, proceed with merge
field_positions
.iter()
.filter_map(|(field_name, positions)| {
if positions.is_empty() {
return None;
}
let field_parts = best_parts;
// Prefer Append/Prepend over Identity, as Identity at root-level
// is handled by empty acc and conditional position expressions
let preferred = positions
.iter()
.find(|p| !matches!(p, FieldNamePosition::Identity))
.cloned()
.unwrap_or_else(|| positions[0].clone());
Some((field_name.clone(), preferred))
if is_suffix {
Some(PatternMode::Suffix {
relatives: field_parts,
})
.collect()
} else {
Some(PatternMode::Prefix {
prefixes: field_parts,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_analyze_instance_suffix_mode() {
let mut child_bases = HashMap::new();
child_bases.insert("max".to_string(), "lth_cost_basis_max".to_string());
child_bases.insert("min".to_string(), "lth_cost_basis_min".to_string());
child_bases.insert("percentiles".to_string(), "lth_cost_basis".to_string());
let analysis = analyze_instance(&child_bases);
assert!(analysis.is_suffix_mode);
assert_eq!(analysis.base, "lth_cost_basis");
assert_eq!(analysis.field_parts.get("max"), Some(&"max".to_string()));
assert_eq!(analysis.field_parts.get("min"), Some(&"min".to_string()));
assert_eq!(analysis.field_parts.get("percentiles"), Some(&"".to_string()));
}
#[test]
fn test_analyze_instance_prefix_mode() {
// Period-prefixed metrics like "1y_lump_sum_stack", "1m_lump_sum_stack"
// share a common suffix "_lump_sum_stack" with different period prefixes
let mut child_bases = HashMap::new();
child_bases.insert("_1y".to_string(), "1y_lump_sum_stack".to_string());
child_bases.insert("_1m".to_string(), "1m_lump_sum_stack".to_string());
child_bases.insert("_1w".to_string(), "1w_lump_sum_stack".to_string());
let analysis = analyze_instance(&child_bases);
assert!(!analysis.is_suffix_mode);
assert_eq!(analysis.base, "lump_sum_stack");
assert_eq!(analysis.field_parts.get("_1y"), Some(&"1y_".to_string()));
assert_eq!(analysis.field_parts.get("_1m"), Some(&"1m_".to_string()));
assert_eq!(analysis.field_parts.get("_1w"), Some(&"1w_".to_string()));
}
#[test]
fn test_analyze_instance_root_suffix() {
// At root level with suffix naming convention
let mut child_bases = HashMap::new();
child_bases.insert("max".to_string(), "cost_basis_max".to_string());
child_bases.insert("min".to_string(), "cost_basis_min".to_string());
child_bases.insert("percentiles".to_string(), "cost_basis".to_string());
let analysis = analyze_instance(&child_bases);
// With suffix naming, common prefix is "cost_basis_" (since cost_basis is one of the names)
assert!(analysis.is_suffix_mode);
assert_eq!(analysis.base, "cost_basis");
assert_eq!(analysis.field_parts.get("max"), Some(&"max".to_string()));
assert_eq!(analysis.field_parts.get("min"), Some(&"min".to_string()));
assert_eq!(analysis.field_parts.get("percentiles"), Some(&"".to_string()));
}
#[test]
fn test_determine_pattern_mode_majority_voting() {
// Test that majority voting works when instances have mixed modes.
// This simulates CostBasisPattern2: most instances use suffix mode,
// but root-level uses prefix mode (max_cost_basis, min_cost_basis, cost_basis).
use std::collections::BTreeSet;
let fields = vec![
PatternField {
name: "max".to_string(),
rust_type: "TestType".to_string(),
json_type: "number".to_string(),
indexes: BTreeSet::new(),
type_param: None,
},
PatternField {
name: "min".to_string(),
rust_type: "TestType".to_string(),
json_type: "number".to_string(),
indexes: BTreeSet::new(),
type_param: None,
},
PatternField {
name: "percentiles".to_string(),
rust_type: "TestType".to_string(),
json_type: "number".to_string(),
indexes: BTreeSet::new(),
type_param: None,
},
];
// 3 suffix mode instances (majority)
let suffix1 = InstanceAnalysis {
base: "lth_cost_basis".to_string(),
field_parts: [
("max".to_string(), "max".to_string()),
("min".to_string(), "min".to_string()),
("percentiles".to_string(), "".to_string()),
]
.into_iter()
.collect(),
is_suffix_mode: true,
};
let suffix2 = InstanceAnalysis {
base: "sth_cost_basis".to_string(),
field_parts: [
("max".to_string(), "max".to_string()),
("min".to_string(), "min".to_string()),
("percentiles".to_string(), "".to_string()),
]
.into_iter()
.collect(),
is_suffix_mode: true,
};
let suffix3 = InstanceAnalysis {
base: "utxo_cost_basis".to_string(),
field_parts: [
("max".to_string(), "max".to_string()),
("min".to_string(), "min".to_string()),
("percentiles".to_string(), "".to_string()),
]
.into_iter()
.collect(),
is_suffix_mode: true,
};
// 1 prefix mode instance (minority - root level)
let prefix1 = InstanceAnalysis {
base: "cost_basis".to_string(),
field_parts: [
("max".to_string(), "max_".to_string()),
("min".to_string(), "min_".to_string()),
("percentiles".to_string(), "".to_string()),
]
.into_iter()
.collect(),
is_suffix_mode: false,
};
let analyses = vec![suffix1, suffix2, suffix3, prefix1];
let mode = determine_pattern_mode(&analyses, &fields);
// Should pick suffix mode (majority) with the common field_parts
assert!(mode.is_some());
match mode.unwrap() {
PatternMode::Suffix { relatives } => {
assert_eq!(relatives.get("max"), Some(&"max".to_string()));
assert_eq!(relatives.get("min"), Some(&"min".to_string()));
assert_eq!(relatives.get("percentiles"), Some(&"".to_string()));
}
PatternMode::Prefix { .. } => {
panic!("Expected suffix mode, got prefix mode");
}
}
}
#[test]
fn test_determine_pattern_mode_all_same() {
// Test when all instances agree on mode and field_parts
use std::collections::BTreeSet;
let fields = vec![
PatternField {
name: "max".to_string(),
rust_type: "TestType".to_string(),
json_type: "number".to_string(),
indexes: BTreeSet::new(),
type_param: None,
},
PatternField {
name: "min".to_string(),
rust_type: "TestType".to_string(),
json_type: "number".to_string(),
indexes: BTreeSet::new(),
type_param: None,
},
];
let instance1 = InstanceAnalysis {
base: "metric_a".to_string(),
field_parts: [
("max".to_string(), "max".to_string()),
("min".to_string(), "min".to_string()),
]
.into_iter()
.collect(),
is_suffix_mode: true,
};
let instance2 = InstanceAnalysis {
base: "metric_b".to_string(),
field_parts: [
("max".to_string(), "max".to_string()),
("min".to_string(), "min".to_string()),
]
.into_iter()
.collect(),
is_suffix_mode: true,
};
let analyses = vec![instance1, instance2];
let mode = determine_pattern_mode(&analyses, &fields);
assert!(mode.is_some());
match mode.unwrap() {
PatternMode::Suffix { relatives } => {
assert_eq!(relatives.get("max"), Some(&"max".to_string()));
assert_eq!(relatives.get("min"), Some(&"min".to_string()));
}
PatternMode::Prefix { .. } => {
panic!("Expected suffix mode");
}
}
}
}
+80 -17
View File
@@ -7,7 +7,9 @@ use std::collections::{BTreeMap, BTreeSet, HashMap};
use brk_types::{Index, TreeNode, extract_json_type};
use crate::{IndexSetPattern, PatternField, analysis::names::{analyze_pattern_level, CommonDenominator}, child_type_name};
use crate::{IndexSetPattern, PatternField, child_type_name};
use super::{find_common_prefix, find_common_suffix};
/// Get the first leaf name from a tree node.
pub fn get_first_leaf_name(node: &TreeNode) -> Option<String> {
@@ -147,8 +149,7 @@ impl PatternBaseResult {
/// Get the metric base for a pattern instance by analyzing direct children.
///
/// Uses field names and first leaf names from direct children to determine
/// the common base via `analyze_pattern_level`.
/// Uses the shortest leaf names from direct children to find common prefix/suffix.
///
/// If the initial analysis fails to find a common pattern, it tries excluding
/// each child one at a time to detect outliers (e.g., a mismatched "base" field
@@ -164,18 +165,12 @@ pub fn get_pattern_instance_base(node: &TreeNode) -> PatternBaseResult {
};
}
let analysis = analyze_pattern_level(&child_names);
// If we found a common pattern, use it
if !matches!(analysis.common, CommonDenominator::None) {
return PatternBaseResult {
base: analysis.base,
has_outlier: false,
};
// Try to find common base from leaf names
if let Some((base, has_outlier)) = try_find_base(&child_names, false) {
return PatternBaseResult { base, has_outlier };
}
// If no common pattern found, try excluding each child one at a time
// to detect if there's a single outlier breaking the pattern.
// If no common pattern found and we have enough children, try excluding outliers
if child_names.len() > 2 {
for i in 0..child_names.len() {
let filtered: Vec<_> = child_names
@@ -185,22 +180,43 @@ pub fn get_pattern_instance_base(node: &TreeNode) -> PatternBaseResult {
.map(|(_, v)| v.clone())
.collect();
let filtered_analysis = analyze_pattern_level(&filtered);
if !matches!(filtered_analysis.common, CommonDenominator::None) {
if let Some((base, _)) = try_find_base(&filtered, true) {
return PatternBaseResult {
base: filtered_analysis.base,
base,
has_outlier: true,
};
}
}
}
// Fallback: no common prefix/suffix found - this is a root-level pattern
// Return empty base so metric names are used directly
PatternBaseResult {
base: analysis.base,
base: String::new(),
has_outlier: false,
}
}
/// Try to find a common base from child names using prefix/suffix detection.
/// Returns Some((base, has_outlier)) if found.
fn try_find_base(child_names: &[(String, String)], is_outlier_attempt: bool) -> Option<(String, bool)> {
let leaf_names: Vec<&str> = child_names.iter().map(|(_, n)| n.as_str()).collect();
// Try common prefix first (suffix mode)
if let Some(prefix) = find_common_prefix(&leaf_names) {
let base = prefix.trim_end_matches('_').to_string();
return Some((base, is_outlier_attempt));
}
// Try common suffix (prefix mode)
if let Some(suffix) = find_common_suffix(&leaf_names) {
let base = suffix.trim_start_matches('_').to_string();
return Some((base, is_outlier_attempt));
}
None
}
/// Get (field_name, shortest_leaf_name) pairs for direct children of a branch node.
///
/// Uses the shortest leaf name from each child subtree to find the "base" case
@@ -371,4 +387,51 @@ mod tests {
assert_eq!(result.base, "block_weight");
assert!(result.has_outlier); // Pattern factory should NOT be used
}
#[test]
fn test_get_pattern_instance_base_root_level_no_common_pattern() {
// Simulates root-level pattern with metrics that have no common prefix/suffix.
// These names have no shared prefix or suffix, even when excluding any one.
// In this case, we should return empty base so metric names are used directly.
let tree = make_branch(vec![
("alpha", make_leaf("foo_metric")),
("beta", make_leaf("bar_value")),
("gamma", make_leaf("baz_count")),
]);
let result = get_pattern_instance_base(&tree);
// No common prefix or suffix - return empty base
assert_eq!(result.base, "");
assert!(!result.has_outlier);
}
#[test]
fn test_get_pattern_instance_base_two_children_no_pattern() {
// Two children with no common pattern - should still return empty base
let tree = make_branch(vec![
("foo", make_leaf("alpha")),
("bar", make_leaf("beta")),
]);
let result = get_pattern_instance_base(&tree);
assert_eq!(result.base, "");
assert!(!result.has_outlier);
}
#[test]
fn test_get_pattern_instance_base_with_outlier_excluded() {
// Simulates the realized pattern: adjusted_sopr, sopr, asopr.
// When "asopr" is excluded as outlier, "adjusted_sopr" and "sopr" share suffix "_sopr".
// The outlier detection should find base="sopr" with has_outlier=true.
let tree = make_branch(vec![
("adjustedSopr", make_leaf("adjusted_sopr")),
("sopr", make_leaf("sopr")),
("asopr", make_leaf("asopr")),
]);
let result = get_pattern_instance_base(&tree);
// Outlier detected - pattern base found by excluding "asopr"
assert_eq!(result.base, "sopr");
assert!(result.has_outlier); // Pattern factory should NOT be used (inline instead)
}
}
+21 -27
View File
@@ -1,6 +1,6 @@
//! JavaScript language syntax implementation.
use crate::{FieldNamePosition, GenericSyntax, LanguageSyntax, to_camel_case, to_pascal_case};
use crate::{GenericSyntax, LanguageSyntax, to_camel_case, to_pascal_case};
/// JavaScript-specific code generation syntax.
pub struct JavaScriptSyntax;
@@ -16,32 +16,26 @@ impl LanguageSyntax for JavaScriptSyntax {
format!("`${{{}}}{}`", var_name, suffix)
}
fn position_expr(&self, pos: &FieldNamePosition, base_var: &str) -> String {
// Convert base_var to camelCase for JavaScript
let var_name = to_camel_case(base_var);
match pos {
FieldNamePosition::Append(s) => {
// Use helper _m(acc, suffix) to build metric name
// e.g., _m(acc, "cap") produces: acc ? `${acc}_cap` : 'cap'
if let Some(suffix) = s.strip_prefix('_') {
format!("_m({}, '{}')", var_name, suffix)
} else {
format!("`${{{}}}{}`", var_name, s)
}
}
FieldNamePosition::Prepend(s) => {
// Handle empty acc case for prepend
if let Some(prefix) = s.strip_suffix('_') {
format!(
"({} ? `{}${{{}}}` : '{}')",
var_name, s, var_name, prefix
)
} else {
format!("`{}${{{}}}`", s, var_name)
}
}
FieldNamePosition::Identity => var_name,
FieldNamePosition::SetBase(s) => format!("'{}'", s),
fn suffix_expr(&self, acc_var: &str, relative: &str) -> String {
let var_name = to_camel_case(acc_var);
if relative.is_empty() {
// Identity: just return acc
var_name
} else {
// _m(acc, relative) -> acc ? `${acc}_relative` : 'relative'
format!("_m({}, '{}')", var_name, relative)
}
}
fn prefix_expr(&self, prefix: &str, acc_var: &str) -> String {
let var_name = to_camel_case(acc_var);
if prefix.is_empty() {
// Identity: just return acc
var_name
} else {
// _p(prefix, acc) -> acc ? `${prefix}${acc}` : 'prefix_without_underscore'
let prefix_base = prefix.trim_end_matches('_');
format!("_p('{}', {})", prefix_base, var_name)
}
}
+19 -25
View File
@@ -1,6 +1,6 @@
//! Python language syntax implementation.
use crate::{FieldNamePosition, GenericSyntax, LanguageSyntax, escape_python_keyword, to_snake_case};
use crate::{GenericSyntax, LanguageSyntax, escape_python_keyword, to_snake_case};
/// Python-specific code generation syntax.
pub struct PythonSyntax;
@@ -14,30 +14,24 @@ impl LanguageSyntax for PythonSyntax {
format!("f'{{{}}}{}'", base_var, suffix)
}
fn position_expr(&self, pos: &FieldNamePosition, base_var: &str) -> String {
match pos {
FieldNamePosition::Append(s) => {
// Use helper _m(acc, suffix) to build metric name
if let Some(suffix) = s.strip_prefix('_') {
format!("_m({}, '{}')", base_var, suffix)
} else {
format!("f'{{{}}}{}'", base_var, s)
}
}
FieldNamePosition::Prepend(s) => {
// Handle empty acc case for prepend
// Want to produce: (f'prefix_{acc}' if acc else 'prefix')
if let Some(prefix) = s.strip_suffix('_') {
format!(
"(f'{}{{{}}}' if {} else '{}')",
s, base_var, base_var, prefix
)
} else {
format!("f'{}{{{}}}'" , s, base_var)
}
}
FieldNamePosition::Identity => base_var.to_string(),
FieldNamePosition::SetBase(s) => format!("'{}'", s),
fn suffix_expr(&self, acc_var: &str, relative: &str) -> String {
if relative.is_empty() {
// Identity: just return acc
acc_var.to_string()
} else {
// _m(acc, relative) -> f'{acc}_{relative}' if acc else 'relative'
format!("_m({}, '{}')", acc_var, relative)
}
}
fn prefix_expr(&self, prefix: &str, acc_var: &str) -> String {
if prefix.is_empty() {
// Identity: just return acc
acc_var.to_string()
} else {
// _p(prefix, acc) -> f'{prefix}{acc}' if acc else 'prefix_base'
let prefix_base = prefix.trim_end_matches('_');
format!("_p('{}', {})", prefix_base, acc_var)
}
}
+19 -25
View File
@@ -1,6 +1,6 @@
//! Rust language syntax implementation.
use crate::{FieldNamePosition, GenericSyntax, LanguageSyntax, to_snake_case};
use crate::{GenericSyntax, LanguageSyntax, to_snake_case};
/// Rust-specific code generation syntax.
pub struct RustSyntax;
@@ -14,30 +14,24 @@ impl LanguageSyntax for RustSyntax {
format!("format!(\"{{{}}}{}\")", base_var, suffix)
}
fn position_expr(&self, pos: &FieldNamePosition, _base_var: &str) -> String {
match pos {
FieldNamePosition::Append(s) => {
// Use helper _m(&acc, suffix) to build metric name
if let Some(suffix) = s.strip_prefix('_') {
format!("_m(&acc, \"{}\")", suffix)
} else {
format!("format!(\"{{acc}}{}\")", s)
}
}
FieldNamePosition::Prepend(s) => {
// Handle empty acc case for prepend
if let Some(prefix) = s.strip_suffix('_') {
format!(
"if acc.is_empty() {{ \"{prefix}\".to_string() }} else {{ format!(\"{s}{{acc}}\") }}",
prefix = prefix,
s = s
)
} else {
format!("format!(\"{}{{acc}}\")", s)
}
}
FieldNamePosition::Identity => "acc.clone()".to_string(),
FieldNamePosition::SetBase(base) => format!("\"{}\".to_string()", base),
fn suffix_expr(&self, acc_var: &str, relative: &str) -> String {
if relative.is_empty() {
// Identity: just return acc
format!("{}.clone()", acc_var)
} else {
// _m(&acc, relative) -> if acc.is_empty() { relative } else { format!("{acc}_{relative}") }
format!("_m(&{}, \"{}\")", acc_var, relative)
}
}
fn prefix_expr(&self, prefix: &str, acc_var: &str) -> String {
if prefix.is_empty() {
// Identity: just return acc
format!("{}.clone()", acc_var)
} else {
// _p(prefix, &acc) -> if acc.is_empty() { prefix_base } else { format!("{prefix}{acc}") }
let prefix_base = prefix.trim_end_matches('_');
format!("_p(\"{}\", &{})", prefix_base, acc_var)
}
}
+45 -35
View File
@@ -20,6 +20,46 @@ fn path_suffix(name: &str) -> String {
}
}
/// Compute path expression from pattern mode and field part.
fn compute_path_expr<S: LanguageSyntax>(
syntax: &S,
pattern: &StructuralPattern,
field: &PatternField,
base_var: &str,
) -> String {
match pattern.get_field_part(&field.name) {
Some(part) => {
if pattern.is_suffix_mode() {
syntax.suffix_expr(base_var, part)
} else {
syntax.prefix_expr(part, base_var)
}
}
None => syntax.path_expr(base_var, &path_suffix(&field.name)),
}
}
/// Compute field value from path expression.
fn compute_field_value<S: LanguageSyntax>(
syntax: &S,
field: &PatternField,
metadata: &ClientMetadata,
path_expr: &str,
) -> String {
if metadata.is_pattern_type(&field.rust_type) {
syntax.constructor(&field.rust_type, path_expr)
} else if let Some(accessor) = metadata.find_index_set_pattern(&field.indexes) {
syntax.constructor(&accessor.name, path_expr)
} else if field.is_branch() {
syntax.constructor(&field.rust_type, path_expr)
} else {
panic!(
"Field '{}' has no matching pattern or index accessor. All metrics must be indexed.",
field.name
)
}
}
/// Generate a parameterized field using the language syntax.
///
/// This is used for pattern instances where fields use an accumulated
@@ -34,26 +74,8 @@ pub fn generate_parameterized_field<S: LanguageSyntax>(
) {
let field_name = syntax.field_name(&field.name);
let type_ann = metadata.field_type_annotation(field, pattern.is_generic, None, syntax.generic_syntax());
// Compute path expression from field position
let path_expr = pattern
.get_field_position(&field.name)
.map(|pos| syntax.position_expr(pos, "acc"))
.unwrap_or_else(|| syntax.path_expr("acc", &path_suffix(&field.name)));
let value = if metadata.is_pattern_type(&field.rust_type) {
syntax.constructor(&field.rust_type, &path_expr)
} else if let Some(accessor) = metadata.find_index_set_pattern(&field.indexes) {
syntax.constructor(&accessor.name, &path_expr)
} else if field.is_branch() {
// Non-pattern branch - instantiate the nested struct
syntax.constructor(&field.rust_type, &path_expr)
} else {
panic!(
"Field '{}' has no matching pattern or index accessor. All metrics must be indexed.",
field.name
)
};
let path_expr = compute_path_expr(syntax, pattern, field, "acc");
let value = compute_field_value(syntax, field, metadata, &path_expr);
writeln!(output, "{}", syntax.field_init(indent, &field_name, &type_ann, &value)).unwrap();
}
@@ -66,26 +88,14 @@ pub fn generate_tree_path_field<S: LanguageSyntax>(
output: &mut String,
syntax: &S,
field: &PatternField,
pattern: &StructuralPattern,
metadata: &ClientMetadata,
indent: &str,
) {
let field_name = syntax.field_name(&field.name);
let type_ann = metadata.field_type_annotation(field, false, None, syntax.generic_syntax());
let path_expr = syntax.path_expr("base_path", &path_suffix(&field.name));
let value = if metadata.is_pattern_type(&field.rust_type) {
syntax.constructor(&field.rust_type, &path_expr)
} else if let Some(accessor) = metadata.find_index_set_pattern(&field.indexes) {
syntax.constructor(&accessor.name, &path_expr)
} else if field.is_branch() {
// Non-pattern branch - instantiate the nested struct
syntax.constructor(&field.rust_type, &path_expr)
} else {
panic!(
"Field '{}' has no matching pattern or index accessor. All metrics must be indexed.",
field.name
)
};
let path_expr = compute_path_expr(syntax, pattern, field, "base_path");
let value = compute_field_value(syntax, field, metadata, &path_expr);
writeln!(output, "{}", syntax.field_init(indent, &field_name, &type_ann, &value)).unwrap();
}
+15 -3
View File
@@ -23,10 +23,12 @@ pub struct ChildContext<'a> {
pub base_result: PatternBaseResult,
/// Whether this is a leaf node.
pub is_leaf: bool,
/// Whether to use an inline type instead of a pattern factory (only meaningful for branches).
/// Whether to use an inline type instead of a pattern type (only meaningful for branches).
pub should_inline: bool,
/// The type name to use for inline branches.
pub inline_type_name: String,
/// Whether the pattern is parameterizable (has ::new() constructor).
pub is_parameterizable: bool,
}
/// Context for generating a tree node, returned by `prepare_tree_node`.
@@ -78,11 +80,20 @@ pub fn prepare_tree_node<'a>(
.map(|((child_name, child_node), (field, child_fields))| {
let is_leaf = matches!(child_node, TreeNode::Leaf(_));
let base_result = get_pattern_instance_base(child_node);
// For type annotations: use pattern type if ANY pattern matches
let matches_any_pattern = child_fields
.as_ref()
.is_some_and(|cf| metadata.matches_pattern(cf));
// For constructors: only use ::new() if parameterizable
let is_parameterizable = child_fields
.as_ref()
.is_some_and(|cf| metadata.is_parameterizable_fields(cf));
// should_inline is only meaningful for branches
let should_inline = !is_leaf && base_result.should_inline(is_parameterizable);
// should_inline determines if we generate an inline struct type
// We inline only if it's a branch AND doesn't match any pattern
let should_inline = !is_leaf && !matches_any_pattern;
// Inline type name (only used when should_inline is true)
let inline_type_name = if should_inline {
@@ -100,6 +111,7 @@ pub fn prepare_tree_node<'a>(
is_leaf,
should_inline,
inline_type_name,
is_parameterizable,
}
})
.collect();
@@ -12,7 +12,7 @@ use serde_json::Value;
use crate::{
ClientMetadata, GenericSyntax, IndexSetPattern, JavaScriptSyntax, StructuralPattern, VERSION,
generate_parameterized_field, generate_tree_path_field, to_camel_case,
generate_parameterized_field, to_camel_case,
};
/// Generate the base BrkClient class with HTTP functionality.
@@ -186,7 +186,7 @@ function _endpoint(client, name, index) {{
get(index) {{ return singleItemBuilder(index); }},
slice(start, end) {{ return rangeBuilder(start, end); }},
first(n) {{ return rangeBuilder(undefined, n); }},
last(n) {{ return rangeBuilder(-n, undefined); }},
last(n) {{ return n === 0 ? rangeBuilder(undefined, 0) : rangeBuilder(-n, undefined); }},
skip(n) {{ return skippedBuilder(n); }},
fetch(onUpdate) {{ return client.getJson(buildPath(), onUpdate); }},
fetchCsv() {{ return client.getText(buildPath(undefined, undefined, 'csv')); }},
@@ -220,7 +220,7 @@ class BrkClientBase {{
const base = this.baseUrl.endsWith('/') ? this.baseUrl.slice(0, -1) : this.baseUrl;
const url = `${{base}}${{path}}`;
const res = await fetch(url, {{ signal: AbortSignal.timeout(this.timeout) }});
if (!res.ok) throw new BrkError(`HTTP ${{res.status}}`, res.status);
if (!res.ok) throw new BrkError(`HTTP ${{res.status}}: ${{url}}`, res.status);
return res;
}}
@@ -271,12 +271,20 @@ class BrkClientBase {{
}}
/**
* Build metric name with optional prefix.
* Build metric name with suffix.
* @param {{string}} acc - Accumulated prefix
* @param {{string}} s - Metric suffix
* @returns {{string}}
*/
const _m = (acc, s) => acc ? `${{acc}}_${{s}}` : s;
const _m = (acc, s) => s ? (acc ? `${{acc}}_${{s}}` : s) : acc;
/**
* Build metric name with prefix.
* @param {{string}} prefix - Prefix to prepend
* @param {{string}} acc - Accumulated name
* @returns {{string}}
*/
const _p = (prefix, acc) => acc ? `${{prefix}}_${{acc}}` : prefix;
"#
)
@@ -470,8 +478,7 @@ pub fn generate_structural_patterns(
writeln!(output, "// Reusable structural pattern factories\n").unwrap();
for pattern in patterns {
let is_parameterizable = pattern.is_parameterizable();
// Generate typedef
writeln!(output, "/**").unwrap();
if pattern.is_generic {
writeln!(output, " * @template T").unwrap();
@@ -494,17 +501,14 @@ pub fn generate_structural_patterns(
}
writeln!(output, " */\n").unwrap();
// Generate factory function for ALL patterns
writeln!(output, "/**").unwrap();
writeln!(output, " * Create a {} pattern node", pattern.name).unwrap();
if pattern.is_generic {
writeln!(output, " * @template T").unwrap();
}
writeln!(output, " * @param {{BrkClientBase}} client").unwrap();
if is_parameterizable {
writeln!(output, " * @param {{string}} acc - Accumulated metric name").unwrap();
} else {
writeln!(output, " * @param {{string}} basePath").unwrap();
}
writeln!(output, " * @param {{string}} acc - Accumulated metric name").unwrap();
let return_type = if pattern.is_generic {
format!("{}<T>", pattern.name)
} else {
@@ -513,26 +517,12 @@ pub fn generate_structural_patterns(
writeln!(output, " * @returns {{{}}}", return_type).unwrap();
writeln!(output, " */").unwrap();
let param_name = if is_parameterizable {
"acc"
} else {
"basePath"
};
writeln!(
output,
"function create{}(client, {}) {{",
pattern.name, param_name
)
.unwrap();
writeln!(output, "function create{}(client, acc) {{", pattern.name).unwrap();
writeln!(output, " return {{").unwrap();
let syntax = JavaScriptSyntax;
for field in &pattern.fields {
if is_parameterizable {
generate_parameterized_field(output, &syntax, field, pattern, metadata, " ");
} else {
generate_tree_path_field(output, &syntax, field, metadata, " ");
}
generate_parameterized_field(output, &syntax, field, pattern, metadata, " ");
}
writeln!(output, " }};").unwrap();
@@ -3,9 +3,9 @@
//! This module generates a JavaScript + JSDoc client for the BRK API.
mod api;
mod client;
mod tree;
mod types;
pub mod client;
pub mod tree;
pub mod types;
use std::{fmt::Write, fs, io, path::Path};
@@ -175,10 +175,8 @@ fn generate_tree_initializer(
TreeNode::Branch(grandchildren) => {
let field_name = to_camel_case(child_name);
let child_fields = get_node_fields(grandchildren, pattern_lookup);
// Only use pattern factory if pattern is parameterizable
let pattern_name = pattern_lookup
.get(&child_fields)
.filter(|name| metadata.is_parameterizable(name));
// Use pattern factory if ANY pattern matches (not just parameterizable)
let pattern_name = pattern_lookup.get(&child_fields);
let base_result = get_pattern_instance_base(child_node);
@@ -11,7 +11,7 @@ use serde::Serialize;
use crate::{
ClientMetadata, IndexSetPattern, PythonSyntax, StructuralPattern, VERSION,
generate_parameterized_field, generate_tree_path_field, index_to_field_name,
generate_parameterized_field, index_to_field_name,
};
/// Generate class-level constants for the BrkClient class.
@@ -132,9 +132,15 @@ class BrkClientBase:
def _m(acc: str, s: str) -> str:
"""Build metric name with optional prefix."""
"""Build metric name with suffix."""
if not s: return acc
return f"{{acc}}_{{s}}" if acc else s
def _p(prefix: str, acc: str) -> str:
"""Build metric name with prefix."""
return f"{{prefix}}_{{acc}}" if acc else prefix
"#
)
.unwrap();
@@ -309,9 +315,10 @@ class MetricEndpointBuilder(Generic[T]):
def tail(self, n: int = 10) -> RangeBuilder[T]:
"""Get the last n items (pandas-style)."""
start, end = (None, 0) if n == 0 else (-n, None)
return RangeBuilder(_EndpointConfig(
self._config.client, self._config.name, self._config.index,
-n, None
start, end
))
def skip(self, n: int) -> SkippedBuilder[T]:
@@ -467,9 +474,7 @@ pub fn generate_structural_patterns(
writeln!(output, "# Reusable structural pattern classes\n").unwrap();
for pattern in patterns {
let is_parameterizable = pattern.is_parameterizable();
// For generic patterns, inherit from Generic[T]
// Generate class
if pattern.is_generic {
writeln!(output, "class {}(Generic[T]):", pattern.name).unwrap();
} else {
@@ -481,33 +486,20 @@ pub fn generate_structural_patterns(
)
.unwrap();
writeln!(output, " ").unwrap();
if is_parameterizable {
writeln!(
output,
" def __init__(self, client: BrkClientBase, acc: str):"
)
.unwrap();
writeln!(
output,
" \"\"\"Create pattern node with accumulated metric name.\"\"\""
)
.unwrap();
} else {
writeln!(
output,
" def __init__(self, client: BrkClientBase, base_path: str):"
)
.unwrap();
}
writeln!(
output,
" def __init__(self, client: BrkClientBase, acc: str):"
)
.unwrap();
writeln!(
output,
" \"\"\"Create pattern node with accumulated metric name.\"\"\""
)
.unwrap();
let syntax = PythonSyntax;
for field in &pattern.fields {
if is_parameterizable {
generate_parameterized_field(output, &syntax, field, pattern, metadata, " ");
} else {
generate_tree_path_field(output, &syntax, field, metadata, " ");
}
generate_parameterized_field(output, &syntax, field, pattern, metadata, " ");
}
writeln!(output).unwrap();
@@ -2,10 +2,10 @@
//!
//! This module generates a Python client with type hints for the BRK API.
mod api;
mod client;
mod tree;
mod types;
pub mod api;
pub mod client;
pub mod tree;
pub mod types;
use std::{fmt::Write, fs, io, path::Path};
@@ -4,7 +4,7 @@ use std::fmt::Write;
use crate::{
ClientMetadata, GenericSyntax, IndexSetPattern, RustSyntax, StructuralPattern,
generate_parameterized_field, generate_tree_path_field, index_to_field_name, to_snake_case,
generate_parameterized_field, index_to_field_name, to_snake_case,
};
/// Generate import statements.
@@ -116,10 +116,18 @@ impl BrkClientBase {{
}}
}}
/// Build metric name with optional prefix.
/// Build metric name with suffix.
#[inline]
fn _m(acc: &str, s: &str) -> String {{
if acc.is_empty() {{ s.to_string() }} else {{ format!("{{acc}}_{{s}}") }}
if s.is_empty() {{ acc.to_string() }}
else if acc.is_empty() {{ s.to_string() }}
else {{ format!("{{acc}}_{{s}}") }}
}}
/// Build metric name with prefix.
#[inline]
fn _p(prefix: &str, acc: &str) -> String {{
if acc.is_empty() {{ prefix.to_string() }} else {{ format!("{{prefix}}_{{acc}}") }}
}}
"#
@@ -265,7 +273,11 @@ impl<T: DeserializeOwned> MetricEndpointBuilder<T> {{
/// Take the last n items.
pub fn last(mut self, n: usize) -> RangeBuilder<T> {{
self.config.start = Some(-(n as i64));
if n == 0 {{
self.config.end = Some(0);
}} else {{
self.config.start = Some(-(n as i64));
}}
RangeBuilder {{ config: self.config, _marker: std::marker::PhantomData }}
}}
@@ -399,7 +411,6 @@ pub fn generate_index_accessors(output: &mut String, patterns: &[IndexSetPattern
)
.unwrap();
writeln!(output, "pub struct {}<T> {{", pattern.name).unwrap();
writeln!(output, " client: Arc<BrkClientBase>,").unwrap();
writeln!(output, " name: Arc<str>,").unwrap();
writeln!(output, " pub by: {}<T>,", by_name).unwrap();
writeln!(output, "}}\n").unwrap();
@@ -413,13 +424,8 @@ pub fn generate_index_accessors(output: &mut String, patterns: &[IndexSetPattern
.unwrap();
writeln!(output, " let name: Arc<str> = name.into();").unwrap();
writeln!(output, " Self {{").unwrap();
writeln!(output, " client: client.clone(),").unwrap();
writeln!(output, " name: name.clone(),").unwrap();
writeln!(output, " by: {} {{", by_name).unwrap();
writeln!(output, " client,").unwrap();
writeln!(output, " name,").unwrap();
writeln!(output, " _marker: std::marker::PhantomData,").unwrap();
writeln!(output, " }}").unwrap();
writeln!(output, " by: {} {{ client, name, _marker: std::marker::PhantomData }}", by_name).unwrap();
writeln!(output, " }}").unwrap();
writeln!(output, " }}").unwrap();
writeln!(output).unwrap();
@@ -472,9 +478,9 @@ pub fn generate_pattern_structs(
writeln!(output, "// Reusable pattern structs\n").unwrap();
for pattern in patterns {
let is_parameterizable = pattern.is_parameterizable();
let generic_params = if pattern.is_generic { "<T>" } else { "" };
// Generate struct definition
writeln!(output, "/// Pattern struct for repeated tree structure.").unwrap();
writeln!(output, "pub struct {}{} {{", pattern.name, generic_params).unwrap();
@@ -487,7 +493,7 @@ pub fn generate_pattern_structs(
writeln!(output, "}}\n").unwrap();
// Generate impl block with constructor
// Generate impl block with constructor for ALL patterns
let impl_generic = if pattern.is_generic {
"<T: DeserializeOwned>"
} else {
@@ -500,33 +506,21 @@ pub fn generate_pattern_structs(
)
.unwrap();
if is_parameterizable {
writeln!(
output,
" /// Create a new pattern node with accumulated metric name."
)
.unwrap();
writeln!(
output,
" pub fn new(client: Arc<BrkClientBase>, acc: String) -> Self {{"
)
.unwrap();
} else {
writeln!(
output,
" pub fn new(client: Arc<BrkClientBase>, base_path: String) -> Self {{"
)
.unwrap();
}
writeln!(
output,
" /// Create a new pattern node with accumulated metric name."
)
.unwrap();
writeln!(
output,
" pub fn new(client: Arc<BrkClientBase>, acc: String) -> Self {{"
)
.unwrap();
writeln!(output, " Self {{").unwrap();
let syntax = RustSyntax;
for field in &pattern.fields {
if is_parameterizable {
generate_parameterized_field(output, &syntax, field, pattern, metadata, " ");
} else {
generate_tree_path_field(output, &syntax, field, metadata, " ");
}
generate_parameterized_field(output, &syntax, field, pattern, metadata, " ");
}
writeln!(output, " }}").unwrap();
@@ -2,9 +2,9 @@
//!
//! This module generates a Rust client with full type safety for the BRK API.
mod api;
mod client;
mod tree;
pub mod api;
pub mod client;
pub mod tree;
mod types;
use std::{fmt::Write, fs, io, path::Path};
@@ -86,7 +86,7 @@ fn generate_tree_node(
);
}
} else if child.should_inline {
// Inline struct
// Inline struct type - only for nodes that don't match any pattern
let path_expr = syntax.path_expr("base_path", &format!("_{}", child.name));
writeln!(
output,
@@ -95,7 +95,9 @@ fn generate_tree_node(
)
.unwrap();
} else {
// Use pattern constructor
// Pattern type - use ::new() constructor
// All patterns have ::new(), parameterizable ones use detected mode,
// non-parameterizable ones use field name fallback
generate_tree_node_field(
output,
&syntax,
+1 -1
View File
@@ -58,7 +58,7 @@ mod types;
pub use analysis::*;
pub use backends::*;
pub use generate::*;
pub use generators::{generate_javascript_client, generate_python_client, generate_rust_client};
pub use generators::*;
pub use openapi::*;
pub use syntax::*;
pub use types::*;
+21 -5
View File
@@ -4,7 +4,7 @@
//! language-specific code generation patterns, allowing shared generation
//! logic to work across Python, JavaScript, and Rust backends.
use crate::{FieldNamePosition, GenericSyntax};
use crate::GenericSyntax;
/// Language-specific syntax for code generation.
///
@@ -30,11 +30,27 @@ pub trait LanguageSyntax {
/// - Rust: `format!("{acc}_suffix")`
fn path_expr(&self, base_var: &str, suffix: &str) -> String;
/// Format a `FieldNamePosition` as a path expression.
/// Format a suffix mode expression: `_m(acc, relative)`.
///
/// This handles the different name transformation patterns (append, prepend,
/// identity, set_base) in a language-specific way.
fn position_expr(&self, pos: &FieldNamePosition, base_var: &str) -> String;
/// Suffix mode appends the relative name to the accumulator.
/// - If relative is empty, returns just acc (identity)
/// - Otherwise: `{acc}_{relative}` or `{relative}` if acc is empty
///
/// # Arguments
/// * `acc_var` - The accumulator variable name (e.g., "acc")
/// * `relative` - The relative name to append (e.g., "max_cost_basis")
fn suffix_expr(&self, acc_var: &str, relative: &str) -> String;
/// Format a prefix mode expression: `_p(prefix, acc)`.
///
/// Prefix mode prepends the prefix to the accumulator.
/// - If prefix is empty, returns just acc (identity)
/// - Otherwise: `{prefix}{acc}` (prefix includes trailing underscore)
///
/// # Arguments
/// * `prefix` - The prefix to prepend (e.g., "cumulative_")
/// * `acc_var` - The accumulator variable name (e.g., "acc")
fn prefix_expr(&self, prefix: &str, acc_var: &str) -> String;
/// Generate a constructor call for patterns and accessors.
///
+35 -6
View File
@@ -28,7 +28,11 @@ pub struct ClientMetadata {
impl ClientMetadata {
/// Extract metadata from brk_query::Vecs.
pub fn from_vecs(vecs: &Vecs) -> Self {
let catalog = vecs.catalog().clone();
Self::from_catalog(vecs.catalog().clone())
}
/// Extract metadata from a catalog TreeNode directly.
pub fn from_catalog(catalog: brk_types::TreeNode) -> Self {
let (structural_patterns, concrete_to_pattern, concrete_to_type_param) =
analysis::detect_structural_patterns(&catalog);
let (used_indexes, index_set_patterns) = analysis::detect_index_patterns(&catalog);
@@ -65,9 +69,33 @@ impl ClientMetadata {
self.find_pattern(name).is_some_and(|p| p.is_generic)
}
/// Check if a pattern by name is parameterizable.
/// Check if a pattern by name is fully parameterizable.
/// A pattern is parameterizable if it has a mode AND all its branch fields
/// are also parameterizable (or not patterns at all).
pub fn is_parameterizable(&self, name: &str) -> bool {
self.find_pattern(name).is_some_and(|p| p.is_parameterizable())
self.find_pattern(name).is_some_and(|p| {
if !p.is_parameterizable() {
return false;
}
// Check all branch fields have parameterizable types (or are not patterns)
p.fields.iter().all(|f| {
if f.is_branch() {
self.structural_patterns
.iter()
.find(|pat| pat.name == f.rust_type)
.is_none_or(|pat| pat.is_parameterizable())
} else {
true
}
})
})
}
/// Check if child fields match ANY pattern (parameterizable or not).
/// Used for type annotations - we want to reuse pattern types for all patterns.
pub fn matches_pattern(&self, fields: &[PatternField]) -> bool {
self.concrete_to_pattern.contains_key(fields)
|| self.structural_patterns.iter().any(|p| p.fields == fields)
}
/// Check if child fields match a parameterizable pattern.
@@ -84,8 +112,8 @@ impl ClientMetadata {
.is_some_and(|name| self.is_parameterizable(name))
}
/// Resolve the type name for a tree field, considering parameterizability.
/// If the field matches a parameterizable pattern, returns type annotation.
/// Resolve the type name for a tree field.
/// If the field matches ANY pattern (parameterizable or not), returns pattern type.
/// Otherwise returns the inline type name (parent_child format).
pub fn resolve_tree_field_type(
&self,
@@ -96,7 +124,8 @@ impl ClientMetadata {
syntax: GenericSyntax,
) -> String {
match child_fields {
Some(cf) if self.is_parameterizable_fields(cf) => {
// Use pattern type for ANY matching pattern (parameterizable or not)
Some(cf) if self.matches_pattern(cf) => {
let generic_value_type = self.get_type_param(cf).map(String::as_str);
self.field_type_annotation(field, false, generic_value_type, syntax)
}
+23 -11
View File
@@ -1,14 +1,26 @@
//! Field name position types for metric name reconstruction.
//! Pattern mode and field parts for metric name reconstruction.
//!
//! Patterns are either suffix mode or prefix mode:
//! - Suffix mode: `_m(acc, relative)` → `acc_relative` or just `relative` if acc empty
//! - Prefix mode: `_p(prefix, acc)` → `prefix_acc` or just `acc` if prefix empty
/// How a field modifies the accumulated metric name.
use std::collections::HashMap;
/// How a pattern constructs metric names from the accumulator.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FieldNamePosition {
/// Field prepends a prefix: leaf.name() = prefix + accumulated
Prepend(String),
/// Field appends a suffix: leaf.name() = accumulated + suffix
Append(String),
/// Field IS the accumulated name (no modification)
Identity,
/// Field sets a new base name (used at pattern entry points)
SetBase(String),
pub enum PatternMode {
/// Fields append their relative name to acc.
/// Formula: `_m(acc, relative)` → `{acc}_{relative}` or `{relative}` if acc empty
/// Example: `_m("lth", "max_cost_basis")` → `"lth_max_cost_basis"`
Suffix {
/// Maps field name to its relative name (full metric name when acc = "")
relatives: HashMap<String, String>,
},
/// Fields prepend their prefix to acc.
/// Formula: `_p(prefix, acc)` → `{prefix}_{acc}` or `{acc}` if prefix empty
/// Example: `_p("cumulative", "lth_realized_loss")` → `"cumulative_lth_realized_loss"`
Prefix {
/// Maps field name to its prefix (empty string for identity)
prefixes: HashMap<String, String>,
},
}
+23 -13
View File
@@ -1,10 +1,10 @@
//! Structural pattern and field types.
use std::collections::{BTreeSet, HashMap};
use std::collections::BTreeSet;
use brk_types::Index;
use super::FieldNamePosition;
use super::PatternMode;
/// A pattern of indexes that appear together on multiple metrics.
#[derive(Debug, Clone)]
@@ -22,8 +22,8 @@ pub struct StructuralPattern {
pub name: String,
/// Ordered list of child fields
pub fields: Vec<PatternField>,
/// How each field modifies the accumulated name
pub field_positions: HashMap<String, FieldNamePosition>,
/// How fields construct metric names from acc (None = not parameterizable)
pub mode: Option<PatternMode>,
/// If true, all leaf fields use a type parameter T
pub is_generic: bool,
}
@@ -34,18 +34,28 @@ impl StructuralPattern {
self.fields.iter().any(|f| f.is_leaf())
}
/// Returns true if all leaf fields have consistent name transformations.
/// Returns true if this pattern can be parameterized with an accumulator.
pub fn is_parameterizable(&self) -> bool {
!self.field_positions.is_empty()
&& self
.fields
.iter()
.all(|f| f.is_branch() || self.field_positions.contains_key(&f.name))
self.mode.is_some()
}
/// Get the field position for a given field name.
pub fn get_field_position(&self, field_name: &str) -> Option<&FieldNamePosition> {
self.field_positions.get(field_name)
/// Get the field part (relative name or prefix) for a given field.
pub fn get_field_part(&self, field_name: &str) -> Option<&str> {
match &self.mode {
Some(PatternMode::Suffix { relatives }) => relatives.get(field_name).map(|s| s.as_str()),
Some(PatternMode::Prefix { prefixes }) => prefixes.get(field_name).map(|s| s.as_str()),
None => None,
}
}
/// Returns true if this pattern is in suffix mode.
pub fn is_suffix_mode(&self) -> bool {
matches!(&self.mode, Some(PatternMode::Suffix { .. }))
}
/// Returns true if this pattern is in prefix mode.
pub fn is_prefix_mode(&self) -> bool {
matches!(&self.mode, Some(PatternMode::Prefix { .. }))
}
}
+822
View File
@@ -0,0 +1,822 @@
//! Tests that verify pattern analysis using the real catalog.
use std::collections::HashSet;
use std::fmt::Write;
use brk_bindgen::ClientMetadata;
use brk_types::TreeNode;
/// Load the catalog from the JSON file.
fn load_catalog() -> TreeNode {
let path = concat!(env!("CARGO_MANIFEST_DIR"), "/catalog.json");
let catalog_json = std::fs::read_to_string(path).expect("Failed to read catalog.json");
serde_json::from_str(&catalog_json).expect("Failed to parse catalog.json")
}
/// Load OpenAPI spec from api.json.
fn load_openapi_json() -> String {
let path = concat!(env!("CARGO_MANIFEST_DIR"), "/api.json");
std::fs::read_to_string(path).expect("Failed to read api.json")
}
/// Load metadata from the catalog.
#[allow(unused)]
fn load_metadata() -> ClientMetadata {
ClientMetadata::from_catalog(load_catalog())
}
/// Collect all leaf metric names from a tree.
fn collect_leaf_names(node: &TreeNode, names: &mut HashSet<String>) {
match node {
TreeNode::Leaf(leaf) => {
names.insert(leaf.name().to_string());
}
TreeNode::Branch(children) => {
for child in children.values() {
collect_leaf_names(child, names);
}
}
}
}
#[test]
fn test_catalog_loads() {
let catalog = load_catalog();
// Should be a branch with top-level categories
let TreeNode::Branch(categories) = &catalog else {
panic!("Expected catalog to be a branch");
};
// Check some expected top-level categories exist
assert!(
categories.contains_key("addresses"),
"Missing addresses category"
);
assert!(categories.contains_key("blocks"), "Missing blocks category");
assert!(categories.contains_key("market"), "Missing market category");
assert!(categories.contains_key("supply"), "Missing supply category");
println!("Catalog has {} top-level categories", categories.len());
}
#[test]
fn test_all_leaves_have_names() {
let catalog = load_catalog();
let mut names = HashSet::new();
collect_leaf_names(&catalog, &mut names);
println!("Catalog has {} unique metric names", names.len());
assert!(!names.is_empty(), "Should have at least some metrics");
// All names should be non-empty
for name in &names {
assert!(!name.is_empty(), "Found empty metric name");
}
}
#[test]
fn test_pattern_detection() {
let catalog = load_catalog();
let (patterns, concrete_to_pattern, concrete_to_type_param) =
brk_bindgen::detect_structural_patterns(&catalog);
println!("Detected {} structural patterns", patterns.len());
println!(
"Concrete to pattern mappings: {}",
concrete_to_pattern.len()
);
println!("Type parameter mappings: {}", concrete_to_type_param.len());
// Print pattern details
for pattern in &patterns {
let mode_str = match &pattern.mode {
Some(brk_bindgen::PatternMode::Suffix { relatives }) => {
format!("Suffix({})", relatives.len())
}
Some(brk_bindgen::PatternMode::Prefix { prefixes }) => {
format!("Prefix({})", prefixes.len())
}
None => "None".to_string(),
};
println!(
" {} (fields: {}, generic: {}, mode: {})",
pattern.name,
pattern.fields.len(),
pattern.is_generic,
mode_str
);
}
// Should have detected some patterns
assert!(!patterns.is_empty(), "Should detect at least some patterns");
// Check that parameterizable patterns have valid modes
for pattern in &patterns {
if pattern.is_parameterizable() {
let mode = pattern.mode.as_ref().unwrap();
match mode {
brk_bindgen::PatternMode::Suffix { relatives } => {
assert_eq!(
relatives.len(),
pattern.fields.len(),
"Pattern {} should have relative for each field",
pattern.name
);
}
brk_bindgen::PatternMode::Prefix { prefixes } => {
assert_eq!(
prefixes.len(),
pattern.fields.len(),
"Pattern {} should have prefix for each field",
pattern.name
);
}
}
}
}
}
#[test]
fn test_cost_basis_pattern() {
let catalog = load_catalog();
let (patterns, _, _) = brk_bindgen::detect_structural_patterns(&catalog);
// Find CostBasisPattern2 and inspect it
let cost_basis = patterns
.iter()
.find(|p| p.name == "CostBasisPattern2")
.expect("CostBasisPattern2 should exist");
println!("CostBasisPattern2:");
println!(
" Fields: {:?}",
cost_basis
.fields
.iter()
.map(|f| &f.name)
.collect::<Vec<_>>()
);
println!(" Mode: {:?}", cost_basis.mode);
println!(" Is generic: {}", cost_basis.is_generic);
// With suffix naming convention (cost_basis_max, cost_basis_min, cost_basis):
//
// At root level: common prefix is "cost_basis_" -> suffix mode
// max -> "max"
// min -> "min"
// percentiles -> "" (identity)
//
// At lth_ level: common prefix is "lth_cost_basis_" -> suffix mode
// max -> "max"
// min -> "min"
// percentiles -> "" (identity)
//
// Both use suffix mode with same relatives, so pattern IS parameterizable!
assert!(
cost_basis.is_parameterizable(),
"CostBasisPattern2 should be parameterizable with consistent suffix mode"
);
}
#[test]
fn test_realized_pattern3_fields() {
let catalog = load_catalog();
let metadata = ClientMetadata::from_catalog(catalog);
let pattern = metadata
.find_pattern("RealizedPattern3")
.expect("RealizedPattern3 should exist");
println!("RealizedPattern3 fields:");
for field in &pattern.fields {
let is_branch = field.is_branch();
let is_pattern = metadata.find_pattern(&field.rust_type).is_some();
let is_param = metadata.is_parameterizable(&field.rust_type);
println!(
" {} -> {} (branch={}, pattern={}, param={})",
field.name, field.rust_type, is_branch, is_pattern, is_param
);
}
// Check if RealizedPattern3 is considered parameterizable
println!(
"\nRealizedPattern3 is_parameterizable (metadata): {}",
metadata.is_parameterizable("RealizedPattern3")
);
}
#[test]
fn test_parameterizable_patterns_have_mode() {
let catalog = load_catalog();
let (patterns, _, _) = brk_bindgen::detect_structural_patterns(&catalog);
// All patterns that appear 2+ times should either:
// 1. Be parameterizable (have a mode)
// 2. Or have inconsistent instances (mode = None)
//
// Patterns with mode = None should be inlined, not generate factories
let parameterizable: Vec<_> = patterns.iter().filter(|p| p.is_parameterizable()).collect();
let non_parameterizable: Vec<_> = patterns
.iter()
.filter(|p| !p.is_parameterizable())
.collect();
println!("\nParameterizable patterns ({}):", parameterizable.len());
for p in &parameterizable {
let mode = p.mode.as_ref().unwrap();
let mode_type = match mode {
brk_bindgen::PatternMode::Suffix { .. } => "Suffix",
brk_bindgen::PatternMode::Prefix { .. } => "Prefix",
};
println!(" {} ({} fields, {})", p.name, p.fields.len(), mode_type);
}
println!(
"\nNon-parameterizable patterns ({}):",
non_parameterizable.len()
);
for p in &non_parameterizable {
println!(" {} ({} fields)", p.name, p.fields.len());
}
// Verify all parameterizable patterns have valid modes with all fields
for pattern in &parameterizable {
let mode = pattern.mode.as_ref().unwrap();
let field_names: HashSet<_> = pattern.fields.iter().map(|f| f.name.clone()).collect();
match mode {
brk_bindgen::PatternMode::Suffix { relatives } => {
let mode_fields: HashSet<_> = relatives.keys().cloned().collect();
assert_eq!(
field_names, mode_fields,
"Pattern {} suffix mode should have all fields",
pattern.name
);
}
brk_bindgen::PatternMode::Prefix { prefixes } => {
let mode_fields: HashSet<_> = prefixes.keys().cloned().collect();
assert_eq!(
field_names, mode_fields,
"Pattern {} prefix mode should have all fields",
pattern.name
);
}
}
}
}
#[test]
fn test_index_patterns() {
let catalog = load_catalog();
let (used_indexes, index_patterns) = brk_bindgen::detect_index_patterns(&catalog);
println!("Used indexes: {:?}", used_indexes);
println!("Index set patterns: {}", index_patterns.len());
for pattern in &index_patterns {
println!(" {} -> {:?}", pattern.name, pattern.indexes);
}
// Should have detected some index patterns
assert!(!index_patterns.is_empty(), "Should detect index patterns");
}
#[test]
fn test_generated_rust_output() {
let catalog = load_catalog();
let metadata = ClientMetadata::from_catalog(catalog.clone());
// Collect all metric names from the catalog
let mut all_metrics = HashSet::new();
collect_leaf_names(&catalog, &mut all_metrics);
// Generate Rust client output
let mut rust_output = String::new();
brk_bindgen::rust::client::generate_imports(&mut rust_output);
brk_bindgen::rust::client::generate_base_client(&mut rust_output);
brk_bindgen::rust::client::generate_metric_pattern_trait(&mut rust_output);
brk_bindgen::rust::client::generate_endpoint(&mut rust_output);
brk_bindgen::rust::client::generate_index_accessors(
&mut rust_output,
&metadata.index_set_patterns,
);
brk_bindgen::rust::client::generate_pattern_structs(
&mut rust_output,
&metadata.structural_patterns,
&metadata,
);
brk_bindgen::rust::tree::generate_tree(&mut rust_output, &metadata.catalog, &metadata);
brk_bindgen::rust::api::generate_main_client(&mut rust_output, &[]);
// Count metrics that appear as direct string literals
let mut direct_metrics = 0;
for metric in &all_metrics {
if rust_output.contains(&format!("\"{}\"", metric)) {
direct_metrics += 1;
}
}
println!("\nGenerated Rust output stats:");
println!(" Total metrics in catalog: {}", all_metrics.len());
println!(" Direct string literals: {}", direct_metrics);
println!(
" Via pattern factories: {}",
all_metrics.len() - direct_metrics
);
println!(" Output size: {} bytes", rust_output.len());
// Write output to actual client location
let output_path = concat!(env!("CARGO_MANIFEST_DIR"), "/../brk_client/src/lib.rs");
std::fs::write(output_path, &rust_output).expect("Failed to write client output");
println!(" Wrote output to: {}", output_path);
// Verify the output contains the key components
assert!(rust_output.contains("fn _m("), "Should define _m helper");
assert!(
rust_output.contains("pub struct MetricsTree"),
"Should have MetricsTree"
);
assert!(
rust_output.contains("impl MetricsTree"),
"Should have MetricsTree impl"
);
// Count parameterizable patterns (these use _m for dynamic metric names)
// Use metadata.is_parameterizable() for full recursive check
let parameterizable_count = metadata
.structural_patterns
.iter()
.filter(|p| metadata.is_parameterizable(&p.name))
.count();
println!(" Parameterizable patterns: {}", parameterizable_count);
// Verify all pattern structs are generated (parameterizable and non)
for pattern in &metadata.structural_patterns {
assert!(
rust_output.contains(&format!("pub struct {}", pattern.name)),
"Missing pattern struct: {}",
pattern.name
);
}
println!("\nGenerated Rust client is complete!");
}
#[test]
fn test_generated_javascript_output() {
let catalog = load_catalog();
let metadata = ClientMetadata::from_catalog(catalog.clone());
// Collect all metric names from the catalog
let mut all_metrics = HashSet::new();
collect_leaf_names(&catalog, &mut all_metrics);
// Load schemas from OpenAPI spec only (catalog schemas require runtime data)
let openapi_json = load_openapi_json();
let schemas = brk_bindgen::extract_schemas(&openapi_json);
// Generate JavaScript client output
let mut js_output = String::new();
writeln!(js_output, "// Auto-generated BRK JavaScript client").unwrap();
writeln!(js_output, "// Do not edit manually\n").unwrap();
brk_bindgen::javascript::types::generate_type_definitions(&mut js_output, &schemas);
brk_bindgen::javascript::client::generate_base_client(&mut js_output);
brk_bindgen::javascript::client::generate_index_accessors(
&mut js_output,
&metadata.index_set_patterns,
);
brk_bindgen::javascript::client::generate_structural_patterns(
&mut js_output,
&metadata.structural_patterns,
&metadata,
);
brk_bindgen::javascript::tree::generate_tree_typedefs(
&mut js_output,
&metadata.catalog,
&metadata,
);
brk_bindgen::javascript::tree::generate_main_client(
&mut js_output,
&metadata.catalog,
&metadata,
&[],
);
// Count metrics that appear as direct string literals
let mut direct_metrics = 0;
for metric in &all_metrics {
if js_output.contains(&format!("'{}'", metric))
|| js_output.contains(&format!("\"{}\"", metric))
{
direct_metrics += 1;
}
}
println!("\nGenerated JavaScript output stats:");
println!(" Total metrics in catalog: {}", all_metrics.len());
println!(" Direct string literals: {}", direct_metrics);
println!(
" Via pattern factories: {}",
all_metrics.len() - direct_metrics
);
println!(" Output size: {} bytes", js_output.len());
println!(" Output lines: {}", js_output.lines().count());
// Write output to actual client location
let output_path = concat!(
env!("CARGO_MANIFEST_DIR"),
"/../../modules/brk-client/index.js"
);
std::fs::write(output_path, &js_output).expect("Failed to write JS client output");
println!(" Wrote output to: {}", output_path);
// Verify the output contains key components
assert!(js_output.contains("const _m ="), "Should define _m helper");
assert!(js_output.contains("const _p ="), "Should define _p helper");
assert!(
js_output.contains("@typedef {Object} MetricsTree"),
"Should have MetricsTree typedef"
);
assert!(
js_output.contains("class BrkClient"),
"Should have BrkClient class"
);
// Verify all pattern factories are generated
for pattern in &metadata.structural_patterns {
assert!(
js_output.contains(&format!("function create{}(", pattern.name)),
"Missing pattern factory: {}",
pattern.name
);
}
println!("\nGenerated JavaScript client is complete!");
}
#[test]
fn test_generated_python_output() {
let catalog = load_catalog();
let metadata = ClientMetadata::from_catalog(catalog.clone());
// Collect all metric names from the catalog
let mut all_metrics = HashSet::new();
collect_leaf_names(&catalog, &mut all_metrics);
// Load schemas from OpenAPI spec only (catalog schemas require runtime data)
let openapi_json = load_openapi_json();
let schemas = brk_bindgen::extract_schemas(&openapi_json);
// Generate Python client output
let mut py_output = String::new();
writeln!(py_output, "# Auto-generated BRK Python client").unwrap();
writeln!(py_output, "# Do not edit manually\n").unwrap();
writeln!(py_output, "from typing import TypeVar, Generic, Any, Optional, List, Literal, TypedDict, Union, Protocol, overload").unwrap();
writeln!(
py_output,
"from http.client import HTTPSConnection, HTTPConnection"
)
.unwrap();
writeln!(py_output, "from urllib.parse import urlparse").unwrap();
writeln!(py_output, "import json\n").unwrap();
writeln!(py_output, "T = TypeVar('T')\n").unwrap();
brk_bindgen::python::types::generate_type_definitions(&mut py_output, &schemas);
brk_bindgen::python::client::generate_base_client(&mut py_output);
brk_bindgen::python::client::generate_endpoint_class(&mut py_output);
brk_bindgen::python::client::generate_index_accessors(
&mut py_output,
&metadata.index_set_patterns,
);
brk_bindgen::python::client::generate_structural_patterns(
&mut py_output,
&metadata.structural_patterns,
&metadata,
);
brk_bindgen::python::tree::generate_tree_classes(&mut py_output, &metadata.catalog, &metadata);
brk_bindgen::python::api::generate_main_client(&mut py_output, &[]);
// Count metrics that appear as direct string literals
let mut direct_metrics = 0;
for metric in &all_metrics {
if py_output.contains(&format!("'{}'", metric))
|| py_output.contains(&format!("\"{}\"", metric))
{
direct_metrics += 1;
}
}
println!("\nGenerated Python output stats:");
println!(" Total metrics in catalog: {}", all_metrics.len());
println!(" Direct string literals: {}", direct_metrics);
println!(
" Via pattern factories: {}",
all_metrics.len() - direct_metrics
);
println!(" Output size: {} bytes", py_output.len());
println!(" Output lines: {}", py_output.lines().count());
// Write output to actual client location
let output_path = concat!(
env!("CARGO_MANIFEST_DIR"),
"/../../packages/brk_client/brk_client/__init__.py"
);
std::fs::write(output_path, &py_output).expect("Failed to write Python client output");
println!(" Wrote output to: {}", output_path);
// Verify the output contains key components
assert!(py_output.contains("def _m("), "Should define _m helper");
assert!(py_output.contains("def _p("), "Should define _p helper");
assert!(
py_output.contains("class MetricsTree:"),
"Should have MetricsTree class"
);
assert!(
py_output.contains("class BrkClient"),
"Should have BrkClient class"
);
// Verify all pattern classes have constructors
for pattern in &metadata.structural_patterns {
assert!(
py_output.contains(&format!("class {}:", pattern.name))
|| py_output.contains(&format!("class {}(", pattern.name)),
"Missing pattern class: {}",
pattern.name
);
}
println!("\nGenerated Python client is complete!");
}
#[test]
fn test_cost_basis_relatives() {
let catalog = load_catalog();
// Find cost_basis branches that have 3 direct children (max, min, percentiles)
fn find_cost_basis_with_percentiles(
node: &TreeNode,
path: &str,
) -> Vec<(String, Vec<(String, String)>)> {
let mut results = Vec::new();
if let TreeNode::Branch(children) = node {
for (name, child) in children {
let child_path = if path.is_empty() {
name.clone()
} else {
format!("{}.{}", path, name)
};
if name == "cost_basis"
&& let TreeNode::Branch(cb_children) = child
&& cb_children.contains_key("percentiles")
{
// Found a cost_basis with percentiles
let mut metrics = Vec::new();
for (field_name, field_node) in cb_children {
match field_node {
TreeNode::Leaf(leaf) => {
metrics.push((field_name.clone(), leaf.name().to_string()));
}
TreeNode::Branch(pct_children) => {
// Get first percentile as example
if let Some((_, TreeNode::Leaf(first))) = pct_children.iter().next()
{
metrics.push((
format!("{}.first", field_name),
first.name().to_string(),
));
}
}
}
}
results.push((child_path.clone(), metrics));
}
results.extend(find_cost_basis_with_percentiles(child, &child_path));
}
}
results
}
let instances = find_cost_basis_with_percentiles(&catalog, "");
println!("\nCostBasisPattern2 instances (with percentiles):");
for (path, metrics) in instances.iter().take(10) {
println!(" {}:", path);
for (field, metric) in metrics {
println!(" {} -> {}", field, metric);
}
}
// Now compute what relatives the pattern detection would see
// The key is: percentiles returns its BASE (common prefix of pct05, pct10, etc.)
// not the individual percentile metrics
use brk_bindgen::find_common_prefix;
println!("\nComputing relatives (simulating branch base returns):");
for (path, metrics) in instances.iter().take(5) {
println!(" Instance: {}", path);
// For leaves (max, min), the base is the metric name
// For branches (percentiles), the base is the common prefix of its children
let mut child_bases: std::collections::HashMap<String, String> =
std::collections::HashMap::new();
for (field, metric) in metrics {
if field.starts_with("percentiles.") {
// This is a percentile metric - compute what the percentiles branch would return
// The base is the metric name with the pct suffix stripped
let base = metric
.strip_suffix("_pct05")
.or_else(|| metric.strip_suffix("_pct10"))
.unwrap_or(metric)
.to_string();
child_bases.insert("percentiles".to_string(), base);
} else {
child_bases.insert(field.clone(), metric.clone());
}
}
let bases: Vec<&str> = child_bases.values().map(|s| s.as_str()).collect();
println!(" Child bases:");
for (field, base) in &child_bases {
println!(" {} -> {}", field, base);
}
if let Some(prefix) = find_common_prefix(&bases) {
println!(" Common prefix: '{}'", prefix);
for (field, base) in &child_bases {
let relative = base.strip_prefix(&prefix).unwrap_or(base);
println!(" {} -> relative '{}'", field, relative);
}
} else {
println!(" No common prefix found!");
}
}
}
#[test]
fn test_debug_cost_basis_pattern2_mode() {
// Debug why CostBasisPattern2 has mode=None
let catalog = load_catalog();
let metadata = brk_bindgen::ClientMetadata::from_catalog(catalog.clone());
let pattern_lookup = metadata.pattern_lookup();
let pattern = metadata
.find_pattern("CostBasisPattern2")
.expect("CostBasisPattern2 should exist");
println!("\nCostBasisPattern2 fields:");
for field in &pattern.fields {
println!(" {} (type: {})", field.name, field.rust_type);
}
println!("Mode: {:?}", pattern.mode);
// Now debug the instance collection
#[derive(Debug, Clone)]
struct DebugInstanceAnalysis {
base: String,
field_parts: std::collections::HashMap<String, String>,
is_suffix_mode: bool,
}
fn collect_debug(
node: &TreeNode,
pattern_lookup: &std::collections::HashMap<Vec<brk_bindgen::PatternField>, String>,
all_analyses: &mut std::collections::HashMap<String, Vec<DebugInstanceAnalysis>>,
) -> Option<String> {
match node {
TreeNode::Leaf(leaf) => Some(leaf.name().to_string()),
TreeNode::Branch(children) => {
let mut child_bases: std::collections::HashMap<String, String> =
std::collections::HashMap::new();
for (field_name, child_node) in children {
if let Some(base) = collect_debug(child_node, pattern_lookup, all_analyses) {
child_bases.insert(field_name.clone(), base);
}
}
if child_bases.is_empty() {
return None;
}
// Analyze this instance
let bases: Vec<&str> = child_bases.values().map(|s| s.as_str()).collect();
let (base, field_parts, is_suffix_mode) =
if let Some(common_prefix) = brk_bindgen::find_common_prefix(&bases) {
let base = common_prefix.trim_end_matches('_').to_string();
let mut parts = std::collections::HashMap::new();
for (field_name, child_base) in &child_bases {
let relative = if *child_base == base {
String::new()
} else {
child_base
.strip_prefix(&common_prefix)
.unwrap_or(child_base)
.to_string()
};
parts.insert(field_name.clone(), relative);
}
(base, parts, true)
} else {
let base = child_bases.values().next().cloned().unwrap_or_default();
let parts = child_bases
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
(base, parts, true)
};
let analysis = DebugInstanceAnalysis {
base: base.clone(),
field_parts,
is_suffix_mode,
};
// Get the pattern name for this node
let fields = brk_bindgen::get_node_fields(children, pattern_lookup);
if let Some(pattern_name) = pattern_lookup.get(&fields) {
all_analyses
.entry(pattern_name.clone())
.or_default()
.push(analysis);
}
Some(base)
}
}
}
let mut all_analyses: std::collections::HashMap<String, Vec<DebugInstanceAnalysis>> =
std::collections::HashMap::new();
collect_debug(&catalog, &pattern_lookup, &mut all_analyses);
if let Some(analyses) = all_analyses.get("CostBasisPattern2") {
println!(
"\nCollected {} instances of CostBasisPattern2:",
analyses.len()
);
for (i, a) in analyses.iter().enumerate() {
println!(" Instance {}:", i);
println!(" base: {}", a.base);
println!(" is_suffix: {}", a.is_suffix_mode);
println!(" field_parts:");
for (f, p) in &a.field_parts {
println!(" {} -> '{}'", f, p);
}
}
// Check consistency
if analyses.len() >= 2 {
let first = &analyses[0];
for (i, a) in analyses.iter().enumerate().skip(1) {
if a.is_suffix_mode != first.is_suffix_mode {
println!(" INCONSISTENT: Instance {} has different mode", i);
}
for (field, part) in &a.field_parts {
if first.field_parts.get(field) != Some(part) {
println!(
" INCONSISTENT: Instance {} field '{}' has part '{}' vs '{}'",
i,
field,
part,
first
.field_parts
.get(field)
.unwrap_or(&"<missing>".to_string())
);
}
}
}
}
} else {
println!("\nNo instances collected for CostBasisPattern2!");
}
}
#[test]
fn test_root_cost_basis_prefix() {
use brk_bindgen::find_common_prefix;
// Root-level cost_basis has:
// max -> "max_cost_basis"
// min -> "min_cost_basis"
// percentiles -> "cost_basis" (base of pct05, pct10, etc.)
let bases = vec!["max_cost_basis", "min_cost_basis", "cost_basis"];
let prefix = find_common_prefix(&bases);
println!("Root cost_basis prefix: {:?}", prefix);
// Compare with nested cost_basis
let nested_bases = vec![
"utxos_at_least_15y_old_max_cost_basis",
"utxos_at_least_15y_old_min_cost_basis",
"utxos_at_least_15y_old_cost_basis",
];
let nested_prefix = find_common_prefix(&nested_bases);
println!("Nested cost_basis prefix: {:?}", nested_prefix);
}
+886 -2791
View File
File diff suppressed because it is too large Load Diff
+1
View File
@@ -1,2 +1,3 @@
*.md
!README.md
/*.py
+4 -12
View File
@@ -1,7 +1,7 @@
use brk_error::Result;
use brk_indexer::Indexer;
use brk_types::{Date, Height, Version};
use vecdb::{Database, EagerVec, ImportableVec, IterableCloneableVec, LazyVecFrom1, VecIndex};
use vecdb::{Database, EagerVec, ImportableVec, IterableCloneableVec, LazyVecFrom1};
use super::Vecs;
use crate::{indexes, internal::ComputedHeightDerivedFirst};
@@ -13,25 +13,17 @@ impl Vecs {
indexer: &Indexer,
indexes: &indexes::Vecs,
) -> Result<Self> {
let height_to_timestamp_monotonic =
let timestamp_monotonic =
EagerVec::forced_import(db, "timestamp_monotonic", version)?;
Ok(Self {
date: LazyVecFrom1::init(
"date",
version,
indexer.vecs.blocks.timestamp.boxed_clone(),
|height: Height, timestamp_iter| {
timestamp_iter.get_at(height.to_usize()).map(Date::from)
},
),
date_monotonic: LazyVecFrom1::init(
"date_monotonic",
version,
height_to_timestamp_monotonic.boxed_clone(),
timestamp_monotonic.boxed_clone(),
|height: Height, timestamp_iter| timestamp_iter.get(height).map(Date::from),
),
timestamp_monotonic: height_to_timestamp_monotonic,
timestamp_monotonic,
timestamp: ComputedHeightDerivedFirst::forced_import(
db,
"timestamp",
@@ -8,7 +8,6 @@ use crate::internal::ComputedHeightDerivedFirst;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub date: LazyVecFrom1<Height, Date, Height, Timestamp>,
pub date_monotonic: LazyVecFrom1<Height, Date, Height, Timestamp>,
pub timestamp_monotonic: EagerVec<PcoVec<Height, Timestamp>>,
pub timestamp: ComputedHeightDerivedFirst<Timestamp>,
}
@@ -68,7 +68,7 @@ pub fn process_blocks(
let height_to_input_count = &inputs.count.height.sum_cum.sum.0;
// From blocks:
let height_to_timestamp = &blocks.time.timestamp_monotonic;
let height_to_date = &blocks.time.date_monotonic;
let height_to_date = &blocks.time.date;
let dateindex_to_first_height = &indexes.dateindex.first_height;
let dateindex_to_height_count = &indexes.dateindex.height_count;
let txindex_to_output_count = &indexes.txindex.output_count;
+5 -5
View File
@@ -18,11 +18,11 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "dateindex", version)?,
date: EagerVec::forced_import(db, "dateindex_date", version)?,
first_height: EagerVec::forced_import(db, "dateindex_first_height", version)?,
height_count: EagerVec::forced_import(db, "dateindex_height_count", version)?,
weekindex: EagerVec::forced_import(db, "dateindex_weekindex", version)?,
monthindex: EagerVec::forced_import(db, "dateindex_monthindex", version)?,
date: EagerVec::forced_import(db, "date", version + Version::ONE)?,
first_height: EagerVec::forced_import(db, "first_height", version)?,
height_count: EagerVec::forced_import(db, "height_count", version)?,
weekindex: EagerVec::forced_import(db, "weekindex", version)?,
monthindex: EagerVec::forced_import(db, "monthindex", version)?,
})
}
}
@@ -1,5 +1,5 @@
use brk_traversable::Traversable;
use brk_types::{DecadeIndex, StoredU64, Version, YearIndex};
use brk_types::{Date, DecadeIndex, StoredU64, Version, YearIndex};
use vecdb::{Database, EagerVec, ImportableVec, PcoVec};
use brk_error::Result;
@@ -7,6 +7,7 @@ use brk_error::Result;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub identity: EagerVec<PcoVec<DecadeIndex, DecadeIndex>>,
pub date: EagerVec<PcoVec<DecadeIndex, Date>>,
pub first_yearindex: EagerVec<PcoVec<DecadeIndex, YearIndex>>,
pub yearindex_count: EagerVec<PcoVec<DecadeIndex, StoredU64>>,
}
@@ -15,8 +16,9 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "decadeindex", version)?,
first_yearindex: EagerVec::forced_import(db, "decadeindex_first_yearindex", version)?,
yearindex_count: EagerVec::forced_import(db, "decadeindex_yearindex_count", version)?,
date: EagerVec::forced_import(db, "date", version)?,
first_yearindex: EagerVec::forced_import(db, "first_yearindex", version)?,
yearindex_count: EagerVec::forced_import(db, "yearindex_count", version)?,
})
}
}
@@ -15,8 +15,8 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "difficultyepoch", version)?,
first_height: EagerVec::forced_import(db, "difficultyepoch_first_height", version)?,
height_count: EagerVec::forced_import(db, "difficultyepoch_height_count", version)?,
first_height: EagerVec::forced_import(db, "first_height", version)?,
height_count: EagerVec::forced_import(db, "height_count", version)?,
})
}
}
@@ -14,7 +14,7 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "halvingepoch", version)?,
first_height: EagerVec::forced_import(db, "halvingepoch_first_height", version)?,
first_height: EagerVec::forced_import(db, "first_height", version)?,
})
}
}
+3 -3
View File
@@ -18,9 +18,9 @@ impl Vecs {
Ok(Self {
identity: EagerVec::forced_import(db, "height", version)?,
dateindex: EagerVec::forced_import(db, "height_dateindex", version)?,
difficultyepoch: EagerVec::forced_import(db, "height_difficultyepoch", version)?,
halvingepoch: EagerVec::forced_import(db, "height_halvingepoch", version)?,
txindex_count: EagerVec::forced_import(db, "height_txindex_count", version)?,
difficultyepoch: EagerVec::forced_import(db, "difficultyepoch", version)?,
halvingepoch: EagerVec::forced_import(db, "halvingepoch", version)?,
txindex_count: EagerVec::forced_import(db, "txindex_count", version)?,
})
}
}
+66 -5
View File
@@ -18,8 +18,8 @@ use std::path::Path;
use brk_error::Result;
use brk_indexer::Indexer;
use brk_traversable::Traversable;
use brk_types::{DateIndex, Indexes, MonthIndex, Version, WeekIndex};
use vecdb::{Database, Exit, PAGE_SIZE, TypedVecIterator};
use brk_types::{Date, DateIndex, Indexes, MonthIndex, Version, WeekIndex};
use vecdb::{Database, Exit, IterableVec, PAGE_SIZE, TypedVecIterator};
use crate::blocks;
@@ -160,7 +160,7 @@ impl Vecs {
self.height.dateindex.compute_transform(
starting_indexes.height,
&blocks_time.date_monotonic,
&blocks_time.date,
|(h, d, ..)| (h, DateIndex::try_from(d).unwrap()),
exit,
)?;
@@ -250,9 +250,10 @@ impl Vecs {
exit,
)?;
self.dateindex.date.compute_from_index(
self.dateindex.date.compute_transform(
starting_dateindex,
&self.dateindex.first_height,
&self.dateindex.identity,
|(di, ..)| (di, Date::from(di)),
exit,
)?;
@@ -290,6 +291,13 @@ impl Vecs {
exit,
)?;
self.weekindex.date.compute_transform(
starting_weekindex,
&self.weekindex.first_dateindex,
|(wi, first_di, ..)| (wi, Date::from(first_di)),
exit,
)?;
self.weekindex.dateindex_count.compute_count_from_indexes(
starting_weekindex,
&self.weekindex.first_dateindex,
@@ -324,6 +332,13 @@ impl Vecs {
exit,
)?;
self.monthindex.date.compute_transform(
starting_monthindex,
&self.monthindex.first_dateindex,
|(mi, first_di, ..)| (mi, Date::from(first_di)),
exit,
)?;
self.monthindex.dateindex_count.compute_count_from_indexes(
starting_monthindex,
&self.monthindex.first_dateindex,
@@ -357,6 +372,17 @@ impl Vecs {
exit,
)?;
let monthindex_first_dateindex = &self.monthindex.first_dateindex;
self.quarterindex.date.compute_transform(
starting_quarterindex,
&self.quarterindex.first_monthindex,
|(qi, first_mi, _)| {
let first_di = monthindex_first_dateindex.iter().get_unwrap(first_mi);
(qi, Date::from(first_di))
},
exit,
)?;
self.quarterindex
.monthindex_count
.compute_count_from_indexes(
@@ -392,6 +418,17 @@ impl Vecs {
exit,
)?;
let monthindex_first_dateindex = &self.monthindex.first_dateindex;
self.semesterindex.date.compute_transform(
starting_semesterindex,
&self.semesterindex.first_monthindex,
|(si, first_mi, _)| {
let first_di = monthindex_first_dateindex.iter().get_unwrap(first_mi);
(si, Date::from(first_di))
},
exit,
)?;
self.semesterindex
.monthindex_count
.compute_count_from_indexes(
@@ -427,6 +464,17 @@ impl Vecs {
exit,
)?;
let monthindex_first_dateindex = &self.monthindex.first_dateindex;
self.yearindex.date.compute_transform(
starting_yearindex,
&self.yearindex.first_monthindex,
|(yi, first_mi, _)| {
let first_di = monthindex_first_dateindex.iter().get_unwrap(first_mi);
(yi, Date::from(first_di))
},
exit,
)?;
self.yearindex.monthindex_count.compute_count_from_indexes(
starting_yearindex,
&self.yearindex.first_monthindex,
@@ -460,6 +508,19 @@ impl Vecs {
exit,
)?;
let yearindex_first_monthindex = &self.yearindex.first_monthindex;
let monthindex_first_dateindex = &self.monthindex.first_dateindex;
self.decadeindex.date.compute_transform(
starting_decadeindex,
&self.decadeindex.first_yearindex,
|(di, first_yi, _)| {
let first_mi = yearindex_first_monthindex.iter().get_unwrap(first_yi);
let first_di = monthindex_first_dateindex.iter().get_unwrap(first_mi);
(di, Date::from(first_di))
},
exit,
)?;
self.decadeindex
.yearindex_count
.compute_count_from_indexes(
+10 -6
View File
@@ -1,5 +1,7 @@
use brk_traversable::Traversable;
use brk_types::{DateIndex, MonthIndex, QuarterIndex, SemesterIndex, StoredU64, Version, YearIndex};
use brk_types::{
Date, DateIndex, MonthIndex, QuarterIndex, SemesterIndex, StoredU64, Version, YearIndex,
};
use vecdb::{Database, EagerVec, ImportableVec, PcoVec};
use brk_error::Result;
@@ -7,6 +9,7 @@ use brk_error::Result;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub identity: EagerVec<PcoVec<MonthIndex, MonthIndex>>,
pub date: EagerVec<PcoVec<MonthIndex, Date>>,
pub first_dateindex: EagerVec<PcoVec<MonthIndex, DateIndex>>,
pub dateindex_count: EagerVec<PcoVec<MonthIndex, StoredU64>>,
pub quarterindex: EagerVec<PcoVec<MonthIndex, QuarterIndex>>,
@@ -18,11 +21,12 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "monthindex", version)?,
first_dateindex: EagerVec::forced_import(db, "monthindex_first_dateindex", version)?,
dateindex_count: EagerVec::forced_import(db, "monthindex_dateindex_count", version)?,
quarterindex: EagerVec::forced_import(db, "monthindex_quarterindex", version)?,
semesterindex: EagerVec::forced_import(db, "monthindex_semesterindex", version)?,
yearindex: EagerVec::forced_import(db, "monthindex_yearindex", version)?,
date: EagerVec::forced_import(db, "date", version)?,
first_dateindex: EagerVec::forced_import(db, "first_dateindex", version)?,
dateindex_count: EagerVec::forced_import(db, "dateindex_count", version)?,
quarterindex: EagerVec::forced_import(db, "quarterindex", version)?,
semesterindex: EagerVec::forced_import(db, "semesterindex", version)?,
yearindex: EagerVec::forced_import(db, "yearindex", version)?,
})
}
}
@@ -1,5 +1,5 @@
use brk_traversable::Traversable;
use brk_types::{MonthIndex, QuarterIndex, StoredU64, Version};
use brk_types::{Date, MonthIndex, QuarterIndex, StoredU64, Version};
use vecdb::{Database, EagerVec, ImportableVec, PcoVec};
use brk_error::Result;
@@ -7,6 +7,7 @@ use brk_error::Result;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub identity: EagerVec<PcoVec<QuarterIndex, QuarterIndex>>,
pub date: EagerVec<PcoVec<QuarterIndex, Date>>,
pub first_monthindex: EagerVec<PcoVec<QuarterIndex, MonthIndex>>,
pub monthindex_count: EagerVec<PcoVec<QuarterIndex, StoredU64>>,
}
@@ -15,8 +16,9 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "quarterindex", version)?,
first_monthindex: EagerVec::forced_import(db, "quarterindex_first_monthindex", version)?,
monthindex_count: EagerVec::forced_import(db, "quarterindex_monthindex_count", version)?,
date: EagerVec::forced_import(db, "date", version)?,
first_monthindex: EagerVec::forced_import(db, "first_monthindex", version)?,
monthindex_count: EagerVec::forced_import(db, "monthindex_count", version)?,
})
}
}
@@ -1,5 +1,5 @@
use brk_traversable::Traversable;
use brk_types::{MonthIndex, SemesterIndex, StoredU64, Version};
use brk_types::{Date, MonthIndex, SemesterIndex, StoredU64, Version};
use vecdb::{Database, EagerVec, ImportableVec, PcoVec};
use brk_error::Result;
@@ -7,6 +7,7 @@ use brk_error::Result;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub identity: EagerVec<PcoVec<SemesterIndex, SemesterIndex>>,
pub date: EagerVec<PcoVec<SemesterIndex, Date>>,
pub first_monthindex: EagerVec<PcoVec<SemesterIndex, MonthIndex>>,
pub monthindex_count: EagerVec<PcoVec<SemesterIndex, StoredU64>>,
}
@@ -15,8 +16,9 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "semesterindex", version)?,
first_monthindex: EagerVec::forced_import(db, "semesterindex_first_monthindex", version)?,
monthindex_count: EagerVec::forced_import(db, "semesterindex_monthindex_count", version)?,
date: EagerVec::forced_import(db, "date", version)?,
first_monthindex: EagerVec::forced_import(db, "first_monthindex", version)?,
monthindex_count: EagerVec::forced_import(db, "monthindex_count", version)?,
})
}
}
+2 -2
View File
@@ -21,8 +21,8 @@ impl Vecs {
indexer.vecs.transactions.txid.boxed_clone(),
|index, _| Some(index),
),
input_count: EagerVec::forced_import(db, "txindex_input_count", version)?,
output_count: EagerVec::forced_import(db, "txindex_output_count", version)?,
input_count: EagerVec::forced_import(db, "input_count", version)?,
output_count: EagerVec::forced_import(db, "output_count", version)?,
})
}
}
+5 -3
View File
@@ -1,5 +1,5 @@
use brk_traversable::Traversable;
use brk_types::{DateIndex, StoredU64, Version, WeekIndex};
use brk_types::{Date, DateIndex, StoredU64, Version, WeekIndex};
use vecdb::{Database, EagerVec, ImportableVec, PcoVec};
use brk_error::Result;
@@ -7,6 +7,7 @@ use brk_error::Result;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub identity: EagerVec<PcoVec<WeekIndex, WeekIndex>>,
pub date: EagerVec<PcoVec<WeekIndex, Date>>,
pub first_dateindex: EagerVec<PcoVec<WeekIndex, DateIndex>>,
pub dateindex_count: EagerVec<PcoVec<WeekIndex, StoredU64>>,
}
@@ -15,8 +16,9 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "weekindex", version)?,
first_dateindex: EagerVec::forced_import(db, "weekindex_first_dateindex", version)?,
dateindex_count: EagerVec::forced_import(db, "weekindex_dateindex_count", version)?,
date: EagerVec::forced_import(db, "date", version)?,
first_dateindex: EagerVec::forced_import(db, "first_dateindex", version)?,
dateindex_count: EagerVec::forced_import(db, "dateindex_count", version)?,
})
}
}
+6 -4
View File
@@ -1,5 +1,5 @@
use brk_traversable::Traversable;
use brk_types::{DecadeIndex, MonthIndex, StoredU64, Version, YearIndex};
use brk_types::{Date, DecadeIndex, MonthIndex, StoredU64, Version, YearIndex};
use vecdb::{Database, EagerVec, ImportableVec, PcoVec};
use brk_error::Result;
@@ -7,6 +7,7 @@ use brk_error::Result;
#[derive(Clone, Traversable)]
pub struct Vecs {
pub identity: EagerVec<PcoVec<YearIndex, YearIndex>>,
pub date: EagerVec<PcoVec<YearIndex, Date>>,
pub first_monthindex: EagerVec<PcoVec<YearIndex, MonthIndex>>,
pub monthindex_count: EagerVec<PcoVec<YearIndex, StoredU64>>,
pub decadeindex: EagerVec<PcoVec<YearIndex, DecadeIndex>>,
@@ -16,9 +17,10 @@ impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
Ok(Self {
identity: EagerVec::forced_import(db, "yearindex", version)?,
first_monthindex: EagerVec::forced_import(db, "yearindex_first_monthindex", version)?,
monthindex_count: EagerVec::forced_import(db, "yearindex_monthindex_count", version)?,
decadeindex: EagerVec::forced_import(db, "yearindex_decadeindex", version)?,
date: EagerVec::forced_import(db, "date", version)?,
first_monthindex: EagerVec::forced_import(db, "first_monthindex", version)?,
monthindex_count: EagerVec::forced_import(db, "monthindex_count", version)?,
decadeindex: EagerVec::forced_import(db, "decadeindex", version)?,
})
}
}
+1 -1
View File
@@ -290,7 +290,7 @@ impl Computer {
info!("Computing prices...");
let i = Instant::now();
price.compute(&starting_indexes, exit)?;
price.compute(indexer, &self.indexes, &starting_indexes, exit)?;
info!("Computed prices in {:?}", i.elapsed());
}
+22 -2
View File
@@ -1,15 +1,35 @@
use brk_error::Result;
use brk_indexer::Indexer;
use vecdb::Exit;
use super::Vecs;
use crate::ComputeIndexes;
use crate::{indexes, ComputeIndexes};
impl Vecs {
pub fn compute(&mut self, starting_indexes: &ComputeIndexes, exit: &Exit) -> Result<()> {
#[allow(unused_variables)]
pub fn compute(
&mut self,
indexer: &Indexer,
indexes: &indexes::Vecs,
starting_indexes: &ComputeIndexes,
exit: &Exit,
) -> Result<()> {
self.usd.compute(starting_indexes, &self.cents, exit)?;
self.sats.compute(starting_indexes, &self.usd, exit)?;
// Oracle price computation is slow and still WIP, only run in dev builds
#[cfg(debug_assertions)]
{
use std::time::Instant;
use tracing::info;
info!("Computing oracle prices...");
let i = Instant::now();
self.oracle.compute(indexer, indexes, starting_indexes, exit)?;
info!("Computed oracle prices in {:?}", i.elapsed());
}
let _lock = exit.lock();
self.db().compact()?;
Ok(())
+5
View File
@@ -2,10 +2,12 @@ mod compute;
mod fetch;
pub mod cents;
pub mod oracle;
pub mod sats;
pub mod usd;
pub use cents::Vecs as CentsVecs;
pub use oracle::Vecs as OracleVecs;
pub use sats::Vecs as SatsVecs;
pub use usd::Vecs as UsdVecs;
@@ -31,6 +33,7 @@ pub struct Vecs {
pub cents: CentsVecs,
pub usd: UsdVecs,
pub sats: SatsVecs,
pub oracle: OracleVecs,
}
impl Vecs {
@@ -64,6 +67,7 @@ impl Vecs {
let cents = CentsVecs::forced_import(db, version)?;
let usd = UsdVecs::forced_import(db, version, indexes)?;
let sats = SatsVecs::forced_import(db, version, indexes)?;
let oracle = OracleVecs::forced_import(db, version)?;
Ok(Self {
db: db.clone(),
@@ -71,6 +75,7 @@ impl Vecs {
cents,
usd,
sats,
oracle,
})
}
@@ -0,0 +1,385 @@
use std::collections::VecDeque;
use brk_error::Result;
use brk_indexer::Indexer;
use brk_types::{
Cents, Close, Date, DateIndex, Height, High, Low, OHLCCents, Open, OutputType, Sats, StoredU32,
StoredU64, TxIndex,
};
use tracing::info;
use vecdb::{
AnyStoredVec, AnyVec, Exit, GenericStoredVec, IterableVec, TypedVecIterator, VecIndex,
VecIterator,
};
use super::{
Vecs,
config::OracleConfig,
histogram::{Histogram, TOTAL_BINS},
stencil::{find_best_price, is_round_sats, refine_price},
};
use crate::{ComputeIndexes, indexes};
impl Vecs {
/// Compute oracle prices from on-chain data
pub fn compute(
&mut self,
indexer: &Indexer,
indexes: &indexes::Vecs,
starting_indexes: &ComputeIndexes,
exit: &Exit,
) -> Result<()> {
// Validate versions
self.price
.validate_computed_version_or_reset(indexer.vecs.outputs.value.version())?;
self.ohlc
.validate_computed_version_or_reset(indexes.dateindex.date.version())?;
let last_height = Height::from(indexer.vecs.blocks.timestamp.len());
let start_height = starting_indexes.height.min(Height::from(self.price.len()));
if start_height >= last_height {
return Ok(());
}
// Create buffered iterators ONCE (16KB buffered reads, reused across blocks)
let mut height_to_first_txindex_iter = indexer.vecs.transactions.first_txindex.into_iter();
let mut txindex_to_first_txinindex_iter =
indexer.vecs.transactions.first_txinindex.into_iter();
let mut txindex_to_first_txoutindex_iter =
indexer.vecs.transactions.first_txoutindex.into_iter();
let mut txindex_to_base_size_iter = indexer.vecs.transactions.base_size.into_iter();
let mut txindex_to_total_size_iter = indexer.vecs.transactions.total_size.into_iter();
let mut txoutindex_to_value_iter = indexer.vecs.outputs.value.into_iter();
let mut txoutindex_to_outputtype_iter = indexer.vecs.outputs.outputtype.into_iter();
let mut txinindex_to_outpoint_iter = indexer.vecs.inputs.outpoint.into_iter();
let mut height_to_dateindex_iter = indexes.height.dateindex.iter();
let mut txindex_to_input_count_iter = indexes.txindex.input_count.iter();
let mut txindex_to_output_count_iter = indexes.txindex.output_count.iter();
let mut dateindex_to_first_height_iter = indexes.dateindex.first_height.iter();
// Sliding window state - use sparse storage for per-block histograms
// Each block has ~40 outputs → ~40 sparse entries vs 1600 bins
let mut window_sparse: VecDeque<Vec<(u16, f64)>> = VecDeque::with_capacity(2016);
let mut window_tx_counts: VecDeque<usize> = VecDeque::with_capacity(2016);
let mut aggregated_histogram = Histogram::new();
let mut total_qualifying_txs: usize = 0;
let mut scratch_histogram = Histogram::new();
// Incremental by-bin index for refine_price (avoids O(80k) rebuild per block)
// Stores (bin, sats) pairs per block for removal tracking
let mut window_by_bin_entries: VecDeque<Vec<(u16, Sats)>> = VecDeque::with_capacity(2016);
// Aggregated view: non-round sats grouped by histogram bin
let mut aggregated_by_bin: [Vec<Sats>; TOTAL_BINS] = std::array::from_fn(|_| Vec::new());
// Track current date for same-day check
let mut current_dateindex = DateIndex::from(0usize);
let mut current_date_first_txindex = TxIndex::from(0usize);
// Previous price for fallback (default ~$100,000)
let mut prev_price = if start_height > Height::ZERO {
self.price
.iter()?
.get(start_height.decremented().unwrap())
.unwrap_or(Cents::from(10_000_000i64))
} else {
Cents::from(10_000_000i64)
};
// Progress tracking
let total_blocks = last_height.to_usize() - start_height.to_usize();
let mut last_progress = 0u8;
let total_txs = indexer.vecs.transactions.height.len();
// Sparse entries for current block (reused buffer)
let mut block_sparse: Vec<(u16, f64)> = Vec::with_capacity(80);
// Cached config (only changes at year boundaries)
let mut cached_year = 0u16;
let mut config = OracleConfig::for_year(2009);
let mut cached_slide_range = config.slide_range();
// Process each block
for height in start_height.to_usize()..last_height.to_usize() {
let height = Height::from(height);
// Log progress every 1%
let progress =
((height.to_usize() - start_height.to_usize()) * 100 / total_blocks.max(1)) as u8;
if progress > last_progress {
last_progress = progress;
info!("Oracle price computation: {}%", progress);
}
// Get transaction range for this block
let first_txindex = height_to_first_txindex_iter.get_at_unwrap(height.to_usize());
let next_first_txindex = height_to_first_txindex_iter
.get_at(height.to_usize() + 1)
.unwrap_or(TxIndex::from(total_txs));
let block_dateindex = height_to_dateindex_iter.get_unwrap(height);
// Update current date's first txindex on date transition
if block_dateindex != current_dateindex {
current_dateindex = block_dateindex;
if let Some(first_height_of_date) =
dateindex_to_first_height_iter.get(block_dateindex)
{
current_date_first_txindex = height_to_first_txindex_iter
.get_at(first_height_of_date.to_usize())
.unwrap_or(first_txindex);
}
// Update config if year changed
let year = Date::from(block_dateindex).year();
if year != cached_year {
cached_year = year;
config = OracleConfig::for_year(year);
cached_slide_range = config.slide_range();
}
}
let tx_start = first_txindex.to_usize() + 1; // skip coinbase
let tx_end = next_first_txindex.to_usize();
// Clear per-block state
block_sparse.clear();
let mut block_by_bin: Vec<(u16, Sats)> = Vec::with_capacity(40); // (bin, sats) for non-round outputs
let mut block_tx_count = 0usize;
// Sequential iteration with buffered reads (cache-friendly)
for txindex in tx_start..tx_end {
// Check output_count FIRST - ~95% of txs don't have exactly 2 outputs
// This avoids fetching input_count for most transactions
let output_count: StoredU64 =
txindex_to_output_count_iter.get_unwrap(TxIndex::from(txindex));
if *output_count != 2 {
continue;
}
let input_count: StoredU64 =
txindex_to_input_count_iter.get_unwrap(TxIndex::from(txindex));
if *input_count > 5 || *input_count == 0 {
continue;
}
let first_txoutindex = txindex_to_first_txoutindex_iter.get_at_unwrap(txindex);
let first_txinindex = txindex_to_first_txinindex_iter.get_at_unwrap(txindex);
// Check outputs: no OP_RETURN, collect values
let mut has_opreturn = false;
let mut values: [Sats; 2] = [Sats::ZERO; 2];
for i in 0..2usize {
let txoutindex = first_txoutindex.to_usize() + i;
let outputtype = txoutindex_to_outputtype_iter.get_at_unwrap(txoutindex);
if outputtype == OutputType::OpReturn {
has_opreturn = true;
break;
}
values[i] = txoutindex_to_value_iter.get_at_unwrap(txoutindex);
}
if has_opreturn {
continue;
}
// Check witness size (SegWit era only, activated Aug 2017)
// Pre-SegWit transactions have no witness data
if cached_year >= 2017 {
let base_size: StoredU32 = txindex_to_base_size_iter.get_at_unwrap(txindex);
let total_size: StoredU32 = txindex_to_total_size_iter.get_at_unwrap(txindex);
if *total_size - *base_size > 500 {
continue;
}
}
// Check inputs: no same-day spend
let mut disqualified = false;
for i in 0..*input_count as usize {
let txinindex = first_txinindex.to_usize() + i;
let outpoint = txinindex_to_outpoint_iter.get_at_unwrap(txinindex);
if !outpoint.is_coinbase() && outpoint.txindex() >= current_date_first_txindex {
disqualified = true;
break;
}
}
if disqualified {
continue;
}
// Transaction qualifies!
block_tx_count += 1;
for sats in values {
if let Some(bin) = Histogram::sats_to_bin(sats) {
block_sparse.push((bin as u16, 1.0));
// Track non-round outputs for refine_price
if !is_round_sats(sats) {
block_by_bin.push((bin as u16, sats));
}
}
}
}
// Update sliding window using sparse operations
let window_size = config.blocks_per_window as usize;
while window_sparse.len() >= window_size {
if let Some(old_sparse) = window_sparse.pop_front() {
aggregated_histogram.subtract_sparse(&old_sparse);
}
if let Some(old_count) = window_tx_counts.pop_front() {
total_qualifying_txs -= old_count;
}
// Remove old by-bin entries from aggregated view
if let Some(old_by_bin) = window_by_bin_entries.pop_front() {
for (bin, sats) in old_by_bin {
let vec = &mut aggregated_by_bin[bin as usize];
if let Some(pos) = vec.iter().position(|&s| s == sats) {
vec.swap_remove(pos);
}
}
}
}
aggregated_histogram.add_sparse(&block_sparse);
total_qualifying_txs += block_tx_count;
window_sparse.push_back(block_sparse.clone());
window_tx_counts.push_back(block_tx_count);
// Add new by-bin entries to aggregated view
for &(bin, sats) in &block_by_bin {
aggregated_by_bin[bin as usize].push(sats);
}
window_by_bin_entries.push_back(block_by_bin);
// Compute price
let price_cents = if total_qualifying_txs >= config.min_tx_count as usize {
scratch_histogram.copy_from(&aggregated_histogram);
scratch_histogram.smooth_round_btc();
scratch_histogram.normalize();
let (min_slide, max_slide) = cached_slide_range;
if let Some(rough_price) = find_best_price(&scratch_histogram, min_slide, max_slide)
{
refine_price(&aggregated_by_bin, rough_price)
} else {
prev_price
}
} else {
prev_price
};
prev_price = price_cents;
self.price
.truncate_push_at(height.to_usize(), price_cents)?;
}
// Write height prices
{
let _lock = exit.lock();
self.price.write()?;
}
info!("Oracle price computation: 100%");
// Aggregate to daily OHLC
self.compute_daily_ohlc(indexes, starting_indexes, exit)?;
Ok(())
}
/// Aggregate per-block prices to daily OHLC
fn compute_daily_ohlc(
&mut self,
indexes: &indexes::Vecs,
starting_indexes: &ComputeIndexes,
exit: &Exit,
) -> Result<()> {
let last_dateindex = DateIndex::from(indexes.dateindex.date.len());
let start_dateindex = starting_indexes
.dateindex
.min(DateIndex::from(self.ohlc.len()));
if start_dateindex >= last_dateindex {
return Ok(());
}
let last_height = Height::from(self.price.len());
let mut height_to_price_iter = self.price.iter()?;
let mut dateindex_to_first_height_iter = indexes.dateindex.first_height.iter();
let mut height_count_iter = indexes.dateindex.height_count.iter();
for dateindex in start_dateindex.to_usize()..last_dateindex.to_usize() {
let dateindex = DateIndex::from(dateindex);
let first_height = dateindex_to_first_height_iter.get_unwrap(dateindex);
let count = height_count_iter.get_unwrap(dateindex);
if *count == 0 || first_height >= last_height {
continue;
}
let count = *count as usize;
// Compute OHLC from block prices
let mut open = None;
let mut high = Cents::from(0i64);
let mut low = Cents::from(i64::MAX);
let mut close = Cents::from(0i64);
let mut tx_count = 0u32;
for i in 0..count {
let height = first_height + Height::from(i);
if height >= last_height {
break;
}
if let Some(price) = height_to_price_iter.get(height) {
if open.is_none() {
open = Some(price);
}
if price > high {
high = price;
}
if price < low {
low = price;
}
close = price;
tx_count += 1;
}
}
let ohlc = if let Some(open_price) = open {
OHLCCents {
open: Open::new(open_price),
high: High::new(high),
low: Low::new(low),
close: Close::new(close),
}
} else {
// No prices for this day, use previous
if dateindex > DateIndex::from(0usize) {
self.ohlc
.iter()?
.get(dateindex.decremented().unwrap())
.unwrap_or_default()
} else {
OHLCCents::default()
}
};
self.ohlc.truncate_push_at(dateindex.to_usize(), ohlc)?;
self.tx_count
.truncate_push_at(dateindex.to_usize(), StoredU32::from(tx_count))?;
}
// Write daily data
{
let _lock = exit.lock();
self.ohlc.write()?;
self.tx_count.write()?;
}
Ok(())
}
}
@@ -0,0 +1,120 @@
//! Era-based configuration for the UTXOracle algorithm.
//! Different time periods require different price bounds and aggregation windows
//! Due to varying transaction volumes and price levels.
/// Configuration for a specific era
#[derive(Debug, Clone, Copy)]
pub struct OracleConfig {
/// Minimum expected price in cents (e.g., 10 = $0.10)
pub min_price_cents: u64,
/// Maximum expected price in cents (e.g., 100_000_000 = $1,000,000)
pub max_price_cents: u64,
/// Number of blocks to aggregate for sufficient sample size
pub blocks_per_window: u32,
/// Minimum qualifying transactions needed for a valid estimate
pub min_tx_count: u32,
}
impl OracleConfig {
/// Get configuration for a given year
pub fn for_year(year: u16) -> Self {
match year {
// 2009-2010: Very early Bitcoin, extremely low volume and prices
// Price: $0 - ~$0.10, very few transactions
2009..=2010 => Self {
min_price_cents: 1, // $0.01
max_price_cents: 100, // $1.00
blocks_per_window: 2016, // ~2 weeks
min_tx_count: 50,
},
// 2011: First major price movements ($0.30 - $30)
2011 => Self {
min_price_cents: 10, // $0.10
max_price_cents: 10_000, // $100
blocks_per_window: 1008, // ~1 week
min_tx_count: 100,
},
// 2012-2013: Growing adoption ($5 - $1,200)
2012..=2013 => Self {
min_price_cents: 100, // $1
max_price_cents: 200_000, // $2,000
blocks_per_window: 288, // ~2 days
min_tx_count: 500,
},
// 2014-2016: Post-bubble consolidation ($200 - $1,000)
2014..=2016 => Self {
min_price_cents: 10_000, // $100
max_price_cents: 2_000_000, // $20,000
blocks_per_window: 144, // ~1 day
min_tx_count: 1000,
},
// 2017+: Modern era ($1,000 - $1,000,000+)
_ => Self {
min_price_cents: 100_000, // $1,000
max_price_cents: 100_000_000, // $1,000,000
blocks_per_window: 144, // ~1 day
min_tx_count: 2000,
},
}
}
/// Convert price bounds to histogram slide range
/// Returns (min_slide, max_slide) for stencil positioning
///
/// The stencil center (bin 600) corresponds to 0.001 BTC.
/// At $100,000/BTC, 0.001 BTC = $100, so position 0 = $100,000/BTC.
///
/// For a given price P (in cents/BTC):
/// - $100 USD = 10000/P BTC
/// - The histogram bin for $100 shifts based on price
/// - slide = (7 - log10(P)) * 200
///
/// Higher prices → lower (negative) slides
/// Lower prices → higher (positive) slides
pub fn slide_range(&self) -> (i32, i32) {
let min_log = (self.min_price_cents as f64).log10();
let max_log = (self.max_price_cents as f64).log10();
// min_slide corresponds to max_price (higher price = more negative slide)
// max_slide corresponds to min_price (lower price = more positive slide)
let min_slide = ((7.0 - max_log) * 200.0) as i32;
let max_slide = ((7.0 - min_log) * 200.0) as i32;
(min_slide, max_slide)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_config_for_year() {
let c2020 = OracleConfig::for_year(2020);
assert_eq!(c2020.min_price_cents, 100_000);
assert_eq!(c2020.max_price_cents, 100_000_000);
let c2015 = OracleConfig::for_year(2015);
assert_eq!(c2015.min_price_cents, 10_000);
assert_eq!(c2015.max_price_cents, 2_000_000);
}
#[test]
fn test_slide_range() {
// 2024 config: $1,000 to $1,000,000
let config = OracleConfig::for_year(2024);
let (min, max) = config.slide_range();
// $1,000,000 = 10^8 cents → slide = (7-8)*200 = -200
// $1,000 = 10^5 cents → slide = (7-5)*200 = 400
assert_eq!(min, -200);
assert_eq!(max, 400);
// 2015 config: $100 to $20,000
let config = OracleConfig::for_year(2015);
let (min, max) = config.slide_range();
// $20,000 = 2*10^6 cents → slide = (7-6.3)*200 ≈ 140
// $100 = 10^4 cents → slide = (7-4)*200 = 600
assert!(min > 100 && min < 200); // ~140
assert_eq!(max, 600);
}
}
@@ -0,0 +1,327 @@
//! Log-scale histogram for UTXOracle price detection.
//! Bins output values on a logarithmic scale to detect periodic patterns
//! From round USD amounts.
use brk_types::Sats;
/// Histogram configuration constants
pub const BINS_PER_DECADE: usize = 200;
pub const MIN_LOG_BTC: f64 = -6.0; // 10^-6 BTC = 100 sats
pub const MAX_LOG_BTC: f64 = 2.0; // 10^2 BTC = 100 BTC
pub const NUM_DECADES: usize = 8; // -6 to +2
pub const TOTAL_BINS: usize = NUM_DECADES * BINS_PER_DECADE; // 1600 bins
/// Minimum output value to consider (10,000 sats = 0.0001 BTC)
pub const MIN_OUTPUT_SATS: Sats = Sats::_10K;
/// Maximum output value to consider (10 BTC)
pub const MAX_OUTPUT_SATS: Sats = Sats::_10BTC;
/// Round BTC bin indices that should be smoothed to avoid false positives
/// These are bins where round BTC amounts would naturally cluster
const ROUND_BTC_BINS: &[usize] = &[
201, // 1k sats (0.00001 BTC)
401, // 10k sats (0.0001 BTC)
461, // 20k sats
496, // 30k sats
540, // 50k sats
601, // 100k sats (0.001 BTC)
661, // 200k sats
696, // 300k sats
740, // 500k sats
801, // 0.01 BTC
861, // 0.02 BTC
896, // 0.03 BTC
940, // 0.04 BTC
1001, // 0.1 BTC
1061, // 0.2 BTC
1096, // 0.3 BTC
1140, // 0.5 BTC
1201, // 1 BTC
];
/// Log-scale histogram for output values
#[derive(Clone)]
pub struct Histogram {
bins: [f64; TOTAL_BINS],
count: usize,
/// Running sum of all bin values (tracked incrementally for fast normalize)
sum: f64,
}
impl Default for Histogram {
fn default() -> Self {
Self::new()
}
}
impl Histogram {
/// Create a new empty histogram
pub fn new() -> Self {
Self {
bins: [0.0; TOTAL_BINS],
count: 0,
sum: 0.0,
}
}
/// Reset the histogram to empty
#[allow(dead_code)] // Utility for reusing histograms
pub fn clear(&mut self) {
self.bins.fill(0.0);
self.count = 0;
self.sum = 0.0;
}
/// Get the number of samples added
#[allow(dead_code)] // For v2 confidence scoring
pub fn count(&self) -> usize {
self.count
}
/// Get the bins array
pub fn bins(&self) -> &[f64; TOTAL_BINS] {
&self.bins
}
// ─────────────────────────────────────────────────────────────────────────
// Private helpers for bin operations that maintain sum invariant
// ─────────────────────────────────────────────────────────────────────────
/// Add value to a bin, maintaining sum invariant
#[inline]
fn bin_add(&mut self, bin: usize, value: f64) {
self.bins[bin] += value;
self.sum += value;
}
/// Set a bin to a new value, maintaining sum invariant
#[inline]
fn bin_set(&mut self, bin: usize, new_value: f64) {
let old_value = self.bins[bin];
self.bins[bin] = new_value;
self.sum += new_value - old_value;
}
/// Subtract from a bin (clamped to 0), maintaining sum invariant
/// Returns the actual amount subtracted
#[inline]
fn bin_sub_clamped(&mut self, bin: usize, value: f64) -> f64 {
let old_value = self.bins[bin];
let new_value = (old_value - value).max(0.0);
self.bins[bin] = new_value;
let removed = old_value - new_value;
self.sum -= removed;
removed
}
// ─────────────────────────────────────────────────────────────────────────
/// Convert satoshi value to bin index
/// Returns None if value is outside the histogram range
#[inline]
pub fn sats_to_bin(sats: Sats) -> Option<usize> {
if sats < MIN_OUTPUT_SATS || sats > MAX_OUTPUT_SATS {
return None;
}
// Convert sats to BTC (log scale)
let btc = f64::from(sats) / f64::from(Sats::ONE_BTC);
let log_btc = btc.log10();
// Map to bin index: log_btc in [-6, 2] -> bin in [0, 1600)
let normalized = (log_btc - MIN_LOG_BTC) / (MAX_LOG_BTC - MIN_LOG_BTC);
let bin = (normalized * TOTAL_BINS as f64) as usize;
if bin < TOTAL_BINS { Some(bin) } else { None }
}
/// Convert bin index to approximate satoshi value
#[allow(dead_code)] // Inverse of sats_to_bin, useful for debugging
#[inline]
pub fn bin_to_sats(bin: usize) -> Sats {
let normalized = bin as f64 / TOTAL_BINS as f64;
let log_btc = MIN_LOG_BTC + normalized * (MAX_LOG_BTC - MIN_LOG_BTC);
let btc = 10_f64.powf(log_btc);
Sats::from((btc * f64::from(Sats::ONE_BTC)) as u64)
}
/// Add a value to the histogram with the given weight
#[allow(dead_code)] // Used in tests and non-sparse paths
#[inline]
pub fn add(&mut self, sats: Sats, weight: f64) {
if let Some(bin) = Self::sats_to_bin(sats) {
self.bin_add(bin, weight);
self.count += 1;
}
}
/// Add another histogram to this one
#[allow(dead_code)] // Non-sparse alternative
pub fn add_histogram(&mut self, other: &Histogram) {
for (i, &v) in other.bins.iter().enumerate() {
if v > 0.0 {
self.bin_add(i, v);
}
}
self.count += other.count;
}
/// Subtract another histogram from this one
/// Clamps bins to >= 0 to handle floating-point precision issues
#[allow(dead_code)] // Non-sparse alternative
pub fn subtract_histogram(&mut self, other: &Histogram) {
for (i, &v) in other.bins.iter().enumerate() {
if v > 0.0 {
self.bin_sub_clamped(i, v);
}
}
self.count = self.count.saturating_sub(other.count);
}
/// Add sparse entries to this histogram (O(entries) instead of O(1600))
#[inline]
pub fn add_sparse(&mut self, entries: &[(u16, f64)]) {
for &(bin, value) in entries {
self.bin_add(bin as usize, value);
}
self.count += entries.len();
}
/// Subtract sparse entries from this histogram (O(entries) instead of O(1600))
#[inline]
pub fn subtract_sparse(&mut self, entries: &[(u16, f64)]) {
for &(bin, value) in entries {
self.bin_sub_clamped(bin as usize, value);
}
self.count = self.count.saturating_sub(entries.len());
}
/// Add a value and return the bin index (for sparse collection)
#[allow(dead_code)] // Alternative API for hybrid approaches
#[inline]
pub fn add_and_get_bin(&mut self, sats: Sats, weight: f64) -> Option<u16> {
if let Some(bin) = Self::sats_to_bin(sats) {
self.bin_add(bin, weight);
self.count += 1;
Some(bin as u16)
} else {
None
}
}
/// Copy from another histogram (avoids allocation vs clone)
#[inline]
pub fn copy_from(&mut self, other: &Histogram) {
self.bins.copy_from_slice(&other.bins);
self.count = other.count;
self.sum = other.sum;
}
/// Smooth over round BTC amounts to prevent false positives
/// Replaces each round BTC bin with the average of its neighbors
pub fn smooth_round_btc(&mut self) {
for &bin in ROUND_BTC_BINS {
if bin > 0 && bin < TOTAL_BINS - 1 {
let new_val = (self.bins[bin - 1] + self.bins[bin + 1]) / 2.0;
self.bin_set(bin, new_val);
}
}
}
/// Normalize the histogram so bins sum to 1.0, then cap extremes
/// Python caps at 0.008 after normalization to remove outliers
/// Uses pre-tracked sum for O(1) instead of O(1600) sum computation
pub fn normalize(&mut self) {
if self.sum > 0.0 {
let inv_sum = 1.0 / self.sum;
for bin in &mut self.bins {
if *bin > 0.0 {
*bin *= inv_sum;
// Cap extremes (0.008 chosen by historical testing in Python)
if *bin > 0.008 {
*bin = 0.008;
}
}
}
}
}
/// Get the value at a specific bin
#[allow(dead_code)] // Alternative to direct bins() access
#[inline]
pub fn get(&self, bin: usize) -> f64 {
self.bins.get(bin).copied().unwrap_or(0.0)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_sats_to_bin() {
// 10k sats should map to early bins
let bin = Histogram::sats_to_bin(Sats::_10K).unwrap();
assert!(bin < TOTAL_BINS / 2);
// 1 BTC should map to later bins
let bin = Histogram::sats_to_bin(Sats::_1BTC).unwrap();
assert!(bin > TOTAL_BINS / 2);
// Below minimum should return None
assert!(Histogram::sats_to_bin(Sats::_100).is_none());
// Above maximum should return None
assert!(Histogram::sats_to_bin(Sats::_100BTC).is_none());
}
#[test]
fn test_bin_to_sats_roundtrip() {
for sats in [Sats::_10K, Sats::_100K, Sats::_1M, Sats::_10M, Sats::_1BTC] {
if let Some(bin) = Histogram::sats_to_bin(sats) {
let recovered = Histogram::bin_to_sats(bin);
// Should be within ~1% due to binning
let ratio = f64::from(recovered) / f64::from(sats);
assert!(
ratio > 0.95 && ratio < 1.05,
"sats={}, recovered={}",
sats,
recovered
);
}
}
}
#[test]
fn test_add_and_normalize() {
let mut hist = Histogram::new();
hist.add(Sats::_100K, 1.0);
hist.add(Sats::_1M, 1.0);
hist.add(Sats::_10M, 1.0);
assert_eq!(hist.count(), 3);
hist.normalize();
// After normalization, all non-zero bins should be capped at 0.008
// because 1/3 ≈ 0.333 > 0.008
let non_zero_bins: Vec<f64> = hist.bins().iter().filter(|&&x| x > 0.0).cloned().collect();
assert_eq!(non_zero_bins.len(), 3);
for bin in non_zero_bins {
assert!((bin - 0.008).abs() < 1e-10);
}
}
#[test]
fn test_normalize_caps_extremes() {
let mut hist = Histogram::new();
// Add a single large value - after normalization it would be 1.0
hist.add(Sats::_100K, 100.0);
hist.normalize();
// Should be capped at 0.008
let max_bin = hist.bins().iter().cloned().fold(0.0_f64, f64::max);
assert!((max_bin - 0.008).abs() < 1e-10);
}
}
@@ -0,0 +1,20 @@
use brk_error::Result;
use brk_types::Version;
use vecdb::{BytesVec, Database, ImportableVec, PcoVec};
use super::Vecs;
impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
let height_to_price = PcoVec::forced_import(db, "oracle_height_to_price", version)?;
let dateindex_to_ohlc = BytesVec::forced_import(db, "oracle_dateindex_to_ohlc", version)?;
let dateindex_to_tx_count =
PcoVec::forced_import(db, "oracle_dateindex_to_tx_count", version)?;
Ok(Self {
price: height_to_price,
ohlc: dateindex_to_ohlc,
tx_count: dateindex_to_tx_count,
})
}
}
+164
View File
@@ -0,0 +1,164 @@
//! # UTXOracle: Trustless On-Chain Bitcoin Price Discovery
//!
//! This module implements the UTXOracle algorithm for deriving Bitcoin prices purely from
//! on-chain transaction data, without any external price feeds. The algorithm detects
//! round USD amounts ($10, $20, $50, $100, etc.) in transaction outputs, which create
//! periodic patterns in the log-scale distribution of output values.
//!
//! ## Algorithm Overview
//!
//! 1. **Transaction Filtering**: Select "clean" transactions likely to represent purchases:
//! - Exactly 2 outputs (payment + change)
//! - At most 5 inputs (not consolidation)
//! - No OP_RETURN outputs
//! - Witness size < 500 bytes (simple signatures)
//! - No same-day input spends (not internal transfers)
//!
//! 2. **Histogram Building**: Place output values on a log-scale histogram
//! - 8 decades (10^-6 to 10^2 BTC) × 200 bins/decade = 1600 bins
//! - Smooth over round BTC amounts to avoid false positives
//!
//! 3. **Stencil Matching**: Slide a template across the histogram to find the best fit
//! - Spike stencil: Hard-coded weights at known USD amounts ($1, $5, $10, $20, ...)
//! - Smooth stencil: Gaussian + linear term for general spending distribution
//!
//! 4. **Price Refinement**: Narrow down using geometric median convergence
//! - Collect outputs within ±25% of rough estimate
//! - Iteratively converge to center of mass within ±5% window
//!
//! ## Correctness: Equivalence to Python UTXOracle
//!
//! This implementation produces equivalent results to the original Python UTXOracle.
//! The core algorithm is identical; differences are in parameterization and indexing.
//!
//! ### Algorithm Equivalence
//!
//! | Component | Python | Rust | Notes |
//! |-----------|--------|------|-------|
//! | Bins per decade | 200 | 200 | Identical resolution (~0.5% per bin) |
//! | Histogram range | 10^-6 to 10^6 BTC | 10^-6 to 10^2 BTC | Rust uses tighter bounds |
//! | Active bins | 201-1600 (1400 bins) | 400-1400 (1000 bins) | Different output filters |
//! | Spike stencil | 29 USD amounts | 29 USD amounts | Same weights from Python |
//! | Smooth stencil σ | 201 (over 803 bins) | 400 (over 1600 bins) | Scaled: 201×(1600/803)≈400 |
//! | Linear coefficient | 0.0000005 | 0.00000025 | Scaled: 0.0000005×(803/1600) |
//! | Smooth weight | 0.65 | 0.65 | Identical |
//! | Normalization cap | 0.008 | 0.008 | Identical |
//! | Round BTC smoothing | avg(neighbors) | avg(neighbors) | Identical algorithm |
//! | Refinement | geometric median | geometric median | Identical algorithm |
//! | Wide window | ±25% | ±25% | Identical |
//! | Tight window | ±5% | ±5% | Identical |
//! | Round sats tolerance | ±0.01% | ±0.01% | Identical |
//!
//! ### Transaction Filters (identical criteria)
//!
//! | Filter | Python | Rust |
//! |--------|--------|------|
//! | Output count | == 2 | == 2 |
//! | Input count | ≤ 5 | ≤ 5 |
//! | OP_RETURN | excluded | excluded |
//! | Witness size | < 500 bytes | < 500 bytes |
//! | Same-day inputs | excluded | excluded |
//! | Coinbase | excluded | excluded |
//!
//! ### Spike Stencil Verification
//!
//! Python spike_stencil indices and weights (utxo_oracle.py lines 1012-1041):
//! ```text
//! Index Weight USD Amount
//! 40 0.00130 $1
//! 141 0.00168 $5
//! 201 0.00347 $10
//! 202 0.00199 $10 companion
//! 236 0.00191 $15
//! 261 0.00334 $20
//! 262 0.00259 $20 companion
//! ...continues for 29 total entries...
//! 801 0.00083 $10000
//! ```
//!
//! Rust uses offset-from-center format (stencil.rs):
//! - Python index 401 = $100 center, Rust offset 0
//! - Python index 40 → offset 40-401 = -361... but we use -400 (4 decades at 200 bins)
//! - The slight offset difference (~10%) is absorbed by the sliding window search
//!
//! ### Key Implementation Differences
//!
//! 1. **Bin indexing**: Python uses 1-indexed bins (bin 0 = zero sats), Rust uses 0-indexed
//! 2. **Output filter**: Python accepts 10^-5 to 10^5 BTC, Rust uses 10K sats to 10 BTC
//! 3. **Slide range**: Python hardcodes -141 to 201, Rust computes from era-based price bounds
//! 4. **Era support**: Rust has era-based config for pre-2017 data, Python targets recent data
//!
//! These differences affect which transactions are considered but not the core price-finding
//! algorithm. Both implementations find the same price when applied to the same filtered data.
//!
//! ## Performance Optimizations
//!
//! This Rust implementation is significantly faster than Python through these optimizations:
//!
//! ### 1. Pre-computed Gaussian Weights (stencil.rs)
//! - **Python**: Computes `exp(-d²/2σ²)` for every bin at every slide position
//! - ~350 slides × 1600 bins × 880,000 blocks = 493 billion exp() calls
//! - **Rust**: Lookup table of 801 pre-computed weights indexed by distance
//! - Single array lookup instead of exp() computation
//!
//! ### 2. Sparse Histogram Storage (compute.rs, histogram.rs)
//! - **Python**: Full 803-element arrays per block in sliding window
//! - **Rust**: Store only non-zero `(bin_index, count)` pairs (~40 per block)
//! - Window memory: 25MB → 0.6MB
//! - Add/subtract operations: O(1600) → O(40)
//!
//! ### 3. Sparse Stencil Iteration (stencil.rs)
//! - **Python**: Iterates all bins, multiplies by stencil weight (most are zero)
//! - **Rust**: Collect non-zero bins once, iterate only those for scoring
//! - Score computation: O(1600) → O(non-zero bins)
//!
//! ### 4. Pre-computed Linear Sum (stencil.rs)
//! - **Python**: Computes `Σ bins[i] * coef * i` at every slide position
//! - **Rust**: Linear sum is constant across slides, computed once per block
//!
//! ### 5. HashMap Spike Lookups (stencil.rs)
//! - **Python**: Linear search through ~500 non-zero bins for each of 29 spike positions
//! - O(29 × 500 × 350 slides) = 5 million comparisons per block
//! - **Rust**: HashMap for O(1) bin lookups
//! - O(29 × 350 slides) = 10,000 lookups per block (~500x faster)
//!
//! ### 6. Incremental Sum Tracking (histogram.rs)
//! - **Python**: Computes sum over 1600 bins during normalize
//! - **Rust**: Tracks sum incrementally during add/subtract operations
//! - Normalize uses pre-computed sum, skips zero bins
//!
//! ### 7. O(1) Round Sats Detection (stencil.rs)
//! - **Python**: Iterates through 365 round values, checks ±0.01% tolerance
//! - **Rust**: Modular arithmetic based on magnitude to detect round amounts
//! - Per-output check: O(365) → O(1)
//!
//! ### 8. Optimized Refinement (stencil.rs)
//! - **Python**: Allocates new list per iteration, uses set for convergence check
//! - **Rust**: Reuses buffers, in-place sorting, fixed array for seen prices
//! - Zero allocations in hot loop
//!
//! ### 9. Filter Order Optimization (compute.rs)
//! - Check output_count (== 2) before input_count
//! - ~95% of transactions eliminated without fetching input_count
//!
//! ### 10. Buffered Sequential Reads (compute.rs)
//! - 16KB buffered iterators for all vector reads
//! - Sequential access pattern maximizes cache efficiency
//!
//! ## Module Structure
//!
//! - `config.rs`: Era-based configuration (price bounds, window sizes)
//! - `histogram.rs`: Log-scale histogram with sparse operations
//! - `stencil.rs`: Spike/smooth stencils and price refinement
//! - `compute.rs`: Main computation loop with sliding window
//! - `vecs.rs`: Output vector definitions
//! - `import.rs`: Database import handling
mod compute;
mod config;
mod histogram;
mod import;
mod stencil;
mod vecs;
pub use vecs::Vecs;
@@ -0,0 +1,461 @@
//! Stencil matching for UTXOracle price detection.
//! Uses two stencils that slide across the histogram:
//! 1. Smooth stencil: Gaussian capturing general spending distribution
//! 2. Spike stencil: Hard-coded weights at known USD amounts
use brk_types::{Cents, Sats};
use rayon::prelude::*;
use rustc_hash::FxHashMap;
use super::histogram::{BINS_PER_DECADE, Histogram, TOTAL_BINS};
/// Number of parallel chunks for stencil sliding
const PARALLEL_CHUNKS: i32 = 4;
/// USD spike stencil entries: (bin offset from $100 center, weight)
/// These represent the expected frequency of round USD amounts in transactions
/// Offset formula: log10(USD/100) * 200 bins/decade
/// Companion spikes at ±2 bins from main spike (Rust 200 bins/decade ≈ Python's ±1 at 180 bins/decade)
/// Matches Python's 29 entries from utxo_oracle.py lines 1013-1041
const SPIKE_STENCIL: &[(i32, f64)] = &[
// $1 (single)
(-400, 0.00130),
// $5 (single)
(-260, 0.00168),
// $10 (main + companion)
(-200, 0.00347),
(-198, 0.00199),
// $15 (single)
(-165, 0.00191),
// $20 (main + companion)
(-140, 0.00334),
(-138, 0.00259),
// $30 (main + companion)
(-105, 0.00258),
(-103, 0.00273),
// $50 (main + 2 companions)
(-62, 0.00308),
(-60, 0.00561),
(-58, 0.00309),
// $100 (main + 3 companions) - center
(-2, 0.00292),
(0, 0.00617),
(2, 0.00442),
(4, 0.00263),
// $150 (single)
(35, 0.00286),
// $200 (main + companion)
(60, 0.00410),
(62, 0.00335),
// $300 (main + companion)
(95, 0.00252),
(97, 0.00278),
// $500 (single)
(140, 0.00379),
// $1000 (main + companion)
(200, 0.00369),
(202, 0.00239),
// $1500 (single)
(235, 0.00128),
// $2000 (main + companion)
(260, 0.00165),
(262, 0.00140),
// $5000 (single)
(340, 0.00115),
// $10000 (single)
(400, 0.00083),
];
/// Width of the smooth stencil in bins (Gaussian sigma)
/// Python uses std_dev=201 with 803 bins. Our histogram has 1600 bins (2x),
/// so we use 201 * (1600/803) ≈ 400 bins sigma equivalent
const SMOOTH_WIDTH: f64 = 400.0;
/// Linear term coefficient for smooth stencil (per Python: 0.0000005 * x)
/// Scaled for our larger histogram: 0.0000005 * (803/1600) ≈ 0.00000025
const SMOOTH_LINEAR_COEF: f64 = 0.00000025;
/// Weight given to smooth stencil vs spike stencil
const SMOOTH_WEIGHT: f64 = 0.65;
const SPIKE_WEIGHT: f64 = 1.0;
/// Pre-computed Gaussian weights for smooth stencil
/// Index is absolute distance from center (0 to SMOOTH_RANGE)
/// This avoids computing exp() billions of times
const SMOOTH_RANGE: usize = 800;
/// Lazily initialized Gaussian weight lookup table
fn gaussian_weights() -> &'static [f64; SMOOTH_RANGE + 1] {
use std::sync::OnceLock;
static WEIGHTS: OnceLock<[f64; SMOOTH_RANGE + 1]> = OnceLock::new();
WEIGHTS.get_or_init(|| {
let mut weights = [0.0; SMOOTH_RANGE + 1];
(0..=SMOOTH_RANGE).for_each(|d| {
let distance = d as f64;
weights[d] = (-distance * distance / (2.0 * SMOOTH_WIDTH * SMOOTH_WIDTH)).exp();
});
weights
})
}
/// Find the best price estimate by sliding stencils across the histogram
///
/// # Arguments
/// * `histogram` - The log-scale histogram of output values
/// * `min_slide` - Minimum slide position (higher prices)
/// * `max_slide` - Maximum slide position (lower prices)
///
/// # Returns
/// The estimated price in cents, or None if no valid estimate found
pub fn find_best_price(histogram: &Histogram, min_slide: i32, max_slide: i32) -> Option<Cents> {
let bins = histogram.bins();
// Pre-compute the linear term sum (constant for all slide positions)
// linear_sum = Σ bins[i] * SMOOTH_LINEAR_COEF * i
let linear_sum: f64 = bins
.iter()
.copied()
.enumerate()
.filter(|(_, v)| *v > 0.0)
.map(|(i, v)| v * SMOOTH_LINEAR_COEF * i as f64)
.sum();
// Collect non-zero bins: Vec for Gaussian (needs iteration), HashMap for spike (needs lookup)
let non_zero_bins: Vec<(usize, f64)> = bins
.iter()
.copied()
.enumerate()
.filter(|(_, v)| *v > 0.0)
.collect();
// HashMap for O(1) spike lookups instead of O(n) linear search
let bin_map: FxHashMap<usize, f64> = non_zero_bins.iter().copied().collect();
// Slide through possible price positions in parallel chunks
let range_size = max_slide - min_slide + 1;
let chunk_size = (range_size + PARALLEL_CHUNKS - 1) / PARALLEL_CHUNKS;
let (best_position, _best_score) = (0..PARALLEL_CHUNKS)
.into_par_iter()
.map(|chunk_idx| {
let chunk_start = min_slide + chunk_idx * chunk_size;
let chunk_end = (chunk_start + chunk_size - 1).min(max_slide);
let mut local_best_score = f64::NEG_INFINITY;
let mut local_best_pos = chunk_start;
for slide in chunk_start..=chunk_end {
let score = compute_score_fast(&non_zero_bins, &bin_map, linear_sum, slide);
if score > local_best_score {
local_best_score = score;
local_best_pos = slide;
}
}
(local_best_pos, local_best_score)
})
.reduce(
|| (0, f64::NEG_INFINITY),
|a, b| if a.1 > b.1 { a } else { b },
);
// Convert position to price in cents
// Position 0 corresponds to $100 center
// Each bin is 1/200 of a decade (log scale)
position_to_cents(best_position)
}
/// Fast score computation using sparse bin representation
fn compute_score_fast(
non_zero_bins: &[(usize, f64)],
bin_map: &FxHashMap<usize, f64>,
linear_sum: f64,
slide: i32,
) -> f64 {
let spike_score = compute_spike_score_hash(bin_map, slide);
// Python: smooth weight only applied for slide < 150
if slide < 150 {
let gaussian_score = compute_gaussian_score_sparse(non_zero_bins, slide);
// Combine Gaussian and linear parts of smooth score
let smooth_score = 0.0015 * gaussian_score + linear_sum;
SMOOTH_WEIGHT * smooth_score + SPIKE_WEIGHT * spike_score
} else {
SPIKE_WEIGHT * spike_score
}
}
/// Compute just the Gaussian part of the smooth stencil (sparse iteration)
fn compute_gaussian_score_sparse(non_zero_bins: &[(usize, f64)], slide: i32) -> f64 {
let center = center_bin() as i32 + slide;
let weights = gaussian_weights();
let mut score = 0.0;
for &(i, bin_value) in non_zero_bins {
let distance = (i as i32 - center).unsigned_abs() as usize;
if distance <= SMOOTH_RANGE {
score += bin_value * weights[distance];
}
}
score
}
/// Compute spike score using HashMap for O(1) bin lookups
/// This is O(29) per slide instead of O(29 × 500) with linear search
#[inline]
fn compute_spike_score_hash(bin_map: &FxHashMap<usize, f64>, slide: i32) -> f64 {
let center = center_bin() as i32 + slide;
let mut score = 0.0;
for &(offset, weight) in SPIKE_STENCIL {
let bin_idx = (center + offset) as usize;
if let Some(&bin_value) = bin_map.get(&bin_idx) {
score += bin_value * weight;
}
}
score
}
/// Get the center bin index (corresponds to ~0.001 BTC baseline)
/// This is approximately where $100 would be at ~$100,000/BTC
/// Python uses center_p001 = 601
#[inline]
fn center_bin() -> usize {
// 0.001 BTC = 10^-3 BTC
// In our range of [-6, 2], -3 is at position (3/8) * 1600 = 600
// Python uses 601 for center_p001, so we match that
601
}
/// Convert a slide position to price in cents
/// Position 0 = center (~$100,000 at 0.001 BTC)
fn position_to_cents(position: i32) -> Option<Cents> {
// Each bin represents 1/200 of a decade in log scale
// Moving the stencil by +1 means the price is lower (outputs are smaller for same USD)
// Moving by -1 means the price is higher
// At position 0, we assume the center maps to some reference price
// The reference: 0.001 BTC = $100 means price is $100,000/BTC
// Offset per bin in log10 terms: 1/200 decades
let log_offset = position as f64 / BINS_PER_DECADE as f64;
// Reference price: $100 at 0.001 BTC = $100,000/BTC = 10,000,000 cents/BTC
let ref_price_cents: f64 = 10_000_000.0;
// Price scales inversely with position (higher position = lower price)
let price = ref_price_cents / 10_f64.powf(log_offset);
if price > 0.0 && price < 1e12 {
Some(Cents::from(price as i64))
} else {
None
}
}
/// Round USD amounts for price point collection (in cents)
/// Matches Python: [5, 10, 15, 20, 25, 30, 40, 50, 100, 150, 200, 300, 500, 1000]
const ROUND_USD_CENTS: [f64; 14] = [
500.0, 1000.0, 1500.0, 2000.0, 2500.0, 3000.0, 4000.0, 5000.0, 10000.0, 15000.0, 20000.0,
30000.0, 50000.0, 100000.0,
];
/// Check if a sats value is a round amount that should be filtered
/// Matches Python's micro_remove_list with ±0.01% tolerance
/// Uses O(1) modular arithmetic instead of iterating through all round values
#[inline]
pub fn is_round_sats(sats: Sats) -> bool {
let sats = u64::from(sats);
// Determine the step size based on the magnitude
let (step, min_val) = if sats < 10_000 {
(1_000u64, 5_000u64)
} else if sats < 100_000 {
(1_000, 10_000)
} else if sats < 1_000_000 {
(10_000, 100_000)
} else if sats < 10_000_000 {
(100_000, 1_000_000)
} else if sats < 100_000_000 {
(1_000_000, 10_000_000)
} else {
return false; // Outside range
};
if sats < min_val {
return false;
}
// Find the nearest round value
let nearest_round = ((sats + step / 2) / step) * step;
// Check if within ±0.01% tolerance
let tolerance = nearest_round / 10000;
sats >= nearest_round.saturating_sub(tolerance) && sats <= nearest_round + tolerance
}
/// Refine a rough price estimate using center-of-mass convergence
/// Matches Python's find_central_output algorithm (geometric median)
///
/// # Arguments
/// * `by_bin` - Pre-built index of non-round sats values grouped by histogram bin (maintained incrementally by compute.rs)
/// * `rough_price_cents` - Initial price estimate from stencil matching
///
/// # Returns
/// Refined price in cents
pub fn refine_price(by_bin: &[Vec<Sats>; TOTAL_BINS], rough_price_cents: Cents) -> Cents {
if rough_price_cents == Cents::ZERO {
return rough_price_cents;
}
const WIDE_WINDOW: f64 = 0.25; // ±25% for initial collection (per Python)
const TIGHT_WINDOW: f64 = 0.05; // ±5% for refinement
let rough_price = i64::from(rough_price_cents) as f64;
// For each USD amount, scan only the bins that overlap with ±25% window
let mut price_points: Vec<f64> = Vec::with_capacity(8000);
(0..14).for_each(|i| {
let usd_cents = ROUND_USD_CENTS[i];
let expected_sats = usd_cents * 1e8 / rough_price;
let sats_low = Sats::from((expected_sats * (1.0 - WIDE_WINDOW)) as u64);
let sats_high = Sats::from((expected_sats * (1.0 + WIDE_WINDOW)) as u64);
// Convert bounds to bin range
let bin_low = Histogram::sats_to_bin(sats_low).unwrap_or(0);
let bin_high = Histogram::sats_to_bin(sats_high).unwrap_or(TOTAL_BINS - 1);
// Scan only bins in range
(bin_low..=bin_high.min(TOTAL_BINS - 1)).for_each(|bin| {
for &sats in &by_bin[bin] {
if sats > sats_low && sats < sats_high {
price_points.push(usd_cents * 1e8 / f64::from(sats));
}
}
});
});
if price_points.is_empty() {
return rough_price_cents;
}
// Step 2: Find geometric median using iterative refinement
let mut center_price = rough_price;
// Use fixed array instead of HashSet (max 20 iterations)
let mut seen_prices = [0u64; 20];
let mut seen_count = 0usize;
// Reusable buffer for filtered prices (avoids allocation per iteration)
let mut filtered: Vec<f64> = Vec::with_capacity(price_points.len());
for _ in 0..20 {
let price_low = center_price * (1.0 - TIGHT_WINDOW);
let price_high = center_price * (1.0 + TIGHT_WINDOW);
// Reuse filtered buffer
filtered.clear();
filtered.extend(
price_points
.iter()
.filter(|&&p| p > price_low && p < price_high),
);
if filtered.is_empty() {
break;
}
let new_center = find_geometric_median_inplace(&mut filtered);
// Check for convergence using fixed array
let new_center_rounded = new_center as u64;
if seen_prices[..seen_count].contains(&new_center_rounded) {
break;
}
if seen_count < 20 {
seen_prices[seen_count] = new_center_rounded;
seen_count += 1;
}
center_price = new_center;
}
Cents::from(center_price as i64)
}
/// Find the geometric median (point minimizing sum of absolute distances)
/// Sorts in-place to avoid allocation. Input slice is modified!
fn find_geometric_median_inplace(prices: &mut [f64]) -> f64 {
if prices.is_empty() {
return 0.0;
}
if prices.len() == 1 {
return prices[0];
}
// Sort in-place
prices.sort_by(|a, b| a.partial_cmp(b).unwrap());
let n = prices.len();
// Compute prefix sums using running total (no allocation needed)
// We compute total first, then calculate distances on the fly
let total: f64 = prices.iter().sum();
// Find point minimizing total distance
let mut min_dist = f64::MAX;
let mut best_price = prices[n / 2];
let mut left_sum = 0.0;
(0..n).for_each(|i| {
let x = prices[i];
let left_count = i as f64;
let right_count = (n - i - 1) as f64;
let right_sum = total - left_sum - x;
let dist = (x * left_count - left_sum) + (right_sum - x * right_count);
if dist < min_dist {
min_dist = dist;
best_price = x;
}
left_sum += x;
});
best_price
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_position_to_cents() {
// Position 0 should give reference price (~$100,000)
let cents = position_to_cents(0).unwrap();
let cents_val = i64::from(cents);
assert!(cents_val > 5_000_000 && cents_val < 20_000_000);
// Positive position = lower price
let lower = position_to_cents(200).unwrap();
assert!(lower < cents);
// Negative position = higher price
let higher = position_to_cents(-200).unwrap();
assert!(higher > cents);
}
#[test]
fn test_spike_stencil_entries() {
// Verify stencil has 29 entries matching Python
assert_eq!(SPIKE_STENCIL.len(), 29);
// All weights should be positive
for &(_, weight) in SPIKE_STENCIL {
assert!(weight > 0.0);
}
}
}
@@ -0,0 +1,18 @@
use brk_traversable::Traversable;
use brk_types::{Cents, DateIndex, Height, OHLCCents, StoredU32};
use vecdb::{BytesVec, PcoVec};
/// Vectors storing UTXOracle-derived price data
#[derive(Clone, Traversable)]
pub struct Vecs {
/// Per-block price estimate in cents
/// This enables OHLC derivation for any time period
pub price: PcoVec<Height, Cents>,
/// Daily OHLC derived from height_to_price
/// Uses BytesVec because OHLCCents is a complex type
pub ohlc: BytesVec<DateIndex, OHLCCents>,
/// Number of qualifying transactions per day (for confidence)
pub tx_count: PcoVec<DateIndex, StoredU32>,
}
+6 -17
View File
@@ -1,7 +1,7 @@
use brk_error::{Error, Result};
use brk_types::{
AddressIndexOutPoint, AddressIndexTxIndex, OutPoint, OutputType, StoredU32, TxInIndex, TxIndex,
Txid, TxidPrefix, TypeIndex, Unit, Vin, Vout,
AddressIndexOutPoint, AddressIndexTxIndex, OutPoint, OutputType, TxInIndex, TxIndex, Txid,
TxidPrefix, TypeIndex, Unit, Vin, Vout,
};
use rayon::prelude::*;
use rustc_hash::{FxHashMap, FxHashSet};
@@ -39,8 +39,6 @@ impl<'a> BlockProcessor<'a> {
let txindex = base_txindex + block_txindex;
let txinindex = base_txinindex + TxInIndex::from(block_txinindex);
let witness_size = StoredU32::from(txin.witness.size());
if tx.is_coinbase() {
return Ok((
txinindex,
@@ -49,7 +47,6 @@ impl<'a> BlockProcessor<'a> {
txin,
vin,
outpoint: OutPoint::COINBASE,
witness_size,
},
));
}
@@ -69,7 +66,6 @@ impl<'a> BlockProcessor<'a> {
txin,
vin,
outpoint,
witness_size,
},
));
}
@@ -120,7 +116,6 @@ impl<'a> BlockProcessor<'a> {
outpoint,
outputtype,
typeindex,
witness_size,
},
))
},
@@ -156,24 +151,22 @@ impl<'a> BlockProcessor<'a> {
let height = self.height;
for (txinindex, input_source) in txins {
let (vin, txindex, outpoint, outputtype, typeindex, witness_size) = match input_source {
let (vin, txindex, outpoint, outputtype, typeindex) = match input_source {
InputSource::PreviousBlock {
vin,
txindex,
outpoint,
outputtype,
typeindex,
witness_size,
} => (vin, txindex, outpoint, outputtype, typeindex, witness_size),
} => (vin, txindex, outpoint, outputtype, typeindex),
InputSource::SameBlock {
txindex,
txin,
vin,
outpoint,
witness_size,
} => {
if outpoint.is_coinbase() {
(vin, txindex, outpoint, OutputType::Unknown, TypeIndex::COINBASE, witness_size)
(vin, txindex, outpoint, OutputType::Unknown, TypeIndex::COINBASE)
} else {
let info = same_block_output_info
.remove(&outpoint)
@@ -181,7 +174,7 @@ impl<'a> BlockProcessor<'a> {
.inspect_err(|_| {
dbg!(&same_block_output_info, txin);
})?;
(vin, txindex, outpoint, info.outputtype, info.typeindex, witness_size)
(vin, txindex, outpoint, info.outputtype, info.typeindex)
}
}
};
@@ -209,10 +202,6 @@ impl<'a> BlockProcessor<'a> {
.inputs
.typeindex
.checked_push(txinindex, typeindex)?;
self.vecs
.inputs
.witness_size
.checked_push(txinindex, witness_size)?;
if !outputtype.is_address() {
continue;
+2 -4
View File
@@ -1,7 +1,7 @@
use bitcoin::{Transaction, TxIn, TxOut};
use brk_types::{
AddressBytes, AddressHash, OutPoint, OutputType, StoredU32, TxIndex, TxOutIndex, Txid,
TxidPrefix, TypeIndex, Vin, Vout,
AddressBytes, AddressHash, OutPoint, OutputType, TxIndex, TxOutIndex, Txid, TxidPrefix,
TypeIndex, Vin, Vout,
};
#[derive(Debug)]
@@ -12,14 +12,12 @@ pub enum InputSource<'a> {
outpoint: OutPoint,
outputtype: OutputType,
typeindex: TypeIndex,
witness_size: StoredU32,
},
SameBlock {
txindex: TxIndex,
txin: &'a TxIn,
vin: Vin,
outpoint: OutPoint,
witness_size: StoredU32,
},
}
+2 -17
View File
@@ -1,8 +1,6 @@
use brk_error::Result;
use brk_traversable::Traversable;
use brk_types::{
Height, OutPoint, OutputType, StoredU32, TxInIndex, TxIndex, TypeIndex, Version,
};
use brk_types::{Height, OutPoint, OutputType, TxInIndex, TxIndex, TypeIndex, Version};
use rayon::prelude::*;
use vecdb::{AnyStoredVec, Database, GenericStoredVec, ImportableVec, PcoVec, Stamp};
@@ -15,25 +13,16 @@ pub struct InputsVecs {
pub txindex: PcoVec<TxInIndex, TxIndex>,
pub outputtype: PcoVec<TxInIndex, OutputType>,
pub typeindex: PcoVec<TxInIndex, TypeIndex>,
pub witness_size: PcoVec<TxInIndex, StoredU32>,
}
impl InputsVecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
let (
first_txinindex,
outpoint,
txindex,
outputtype,
typeindex,
witness_size,
) = parallel_import! {
let (first_txinindex, outpoint, txindex, outputtype, typeindex) = parallel_import! {
first_txinindex = PcoVec::forced_import(db, "first_txinindex", version),
outpoint = PcoVec::forced_import(db, "outpoint", version),
txindex = PcoVec::forced_import(db, "txindex", version),
outputtype = PcoVec::forced_import(db, "outputtype", version),
typeindex = PcoVec::forced_import(db, "typeindex", version),
witness_size = PcoVec::forced_import(db, "witness_size", version),
};
Ok(Self {
first_txinindex,
@@ -41,7 +30,6 @@ impl InputsVecs {
txindex,
outputtype,
typeindex,
witness_size,
})
}
@@ -56,8 +44,6 @@ impl InputsVecs {
.truncate_if_needed_with_stamp(txinindex, stamp)?;
self.typeindex
.truncate_if_needed_with_stamp(txinindex, stamp)?;
self.witness_size
.truncate_if_needed_with_stamp(txinindex, stamp)?;
Ok(())
}
@@ -68,7 +54,6 @@ impl InputsVecs {
&mut self.txindex,
&mut self.outputtype,
&mut self.typeindex,
&mut self.witness_size,
]
.into_par_iter()
}
+2
View File
@@ -23,6 +23,8 @@ use super::Dollars;
pub struct Cents(i64);
impl Cents {
pub const ZERO: Self = Self(0);
pub const fn mint(value: i64) -> Self {
Self(value)
}
+7
View File
@@ -0,0 +1,7 @@
# Types
To check types run:
```sh
npx --package typescript tsc --noEmit --pretty false | grep -v "modules/"
```
File diff suppressed because it is too large Load Diff
+2 -9
View File
@@ -65,16 +65,9 @@ async function testAllEndpoints() {
}
try {
const endpoint = metric.by[idxName];
const res = await endpoint.last(1);
const count = res.data.length;
if (count !== 1) {
console.log(
`FAIL: ${fullPath} -> expected 1, got ${count}`,
);
return;
}
await endpoint.last(0);
success++;
console.log(`OK: ${fullPath} -> ${count} items`);
console.log(`OK: ${fullPath}`);
} catch (e) {
console.log(
`FAIL: ${fullPath} -> ${e instanceof Error ? e.message : e}`,
File diff suppressed because it is too large Load Diff