global: snapshot

This commit is contained in:
nym21
2025-12-26 22:41:36 +01:00
parent d538280f4b
commit de93f08e93
120 changed files with 1125 additions and 1773 deletions

1
.gitignore vendored
View File

@@ -12,6 +12,7 @@ bridge/
# Ignored
_*
/*.md
# Logs
*.log*

38
Cargo.lock generated
View File

@@ -1377,9 +1377,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.2.50"
version = "1.2.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f50d563227a1c37cc0a263f64eca3334388c01c5e4c4861a9def205c614383c"
checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203"
dependencies = [
"find-msvc-tools",
"jobserver",
@@ -2093,9 +2093,9 @@ dependencies = [
[[package]]
name = "find-msvc-tools"
version = "0.1.5"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff"
[[package]]
name = "fixedbitset"
@@ -2106,8 +2106,6 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
[[package]]
name = "fjall"
version = "3.0.0-rc.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff63be4348f42ed3c0c50175785ff14d0a833915c6b499a31f91d0e3ec5fc337"
dependencies = [
"byteorder-lite",
"byteview",
@@ -2148,9 +2146,9 @@ checksum = "8ce81f49ae8a0482e4c55ea62ebbd7e5a686af544c00b9d090bba3ff9be97b3d"
[[package]]
name = "flume"
version = "0.11.1"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095"
checksum = "5e139bc46ca777eb5efaf62df0ab8cc5fd400866427e56c68b22e414e53bd3be"
dependencies = [
"spin",
]
@@ -2985,9 +2983,7 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "lsm-tree"
version = "3.0.0-rc.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58db607ef63b47ede3640944f69e0872bc169e938253bcaa8dcf32631638ceab"
version = "3.0.0-rc.8"
dependencies = [
"byteorder-lite",
"byteview",
@@ -3120,9 +3116,9 @@ dependencies = [
[[package]]
name = "nodejs-built-in-modules"
version = "0.0.1"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef70c89437ecec6d0052d1e39efee1830f37fe7d466d9aa44f08b04297f31d49"
checksum = "a5eb86a92577833b75522336f210c49d9ebd7dd55a44d80a92e68c668a75f27c"
[[package]]
name = "nom"
@@ -3636,9 +3632,9 @@ dependencies = [
[[package]]
name = "oxc_resolver"
version = "11.16.1"
version = "11.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5467a6fd6e1b2a0cc25f4f89a5ece8594213427e430ba8f0a8f900808553cb1e"
checksum = "0fbba32382c25ae7d741aaf3c32475b7a1697540aa8e10b0b5e1cd3bfa1ef257"
dependencies = [
"cfg-if",
"fast-glob",
@@ -4037,9 +4033,9 @@ dependencies = [
[[package]]
name = "pnp"
version = "0.12.6"
version = "0.12.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf3870860a3e16e4944bcfa4c5c722305c54c64d487b72d0bb17606bf14b95c2"
checksum = "a6e38320d5a8e386647f622067588bdb338c9e6e43eb32cf6f8991dd0e8f0046"
dependencies = [
"byteorder",
"concurrent_lru",
@@ -4500,9 +4496,9 @@ dependencies = [
[[package]]
name = "schemars"
version = "1.1.0"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9558e172d4e8533736ba97870c4b2cd63f84b382a3d6eb063da41b91cce17289"
checksum = "54e910108742c57a770f492731f99be216a52fadd361b06c8fb59d74ccc267d2"
dependencies = [
"chrono",
"dyn-clone",
@@ -4515,9 +4511,9 @@ dependencies = [
[[package]]
name = "schemars_derive"
version = "1.1.0"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301858a4023d78debd2353c7426dc486001bddc91ae31a76fb1f55132f7e2633"
checksum = "4908ad288c5035a8eb12cfdf0d49270def0a268ee162b75eeee0f85d155a7c45"
dependencies = [
"proc-macro2",
"quote",

View File

@@ -65,7 +65,8 @@ brk_traversable_derive = { version = "0.1.0-alpha.1", path = "crates/brk_travers
byteview = "0.9.1"
color-eyre = "0.6.5"
derive_deref = "1.1.1"
fjall = "3.0.0-rc.6"
# fjall = "3.0.0-rc.6"
fjall = { path = "../fjall" }
jiff = "0.2.17"
log = "0.4.29"
mimalloc = { version = "0.1.48", features = ["v3"] }
@@ -73,7 +74,7 @@ minreq = { version = "2.14.1", features = ["https", "serde_json"] }
parking_lot = "0.12.5"
rayon = "1.11.0"
rustc-hash = "2.1.1"
schemars = "1.1.0"
schemars = "1.2.0"
serde = "1.0.228"
serde_bytes = "0.11.19"
serde_derive = "1.0.228"

View File

@@ -22,8 +22,10 @@ use brk::types::Height;
| Feature | Crate | Description |
|---------|-------|-------------|
| `bencher` | `brk_bencher` | Benchmarking utilities |
| `binder` | `brk_binder` | Client code generation |
| `bundler` | `brk_bundler` | JS bundling |
| `client` | `brk_client` | Generated Rust API client |
| `computer` | `brk_computer` | Metric computation |
| `error` | `brk_error` | Error types |
| `fetcher` | `brk_fetcher` | Price data fetching |
@@ -40,3 +42,5 @@ use brk::types::Height;
| `store` | `brk_store` | Key-value storage |
| `traversable` | `brk_traversable` | Data traversal |
| `types` | `brk_types` | Domain types |
Use `full` to enable all features.

View File

@@ -1,9 +1,11 @@
use std::collections::HashMap;
use std::fs::{self, File};
use std::io::{self, Write};
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use std::time::SystemTime;
use std::{
collections::HashMap,
fs::{self, File},
io::{self, Write},
os::unix::fs::MetadataExt,
path::{Path, PathBuf},
time::SystemTime,
};
pub struct DiskMonitor {
cache: HashMap<PathBuf, (u64, SystemTime)>, // path -> (bytes_used, mtime)

View File

@@ -1,6 +1,8 @@
use std::fs::File;
use std::io::{self, Write};
use std::path::Path;
use std::{
fs::File,
io::{self, Write},
path::Path,
};
#[cfg(target_os = "linux")]
use std::fs;

View File

@@ -1,6 +1,8 @@
use std::fs::File;
use std::io::{self, Write};
use std::path::Path;
use std::{
fs::File,
io::{self, Write},
path::Path,
};
#[cfg(target_os = "linux")]
use std::fs;

View File

@@ -4,39 +4,43 @@ Code generation for BRK client libraries.
## What It Enables
Generate typed metric catalogs and constants for JavaScript/TypeScript clients. Keeps frontend code in sync with available metrics without manual maintenance.
Generate typed client libraries for Rust, JavaScript/TypeScript, and Python from the OpenAPI specification. Keeps frontend code in sync with available metrics and API endpoints without manual maintenance.
## Key Features
- **Metric catalog**: Generates `metrics.js` with all metric IDs and their supported indexes
- **Compression**: Metric names compressed via word-to-base52 mapping for smaller bundles
- **Mining pools**: Generates `pools.js` with pool ID to name mapping
- **Version sync**: Generates `version.js` matching server version
- **Multi-language**: Generates Rust, JavaScript, and Python clients
- **OpenAPI-driven**: Extracts endpoints and schemas from the OpenAPI spec
- **Metric catalog**: Includes all metric IDs and their supported indexes
- **Type definitions**: Generates types/interfaces from JSON Schema
- **Selective output**: Generate only the languages you need
## Core API
```rust,ignore
generate_js_files(&query, &modules_path)?;
use brk_binder::{generate_clients, ClientOutputPaths};
let paths = ClientOutputPaths::new()
.rust("crates/brk_client/src/lib.rs")
.javascript("modules/brk-client/index.js")
.python("packages/brk_client/__init__.py");
generate_clients(&vecs, &openapi_json, &paths)?;
```
## Generated Files
## Generated Clients
```
modules/brk-client/generated/
├── version.js # export const VERSION = "vX.Y.Z"
├── metrics.js # INDEXES, COMPRESSED_METRIC_TO_INDEXES
└── pools.js # POOL_ID_TO_POOL_NAME
```
| Language | Contents |
|----------|----------|
| Rust | Typed API client using `brk_types`, metric catalog |
| JavaScript | ES module with JSDoc types, metric catalog, fetch helpers |
| Python | Typed client with dataclasses, metric catalog |
## Metric Compression
To minimize bundle size, metric names are compressed:
1. Extract all words from metric names
2. Sort by frequency
3. Map to base52 codes (A-Z, a-z)
4. Store compressed metric → index group mapping
Each client includes:
- All REST API endpoints as typed functions
- Complete metric catalog with index information
- Type definitions for request/response schemas
## Built On
- `brk_query` for metric enumeration
- `brk_types` for mining pool data
- `brk_types` for type schemas

View File

@@ -1,8 +1,4 @@
use std::collections::HashSet;
use std::fmt::Write as FmtWrite;
use std::fs;
use std::io;
use std::path::Path;
use std::{collections::HashSet, fmt::Write as FmtWrite, fs, io, path::Path};
use brk_types::{Index, TreeNode};
use serde_json::Value;
@@ -24,26 +20,14 @@ pub fn generate_javascript_client(
) -> io::Result<()> {
let mut output = String::new();
// Header
writeln!(output, "// Auto-generated BRK JavaScript client").unwrap();
writeln!(output, "// Do not edit manually\n").unwrap();
// Generate type definitions from OpenAPI schemas
generate_type_definitions(&mut output, schemas);
// Generate the base client class
generate_base_client(&mut output);
// Generate index accessor factory functions
generate_index_accessors(&mut output, &metadata.index_set_patterns);
// Generate structural pattern factory functions
generate_structural_patterns(&mut output, &metadata.structural_patterns, metadata);
// Generate tree JSDoc typedefs
generate_tree_typedefs(&mut output, &metadata.catalog, metadata);
// Generate the main client class with tree and API methods
generate_main_client(&mut output, &metadata.catalog, metadata, endpoints);
fs::write(output_path, output)?;
@@ -51,7 +35,6 @@ pub fn generate_javascript_client(
Ok(())
}
/// Generate JSDoc type definitions from OpenAPI schemas
fn generate_type_definitions(output: &mut String, schemas: &TypeSchemas) {
if schemas.is_empty() {
return;
@@ -63,10 +46,8 @@ fn generate_type_definitions(output: &mut String, schemas: &TypeSchemas) {
let js_type = schema_to_js_type_ctx(schema, Some(name));
if is_primitive_alias(schema) {
// Simple type alias: @typedef {number} Height
writeln!(output, "/** @typedef {{{}}} {} */", js_type, name).unwrap();
} else if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) {
// Object type with properties
writeln!(output, "/**").unwrap();
writeln!(output, " * @typedef {{Object}} {}", name).unwrap();
for (prop_name, prop_schema) in props {
@@ -86,14 +67,12 @@ fn generate_type_definitions(output: &mut String, schemas: &TypeSchemas) {
}
writeln!(output, " */").unwrap();
} else {
// Other schemas - just typedef
writeln!(output, "/** @typedef {{{}}} {} */", js_type, name).unwrap();
}
}
writeln!(output).unwrap();
}
/// Check if schema represents a primitive type alias (like Height = number)
fn is_primitive_alias(schema: &Value) -> bool {
schema.get("properties").is_none()
&& schema.get("items").is_none()
@@ -102,7 +81,6 @@ fn is_primitive_alias(schema: &Value) -> bool {
&& schema.get("enum").is_none()
}
/// Convert a single JSON type string to JavaScript type
fn json_type_to_js(ty: &str, schema: &Value, current_type: Option<&str>) -> String {
match ty {
"integer" | "number" => "number".to_string(),
@@ -117,10 +95,8 @@ fn json_type_to_js(ty: &str, schema: &Value, current_type: Option<&str>) -> Stri
format!("{}[]", item_type)
}
"object" => {
// Check if it has additionalProperties (dict-like)
if let Some(add_props) = schema.get("additionalProperties") {
let value_type = schema_to_js_type_ctx(add_props, current_type);
// Use TypeScript index signature syntax for recursive types
return format!("{{ [key: string]: {} }}", value_type);
}
"Object".to_string()
@@ -129,9 +105,7 @@ fn json_type_to_js(ty: &str, schema: &Value, current_type: Option<&str>) -> Stri
}
}
/// Convert JSON Schema to JavaScript/JSDoc type with context for recursive types
fn schema_to_js_type_ctx(schema: &Value, current_type: Option<&str>) -> String {
// Handle allOf (try each element until we find a resolvable type)
if let Some(all_of) = schema.get("allOf").and_then(|v| v.as_array()) {
for item in all_of {
let resolved = schema_to_js_type_ctx(item, current_type);
@@ -141,12 +115,10 @@ fn schema_to_js_type_ctx(schema: &Value, current_type: Option<&str>) -> String {
}
}
// Handle $ref
if let Some(ref_path) = schema.get("$ref").and_then(|r| r.as_str()) {
return ref_path.rsplit('/').next().unwrap_or("*").to_string();
}
// Handle enum (array of string values)
if let Some(enum_values) = schema.get("enum").and_then(|e| e.as_array()) {
let literals: Vec<String> = enum_values
.iter()
@@ -158,9 +130,7 @@ fn schema_to_js_type_ctx(schema: &Value, current_type: Option<&str>) -> String {
}
}
// Handle type field (can be string or array of strings)
if let Some(ty) = schema.get("type") {
// Handle array of types like ["string", "null"] for Optional
if let Some(type_array) = ty.as_array() {
let types: Vec<String> = type_array
.iter()
@@ -187,13 +157,11 @@ fn schema_to_js_type_ctx(schema: &Value, current_type: Option<&str>) -> String {
}
}
// Handle single type string
if let Some(ty_str) = ty.as_str() {
return json_type_to_js(ty_str, schema, current_type);
}
}
// Handle anyOf/oneOf
if let Some(variants) = schema
.get("anyOf")
.or_else(|| schema.get("oneOf"))
@@ -203,7 +171,6 @@ fn schema_to_js_type_ctx(schema: &Value, current_type: Option<&str>) -> String {
.iter()
.map(|v| schema_to_js_type_ctx(v, current_type))
.collect();
// Filter out * and null for cleaner unions
let filtered: Vec<_> = types.iter().filter(|t| *t != "*").collect();
if !filtered.is_empty() {
return format!(
@@ -218,7 +185,6 @@ fn schema_to_js_type_ctx(schema: &Value, current_type: Option<&str>) -> String {
return format!("({})", types.join("|"));
}
// Check for format hint without type (common in OpenAPI)
if let Some(format) = schema.get("format").and_then(|f| f.as_str()) {
return match format {
"int32" | "int64" => "number".to_string(),
@@ -231,7 +197,6 @@ fn schema_to_js_type_ctx(schema: &Value, current_type: Option<&str>) -> String {
"*".to_string()
}
/// Generate the base BrkClient class with HTTP functionality
fn generate_base_client(output: &mut String) {
writeln!(
output,
@@ -350,7 +315,6 @@ class BrkClientBase {{
.unwrap();
}
/// Generate index accessor factory functions
fn generate_index_accessors(output: &mut String, patterns: &[IndexSetPattern]) {
if patterns.is_empty() {
return;
@@ -359,7 +323,6 @@ fn generate_index_accessors(output: &mut String, patterns: &[IndexSetPattern]) {
writeln!(output, "// Index accessor factory functions\n").unwrap();
for pattern in patterns {
// Generate JSDoc typedef for the accessor
writeln!(output, "/**").unwrap();
writeln!(output, " * @template T").unwrap();
writeln!(output, " * @typedef {{Object}} {}", pattern.name).unwrap();
@@ -406,12 +369,10 @@ fn generate_index_accessors(output: &mut String, patterns: &[IndexSetPattern]) {
}
}
/// Convert an Index to a camelCase field name (e.g., DateIndex -> byDateIndex)
fn index_to_camel_case(index: &Index) -> String {
format!("by{}", to_pascal_case(index.serialize_long()))
}
/// Generate structural pattern factory functions
fn generate_structural_patterns(
output: &mut String,
patterns: &[StructuralPattern],
@@ -424,10 +385,8 @@ fn generate_structural_patterns(
writeln!(output, "// Reusable structural pattern factories\n").unwrap();
for pattern in patterns {
// Check if this pattern is parameterizable (has field positions detected)
let is_parameterizable = pattern.is_parameterizable();
// Generate JSDoc typedef
writeln!(output, "/**").unwrap();
if pattern.is_generic {
writeln!(output, " * @template T").unwrap();
@@ -497,7 +456,6 @@ fn generate_structural_patterns(
}
}
/// Generate a field using parameterized (prepend/append) metric name construction
fn generate_parameterized_field(
output: &mut String,
field: &PatternField,
@@ -507,9 +465,7 @@ fn generate_parameterized_field(
) {
let field_name_js = to_camel_case(&field.name);
// For branch fields, pass the accumulated name to nested pattern
if metadata.is_pattern_type(&field.rust_type) {
// Get the field position to determine how to transform the accumulated name
let child_acc = if let Some(pos) = pattern.get_field_position(&field.name) {
match pos {
FieldNamePosition::Append(suffix) => format!("`${{acc}}{}`", suffix),
@@ -518,7 +474,6 @@ fn generate_parameterized_field(
FieldNamePosition::SetBase(base) => format!("'{}'", base),
}
} else {
// Fallback: append field name
format!("`${{acc}}_{}`", field.name)
};
@@ -531,7 +486,6 @@ fn generate_parameterized_field(
return;
}
// For leaf fields, construct the metric path based on position
let metric_expr = if let Some(pos) = pattern.get_field_position(&field.name) {
match pos {
FieldNamePosition::Append(suffix) => format!("`/${{acc}}{suffix}`"),
@@ -540,7 +494,6 @@ fn generate_parameterized_field(
FieldNamePosition::SetBase(base) => format!("'/{base}'"),
}
} else {
// Fallback: use field name appended
format!("`/${{acc}}_{}`", field.name)
};
@@ -562,7 +515,6 @@ fn generate_parameterized_field(
}
}
/// Generate a field using tree path construction (fallback for non-parameterizable patterns)
fn generate_tree_path_field(
output: &mut String,
field: &PatternField,
@@ -596,7 +548,6 @@ fn generate_tree_path_field(
}
}
/// Convert pattern field to JavaScript/JSDoc type, with optional generic support
fn field_to_js_type_generic(
field: &PatternField,
metadata: &ClientMetadata,
@@ -605,17 +556,12 @@ fn field_to_js_type_generic(
field_to_js_type_with_generic_value(field, metadata, is_generic, None)
}
/// Convert pattern field to JavaScript/JSDoc type.
/// - `is_generic`: If true and field.rust_type is "T", use T in the output
/// - `generic_value_type`: For branch fields that reference a generic pattern, this is the concrete type to substitute
fn field_to_js_type_with_generic_value(
field: &PatternField,
metadata: &ClientMetadata,
is_generic: bool,
generic_value_type: Option<&str>,
) -> String {
// For generic patterns, use T instead of concrete value type
// Also extract inner type from wrappers like Close<Dollars> -> Dollars
let value_type = if is_generic && field.rust_type == "T" {
"T".to_string()
} else {
@@ -623,29 +569,23 @@ fn field_to_js_type_with_generic_value(
};
if metadata.is_pattern_type(&field.rust_type) {
// Check if this pattern is generic
if metadata.is_pattern_generic(&field.rust_type) {
if let Some(vt) = generic_value_type {
return format!("{}<{}>", field.rust_type, vt);
} else if is_generic {
// Propagate T when inside a generic pattern
return format!("{}<T>", field.rust_type);
} else {
// Generic pattern without known type - use unknown
return format!("{}<unknown>", field.rust_type);
}
}
field.rust_type.clone()
} else if let Some(accessor) = metadata.find_index_set_pattern(&field.indexes) {
// Leaf with accessor - use value_type as the generic
format!("{}<{}>", accessor.name, value_type)
} else {
// Leaf - use value_type as the generic
format!("MetricNode<{}>", value_type)
}
}
/// Generate tree typedefs
fn generate_tree_typedefs(output: &mut String, catalog: &TreeNode, metadata: &ClientMetadata) {
writeln!(output, "// Catalog tree typedefs\n").unwrap();
@@ -661,7 +601,6 @@ fn generate_tree_typedefs(output: &mut String, catalog: &TreeNode, metadata: &Cl
);
}
/// Recursively generate tree typedefs
fn generate_tree_typedef(
output: &mut String,
name: &str,
@@ -680,7 +619,6 @@ fn generate_tree_typedef(
.map(|(f, _)| f.clone())
.collect();
// Skip if this matches a pattern (already generated)
if pattern_lookup.contains_key(&fields)
&& pattern_lookup.get(&fields) != Some(&name.to_string())
{
@@ -696,7 +634,6 @@ fn generate_tree_typedef(
writeln!(output, " * @typedef {{Object}} {}", name).unwrap();
for (field, child_fields) in &fields_with_child_info {
// Look up type parameter for generic patterns
let generic_value_type = child_fields
.as_ref()
.and_then(|cf| metadata.get_type_param(cf))
@@ -714,7 +651,6 @@ fn generate_tree_typedef(
writeln!(output, " */\n").unwrap();
// Generate child typedefs
for (child_name, child_node) in children {
if let TreeNode::Branch(grandchildren) = child_node {
let child_fields = get_node_fields(grandchildren, pattern_lookup);
@@ -733,7 +669,6 @@ fn generate_tree_typedef(
}
}
/// Generate main client
fn generate_main_client(
output: &mut String,
catalog: &TreeNode,
@@ -760,7 +695,6 @@ fn generate_main_client(
writeln!(output, " this.tree = this._buildTree('');").unwrap();
writeln!(output, " }}\n").unwrap();
// Generate _buildTree method
writeln!(output, " /**").unwrap();
writeln!(output, " * @private").unwrap();
writeln!(output, " * @param {{string}} basePath").unwrap();
@@ -772,12 +706,10 @@ fn generate_main_client(
writeln!(output, " }};").unwrap();
writeln!(output, " }}\n").unwrap();
// Generate API methods
generate_api_methods(output, endpoints);
writeln!(output, "}}\n").unwrap();
// Export
writeln!(
output,
"export {{ BrkClient, BrkClientBase, BrkError, MetricNode }};"
@@ -785,7 +717,6 @@ fn generate_main_client(
.unwrap();
}
/// Generate tree initializer
fn generate_tree_initializer(
output: &mut String,
node: &TreeNode,
@@ -803,7 +734,6 @@ fn generate_tree_initializer(
match child_node {
TreeNode::Leaf(leaf) => {
// Use leaf.name() (vec.name()) for API path, not tree path
let metric_path = format!("/{}", leaf.name());
if let Some(accessor) = metadata.find_index_set_pattern(leaf.indexes()) {
writeln!(
@@ -824,7 +754,6 @@ fn generate_tree_initializer(
TreeNode::Branch(grandchildren) => {
let child_fields = get_node_fields(grandchildren, pattern_lookup);
if let Some(pattern_name) = pattern_lookup.get(&child_fields) {
// For parameterized patterns, derive accumulated metric name from first leaf
let pattern = metadata
.structural_patterns
.iter()
@@ -833,10 +762,8 @@ fn generate_tree_initializer(
pattern.map(|p| p.is_parameterizable()).unwrap_or(false);
let arg = if is_parameterizable {
// Get the metric base from the first leaf descendant
get_pattern_instance_base(child_node, child_name)
} else {
// Fallback to tree path for non-parameterizable patterns
if accumulated_name.is_empty() {
format!("/{}", child_name)
} else {
@@ -851,7 +778,6 @@ fn generate_tree_initializer(
)
.unwrap();
} else {
// Not a pattern - recurse with accumulated name
let child_acc =
infer_child_accumulated_name(child_node, accumulated_name, child_name);
writeln!(output, "{}{}: {{", indent_str, field_name).unwrap();
@@ -871,18 +797,12 @@ fn generate_tree_initializer(
}
}
/// Infer the accumulated metric name for a child node
fn infer_child_accumulated_name(node: &TreeNode, parent_acc: &str, field_name: &str) -> String {
// Try to infer from first leaf descendant
if let Some(leaf_name) = get_first_leaf_name(node) {
// Look for field_name in the leaf metric name
if let Some(pos) = leaf_name.find(field_name) {
// The field_name appears in the metric - use it as base
if pos == 0 {
// At start - this is the base
return field_name.to_string();
} else if leaf_name.chars().nth(pos - 1) == Some('_') {
// After underscore - likely an append pattern
if parent_acc.is_empty() {
return field_name.to_string();
}
@@ -891,7 +811,6 @@ fn infer_child_accumulated_name(node: &TreeNode, parent_acc: &str, field_name: &
}
}
// Fallback: append field name
if parent_acc.is_empty() {
field_name.to_string()
} else {
@@ -899,7 +818,6 @@ fn infer_child_accumulated_name(node: &TreeNode, parent_acc: &str, field_name: &
}
}
/// Generate API methods
fn generate_api_methods(output: &mut String, endpoints: &[Endpoint]) {
for endpoint in endpoints {
if !endpoint.should_generate() {

View File

@@ -1,36 +1,3 @@
//! Client library generator for BRK.
//!
//! This crate generates typed client libraries in multiple languages (Rust, JavaScript, Python)
//! from the BRK metric catalog and OpenAPI specification.
//!
//! # Usage
//!
//! ```ignore
//! use brk_binder::generate_clients;
//! use brk_query::Vecs;
//! use std::path::Path;
//!
//! let vecs = Vecs::load("path/to/data")?;
//! let openapi_json = std::fs::read_to_string("openapi.json")?;
//! generate_clients(&vecs, &openapi_json, Path::new("output"))?;
//! ```
//!
//! # Architecture
//!
//! The generator works in several phases:
//!
//! 1. **Metadata extraction** - Analyzes the metric catalog tree to detect:
//! - Structural patterns (repeated tree shapes)
//! - Index set patterns (common index combinations)
//! - Generic patterns (structures that differ only in value type)
//!
//! 2. **Schema collection** - Merges OpenAPI schemas with schemars-generated type schemas
//!
//! 3. **Code generation** - Produces language-specific clients:
//! - Rust: Uses `brk_types` directly, generates structs with Arc-based sharing
//! - JavaScript: Generates JSDoc-typed ES modules with factory functions
//! - Python: Generates typed classes with TypedDict and Generic support
use std::{collections::btree_map::Entry, fs::create_dir_all, io, path::PathBuf};
use brk_query::Vecs;

View File

@@ -1,5 +1,4 @@
use std::collections::BTreeMap;
use std::io;
use std::{collections::BTreeMap, io};
use oas3::Spec;
use oas3::spec::{ObjectOrReference, Operation, ParameterIn, PathItem, Schema, SchemaTypeSet};

View File

@@ -1,8 +1,4 @@
use std::collections::HashSet;
use std::fmt::Write as FmtWrite;
use std::fs;
use std::io;
use std::path::Path;
use std::{collections::HashSet, fmt::Write as FmtWrite, fs, io, path::Path};
use brk_types::{Index, TreeNode};
use serde_json::Value;
@@ -24,7 +20,6 @@ pub fn generate_python_client(
) -> io::Result<()> {
let mut output = String::new();
// Header
writeln!(output, "# Auto-generated BRK Python client").unwrap();
writeln!(output, "# Do not edit manually\n").unwrap();
writeln!(output, "from __future__ import annotations").unwrap();
@@ -34,29 +29,14 @@ pub fn generate_python_client(
)
.unwrap();
writeln!(output, "import httpx\n").unwrap();
// Type variable for generic MetricNode
writeln!(output, "T = TypeVar('T')\n").unwrap();
// Generate type definitions from OpenAPI schemas (now includes leaf types from catalog)
generate_type_definitions(&mut output, schemas);
// Generate base client class
generate_base_client(&mut output);
// Generate MetricNode class
generate_metric_node(&mut output);
// Generate index accessor classes
generate_index_accessors(&mut output, &metadata.index_set_patterns);
// Generate structural pattern classes
generate_structural_patterns(&mut output, &metadata.structural_patterns, metadata);
// Generate tree classes
generate_tree_classes(&mut output, &metadata.catalog, metadata);
// Generate main client with tree and API methods
generate_main_client(&mut output, endpoints);
fs::write(output_path, output)?;
@@ -64,7 +44,6 @@ pub fn generate_python_client(
Ok(())
}
/// Generate Python type definitions from OpenAPI schemas
fn generate_type_definitions(output: &mut String, schemas: &TypeSchemas) {
if schemas.is_empty() {
return;
@@ -72,7 +51,6 @@ fn generate_type_definitions(output: &mut String, schemas: &TypeSchemas) {
writeln!(output, "# Type definitions\n").unwrap();
// Sort types by dependencies (types that reference other types must come after)
let sorted_names = topological_sort_schemas(schemas);
for name in sorted_names {
@@ -80,7 +58,6 @@ fn generate_type_definitions(output: &mut String, schemas: &TypeSchemas) {
continue;
};
if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) {
// Object type -> TypedDict
writeln!(output, "class {}(TypedDict):", name).unwrap();
for (prop_name, prop_schema) in props {
let prop_type = schema_to_python_type_ctx(prop_schema, Some(&name));
@@ -89,11 +66,9 @@ fn generate_type_definitions(output: &mut String, schemas: &TypeSchemas) {
}
writeln!(output).unwrap();
} else if is_enum_schema(schema) {
// Enum type -> Literal union
let py_type = schema_to_python_type_ctx(schema, Some(&name));
writeln!(output, "{} = {}", name, py_type).unwrap();
} else {
// Primitive type alias
let py_type = schema_to_python_type_ctx(schema, Some(&name));
writeln!(output, "{} = {}", name, py_type).unwrap();
}
@@ -101,7 +76,7 @@ fn generate_type_definitions(output: &mut String, schemas: &TypeSchemas) {
writeln!(output).unwrap();
}
/// Topologically sort schema names so dependencies come before dependents.
/// Topologically sort schema names so dependencies come before dependents (avoids forward references).
/// Types that reference other types (via $ref) must be defined after their dependencies.
fn topological_sort_schemas(schemas: &TypeSchemas) -> Vec<String> {
use std::collections::{HashMap, HashSet};
@@ -205,7 +180,6 @@ fn json_type_to_python(ty: &str, schema: &Value, current_type: Option<&str>) ->
format!("List[{}]", item_type)
}
"object" => {
// Check if it has additionalProperties (dict-like)
if let Some(add_props) = schema.get("additionalProperties") {
let value_type = schema_to_python_type_ctx(add_props, current_type);
return format!("dict[str, {}]", value_type);
@@ -218,7 +192,6 @@ fn json_type_to_python(ty: &str, schema: &Value, current_type: Option<&str>) ->
/// Convert JSON Schema to Python type with context for detecting self-references
fn schema_to_python_type_ctx(schema: &Value, current_type: Option<&str>) -> String {
// Handle allOf (try each element until we find a resolvable type)
if let Some(all_of) = schema.get("allOf").and_then(|v| v.as_array()) {
for item in all_of {
let resolved = schema_to_python_type_ctx(item, current_type);
@@ -250,9 +223,7 @@ fn schema_to_python_type_ctx(schema: &Value, current_type: Option<&str>) -> Stri
}
}
// Handle type field (can be string or array of strings)
if let Some(ty) = schema.get("type") {
// Handle array of types like ["string", "null"] for Optional
if let Some(type_array) = ty.as_array() {
let types: Vec<String> = type_array
.iter()
@@ -279,13 +250,11 @@ fn schema_to_python_type_ctx(schema: &Value, current_type: Option<&str>) -> Stri
}
}
// Handle single type string
if let Some(ty_str) = ty.as_str() {
return json_type_to_python(ty_str, schema, current_type);
}
}
// Handle anyOf/oneOf
if let Some(variants) = schema
.get("anyOf")
.or_else(|| schema.get("oneOf"))
@@ -295,7 +264,6 @@ fn schema_to_python_type_ctx(schema: &Value, current_type: Option<&str>) -> Stri
.iter()
.map(|v| schema_to_python_type_ctx(v, current_type))
.collect();
// Filter out Any and null for cleaner unions
let filtered: Vec<_> = types.iter().filter(|t| *t != "Any").collect();
if !filtered.is_empty() {
return filtered

View File

@@ -1,8 +1,4 @@
use std::collections::HashSet;
use std::fmt::Write as FmtWrite;
use std::fs;
use std::io;
use std::path::Path;
use std::{collections::HashSet, fmt::Write as FmtWrite, fs, io, path::Path};
use brk_types::{Index, TreeNode};
@@ -22,31 +18,17 @@ pub fn generate_rust_client(
) -> io::Result<()> {
let mut output = String::new();
// Header
writeln!(output, "// Auto-generated BRK Rust client").unwrap();
writeln!(output, "// Do not edit manually\n").unwrap();
writeln!(output, "#![allow(non_camel_case_types)]").unwrap();
writeln!(output, "#![allow(dead_code)]\n").unwrap();
// Imports
generate_imports(&mut output);
// Generate base client
generate_base_client(&mut output);
// Generate MetricNode
generate_metric_node(&mut output);
// Generate index accessor structs (for each unique set of indexes)
generate_index_accessors(&mut output, &metadata.index_set_patterns);
// Generate pattern structs (reusable, appearing 2+ times)
generate_pattern_structs(&mut output, &metadata.structural_patterns, metadata);
// Generate tree - each node uses its pattern or is generated inline
generate_tree(&mut output, &metadata.catalog, metadata);
// Generate main client with API methods
generate_main_client(&mut output, endpoints);
fs::write(output_path, output)?;
@@ -187,7 +169,6 @@ impl<T: DeserializeOwned> MetricNode<T> {{
.unwrap();
}
/// Generate index accessor structs for each unique set of indexes
fn generate_index_accessors(output: &mut String, patterns: &[IndexSetPattern]) {
if patterns.is_empty() {
return;
@@ -237,12 +218,10 @@ fn generate_index_accessors(output: &mut String, patterns: &[IndexSetPattern]) {
}
}
/// Convert an Index to a snake_case field name (e.g., DateIndex -> by_date_index)
fn index_to_field_name(index: &Index) -> String {
format!("by_{}", to_snake_case(index.serialize_long()))
}
/// Generate pattern structs (those appearing 2+ times)
fn generate_pattern_structs(
output: &mut String,
patterns: &[StructuralPattern],
@@ -317,7 +296,6 @@ fn generate_pattern_structs(
}
}
/// Generate a field using parameterized (prepend/append) metric name construction
fn generate_parameterized_rust_field(
output: &mut String,
field: &PatternField,
@@ -326,7 +304,6 @@ fn generate_parameterized_rust_field(
) {
let field_name = to_snake_case(&field.name);
// For branch fields, pass the accumulated name to nested pattern
if metadata.is_pattern_type(&field.rust_type) {
let child_acc = if let Some(pos) = pattern.get_field_position(&field.name) {
match pos {
@@ -348,7 +325,6 @@ fn generate_parameterized_rust_field(
return;
}
// For leaf fields, construct the metric path based on position
let metric_expr = if let Some(pos) = pattern.get_field_position(&field.name) {
match pos {
FieldNamePosition::Append(suffix) => format!("format!(\"/{{acc}}{}\")", suffix),
@@ -378,7 +354,6 @@ fn generate_parameterized_rust_field(
}
}
/// Generate a field using tree path construction (fallback for non-parameterizable patterns)
fn generate_tree_path_rust_field(
output: &mut String,
field: &PatternField,
@@ -411,7 +386,6 @@ fn generate_tree_path_rust_field(
}
}
/// Convert a PatternField to the full type annotation, with optional generic support
fn field_to_type_annotation_generic(
field: &PatternField,
metadata: &ClientMetadata,
@@ -420,17 +394,12 @@ fn field_to_type_annotation_generic(
field_to_type_annotation_with_generic(field, metadata, is_generic, None)
}
/// Convert a PatternField to the full type annotation.
/// - `is_generic`: If true and field.rust_type is "T", use T in the output
/// - `generic_value_type`: For branch fields that reference a generic pattern, this is the concrete type to substitute
fn field_to_type_annotation_with_generic(
field: &PatternField,
metadata: &ClientMetadata,
is_generic: bool,
generic_value_type: Option<&str>,
) -> String {
// For generic patterns, use T instead of concrete value type
// Also extract inner type from wrappers like Close<Dollars> -> Dollars
let value_type = if is_generic && field.rust_type == "T" {
"T".to_string()
} else {
@@ -438,24 +407,19 @@ fn field_to_type_annotation_with_generic(
};
if metadata.is_pattern_type(&field.rust_type) {
// Check if this pattern is generic and we have a value type
if metadata.is_pattern_generic(&field.rust_type)
&& let Some(vt) = generic_value_type
{
return format!("{}<{}>", field.rust_type, vt);
}
// Non-generic pattern has no type params
field.rust_type.clone()
} else if let Some(accessor) = metadata.find_index_set_pattern(&field.indexes) {
// Leaf with a reusable accessor pattern
format!("{}<{}>", accessor.name, value_type)
} else {
// Leaf with unique index set - use MetricNode directly
format!("MetricNode<{}>", value_type)
}
}
/// Generate the catalog tree structure
fn generate_tree(output: &mut String, catalog: &TreeNode, metadata: &ClientMetadata) {
writeln!(output, "// Catalog tree\n").unwrap();
@@ -471,7 +435,6 @@ fn generate_tree(output: &mut String, catalog: &TreeNode, metadata: &ClientMetad
);
}
/// Recursively generate tree nodes
fn generate_tree_node(
output: &mut String,
name: &str,
@@ -490,7 +453,6 @@ fn generate_tree_node(
.map(|(f, _)| f.clone())
.collect();
// Skip if this matches a pattern (already generated separately)
if let Some(pattern_name) = pattern_lookup.get(&fields)
&& pattern_name != name
{
@@ -519,7 +481,6 @@ fn generate_tree_node(
writeln!(output, "}}\n").unwrap();
// Generate impl block
writeln!(output, "impl {} {{", name).unwrap();
writeln!(
output,
@@ -600,7 +561,6 @@ fn generate_tree_node(
writeln!(output, " }}").unwrap();
writeln!(output, "}}\n").unwrap();
// Recursively generate child nodes that aren't patterns
for (child_name, child_node) in children {
if let TreeNode::Branch(grandchildren) = child_node {
let child_fields = get_node_fields(grandchildren, pattern_lookup);
@@ -619,7 +579,6 @@ fn generate_tree_node(
}
}
/// Generate the main client struct
fn generate_main_client(output: &mut String, endpoints: &[Endpoint]) {
writeln!(
output,
@@ -652,13 +611,11 @@ impl BrkClient {{
)
.unwrap();
// Generate API methods
generate_api_methods(output, endpoints);
writeln!(output, "}}").unwrap();
}
/// Generate API methods from OpenAPI endpoints
fn generate_api_methods(output: &mut String, endpoints: &[Endpoint]) {
for endpoint in endpoints {
if !endpoint.should_generate() {
@@ -672,7 +629,6 @@ fn generate_api_methods(output: &mut String, endpoints: &[Endpoint]) {
.map(js_type_to_rust)
.unwrap_or_else(|| "serde_json::Value".to_string());
// Build doc comment
writeln!(
output,
" /// {}",
@@ -686,7 +642,6 @@ fn generate_api_methods(output: &mut String, endpoints: &[Endpoint]) {
writeln!(output, " /// {}", desc).unwrap();
}
// Build method signature
let params = build_method_params(endpoint);
writeln!(
output,
@@ -695,7 +650,6 @@ fn generate_api_methods(output: &mut String, endpoints: &[Endpoint]) {
)
.unwrap();
// Build path
let path = build_path_template(&endpoint.path, &endpoint.path_params);
if endpoint.query_params.is_empty() {
@@ -761,7 +715,6 @@ fn build_path_template(path: &str, path_params: &[super::Parameter]) -> String {
result
}
/// Convert JS-style type to Rust type (e.g., "Txid[]" -> "Vec<Txid>")
fn js_type_to_rust(js_type: &str) -> String {
if let Some(inner) = js_type.strip_suffix("[]") {
format!("Vec<{}>", js_type_to_rust(inner))

View File

@@ -1,5 +1,3 @@
//! Case conversion utilities for identifiers.
/// Convert a string to PascalCase (e.g., "fee_rate" -> "FeeRate").
pub fn to_pascal_case(s: &str) -> String {
s.replace('-', "_")

View File

@@ -1,5 +1,3 @@
//! Types and utilities for client generation.
mod case;
mod patterns;
mod schema;

View File

@@ -1,5 +1,3 @@
//! Pattern detection for structural patterns in the metric tree.
use std::collections::{BTreeSet, HashMap};
use brk_types::TreeNode;

View File

@@ -1,5 +1,3 @@
//! JSON Schema utilities.
use serde_json::Value;
/// Unwrap allOf with a single element, returning the inner schema.

View File

@@ -1,5 +1,3 @@
//! Tree traversal utilities.
use std::collections::{BTreeMap, BTreeSet, HashMap};
use brk_types::{Index, TreeNode};

View File

@@ -15,6 +15,12 @@ Run a full BRK instance: index the blockchain, compute metrics, serve the API, a
- **Collision checking**: Optional TXID collision validation mode
- **Memory optimized**: Uses mimalloc allocator, 512MB stack for deep recursion
## Install
```bash
cargo install --locked brk_cli
```
## Usage
```bash

View File

@@ -5,7 +5,7 @@ use brk_error::Result;
use brk_fetcher::Fetcher;
use brk_indexer::Indexer;
use mimalloc::MiMalloc;
use vecdb::{AnyStoredVec, Exit};
use vecdb::Exit;
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
@@ -34,8 +34,8 @@ fn run() -> Result<()> {
let computer = Computer::forced_import(&outputs_dir, &indexer, Some(fetcher))?;
let _a = dbg!(computer.chain.txinindex_to_value.region().meta());
let _b = dbg!(indexer.vecs.txout.txoutindex_to_txoutdata.region().meta());
// let _a = dbg!(computer.chain.txinindex_to_value.region().meta());
// let _b = dbg!(indexer.vecs.txout.txoutindex_to_txoutdata.region().meta());
Ok(())
}

View File

@@ -6,7 +6,7 @@ use brk_types::{
};
use vecdb::{Exit, IterableVec, TypedVecIterator, VecIndex, unlikely};
use crate::{grouped::ComputedVecsFromHeight, indexes, price, utils::OptionExt, Indexes};
use crate::{grouped::ComputedVecsFromHeight, indexes, price, txins, utils::OptionExt, Indexes};
use super::{Vecs, ONE_TERA_HASH, TARGET_BLOCKS_PER_DAY_F32, TARGET_BLOCKS_PER_DAY_F64};
@@ -15,11 +15,12 @@ impl Vecs {
&mut self,
indexer: &Indexer,
indexes: &indexes::Vecs,
txins: &txins::Vecs,
starting_indexes: &Indexes,
price: Option<&price::Vecs>,
exit: &Exit,
) -> Result<()> {
self.compute_(indexer, indexes, starting_indexes, price, exit)?;
self.compute_(indexer, indexes, txins, starting_indexes, price, exit)?;
self.db.compact()?;
Ok(())
}
@@ -28,6 +29,7 @@ impl Vecs {
&mut self,
indexer: &Indexer,
indexes: &indexes::Vecs,
txins: &txins::Vecs,
starting_indexes: &Indexes,
price: Option<&price::Vecs>,
exit: &Exit,
@@ -271,15 +273,11 @@ impl Vecs {
compute_indexes_to_tx_vany(&mut self.indexes_to_tx_v2, TxVersion::TWO)?;
compute_indexes_to_tx_vany(&mut self.indexes_to_tx_v3, TxVersion::THREE)?;
// ---
// TxInIndex
// ---
self.txindex_to_input_value.compute_sum_from_indexes(
starting_indexes.txindex,
&indexer.vecs.tx.txindex_to_first_txinindex,
&indexes.txindex_to_input_count,
&indexer.vecs.txin.txinindex_to_value,
&txins.txinindex_to_value,
exit,
)?;
@@ -365,8 +363,8 @@ impl Vecs {
let mut txindex_to_first_txoutindex_iter =
indexer.vecs.tx.txindex_to_first_txoutindex.iter()?;
let mut txindex_to_output_count_iter = indexes.txindex_to_output_count.iter();
let mut txoutindex_to_txoutdata_iter =
indexer.vecs.txout.txoutindex_to_txoutdata.iter()?;
let mut txoutindex_to_value_iter =
indexer.vecs.txout.txoutindex_to_value.iter()?;
vec.compute_transform(
starting_indexes.height,
&indexer.vecs.tx.height_to_first_txindex,
@@ -378,9 +376,8 @@ impl Vecs {
let mut sats = Sats::ZERO;
(first_txoutindex..first_txoutindex + usize::from(output_count)).for_each(
|txoutindex| {
sats += txoutindex_to_txoutdata_iter
.get_unwrap(TxOutIndex::from(txoutindex))
.value;
sats += txoutindex_to_value_iter
.get_unwrap(TxOutIndex::from(txoutindex));
},
);
(height, sats)

View File

@@ -18,9 +18,9 @@ use crate::{
};
use super::{
Vecs, TARGET_BLOCKS_PER_DAY, TARGET_BLOCKS_PER_DECADE, TARGET_BLOCKS_PER_MONTH,
TARGET_BLOCKS_PER_DAY, TARGET_BLOCKS_PER_DECADE, TARGET_BLOCKS_PER_MONTH,
TARGET_BLOCKS_PER_QUARTER, TARGET_BLOCKS_PER_SEMESTER, TARGET_BLOCKS_PER_WEEK,
TARGET_BLOCKS_PER_YEAR,
TARGET_BLOCKS_PER_YEAR, Vecs,
};
impl Vecs {
@@ -42,7 +42,6 @@ impl Vecs {
let v4 = Version::new(4);
let v5 = Version::new(5);
// Helper macros for common patterns
macro_rules! eager {
($name:expr) => {
EagerVec::forced_import(&db, $name, version + v0)?
@@ -125,8 +124,6 @@ impl Vecs {
.add_cumulative()
};
let txinindex_to_value = eager!("value");
let txindex_to_weight = LazyVecFrom2::init(
"weight",
version + Version::ZERO,
@@ -451,7 +448,6 @@ impl Vecs {
indexes_to_inputs_per_sec: computed_di!("inputs_per_sec", v2, last()),
txindex_to_is_coinbase,
txinindex_to_value,
txindex_to_input_value,
txindex_to_output_value,
txindex_to_fee,

View File

@@ -5,7 +5,7 @@ use brk_traversable::Traversable;
use brk_types::{
Bitcoin, DateIndex, DecadeIndex, DifficultyEpoch, Dollars, FeeRate, HalvingEpoch, Height,
MonthIndex, QuarterIndex, Sats, SemesterIndex, StoredBool, StoredF32, StoredF64, StoredU32,
StoredU64, Timestamp, TxInIndex, TxIndex, VSize, WeekIndex, Weight, YearIndex,
StoredU64, Timestamp, TxIndex, VSize, WeekIndex, Weight, YearIndex,
};
use vecdb::{Database, EagerVec, LazyVecFrom1, LazyVecFrom2, PcoVec};
@@ -86,7 +86,6 @@ pub struct Vecs {
pub indexes_to_tx_vsize: ComputedVecsFromTxindex<VSize>,
pub indexes_to_tx_weight: ComputedVecsFromTxindex<Weight>,
pub indexes_to_unknownoutput_count: ComputedVecsFromHeight<StoredU64>,
pub txinindex_to_value: EagerVec<PcoVec<TxInIndex, Sats>>,
pub indexes_to_input_count: ComputedVecsFromTxindex<StoredU64>,
pub txindex_to_is_coinbase: LazyVecFrom2<TxIndex, StoredBool, TxIndex, Height, Height, TxIndex>,
pub indexes_to_output_count: ComputedVecsFromTxindex<StoredU64>,

View File

@@ -245,10 +245,6 @@ impl Vecs {
starting_indexes: brk_indexer::Indexes,
exit: &Exit,
) -> Result<Indexes> {
// ---
// TxIndex
// ---
self.txindex_to_input_count.compute_count_from_indexes(
starting_indexes.txindex,
&indexer.vecs.tx.txindex_to_first_txinindex,
@@ -270,10 +266,6 @@ impl Vecs {
exit,
)?;
// ---
// Height
// ---
self.height_to_height.compute_from_index(
starting_indexes.height,
&indexer.vecs.block.height_to_weight,
@@ -318,10 +310,6 @@ impl Vecs {
let decremented_starting_height = starting_indexes.height.decremented().unwrap_or_default();
// ---
// DateIndex
// ---
let starting_dateindex = self
.height_to_dateindex
.into_iter()
@@ -370,10 +358,6 @@ impl Vecs {
exit,
)?;
// ---
// WeekIndex
// ---
let starting_weekindex = self
.dateindex_to_weekindex
.into_iter()
@@ -407,10 +391,6 @@ impl Vecs {
exit,
)?;
// ---
// DifficultyEpoch
// ---
let starting_difficultyepoch = self
.height_to_difficultyepoch
.into_iter()
@@ -443,10 +423,6 @@ impl Vecs {
exit,
)?;
// ---
// MonthIndex
// ---
let starting_monthindex = self
.dateindex_to_monthindex
.into_iter()
@@ -480,10 +456,6 @@ impl Vecs {
exit,
)?;
// ---
// QuarterIndex
// ---
let starting_quarterindex = self
.monthindex_to_quarterindex
.into_iter()
@@ -518,10 +490,6 @@ impl Vecs {
exit,
)?;
// ---
// SemesterIndex
// ---
let starting_semesterindex = self
.monthindex_to_semesterindex
.into_iter()
@@ -556,10 +524,6 @@ impl Vecs {
exit,
)?;
// ---
// YearIndex
// ---
let starting_yearindex = self
.monthindex_to_yearindex
.into_iter()
@@ -591,9 +555,6 @@ impl Vecs {
&self.monthindex_to_monthindex,
exit,
)?;
// ---
// HalvingEpoch
// ---
let starting_halvingepoch = self
.height_to_halvingepoch
@@ -619,10 +580,6 @@ impl Vecs {
exit,
)?;
// ---
// DecadeIndex
// ---
let starting_decadeindex = self
.yearindex_to_decadeindex
.into_iter()

View File

@@ -23,6 +23,8 @@ mod pools;
mod price;
mod stateful;
mod traits;
mod txins;
mod txouts;
mod utils;
use indexes::Indexes;
@@ -40,6 +42,8 @@ pub struct Computer {
pub pools: pools::Vecs,
pub price: Option<price::Vecs>,
pub stateful: stateful::Vecs,
pub txins: txins::Vecs,
pub txouts: txouts::Vecs,
}
const VERSION: Version = Version::new(4);
@@ -60,7 +64,7 @@ impl Computer {
let big_thread = || thread::Builder::new().stack_size(STACK_SIZE);
let i = Instant::now();
let (indexes, fetched, blks) = thread::scope(|s| -> Result<_> {
let (indexes, fetched, blks, txins, txouts) = thread::scope(|s| -> Result<_> {
let fetched_handle = fetcher
.map(|fetcher| {
big_thread().spawn_scoped(s, move || {
@@ -72,13 +76,21 @@ impl Computer {
let blks_handle = big_thread()
.spawn_scoped(s, || blks::Vecs::forced_import(&computed_path, VERSION))?;
let txins_handle = big_thread()
.spawn_scoped(s, || txins::Vecs::forced_import(&computed_path, VERSION))?;
let txouts_handle = big_thread()
.spawn_scoped(s, || txouts::Vecs::forced_import(&computed_path, VERSION))?;
let indexes = indexes::Vecs::forced_import(&computed_path, VERSION, indexer)?;
let fetched = fetched_handle.map(|h| h.join().unwrap()).transpose()?;
let blks = blks_handle.join().unwrap()?;
let txins = txins_handle.join().unwrap()?;
let txouts = txouts_handle.join().unwrap()?;
Ok((indexes, fetched, blks))
Ok((indexes, fetched, blks, txins, txouts))
})?;
info!("Imported indexes/fetched/blks in {:?}", i.elapsed());
info!("Imported indexes/fetched/blks/txins/txouts in {:?}", i.elapsed());
let i = Instant::now();
let (price, constants, market) = thread::scope(|s| -> Result<_> {
@@ -144,8 +156,10 @@ impl Computer {
pools,
cointime,
indexes,
txins,
fetched,
price,
txouts,
})
}
@@ -195,20 +209,33 @@ impl Computer {
Ok(())
});
let chain = scope.spawn(|| -> Result<()> {
info!("Computing chain...");
// Txins must complete before txouts (txouts needs txinindex_to_txoutindex)
// and before chain (chain needs txinindex_to_value)
info!("Computing txins...");
let i = Instant::now();
self.txins.compute(indexer, &starting_indexes, exit)?;
info!("Computed txins in {:?}", i.elapsed());
let txouts = scope.spawn(|| -> Result<()> {
info!("Computing txouts...");
let i = Instant::now();
self.chain.compute(
indexer,
&self.indexes,
&starting_indexes,
self.price.as_ref(),
exit,
)?;
info!("Computed chain in {:?}", i.elapsed());
self.txouts.compute(indexer, &self.txins, &starting_indexes, exit)?;
info!("Computed txouts in {:?}", i.elapsed());
Ok(())
});
info!("Computing chain...");
let i = Instant::now();
self.chain.compute(
indexer,
&self.indexes,
&self.txins,
&starting_indexes,
self.price.as_ref(),
exit,
)?;
info!("Computed chain in {:?}", i.elapsed());
if let Some(price) = self.price.as_ref() {
info!("Computing market...");
let i = Instant::now();
@@ -218,7 +245,7 @@ impl Computer {
blks.join().unwrap()?;
constants.join().unwrap()?;
chain.join().unwrap()?;
txouts.join().unwrap()?;
Ok(())
})?;
@@ -244,6 +271,7 @@ impl Computer {
self.stateful.compute(
indexer,
&self.indexes,
&self.txins,
&self.chain,
self.price.as_ref(),
&mut starting_indexes,

View File

@@ -126,8 +126,10 @@ impl Vecs {
let mut txindex_to_first_txoutindex_iter =
indexer.vecs.tx.txindex_to_first_txoutindex.iter()?;
let mut txindex_to_output_count_iter = indexes.txindex_to_output_count.iter();
let mut txoutindex_to_txoutdata_iter =
indexer.vecs.txout.txoutindex_to_txoutdata.iter()?;
let mut txoutindex_to_outputtype_iter =
indexer.vecs.txout.txoutindex_to_outputtype.iter()?;
let mut txoutindex_to_typeindex_iter =
indexer.vecs.txout.txoutindex_to_typeindex.iter()?;
let mut p2pk65addressindex_to_p2pk65bytes_iter = indexer
.vecs
.address
@@ -180,9 +182,8 @@ impl Vecs {
let pool = (*txoutindex..(*txoutindex + *outputcount))
.map(TxOutIndex::from)
.find_map(|txoutindex| {
let txoutdata = txoutindex_to_txoutdata_iter.get_unwrap(txoutindex);
let outputtype = txoutdata.outputtype;
let typeindex = txoutdata.typeindex;
let outputtype = txoutindex_to_outputtype_iter.get_unwrap(txoutindex);
let typeindex = txoutindex_to_typeindex_iter.get_unwrap(txoutindex);
match outputtype {
OutputType::P2PK65 => Some(AddressBytes::from(

View File

@@ -469,9 +469,6 @@ impl Vecs {
exit,
)?;
// self.halvingepoch_to_price_ohlc
// .compute_transform(starting_indexes.halvingepoch, other, t, exit)?;
self.decadeindex_to_price_ohlc.compute_transform4(
starting_indexes.decadeindex,
self.timeindexes_to_price_open.decadeindex.unwrap_first(),
@@ -798,9 +795,6 @@ impl Vecs {
exit,
)?;
// self.halvingepoch_to_price_ohlc
// _in_sats.compute_transform(starting_indexes.halvingepoch, other, t, exit)?;
self.decadeindex_to_price_ohlc_in_sats.compute_transform4(
starting_indexes.decadeindex,
self.timeindexes_to_price_open_in_sats

View File

@@ -1,5 +1,3 @@
//! Address count types per address type.
use brk_error::Result;
use brk_grouper::ByAddressType;
use brk_traversable::Traversable;

View File

@@ -1,5 +1,3 @@
//! Storage for address indexes by type.
use brk_error::{Error, Result};
use brk_traversable::Traversable;
use brk_types::{

View File

@@ -1,5 +1,3 @@
//! Storage for address data (loaded and empty addresses).
use brk_error::Result;
use brk_traversable::Traversable;
use brk_types::{

View File

@@ -1,5 +1,3 @@
//! Height to AddressTypeToVec hashmap.
use brk_types::Height;
use derive_deref::{Deref, DerefMut};
use rustc_hash::FxHashMap;

View File

@@ -1,15 +1,3 @@
//! Address handling with macro-generated code for 8 address types.
//!
//! This module provides:
//! - `AnyAddressIndexesVecs` for storing address indexes by type
//! - `AddressesDataVecs` for storing address data (loaded/empty)
//! - `AddressTypeToTypeIndexMap` for per-type hashmaps
//! - `AddressTypeToVec` for per-type vectors
//! - `HeightToAddressTypeToVec` for height-keyed per-type vectors
//! - `AddressTypeToAddressCount` for runtime address counts
//! - `AddressTypeToHeightToAddressCount` for height-indexed address counts
//! - `AddressTypeToIndexesToAddressCount` for computed address counts
mod address_count;
mod any_address_indexes;
mod data;

View File

@@ -1,13 +1,10 @@
//! Per-address-type hashmap keyed by TypeIndex.
use std::mem;
use std::{collections::hash_map::Entry, mem};
use brk_grouper::ByAddressType;
use brk_types::{OutputType, TypeIndex};
use derive_deref::{Deref, DerefMut};
use rustc_hash::FxHashMap;
use smallvec::{Array, SmallVec};
use std::collections::hash_map::Entry;
/// A hashmap for each address type, keyed by TypeIndex.
#[derive(Debug, Clone, Deref, DerefMut)]

View File

@@ -1,5 +1,3 @@
//! Per-address-type vector.
use brk_grouper::ByAddressType;
use derive_deref::{Deref, DerefMut};

View File

@@ -1,5 +1,3 @@
//! Address cohort vectors with metrics and state.
use std::path::Path;
use brk_error::Result;
@@ -19,8 +17,7 @@ use crate::{
stateful::states::AddressCohortState,
};
use super::super::metrics::{CohortMetrics, ImportConfig};
use super::traits::{CohortVecs, DynCohortVecs};
use super::{super::metrics::{CohortMetrics, ImportConfig}, traits::{CohortVecs, DynCohortVecs}};
const VERSION: Version = Version::ZERO;

View File

@@ -1,5 +1,3 @@
//! Container for all Address cohorts organized by filter type.
use std::path::Path;
use brk_error::Result;

View File

@@ -1,11 +1,3 @@
//! Cohort management for UTXO and address groupings.
//!
//! Cohorts are groups of UTXOs or addresses filtered by criteria like:
//! - Age (0-1d, 1-7d, etc.)
//! - Amount (< 1 BTC, 1-10 BTC, etc.)
//! - Type (P2PKH, P2SH, etc.)
//! - Term (short-term holder, long-term holder)
mod address;
mod address_cohorts;
mod traits;

View File

@@ -1,5 +1,3 @@
//! Traits for cohort vector operations.
use brk_error::Result;
use brk_types::{Bitcoin, DateIndex, Dollars, Height, Version};
use vecdb::{Exit, IterableVec};

View File

@@ -1,5 +1,3 @@
//! UTXO cohort vectors with metrics and state.
use std::path::Path;
use brk_error::Result;

View File

@@ -1,5 +1,3 @@
//! Container for all UTXO cohorts organized by filter type.
mod receive;
mod send;
mod tick_tock;

View File

@@ -1,5 +1,3 @@
//! Processing received outputs (new UTXOs).
use brk_types::{Dollars, Height, Timestamp};
use crate::stateful::states::Transacted;

View File

@@ -1,5 +1,3 @@
//! Processing spent inputs (UTXOs being spent).
use brk_types::{CheckedSub, Height};
use rustc_hash::FxHashMap;
use vecdb::VecIndex;

View File

@@ -1,11 +1,3 @@
//! Age-based state transitions for UTXO cohorts.
//!
//! When a new block arrives, UTXOs age. Some cross day boundaries
//! and need to move between age-based cohorts.
//!
//! Optimization: Instead of iterating all ~800k blocks O(n), we binary search
//! for blocks at each day boundary O(k * log n) where k = number of boundaries.
use brk_grouper::AGE_BOUNDARIES;
use brk_types::{ONE_DAY_IN_SEC, Timestamp};

View File

@@ -1,10 +1,3 @@
//! Aggregate cohort computation.
//!
//! After block processing, compute derived metrics:
//! 1. Overlapping cohorts (e.g., ">=1d" from sum of age_range cohorts)
//! 2. Index-based transforms (height -> dateindex, etc.)
//! 3. Relative metrics (supply ratios, market cap ratios)
use brk_error::Result;
use brk_types::{Bitcoin, DateIndex, Dollars, Height};
use log::info;

View File

@@ -1,25 +1,15 @@
//! Main block processing loop.
//!
//! Iterates through blocks and processes each one:
//! 1. Reset per-block state values
//! 2. Tick-tock age transitions
//! 3. Process outputs (receive) in parallel
//! 4. Process inputs (send) in parallel
//! 5. Push to height-indexed vectors
//! 6. Periodically flush checkpoints
use std::thread;
use brk_error::Result;
use brk_grouper::ByAddressType;
use brk_indexer::Indexer;
use brk_types::{DateIndex, Height, OutputType, Sats, TypeIndex};
use brk_types::{DateIndex, Height, OutputType, Sats, TxIndex, TypeIndex};
use log::info;
use rayon::prelude::*;
use vecdb::{Exit, GenericStoredVec, IterableVec, TypedVecIterator, VecIndex};
use crate::{
chain, indexes, price,
chain, indexes, price, txins,
stateful::{
address::AddressTypeToAddressCount,
compute::write::{process_address_updates, write},
@@ -36,6 +26,7 @@ use super::{
super::{
cohorts::{AddressCohorts, DynCohortVecs, UTXOCohorts},
vecs::Vecs,
RangeMap,
},
BIP30_DUPLICATE_HEIGHT_1, BIP30_DUPLICATE_HEIGHT_2, BIP30_ORIGINAL_HEIGHT_1,
BIP30_ORIGINAL_HEIGHT_2, ComputeContext, FLUSH_INTERVAL, TxInIterators, TxOutIterators,
@@ -48,6 +39,7 @@ pub fn process_blocks(
vecs: &mut Vecs,
indexer: &Indexer,
indexes: &indexes::Vecs,
txins: &txins::Vecs,
chain: &chain::Vecs,
price: Option<&price::Vecs>,
starting_height: Height,
@@ -128,9 +120,19 @@ pub fn process_blocks(
let mut vr = VecsReaders::new(&vecs.any_address_indexes, &vecs.addresses_data);
// Build txindex -> height lookup map for efficient prev_height computation
info!("Building txindex_to_height map...");
let mut txindex_to_height: RangeMap<TxIndex, Height> = {
let mut map = RangeMap::with_capacity(last_height.to_usize() + 1);
for first_txindex in indexer.vecs.tx.height_to_first_txindex.into_iter() {
map.push(first_txindex);
}
map
};
// Create reusable iterators for sequential txout/txin reads (16KB buffered)
let mut txout_iters = TxOutIterators::new(indexer);
let mut txin_iters = TxInIterators::new(indexer);
let mut txin_iters = TxInIterators::new(indexer, txins, &mut txindex_to_height);
info!("Creating address iterators...");
@@ -270,7 +272,7 @@ pub fn process_blocks(
let (input_values, input_prev_heights, input_outputtypes, input_typeindexes) =
if input_count > 1 {
txin_iters.collect_block_inputs(first_txinindex + 1, input_count - 1)
txin_iters.collect_block_inputs(first_txinindex + 1, input_count - 1, height)
} else {
(Vec::new(), Vec::new(), Vec::new(), Vec::new())
};

View File

@@ -1,5 +1,3 @@
//! Computation context holding shared state during block processing.
use brk_types::{Dollars, Height, Timestamp};
use vecdb::VecIndex;

View File

@@ -1,12 +1,3 @@
//! Block processing pipeline.
//!
//! This module handles the main computation loop that processes blocks:
//! 1. Recover state from checkpoint or start fresh
//! 2. Process each block's outputs and inputs
//! 3. Update cohort states
//! 4. Periodically flush to disk
//! 5. Compute aggregate cohorts from separate cohorts
pub mod aggregates;
mod block_loop;
mod context;
@@ -17,7 +8,7 @@ mod write;
pub use block_loop::process_blocks;
pub use context::ComputeContext;
pub use readers::{
TxInIterators, TxOutIterators, VecsReaders, build_txinindex_to_txindex,
TxInIterators, TxOutData, TxOutIterators, VecsReaders, build_txinindex_to_txindex,
build_txoutindex_to_txindex,
};
pub use recover::{StartMode, determine_start_mode, recover_state, reset_state};

View File

@@ -1,31 +1,42 @@
//! Cached readers for efficient data access during computation.
//!
//! Readers provide mmap-based access to indexed data without repeated syscalls.
use brk_grouper::{ByAddressType, ByAnyAddress};
use brk_indexer::Indexer;
use brk_types::{
Height, OutputType, Sats, StoredU64, TxInIndex, TxIndex, TxOutData, TxOutIndex, TypeIndex,
Height, OutPoint, OutputType, Sats, StoredU64, TxInIndex, TxIndex, TxOutIndex, TypeIndex,
};
use vecdb::{
BoxedVecIterator, BytesVecIterator, GenericStoredVec, PcodecVecIterator, Reader, VecIndex,
VecIterator,
};
use crate::stateful::address::{AddressesDataVecs, AnyAddressIndexesVecs};
use crate::{
stateful::{address::{AddressesDataVecs, AnyAddressIndexesVecs}, RangeMap},
txins,
};
/// Output data collected from separate vecs.
#[derive(Debug, Clone, Copy)]
pub struct TxOutData {
pub value: Sats,
pub outputtype: OutputType,
pub typeindex: TypeIndex,
}
/// Reusable iterators for txout vectors (16KB buffered reads).
///
/// Iterators are created once and re-positioned each block to avoid
/// creating new file handles repeatedly.
pub struct TxOutIterators<'a> {
txoutdata_iter: BytesVecIterator<'a, TxOutIndex, TxOutData>,
value_iter: BytesVecIterator<'a, TxOutIndex, Sats>,
outputtype_iter: BytesVecIterator<'a, TxOutIndex, OutputType>,
typeindex_iter: BytesVecIterator<'a, TxOutIndex, TypeIndex>,
}
impl<'a> TxOutIterators<'a> {
pub fn new(indexer: &'a Indexer) -> Self {
Self {
txoutdata_iter: indexer.vecs.txout.txoutindex_to_txoutdata.into_iter(),
value_iter: indexer.vecs.txout.txoutindex_to_value.into_iter(),
outputtype_iter: indexer.vecs.txout.txoutindex_to_outputtype.into_iter(),
typeindex_iter: indexer.vecs.txout.txoutindex_to_typeindex.into_iter(),
}
}
@@ -36,7 +47,11 @@ impl<'a> TxOutIterators<'a> {
output_count: usize,
) -> Vec<TxOutData> {
(first_txoutindex..first_txoutindex + output_count)
.map(|i| self.txoutdata_iter.get_at_unwrap(i))
.map(|i| TxOutData {
value: self.value_iter.get_at_unwrap(i),
outputtype: self.outputtype_iter.get_at_unwrap(i),
typeindex: self.typeindex_iter.get_at_unwrap(i),
})
.collect()
}
}
@@ -44,26 +59,34 @@ impl<'a> TxOutIterators<'a> {
/// Reusable iterators for txin vectors (PcoVec - avoids repeated page decompression).
pub struct TxInIterators<'a> {
value_iter: PcodecVecIterator<'a, TxInIndex, Sats>,
prev_height_iter: PcodecVecIterator<'a, TxInIndex, Height>,
outpoint_iter: PcodecVecIterator<'a, TxInIndex, OutPoint>,
outputtype_iter: PcodecVecIterator<'a, TxInIndex, OutputType>,
typeindex_iter: PcodecVecIterator<'a, TxInIndex, TypeIndex>,
txindex_to_height: &'a mut RangeMap<TxIndex, Height>,
}
impl<'a> TxInIterators<'a> {
pub fn new(indexer: &'a Indexer) -> Self {
pub fn new(
indexer: &'a Indexer,
txins: &'a txins::Vecs,
txindex_to_height: &'a mut RangeMap<TxIndex, Height>,
) -> Self {
Self {
value_iter: indexer.vecs.txin.txinindex_to_value.into_iter(),
prev_height_iter: indexer.vecs.txin.txinindex_to_prev_height.into_iter(),
value_iter: txins.txinindex_to_value.into_iter(),
outpoint_iter: indexer.vecs.txin.txinindex_to_outpoint.into_iter(),
outputtype_iter: indexer.vecs.txin.txinindex_to_outputtype.into_iter(),
typeindex_iter: indexer.vecs.txin.txinindex_to_typeindex.into_iter(),
txindex_to_height,
}
}
/// Collect input data for a block range using buffered iteration.
/// Computes prev_height on-the-fly from outpoint using RangeMap lookup.
pub fn collect_block_inputs(
&mut self,
first_txinindex: usize,
input_count: usize,
current_height: Height,
) -> (Vec<Sats>, Vec<Height>, Vec<OutputType>, Vec<TypeIndex>) {
let mut values = Vec::with_capacity(input_count);
let mut prev_heights = Vec::with_capacity(input_count);
@@ -72,7 +95,17 @@ impl<'a> TxInIterators<'a> {
for i in first_txinindex..first_txinindex + input_count {
values.push(self.value_iter.get_at_unwrap(i));
prev_heights.push(self.prev_height_iter.get_at_unwrap(i));
let outpoint = self.outpoint_iter.get_at_unwrap(i);
let prev_height = if outpoint.is_coinbase() {
current_height
} else {
self.txindex_to_height
.get(outpoint.txindex())
.unwrap_or(current_height)
};
prev_heights.push(prev_height);
outputtypes.push(self.outputtype_iter.get_at_unwrap(i));
typeindexes.push(self.typeindex_iter.get_at_unwrap(i));
}

View File

@@ -1,17 +1,14 @@
//! State recovery logic for checkpoint/resume.
//!
//! Determines starting height and imports saved state from checkpoints.
use std::cmp::Ordering;
use std::collections::BTreeSet;
use std::{cmp::Ordering, collections::BTreeSet};
use brk_error::Result;
use brk_types::Height;
use vecdb::Stamp;
use super::super::AddressesDataVecs;
use super::super::address::AnyAddressIndexesVecs;
use super::super::cohorts::{AddressCohorts, UTXOCohorts};
use super::super::{
AddressesDataVecs,
address::AnyAddressIndexesVecs,
cohorts::{AddressCohorts, UTXOCohorts},
};
/// Result of state recovery.
pub struct RecoveredState {

View File

@@ -1,9 +1,3 @@
//! State flushing logic for checkpoints.
//!
//! Separates processing (mutations) from flushing (I/O):
//! - `process_address_updates`: applies cached address changes to storage
//! - `flush`: writes all data to disk
use std::time::Instant;
use brk_error::Result;

View File

@@ -1,7 +1,3 @@
//! Transaction activity metrics.
//!
//! These metrics track amounts sent and destruction of satoshi-days/blocks.
use brk_error::Result;
use brk_traversable::Traversable;
use brk_types::{Bitcoin, Height, Sats, StoredF64, Version};

View File

@@ -1,5 +1,3 @@
//! Configuration for metric imports.
use brk_grouper::{CohortContext, Filter};
use brk_types::Version;
use vecdb::Database;

View File

@@ -1,13 +1,3 @@
//! Metric vectors organized by category.
//!
//! Instead of a single 80+ field struct, metrics are grouped into logical categories:
//! - `supply`: Supply and UTXO count metrics (always computed)
//! - `activity`: Transaction activity metrics (always computed)
//! - `realized`: Realized cap, profit/loss, SOPR (requires price)
//! - `unrealized`: Unrealized profit/loss (requires price)
//! - `price`: Price paid metrics and percentiles (requires price)
//! - `relative`: Ratios relative to market cap, etc. (requires price)
mod activity;
mod config;
mod price_paid;

View File

@@ -1,7 +1,3 @@
//! Price paid metrics and percentiles.
//!
//! Tracks min/max price paid for UTXOs and price distribution percentiles.
use brk_error::Result;
use brk_traversable::Traversable;
use brk_types::{DateIndex, Dollars, Height, Version};

View File

@@ -1,7 +1,3 @@
//! Realized cap and profit/loss metrics.
//!
//! These metrics require price data and track realized value based on acquisition price.
use brk_error::Result;
use brk_traversable::Traversable;
use brk_types::{Bitcoin, DateIndex, Dollars, Height, StoredF32, StoredF64, Version};

View File

@@ -1,7 +1,3 @@
//! Relative metrics (ratios to market cap, realized cap, supply, etc.)
//!
//! These are computed ratios comparing cohort metrics to global metrics.
use brk_error::Result;
use brk_traversable::Traversable;
use brk_types::{Bitcoin, DateIndex, Dollars, Height, StoredF32, StoredF64, Version};

View File

@@ -1,7 +1,3 @@
//! Supply and UTXO count metrics.
//!
//! These metrics are always computed regardless of price data availability.
use brk_error::Result;
use brk_traversable::Traversable;
use brk_types::{Bitcoin, DateIndex, Dollars, Height, Sats, StoredU64, Version};

View File

@@ -1,7 +1,3 @@
//! Unrealized profit/loss metrics.
//!
//! These metrics track paper gains/losses based on current vs acquisition price.
use brk_error::Result;
use brk_traversable::Traversable;
use brk_types::{DateIndex, Dollars, Height, Sats, Version};

View File

@@ -1,41 +1,15 @@
//! Stateful computation for Bitcoin UTXO and address cohort metrics.
//!
//! This module processes blockchain data to compute metrics for various cohorts
//! (groups of UTXOs or addresses filtered by age, amount, type, etc.).
//!
//! ## Module Structure
//!
//! ```text
//! stateful/
//! ├── address/ # Address type collections (type_vec, type_index_map, etc.)
//! ├── cohorts/ # Cohort traits and state management
//! ├── compute/ # Block processing loop and I/O
//! ├── metrics/ # Metric vectors organized by category
//! ├── process/ # Transaction processing (inputs, outputs, cache)
//! └── vecs.rs # Main vectors container
//! ```
//!
//! ## Data Flow
//!
//! 1. **Import**: Load from checkpoint or start fresh
//! 2. **Process blocks**: For each block, process outputs/inputs in parallel
//! 3. **Update cohorts**: Track supply, realized/unrealized P&L per cohort
//! 4. **Flush**: Periodically checkpoint state to disk
//! 5. **Compute aggregates**: Derive aggregate cohorts from separate cohorts
pub mod address;
pub mod cohorts;
pub mod compute;
pub mod metrics;
mod process;
mod range_map;
mod states;
mod vecs;
use states::*;
pub use range_map::RangeMap;
pub use vecs::Vecs;
// Address re-exports
pub use address::{AddressTypeToTypeIndexMap, AddressesDataVecs, AnyAddressIndexesVecs};
// Cohort re-exports
pub use cohorts::{AddressCohorts, CohortVecs, DynCohortVecs, UTXOCohorts};

View File

@@ -1,10 +1,3 @@
//! Address data update processing for flush operations.
//!
//! Handles transitions between loaded (non-zero balance) and empty (zero balance) states:
//! - New addresses: push to storage
//! - Updated addresses: update in place
//! - State transitions: delete from source, push to destination
use brk_error::Result;
use brk_types::{
AnyAddressIndex, EmptyAddressData, EmptyAddressIndex, LoadedAddressData, LoadedAddressIndex,

View File

@@ -1,15 +1,12 @@
//! Address data cache for flush intervals.
//!
//! Accumulates address data across blocks within a flush interval.
//! Data is flushed to disk at checkpoints.
use brk_grouper::ByAddressType;
use brk_types::{AnyAddressDataIndexEnum, LoadedAddressData, OutputType, TypeIndex};
use vecdb::GenericStoredVec;
use super::super::address::{AddressTypeToTypeIndexMap, AddressesDataVecs, AnyAddressIndexesVecs};
use super::super::compute::VecsReaders;
use super::{
super::{
address::{AddressTypeToTypeIndexMap, AddressesDataVecs, AnyAddressIndexesVecs},
compute::VecsReaders,
},
AddressLookup, EmptyAddressDataWithSource, LoadedAddressDataWithSource, TxIndexVec,
WithAddressDataSource,
};

View File

@@ -1,5 +1,3 @@
//! Parallel input processing.
use brk_grouper::ByAddressType;
use brk_types::{Height, OutputType, Sats, TxIndex, TypeIndex};
use rayon::prelude::*;

View File

@@ -1,9 +1,9 @@
//! Address data lookup during block processing.
use brk_types::{LoadedAddressData, OutputType, TypeIndex};
use super::super::address::AddressTypeToTypeIndexMap;
use super::{EmptyAddressDataWithSource, LoadedAddressDataWithSource, WithAddressDataSource};
use super::{
super::address::AddressTypeToTypeIndexMap,
EmptyAddressDataWithSource, LoadedAddressDataWithSource, WithAddressDataSource,
};
/// Tracking status of an address - determines cohort update strategy.
#[derive(Clone, Copy)]

View File

@@ -1,20 +1,16 @@
//! Output processing.
//!
//! Processes a block's outputs (new UTXOs), building:
//! - Transacted: aggregated supply by output type and amount range
//! - Address data for address cohort tracking (optional)
use brk_grouper::ByAddressType;
use brk_types::{Sats, TxIndex, TxOutData, TypeIndex};
use brk_types::{Sats, TxIndex, TypeIndex};
use crate::stateful::address::{
AddressTypeToTypeIndexMap, AddressesDataVecs, AnyAddressIndexesVecs,
use crate::stateful::{
address::{AddressTypeToTypeIndexMap, AddressesDataVecs, AnyAddressIndexesVecs},
compute::{TxOutData, VecsReaders},
states::Transacted,
};
use crate::stateful::compute::VecsReaders;
use crate::stateful::states::Transacted;
use super::super::address::AddressTypeToVec;
use super::{load_uncached_address_data, AddressCache, LoadedAddressDataWithSource, TxIndexVec};
use super::{
super::address::AddressTypeToVec,
load_uncached_address_data, AddressCache, LoadedAddressDataWithSource, TxIndexVec,
};
/// Result of processing outputs for a block.
pub struct OutputsResult {

View File

@@ -1,12 +1,11 @@
//! Process received outputs for address cohorts.
use brk_grouper::{amounts_in_different_buckets, ByAddressType};
use brk_grouper::{AmountBucket, ByAddressType};
use brk_types::{Dollars, Sats, TypeIndex};
use rustc_hash::FxHashMap;
use super::super::address::AddressTypeToVec;
use super::super::cohorts::AddressCohorts;
use super::lookup::{AddressLookup, TrackingStatus};
use super::{
super::{address::AddressTypeToVec, cohorts::AddressCohorts},
lookup::{AddressLookup, TrackingStatus},
};
pub fn process_received(
received_data: AddressTypeToVec<(TypeIndex, Sats)>,
@@ -21,6 +20,10 @@ pub fn process_received(
continue;
}
// Cache mutable refs for this address type
let type_addr_count = addr_count.get_mut(output_type).unwrap();
let type_empty_count = empty_addr_count.get_mut(output_type).unwrap();
// Aggregate receives by address - each address processed exactly once
// Track (total_value, output_count) for correct UTXO counting
let mut aggregated: FxHashMap<TypeIndex, (Sats, u32)> = FxHashMap::default();
@@ -35,11 +38,11 @@ pub fn process_received(
match status {
TrackingStatus::New => {
*addr_count.get_mut(output_type).unwrap() += 1;
*type_addr_count += 1;
}
TrackingStatus::WasEmpty => {
*addr_count.get_mut(output_type).unwrap() += 1;
*empty_addr_count.get_mut(output_type).unwrap() -= 1;
*type_addr_count += 1;
*type_empty_count -= 1;
}
TrackingStatus::Tracked => {}
}
@@ -49,9 +52,10 @@ pub fn process_received(
if is_new_entry {
// New/was-empty address - just add to cohort
addr_data.receive_outputs(total_value, price, output_count);
let new_bucket = AmountBucket::from(total_value);
cohorts
.amount_range
.get_mut(total_value) // new_balance = 0 + total_value
.get_mut_by_bucket(new_bucket)
.state
.as_mut()
.unwrap()
@@ -59,12 +63,14 @@ pub fn process_received(
} else {
let prev_balance = addr_data.balance();
let new_balance = prev_balance + total_value;
let prev_bucket = AmountBucket::from(prev_balance);
let new_bucket = AmountBucket::from(new_balance);
if amounts_in_different_buckets(prev_balance, new_balance) {
if let Some((old_bucket, new_bucket)) = prev_bucket.transition_to(new_bucket) {
// Crossing cohort boundary - subtract from old, add to new
let cohort_state = cohorts
.amount_range
.get_mut(prev_balance)
.get_mut_by_bucket(old_bucket)
.state
.as_mut()
.unwrap();
@@ -89,7 +95,7 @@ pub fn process_received(
addr_data.receive_outputs(total_value, price, output_count);
cohorts
.amount_range
.get_mut(new_balance)
.get_mut_by_bucket(new_bucket)
.state
.as_mut()
.unwrap()
@@ -98,7 +104,7 @@ pub fn process_received(
// Staying in same cohort - just receive
cohorts
.amount_range
.get_mut(new_balance)
.get_mut_by_bucket(new_bucket)
.state
.as_mut()
.unwrap()

View File

@@ -1,18 +1,12 @@
//! Process sent outputs for address cohorts.
//!
//! Updates address cohort states when addresses send funds:
//! - Addresses may cross cohort boundaries
//! - Addresses may become empty (0 balance)
//! - Age metrics (blocks_old, days_old) are tracked for sent UTXOs
use brk_error::Result;
use brk_grouper::{amounts_in_different_buckets, ByAddressType};
use brk_grouper::{AmountBucket, ByAddressType};
use brk_types::{CheckedSub, Dollars, Height, Sats, Timestamp, TypeIndex};
use vecdb::{VecIndex, unlikely};
use super::super::address::HeightToAddressTypeToVec;
use super::super::cohorts::AddressCohorts;
use super::lookup::AddressLookup;
use super::{
super::{address::HeightToAddressTypeToVec, cohorts::AddressCohorts},
lookup::AddressLookup,
};
/// Process sent outputs for address cohorts.
///
@@ -49,6 +43,10 @@ pub fn process_sent(
.is_more_than_hour();
for (output_type, vec) in by_type.unwrap().into_iter() {
// Cache mutable refs for this address type
let type_addr_count = addr_count.get_mut(output_type).unwrap();
let type_empty_count = empty_addr_count.get_mut(output_type).unwrap();
for (type_index, value) in vec {
let addr_data = lookup.get_for_send(output_type, type_index);
@@ -56,14 +54,16 @@ pub fn process_sent(
let new_balance = prev_balance.checked_sub(value).unwrap();
let will_be_empty = addr_data.has_1_utxos();
// Check if crossing cohort boundary
let crossing_boundary = amounts_in_different_buckets(prev_balance, new_balance);
// Compute buckets once
let prev_bucket = AmountBucket::from(prev_balance);
let new_bucket = AmountBucket::from(new_balance);
let crossing_boundary = prev_bucket != new_bucket;
if will_be_empty || crossing_boundary {
// Subtract from old cohort
let cohort_state = cohorts
.amount_range
.get_mut(prev_balance)
.get_mut_by_bucket(prev_bucket)
.state
.as_mut()
.unwrap();
@@ -101,8 +101,8 @@ pub fn process_sent(
unreachable!()
}
*addr_count.get_mut(output_type).unwrap() -= 1;
*empty_addr_count.get_mut(output_type).unwrap() += 1;
*type_addr_count -= 1;
*type_empty_count += 1;
// Move from loaded to empty
lookup.move_to_empty(output_type, type_index);
@@ -110,7 +110,7 @@ pub fn process_sent(
// Add to new cohort
cohorts
.amount_range
.get_mut(new_balance)
.get_mut_by_bucket(new_bucket)
.state
.as_mut()
.unwrap()
@@ -120,7 +120,7 @@ pub fn process_sent(
// Address staying in same cohort - update in place
cohorts
.amount_range
.get_mut(new_balance)
.get_mut_by_bucket(new_bucket)
.state
.as_mut()
.unwrap()

View File

@@ -1,7 +1,3 @@
//! Transaction count tracking per address.
//!
//! Updates tx_count on address data after deduplicating transaction indexes.
use crate::stateful::address::AddressTypeToTypeIndexMap;
use super::{EmptyAddressDataWithSource, LoadedAddressDataWithSource, TxIndexVec};

View File

@@ -1,5 +1,3 @@
//! Address data types with source tracking for flush operations.
use brk_types::{EmptyAddressData, EmptyAddressIndex, LoadedAddressData, LoadedAddressIndex, TxIndex};
use smallvec::SmallVec;

View File

@@ -0,0 +1,133 @@
/// Number of ranges to cache. Small enough for O(1) linear scan,
/// large enough to cover the "hot" source blocks in a typical block.
const CACHE_SIZE: usize = 8;
/// Maps ranges of indices to values for efficient reverse lookups.
///
/// Instead of storing a value for every index, stores `first_index` values
/// in a sorted `Vec` and uses binary search to find the value for any index.
/// The value is derived from the position in the `Vec` (via `V: From<usize>`).
///
/// Includes a small LRU cache of recently accessed ranges to avoid the
/// binary search when there's locality in access patterns.
#[derive(Debug)]
pub struct RangeMap<I, V> {
    /// Sorted vec of first_index values. Position in vec = value.
    first_indexes: Vec<I>,
    /// LRU cache: (range_low, range_high, value, age). Lower age = more recent.
    /// Only the first `cache_len` entries are valid.
    /// NOTE: `V` is used here, so no `PhantomData<V>` marker is needed.
    cache: [(I, I, V, u8); CACHE_SIZE],
    /// Number of valid entries in `cache`.
    cache_len: u8,
}
impl<I: Default + Copy, V: Default + Copy> Default for RangeMap<I, V> {
    fn default() -> Self {
        Self {
            first_indexes: Vec::new(),
            cache: [(I::default(), I::default(), V::default(), 0); CACHE_SIZE],
            cache_len: 0,
        }
    }
}
impl<I: Ord + Copy + Default, V: From<usize> + Copy + Default> RangeMap<I, V> {
    /// Create with pre-allocated capacity for the range vec.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            first_indexes: Vec::with_capacity(capacity),
            cache: [(I::default(), I::default(), V::default(), 0); CACHE_SIZE],
            cache_len: 0,
        }
    }
    /// Push a new first_index. Value is implicitly the current length.
    /// Must be called in order (first_index must be >= all previous).
    #[inline]
    pub fn push(&mut self, first_index: I) {
        debug_assert!(
            self.first_indexes
                .last()
                .is_none_or(|&last| first_index >= last),
            "RangeMap: first_index must be monotonically increasing"
        );
        self.first_indexes.push(first_index);
    }
    /// Look up value for an index, checking the cache first.
    /// Returns the value (position) of the largest first_index <= given index.
    #[inline]
    pub fn get(&mut self, index: I) -> Option<V> {
        if self.first_indexes.is_empty() {
            return None;
        }
        let cache_len = self.cache_len as usize;
        // Check cache first (linear scan of small array).
        for i in 0..cache_len {
            let (low, high, value, _) = self.cache[i];
            if index >= low && index < high {
                // Cache hit - mark as most recently used.
                if self.cache[i].3 != 0 {
                    for j in 0..cache_len {
                        self.cache[j].3 = self.cache[j].3.saturating_add(1);
                    }
                    self.cache[i].3 = 0;
                }
                return Some(value);
            }
        }
        // Cache miss - binary search for the containing range.
        let pos = self.first_indexes.partition_point(|&first| first <= index);
        if pos == 0 {
            // `index` precedes the very first range.
            return None;
        }
        let value = V::from(pos - 1);
        // Cache the range only when it has an upper bound, i.e. it is not the
        // last range (whose high end is unbounded and can still grow via `push`).
        if let Some(&high) = self.first_indexes.get(pos) {
            let low = self.first_indexes[pos - 1];
            self.add_to_cache(low, high, value);
        }
        Some(value)
    }
    /// Insert (low, high, value) into the LRU cache, evicting the oldest
    /// entry when the cache is full.
    #[inline]
    fn add_to_cache(&mut self, low: I, high: I, value: V) {
        let cache_len = self.cache_len as usize;
        // Age all existing entries; the new entry starts at age 0.
        for i in 0..cache_len {
            self.cache[i].3 = self.cache[i].3.saturating_add(1);
        }
        if cache_len < CACHE_SIZE {
            // Not full - append.
            self.cache[cache_len] = (low, high, value, 0);
            self.cache_len += 1;
        } else {
            // Full - evict the entry with the highest age.
            let mut oldest_idx = 0;
            let mut oldest_age = 0u8;
            for i in 0..CACHE_SIZE {
                if self.cache[i].3 > oldest_age {
                    oldest_age = self.cache[i].3;
                    oldest_idx = i;
                }
            }
            self.cache[oldest_idx] = (low, high, value, 0);
        }
    }
}

View File

@@ -1,7 +1,3 @@
//! Cohort state tracking during computation.
//!
//! This state is maintained in memory during block processing and periodically flushed.
use std::path::Path;
use brk_error::Result;

View File

@@ -1,5 +1,3 @@
//! Main Vecs struct for stateful computation.
use std::path::Path;
use brk_error::Result;
@@ -21,7 +19,7 @@ use crate::{
ComputedValueVecsFromHeight, ComputedVecsFromDateIndex, ComputedVecsFromHeight, Source,
VecBuilderOptions,
},
indexes, price,
indexes, price, txins,
stateful::{
compute::{StartMode, determine_start_mode, process_blocks, recover_state, reset_state},
states::BlockState,
@@ -43,9 +41,6 @@ pub struct Vecs {
#[traversable(skip)]
db: Database,
// ---
// States
// ---
pub chain_state: BytesVec<Height, SupplyState>,
pub any_address_indexes: AnyAddressIndexesVecs,
pub addresses_data: AddressesDataVecs,
@@ -57,9 +52,6 @@ pub struct Vecs {
pub addresstype_to_height_to_addr_count: AddressTypeToHeightToAddressCount,
pub addresstype_to_height_to_empty_addr_count: AddressTypeToHeightToAddressCount,
// ---
// Computed
// ---
pub addresstype_to_indexes_to_addr_count: AddressTypeToIndexesToAddressCount,
pub addresstype_to_indexes_to_empty_addr_count: AddressTypeToIndexesToAddressCount,
pub indexes_to_unspendable_supply: ComputedValueVecsFromHeight,
@@ -245,6 +237,7 @@ impl Vecs {
&mut self,
indexer: &Indexer,
indexes: &indexes::Vecs,
txins: &txins::Vecs,
chain: &chain::Vecs,
price: Option<&price::Vecs>,
starting_indexes: &mut Indexes,
@@ -365,6 +358,7 @@ impl Vecs {
self,
indexer,
indexes,
txins,
chain,
price,
starting_height,

View File

@@ -0,0 +1,139 @@
use std::path::Path;
use brk_error::Result;
use brk_indexer::Indexer;
use brk_traversable::Traversable;
use brk_types::{Sats, TxInIndex, TxIndex, TxOutIndex, Version, Vout};
use log::info;
use vecdb::{
AnyStoredVec, AnyVec, Database, Exit, GenericStoredVec, ImportableVec, PAGE_SIZE, PcoVec,
TypedVecIterator, VecIndex,
};
use super::Indexes;
const ONE_GB: usize = 1024 * 1024 * 1024;
/// Derived per-input vectors: for every transaction input, the output it
/// spends and that output's value (computed in `compute` below).
#[derive(Clone, Traversable)]
pub struct Vecs {
/// Backing database, opened under the "txins" subdirectory.
db: Database,
/// Input index -> global index of the txout it spends
/// (coinbase inputs hold the `TxOutIndex::COINBASE` sentinel).
pub txinindex_to_txoutindex: PcoVec<TxInIndex, TxOutIndex>,
/// Input index -> value of the spent txout
/// (coinbase inputs hold the `Sats::MAX` sentinel).
pub txinindex_to_value: PcoVec<TxInIndex, Sats>,
}
impl Vecs {
/// Open (or create) the txins database under `parent_path/txins`, import
/// both vectors, drop database regions no longer referenced by them, and
/// compact before returning.
pub fn forced_import(parent_path: &Path, parent_version: Version) -> Result<Self> {
let db = Database::open(&parent_path.join("txins"))?;
// Reserve a large minimum database size up front.
db.set_min_len(PAGE_SIZE * 10_000_000)?;
let version = parent_version + Version::ZERO;
let this = Self {
txinindex_to_txoutindex: PcoVec::forced_import(&db, "txoutindex", version)?,
txinindex_to_value: PcoVec::forced_import(&db, "value", version)?,
db,
};
// Keep only the regions still referenced by the exported vectors.
this.db.retain_regions(
this.iter_any_exportable()
.flat_map(|v| v.region_names())
.collect(),
)?;
this.db.compact()?;
Ok(this)
}
/// Resolve every unprocessed input's spent-output index and value.
///
/// Resumes from the smallest of the two vec lengths and the caller's
/// starting txinindex, working in ~1 GB batches of `Entry` records.
/// Each batch is sorted three times so every source iterator is read in
/// ascending key order rather than random-accessed:
/// 1. by txindex    -> resolve txoutindex = first_txoutindex + vout
/// 2. by txoutindex -> resolve the spent output's value
/// 3. by txinindex  -> push results back in input order
/// Coinbase inputs keep their sentinels (`TxOutIndex::COINBASE`, `Sats::MAX`).
pub fn compute(
&mut self,
indexer: &Indexer,
starting_indexes: &Indexes,
exit: &Exit,
) -> Result<()> {
// Total number of inputs known to the indexer.
let target = indexer.vecs.txin.txinindex_to_outpoint.len();
if target == 0 {
return Ok(());
}
// Resume point: everything before `min` is already computed.
let min = self
.txinindex_to_txoutindex
.len()
.min(self.txinindex_to_value.len())
.min(starting_indexes.txinindex.to_usize());
if min >= target {
return Ok(());
}
info!("TxIns: computing {} entries ({} to {})", target - min, min, target);
// Cap batch memory at ~1 GB of Entry records.
const BATCH_SIZE: usize = ONE_GB / size_of::<Entry>();
let mut outpoint_iter = indexer.vecs.txin.txinindex_to_outpoint.iter()?;
let mut first_txoutindex_iter = indexer.vecs.tx.txindex_to_first_txoutindex.iter()?;
let mut value_iter = indexer.vecs.txout.txoutindex_to_value.iter()?;
let mut entries: Vec<Entry> = Vec::with_capacity(BATCH_SIZE);
let mut batch_start = min;
while batch_start < target {
let batch_end = (batch_start + BATCH_SIZE).min(target);
entries.clear();
// Phase 0: read outpoints; txoutindex/value start as coinbase sentinels.
for i in batch_start..batch_end {
let txinindex = TxInIndex::from(i);
let outpoint = outpoint_iter.get_unwrap(txinindex);
entries.push(Entry {
txinindex,
txindex: outpoint.txindex(),
vout: outpoint.vout(),
txoutindex: TxOutIndex::COINBASE,
value: Sats::MAX,
});
}
// Coinbase entries (txindex MAX) sorted to end
entries.sort_unstable_by_key(|e| e.txindex);
for entry in &mut entries {
if entry.txindex.is_coinbase() {
// Everything from here on is coinbase - leave the sentinels.
break;
}
entry.txoutindex = first_txoutindex_iter.get_unwrap(entry.txindex) + entry.vout;
}
// Coinbase sentinels (txoutindex COINBASE) again sort to the end.
entries.sort_unstable_by_key(|e| e.txoutindex);
for entry in &mut entries {
if entry.txoutindex.is_coinbase() {
break;
}
entry.value = value_iter.get_unwrap(entry.txoutindex);
}
// Restore input order so the pushes below append sequentially.
entries.sort_unstable_by_key(|e| e.txinindex);
for entry in &entries {
self.txinindex_to_txoutindex
.truncate_push(entry.txinindex, entry.txoutindex)?;
self.txinindex_to_value
.truncate_push(entry.txinindex, entry.value)?;
}
batch_start = batch_end;
}
// NOTE(review): the exit lock presumably prevents shutdown mid-flush - confirm.
{
let _lock = exit.lock();
self.txinindex_to_txoutindex.flush()?;
self.txinindex_to_value.flush()?;
}
self.db.compact()?;
Ok(())
}
}
/// Scratch record for one input while `compute` resolves its spent output.
struct Entry {
/// The input's own index; final sort key so pushes happen in input order.
txinindex: TxInIndex,
/// Transaction containing the spent output (from the outpoint).
txindex: TxIndex,
/// Output position within that transaction (from the outpoint).
vout: Vout,
/// Resolved global output index; starts as the `TxOutIndex::COINBASE` sentinel.
txoutindex: TxOutIndex,
/// Resolved output value; starts as the `Sats::MAX` sentinel.
value: Sats,
}

View File

@@ -0,0 +1,128 @@
use std::path::Path;
use brk_error::Result;
use brk_indexer::Indexer;
use brk_traversable::Traversable;
use brk_types::{Height, TxInIndex, TxOutIndex, Version};
use log::info;
use vecdb::{
AnyVec, BytesVec, Database, Exit, GenericStoredVec, ImportableVec, PAGE_SIZE, Stamp,
TypedVecIterator,
};
use super::{txins, Indexes};
const ONE_GB: usize = 1024 * 1024 * 1024;
const BATCH_SIZE: usize = ONE_GB / size_of::<(TxOutIndex, TxInIndex)>();
/// Derived per-output vector: for every transaction output, the input that
/// spends it (computed in `compute` below).
#[derive(Clone, Traversable)]
pub struct Vecs {
/// Backing database, opened under the "txouts" subdirectory.
db: Database,
/// Output index -> index of the spending input;
/// `TxInIndex::UNSPENT` while no input has claimed it.
pub txoutindex_to_txinindex: BytesVec<TxOutIndex, TxInIndex>,
}
impl Vecs {
/// Open (or create) the txouts database under `parent_path/txouts`, import
/// the vector, drop database regions no longer referenced by it, and
/// compact before returning.
pub fn forced_import(parent_path: &Path, parent_version: Version) -> Result<Self> {
let db = Database::open(&parent_path.join("txouts"))?;
// Reserve a large minimum database size up front.
db.set_min_len(PAGE_SIZE * 10_000_000)?;
let version = parent_version + Version::ZERO;
let this = Self {
txoutindex_to_txinindex: BytesVec::forced_import(&db, "txinindex", version)?,
db,
};
// Keep only the regions still referenced by the exported vectors.
this.db.retain_regions(
this.iter_any_exportable()
.flat_map(|v| v.region_names())
.collect(),
)?;
this.db.compact()?;
Ok(this)
}
/// Compute the spend mappings (see `compute_`), then compact the database.
pub fn compute(
&mut self,
indexer: &Indexer,
txins: &txins::Vecs,
starting_indexes: &Indexes,
exit: &Exit,
) -> Result<()> {
self.compute_(indexer, txins, starting_indexes, exit)?;
self.db.compact()?;
Ok(())
}
/// Build the txoutindex -> spending-txinindex mapping.
///
/// Rolls the vec back to the caller's starting stamp, truncates to the
/// resume point, fills new slots with `TxInIndex::UNSPENT`, then replays
/// inputs from `txins` in ~1 GB batches, applying updates in ascending
/// txoutindex order. Finally performs a stamped write at the chain tip
/// height under the exit lock.
fn compute_(
&mut self,
indexer: &Indexer,
txins: &txins::Vecs,
starting_indexes: &Indexes,
exit: &Exit,
) -> Result<()> {
let target_txoutindex = indexer.vecs.txout.txoutindex_to_value.len();
let target_txinindex = txins.txinindex_to_txoutindex.len();
if target_txoutindex == 0 {
return Ok(());
}
// Height of the last indexed block; used as the write stamp below.
let target_height = Height::from(indexer.vecs.block.height_to_blockhash.len() - 1);
// Resume points for outputs and inputs respectively.
let min_txoutindex =
TxOutIndex::from(self.txoutindex_to_txinindex.len()).min(starting_indexes.txoutindex);
let min_txinindex = usize::from(starting_indexes.txinindex);
let starting_stamp = Stamp::from(starting_indexes.height);
// Best-effort rollback to the starting stamp (result deliberately ignored).
let _ = self.txoutindex_to_txinindex.rollback_before(starting_stamp);
self.txoutindex_to_txinindex
.truncate_if_needed(min_txoutindex)?;
// Every output starts as unspent until an input claims it below.
self.txoutindex_to_txinindex
.fill_to(target_txoutindex, TxInIndex::UNSPENT)?;
if min_txinindex < target_txinindex {
info!(
"TxOuts: computing spend mappings ({} to {})",
min_txinindex, target_txinindex
);
let mut txoutindex_iter = txins.txinindex_to_txoutindex.iter()?;
let mut pairs: Vec<(TxOutIndex, TxInIndex)> = Vec::with_capacity(BATCH_SIZE);
let mut batch_start = min_txinindex;
while batch_start < target_txinindex {
let batch_end = (batch_start + BATCH_SIZE).min(target_txinindex);
pairs.clear();
for i in batch_start..batch_end {
let txinindex = TxInIndex::from(i);
let txoutindex = txoutindex_iter.get_unwrap(txinindex);
// Coinbase inputs spend no output - skip.
if txoutindex.is_coinbase() {
continue;
}
pairs.push((txoutindex, txinindex));
}
// Apply updates in ascending txoutindex order.
pairs.sort_unstable_by_key(|(txoutindex, _)| *txoutindex);
for &(txoutindex, txinindex) in &pairs {
self.txoutindex_to_txinindex.update(txoutindex, txinindex)?;
}
batch_start = batch_end;
}
}
// NOTE(review): the exit lock presumably prevents shutdown mid-write - confirm.
let _lock = exit.lock();
self.txoutindex_to_txinindex
.stamped_write_with_changes(Stamp::from(target_height))?;
Ok(())
}
}

View File

@@ -6,9 +6,27 @@ use rayon::prelude::*;
use super::{AmountFilter, Filter};
/// Bucket index for amount ranges. Use for cheap comparisons.
/// Bucket index for amount ranges. Use for cheap comparisons and direct lookups.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct AmountBucket(u8);
pub struct AmountBucket(u8);
impl AmountBucket {
/// Returns (self, other) if buckets differ, None if same.
/// Use with `ByAmountRange::get_mut_by_bucket` to avoid recomputing.
#[inline(always)]
pub fn transition_to(self, other: Self) -> Option<(Self, Self)> {
if self != other {
Some((self, other))
} else {
None
}
}
#[inline(always)]
pub fn index(self) -> u8 {
self.0
}
}
impl From<Sats> for AmountBucket {
#[inline(always)]
@@ -125,7 +143,14 @@ impl<T> ByAmountRange<T> {
#[inline(always)]
pub fn get_mut(&mut self, value: Sats) -> &mut T {
match AmountBucket::from(value).0 {
self.get_mut_by_bucket(AmountBucket::from(value))
}
/// Get mutable reference by pre-computed bucket index.
/// Use with `AmountBucket::transition_to` to avoid recomputing bucket.
#[inline(always)]
pub fn get_mut_by_bucket(&mut self, bucket: AmountBucket) -> &mut T {
match bucket.0 {
0 => &mut self._0sats,
1 => &mut self._1sat_to_10sats,
2 => &mut self._10sats_to_100sats,

View File

@@ -29,7 +29,7 @@ fn main() -> Result<()> {
indexer
.vecs
.txout
.txoutindex_to_txoutdata
.txoutindex_to_value
.iter()?
.enumerate()
.take(200)

View File

@@ -13,8 +13,8 @@ fn run_benchmark(indexer: &Indexer) -> (Sats, std::time::Duration, usize) {
let mut sum = Sats::ZERO;
let mut count = 0;
for txoutdata in indexer.vecs.txout.txoutindex_to_txoutdata.clean_iter().unwrap() {
sum += txoutdata.value;
for value in indexer.vecs.txout.txoutindex_to_value.clean_iter().unwrap() {
sum += value;
count += 1;
}

View File

@@ -224,7 +224,7 @@ impl From<(Height, &mut Vecs, &Stores)> for Indexes {
let txoutindex = starting_index(
&vecs.txout.height_to_first_txoutindex,
&vecs.txout.txoutindex_to_txoutdata,
&vecs.txout.txoutindex_to_value,
height,
)
.unwrap();

View File

@@ -2,7 +2,7 @@
use std::{fs, path::Path, thread, time::Instant};
use brk_error::{Error, Result};
use brk_error::Result;
use brk_iterator::Blocks;
use brk_rpc::Client;
use brk_types::Height;
@@ -11,7 +11,6 @@ use vecdb::Exit;
mod constants;
mod indexes;
mod processor;
mod range_map;
mod readers;
mod stores;
mod vecs;
@@ -19,7 +18,6 @@ mod vecs;
use constants::*;
pub use indexes::*;
pub use processor::*;
pub use range_map::*;
pub use readers::*;
pub use stores::*;
pub use vecs::*;
@@ -32,19 +30,10 @@ pub struct Indexer {
impl Indexer {
pub fn forced_import(outputs_dir: &Path) -> Result<Self> {
match Self::forced_import_inner(outputs_dir) {
Ok(result) => Ok(result),
Err(Error::VersionMismatch { path, .. }) => {
let indexed_path = outputs_dir.join("indexed");
info!("Version mismatch at {path:?}, deleting {indexed_path:?} and retrying");
fs::remove_dir_all(&indexed_path)?;
Self::forced_import(outputs_dir)
}
Err(e) => Err(e),
}
Self::forced_import_inner(outputs_dir, true)
}
fn forced_import_inner(outputs_dir: &Path) -> Result<Self> {
fn forced_import_inner(outputs_dir: &Path, can_retry: bool) -> Result<Self> {
info!("Increasing number of open files limit...");
let no_file_limit = rlimit::getrlimit(rlimit::Resource::NOFILE)?;
rlimit::setrlimit(
@@ -55,24 +44,36 @@ impl Indexer {
info!("Importing indexer...");
let path = outputs_dir.join("indexed");
let indexed_path = outputs_dir.join("indexed");
let try_import = || -> Result<Self> {
let (vecs, stores) = thread::scope(|s| -> Result<_> {
let vecs = s.spawn(|| -> Result<_> {
let i = Instant::now();
let vecs = Vecs::forced_import(&indexed_path, VERSION)?;
info!("Imported vecs in {:?}", i.elapsed());
Ok(vecs)
});
let (vecs, stores) = thread::scope(|s| -> Result<_> {
let vecs = s.spawn(|| -> Result<_> {
let i = Instant::now();
let vecs = Vecs::forced_import(&path, VERSION)?;
info!("Imported vecs in {:?}", i.elapsed());
Ok(vecs)
});
let stores = Stores::forced_import(&indexed_path, VERSION)?;
info!("Imported stores in {:?}", i.elapsed());
let i = Instant::now();
let stores = Stores::forced_import(&path, VERSION)?;
info!("Imported stores in {:?}", i.elapsed());
Ok((vecs.join().unwrap()?, stores))
})?;
Ok((vecs.join().unwrap()?, stores))
})?;
Ok(Self { vecs, stores })
};
Ok(Self { vecs, stores })
match try_import() {
Ok(result) => Ok(result),
Err(err) if can_retry => {
info!("{err:?}, deleting {indexed_path:?} and retrying");
fs::remove_dir_all(&indexed_path)?;
Self::forced_import_inner(outputs_dir, false)
}
Err(err) => Err(err),
}
}
pub fn index(&mut self, blocks: &Blocks, client: &Client, exit: &Exit) -> Result<Indexes> {
@@ -132,25 +133,31 @@ impl Indexer {
let export = move |stores: &mut Stores, vecs: &mut Vecs, height: Height| -> Result<()> {
info!("Exporting...");
let i = Instant::now();
let _lock = exit.lock();
let i = Instant::now();
stores.commit(height).unwrap();
debug!("Commited stores in {}s", i.elapsed().as_secs());
let i = Instant::now();
vecs.flush(height)?;
debug!("Flushed vecs in {}s", i.elapsed().as_secs());
thread::scope(|s| -> Result<()> {
let stores_res = s.spawn(|| -> Result<()> {
let i = Instant::now();
stores.commit(height)?;
info!("Stores exported in {:?}", i.elapsed());
Ok(())
});
let vecs_res = s.spawn(|| -> Result<()> {
let i = Instant::now();
vecs.flush(height)?;
info!("Vecs exported in {:?}", i.elapsed());
Ok(())
});
stores_res.join().unwrap()?;
vecs_res.join().unwrap()?;
Ok(())
})?;
info!("Exported in {:?}", i.elapsed());
Ok(())
};
let mut readers = Readers::new(&self.vecs);
// Build txindex -> height map from existing data for efficient lookups
let mut txindex_to_height = RangeMap::new();
for (height, first_txindex) in self.vecs.tx.height_to_first_txindex.into_iter().enumerate()
{
txindex_to_height.insert(first_txindex, Height::from(height));
}
let vecs = &mut self.vecs;
let stores = &mut self.stores;
@@ -161,9 +168,6 @@ impl Indexer {
indexes.height = height;
// Insert current block's first_txindex -> height before processing inputs
txindex_to_height.insert(indexes.txindex, height);
// Used to check rapidhash collisions
let block_check_collisions = check_collisions && height > COLLISIONS_CHECKED_UP_TO;
@@ -175,7 +179,6 @@ impl Indexer {
vecs,
stores,
readers: &readers,
txindex_to_height: &txindex_to_height,
};
// Phase 1: Process block metadata
@@ -199,11 +202,11 @@ impl Indexer {
let outputs_len = txouts.len();
// Phase 6: Finalize outputs sequentially
let mut same_block_output_info =
let same_block_output_info =
processor.finalize_outputs(txouts, &same_block_spent_outpoints)?;
// Phase 7: Finalize inputs sequentially
processor.finalize_inputs(txins, &mut same_block_output_info)?;
processor.finalize_inputs(txins, same_block_output_info)?;
// Phase 8: Check TXID collisions
processor.check_txid_collisions(&txs)?;

View File

@@ -1,5 +1,3 @@
//! Block metadata processing.
use brk_error::{Error, Result};
use brk_types::{BlockHashPrefix, Timestamp};
use log::error;
@@ -8,13 +6,11 @@ use vecdb::GenericStoredVec;
use super::BlockProcessor;
impl BlockProcessor<'_> {
/// Process block metadata (blockhash, difficulty, timestamp, etc.)
pub fn process_block_metadata(&mut self) -> Result<()> {
let height = self.height;
let blockhash = self.block.hash();
let blockhash_prefix = BlockHashPrefix::from(blockhash);
// Check for blockhash prefix collision
if self
.stores
.blockhashprefix_to_height

View File

@@ -1,8 +1,3 @@
//! Block processing for indexing.
//!
//! This module handles the extraction and storage of all indexed data from blocks.
//! Processing is split into phases that can be parallelized where possible.
mod metadata;
mod tx;
mod txin;
@@ -13,7 +8,7 @@ pub use types::*;
use brk_types::{Block, Height, TxInIndex, TxIndex, TxOutIndex};
use crate::{Indexes, RangeMap, Readers, Stores, Vecs};
use crate::{Indexes, Readers, Stores, Vecs};
/// Processes a single block, extracting and storing all indexed data.
pub struct BlockProcessor<'a> {
@@ -24,7 +19,6 @@ pub struct BlockProcessor<'a> {
pub vecs: &'a mut Vecs,
pub stores: &'a mut Stores,
pub readers: &'a Readers,
pub txindex_to_height: &'a RangeMap<TxIndex, Height>,
}
impl BlockProcessor<'_> {

View File

@@ -1,5 +1,3 @@
//! TXID computation and collision checking.
use brk_error::{Error, Result};
use brk_types::{StoredBool, TxIndex, Txid, TxidPrefix};
use rayon::prelude::*;
@@ -10,7 +8,6 @@ use crate::constants::DUPLICATE_TXIDS;
use super::{BlockProcessor, ComputedTx};
impl<'a> BlockProcessor<'a> {
/// Compute TXIDs in parallel (CPU-intensive operation).
pub fn compute_txids(&self) -> Result<Vec<ComputedTx<'a>>> {
let will_check_collisions =
self.check_collisions && self.stores.txidprefix_to_txindex.needs(self.height);
@@ -44,7 +41,7 @@ impl<'a> BlockProcessor<'a> {
.collect()
}
/// Check for TXID collisions (only for known duplicate TXIDs).
/// Only for known duplicate TXIDs (BIP-30).
pub fn check_txid_collisions(&self, txs: &[ComputedTx]) -> Result<()> {
if likely(!self.check_collisions) {
return Ok(());
@@ -56,7 +53,6 @@ impl<'a> BlockProcessor<'a> {
continue;
};
// In case if we start at an already parsed height
if ct.txindex == prev_txindex {
continue;
}
@@ -80,7 +76,6 @@ impl<'a> BlockProcessor<'a> {
Ok(())
}
/// Store transaction metadata.
pub fn store_transaction_metadata(&mut self, txs: Vec<ComputedTx>) -> Result<()> {
let height = self.height;

View File

@@ -1,9 +1,7 @@
//! Input processing for block indexing.
use brk_error::{Error, Result};
use brk_types::{
AddressIndexOutPoint, AddressIndexTxIndex, OutPoint, OutputType, Sats, TxInIndex, TxIndex,
TxOutIndex, Txid, TxidPrefix, TypeIndex, Unit, Vin, Vout,
AddressIndexOutPoint, AddressIndexTxIndex, OutPoint, OutputType, TxInIndex, TxIndex, Txid,
TxidPrefix, TypeIndex, Unit, Vin, Vout,
};
use rayon::prelude::*;
use rustc_hash::{FxHashMap, FxHashSet};
@@ -12,12 +10,6 @@ use vecdb::GenericStoredVec;
use super::{BlockProcessor, ComputedTx, InputSource, SameBlockOutputInfo};
impl<'a> BlockProcessor<'a> {
/// Process inputs in parallel.
///
/// Uses collect().into_par_iter() pattern because:
/// 1. The inner work (store lookups, vector reads) is expensive
/// 2. We want to parallelize across ALL inputs, not just per-transaction
/// 3. The intermediate allocation (~8KB per block) is negligible compared to parallelism gains
pub fn process_inputs<'c>(
&self,
txs: &[ComputedTx<'c>],
@@ -102,30 +94,25 @@ impl<'a> BlockProcessor<'a> {
let outpoint = OutPoint::new(prev_txindex, vout);
let txoutdata = self
let outputtype = self
.vecs
.txout
.txoutindex_to_txoutdata
.get_pushed_or_read(txoutindex, &self.readers.txoutindex_to_txoutdata)?
.ok_or(Error::Internal("Missing txout data"))?;
.txoutindex_to_outputtype
.get_pushed_or_read(txoutindex, &self.readers.txoutindex_to_outputtype)?
.ok_or(Error::Internal("Missing outputtype"))?;
let value = txoutdata.value;
let outputtype = txoutdata.outputtype;
let typeindex = txoutdata.typeindex;
let height = self
.txindex_to_height
.get(prev_txindex)
.ok_or(Error::Internal("Missing height in txindex_to_height map"))?;
let typeindex = self
.vecs
.txout
.txoutindex_to_typeindex
.get_pushed_or_read(txoutindex, &self.readers.txoutindex_to_typeindex)?
.ok_or(Error::Internal("Missing typeindex"))?;
Ok((
txinindex,
InputSource::PreviousBlock {
vin,
value,
height,
txindex,
txoutindex,
outpoint,
outputtype,
typeindex,
@@ -138,7 +125,6 @@ impl<'a> BlockProcessor<'a> {
Ok(txins)
}
/// Collect same-block spent outpoints.
pub fn collect_same_block_spent_outpoints(
txins: &[(TxInIndex, InputSource)],
) -> FxHashSet<OutPoint> {
@@ -157,66 +143,41 @@ impl<'a> BlockProcessor<'a> {
.collect()
}
/// Finalize inputs sequentially (stores outpoints, updates address UTXOs).
pub fn finalize_inputs(
&mut self,
txins: Vec<(TxInIndex, InputSource)>,
same_block_output_info: &mut FxHashMap<OutPoint, SameBlockOutputInfo>,
mut same_block_output_info: FxHashMap<OutPoint, SameBlockOutputInfo>,
) -> Result<()> {
let height = self.height;
for (txinindex, input_source) in txins {
let (prev_height, vin, txindex, value, outpoint, txoutindex, outputtype, typeindex) =
match input_source {
InputSource::PreviousBlock {
height,
vin,
txindex,
txoutindex,
value,
outpoint,
outputtype,
typeindex,
} => (
height, vin, txindex, value, outpoint, txoutindex, outputtype, typeindex,
),
InputSource::SameBlock {
txindex,
txin,
vin,
outpoint,
} => {
if outpoint.is_coinbase() {
(
height,
vin,
txindex,
Sats::COINBASE,
outpoint,
TxOutIndex::COINBASE,
OutputType::Unknown,
TypeIndex::COINBASE,
)
} else {
let info = same_block_output_info
.remove(&outpoint)
.ok_or(Error::Internal("Same-block output not found"))
.inspect_err(|_| {
dbg!(&same_block_output_info, txin);
})?;
(
height,
vin,
txindex,
info.value,
outpoint,
info.txoutindex,
info.outputtype,
info.typeindex,
)
}
let (vin, txindex, outpoint, outputtype, typeindex) = match input_source {
InputSource::PreviousBlock {
vin,
txindex,
outpoint,
outputtype,
typeindex,
} => (vin, txindex, outpoint, outputtype, typeindex),
InputSource::SameBlock {
txindex,
txin,
vin,
outpoint,
} => {
if outpoint.is_coinbase() {
(vin, txindex, outpoint, OutputType::Unknown, TypeIndex::COINBASE)
} else {
let info = same_block_output_info
.remove(&outpoint)
.ok_or(Error::Internal("Same-block output not found"))
.inspect_err(|_| {
dbg!(&same_block_output_info, txin);
})?;
(vin, txindex, outpoint, info.outputtype, info.typeindex)
}
};
}
};
if vin.is_zero() {
self.vecs
@@ -233,14 +194,6 @@ impl<'a> BlockProcessor<'a> {
.txin
.txinindex_to_outpoint
.checked_push(txinindex, outpoint)?;
self.vecs
.txin
.txinindex_to_value
.checked_push(txinindex, value)?;
self.vecs
.txin
.txinindex_to_prev_height
.checked_push(txinindex, prev_height)?;
self.vecs
.txin
.txinindex_to_outputtype
@@ -250,14 +203,6 @@ impl<'a> BlockProcessor<'a> {
.txinindex_to_typeindex
.checked_push(txinindex, typeindex)?;
// Update txoutindex_to_txinindex for non-coinbase inputs
if !txoutindex.is_coinbase() {
self.vecs
.txout
.txoutindex_to_txinindex
.update(txoutindex, txinindex)?;
}
if !outputtype.is_address() {
continue;
}

View File

@@ -1,10 +1,8 @@
//! Output processing for block indexing.
use brk_error::{Error, Result};
use brk_grouper::ByAddressType;
use brk_types::{
AddressBytes, AddressHash, AddressIndexOutPoint, AddressIndexTxIndex, OutPoint, OutputType,
Sats, TxInIndex, TxIndex, TxOutData, TxOutIndex, TypeIndex, Unit, Vout,
Sats, TxIndex, TxOutIndex, TypeIndex, Unit, Vout,
};
use rayon::prelude::*;
use rustc_hash::{FxHashMap, FxHashSet};
@@ -13,7 +11,6 @@ use vecdb::GenericStoredVec;
use super::{BlockProcessor, ProcessedOutput, SameBlockOutputInfo};
impl<'a> BlockProcessor<'a> {
/// Process outputs in parallel.
pub fn process_outputs(&self) -> Result<Vec<ProcessedOutput<'a>>> {
let height = self.height;
let check_collisions = self.check_collisions;
@@ -21,7 +18,6 @@ impl<'a> BlockProcessor<'a> {
let base_txindex = self.indexes.txindex;
let base_txoutindex = self.indexes.txoutindex;
// Same pattern as inputs: collect then parallelize for maximum parallelism
self.block
.txdata
.iter()
@@ -120,7 +116,6 @@ impl<'a> BlockProcessor<'a> {
.collect()
}
/// Finalize outputs sequentially (stores addresses, tracks UTXOs).
pub fn finalize_outputs(
&mut self,
txouts: Vec<ProcessedOutput>,
@@ -129,7 +124,6 @@ impl<'a> BlockProcessor<'a> {
let height = self.height;
let mut already_added_addresshash: ByAddressType<FxHashMap<AddressHash, TypeIndex>> =
ByAddressType::default();
// Pre-size based on the number of same-block spent outpoints
let mut same_block_output_info: FxHashMap<OutPoint, SameBlockOutputInfo> =
FxHashMap::with_capacity_and_hasher(
same_block_spent_outpoints.len(),
@@ -217,15 +211,18 @@ impl<'a> BlockProcessor<'a> {
}
};
let txoutdata = TxOutData::new(sats, outputtype, typeindex);
self.vecs
.txout
.txoutindex_to_txoutdata
.checked_push(txoutindex, txoutdata)?;
.txoutindex_to_value
.checked_push(txoutindex, sats)?;
self.vecs
.txout
.txoutindex_to_txinindex
.checked_push(txoutindex, TxInIndex::UNSPENT)?;
.txoutindex_to_outputtype
.checked_push(txoutindex, outputtype)?;
self.vecs
.txout
.txoutindex_to_typeindex
.checked_push(txoutindex, typeindex)?;
if outputtype.is_unspendable() {
continue;
@@ -251,8 +248,6 @@ impl<'a> BlockProcessor<'a> {
SameBlockOutputInfo {
outputtype,
typeindex,
value: sats,
txoutindex,
},
);
} else if outputtype.is_address() {

View File

@@ -1,20 +1,14 @@
//! Type definitions for block processing.
use bitcoin::{Transaction, TxIn, TxOut};
use brk_types::{
AddressBytes, AddressHash, Height, OutPoint, OutputType, Sats, TxIndex, TxOutIndex, Txid,
TxidPrefix, TypeIndex, Vin, Vout,
AddressBytes, AddressHash, OutPoint, OutputType, TxIndex, TxOutIndex, Txid, TxidPrefix,
TypeIndex, Vin, Vout,
};
/// Input source for tracking where an input came from.
#[derive(Debug)]
pub enum InputSource<'a> {
PreviousBlock {
vin: Vin,
value: Sats,
height: Height,
txindex: TxIndex,
txoutindex: TxOutIndex,
outpoint: OutPoint,
outputtype: OutputType,
typeindex: TypeIndex,
@@ -27,16 +21,12 @@ pub enum InputSource<'a> {
},
}
/// Output info for same-block spends (output created and spent in the same block).
#[derive(Debug, Clone, Copy)]
pub struct SameBlockOutputInfo {
pub outputtype: OutputType,
pub typeindex: TypeIndex,
pub value: Sats,
pub txoutindex: TxOutIndex,
}
/// Processed output data from parallel output processing.
pub struct ProcessedOutput<'a> {
pub txoutindex: TxOutIndex,
pub txout: &'a TxOut,
@@ -47,7 +37,6 @@ pub struct ProcessedOutput<'a> {
pub existing_typeindex: Option<TypeIndex>,
}
/// Computed transaction data from parallel TXID computation.
pub struct ComputedTx<'a> {
pub txindex: TxIndex,
pub tx: &'a Transaction,

View File

@@ -1,40 +0,0 @@
//! Range-based lookup map for efficient index -> value lookups.
//!
//! Uses the pattern that many indices share the same value (e.g., all txindexes
//! in a block have the same height) to provide O(log n) lookups via BTreeMap.
use std::collections::BTreeMap;
use vecdb::VecIndex;
/// Maps ranges of indices to values for efficient reverse lookups.
///
/// Instead of storing a value for every index, stores (first_index, value)
/// pairs and uses range search to find the value for any index.
#[derive(Debug, Default)]
pub struct RangeMap<I, V>(BTreeMap<I, V>);
impl<I: VecIndex, V: Copy> RangeMap<I, V> {
/// Create a new empty map.
pub fn new() -> Self {
Self(BTreeMap::new())
}
/// Insert a new (first_index, value) mapping.
#[inline]
pub fn insert(&mut self, first_index: I, value: V) {
self.0.insert(first_index, value);
}
/// Look up value for an index using range search.
/// Returns the value associated with the largest first_index <= given index.
#[inline]
pub fn get(&self, index: I) -> Option<V> {
self.0.range(..=index).next_back().map(|(_, &v)| v)
}
/// Clear all entries (for reset/rollback).
pub fn clear(&mut self) {
self.0.clear();
}
}

View File

@@ -7,7 +7,8 @@ use crate::Vecs;
/// These provide consistent snapshots for reading while the main vectors are being modified.
pub struct Readers {
pub txindex_to_first_txoutindex: Reader,
pub txoutindex_to_txoutdata: Reader,
pub txoutindex_to_outputtype: Reader,
pub txoutindex_to_typeindex: Reader,
pub addressbytes: ByAddressType<Reader>,
}
@@ -15,7 +16,8 @@ impl Readers {
pub fn new(vecs: &Vecs) -> Self {
Self {
txindex_to_first_txoutindex: vecs.tx.txindex_to_first_txoutindex.create_reader(),
txoutindex_to_txoutdata: vecs.txout.txoutindex_to_txoutdata.create_reader(),
txoutindex_to_outputtype: vecs.txout.txoutindex_to_outputtype.create_reader(),
txoutindex_to_typeindex: vecs.txout.txoutindex_to_typeindex.create_reader(),
addressbytes: ByAddressType {
p2pk65: vecs
.address

View File

@@ -5,8 +5,7 @@ use brk_grouper::ByAddressType;
use brk_store::{AnyStore, Kind, Mode, Store};
use brk_types::{
AddressHash, AddressIndexOutPoint, AddressIndexTxIndex, BlockHashPrefix, Height, OutPoint,
OutputType, StoredString, TxInIndex, TxIndex, TxOutIndex, TxidPrefix, TypeIndex, Unit, Version,
Vout,
OutputType, StoredString, TxIndex, TxOutIndex, TxidPrefix, TypeIndex, Unit, Version, Vout,
};
use fjall::{Database, PersistMode};
use log::info;
@@ -171,7 +170,7 @@ impl Stores {
.map(|s| s as &mut dyn AnyStore),
)
.try_for_each(|store| store.commit(height))?;
info!("Commits done in {:?}", i.elapsed());
info!("Stores committed in {:?}", i.elapsed());
let i = Instant::now();
self.db.persist(PersistMode::SyncData)?;
@@ -270,39 +269,44 @@ impl Stores {
let mut txoutindex_to_txindex_iter = vecs.txout.txoutindex_to_txindex.iter()?;
let mut txindex_to_first_txoutindex_iter =
vecs.tx.txindex_to_first_txoutindex.iter()?;
vecs.txout
.txoutindex_to_txoutdata
.iter()?
.enumerate()
.skip(starting_indexes.txoutindex.to_usize())
.filter(|(_, txoutdata)| txoutdata.outputtype.is_address())
.for_each(|(txoutindex, txoutdata)| {
let addresstype = txoutdata.outputtype;
let addressindex = txoutdata.typeindex;
let txindex = txoutindex_to_txindex_iter.get_at_unwrap(txoutindex);
let mut txoutindex_to_outputtype_iter = vecs.txout.txoutindex_to_outputtype.iter()?;
let mut txoutindex_to_typeindex_iter = vecs.txout.txoutindex_to_typeindex.iter()?;
self.addresstype_to_addressindex_and_txindex
.get_mut_unwrap(addresstype)
.remove(AddressIndexTxIndex::from((addressindex, txindex)));
for txoutindex in starting_indexes.txoutindex.to_usize()
..vecs.txout.txoutindex_to_outputtype.len()
{
let outputtype = txoutindex_to_outputtype_iter.get_at_unwrap(txoutindex);
if !outputtype.is_address() {
continue;
}
let vout = Vout::from(
txoutindex.to_usize()
- txindex_to_first_txoutindex_iter
.get_unwrap(txindex)
.to_usize(),
);
let outpoint = OutPoint::new(txindex, vout);
let addresstype = outputtype;
let addressindex = txoutindex_to_typeindex_iter.get_at_unwrap(txoutindex);
let txindex = txoutindex_to_txindex_iter.get_at_unwrap(txoutindex);
self.addresstype_to_addressindex_and_unspentoutpoint
.get_mut_unwrap(addresstype)
.remove(AddressIndexOutPoint::from((addressindex, outpoint)));
});
self.addresstype_to_addressindex_and_txindex
.get_mut_unwrap(addresstype)
.remove(AddressIndexTxIndex::from((addressindex, txindex)));
let vout = Vout::from(
txoutindex
- txindex_to_first_txoutindex_iter
.get_unwrap(txindex)
.to_usize(),
);
let outpoint = OutPoint::new(txindex, vout);
self.addresstype_to_addressindex_and_unspentoutpoint
.get_mut_unwrap(addresstype)
.remove(AddressIndexOutPoint::from((addressindex, outpoint)));
}
// Collect outputs that were spent after the rollback point
// We need to: 1) reset their spend status, 2) restore address stores
let mut txindex_to_first_txoutindex_iter =
vecs.tx.txindex_to_first_txoutindex.iter()?;
let mut txoutindex_to_txoutdata_iter = vecs.txout.txoutindex_to_txoutdata.iter()?;
let mut txoutindex_to_outputtype_iter = vecs.txout.txoutindex_to_outputtype.iter()?;
let mut txoutindex_to_typeindex_iter = vecs.txout.txoutindex_to_typeindex.iter()?;
let mut txinindex_to_txindex_iter = vecs.txin.txinindex_to_txindex.iter()?;
let outputs_to_unspend: Vec<_> = vecs
@@ -325,11 +329,11 @@ impl Stores {
// Only process if this output was created before the rollback point
if txoutindex < starting_indexes.txoutindex {
let txoutdata = txoutindex_to_txoutdata_iter.get_unwrap(txoutindex);
let spending_txindex =
txinindex_to_txindex_iter.get_at_unwrap(txinindex);
let outputtype = txoutindex_to_outputtype_iter.get_unwrap(txoutindex);
let typeindex = txoutindex_to_typeindex_iter.get_unwrap(txoutindex);
let spending_txindex = txinindex_to_txindex_iter.get_at_unwrap(txinindex);
Some((txoutindex, outpoint, txoutdata, spending_txindex))
Some((outpoint, outputtype, typeindex, spending_txindex))
} else {
None
}
@@ -337,16 +341,11 @@ impl Stores {
.collect();
// Now process the collected outputs (iterators dropped, can mutate vecs)
for (txoutindex, outpoint, txoutdata, spending_txindex) in outputs_to_unspend {
// Reset spend status back to unspent
vecs.txout
.txoutindex_to_txinindex
.update(txoutindex, TxInIndex::UNSPENT)?;
for (outpoint, outputtype, typeindex, spending_txindex) in outputs_to_unspend {
// Restore address stores if this is an address output
if txoutdata.outputtype.is_address() {
let addresstype = txoutdata.outputtype;
let addressindex = txoutdata.typeindex;
if outputtype.is_address() {
let addresstype = outputtype;
let addressindex = typeindex;
self.addresstype_to_addressindex_and_txindex
.get_mut_unwrap(addresstype)

View File

@@ -1,6 +1,6 @@
use brk_error::Result;
use brk_traversable::Traversable;
use brk_types::{Height, OutPoint, OutputType, Sats, TxInIndex, TxIndex, TypeIndex, Version};
use brk_types::{Height, OutPoint, OutputType, TxInIndex, TxIndex, TypeIndex, Version};
use rayon::prelude::*;
use vecdb::{AnyStoredVec, Database, GenericStoredVec, ImportableVec, PcoVec, Stamp};
@@ -11,8 +11,6 @@ pub struct TxinVecs {
pub height_to_first_txinindex: PcoVec<Height, TxInIndex>,
pub txinindex_to_outpoint: PcoVec<TxInIndex, OutPoint>,
pub txinindex_to_txindex: PcoVec<TxInIndex, TxIndex>,
pub txinindex_to_value: PcoVec<TxInIndex, Sats>,
pub txinindex_to_prev_height: PcoVec<TxInIndex, Height>,
pub txinindex_to_outputtype: PcoVec<TxInIndex, OutputType>,
pub txinindex_to_typeindex: PcoVec<TxInIndex, TypeIndex>,
}
@@ -23,16 +21,12 @@ impl TxinVecs {
height_to_first_txinindex,
txinindex_to_outpoint,
txinindex_to_txindex,
txinindex_to_value,
txinindex_to_prev_height,
txinindex_to_outputtype,
txinindex_to_typeindex,
) = parallel_import! {
height_to_first_txinindex = PcoVec::forced_import(db, "first_txinindex", version),
txinindex_to_outpoint = PcoVec::forced_import(db, "outpoint", version),
txinindex_to_txindex = PcoVec::forced_import(db, "txindex", version),
txinindex_to_value = PcoVec::forced_import(db, "value", version),
txinindex_to_prev_height = PcoVec::forced_import(db, "prev_height", version),
txinindex_to_outputtype = PcoVec::forced_import(db, "outputtype", version),
txinindex_to_typeindex = PcoVec::forced_import(db, "typeindex", version),
};
@@ -40,8 +34,6 @@ impl TxinVecs {
height_to_first_txinindex,
txinindex_to_outpoint,
txinindex_to_txindex,
txinindex_to_value,
txinindex_to_prev_height,
txinindex_to_outputtype,
txinindex_to_typeindex,
})
@@ -54,10 +46,6 @@ impl TxinVecs {
.truncate_if_needed_with_stamp(txinindex, stamp)?;
self.txinindex_to_txindex
.truncate_if_needed_with_stamp(txinindex, stamp)?;
self.txinindex_to_value
.truncate_if_needed_with_stamp(txinindex, stamp)?;
self.txinindex_to_prev_height
.truncate_if_needed_with_stamp(txinindex, stamp)?;
self.txinindex_to_outputtype
.truncate_if_needed_with_stamp(txinindex, stamp)?;
self.txinindex_to_typeindex
@@ -70,8 +58,6 @@ impl TxinVecs {
&mut self.height_to_first_txinindex as &mut dyn AnyStoredVec,
&mut self.txinindex_to_outpoint,
&mut self.txinindex_to_txindex,
&mut self.txinindex_to_value,
&mut self.txinindex_to_prev_height,
&mut self.txinindex_to_outputtype,
&mut self.txinindex_to_typeindex,
]

View File

@@ -1,69 +1,65 @@
use brk_error::Result;
use brk_traversable::Traversable;
use brk_types::{Height, Sats, TxInIndex, TxIndex, TxOutData, TxOutIndex, Version};
use brk_types::{Height, OutputType, Sats, TxIndex, TxOutIndex, TypeIndex, Version};
use rayon::prelude::*;
use vecdb::{
AnyStoredVec, AnyVec, BytesVec, Database, GenericStoredVec, ImportableVec, IterableCloneableVec,
LazyVecFrom1, PcoVec, Stamp,
};
use vecdb::{AnyStoredVec, BytesVec, Database, GenericStoredVec, ImportableVec, PcoVec, Stamp};
use crate::parallel_import;
#[derive(Clone, Traversable)]
pub struct TxoutVecs {
pub height_to_first_txoutindex: PcoVec<Height, TxOutIndex>,
pub txoutindex_to_txoutdata: BytesVec<TxOutIndex, TxOutData>,
pub txoutindex_to_value: BytesVec<TxOutIndex, Sats>,
pub txoutindex_to_outputtype: BytesVec<TxOutIndex, OutputType>,
pub txoutindex_to_typeindex: BytesVec<TxOutIndex, TypeIndex>,
pub txoutindex_to_txindex: PcoVec<TxOutIndex, TxIndex>,
pub txoutindex_to_txinindex: BytesVec<TxOutIndex, TxInIndex>,
pub txoutindex_to_value: LazyVecFrom1<TxOutIndex, Sats, TxOutIndex, TxOutData>,
}
impl TxoutVecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
let (
height_to_first_txoutindex,
txoutindex_to_txoutdata,
txoutindex_to_value,
txoutindex_to_outputtype,
txoutindex_to_typeindex,
txoutindex_to_txindex,
txoutindex_to_txinindex,
) = parallel_import! {
height_to_first_txoutindex = PcoVec::forced_import(db, "first_txoutindex", version),
txoutindex_to_txoutdata = BytesVec::forced_import(db, "txoutdata", version),
txoutindex_to_value = BytesVec::forced_import(db, "value", version),
txoutindex_to_outputtype = BytesVec::forced_import(db, "outputtype", version),
txoutindex_to_typeindex = BytesVec::forced_import(db, "typeindex", version),
txoutindex_to_txindex = PcoVec::forced_import(db, "txindex", version),
txoutindex_to_txinindex = BytesVec::forced_import(db, "txinindex", version),
};
let txoutindex_to_value = LazyVecFrom1::init(
"value",
txoutindex_to_txoutdata.version(),
txoutindex_to_txoutdata.boxed_clone(),
|index, iter| iter.get(index).map(|txoutdata: TxOutData| txoutdata.value),
);
Ok(Self {
height_to_first_txoutindex,
txoutindex_to_txoutdata,
txoutindex_to_txindex,
txoutindex_to_txinindex,
txoutindex_to_value,
txoutindex_to_outputtype,
txoutindex_to_typeindex,
txoutindex_to_txindex,
})
}
pub fn truncate(&mut self, height: Height, txoutindex: TxOutIndex, stamp: Stamp) -> Result<()> {
self.height_to_first_txoutindex
.truncate_if_needed_with_stamp(height, stamp)?;
self.txoutindex_to_txoutdata
self.txoutindex_to_value
.truncate_if_needed_with_stamp(txoutindex, stamp)?;
self.txoutindex_to_outputtype
.truncate_if_needed_with_stamp(txoutindex, stamp)?;
self.txoutindex_to_typeindex
.truncate_if_needed_with_stamp(txoutindex, stamp)?;
self.txoutindex_to_txindex
.truncate_if_needed_with_stamp(txoutindex, stamp)?;
self.txoutindex_to_txinindex
.truncate_if_needed_with_stamp(txoutindex, stamp)?;
Ok(())
}
pub fn par_iter_mut_any(&mut self) -> impl ParallelIterator<Item = &mut dyn AnyStoredVec> {
[
&mut self.height_to_first_txoutindex as &mut dyn AnyStoredVec,
&mut self.txoutindex_to_txoutdata,
&mut self.txoutindex_to_value,
&mut self.txoutindex_to_outputtype,
&mut self.txoutindex_to_typeindex,
&mut self.txoutindex_to_txindex,
&mut self.txoutindex_to_txinindex,
]
.into_par_iter()
}

View File

@@ -4,8 +4,7 @@ use brk_types::TxidPrefix;
use rustc_hash::FxHashMap;
use super::tx_node::TxNode;
use crate::entry::Entry;
use crate::types::{PoolIndex, TxIndex};
use crate::{entry::Entry, types::{PoolIndex, TxIndex}};
/// Type-safe wrapper around Vec<TxNode> that only allows PoolIndex access.
pub struct Graph(Vec<TxNode>);

View File

@@ -1,3 +1,5 @@
use std::cmp::Ordering;
use brk_types::{Sats, VSize};
use super::tx_node::TxNode;
@@ -44,18 +46,18 @@ impl PartialEq for HeapEntry {
impl Eq for HeapEntry {}
impl PartialOrd for HeapEntry {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for HeapEntry {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
fn cmp(&self, other: &Self) -> Ordering {
// Higher fee rate = higher priority
if self.has_higher_fee_rate_than(other) {
std::cmp::Ordering::Greater
Ordering::Greater
} else if other.has_higher_fee_rate_than(self) {
std::cmp::Ordering::Less
Ordering::Less
} else {
// Tiebreaker: lower index first (deterministic)
other.pool_index.cmp(&self.pool_index)

View File

@@ -1,11 +1,3 @@
//! Builds projected blocks from mempool transactions.
//!
//! The algorithm:
//! 1. Build a dependency graph from mempool entries
//! 2. Select transactions using a heap (CPFP-aware)
//! 3. Group into atomic packages (parent + child stay together)
//! 4. Partition packages into blocks by fee rate
mod graph;
mod heap_entry;
mod package;
@@ -13,8 +5,7 @@ mod partitioner;
mod selector;
mod tx_node;
use crate::entry::Entry;
use crate::types::SelectedTx;
use crate::{entry::Entry, types::SelectedTx};
/// Target vsize per block (~1MB, derived from 4MW weight limit).
const BLOCK_VSIZE: u64 = 1_000_000;

View File

@@ -1,5 +1,4 @@
use super::package::Package;
use super::BLOCK_VSIZE;
use super::{BLOCK_VSIZE, package::Package};
use crate::types::SelectedTx;
/// How many packages to look ahead when current doesn't fit.

View File

@@ -4,10 +4,7 @@ use brk_types::FeeRate;
use rustc_hash::FxHashSet;
use smallvec::SmallVec;
use super::BLOCK_VSIZE;
use super::graph::Graph;
use super::heap_entry::HeapEntry;
use super::package::Package;
use super::{BLOCK_VSIZE, graph::Graph, heap_entry::HeapEntry, package::Package};
use crate::types::PoolIndex;
/// Select transactions from the graph and group into CPFP packages.

View File

@@ -1,8 +1,7 @@
use brk_types::TxidPrefix;
use rustc_hash::FxHashMap;
use crate::entry::Entry;
use crate::types::TxIndex;
use crate::{entry::Entry, types::TxIndex};
/// Pool of mempool entries with slot recycling.
///

View File

@@ -1,10 +1,3 @@
//! Bitcoin mempool monitor with fee estimation.
//!
//! Provides real-time mempool tracking with:
//! - Fee estimation via projected blocks
//! - Address mempool stats
//! - CPFP-aware block building
mod addresses;
mod block_builder;
mod entry;

View File

@@ -1,5 +1,3 @@
//! Projected block building and fee estimation.
mod fees;
mod snapshot;
mod stats;

View File

@@ -1,9 +1,7 @@
use brk_types::RecommendedFees;
use super::fees;
use super::stats::{self, BlockStats};
use crate::entry::Entry;
use crate::types::{SelectedTx, TxIndex};
use super::{fees, stats::{self, BlockStats}};
use crate::{entry::Entry, types::{SelectedTx, TxIndex}};
/// Immutable snapshot of projected blocks.
#[derive(Debug, Clone, Default)]

View File

@@ -1,7 +1,6 @@
use brk_types::{FeeRate, Sats, VSize};
use crate::entry::Entry;
use crate::types::SelectedTx;
use crate::{entry::Entry, types::SelectedTx};
/// Statistics for a single projected block.
#[derive(Debug, Clone, Default)]

View File

@@ -4,7 +4,7 @@ use std::{
atomic::{AtomicBool, AtomicU64, Ordering},
},
thread,
time::{Duration, Instant},
time::{Duration, Instant, SystemTime, UNIX_EPOCH},
};
use brk_error::Result;
@@ -15,12 +15,14 @@ use log::{debug, error};
use parking_lot::{RwLock, RwLockReadGuard};
use rustc_hash::FxHashMap;
use crate::addresses::AddressTracker;
use crate::block_builder::build_projected_blocks;
use crate::entry::Entry;
use crate::entry_pool::EntryPool;
use crate::projected_blocks::{BlockStats, RecommendedFees, Snapshot};
use crate::tx_store::TxStore;
use crate::{
addresses::AddressTracker,
block_builder::build_projected_blocks,
entry::Entry,
entry_pool::EntryPool,
projected_blocks::{BlockStats, RecommendedFees, Snapshot},
tx_store::TxStore,
};
/// Max new txs to fetch full data for per update cycle (for address tracking).
const MAX_TX_FETCHES_PER_CYCLE: usize = 10_000;
@@ -44,16 +46,13 @@ impl Mempool {
pub struct MempoolInner {
client: Client,
// Mempool state
info: RwLock<MempoolInfo>,
txs: RwLock<TxStore>,
addresses: RwLock<AddressTracker>,
entries: RwLock<EntryPool>,
// Projected blocks snapshot
snapshot: RwLock<Snapshot>,
// Rate limiting
dirty: AtomicBool,
last_rebuild_ms: AtomicU64,
}
@@ -205,8 +204,8 @@ impl MempoolInner {
return;
}
let now_ms = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
let now_ms = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_millis() as u64)
.unwrap_or(0);

Some files were not shown because too many files have changed in this diff Show More