mirror of
https://github.com/bitcoinresearchkit/brk.git
synced 2026-06-11 07:23:32 -07:00
binder: snapshot
This commit is contained in:
@@ -9,8 +9,8 @@ use serde_json::Value;
|
||||
|
||||
use crate::{
|
||||
ClientMetadata, Endpoint, FieldNamePosition, IndexSetPattern, PatternField, StructuralPattern,
|
||||
TypeSchemas, extract_inner_type, get_first_leaf_name, get_node_fields,
|
||||
get_pattern_instance_base, to_camel_case, to_pascal_case,
|
||||
TypeSchemas, extract_inner_type, get_fields_with_child_info, get_first_leaf_name,
|
||||
get_node_fields, get_pattern_instance_base, to_camel_case, to_pascal_case,
|
||||
};
|
||||
|
||||
/// Generate JavaScript + JSDoc client from metadata and OpenAPI endpoints
|
||||
@@ -534,7 +534,7 @@ fn generate_parameterized_field(
|
||||
format!("`/${{acc}}_{}`", field.name)
|
||||
};
|
||||
|
||||
if field_uses_accessor(field, metadata) {
|
||||
if metadata.field_uses_accessor(field) {
|
||||
let accessor = metadata.find_index_set_pattern(&field.indexes).unwrap();
|
||||
writeln!(
|
||||
output,
|
||||
@@ -568,7 +568,7 @@ fn generate_tree_path_field(
|
||||
field_name_js, field.rust_type, field.name, comma
|
||||
)
|
||||
.unwrap();
|
||||
} else if field_uses_accessor(field, metadata) {
|
||||
} else if metadata.field_uses_accessor(field) {
|
||||
let accessor = metadata.find_index_set_pattern(&field.indexes).unwrap();
|
||||
writeln!(
|
||||
output,
|
||||
@@ -629,10 +629,6 @@ fn field_to_js_type_with_generic_value(
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a field should use an index accessor
|
||||
fn field_uses_accessor(field: &PatternField, metadata: &ClientMetadata) -> bool {
|
||||
metadata.find_index_set_pattern(&field.indexes).is_some()
|
||||
}
|
||||
|
||||
/// Generate tree typedefs
|
||||
fn generate_tree_typedefs(output: &mut String, catalog: &TreeNode, metadata: &ClientMetadata) {
|
||||
@@ -659,105 +655,65 @@ fn generate_tree_typedef(
|
||||
metadata: &ClientMetadata,
|
||||
generated: &mut HashSet<String>,
|
||||
) {
|
||||
if let TreeNode::Branch(children) = node {
|
||||
// Build signature with child field info for generic pattern lookup
|
||||
let fields_with_child_info: Vec<(PatternField, Option<Vec<PatternField>>)> = children
|
||||
.iter()
|
||||
.map(|(child_name, child_node)| {
|
||||
let (rust_type, json_type, indexes, child_fields) = match child_node {
|
||||
TreeNode::Leaf(leaf) => (
|
||||
leaf.value_type().to_string(),
|
||||
leaf.schema
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("object")
|
||||
.to_string(),
|
||||
leaf.indexes().clone(),
|
||||
None,
|
||||
),
|
||||
TreeNode::Branch(grandchildren) => {
|
||||
let child_fields = get_node_fields(grandchildren, pattern_lookup);
|
||||
let pattern_name = pattern_lookup
|
||||
.get(&child_fields)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| format!("{}_{}", name, to_pascal_case(child_name)));
|
||||
(
|
||||
pattern_name.clone(),
|
||||
pattern_name,
|
||||
std::collections::BTreeSet::new(),
|
||||
Some(child_fields),
|
||||
)
|
||||
}
|
||||
};
|
||||
(
|
||||
PatternField {
|
||||
name: child_name.clone(),
|
||||
rust_type,
|
||||
json_type,
|
||||
indexes,
|
||||
},
|
||||
child_fields,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
let TreeNode::Branch(children) = node else {
|
||||
return;
|
||||
};
|
||||
|
||||
let fields: Vec<PatternField> = fields_with_child_info
|
||||
.iter()
|
||||
.map(|(f, _)| f.clone())
|
||||
.collect();
|
||||
let fields_with_child_info = get_fields_with_child_info(children, name, pattern_lookup);
|
||||
let fields: Vec<PatternField> = fields_with_child_info
|
||||
.iter()
|
||||
.map(|(f, _)| f.clone())
|
||||
.collect();
|
||||
|
||||
// Skip if this matches a pattern (already generated)
|
||||
if pattern_lookup.contains_key(&fields)
|
||||
&& pattern_lookup.get(&fields) != Some(&name.to_string())
|
||||
{
|
||||
return;
|
||||
}
|
||||
// Skip if this matches a pattern (already generated)
|
||||
if pattern_lookup.contains_key(&fields) && pattern_lookup.get(&fields) != Some(&name.to_string())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if generated.contains(name) {
|
||||
return;
|
||||
}
|
||||
generated.insert(name.to_string());
|
||||
if generated.contains(name) {
|
||||
return;
|
||||
}
|
||||
generated.insert(name.to_string());
|
||||
|
||||
writeln!(output, "/**").unwrap();
|
||||
writeln!(output, " * @typedef {{Object}} {}", name).unwrap();
|
||||
writeln!(output, "/**").unwrap();
|
||||
writeln!(output, " * @typedef {{Object}} {}", name).unwrap();
|
||||
|
||||
for (field, child_fields) in &fields_with_child_info {
|
||||
// For generic patterns, extract the value type from child fields
|
||||
let generic_value_type = child_fields
|
||||
.as_ref()
|
||||
.and_then(|cf| metadata.get_generic_value_type(&field.rust_type, cf));
|
||||
let js_type = field_to_js_type_with_generic_value(
|
||||
field,
|
||||
metadata,
|
||||
false,
|
||||
generic_value_type.as_deref(),
|
||||
);
|
||||
writeln!(
|
||||
output,
|
||||
" * @property {{{}}} {}",
|
||||
js_type,
|
||||
to_camel_case(&field.name)
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
for (field, child_fields) in &fields_with_child_info {
|
||||
let generic_value_type = child_fields
|
||||
.as_ref()
|
||||
.and_then(|cf| metadata.get_generic_value_type(&field.rust_type, cf));
|
||||
let js_type = field_to_js_type_with_generic_value(
|
||||
field,
|
||||
metadata,
|
||||
false,
|
||||
generic_value_type.as_deref(),
|
||||
);
|
||||
writeln!(
|
||||
output,
|
||||
" * @property {{{}}} {}",
|
||||
js_type,
|
||||
to_camel_case(&field.name)
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
writeln!(output, " */\n").unwrap();
|
||||
writeln!(output, " */\n").unwrap();
|
||||
|
||||
// Generate child typedefs
|
||||
for (child_name, child_node) in children {
|
||||
if let TreeNode::Branch(grandchildren) = child_node {
|
||||
let child_fields = get_node_fields(grandchildren, pattern_lookup);
|
||||
if !pattern_lookup.contains_key(&child_fields) {
|
||||
let child_type_name = format!("{}_{}", name, to_pascal_case(child_name));
|
||||
generate_tree_typedef(
|
||||
output,
|
||||
&child_type_name,
|
||||
child_node,
|
||||
pattern_lookup,
|
||||
metadata,
|
||||
generated,
|
||||
);
|
||||
}
|
||||
// Generate child typedefs
|
||||
for (child_name, child_node) in children {
|
||||
if let TreeNode::Branch(grandchildren) = child_node {
|
||||
let child_fields = get_node_fields(grandchildren, pattern_lookup);
|
||||
if !pattern_lookup.contains_key(&child_fields) {
|
||||
let child_type_name = format!("{}_{}", name, to_pascal_case(child_name));
|
||||
generate_tree_typedef(
|
||||
output,
|
||||
&child_type_name,
|
||||
child_node,
|
||||
pattern_lookup,
|
||||
metadata,
|
||||
generated,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1007,15 +963,7 @@ fn generate_api_methods(output: &mut String, endpoints: &[Endpoint]) {
|
||||
}
|
||||
|
||||
fn endpoint_to_method_name(endpoint: &Endpoint) -> String {
|
||||
if let Some(op_id) = &endpoint.operation_id {
|
||||
return to_camel_case(op_id);
|
||||
}
|
||||
let parts: Vec<&str> = endpoint
|
||||
.path
|
||||
.split('/')
|
||||
.filter(|s| !s.is_empty() && !s.starts_with('{'))
|
||||
.collect();
|
||||
format!("get{}", to_pascal_case(&parts.join("_")))
|
||||
to_camel_case(&endpoint.operation_name())
|
||||
}
|
||||
|
||||
fn build_method_params(endpoint: &Endpoint) -> String {
|
||||
|
||||
@@ -1,3 +1,36 @@
|
||||
//! Client library generator for BRK.
|
||||
//!
|
||||
//! This crate generates typed client libraries in multiple languages (Rust, JavaScript, Python)
|
||||
//! from the BRK metric catalog and OpenAPI specification.
|
||||
//!
|
||||
//! # Usage
|
||||
//!
|
||||
//! ```ignore
|
||||
//! use brk_binder::generate_clients;
|
||||
//! use brk_query::Vecs;
|
||||
//! use std::path::Path;
|
||||
//!
|
||||
//! let vecs = Vecs::load("path/to/data")?;
|
||||
//! let openapi_json = std::fs::read_to_string("openapi.json")?;
|
||||
//! generate_clients(&vecs, &openapi_json, Path::new("output"))?;
|
||||
//! ```
|
||||
//!
|
||||
//! # Architecture
|
||||
//!
|
||||
//! The generator works in several phases:
|
||||
//!
|
||||
//! 1. **Metadata extraction** - Analyzes the metric catalog tree to detect:
|
||||
//! - Structural patterns (repeated tree shapes)
|
||||
//! - Index set patterns (common index combinations)
|
||||
//! - Generic patterns (structures that differ only in value type)
|
||||
//!
|
||||
//! 2. **Schema collection** - Merges OpenAPI schemas with schemars-generated type schemas
|
||||
//!
|
||||
//! 3. **Code generation** - Produces language-specific clients:
|
||||
//! - Rust: Uses `brk_types` directly, generates structs with lifetimes
|
||||
//! - JavaScript: Generates JSDoc-typed ES modules with factory functions
|
||||
//! - Python: Generates typed classes with TypedDict and Generic support
|
||||
|
||||
use std::{collections::btree_map::Entry, fs::create_dir_all, io, path::Path};
|
||||
|
||||
use brk_query::Vecs;
|
||||
|
||||
@@ -37,6 +37,28 @@ impl Endpoint {
|
||||
pub fn should_generate(&self) -> bool {
|
||||
self.method == "GET" && !self.deprecated
|
||||
}
|
||||
|
||||
/// Returns the operation ID or generates one from the path.
|
||||
/// The returned string uses the raw case from the spec (typically camelCase).
|
||||
pub fn operation_name(&self) -> String {
|
||||
if let Some(op_id) = &self.operation_id {
|
||||
return op_id.clone();
|
||||
}
|
||||
// Generate from path: /api/blocks/{hash} -> "get_api_blocks_by_hash"
|
||||
let parts: Vec<String> = self
|
||||
.path
|
||||
.split('/')
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|segment| {
|
||||
if let Some(param) = segment.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
|
||||
format!("by_{}", param)
|
||||
} else {
|
||||
segment.to_string()
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
format!("get_{}", parts.join("_"))
|
||||
}
|
||||
}
|
||||
|
||||
/// Parameter information
|
||||
|
||||
+125
-188
@@ -9,8 +9,8 @@ use serde_json::Value;
|
||||
|
||||
use crate::{
|
||||
ClientMetadata, Endpoint, FieldNamePosition, IndexSetPattern, PatternField, StructuralPattern,
|
||||
TypeSchemas, extract_inner_type, get_node_fields, get_pattern_instance_base, is_enum_schema,
|
||||
to_pascal_case, to_snake_case,
|
||||
TypeSchemas, extract_inner_type, get_fields_with_child_info, get_node_fields,
|
||||
get_pattern_instance_base, is_enum_schema, to_pascal_case, to_snake_case,
|
||||
};
|
||||
|
||||
/// Generate Python client from metadata and OpenAPI endpoints
|
||||
@@ -561,7 +561,7 @@ fn generate_parameterized_python_field(
|
||||
format!("f'/{{acc}}_{}'", field.name)
|
||||
};
|
||||
|
||||
if field_uses_accessor(field, metadata) {
|
||||
if metadata.field_uses_accessor(field) {
|
||||
let accessor = metadata.find_index_set_pattern(&field.indexes).unwrap();
|
||||
writeln!(
|
||||
output,
|
||||
@@ -595,7 +595,7 @@ fn generate_tree_path_python_field(
|
||||
field_name, py_type, field.rust_type, field.name
|
||||
)
|
||||
.unwrap();
|
||||
} else if field_uses_accessor(field, metadata) {
|
||||
} else if metadata.field_uses_accessor(field) {
|
||||
let accessor = metadata.find_index_set_pattern(&field.indexes).unwrap();
|
||||
writeln!(
|
||||
output,
|
||||
@@ -661,10 +661,6 @@ fn field_to_python_type_with_generic_value(
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a field should use an index accessor
|
||||
fn field_uses_accessor(field: &PatternField, metadata: &ClientMetadata) -> bool {
|
||||
metadata.find_index_set_pattern(&field.indexes).is_some()
|
||||
}
|
||||
|
||||
/// Generate tree classes
|
||||
fn generate_tree_classes(output: &mut String, catalog: &TreeNode, metadata: &ClientMetadata) {
|
||||
@@ -691,179 +687,132 @@ fn generate_tree_class(
|
||||
metadata: &ClientMetadata,
|
||||
generated: &mut HashSet<String>,
|
||||
) {
|
||||
if let TreeNode::Branch(children) = node {
|
||||
// Build signature with child field info for generic pattern lookup
|
||||
let fields_with_child_info: Vec<(PatternField, Option<Vec<PatternField>>)> = children
|
||||
.iter()
|
||||
.map(|(child_name, child_node)| {
|
||||
let (rust_type, json_type, indexes, child_fields) = match child_node {
|
||||
TreeNode::Leaf(leaf) => (
|
||||
leaf.value_type().to_string(),
|
||||
leaf.schema
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("object")
|
||||
.to_string(),
|
||||
leaf.indexes().clone(),
|
||||
None,
|
||||
),
|
||||
TreeNode::Branch(grandchildren) => {
|
||||
let child_fields = get_node_fields(grandchildren, pattern_lookup);
|
||||
let pattern_name = pattern_lookup
|
||||
.get(&child_fields)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| format!("{}_{}", name, to_pascal_case(child_name)));
|
||||
(
|
||||
pattern_name.clone(),
|
||||
pattern_name,
|
||||
std::collections::BTreeSet::new(),
|
||||
Some(child_fields),
|
||||
)
|
||||
}
|
||||
};
|
||||
(
|
||||
PatternField {
|
||||
name: child_name.clone(),
|
||||
rust_type,
|
||||
json_type,
|
||||
indexes,
|
||||
},
|
||||
child_fields,
|
||||
let TreeNode::Branch(children) = node else {
|
||||
return;
|
||||
};
|
||||
|
||||
let fields_with_child_info = get_fields_with_child_info(children, name, pattern_lookup);
|
||||
let fields: Vec<PatternField> = fields_with_child_info
|
||||
.iter()
|
||||
.map(|(f, _)| f.clone())
|
||||
.collect();
|
||||
|
||||
// Skip if this matches a pattern (already generated)
|
||||
if pattern_lookup.contains_key(&fields) && pattern_lookup.get(&fields) != Some(&name.to_string())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if generated.contains(name) {
|
||||
return;
|
||||
}
|
||||
generated.insert(name.to_string());
|
||||
|
||||
writeln!(output, "class {}:", name).unwrap();
|
||||
writeln!(output, " \"\"\"Catalog tree node.\"\"\"").unwrap();
|
||||
writeln!(output, " ").unwrap();
|
||||
writeln!(
|
||||
output,
|
||||
" def __init__(self, client: BrkClientBase, base_path: str = ''):"
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
for ((field, child_fields_opt), (child_name, child_node)) in
|
||||
fields_with_child_info.iter().zip(children.iter())
|
||||
{
|
||||
let generic_value_type = child_fields_opt
|
||||
.as_ref()
|
||||
.and_then(|cf| metadata.get_generic_value_type(&field.rust_type, cf));
|
||||
let py_type = field_to_python_type_with_generic_value(
|
||||
field,
|
||||
metadata,
|
||||
false,
|
||||
generic_value_type.as_deref(),
|
||||
);
|
||||
let field_name_py = to_snake_case(&field.name);
|
||||
|
||||
if metadata.is_pattern_type(&field.rust_type) {
|
||||
let pattern = metadata.find_pattern(&field.rust_type);
|
||||
let is_parameterizable = pattern.is_some_and(|p| p.is_parameterizable());
|
||||
|
||||
if is_parameterizable {
|
||||
let metric_base = get_pattern_instance_base(child_node, child_name);
|
||||
writeln!(
|
||||
output,
|
||||
" self.{}: {} = {}(client, '{}')",
|
||||
field_name_py, py_type, field.rust_type, metric_base
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let fields: Vec<PatternField> = fields_with_child_info
|
||||
.iter()
|
||||
.map(|(f, _)| f.clone())
|
||||
.collect();
|
||||
|
||||
// Skip if this matches a pattern (already generated)
|
||||
if pattern_lookup.contains_key(&fields)
|
||||
&& pattern_lookup.get(&fields) != Some(&name.to_string())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if generated.contains(name) {
|
||||
return;
|
||||
}
|
||||
generated.insert(name.to_string());
|
||||
|
||||
writeln!(output, "class {}:", name).unwrap();
|
||||
writeln!(output, " \"\"\"Catalog tree node.\"\"\"").unwrap();
|
||||
writeln!(output, " ").unwrap();
|
||||
writeln!(
|
||||
output,
|
||||
" def __init__(self, client: BrkClientBase, base_path: str = ''):"
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
for ((field, child_fields_opt), (child_name, child_node)) in
|
||||
fields_with_child_info.iter().zip(children.iter())
|
||||
{
|
||||
// For generic patterns, extract the value type from child fields
|
||||
let generic_value_type = child_fields_opt
|
||||
.as_ref()
|
||||
.and_then(|cf| metadata.get_generic_value_type(&field.rust_type, cf));
|
||||
let py_type = field_to_python_type_with_generic_value(
|
||||
field,
|
||||
metadata,
|
||||
false,
|
||||
generic_value_type.as_deref(),
|
||||
);
|
||||
let field_name_py = to_snake_case(&field.name);
|
||||
|
||||
if metadata.is_pattern_type(&field.rust_type) {
|
||||
// Check if the pattern is parameterizable
|
||||
let pattern = metadata
|
||||
.structural_patterns
|
||||
.iter()
|
||||
.find(|p| p.name == field.rust_type);
|
||||
let is_parameterizable = pattern.map(|p| p.is_parameterizable()).unwrap_or(false);
|
||||
|
||||
if is_parameterizable {
|
||||
// Get the metric base from the first leaf descendant
|
||||
let metric_base = get_pattern_instance_base(child_node, child_name);
|
||||
writeln!(
|
||||
output,
|
||||
" self.{}: {} = {}(client, '{}')",
|
||||
field_name_py, py_type, field.rust_type, metric_base
|
||||
)
|
||||
.unwrap();
|
||||
} else {
|
||||
writeln!(
|
||||
output,
|
||||
" self.{}: {} = {}(client, f'{{base_path}}/{}')",
|
||||
field_name_py, py_type, field.rust_type, field.name
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
} else if field_uses_accessor(field, metadata) {
|
||||
// Leaf with accessor - get actual metric path from leaf
|
||||
let metric_path = if let TreeNode::Leaf(leaf) = child_node {
|
||||
format!("/{}", leaf.name())
|
||||
} else {
|
||||
format!("{{base_path}}/{}", field.name)
|
||||
};
|
||||
let accessor = metadata.find_index_set_pattern(&field.indexes).unwrap();
|
||||
if metric_path.contains("{base_path}") {
|
||||
writeln!(
|
||||
output,
|
||||
" self.{}: {} = {}(client, f'{}')",
|
||||
field_name_py, py_type, accessor.name, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
} else {
|
||||
writeln!(
|
||||
output,
|
||||
" self.{}: {} = {}(client, '{}')",
|
||||
field_name_py, py_type, accessor.name, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
.unwrap();
|
||||
} else {
|
||||
// Leaf without accessor - get actual metric path from leaf
|
||||
let metric_path = if let TreeNode::Leaf(leaf) = child_node {
|
||||
format!("/{}", leaf.name())
|
||||
} else {
|
||||
format!("{{base_path}}/{}", field.name)
|
||||
};
|
||||
if metric_path.contains("{base_path}") {
|
||||
writeln!(
|
||||
output,
|
||||
" self.{}: {} = MetricNode(client, f'{}')",
|
||||
field_name_py, py_type, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
} else {
|
||||
writeln!(
|
||||
output,
|
||||
" self.{}: {} = MetricNode(client, '{}')",
|
||||
field_name_py, py_type, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
writeln!(
|
||||
output,
|
||||
" self.{}: {} = {}(client, f'{{base_path}}/{}')",
|
||||
field_name_py, py_type, field.rust_type, field.name
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
} else if metadata.field_uses_accessor(field) {
|
||||
let metric_path = if let TreeNode::Leaf(leaf) = child_node {
|
||||
format!("/{}", leaf.name())
|
||||
} else {
|
||||
format!("{{base_path}}/{}", field.name)
|
||||
};
|
||||
let accessor = metadata.find_index_set_pattern(&field.indexes).unwrap();
|
||||
if metric_path.contains("{base_path}") {
|
||||
writeln!(
|
||||
output,
|
||||
" self.{}: {} = {}(client, f'{}')",
|
||||
field_name_py, py_type, accessor.name, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
} else {
|
||||
writeln!(
|
||||
output,
|
||||
" self.{}: {} = {}(client, '{}')",
|
||||
field_name_py, py_type, accessor.name, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
} else {
|
||||
let metric_path = if let TreeNode::Leaf(leaf) = child_node {
|
||||
format!("/{}", leaf.name())
|
||||
} else {
|
||||
format!("{{base_path}}/{}", field.name)
|
||||
};
|
||||
if metric_path.contains("{base_path}") {
|
||||
writeln!(
|
||||
output,
|
||||
" self.{}: {} = MetricNode(client, f'{}')",
|
||||
field_name_py, py_type, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
} else {
|
||||
writeln!(
|
||||
output,
|
||||
" self.{}: {} = MetricNode(client, '{}')",
|
||||
field_name_py, py_type, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(output).unwrap();
|
||||
writeln!(output).unwrap();
|
||||
|
||||
// Generate child classes
|
||||
for (child_name, child_node) in children {
|
||||
if let TreeNode::Branch(grandchildren) = child_node {
|
||||
let child_fields = get_node_fields(grandchildren, pattern_lookup);
|
||||
if !pattern_lookup.contains_key(&child_fields) {
|
||||
let child_class_name = format!("{}_{}", name, to_pascal_case(child_name));
|
||||
generate_tree_class(
|
||||
output,
|
||||
&child_class_name,
|
||||
child_node,
|
||||
pattern_lookup,
|
||||
metadata,
|
||||
generated,
|
||||
);
|
||||
}
|
||||
// Generate child classes
|
||||
for (child_name, child_node) in children {
|
||||
if let TreeNode::Branch(grandchildren) = child_node {
|
||||
let child_fields = get_node_fields(grandchildren, pattern_lookup);
|
||||
if !pattern_lookup.contains_key(&child_fields) {
|
||||
let child_class_name = format!("{}_{}", name, to_pascal_case(child_name));
|
||||
generate_tree_class(
|
||||
output,
|
||||
&child_class_name,
|
||||
child_node,
|
||||
pattern_lookup,
|
||||
metadata,
|
||||
generated,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -963,19 +912,7 @@ fn generate_api_methods(output: &mut String, endpoints: &[Endpoint]) {
|
||||
}
|
||||
|
||||
fn endpoint_to_method_name(endpoint: &Endpoint) -> String {
|
||||
if let Some(op_id) = &endpoint.operation_id {
|
||||
return to_snake_case(op_id);
|
||||
}
|
||||
// Include path parameters as "by_{param}" to differentiate endpoints
|
||||
let mut parts = Vec::new();
|
||||
for segment in endpoint.path.split('/').filter(|s| !s.is_empty()) {
|
||||
if let Some(param) = segment.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
|
||||
parts.push(format!("by_{}", param));
|
||||
} else {
|
||||
parts.push(segment.to_string());
|
||||
}
|
||||
}
|
||||
to_snake_case(&format!("get_{}", parts.join("_")))
|
||||
to_snake_case(&endpoint.operation_name())
|
||||
}
|
||||
|
||||
/// Convert JS-style type to Python type (e.g., "Txid[]" -> "List[Txid]", "number" -> "int")
|
||||
|
||||
+135
-198
@@ -8,7 +8,8 @@ use brk_types::{Index, TreeNode};
|
||||
|
||||
use crate::{
|
||||
ClientMetadata, Endpoint, FieldNamePosition, IndexSetPattern, PatternField, StructuralPattern,
|
||||
extract_inner_type, get_node_fields, get_pattern_instance_base, to_pascal_case, to_snake_case,
|
||||
extract_inner_type, get_fields_with_child_info, get_node_fields, get_pattern_instance_base,
|
||||
to_pascal_case, to_snake_case,
|
||||
};
|
||||
|
||||
/// Generate Rust client from metadata and OpenAPI endpoints
|
||||
@@ -358,7 +359,7 @@ fn generate_parameterized_rust_field(
|
||||
format!("format!(\"/{{acc}}_{}\")", field.name)
|
||||
};
|
||||
|
||||
if field_uses_accessor(field, metadata) {
|
||||
if metadata.field_uses_accessor(field) {
|
||||
let accessor = metadata.find_index_set_pattern(&field.indexes).unwrap();
|
||||
writeln!(
|
||||
output,
|
||||
@@ -391,7 +392,7 @@ fn generate_tree_path_rust_field(
|
||||
field_name, field.rust_type, field.name
|
||||
)
|
||||
.unwrap();
|
||||
} else if field_uses_accessor(field, metadata) {
|
||||
} else if metadata.field_uses_accessor(field) {
|
||||
let accessor = metadata.find_index_set_pattern(&field.indexes).unwrap();
|
||||
writeln!(
|
||||
output,
|
||||
@@ -452,11 +453,6 @@ fn field_to_type_annotation_with_generic(
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a field should use an index accessor
|
||||
fn field_uses_accessor(field: &PatternField, metadata: &ClientMetadata) -> bool {
|
||||
metadata.find_index_set_pattern(&field.indexes).is_some()
|
||||
}
|
||||
|
||||
/// Generate the catalog tree structure
|
||||
fn generate_tree(output: &mut String, catalog: &TreeNode, metadata: &ClientMetadata) {
|
||||
writeln!(output, "// Catalog tree\n").unwrap();
|
||||
@@ -482,193 +478,142 @@ fn generate_tree_node(
|
||||
metadata: &ClientMetadata,
|
||||
generated: &mut HashSet<String>,
|
||||
) {
|
||||
if let TreeNode::Branch(children) = node {
|
||||
// Build the signature for this node, also tracking child fields for generic pattern lookup
|
||||
let mut fields_with_child_info: Vec<(PatternField, Option<Vec<PatternField>>)> = children
|
||||
.iter()
|
||||
.map(|(child_name, child_node)| {
|
||||
let (rust_type, json_type, indexes, child_fields) = match child_node {
|
||||
TreeNode::Leaf(leaf) => (
|
||||
leaf.value_type().to_string(),
|
||||
leaf.schema
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("object")
|
||||
.to_string(),
|
||||
leaf.indexes().clone(),
|
||||
None,
|
||||
),
|
||||
TreeNode::Branch(grandchildren) => {
|
||||
// Get pattern name for this child
|
||||
let child_fields = get_node_fields(grandchildren, pattern_lookup);
|
||||
let pattern_name = pattern_lookup
|
||||
.get(&child_fields)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| format!("{}_{}", name, to_pascal_case(child_name)));
|
||||
(
|
||||
pattern_name.clone(),
|
||||
pattern_name,
|
||||
std::collections::BTreeSet::new(),
|
||||
Some(child_fields),
|
||||
)
|
||||
}
|
||||
};
|
||||
(
|
||||
PatternField {
|
||||
name: child_name.clone(),
|
||||
rust_type,
|
||||
json_type,
|
||||
indexes,
|
||||
},
|
||||
child_fields,
|
||||
let TreeNode::Branch(children) = node else {
|
||||
return;
|
||||
};
|
||||
|
||||
let fields_with_child_info = get_fields_with_child_info(children, name, pattern_lookup);
|
||||
let fields: Vec<PatternField> = fields_with_child_info
|
||||
.iter()
|
||||
.map(|(f, _)| f.clone())
|
||||
.collect();
|
||||
|
||||
// Skip if this matches a pattern (already generated separately)
|
||||
if let Some(pattern_name) = pattern_lookup.get(&fields)
|
||||
&& pattern_name != name
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if generated.contains(name) {
|
||||
return;
|
||||
}
|
||||
generated.insert(name.to_string());
|
||||
|
||||
writeln!(output, "/// Catalog tree node.").unwrap();
|
||||
writeln!(output, "pub struct {}<'a> {{", name).unwrap();
|
||||
|
||||
for (field, child_fields) in &fields_with_child_info {
|
||||
let field_name = to_snake_case(&field.name);
|
||||
let generic_value_type = child_fields
|
||||
.as_ref()
|
||||
.and_then(|cf| metadata.get_generic_value_type(&field.rust_type, cf));
|
||||
let type_annotation = field_to_type_annotation_with_generic(
|
||||
field,
|
||||
metadata,
|
||||
false,
|
||||
generic_value_type.as_deref(),
|
||||
);
|
||||
writeln!(output, " pub {}: {},", field_name, type_annotation).unwrap();
|
||||
}
|
||||
|
||||
writeln!(output, "}}\n").unwrap();
|
||||
|
||||
// Generate impl block
|
||||
writeln!(output, "impl<'a> {}<'a> {{", name).unwrap();
|
||||
writeln!(
|
||||
output,
|
||||
" pub fn new(client: &'a BrkClientBase, base_path: &str) -> Self {{"
|
||||
)
|
||||
.unwrap();
|
||||
writeln!(output, " Self {{").unwrap();
|
||||
|
||||
for (field, (child_name, child_node)) in fields.iter().zip(children.iter()) {
|
||||
let field_name = to_snake_case(&field.name);
|
||||
if metadata.is_pattern_type(&field.rust_type) {
|
||||
let pattern = metadata.find_pattern(&field.rust_type);
|
||||
let is_parameterizable = pattern.is_some_and(|p| p.is_parameterizable());
|
||||
|
||||
if is_parameterizable {
|
||||
let metric_base = get_pattern_instance_base(child_node, child_name);
|
||||
writeln!(
|
||||
output,
|
||||
" {}: {}::new(client, \"{}\"),",
|
||||
field_name, field.rust_type, metric_base
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
fields_with_child_info.sort_by(|a, b| a.0.name.cmp(&b.0.name));
|
||||
|
||||
let fields: Vec<PatternField> = fields_with_child_info
|
||||
.iter()
|
||||
.map(|(f, _)| f.clone())
|
||||
.collect();
|
||||
|
||||
// Check if this matches a reusable pattern
|
||||
if let Some(pattern_name) = pattern_lookup.get(&fields) {
|
||||
// This node matches a pattern that will be generated separately
|
||||
// Don't generate it here, it's already in pattern_structs
|
||||
if pattern_name != name {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Generate this struct if not already generated
|
||||
if generated.contains(name) {
|
||||
return;
|
||||
}
|
||||
generated.insert(name.to_string());
|
||||
|
||||
writeln!(output, "/// Catalog tree node.").unwrap();
|
||||
writeln!(output, "pub struct {}<'a> {{", name).unwrap();
|
||||
|
||||
for (field, child_fields) in &fields_with_child_info {
|
||||
let field_name = to_snake_case(&field.name);
|
||||
// For generic patterns, extract the value type from child fields
|
||||
let generic_value_type = child_fields
|
||||
.as_ref()
|
||||
.and_then(|cf| metadata.get_generic_value_type(&field.rust_type, cf));
|
||||
let type_annotation = field_to_type_annotation_with_generic(
|
||||
field,
|
||||
metadata,
|
||||
false,
|
||||
generic_value_type.as_deref(),
|
||||
);
|
||||
writeln!(output, " pub {}: {},", field_name, type_annotation).unwrap();
|
||||
}
|
||||
|
||||
writeln!(output, "}}\n").unwrap();
|
||||
|
||||
// Generate impl block
|
||||
writeln!(output, "impl<'a> {}<'a> {{", name).unwrap();
|
||||
writeln!(
|
||||
output,
|
||||
" pub fn new(client: &'a BrkClientBase, base_path: &str) -> Self {{"
|
||||
)
|
||||
.unwrap();
|
||||
writeln!(output, " Self {{").unwrap();
|
||||
|
||||
for (field, (child_name, child_node)) in fields.iter().zip(children.iter()) {
|
||||
let field_name = to_snake_case(&field.name);
|
||||
if metadata.is_pattern_type(&field.rust_type) {
|
||||
// Check if the pattern is parameterizable
|
||||
let pattern = metadata
|
||||
.structural_patterns
|
||||
.iter()
|
||||
.find(|p| p.name == field.rust_type);
|
||||
let is_parameterizable = pattern.map(|p| p.is_parameterizable()).unwrap_or(false);
|
||||
|
||||
if is_parameterizable {
|
||||
// Get the metric base from the first leaf descendant
|
||||
let metric_base = get_pattern_instance_base(child_node, child_name);
|
||||
writeln!(
|
||||
output,
|
||||
" {}: {}::new(client, \"{}\"),",
|
||||
field_name, field.rust_type, metric_base
|
||||
)
|
||||
.unwrap();
|
||||
} else {
|
||||
writeln!(
|
||||
output,
|
||||
" {}: {}::new(client, &format!(\"{{base_path}}/{}\")),",
|
||||
field_name, field.rust_type, field.name
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
} else if field_uses_accessor(field, metadata) {
|
||||
// Leaf with accessor - get actual metric path from leaf
|
||||
let metric_path = if let TreeNode::Leaf(leaf) = child_node {
|
||||
format!("/{}", leaf.name())
|
||||
} else {
|
||||
format!("{{base_path}}/{}", field.name)
|
||||
};
|
||||
let accessor = metadata.find_index_set_pattern(&field.indexes).unwrap();
|
||||
if metric_path.contains("{base_path}") {
|
||||
writeln!(
|
||||
output,
|
||||
" {}: {}::new(client, &format!(\"{}\")),",
|
||||
field_name, accessor.name, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
} else {
|
||||
writeln!(
|
||||
output,
|
||||
" {}: {}::new(client, \"{}\"),",
|
||||
field_name, accessor.name, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
.unwrap();
|
||||
} else {
|
||||
// Leaf without accessor - get actual metric path from leaf
|
||||
let metric_path = if let TreeNode::Leaf(leaf) = child_node {
|
||||
format!("/{}", leaf.name())
|
||||
} else {
|
||||
format!("{{base_path}}/{}", field.name)
|
||||
};
|
||||
if metric_path.contains("{base_path}") {
|
||||
writeln!(
|
||||
output,
|
||||
" {}: MetricNode::new(client, format!(\"{}\")),",
|
||||
field_name, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
} else {
|
||||
writeln!(
|
||||
output,
|
||||
" {}: MetricNode::new(client, \"{}\".to_string()),",
|
||||
field_name, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
writeln!(
|
||||
output,
|
||||
" {}: {}::new(client, &format!(\"{{base_path}}/{}\")),",
|
||||
field_name, field.rust_type, field.name
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
} else if metadata.field_uses_accessor(field) {
|
||||
let metric_path = if let TreeNode::Leaf(leaf) = child_node {
|
||||
format!("/{}", leaf.name())
|
||||
} else {
|
||||
format!("{{base_path}}/{}", field.name)
|
||||
};
|
||||
let accessor = metadata.find_index_set_pattern(&field.indexes).unwrap();
|
||||
if metric_path.contains("{base_path}") {
|
||||
writeln!(
|
||||
output,
|
||||
" {}: {}::new(client, &format!(\"{}\")),",
|
||||
field_name, accessor.name, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
} else {
|
||||
writeln!(
|
||||
output,
|
||||
" {}: {}::new(client, \"{}\"),",
|
||||
field_name, accessor.name, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
} else {
|
||||
let metric_path = if let TreeNode::Leaf(leaf) = child_node {
|
||||
format!("/{}", leaf.name())
|
||||
} else {
|
||||
format!("{{base_path}}/{}", field.name)
|
||||
};
|
||||
if metric_path.contains("{base_path}") {
|
||||
writeln!(
|
||||
output,
|
||||
" {}: MetricNode::new(client, format!(\"{}\")),",
|
||||
field_name, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
} else {
|
||||
writeln!(
|
||||
output,
|
||||
" {}: MetricNode::new(client, \"{}\".to_string()),",
|
||||
field_name, metric_path
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(output, " }}").unwrap();
|
||||
writeln!(output, " }}").unwrap();
|
||||
writeln!(output, "}}\n").unwrap();
|
||||
writeln!(output, " }}").unwrap();
|
||||
writeln!(output, " }}").unwrap();
|
||||
writeln!(output, "}}\n").unwrap();
|
||||
|
||||
// Recursively generate child nodes that aren't patterns
|
||||
for (child_name, child_node) in children {
|
||||
if let TreeNode::Branch(grandchildren) = child_node {
|
||||
let child_fields = get_node_fields(grandchildren, pattern_lookup);
|
||||
if !pattern_lookup.contains_key(&child_fields) {
|
||||
let child_struct_name = format!("{}_{}", name, to_pascal_case(child_name));
|
||||
generate_tree_node(
|
||||
output,
|
||||
&child_struct_name,
|
||||
child_node,
|
||||
pattern_lookup,
|
||||
metadata,
|
||||
generated,
|
||||
);
|
||||
}
|
||||
// Recursively generate child nodes that aren't patterns
|
||||
for (child_name, child_node) in children {
|
||||
if let TreeNode::Branch(grandchildren) = child_node {
|
||||
let child_fields = get_node_fields(grandchildren, pattern_lookup);
|
||||
if !pattern_lookup.contains_key(&child_fields) {
|
||||
let child_struct_name = format!("{}_{}", name, to_pascal_case(child_name));
|
||||
generate_tree_node(
|
||||
output,
|
||||
&child_struct_name,
|
||||
child_node,
|
||||
pattern_lookup,
|
||||
metadata,
|
||||
generated,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -781,15 +726,7 @@ fn generate_api_methods(output: &mut String, endpoints: &[Endpoint]) {
|
||||
}
|
||||
|
||||
fn endpoint_to_method_name(endpoint: &Endpoint) -> String {
|
||||
if let Some(op_id) = &endpoint.operation_id {
|
||||
return to_snake_case(op_id);
|
||||
}
|
||||
let parts: Vec<&str> = endpoint
|
||||
.path
|
||||
.split('/')
|
||||
.filter(|s| !s.is_empty() && !s.starts_with('{'))
|
||||
.collect();
|
||||
to_snake_case(&format!("get_{}", parts.join("_")))
|
||||
to_snake_case(&endpoint.operation_name())
|
||||
}
|
||||
|
||||
fn build_method_params(endpoint: &Endpoint) -> String {
|
||||
|
||||
@@ -1,935 +0,0 @@
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap};
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
use brk_query::Vecs;
|
||||
use brk_types::{Index, TreeNode};
|
||||
|
||||
/// How a field modifies the accumulated metric name
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum FieldNamePosition {
|
||||
/// Field prepends a prefix: leaf.name() = prefix + accumulated
|
||||
Prepend(String),
|
||||
/// Field appends a suffix: leaf.name() = accumulated + suffix
|
||||
Append(String),
|
||||
/// Field IS the accumulated name (no modification)
|
||||
Identity,
|
||||
/// Field sets a new base name (used at pattern entry points)
|
||||
SetBase(String),
|
||||
}
|
||||
|
||||
/// Metadata extracted from brk_query for client generation
|
||||
#[derive(Debug)]
|
||||
pub struct ClientMetadata {
|
||||
/// The catalog tree structure (with schemas in leaves)
|
||||
pub catalog: TreeNode,
|
||||
/// Structural patterns - tree node shapes that repeat
|
||||
pub structural_patterns: Vec<StructuralPattern>,
|
||||
/// All indexes used across the catalog
|
||||
pub used_indexes: BTreeSet<Index>,
|
||||
/// Index set patterns - sets of indexes that appear together on metrics
|
||||
pub index_set_patterns: Vec<IndexSetPattern>,
|
||||
/// Maps concrete field signatures to pattern names (includes generic pattern mappings)
|
||||
pub concrete_to_pattern: HashMap<Vec<PatternField>, String>,
|
||||
}
|
||||
|
||||
/// A pattern of indexes that appear together on multiple metrics
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IndexSetPattern {
|
||||
/// Pattern name (e.g., "DateHeightIndexes")
|
||||
pub name: String,
|
||||
/// The set of indexes
|
||||
pub indexes: BTreeSet<Index>,
|
||||
}
|
||||
|
||||
/// A structural pattern - a branch structure that appears multiple times in the tree
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct StructuralPattern {
|
||||
/// Pattern name - sanitized for all languages (e.g., "BaseCumulativeSum")
|
||||
pub name: String,
|
||||
/// Ordered list of child fields (sorted by field name)
|
||||
pub fields: Vec<PatternField>,
|
||||
/// How each field modifies the accumulated name (field_name -> position)
|
||||
pub field_positions: HashMap<String, FieldNamePosition>,
|
||||
/// If true, all leaf fields use a type parameter T instead of concrete types
|
||||
pub is_generic: bool,
|
||||
}
|
||||
|
||||
impl StructuralPattern {
|
||||
/// Returns true if this pattern contains any leaf fields (fields with indexes).
|
||||
/// Patterns with leaves can't use factory functions because leaf.name() is instance-specific.
|
||||
pub fn contains_leaves(&self) -> bool {
|
||||
self.fields.iter().any(|f| !f.indexes.is_empty())
|
||||
}
|
||||
|
||||
/// Returns true if all leaf fields have consistent name transformations.
|
||||
/// A pattern is parameterizable if we can detect prepend/append patterns.
|
||||
pub fn is_parameterizable(&self) -> bool {
|
||||
!self.field_positions.is_empty()
|
||||
&& self.fields.iter().all(|f| {
|
||||
// Branch fields are always OK (they delegate to nested patterns)
|
||||
f.indexes.is_empty() || self.field_positions.contains_key(&f.name)
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the field position for a given field name
|
||||
pub fn get_field_position(&self, field_name: &str) -> Option<&FieldNamePosition> {
|
||||
self.field_positions.get(field_name)
|
||||
}
|
||||
}
|
||||
|
||||
/// A field in a structural pattern
|
||||
#[derive(Debug, Clone, PartialOrd, Ord)]
|
||||
pub struct PatternField {
|
||||
/// Field name
|
||||
pub name: String,
|
||||
/// Rust type: brk_types type for leaves ("Sats", "StoredF64") or pattern name for branches
|
||||
pub rust_type: String,
|
||||
/// JSON type from schema: "integer", "number", "string", "boolean", or pattern name for branches
|
||||
pub json_type: String,
|
||||
/// For leaves: the set of supported indexes. Empty for branches.
|
||||
pub indexes: BTreeSet<Index>,
|
||||
}
|
||||
|
||||
// Manual implementations of Hash/Eq/PartialEq that exclude `indexes`
|
||||
// since indexes aren't part of the structural pattern identity
|
||||
impl Hash for PatternField {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.name.hash(state);
|
||||
self.rust_type.hash(state);
|
||||
self.json_type.hash(state);
|
||||
// indexes excluded from hash
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for PatternField {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.name == other.name
|
||||
&& self.rust_type == other.rust_type
|
||||
&& self.json_type == other.json_type
|
||||
// indexes excluded from equality
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for PatternField {}
|
||||
|
||||
impl ClientMetadata {
|
||||
/// Extract metadata from brk_query::Vecs
|
||||
pub fn from_vecs(vecs: &Vecs) -> Self {
|
||||
let catalog = vecs.catalog().clone();
|
||||
let (structural_patterns, concrete_to_pattern) = detect_structural_patterns(&catalog);
|
||||
let (used_indexes, index_set_patterns) = detect_index_patterns(&catalog);
|
||||
|
||||
ClientMetadata {
|
||||
catalog,
|
||||
structural_patterns,
|
||||
used_indexes,
|
||||
index_set_patterns,
|
||||
concrete_to_pattern,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if an index set matches a pattern
|
||||
pub fn find_index_set_pattern(&self, indexes: &BTreeSet<Index>) -> Option<&IndexSetPattern> {
|
||||
self.index_set_patterns
|
||||
.iter()
|
||||
.find(|p| &p.indexes == indexes)
|
||||
}
|
||||
|
||||
/// Check if a type is a pattern (vs a primitive leaf type)
|
||||
pub fn is_pattern_type(&self, type_name: &str) -> bool {
|
||||
self.structural_patterns.iter().any(|p| p.name == type_name)
|
||||
}
|
||||
|
||||
/// Find a pattern by name
|
||||
pub fn find_pattern(&self, name: &str) -> Option<&StructuralPattern> {
|
||||
self.structural_patterns.iter().find(|p| p.name == name)
|
||||
}
|
||||
|
||||
/// Check if a pattern is generic
|
||||
pub fn is_pattern_generic(&self, name: &str) -> bool {
|
||||
self.find_pattern(name)
|
||||
.map(|p| p.is_generic)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Extract the value type from concrete fields for a generic pattern.
|
||||
/// Returns the first leaf field's rust_type if this pattern is generic.
|
||||
/// If the type is a wrapper like `Close<Dollars>`, extracts the inner type `Dollars`.
|
||||
pub fn get_generic_value_type(
|
||||
&self,
|
||||
pattern_name: &str,
|
||||
fields: &[PatternField],
|
||||
) -> Option<String> {
|
||||
if !self.is_pattern_generic(pattern_name) {
|
||||
return None;
|
||||
}
|
||||
// Find first leaf field (has indexes)
|
||||
fields
|
||||
.iter()
|
||||
.find(|f| !f.indexes.is_empty())
|
||||
.map(|f| extract_inner_type(&f.rust_type))
|
||||
}
|
||||
|
||||
/// Build a lookup map from field signatures to pattern names.
|
||||
/// Includes both generic pattern signatures and concrete signatures.
|
||||
pub fn pattern_lookup(&self) -> HashMap<Vec<PatternField>, String> {
|
||||
// Start with concrete-to-pattern mappings (includes generic pattern concrete signatures)
|
||||
let mut lookup = self.concrete_to_pattern.clone();
|
||||
// Also add the normalized generic signatures
|
||||
for p in &self.structural_patterns {
|
||||
lookup.insert(p.fields.clone(), p.name.clone());
|
||||
}
|
||||
lookup
|
||||
}
|
||||
}
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
/// Unwrap allOf with a single element, returning the inner schema.
|
||||
/// schemars uses allOf for composition, but often with just one $ref.
|
||||
pub fn unwrap_allof(schema: &Value) -> &Value {
|
||||
if let Some(all_of) = schema.get("allOf").and_then(|v| v.as_array())
|
||||
&& all_of.len() == 1
|
||||
{
|
||||
return &all_of[0];
|
||||
}
|
||||
schema
|
||||
}
|
||||
|
||||
/// Check if a schema represents an enum type.
|
||||
/// Enums have either an "enum" array or "oneOf" without properties.
|
||||
pub fn is_enum_schema(schema: &Value) -> bool {
|
||||
schema.get("enum").is_some()
|
||||
|| (schema.get("oneOf").is_some() && schema.get("properties").is_none())
|
||||
}
|
||||
|
||||
/// Extract inner type from a wrapper generic like `Close<Dollars>` -> `Dollars`.
|
||||
/// Also handles malformed types like `Dollars>` (from vecdb's short_type_name which
|
||||
/// extracts "Dollars>" from "Close<brk_types::Dollars>" using rsplit("::")).
|
||||
/// If not a generic, returns the type as-is.
|
||||
pub fn extract_inner_type(type_str: &str) -> String {
|
||||
// Handle proper generic wrappers like `Close<Dollars>` -> `Dollars`
|
||||
if let Some(start) = type_str.find('<')
|
||||
&& let Some(end) = type_str.rfind('>')
|
||||
&& start < end
|
||||
{
|
||||
return type_str[start + 1..end].to_string();
|
||||
}
|
||||
// Handle malformed types like `Dollars>` (trailing > without <)
|
||||
// This happens due to vecdb's short_type_name using rsplit("::")
|
||||
if type_str.ends_with('>') && !type_str.contains('<') {
|
||||
return type_str.trim_end_matches('>').to_string();
|
||||
}
|
||||
type_str.to_string()
|
||||
}
|
||||
|
||||
/// Detect structural patterns in the tree using a bottom-up approach.
|
||||
/// For every branch node, create a signature from its children (sorted field names + types).
|
||||
/// Patterns that appear 2+ times are deduplicated.
|
||||
/// Returns (patterns, concrete_to_pattern_mapping).
|
||||
fn detect_structural_patterns(
|
||||
tree: &TreeNode,
|
||||
) -> (Vec<StructuralPattern>, HashMap<Vec<PatternField>, String>) {
|
||||
// Map from sorted fields signature to pattern name
|
||||
let mut signature_to_pattern: HashMap<Vec<PatternField>, String> = HashMap::new();
|
||||
// Count how many times each signature appears
|
||||
let mut signature_counts: HashMap<Vec<PatternField>, usize> = HashMap::new();
|
||||
// Map normalized signatures to names (so patterns differing only in value type share names)
|
||||
let mut normalized_to_name: HashMap<Vec<PatternField>, String> = HashMap::new();
|
||||
// Track name usage to append index for duplicates
|
||||
let mut name_counts: HashMap<String, usize> = HashMap::new();
|
||||
|
||||
// Process tree bottom-up to resolve all branch types
|
||||
resolve_branch_patterns(
|
||||
tree,
|
||||
"root",
|
||||
&mut signature_to_pattern,
|
||||
&mut signature_counts,
|
||||
&mut normalized_to_name,
|
||||
&mut name_counts,
|
||||
);
|
||||
|
||||
// First, identify generic patterns by grouping ALL signatures by their normalized form.
|
||||
// Even if each concrete signature appears only once, if 2+ different value types
|
||||
// normalize to the same pattern, we create a generic pattern.
|
||||
let (generic_patterns, generic_mappings) = detect_generic_patterns(&signature_to_pattern);
|
||||
|
||||
// Build non-generic patterns: signatures appearing 2+ times that weren't merged into generics
|
||||
let mut patterns: Vec<StructuralPattern> = signature_to_pattern
|
||||
.iter()
|
||||
.filter(|(sig, _)| {
|
||||
signature_counts.get(*sig).copied().unwrap_or(0) >= 2
|
||||
&& !generic_mappings.contains_key(*sig)
|
||||
})
|
||||
.map(|(fields, name)| StructuralPattern {
|
||||
name: name.clone(),
|
||||
fields: fields.clone(),
|
||||
field_positions: HashMap::new(),
|
||||
is_generic: false,
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Add the generic patterns
|
||||
patterns.extend(generic_patterns);
|
||||
|
||||
// Build lookup for second pass - include all concrete signatures
|
||||
let mut pattern_lookup: HashMap<Vec<PatternField>, String> = HashMap::new();
|
||||
// Add non-generic patterns that appear 2+ times
|
||||
for (sig, name) in &signature_to_pattern {
|
||||
if signature_counts.get(sig).copied().unwrap_or(0) >= 2 {
|
||||
pattern_lookup.insert(sig.clone(), name.clone());
|
||||
}
|
||||
}
|
||||
// Add generic mappings (overwrite if there's overlap)
|
||||
pattern_lookup.extend(generic_mappings.clone());
|
||||
|
||||
// Build the concrete_to_pattern map to return
|
||||
let concrete_to_pattern = pattern_lookup.clone();
|
||||
|
||||
// Second pass: analyze field positions by traversing tree instances
|
||||
analyze_pattern_field_positions(tree, &mut patterns, &pattern_lookup);
|
||||
|
||||
// Sort by number of fields descending (larger patterns first)
|
||||
patterns.sort_by(|a, b| b.fields.len().cmp(&a.fields.len()));
|
||||
|
||||
(patterns, concrete_to_pattern)
|
||||
}
|
||||
|
||||
/// Detect generic patterns by grouping all signatures by their normalized form.
|
||||
/// Returns (generic_patterns, concrete_signature -> generic_pattern_name mapping).
|
||||
fn detect_generic_patterns(
|
||||
signature_to_pattern: &HashMap<Vec<PatternField>, String>,
|
||||
) -> (Vec<StructuralPattern>, HashMap<Vec<PatternField>, String>) {
|
||||
// Group signatures by their normalized (generic) form
|
||||
let mut normalized_groups: HashMap<Vec<PatternField>, Vec<(Vec<PatternField>, String)>> =
|
||||
HashMap::new();
|
||||
|
||||
for (fields, name) in signature_to_pattern {
|
||||
if let Some(normalized) = normalize_fields_for_generic(fields) {
|
||||
normalized_groups
|
||||
.entry(normalized)
|
||||
.or_default()
|
||||
.push((fields.clone(), name.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
let mut patterns = Vec::new();
|
||||
let mut mappings: HashMap<Vec<PatternField>, String> = HashMap::new();
|
||||
|
||||
// Create generic patterns for groups with 2+ different concrete signatures
|
||||
for (normalized_fields, group) in normalized_groups {
|
||||
if group.len() >= 2 {
|
||||
// Use the first pattern's name as the generic pattern name
|
||||
let generic_name = group[0].1.clone();
|
||||
|
||||
// Map all concrete signatures to this generic pattern
|
||||
for (concrete_fields, _) in &group {
|
||||
mappings.insert(concrete_fields.clone(), generic_name.clone());
|
||||
}
|
||||
|
||||
patterns.push(StructuralPattern {
|
||||
name: generic_name,
|
||||
fields: normalized_fields,
|
||||
field_positions: HashMap::new(),
|
||||
is_generic: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
(patterns, mappings)
|
||||
}
|
||||
|
||||
/// Normalize fields by replacing concrete value types with "T" for generic matching.
|
||||
/// Returns None if the pattern is not suitable for generics (e.g., mixed value types).
|
||||
fn normalize_fields_for_generic(fields: &[PatternField]) -> Option<Vec<PatternField>> {
|
||||
// Get all leaf field value types
|
||||
let leaf_types: Vec<&str> = fields
|
||||
.iter()
|
||||
.filter(|f| !f.indexes.is_empty()) // Only leaves have indexes
|
||||
.map(|f| f.rust_type.as_str())
|
||||
.collect();
|
||||
|
||||
// Need at least one leaf to be generic
|
||||
if leaf_types.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// All leaves must have the same value type
|
||||
let first_type = leaf_types[0];
|
||||
if !leaf_types.iter().all(|t| *t == first_type) {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Create normalized fields with "T" as the value type
|
||||
let normalized: Vec<PatternField> = fields
|
||||
.iter()
|
||||
.map(|f| {
|
||||
if f.indexes.is_empty() {
|
||||
// Branch field - keep as is
|
||||
f.clone()
|
||||
} else {
|
||||
// Leaf field - replace value type with T
|
||||
PatternField {
|
||||
name: f.name.clone(),
|
||||
rust_type: "T".to_string(),
|
||||
json_type: "T".to_string(),
|
||||
indexes: f.indexes.clone(),
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Some(normalized)
|
||||
}
|
||||
|
||||
/// Analyze field positions for all patterns by traversing tree instances.
|
||||
/// For each pattern instance, we compare parent accumulated name with child leaf names.
|
||||
fn analyze_pattern_field_positions(
|
||||
tree: &TreeNode,
|
||||
patterns: &mut [StructuralPattern],
|
||||
pattern_lookup: &HashMap<Vec<PatternField>, String>,
|
||||
) {
|
||||
// Collect instances: pattern_name -> vec of (accumulated_name, field_name, leaf_name)
|
||||
let mut instances: HashMap<String, Vec<(String, String, String)>> = HashMap::new();
|
||||
|
||||
// Traverse tree and collect instances
|
||||
collect_pattern_instances(tree, "", &mut instances, pattern_lookup);
|
||||
|
||||
// For each pattern, analyze field positions from instances
|
||||
for pattern in patterns.iter_mut() {
|
||||
if let Some(pattern_instances) = instances.get(&pattern.name) {
|
||||
pattern.field_positions = analyze_field_positions_from_instances(pattern_instances);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursively traverse tree and collect pattern instances with accumulated metric names.
|
||||
fn collect_pattern_instances(
|
||||
node: &TreeNode,
|
||||
accumulated_name: &str,
|
||||
instances: &mut HashMap<String, Vec<(String, String, String)>>,
|
||||
pattern_lookup: &HashMap<Vec<PatternField>, String>,
|
||||
) {
|
||||
if let TreeNode::Branch(children) = node {
|
||||
// Check if this branch matches a pattern
|
||||
let fields = get_node_fields_for_analysis(children, pattern_lookup);
|
||||
if let Some(pattern_name) = pattern_lookup.get(&fields) {
|
||||
// Collect instances for this pattern
|
||||
for (field_name, child_node) in children {
|
||||
if let TreeNode::Leaf(leaf) = child_node {
|
||||
instances.entry(pattern_name.clone()).or_default().push((
|
||||
accumulated_name.to_string(),
|
||||
field_name.clone(),
|
||||
leaf.name().to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Continue traversing children
|
||||
for (field_name, child_node) in children {
|
||||
let child_accumulated = match child_node {
|
||||
TreeNode::Leaf(leaf) => leaf.name().to_string(),
|
||||
TreeNode::Branch(_) => {
|
||||
// For branches, we need to infer the accumulated name
|
||||
// If there's a leaf descendant, use its name as the basis
|
||||
if let Some(desc_leaf_name) = get_descendant_leaf_name(child_node) {
|
||||
// Try to extract what this level contributes
|
||||
infer_accumulated_name(accumulated_name, field_name, &desc_leaf_name)
|
||||
} else {
|
||||
// No descendants - use field name as base
|
||||
if accumulated_name.is_empty() {
|
||||
field_name.clone()
|
||||
} else {
|
||||
format!("{}_{}", accumulated_name, field_name)
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
collect_pattern_instances(child_node, &child_accumulated, instances, pattern_lookup);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a descendant leaf name from a branch node (first one found)
|
||||
fn get_descendant_leaf_name(node: &TreeNode) -> Option<String> {
|
||||
match node {
|
||||
TreeNode::Leaf(leaf) => Some(leaf.name().to_string()),
|
||||
TreeNode::Branch(children) => {
|
||||
for child in children.values() {
|
||||
if let Some(name) = get_descendant_leaf_name(child) {
|
||||
return Some(name);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Infer the accumulated name at this level by analyzing what part of the descendant's name
|
||||
/// comes from the current field.
|
||||
fn infer_accumulated_name(parent_acc: &str, field_name: &str, descendant_leaf: &str) -> String {
|
||||
// Try to find field_name in the descendant's metric name
|
||||
if let Some(pos) = descendant_leaf.find(field_name) {
|
||||
// Extract the part that corresponds to this level
|
||||
if pos == 0 {
|
||||
// Field is at the start
|
||||
field_name.to_string()
|
||||
} else if pos > 0 && descendant_leaf.chars().nth(pos - 1) == Some('_') {
|
||||
// Field appears after underscore - this is likely an append
|
||||
if parent_acc.is_empty() {
|
||||
field_name.to_string()
|
||||
} else {
|
||||
format!("{}_{}", parent_acc, field_name)
|
||||
}
|
||||
} else {
|
||||
field_name.to_string()
|
||||
}
|
||||
} else {
|
||||
// Field name not directly found - use as is
|
||||
if parent_acc.is_empty() {
|
||||
field_name.to_string()
|
||||
} else {
|
||||
format!("{}_{}", parent_acc, field_name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Analyze instances to determine field positions (prepend/append/identity).
|
||||
fn analyze_field_positions_from_instances(
|
||||
instances: &[(String, String, String)],
|
||||
) -> HashMap<String, FieldNamePosition> {
|
||||
// Group by field name
|
||||
let mut field_instances: HashMap<String, Vec<(String, String)>> = HashMap::new();
|
||||
for (acc, field, leaf) in instances {
|
||||
field_instances
|
||||
.entry(field.clone())
|
||||
.or_default()
|
||||
.push((acc.clone(), leaf.clone()));
|
||||
}
|
||||
|
||||
let mut positions = HashMap::new();
|
||||
|
||||
for (field_name, field_data) in field_instances {
|
||||
if let Some(position) = detect_field_position(&field_data) {
|
||||
positions.insert(field_name, position);
|
||||
}
|
||||
}
|
||||
|
||||
positions
|
||||
}
|
||||
|
||||
/// Detect the position transformation for a field based on (accumulated, leaf_name) pairs.
|
||||
fn detect_field_position(data: &[(String, String)]) -> Option<FieldNamePosition> {
|
||||
if data.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Try to detect pattern from first instance, then validate against others
|
||||
let (first_acc, first_leaf) = &data[0];
|
||||
|
||||
// Case 1: Identity - leaf == accumulated
|
||||
if first_acc == first_leaf {
|
||||
return Some(FieldNamePosition::Identity);
|
||||
}
|
||||
|
||||
// Case 2: Append - leaf = acc + suffix
|
||||
if let Some(suffix) = first_leaf.strip_prefix(first_acc.as_str()) {
|
||||
let suffix = suffix.to_string();
|
||||
// Validate this pattern holds for all instances
|
||||
if data.iter().all(|(acc, leaf)| {
|
||||
if acc.is_empty() {
|
||||
// When acc is empty, leaf should equal suffix (without leading _)
|
||||
leaf == suffix.trim_start_matches('_')
|
||||
} else {
|
||||
leaf.strip_prefix(acc.as_str()) == Some(&suffix)
|
||||
}
|
||||
}) {
|
||||
return Some(FieldNamePosition::Append(suffix));
|
||||
}
|
||||
}
|
||||
|
||||
// Case 3: Prepend - leaf = prefix + acc
|
||||
if let Some(prefix) = first_leaf.strip_suffix(first_acc.as_str()) {
|
||||
let prefix = prefix.to_string();
|
||||
// Validate this pattern holds for all instances
|
||||
if data.iter().all(|(acc, leaf)| {
|
||||
if acc.is_empty() {
|
||||
// When acc is empty, leaf should equal prefix (without trailing _)
|
||||
leaf == prefix.trim_end_matches('_')
|
||||
} else {
|
||||
leaf.strip_suffix(acc.as_str()) == Some(&prefix)
|
||||
}
|
||||
}) {
|
||||
return Some(FieldNamePosition::Prepend(prefix));
|
||||
}
|
||||
}
|
||||
|
||||
// Case 4: SetBase - the field name IS the metric base
|
||||
// This happens at entry points where accumulated is empty
|
||||
if first_acc.is_empty() {
|
||||
return Some(FieldNamePosition::SetBase(first_leaf.clone()));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Get node fields for pattern matching during analysis
|
||||
fn get_node_fields_for_analysis(
|
||||
children: &BTreeMap<String, TreeNode>,
|
||||
pattern_lookup: &HashMap<Vec<PatternField>, String>,
|
||||
) -> Vec<PatternField> {
|
||||
let mut fields: Vec<PatternField> = children
|
||||
.iter()
|
||||
.map(|(name, node)| {
|
||||
let (rust_type, json_type, indexes) = match node {
|
||||
TreeNode::Leaf(leaf) => (
|
||||
leaf.value_type().to_string(),
|
||||
schema_to_json_type(&leaf.schema),
|
||||
leaf.indexes().clone(),
|
||||
),
|
||||
TreeNode::Branch(grandchildren) => {
|
||||
let child_fields = get_node_fields_for_analysis(grandchildren, pattern_lookup);
|
||||
let pattern_name = pattern_lookup
|
||||
.get(&child_fields)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| "Unknown".to_string());
|
||||
(pattern_name.clone(), pattern_name, BTreeSet::new())
|
||||
}
|
||||
};
|
||||
PatternField {
|
||||
name: name.clone(),
|
||||
rust_type,
|
||||
json_type,
|
||||
indexes,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
fields.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
fields
|
||||
}
|
||||
|
||||
/// Recursively resolve branch patterns bottom-up.
|
||||
/// Returns the pattern name for this node if it's a branch, or None if it's a leaf.
|
||||
fn resolve_branch_patterns(
|
||||
node: &TreeNode,
|
||||
field_name: &str, // The field name in the parent where this node appears
|
||||
signature_to_pattern: &mut HashMap<Vec<PatternField>, String>,
|
||||
signature_counts: &mut HashMap<Vec<PatternField>, usize>,
|
||||
normalized_to_name: &mut HashMap<Vec<PatternField>, String>, // Normalized sig -> name
|
||||
name_counts: &mut HashMap<String, usize>,
|
||||
) -> Option<String> {
|
||||
match node {
|
||||
TreeNode::Leaf(_) => {
|
||||
// Leaves don't have patterns, return None
|
||||
None
|
||||
}
|
||||
TreeNode::Branch(children) => {
|
||||
// First, recursively resolve all children
|
||||
let mut fields: Vec<PatternField> = Vec::new();
|
||||
|
||||
for (child_name, child_node) in children {
|
||||
let (rust_type, json_type, indexes) = match child_node {
|
||||
TreeNode::Leaf(leaf) => (
|
||||
leaf.value_type().to_string(),
|
||||
schema_to_json_type(&leaf.schema),
|
||||
leaf.indexes().clone(),
|
||||
),
|
||||
TreeNode::Branch(_) => {
|
||||
// Branch: recursively get its pattern name
|
||||
let pattern_name = resolve_branch_patterns(
|
||||
child_node,
|
||||
child_name,
|
||||
signature_to_pattern,
|
||||
signature_counts,
|
||||
normalized_to_name,
|
||||
name_counts,
|
||||
)
|
||||
.unwrap_or_else(|| "Unknown".to_string());
|
||||
(pattern_name.clone(), pattern_name, BTreeSet::new())
|
||||
}
|
||||
};
|
||||
|
||||
fields.push(PatternField {
|
||||
name: child_name.clone(),
|
||||
rust_type,
|
||||
json_type,
|
||||
indexes,
|
||||
});
|
||||
}
|
||||
|
||||
// Sort fields by name for consistent signatures
|
||||
fields.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
|
||||
// Increment count for this signature
|
||||
*signature_counts.entry(fields.clone()).or_insert(0) += 1;
|
||||
|
||||
// Get or create pattern name - use normalized signature for naming
|
||||
// so patterns that differ only in value type get the same name
|
||||
let pattern_name = if let Some(existing) = signature_to_pattern.get(&fields) {
|
||||
existing.clone()
|
||||
} else {
|
||||
// Check if normalized form already has a name
|
||||
let normalized = normalize_fields_for_naming(&fields);
|
||||
let name = normalized_to_name
|
||||
.entry(normalized)
|
||||
.or_insert_with(|| generate_pattern_name(field_name, name_counts))
|
||||
.clone();
|
||||
signature_to_pattern.insert(fields.clone(), name.clone());
|
||||
name
|
||||
};
|
||||
|
||||
Some(pattern_name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Normalize fields for naming: replace value types with a placeholder
|
||||
/// so patterns with same structure but different value types get the same name.
|
||||
fn normalize_fields_for_naming(fields: &[PatternField]) -> Vec<PatternField> {
|
||||
fields
|
||||
.iter()
|
||||
.map(|f| {
|
||||
if f.indexes.is_empty() {
|
||||
// Branch field - keep rust_type (it's a pattern name)
|
||||
f.clone()
|
||||
} else {
|
||||
// Leaf field - normalize value type
|
||||
PatternField {
|
||||
name: f.name.clone(),
|
||||
rust_type: "_".to_string(),
|
||||
json_type: "_".to_string(),
|
||||
indexes: f.indexes.clone(),
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Generate a pattern name from the field name where it's used.
|
||||
/// Appends an index if the same base name is used multiple times.
|
||||
fn generate_pattern_name(field_name: &str, name_counts: &mut HashMap<String, usize>) -> String {
|
||||
let pascal = to_pascal_case(field_name);
|
||||
|
||||
// Sanitize: ensure it starts with a letter (prepend "_" if starts with digit)
|
||||
let sanitized = if pascal
|
||||
.chars()
|
||||
.next()
|
||||
.map(|c| c.is_ascii_digit())
|
||||
.unwrap_or(false)
|
||||
{
|
||||
format!("_{}", pascal)
|
||||
} else {
|
||||
pascal
|
||||
};
|
||||
|
||||
// Add "Pattern" suffix to avoid conflicts with type aliases (e.g., Sats = int vs class Sats)
|
||||
let base_name = format!("{}Pattern", sanitized);
|
||||
|
||||
// Track usage count and append index if needed
|
||||
let count = name_counts.entry(base_name.clone()).or_insert(0);
|
||||
*count += 1;
|
||||
|
||||
if *count == 1 {
|
||||
base_name
|
||||
} else {
|
||||
format!("{}{}", base_name, count)
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract JSON type from JSON Schema
|
||||
fn schema_to_json_type(schema: &serde_json::Value) -> String {
|
||||
if let Some(ty) = schema.get("type").and_then(|v| v.as_str()) {
|
||||
ty.to_string()
|
||||
} else {
|
||||
"object".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the field signature for a branch node's children
|
||||
pub fn get_node_fields(
|
||||
children: &std::collections::BTreeMap<String, TreeNode>,
|
||||
pattern_lookup: &HashMap<Vec<PatternField>, String>,
|
||||
) -> Vec<PatternField> {
|
||||
let mut fields: Vec<PatternField> = children
|
||||
.iter()
|
||||
.map(|(name, node)| {
|
||||
let (rust_type, json_type, indexes) = match node {
|
||||
TreeNode::Leaf(leaf) => (
|
||||
leaf.value_type().to_string(),
|
||||
schema_to_json_type(&leaf.schema),
|
||||
leaf.indexes().clone(),
|
||||
),
|
||||
TreeNode::Branch(grandchildren) => {
|
||||
let child_fields = get_node_fields(grandchildren, pattern_lookup);
|
||||
let pattern_name = pattern_lookup
|
||||
.get(&child_fields)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| "Unknown".to_string());
|
||||
(pattern_name.clone(), pattern_name, BTreeSet::new())
|
||||
}
|
||||
};
|
||||
PatternField {
|
||||
name: name.clone(),
|
||||
rust_type,
|
||||
json_type,
|
||||
indexes,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
fields.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
fields
|
||||
}
|
||||
|
||||
/// Convert a metric name to PascalCase (for struct/class names)
|
||||
pub fn to_pascal_case(s: &str) -> String {
|
||||
// Normalize separators: replace - with _
|
||||
let normalized = s.replace('-', "_");
|
||||
normalized
|
||||
.split('_')
|
||||
.map(|word| {
|
||||
let mut chars = word.chars();
|
||||
match chars.next() {
|
||||
None => String::new(),
|
||||
Some(first) => first.to_uppercase().collect::<String>() + chars.as_str(),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Convert a metric name to snake_case (already snake_case, but sanitize)
|
||||
pub fn to_snake_case(s: &str) -> String {
|
||||
let sanitized = s.replace('-', "_");
|
||||
|
||||
// Prefix with _ if starts with digit
|
||||
let sanitized = if sanitized
|
||||
.chars()
|
||||
.next()
|
||||
.map(|c| c.is_ascii_digit())
|
||||
.unwrap_or(false)
|
||||
{
|
||||
format!("_{}", sanitized)
|
||||
} else {
|
||||
sanitized
|
||||
};
|
||||
|
||||
// Handle Rust keywords
|
||||
match sanitized.as_str() {
|
||||
"type" | "const" | "static" | "match" | "if" | "else" | "loop" | "while" | "for"
|
||||
| "break" | "continue" | "return" | "fn" | "let" | "mut" | "ref" | "self" | "super"
|
||||
| "mod" | "use" | "pub" | "crate" | "extern" | "impl" | "trait" | "struct" | "enum"
|
||||
| "where" | "async" | "await" | "dyn" | "move" => format!("r#{}", sanitized),
|
||||
_ => sanitized,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a metric name to camelCase (for JS/TS)
|
||||
pub fn to_camel_case(s: &str) -> String {
|
||||
let pascal = to_pascal_case(s);
|
||||
let mut chars = pascal.chars();
|
||||
let result = match chars.next() {
|
||||
None => String::new(),
|
||||
Some(first) => first.to_lowercase().collect::<String>() + chars.as_str(),
|
||||
};
|
||||
|
||||
// Prefix with _ if starts with digit
|
||||
if result
|
||||
.chars()
|
||||
.next()
|
||||
.map(|c| c.is_ascii_digit())
|
||||
.unwrap_or(false)
|
||||
{
|
||||
format!("_{}", result)
|
||||
} else {
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the first leaf name from a tree node (used across all generators)
|
||||
pub fn get_first_leaf_name(node: &TreeNode) -> Option<String> {
|
||||
match node {
|
||||
TreeNode::Leaf(leaf) => Some(leaf.name().to_string()),
|
||||
TreeNode::Branch(children) => {
|
||||
for child in children.values() {
|
||||
if let Some(name) = get_first_leaf_name(child) {
|
||||
return Some(name);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the metric base for a pattern instance by analyzing the first leaf descendant.
|
||||
/// This extracts the common base that all leaves in this pattern instance share.
|
||||
pub fn get_pattern_instance_base(node: &TreeNode, field_name: &str) -> String {
|
||||
if let Some(leaf_name) = get_first_leaf_name(node) {
|
||||
// Look for field_name in the leaf metric name
|
||||
if leaf_name.contains(field_name) {
|
||||
// The field name is part of the metric - use it as base
|
||||
return field_name.to_string();
|
||||
}
|
||||
}
|
||||
// Fallback: use field name
|
||||
field_name.to_string()
|
||||
}
|
||||
|
||||
/// Detect index patterns - collect all indexes and find sets that appear 2+ times
|
||||
fn detect_index_patterns(tree: &TreeNode) -> (BTreeSet<Index>, Vec<IndexSetPattern>) {
|
||||
let mut used_indexes: BTreeSet<Index> = BTreeSet::new();
|
||||
let mut index_sets: Vec<BTreeSet<Index>> = Vec::new();
|
||||
|
||||
// Traverse tree and collect index information from leaves
|
||||
collect_indexes_from_tree(tree, &mut used_indexes, &mut index_sets);
|
||||
|
||||
// Count occurrences of each unique index set
|
||||
let mut index_set_counts: Vec<(BTreeSet<Index>, usize)> = Vec::new();
|
||||
for index_set in index_sets {
|
||||
if let Some(entry) = index_set_counts.iter_mut().find(|(s, _)| s == &index_set) {
|
||||
entry.1 += 1;
|
||||
} else {
|
||||
index_set_counts.push((index_set, 1));
|
||||
}
|
||||
}
|
||||
|
||||
// Build patterns for index sets appearing 2+ times
|
||||
let mut patterns: Vec<IndexSetPattern> = index_set_counts
|
||||
.into_iter()
|
||||
.filter(|(indexes, count)| *count >= 2 && !indexes.is_empty())
|
||||
.enumerate()
|
||||
.map(|(i, (indexes, _))| IndexSetPattern {
|
||||
name: if i == 0 {
|
||||
"Indexes".to_string()
|
||||
} else {
|
||||
format!("Indexes{}", i + 1)
|
||||
},
|
||||
indexes,
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by number of indexes descending
|
||||
patterns.sort_by(|a, b| b.indexes.len().cmp(&a.indexes.len()));
|
||||
|
||||
(used_indexes, patterns)
|
||||
}
|
||||
|
||||
/// Recursively collect indexes from tree leaves
|
||||
fn collect_indexes_from_tree(
|
||||
node: &TreeNode,
|
||||
used_indexes: &mut BTreeSet<Index>,
|
||||
index_sets: &mut Vec<BTreeSet<Index>>,
|
||||
) {
|
||||
match node {
|
||||
TreeNode::Leaf(leaf) => {
|
||||
// Add all indexes from this leaf to the global set
|
||||
used_indexes.extend(leaf.indexes().iter().cloned());
|
||||
// Collect this index set
|
||||
index_sets.push(leaf.indexes().clone());
|
||||
}
|
||||
TreeNode::Branch(children) => {
|
||||
for child in children.values() {
|
||||
collect_indexes_from_tree(child, used_indexes, index_sets);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
//! Case conversion utilities for identifiers.
|
||||
|
||||
/// Convert a string to PascalCase (e.g., "fee_rate" -> "FeeRate").
|
||||
pub fn to_pascal_case(s: &str) -> String {
|
||||
s.replace('-', "_")
|
||||
.split('_')
|
||||
.map(|word| {
|
||||
let mut chars = word.chars();
|
||||
match chars.next() {
|
||||
None => String::new(),
|
||||
Some(first) => first.to_uppercase().collect::<String>() + chars.as_str(),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Convert a string to snake_case, handling Rust keywords.
|
||||
pub fn to_snake_case(s: &str) -> String {
|
||||
let sanitized = s.replace('-', "_");
|
||||
|
||||
// Prefix with _ if starts with digit
|
||||
let sanitized = if sanitized.chars().next().is_some_and(|c| c.is_ascii_digit()) {
|
||||
format!("_{}", sanitized)
|
||||
} else {
|
||||
sanitized
|
||||
};
|
||||
|
||||
// Handle Rust keywords
|
||||
match sanitized.as_str() {
|
||||
"type" | "const" | "static" | "match" | "if" | "else" | "loop" | "while" | "for"
|
||||
| "break" | "continue" | "return" | "fn" | "let" | "mut" | "ref" | "self" | "super"
|
||||
| "mod" | "use" | "pub" | "crate" | "extern" | "impl" | "trait" | "struct" | "enum"
|
||||
| "where" | "async" | "await" | "dyn" | "move" => format!("r#{}", sanitized),
|
||||
_ => sanitized,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a string to camelCase (e.g., "fee_rate" -> "feeRate").
|
||||
pub fn to_camel_case(s: &str) -> String {
|
||||
let pascal = to_pascal_case(s);
|
||||
let mut chars = pascal.chars();
|
||||
|
||||
let result = match chars.next() {
|
||||
None => String::new(),
|
||||
Some(first) => first.to_lowercase().collect::<String>() + chars.as_str(),
|
||||
};
|
||||
|
||||
// Prefix with _ if starts with digit
|
||||
if result.chars().next().is_some_and(|c| c.is_ascii_digit()) {
|
||||
format!("_{}", result)
|
||||
} else {
|
||||
result
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,198 @@
|
||||
//! Types and utilities for client generation.
|
||||
|
||||
mod case;
|
||||
mod patterns;
|
||||
mod schema;
|
||||
mod tree;
|
||||
|
||||
pub use case::*;
|
||||
pub use schema::*;
|
||||
pub use tree::*;
|
||||
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
|
||||
use brk_query::Vecs;
|
||||
use brk_types::Index;
|
||||
|
||||
/// How a field modifies the accumulated metric name.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum FieldNamePosition {
|
||||
/// Field prepends a prefix: leaf.name() = prefix + accumulated
|
||||
Prepend(String),
|
||||
/// Field appends a suffix: leaf.name() = accumulated + suffix
|
||||
Append(String),
|
||||
/// Field IS the accumulated name (no modification)
|
||||
Identity,
|
||||
/// Field sets a new base name (used at pattern entry points)
|
||||
SetBase(String),
|
||||
}
|
||||
|
||||
/// Metadata extracted from brk_query for client generation.
|
||||
#[derive(Debug)]
|
||||
pub struct ClientMetadata {
|
||||
/// The catalog tree structure (with schemas in leaves)
|
||||
pub catalog: brk_types::TreeNode,
|
||||
/// Structural patterns - tree node shapes that repeat
|
||||
pub structural_patterns: Vec<StructuralPattern>,
|
||||
/// All indexes used across the catalog
|
||||
pub used_indexes: BTreeSet<Index>,
|
||||
/// Index set patterns - sets of indexes that appear together on metrics
|
||||
pub index_set_patterns: Vec<IndexSetPattern>,
|
||||
/// Maps concrete field signatures to pattern names
|
||||
pub concrete_to_pattern: HashMap<Vec<PatternField>, String>,
|
||||
}
|
||||
|
||||
impl ClientMetadata {
|
||||
/// Extract metadata from brk_query::Vecs.
|
||||
pub fn from_vecs(vecs: &Vecs) -> Self {
|
||||
let catalog = vecs.catalog().clone();
|
||||
let (structural_patterns, concrete_to_pattern) =
|
||||
patterns::detect_structural_patterns(&catalog);
|
||||
let (used_indexes, index_set_patterns) = tree::detect_index_patterns(&catalog);
|
||||
|
||||
ClientMetadata {
|
||||
catalog,
|
||||
structural_patterns,
|
||||
used_indexes,
|
||||
index_set_patterns,
|
||||
concrete_to_pattern,
|
||||
}
|
||||
}
|
||||
|
||||
/// Find an index set pattern that matches the given indexes.
|
||||
pub fn find_index_set_pattern(&self, indexes: &BTreeSet<Index>) -> Option<&IndexSetPattern> {
|
||||
self.index_set_patterns
|
||||
.iter()
|
||||
.find(|p| &p.indexes == indexes)
|
||||
}
|
||||
|
||||
/// Check if a type is a structural pattern name.
|
||||
pub fn is_pattern_type(&self, type_name: &str) -> bool {
|
||||
self.structural_patterns.iter().any(|p| p.name == type_name)
|
||||
}
|
||||
|
||||
/// Find a pattern by name.
|
||||
pub fn find_pattern(&self, name: &str) -> Option<&StructuralPattern> {
|
||||
self.structural_patterns.iter().find(|p| p.name == name)
|
||||
}
|
||||
|
||||
/// Check if a pattern is generic.
|
||||
pub fn is_pattern_generic(&self, name: &str) -> bool {
|
||||
self.find_pattern(name).is_some_and(|p| p.is_generic)
|
||||
}
|
||||
|
||||
/// Extract the value type from concrete fields for a generic pattern.
|
||||
pub fn get_generic_value_type(
|
||||
&self,
|
||||
pattern_name: &str,
|
||||
fields: &[PatternField],
|
||||
) -> Option<String> {
|
||||
if !self.is_pattern_generic(pattern_name) {
|
||||
return None;
|
||||
}
|
||||
fields
|
||||
.iter()
|
||||
.find(|f| f.is_leaf())
|
||||
.map(|f| extract_inner_type(&f.rust_type))
|
||||
}
|
||||
|
||||
/// Build a lookup map from field signatures to pattern names.
|
||||
pub fn pattern_lookup(&self) -> HashMap<Vec<PatternField>, String> {
|
||||
let mut lookup = self.concrete_to_pattern.clone();
|
||||
for p in &self.structural_patterns {
|
||||
lookup.insert(p.fields.clone(), p.name.clone());
|
||||
}
|
||||
lookup
|
||||
}
|
||||
|
||||
/// Check if a field should use a shared index accessor.
|
||||
pub fn field_uses_accessor(&self, field: &PatternField) -> bool {
|
||||
self.find_index_set_pattern(&field.indexes).is_some()
|
||||
}
|
||||
}
|
||||
|
||||
/// A pattern of indexes that appear together on multiple metrics.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IndexSetPattern {
|
||||
/// Pattern name (e.g., "DateHeightIndexes")
|
||||
pub name: String,
|
||||
/// The set of indexes
|
||||
pub indexes: BTreeSet<Index>,
|
||||
}
|
||||
|
||||
/// A structural pattern - a branch structure that appears multiple times.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct StructuralPattern {
|
||||
/// Pattern name
|
||||
pub name: String,
|
||||
/// Ordered list of child fields
|
||||
pub fields: Vec<PatternField>,
|
||||
/// How each field modifies the accumulated name
|
||||
pub field_positions: HashMap<String, FieldNamePosition>,
|
||||
/// If true, all leaf fields use a type parameter T
|
||||
pub is_generic: bool,
|
||||
}
|
||||
|
||||
impl StructuralPattern {
|
||||
/// Returns true if this pattern contains any leaf fields.
|
||||
pub fn contains_leaves(&self) -> bool {
|
||||
self.fields.iter().any(|f| f.is_leaf())
|
||||
}
|
||||
|
||||
/// Returns true if all leaf fields have consistent name transformations.
|
||||
pub fn is_parameterizable(&self) -> bool {
|
||||
!self.field_positions.is_empty()
|
||||
&& self
|
||||
.fields
|
||||
.iter()
|
||||
.all(|f| f.is_branch() || self.field_positions.contains_key(&f.name))
|
||||
}
|
||||
|
||||
/// Get the field position for a given field name.
|
||||
pub fn get_field_position(&self, field_name: &str) -> Option<&FieldNamePosition> {
|
||||
self.field_positions.get(field_name)
|
||||
}
|
||||
}
|
||||
|
||||
/// A field in a structural pattern.
|
||||
#[derive(Debug, Clone, PartialOrd, Ord)]
|
||||
pub struct PatternField {
|
||||
/// Field name
|
||||
pub name: String,
|
||||
/// Rust type for leaves or pattern name for branches
|
||||
pub rust_type: String,
|
||||
/// JSON type from schema
|
||||
pub json_type: String,
|
||||
/// For leaves: the set of supported indexes. Empty for branches.
|
||||
pub indexes: BTreeSet<Index>,
|
||||
}
|
||||
|
||||
impl PatternField {
|
||||
/// Returns true if this is a leaf field (has indexes).
|
||||
pub fn is_leaf(&self) -> bool {
|
||||
!self.indexes.is_empty()
|
||||
}
|
||||
|
||||
/// Returns true if this is a branch field (no indexes).
|
||||
pub fn is_branch(&self) -> bool {
|
||||
self.indexes.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl std::hash::Hash for PatternField {
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
self.name.hash(state);
|
||||
self.rust_type.hash(state);
|
||||
self.json_type.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for PatternField {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.name == other.name
|
||||
&& self.rust_type == other.rust_type
|
||||
&& self.json_type == other.json_type
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for PatternField {}
|
||||
@@ -0,0 +1,427 @@
|
||||
//! Pattern detection for structural patterns in the metric tree.
|
||||
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap};
|
||||
|
||||
use brk_types::TreeNode;
|
||||
|
||||
use super::{
|
||||
case::to_pascal_case, schema::schema_to_json_type, FieldNamePosition, PatternField,
|
||||
StructuralPattern,
|
||||
};
|
||||
|
||||
/// Detect structural patterns in the tree using a bottom-up approach.
|
||||
/// Returns (patterns, concrete_to_pattern_mapping).
|
||||
pub fn detect_structural_patterns(
|
||||
tree: &TreeNode,
|
||||
) -> (Vec<StructuralPattern>, HashMap<Vec<PatternField>, String>) {
|
||||
let mut signature_to_pattern: HashMap<Vec<PatternField>, String> = HashMap::new();
|
||||
let mut signature_counts: HashMap<Vec<PatternField>, usize> = HashMap::new();
|
||||
let mut normalized_to_name: HashMap<Vec<PatternField>, String> = HashMap::new();
|
||||
let mut name_counts: HashMap<String, usize> = HashMap::new();
|
||||
|
||||
// Process tree bottom-up to resolve all branch types
|
||||
resolve_branch_patterns(
|
||||
tree,
|
||||
"root",
|
||||
&mut signature_to_pattern,
|
||||
&mut signature_counts,
|
||||
&mut normalized_to_name,
|
||||
&mut name_counts,
|
||||
);
|
||||
|
||||
// Identify generic patterns
|
||||
let (generic_patterns, generic_mappings) = detect_generic_patterns(&signature_to_pattern);
|
||||
|
||||
// Build non-generic patterns: signatures appearing 2+ times that weren't merged into generics
|
||||
let mut patterns: Vec<StructuralPattern> = signature_to_pattern
|
||||
.iter()
|
||||
.filter(|(sig, _)| {
|
||||
signature_counts.get(*sig).copied().unwrap_or(0) >= 2
|
||||
&& !generic_mappings.contains_key(*sig)
|
||||
})
|
||||
.map(|(fields, name)| StructuralPattern {
|
||||
name: name.clone(),
|
||||
fields: fields.clone(),
|
||||
field_positions: HashMap::new(),
|
||||
is_generic: false,
|
||||
})
|
||||
.collect();
|
||||
|
||||
patterns.extend(generic_patterns);
|
||||
|
||||
// Build lookup for field position analysis
|
||||
let mut pattern_lookup: HashMap<Vec<PatternField>, String> = HashMap::new();
|
||||
for (sig, name) in &signature_to_pattern {
|
||||
if signature_counts.get(sig).copied().unwrap_or(0) >= 2 {
|
||||
pattern_lookup.insert(sig.clone(), name.clone());
|
||||
}
|
||||
}
|
||||
pattern_lookup.extend(generic_mappings.clone());
|
||||
|
||||
let concrete_to_pattern = pattern_lookup.clone();
|
||||
|
||||
// Second pass: analyze field positions
|
||||
analyze_pattern_field_positions(tree, &mut patterns, &pattern_lookup);
|
||||
|
||||
patterns.sort_by(|a, b| b.fields.len().cmp(&a.fields.len()));
|
||||
(patterns, concrete_to_pattern)
|
||||
}
|
||||
|
||||
/// Detect generic patterns by grouping signatures by their normalized form.
|
||||
fn detect_generic_patterns(
|
||||
signature_to_pattern: &HashMap<Vec<PatternField>, String>,
|
||||
) -> (Vec<StructuralPattern>, HashMap<Vec<PatternField>, String>) {
|
||||
let mut normalized_groups: HashMap<Vec<PatternField>, Vec<(Vec<PatternField>, String)>> =
|
||||
HashMap::new();
|
||||
|
||||
for (fields, name) in signature_to_pattern {
|
||||
if let Some(normalized) = normalize_fields_for_generic(fields) {
|
||||
normalized_groups
|
||||
.entry(normalized)
|
||||
.or_default()
|
||||
.push((fields.clone(), name.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
let mut patterns = Vec::new();
|
||||
let mut mappings: HashMap<Vec<PatternField>, String> = HashMap::new();
|
||||
|
||||
for (normalized_fields, group) in normalized_groups {
|
||||
if group.len() >= 2 {
|
||||
let generic_name = group[0].1.clone();
|
||||
for (concrete_fields, _) in &group {
|
||||
mappings.insert(concrete_fields.clone(), generic_name.clone());
|
||||
}
|
||||
patterns.push(StructuralPattern {
|
||||
name: generic_name,
|
||||
fields: normalized_fields,
|
||||
field_positions: HashMap::new(),
|
||||
is_generic: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
(patterns, mappings)
|
||||
}
|
||||
|
||||
/// Normalize fields by replacing concrete value types with "T".
|
||||
fn normalize_fields_for_generic(fields: &[PatternField]) -> Option<Vec<PatternField>> {
|
||||
let leaf_types: Vec<&str> = fields
|
||||
.iter()
|
||||
.filter(|f| f.is_leaf())
|
||||
.map(|f| f.rust_type.as_str())
|
||||
.collect();
|
||||
|
||||
if leaf_types.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let first_type = leaf_types[0];
|
||||
if !leaf_types.iter().all(|t| *t == first_type) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let normalized = fields
|
||||
.iter()
|
||||
.map(|f| {
|
||||
if f.is_branch() {
|
||||
f.clone()
|
||||
} else {
|
||||
PatternField {
|
||||
name: f.name.clone(),
|
||||
rust_type: "T".to_string(),
|
||||
json_type: "T".to_string(),
|
||||
indexes: f.indexes.clone(),
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Some(normalized)
|
||||
}
|
||||
|
||||
/// Recursively resolve branch patterns bottom-up.
|
||||
fn resolve_branch_patterns(
|
||||
node: &TreeNode,
|
||||
field_name: &str,
|
||||
signature_to_pattern: &mut HashMap<Vec<PatternField>, String>,
|
||||
signature_counts: &mut HashMap<Vec<PatternField>, usize>,
|
||||
normalized_to_name: &mut HashMap<Vec<PatternField>, String>,
|
||||
name_counts: &mut HashMap<String, usize>,
|
||||
) -> Option<String> {
|
||||
let TreeNode::Branch(children) = node else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let mut fields: Vec<PatternField> = Vec::new();
|
||||
for (child_name, child_node) in children {
|
||||
let (rust_type, json_type, indexes) = match child_node {
|
||||
TreeNode::Leaf(leaf) => (
|
||||
leaf.value_type().to_string(),
|
||||
schema_to_json_type(&leaf.schema),
|
||||
leaf.indexes().clone(),
|
||||
),
|
||||
TreeNode::Branch(_) => {
|
||||
let pattern_name = resolve_branch_patterns(
|
||||
child_node,
|
||||
child_name,
|
||||
signature_to_pattern,
|
||||
signature_counts,
|
||||
normalized_to_name,
|
||||
name_counts,
|
||||
)
|
||||
.unwrap_or_else(|| "Unknown".to_string());
|
||||
(pattern_name.clone(), pattern_name, BTreeSet::new())
|
||||
}
|
||||
};
|
||||
fields.push(PatternField {
|
||||
name: child_name.clone(),
|
||||
rust_type,
|
||||
json_type,
|
||||
indexes,
|
||||
});
|
||||
}
|
||||
|
||||
fields.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
*signature_counts.entry(fields.clone()).or_insert(0) += 1;
|
||||
|
||||
let pattern_name = if let Some(existing) = signature_to_pattern.get(&fields) {
|
||||
existing.clone()
|
||||
} else {
|
||||
let normalized = normalize_fields_for_naming(&fields);
|
||||
let name = normalized_to_name
|
||||
.entry(normalized)
|
||||
.or_insert_with(|| generate_pattern_name(field_name, name_counts))
|
||||
.clone();
|
||||
signature_to_pattern.insert(fields, name.clone());
|
||||
name
|
||||
};
|
||||
|
||||
Some(pattern_name)
|
||||
}
|
||||
|
||||
/// Normalize fields for naming (same structure = same name).
|
||||
fn normalize_fields_for_naming(fields: &[PatternField]) -> Vec<PatternField> {
|
||||
fields
|
||||
.iter()
|
||||
.map(|f| {
|
||||
if f.is_branch() {
|
||||
f.clone()
|
||||
} else {
|
||||
PatternField {
|
||||
name: f.name.clone(),
|
||||
rust_type: "_".to_string(),
|
||||
json_type: "_".to_string(),
|
||||
indexes: f.indexes.clone(),
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Generate a unique pattern name.
|
||||
fn generate_pattern_name(field_name: &str, name_counts: &mut HashMap<String, usize>) -> String {
|
||||
let pascal = to_pascal_case(field_name);
|
||||
let sanitized = if pascal.chars().next().is_some_and(|c| c.is_ascii_digit()) {
|
||||
format!("_{}", pascal)
|
||||
} else {
|
||||
pascal
|
||||
};
|
||||
|
||||
let base_name = format!("{}Pattern", sanitized);
|
||||
let count = name_counts.entry(base_name.clone()).or_insert(0);
|
||||
*count += 1;
|
||||
|
||||
if *count == 1 {
|
||||
base_name
|
||||
} else {
|
||||
format!("{}{}", base_name, count)
|
||||
}
|
||||
}
|
||||
|
||||
// Field position analysis
|
||||
|
||||
fn analyze_pattern_field_positions(
|
||||
tree: &TreeNode,
|
||||
patterns: &mut [StructuralPattern],
|
||||
pattern_lookup: &HashMap<Vec<PatternField>, String>,
|
||||
) {
|
||||
let mut instances: HashMap<String, Vec<(String, String, String)>> = HashMap::new();
|
||||
collect_pattern_instances(tree, "", &mut instances, pattern_lookup);
|
||||
|
||||
for pattern in patterns.iter_mut() {
|
||||
if let Some(pattern_instances) = instances.get(&pattern.name) {
|
||||
pattern.field_positions = analyze_field_positions_from_instances(pattern_instances);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_pattern_instances(
|
||||
node: &TreeNode,
|
||||
accumulated_name: &str,
|
||||
instances: &mut HashMap<String, Vec<(String, String, String)>>,
|
||||
pattern_lookup: &HashMap<Vec<PatternField>, String>,
|
||||
) {
|
||||
let TreeNode::Branch(children) = node else {
|
||||
return;
|
||||
};
|
||||
|
||||
let fields = get_node_fields_for_analysis(children, pattern_lookup);
|
||||
if let Some(pattern_name) = pattern_lookup.get(&fields) {
|
||||
for (field_name, child_node) in children {
|
||||
if let TreeNode::Leaf(leaf) = child_node {
|
||||
instances.entry(pattern_name.clone()).or_default().push((
|
||||
accumulated_name.to_string(),
|
||||
field_name.clone(),
|
||||
leaf.name().to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (field_name, child_node) in children {
|
||||
let child_accumulated = match child_node {
|
||||
TreeNode::Leaf(leaf) => leaf.name().to_string(),
|
||||
TreeNode::Branch(_) => {
|
||||
if let Some(desc_leaf_name) = get_descendant_leaf_name(child_node) {
|
||||
infer_accumulated_name(accumulated_name, field_name, &desc_leaf_name)
|
||||
} else if accumulated_name.is_empty() {
|
||||
field_name.clone()
|
||||
} else {
|
||||
format!("{}_{}", accumulated_name, field_name)
|
||||
}
|
||||
}
|
||||
};
|
||||
collect_pattern_instances(child_node, &child_accumulated, instances, pattern_lookup);
|
||||
}
|
||||
}
|
||||
|
||||
fn get_descendant_leaf_name(node: &TreeNode) -> Option<String> {
|
||||
match node {
|
||||
TreeNode::Leaf(leaf) => Some(leaf.name().to_string()),
|
||||
TreeNode::Branch(children) => children.values().find_map(get_descendant_leaf_name),
|
||||
}
|
||||
}
|
||||
|
||||
fn infer_accumulated_name(parent_acc: &str, field_name: &str, descendant_leaf: &str) -> String {
|
||||
if let Some(pos) = descendant_leaf.find(field_name) {
|
||||
if pos == 0 {
|
||||
return field_name.to_string();
|
||||
}
|
||||
if pos > 0 && descendant_leaf.chars().nth(pos - 1) == Some('_') {
|
||||
return if parent_acc.is_empty() {
|
||||
field_name.to_string()
|
||||
} else {
|
||||
format!("{}_{}", parent_acc, field_name)
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if parent_acc.is_empty() {
|
||||
field_name.to_string()
|
||||
} else {
|
||||
format!("{}_{}", parent_acc, field_name)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_node_fields_for_analysis(
|
||||
children: &BTreeMap<String, TreeNode>,
|
||||
pattern_lookup: &HashMap<Vec<PatternField>, String>,
|
||||
) -> Vec<PatternField> {
|
||||
let mut fields: Vec<PatternField> = children
|
||||
.iter()
|
||||
.map(|(name, node)| {
|
||||
let (rust_type, json_type, indexes) = match node {
|
||||
TreeNode::Leaf(leaf) => (
|
||||
leaf.value_type().to_string(),
|
||||
schema_to_json_type(&leaf.schema),
|
||||
leaf.indexes().clone(),
|
||||
),
|
||||
TreeNode::Branch(grandchildren) => {
|
||||
let child_fields = get_node_fields_for_analysis(grandchildren, pattern_lookup);
|
||||
let pattern_name = pattern_lookup
|
||||
.get(&child_fields)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| "Unknown".to_string());
|
||||
(pattern_name.clone(), pattern_name, BTreeSet::new())
|
||||
}
|
||||
};
|
||||
PatternField {
|
||||
name: name.clone(),
|
||||
rust_type,
|
||||
json_type,
|
||||
indexes,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
fields.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
fields
|
||||
}
|
||||
|
||||
fn analyze_field_positions_from_instances(
|
||||
instances: &[(String, String, String)],
|
||||
) -> HashMap<String, FieldNamePosition> {
|
||||
let mut field_instances: HashMap<String, Vec<(String, String)>> = HashMap::new();
|
||||
for (acc, field, leaf) in instances {
|
||||
field_instances
|
||||
.entry(field.clone())
|
||||
.or_default()
|
||||
.push((acc.clone(), leaf.clone()));
|
||||
}
|
||||
|
||||
let mut positions = HashMap::new();
|
||||
for (field_name, field_data) in field_instances {
|
||||
if let Some(position) = detect_field_position(&field_data) {
|
||||
positions.insert(field_name, position);
|
||||
}
|
||||
}
|
||||
positions
|
||||
}
|
||||
|
||||
fn detect_field_position(data: &[(String, String)]) -> Option<FieldNamePosition> {
|
||||
if data.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let (first_acc, first_leaf) = &data[0];
|
||||
|
||||
// Identity
|
||||
if first_acc == first_leaf {
|
||||
return Some(FieldNamePosition::Identity);
|
||||
}
|
||||
|
||||
// Append
|
||||
if let Some(suffix) = first_leaf.strip_prefix(first_acc.as_str()) {
|
||||
let suffix = suffix.to_string();
|
||||
if data.iter().all(|(acc, leaf)| {
|
||||
if acc.is_empty() {
|
||||
leaf == suffix.trim_start_matches('_')
|
||||
} else {
|
||||
leaf.strip_prefix(acc.as_str()) == Some(&suffix)
|
||||
}
|
||||
}) {
|
||||
return Some(FieldNamePosition::Append(suffix));
|
||||
}
|
||||
}
|
||||
|
||||
// Prepend
|
||||
if let Some(prefix) = first_leaf.strip_suffix(first_acc.as_str()) {
|
||||
let prefix = prefix.to_string();
|
||||
if data.iter().all(|(acc, leaf)| {
|
||||
if acc.is_empty() {
|
||||
leaf == prefix.trim_end_matches('_')
|
||||
} else {
|
||||
leaf.strip_suffix(acc.as_str()) == Some(&prefix)
|
||||
}
|
||||
}) {
|
||||
return Some(FieldNamePosition::Prepend(prefix));
|
||||
}
|
||||
}
|
||||
|
||||
// SetBase
|
||||
if first_acc.is_empty() {
|
||||
return Some(FieldNamePosition::SetBase(first_leaf.clone()));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
//! JSON Schema utilities.
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
/// Unwrap allOf with a single element, returning the inner schema.
|
||||
/// Schemars uses allOf for composition, but often with just one $ref.
|
||||
pub fn unwrap_allof(schema: &Value) -> &Value {
|
||||
if let Some(all_of) = schema.get("allOf").and_then(|v| v.as_array())
|
||||
&& all_of.len() == 1
|
||||
{
|
||||
return &all_of[0];
|
||||
}
|
||||
schema
|
||||
}
|
||||
|
||||
/// Check if a schema represents an enum type.
|
||||
/// Enums have either an "enum" array or "oneOf" without properties.
|
||||
pub fn is_enum_schema(schema: &Value) -> bool {
|
||||
schema.get("enum").is_some()
|
||||
|| (schema.get("oneOf").is_some() && schema.get("properties").is_none())
|
||||
}
|
||||
|
||||
/// Extract inner type from a wrapper generic like `Close<Dollars>` -> `Dollars`.
|
||||
/// Also handles malformed types like `Dollars>` (from vecdb's short_type_name).
|
||||
pub fn extract_inner_type(type_str: &str) -> String {
|
||||
// Handle proper generic wrappers like `Close<Dollars>` -> `Dollars`
|
||||
if let Some(start) = type_str.find('<')
|
||||
&& let Some(end) = type_str.rfind('>')
|
||||
&& start < end
|
||||
{
|
||||
return type_str[start + 1..end].to_string();
|
||||
}
|
||||
// Handle malformed types like `Dollars>` (trailing > without <)
|
||||
if type_str.ends_with('>') && !type_str.contains('<') {
|
||||
return type_str.trim_end_matches('>').to_string();
|
||||
}
|
||||
type_str.to_string()
|
||||
}
|
||||
|
||||
/// Extract JSON type from a schema ("integer", "number", "string", etc).
|
||||
pub fn schema_to_json_type(schema: &Value) -> String {
|
||||
schema
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("object")
|
||||
.to_string()
|
||||
}
|
||||
@@ -0,0 +1,194 @@
|
||||
//! Tree traversal utilities.
|
||||
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap};
|
||||
|
||||
use brk_types::{Index, TreeNode};
|
||||
|
||||
use super::{PatternField, case::to_pascal_case, schema::schema_to_json_type};
|
||||
|
||||
/// Get the first leaf name from a tree node.
|
||||
pub fn get_first_leaf_name(node: &TreeNode) -> Option<String> {
|
||||
match node {
|
||||
TreeNode::Leaf(leaf) => Some(leaf.name().to_string()),
|
||||
TreeNode::Branch(children) => children.values().find_map(get_first_leaf_name),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the metric base for a pattern instance by analyzing the first leaf descendant.
|
||||
pub fn get_pattern_instance_base(node: &TreeNode, field_name: &str) -> String {
|
||||
if let Some(leaf_name) = get_first_leaf_name(node)
|
||||
&& leaf_name.contains(field_name)
|
||||
{
|
||||
return field_name.to_string();
|
||||
}
|
||||
field_name.to_string()
|
||||
}
|
||||
|
||||
/// Get the field signature for a branch node's children.
|
||||
pub fn get_node_fields(
|
||||
children: &BTreeMap<String, TreeNode>,
|
||||
pattern_lookup: &HashMap<Vec<PatternField>, String>,
|
||||
) -> Vec<PatternField> {
|
||||
let mut fields: Vec<PatternField> = children
|
||||
.iter()
|
||||
.map(|(name, node)| {
|
||||
let (rust_type, json_type, indexes) = match node {
|
||||
TreeNode::Leaf(leaf) => (
|
||||
leaf.value_type().to_string(),
|
||||
schema_to_json_type(&leaf.schema),
|
||||
leaf.indexes().clone(),
|
||||
),
|
||||
TreeNode::Branch(grandchildren) => {
|
||||
let child_fields = get_node_fields(grandchildren, pattern_lookup);
|
||||
let pattern_name = pattern_lookup
|
||||
.get(&child_fields)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| "Unknown".to_string());
|
||||
(pattern_name.clone(), pattern_name, BTreeSet::new())
|
||||
}
|
||||
};
|
||||
PatternField {
|
||||
name: name.clone(),
|
||||
rust_type,
|
||||
json_type,
|
||||
indexes,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
fields.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
fields
|
||||
}
|
||||
|
||||
/// Like get_node_fields but takes a parent name for generating child pattern names.
|
||||
pub fn get_node_fields_with_parent(
|
||||
children: &BTreeMap<String, TreeNode>,
|
||||
parent_name: &str,
|
||||
pattern_lookup: &HashMap<Vec<PatternField>, String>,
|
||||
) -> Vec<PatternField> {
|
||||
let mut fields: Vec<PatternField> = children
|
||||
.iter()
|
||||
.map(|(name, node)| {
|
||||
let (rust_type, json_type, indexes) = match node {
|
||||
TreeNode::Leaf(leaf) => (
|
||||
leaf.value_type().to_string(),
|
||||
schema_to_json_type(&leaf.schema),
|
||||
leaf.indexes().clone(),
|
||||
),
|
||||
TreeNode::Branch(grandchildren) => {
|
||||
let child_fields = get_node_fields(grandchildren, pattern_lookup);
|
||||
let pattern_name = pattern_lookup
|
||||
.get(&child_fields)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| format!("{}_{}", parent_name, to_pascal_case(name)));
|
||||
(pattern_name.clone(), pattern_name, BTreeSet::new())
|
||||
}
|
||||
};
|
||||
PatternField {
|
||||
name: name.clone(),
|
||||
rust_type,
|
||||
json_type,
|
||||
indexes,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
fields.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
fields
|
||||
}
|
||||
|
||||
/// Get fields with child field information for generic pattern lookup.
|
||||
/// Returns (field, child_fields) pairs where child_fields is Some for branches.
|
||||
pub fn get_fields_with_child_info(
|
||||
children: &BTreeMap<String, TreeNode>,
|
||||
parent_name: &str,
|
||||
pattern_lookup: &HashMap<Vec<PatternField>, String>,
|
||||
) -> Vec<(PatternField, Option<Vec<PatternField>>)> {
|
||||
children
|
||||
.iter()
|
||||
.map(|(name, node)| {
|
||||
let (rust_type, json_type, indexes, child_fields) = match node {
|
||||
TreeNode::Leaf(leaf) => (
|
||||
leaf.value_type().to_string(),
|
||||
schema_to_json_type(&leaf.schema),
|
||||
leaf.indexes().clone(),
|
||||
None,
|
||||
),
|
||||
TreeNode::Branch(grandchildren) => {
|
||||
let child_fields = get_node_fields(grandchildren, pattern_lookup);
|
||||
let pattern_name = pattern_lookup
|
||||
.get(&child_fields)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| format!("{}_{}", parent_name, to_pascal_case(name)));
|
||||
(
|
||||
pattern_name.clone(),
|
||||
pattern_name,
|
||||
BTreeSet::new(),
|
||||
Some(child_fields),
|
||||
)
|
||||
}
|
||||
};
|
||||
(
|
||||
PatternField {
|
||||
name: name.clone(),
|
||||
rust_type,
|
||||
json_type,
|
||||
indexes,
|
||||
},
|
||||
child_fields,
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Detect index patterns (sets of indexes that appear together on multiple metrics).
|
||||
pub fn detect_index_patterns(tree: &TreeNode) -> (BTreeSet<Index>, Vec<super::IndexSetPattern>) {
|
||||
let mut used_indexes: BTreeSet<Index> = BTreeSet::new();
|
||||
let mut index_sets: Vec<BTreeSet<Index>> = Vec::new();
|
||||
|
||||
collect_indexes_from_tree(tree, &mut used_indexes, &mut index_sets);
|
||||
|
||||
// Count occurrences of each unique index set
|
||||
let mut index_set_counts: Vec<(BTreeSet<Index>, usize)> = Vec::new();
|
||||
for index_set in index_sets {
|
||||
if let Some(entry) = index_set_counts.iter_mut().find(|(s, _)| s == &index_set) {
|
||||
entry.1 += 1;
|
||||
} else {
|
||||
index_set_counts.push((index_set, 1));
|
||||
}
|
||||
}
|
||||
|
||||
// Build patterns for index sets appearing 2+ times
|
||||
let mut patterns: Vec<super::IndexSetPattern> = index_set_counts
|
||||
.into_iter()
|
||||
.filter(|(indexes, count)| *count >= 2 && !indexes.is_empty())
|
||||
.enumerate()
|
||||
.map(|(i, (indexes, _))| super::IndexSetPattern {
|
||||
name: if i == 0 {
|
||||
"Indexes".to_string()
|
||||
} else {
|
||||
format!("Indexes{}", i + 1)
|
||||
},
|
||||
indexes,
|
||||
})
|
||||
.collect();
|
||||
|
||||
patterns.sort_by(|a, b| b.indexes.len().cmp(&a.indexes.len()));
|
||||
(used_indexes, patterns)
|
||||
}
|
||||
|
||||
fn collect_indexes_from_tree(
|
||||
node: &TreeNode,
|
||||
used_indexes: &mut BTreeSet<Index>,
|
||||
index_sets: &mut Vec<BTreeSet<Index>>,
|
||||
) {
|
||||
match node {
|
||||
TreeNode::Leaf(leaf) => {
|
||||
used_indexes.extend(leaf.indexes().iter().cloned());
|
||||
index_sets.push(leaf.indexes().clone());
|
||||
}
|
||||
TreeNode::Branch(children) => {
|
||||
for child in children.values() {
|
||||
collect_indexes_from_tree(child, used_indexes, index_sets);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user