diff --git a/crates/brk_binder/src/javascript.rs b/crates/brk_binder/src/javascript.rs index 2b52a4628..b534f6e38 100644 --- a/crates/brk_binder/src/javascript.rs +++ b/crates/brk_binder/src/javascript.rs @@ -10,7 +10,7 @@ use serde_json::Value; use crate::{ ClientMetadata, Endpoint, FieldNamePosition, IndexSetPattern, PatternField, StructuralPattern, TypeSchemas, extract_inner_type, get_first_leaf_name, get_node_fields, - get_pattern_instance_base, to_camel_case, to_pascal_case, + get_pattern_instance_base, to_camel_case, to_pascal_case, unwrap_allof, }; /// Generate JavaScript + JSDoc client from metadata and OpenAPI endpoints @@ -101,6 +101,9 @@ fn is_primitive_alias(schema: &Value) -> bool { /// Convert JSON Schema to JavaScript/JSDoc type fn schema_to_js_type(schema: &Value) -> String { + // Unwrap single-element allOf (schemars uses this for composition) + let schema = unwrap_allof(schema); + // Handle $ref if let Some(ref_path) = schema.get("$ref").and_then(|r| r.as_str()) { return ref_path.rsplit('/').next().unwrap_or("*").to_string(); diff --git a/crates/brk_binder/src/lib.rs b/crates/brk_binder/src/lib.rs index 3f6bf991d..bbcbd9e8a 100644 --- a/crates/brk_binder/src/lib.rs +++ b/crates/brk_binder/src/lib.rs @@ -30,6 +30,13 @@ pub fn generate_clients(vecs: &Vecs, openapi_json: &str, output_dir: &Path) -> i // Collect leaf type schemas from the catalog and merge into schemas collect_leaf_type_schemas(&metadata.catalog, &mut schemas); + // Also collect definitions from all schemas (including OpenAPI schemas) + // We need to do this after collecting leaf schemas so we process everything + let schema_values: Vec<_> = schemas.values().cloned().collect(); + for schema in &schema_values { + collect_schema_definitions(schema, &mut schemas); + } + // Generate Rust client (uses real brk_types, no schema conversion needed) let rust_path = output_dir.join("rust"); create_dir_all(&rust_path)?; @@ -49,9 +56,11 @@ pub fn generate_clients(vecs: &Vecs, openapi_json: &str, output_dir: &Path) -> i } use brk_types::TreeNode; +use serde_json::Value; /// Recursively collect leaf type schemas from the tree and add to schemas map. /// Only adds schemas that aren't already present (OpenAPI schemas take precedence). +/// Also collects definitions from schemars-generated schemas (for referenced types). fn collect_leaf_type_schemas(node: &TreeNode, schemas: &mut TypeSchemas) { match node { TreeNode::Leaf(leaf) => { @@ -63,6 +72,9 @@ fn collect_leaf_type_schemas(node: &TreeNode, schemas: &mut TypeSchemas) { // The leaf schema is the schemars-generated JSON schema schemas.insert(type_name, leaf.schema.clone()); } + + // Also collect any definitions from the schema (schemars puts referenced types here) + collect_schema_definitions(&leaf.schema, schemas); } TreeNode::Branch(children) => { for child in children.values() { @@ -71,3 +83,26 @@ fn collect_leaf_type_schemas(node: &TreeNode, schemas: &mut TypeSchemas) { } } } + +/// Collect type definitions from schemars-generated schema's definitions section. +/// Schemars uses `definitions` or `$defs` to store referenced types. +fn collect_schema_definitions(schema: &Value, schemas: &mut TypeSchemas) { + // Check for definitions (JSON Schema draft-07 style) + if let Some(defs) = schema.get("definitions").and_then(|d| d.as_object()) { + for (name, def_schema) in defs { + // Use the definition name as-is (schemars names match $ref paths) + if !schemas.contains_key(name) { + schemas.insert(name.clone(), def_schema.clone()); + } + } + } + + // Check for $defs (JSON Schema draft 2019-09+ style) + if let Some(defs) = schema.get("$defs").and_then(|d| d.as_object()) { + for (name, def_schema) in defs { + if !schemas.contains_key(name) { + schemas.insert(name.clone(), def_schema.clone()); + } + } + } +} diff --git a/crates/brk_binder/src/python.rs b/crates/brk_binder/src/python.rs index 4f75236cd..2cd10b8b4 100644 --- a/crates/brk_binder/src/python.rs +++ b/crates/brk_binder/src/python.rs @@ -10,7 +10,7 @@ use serde_json::Value; use crate::{ ClientMetadata, Endpoint, FieldNamePosition, IndexSetPattern, PatternField, StructuralPattern, TypeSchemas, extract_inner_type, get_node_fields, get_pattern_instance_base, to_pascal_case, - to_snake_case, + to_snake_case, unwrap_allof, }; /// Generate Python client from metadata and OpenAPI endpoints @@ -70,7 +70,13 @@ fn generate_type_definitions(output: &mut String, schemas: &TypeSchemas) { writeln!(output, "# Type definitions\n").unwrap(); - for (name, schema) in schemas { + // Sort types by dependencies (types that reference other types must come after) + let sorted_names = topological_sort_schemas(schemas); + + for name in sorted_names { + let Some(schema) = schemas.get(&name) else { + continue; + }; if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) { // Object type -> TypedDict writeln!(output, "class {}(TypedDict):", name).unwrap(); @@ -89,8 +95,88 @@ fn generate_type_definitions(output: &mut String, schemas: &TypeSchemas) { writeln!(output).unwrap(); } +/// Topologically sort schema names so dependencies come before dependents. +/// Types that reference other types (via $ref) must be defined after their dependencies. +fn topological_sort_schemas(schemas: &TypeSchemas) -> Vec { + use std::collections::{HashMap, HashSet}; + + // Build dependency graph + let mut deps: HashMap> = HashMap::new(); + for (name, schema) in schemas { + let mut type_deps = HashSet::new(); + collect_schema_refs(schema, &mut type_deps); + // Only keep deps that are in our schemas + type_deps.retain(|d| schemas.contains_key(d)); + deps.insert(name.clone(), type_deps); + } + + // Kahn's algorithm for topological sort + let mut in_degree: HashMap = HashMap::new(); + for name in schemas.keys() { + in_degree.insert(name.clone(), 0); + } + for type_deps in deps.values() { + for dep in type_deps { + *in_degree.entry(dep.clone()).or_insert(0) += 1; + } + } + + // Start with types that have no dependents (are not referenced by others) + let mut queue: Vec = in_degree + .iter() + .filter(|(_, count)| **count == 0) + .map(|(name, _)| name.clone()) + .collect(); + queue.sort(); // Deterministic order + + let mut result = Vec::new(); + while let Some(name) = queue.pop() { + result.push(name.clone()); + if let Some(type_deps) = deps.get(&name) { + for dep in type_deps { + if let Some(count) = in_degree.get_mut(dep) { + *count = count.saturating_sub(1); + if *count == 0 { + queue.push(dep.clone()); + queue.sort(); // Keep sorted for determinism + } + } + } + } + } + + // Reverse so dependencies come first + result.reverse(); + result +} + +/// Collect all type references ($ref) from a schema +fn collect_schema_refs(schema: &Value, refs: &mut std::collections::HashSet) { + match schema { + Value::Object(map) => { + if let Some(ref_path) = map.get("$ref").and_then(|r| r.as_str()) { + if let Some(type_name) = ref_path.rsplit('/').next() { + refs.insert(type_name.to_string()); + } + } + for value in map.values() { + collect_schema_refs(value, refs); + } + } + Value::Array(arr) => { + for item in arr { + collect_schema_refs(item, refs); + } + } + _ => {} + } +} + /// Convert JSON Schema to Python type fn schema_to_python_type(schema: &Value) -> String { + // Unwrap single-element allOf (schemars uses this for composition) + let schema = unwrap_allof(schema); + // Handle $ref if let Some(ref_path) = schema.get("$ref").and_then(|r| r.as_str()) { return ref_path.rsplit('/').next().unwrap_or("Any").to_string(); @@ -129,7 +215,7 @@ fn schema_to_python_type(schema: &Value) -> String { "Any".to_string() } -/// Escape Python reserved keywords by appending underscore +/// Make a name safe for Python: escape keywords and prefix digit-starting names fn escape_python_keyword(name: &str) -> String { const PYTHON_KEYWORDS: &[&str] = &[ "False", "None", "True", "and", "as", "assert", "async", "await", "break", "class", @@ -137,10 +223,17 @@ fn escape_python_keyword(name: &str) -> String { "if", "import", "in", "is", "lambda", "nonlocal", "not", "or", "pass", "raise", "return", "try", "while", "with", "yield", ]; - if PYTHON_KEYWORDS.contains(&name) { - format!("{}_", name) + // Names starting with digit need underscore prefix + let name = if name.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) { + format!("_{}", name) } else { name.to_string() + }; + // Reserved keywords get underscore suffix + if PYTHON_KEYWORDS.contains(&name.as_str()) { + format!("{}_", name) + } else { + name } } @@ -726,18 +819,20 @@ fn generate_api_methods(output: &mut String, endpoints: &[Endpoint]) { } else { writeln!(output, " params = []").unwrap(); for param in &endpoint.query_params { + // Use safe name for Python variable, original name for API query parameter + let safe_name = escape_python_keyword(¶m.name); if param.required { writeln!( output, " params.append(f'{}={{{}}}')", - param.name, param.name + param.name, safe_name ) .unwrap(); } else { writeln!( output, " if {} is not None: params.append(f'{}={{{}}}')", - param.name, param.name, param.name + safe_name, param.name, safe_name ) .unwrap(); } @@ -759,11 +854,15 @@ fn endpoint_to_method_name(endpoint: &Endpoint) -> String { if let Some(op_id) = &endpoint.operation_id { return to_snake_case(op_id); } - let parts: Vec<&str> = endpoint - .path - .split('/') - .filter(|s| !s.is_empty() && !s.starts_with('{')) - .collect(); + // Include path parameters as "by_{param}" to differentiate endpoints + let mut parts = Vec::new(); + for segment in endpoint.path.split('/').filter(|s| !s.is_empty()) { + if let Some(param) = segment.strip_prefix('{').and_then(|s| s.strip_suffix('}')) { + parts.push(format!("by_{}", param)); + } else { + parts.push(segment.to_string()); + } + } to_snake_case(&format!("get_{}", parts.join("_"))) } @@ -779,13 +878,15 @@ fn js_type_to_python(js_type: &str) -> String { fn build_method_params(endpoint: &Endpoint) -> String { let mut params = Vec::new(); for param in &endpoint.path_params { - params.push(format!(", {}: str", param.name)); + let safe_name = escape_python_keyword(¶m.name); + params.push(format!(", {}: str", safe_name)); } for param in &endpoint.query_params { + let safe_name = escape_python_keyword(¶m.name); if param.required { - params.push(format!(", {}: str", param.name)); + params.push(format!(", {}: str", safe_name)); } else { - params.push(format!(", {}: Optional[str] = None", param.name)); + params.push(format!(", {}: Optional[str] = None", safe_name)); } } params.join("") diff --git a/crates/brk_binder/src/types.rs b/crates/brk_binder/src/types.rs index e95b49cd3..6b5438d6c 100644 --- a/crates/brk_binder/src/types.rs +++ b/crates/brk_binder/src/types.rs @@ -147,13 +147,19 @@ impl ClientMetadata { /// Check if a pattern is generic pub fn is_pattern_generic(&self, name: &str) -> bool { - self.find_pattern(name).map(|p| p.is_generic).unwrap_or(false) + self.find_pattern(name) + .map(|p| p.is_generic) + .unwrap_or(false) } /// Extract the value type from concrete fields for a generic pattern. /// Returns the first leaf field's rust_type if this pattern is generic. /// If the type is a wrapper like `Close`, extracts the inner type `Dollars`. - pub fn get_generic_value_type(&self, pattern_name: &str, fields: &[PatternField]) -> Option { + pub fn get_generic_value_type( + &self, + pattern_name: &str, + fields: &[PatternField], + ) -> Option { if !self.is_pattern_generic(pattern_name) { return None; } @@ -177,6 +183,19 @@ impl ClientMetadata { } } +use serde_json::Value; + +/// Unwrap allOf with a single element, returning the inner schema. +/// schemars uses allOf for composition, but often with just one $ref. +pub fn unwrap_allof(schema: &Value) -> &Value { + if let Some(all_of) = schema.get("allOf").and_then(|v| v.as_array()) + && all_of.len() == 1 + { + return &all_of[0]; + } + schema +} + /// Extract inner type from a wrapper generic like `Close` -> `Dollars`. /// Also handles malformed types like `Dollars>` (from vecdb's short_type_name which /// extracts "Dollars>" from "Close" using rsplit("::")). @@ -202,7 +221,9 @@ pub fn extract_inner_type(type_str: &str) -> String { /// For every branch node, create a signature from its children (sorted field names + types). /// Patterns that appear 2+ times are deduplicated. /// Returns (patterns, concrete_to_pattern_mapping). -fn detect_structural_patterns(tree: &TreeNode) -> (Vec, HashMap, String>) { +fn detect_structural_patterns( + tree: &TreeNode, +) -> (Vec, HashMap, String>) { // Map from sorted fields signature to pattern name let mut signature_to_pattern: HashMap, String> = HashMap::new(); // Count how many times each signature appears @@ -274,7 +295,8 @@ fn detect_generic_patterns( signature_to_pattern: &HashMap, String>, ) -> (Vec, HashMap, String>) { // Group signatures by their normalized (generic) form - let mut normalized_groups: HashMap, Vec<(Vec, String)>> = HashMap::new(); + let mut normalized_groups: HashMap, Vec<(Vec, String)>> = + HashMap::new(); for (fields, name) in signature_to_pattern { if let Some(normalized) = normalize_fields_for_generic(fields) { @@ -389,10 +411,11 @@ fn collect_pattern_instances( // Collect instances for this pattern for (field_name, child_node) in children { if let TreeNode::Leaf(leaf) = child_node { - instances - .entry(pattern_name.clone()) - .or_default() - .push((accumulated_name.to_string(), field_name.clone(), leaf.name().to_string())); + instances.entry(pattern_name.clone()).or_default().push(( + accumulated_name.to_string(), + field_name.clone(), + leaf.name().to_string(), + )); } } } @@ -901,4 +924,3 @@ fn collect_indexes_from_tree( } } } -