binder: snapshot

This commit is contained in:
nym21
2025-12-20 23:52:12 +01:00
parent 135a18d56f
commit 4386ef47fe
4 changed files with 186 additions and 25 deletions
+4 -1
View File
@@ -10,7 +10,7 @@ use serde_json::Value;
use crate::{
ClientMetadata, Endpoint, FieldNamePosition, IndexSetPattern, PatternField, StructuralPattern,
TypeSchemas, extract_inner_type, get_first_leaf_name, get_node_fields,
get_pattern_instance_base, to_camel_case, to_pascal_case,
get_pattern_instance_base, to_camel_case, to_pascal_case, unwrap_allof,
};
/// Generate JavaScript + JSDoc client from metadata and OpenAPI endpoints
@@ -101,6 +101,9 @@ fn is_primitive_alias(schema: &Value) -> bool {
/// Convert JSON Schema to JavaScript/JSDoc type
fn schema_to_js_type(schema: &Value) -> String {
// Unwrap single-element allOf (schemars uses this for composition)
let schema = unwrap_allof(schema);
// Handle $ref
if let Some(ref_path) = schema.get("$ref").and_then(|r| r.as_str()) {
return ref_path.rsplit('/').next().unwrap_or("*").to_string();
+35
View File
@@ -30,6 +30,13 @@ pub fn generate_clients(vecs: &Vecs, openapi_json: &str, output_dir: &Path) -> i
// Collect leaf type schemas from the catalog and merge into schemas
collect_leaf_type_schemas(&metadata.catalog, &mut schemas);
// Also collect definitions from all schemas (including OpenAPI schemas)
// We need to do this after collecting leaf schemas so we process everything
let schema_values: Vec<_> = schemas.values().cloned().collect();
for schema in &schema_values {
collect_schema_definitions(schema, &mut schemas);
}
// Generate Rust client (uses real brk_types, no schema conversion needed)
let rust_path = output_dir.join("rust");
create_dir_all(&rust_path)?;
@@ -49,9 +56,11 @@ pub fn generate_clients(vecs: &Vecs, openapi_json: &str, output_dir: &Path) -> i
}
use brk_types::TreeNode;
use serde_json::Value;
/// Recursively collect leaf type schemas from the tree and add to schemas map.
/// Only adds schemas that aren't already present (OpenAPI schemas take precedence).
/// Also collects definitions from schemars-generated schemas (for referenced types).
fn collect_leaf_type_schemas(node: &TreeNode, schemas: &mut TypeSchemas) {
match node {
TreeNode::Leaf(leaf) => {
@@ -63,6 +72,9 @@ fn collect_leaf_type_schemas(node: &TreeNode, schemas: &mut TypeSchemas) {
// The leaf schema is the schemars-generated JSON schema
schemas.insert(type_name, leaf.schema.clone());
}
// Also collect any definitions from the schema (schemars puts referenced types here)
collect_schema_definitions(&leaf.schema, schemas);
}
TreeNode::Branch(children) => {
for child in children.values() {
@@ -71,3 +83,26 @@ fn collect_leaf_type_schemas(node: &TreeNode, schemas: &mut TypeSchemas) {
}
}
}
/// Collect type definitions from schemars-generated schema's definitions section.
/// Schemars uses `definitions` or `$defs` to store referenced types.
fn collect_schema_definitions(schema: &Value, schemas: &mut TypeSchemas) {
// Check for definitions (JSON Schema draft-07 style)
if let Some(defs) = schema.get("definitions").and_then(|d| d.as_object()) {
for (name, def_schema) in defs {
// Use the definition name as-is (schemars names match $ref paths)
if !schemas.contains_key(name) {
schemas.insert(name.clone(), def_schema.clone());
}
}
}
// Check for $defs (JSON Schema draft 2019-09+ style)
if let Some(defs) = schema.get("$defs").and_then(|d| d.as_object()) {
for (name, def_schema) in defs {
if !schemas.contains_key(name) {
schemas.insert(name.clone(), def_schema.clone());
}
}
}
}
+116 -15
View File
@@ -10,7 +10,7 @@ use serde_json::Value;
use crate::{
ClientMetadata, Endpoint, FieldNamePosition, IndexSetPattern, PatternField, StructuralPattern,
TypeSchemas, extract_inner_type, get_node_fields, get_pattern_instance_base, to_pascal_case,
to_snake_case,
to_snake_case, unwrap_allof,
};
/// Generate Python client from metadata and OpenAPI endpoints
@@ -70,7 +70,13 @@ fn generate_type_definitions(output: &mut String, schemas: &TypeSchemas) {
writeln!(output, "# Type definitions\n").unwrap();
for (name, schema) in schemas {
// Sort types by dependencies (types that reference other types must come after)
let sorted_names = topological_sort_schemas(schemas);
for name in sorted_names {
let Some(schema) = schemas.get(&name) else {
continue;
};
if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) {
// Object type -> TypedDict
writeln!(output, "class {}(TypedDict):", name).unwrap();
@@ -89,8 +95,88 @@ fn generate_type_definitions(output: &mut String, schemas: &TypeSchemas) {
writeln!(output).unwrap();
}
/// Topologically sort schema names so dependencies come before dependents.
/// Types that reference other types (via $ref) must be defined after their dependencies.
fn topological_sort_schemas(schemas: &TypeSchemas) -> Vec<String> {
use std::collections::{HashMap, HashSet};
// Build dependency graph
let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
for (name, schema) in schemas {
let mut type_deps = HashSet::new();
collect_schema_refs(schema, &mut type_deps);
// Only keep deps that are in our schemas
type_deps.retain(|d| schemas.contains_key(d));
deps.insert(name.clone(), type_deps);
}
// Kahn's algorithm for topological sort
let mut in_degree: HashMap<String, usize> = HashMap::new();
for name in schemas.keys() {
in_degree.insert(name.clone(), 0);
}
for type_deps in deps.values() {
for dep in type_deps {
*in_degree.entry(dep.clone()).or_insert(0) += 1;
}
}
// Start with types that have no dependents (are not referenced by others)
let mut queue: Vec<String> = in_degree
.iter()
.filter(|(_, count)| **count == 0)
.map(|(name, _)| name.clone())
.collect();
queue.sort(); // Deterministic order
let mut result = Vec::new();
while let Some(name) = queue.pop() {
result.push(name.clone());
if let Some(type_deps) = deps.get(&name) {
for dep in type_deps {
if let Some(count) = in_degree.get_mut(dep) {
*count = count.saturating_sub(1);
if *count == 0 {
queue.push(dep.clone());
queue.sort(); // Keep sorted for determinism
}
}
}
}
}
// Reverse so dependencies come first
result.reverse();
result
}
/// Collect all type references ($ref) from a schema
fn collect_schema_refs(schema: &Value, refs: &mut std::collections::HashSet<String>) {
match schema {
Value::Object(map) => {
if let Some(ref_path) = map.get("$ref").and_then(|r| r.as_str()) {
if let Some(type_name) = ref_path.rsplit('/').next() {
refs.insert(type_name.to_string());
}
}
for value in map.values() {
collect_schema_refs(value, refs);
}
}
Value::Array(arr) => {
for item in arr {
collect_schema_refs(item, refs);
}
}
_ => {}
}
}
/// Convert JSON Schema to Python type
fn schema_to_python_type(schema: &Value) -> String {
// Unwrap single-element allOf (schemars uses this for composition)
let schema = unwrap_allof(schema);
// Handle $ref
if let Some(ref_path) = schema.get("$ref").and_then(|r| r.as_str()) {
return ref_path.rsplit('/').next().unwrap_or("Any").to_string();
@@ -129,7 +215,7 @@ fn schema_to_python_type(schema: &Value) -> String {
"Any".to_string()
}
/// Escape Python reserved keywords by appending underscore
/// Make a name safe for Python: escape keywords and prefix digit-starting names
fn escape_python_keyword(name: &str) -> String {
const PYTHON_KEYWORDS: &[&str] = &[
"False", "None", "True", "and", "as", "assert", "async", "await", "break", "class",
@@ -137,10 +223,17 @@ fn escape_python_keyword(name: &str) -> String {
"if", "import", "in", "is", "lambda", "nonlocal", "not", "or", "pass", "raise", "return",
"try", "while", "with", "yield",
];
if PYTHON_KEYWORDS.contains(&name) {
format!("{}_", name)
// Names starting with digit need underscore prefix
let name = if name.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) {
format!("_{}", name)
} else {
name.to_string()
};
// Reserved keywords get underscore suffix
if PYTHON_KEYWORDS.contains(&name.as_str()) {
format!("{}_", name)
} else {
name
}
}
@@ -726,18 +819,20 @@ fn generate_api_methods(output: &mut String, endpoints: &[Endpoint]) {
} else {
writeln!(output, " params = []").unwrap();
for param in &endpoint.query_params {
// Use safe name for Python variable, original name for API query parameter
let safe_name = escape_python_keyword(&param.name);
if param.required {
writeln!(
output,
" params.append(f'{}={{{}}}')",
param.name, param.name
param.name, safe_name
)
.unwrap();
} else {
writeln!(
output,
" if {} is not None: params.append(f'{}={{{}}}')",
param.name, param.name, param.name
safe_name, param.name, safe_name
)
.unwrap();
}
@@ -759,11 +854,15 @@ fn endpoint_to_method_name(endpoint: &Endpoint) -> String {
if let Some(op_id) = &endpoint.operation_id {
return to_snake_case(op_id);
}
let parts: Vec<&str> = endpoint
.path
.split('/')
.filter(|s| !s.is_empty() && !s.starts_with('{'))
.collect();
// Include path parameters as "by_{param}" to differentiate endpoints
let mut parts = Vec::new();
for segment in endpoint.path.split('/').filter(|s| !s.is_empty()) {
if let Some(param) = segment.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
parts.push(format!("by_{}", param));
} else {
parts.push(segment.to_string());
}
}
to_snake_case(&format!("get_{}", parts.join("_")))
}
@@ -779,13 +878,15 @@ fn js_type_to_python(js_type: &str) -> String {
fn build_method_params(endpoint: &Endpoint) -> String {
let mut params = Vec::new();
for param in &endpoint.path_params {
params.push(format!(", {}: str", param.name));
let safe_name = escape_python_keyword(&param.name);
params.push(format!(", {}: str", safe_name));
}
for param in &endpoint.query_params {
let safe_name = escape_python_keyword(&param.name);
if param.required {
params.push(format!(", {}: str", param.name));
params.push(format!(", {}: str", safe_name));
} else {
params.push(format!(", {}: Optional[str] = None", param.name));
params.push(format!(", {}: Optional[str] = None", safe_name));
}
}
params.join("")
+31 -9
View File
@@ -147,13 +147,19 @@ impl ClientMetadata {
/// Check if a pattern is generic
pub fn is_pattern_generic(&self, name: &str) -> bool {
self.find_pattern(name).map(|p| p.is_generic).unwrap_or(false)
self.find_pattern(name)
.map(|p| p.is_generic)
.unwrap_or(false)
}
/// Extract the value type from concrete fields for a generic pattern.
/// Returns the first leaf field's rust_type if this pattern is generic.
/// If the type is a wrapper like `Close<Dollars>`, extracts the inner type `Dollars`.
pub fn get_generic_value_type(&self, pattern_name: &str, fields: &[PatternField]) -> Option<String> {
pub fn get_generic_value_type(
&self,
pattern_name: &str,
fields: &[PatternField],
) -> Option<String> {
if !self.is_pattern_generic(pattern_name) {
return None;
}
@@ -177,6 +183,19 @@ impl ClientMetadata {
}
}
use serde_json::Value;
/// Unwrap allOf with a single element, returning the inner schema.
/// schemars uses allOf for composition, but often with just one $ref.
pub fn unwrap_allof(schema: &Value) -> &Value {
if let Some(all_of) = schema.get("allOf").and_then(|v| v.as_array())
&& all_of.len() == 1
{
return &all_of[0];
}
schema
}
/// Extract inner type from a wrapper generic like `Close<Dollars>` -> `Dollars`.
/// Also handles malformed types like `Dollars>` (from vecdb's short_type_name which
/// extracts "Dollars>" from "Close<brk_types::Dollars>" using rsplit("::")).
@@ -202,7 +221,9 @@ pub fn extract_inner_type(type_str: &str) -> String {
/// For every branch node, create a signature from its children (sorted field names + types).
/// Patterns that appear 2+ times are deduplicated.
/// Returns (patterns, concrete_to_pattern_mapping).
fn detect_structural_patterns(tree: &TreeNode) -> (Vec<StructuralPattern>, HashMap<Vec<PatternField>, String>) {
fn detect_structural_patterns(
tree: &TreeNode,
) -> (Vec<StructuralPattern>, HashMap<Vec<PatternField>, String>) {
// Map from sorted fields signature to pattern name
let mut signature_to_pattern: HashMap<Vec<PatternField>, String> = HashMap::new();
// Count how many times each signature appears
@@ -274,7 +295,8 @@ fn detect_generic_patterns(
signature_to_pattern: &HashMap<Vec<PatternField>, String>,
) -> (Vec<StructuralPattern>, HashMap<Vec<PatternField>, String>) {
// Group signatures by their normalized (generic) form
let mut normalized_groups: HashMap<Vec<PatternField>, Vec<(Vec<PatternField>, String)>> = HashMap::new();
let mut normalized_groups: HashMap<Vec<PatternField>, Vec<(Vec<PatternField>, String)>> =
HashMap::new();
for (fields, name) in signature_to_pattern {
if let Some(normalized) = normalize_fields_for_generic(fields) {
@@ -389,10 +411,11 @@ fn collect_pattern_instances(
// Collect instances for this pattern
for (field_name, child_node) in children {
if let TreeNode::Leaf(leaf) = child_node {
instances
.entry(pattern_name.clone())
.or_default()
.push((accumulated_name.to_string(), field_name.clone(), leaf.name().to_string()));
instances.entry(pattern_name.clone()).or_default().push((
accumulated_name.to_string(),
field_name.clone(),
leaf.name().to_string(),
));
}
}
}
@@ -901,4 +924,3 @@ fn collect_indexes_from_tree(
}
}
}