From f706910944ee8abe7b27a248596f7705059969d9 Mon Sep 17 00:00:00 2001 From: main Date: Thu, 19 Mar 2026 22:28:01 -0400 Subject: Polish MCP ingest and schema surfaces --- crates/fidget-spinner-cli/src/mcp/catalog.rs | 209 ++++- crates/fidget-spinner-cli/src/mcp/host/runtime.rs | 22 - crates/fidget-spinner-cli/src/mcp/service.rs | 925 ++++++++++++++++++++-- crates/fidget-spinner-cli/src/mcp/telemetry.rs | 1 + 4 files changed, 1037 insertions(+), 120 deletions(-) (limited to 'crates/fidget-spinner-cli/src/mcp') diff --git a/crates/fidget-spinner-cli/src/mcp/catalog.rs b/crates/fidget-spinner-cli/src/mcp/catalog.rs index b23cb31..0831ba4 100644 --- a/crates/fidget-spinner-cli/src/mcp/catalog.rs +++ b/crates/fidget-spinner-cli/src/mcp/catalog.rs @@ -67,6 +67,18 @@ pub(crate) fn tool_spec(name: &str) -> Option { dispatch: DispatchTarget::Worker, replay: ReplayContract::Convergent, }), + "schema.field.upsert" => Some(ToolSpec { + name: "schema.field.upsert", + description: "Add or replace one project-local payload schema field definition.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::NeverReplay, + }), + "schema.field.remove" => Some(ToolSpec { + name: "schema.field.remove", + description: "Remove one project-local payload schema field definition, optionally narrowed by node-class set.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::NeverReplay, + }), "tag.add" => Some(ToolSpec { name: "tag.add", description: "Register one repo-local tag with a required description. Notes may only reference tags from this registry.", @@ -145,9 +157,45 @@ pub(crate) fn tool_spec(name: &str) -> Option { dispatch: DispatchTarget::Worker, replay: ReplayContract::NeverReplay, }), + "metric.define" => Some(ToolSpec { + name: "metric.define", + description: "Register one project-level metric definition so experiment ingestion only has to send key/value observations.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::NeverReplay, + }), + "run.dimension.define" => Some(ToolSpec { + name: "run.dimension.define", + description: "Register one project-level run dimension used to slice metrics across scenarios, budgets, and flags.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::NeverReplay, + }), + "run.dimension.list" => Some(ToolSpec { + name: "run.dimension.list", + description: "List registered run dimensions together with observed value counts and sample values.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::Convergent, + }), + "metric.keys" => Some(ToolSpec { + name: "metric.keys", + description: "List rankable metric keys, including registered run metrics and observed payload-derived numeric fields.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::Convergent, + }), + "metric.best" => Some(ToolSpec { + name: "metric.best", + description: "Rank completed experiments by one numeric key, with optional run-dimension filters and candidate commit surfacing.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::Convergent, + }), + "metric.migrate" => Some(ToolSpec { + name: "metric.migrate", + description: "Re-run the idempotent legacy metric-plane normalization that registers canonical metrics and backfills benchmark_suite dimensions.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::NeverReplay, + }), "experiment.close" => Some(ToolSpec { name: "experiment.close", - description: "Atomically close a core-path experiment with candidate checkpoint capture, measured result, note, and verdict.", + description: "Atomically close a core-path experiment with typed run dimensions, preregistered metric observations, candidate checkpoint capture, note, and verdict.", dispatch: DispatchTarget::Worker, replay: ReplayContract::NeverReplay, }), @@ -212,6 +260,8 @@ pub(crate) fn tool_definitions() -> Vec { "project.bind", "project.status", "project.schema", + "schema.field.upsert", + "schema.field.remove", "tag.add", "tag.list", "frontier.list", @@ -225,6 +275,12 @@ pub(crate) fn tool_definitions() -> Vec { "node.archive", "note.quick", "research.record", + "metric.define", + "run.dimension.define", + "run.dimension.list", + "metric.keys", + "metric.best", + "metric.migrate", "experiment.close", "skill.list", "skill.show", @@ -277,7 +333,32 @@ pub(crate) fn list_resources() -> Vec { fn input_schema(name: &str) -> Value { match name { "project.status" | "project.schema" | "tag.list" | "skill.list" | "system.health" - | "system.telemetry" => json!({"type":"object","additionalProperties":false}), + | "system.telemetry" | "run.dimension.list" | "metric.migrate" => { + json!({"type":"object","additionalProperties":false}) + } + "schema.field.upsert" => json!({ + "type": "object", + "properties": { + "name": { "type": "string", "description": "Project payload field name." }, + "node_classes": { "type": "array", "items": node_class_schema(), "description": "Optional node-class scope. Omit or pass [] for all classes." }, + "presence": field_presence_schema(), + "severity": diagnostic_severity_schema(), + "role": field_role_schema(), + "inference_policy": inference_policy_schema(), + "value_type": field_value_type_schema(), + }, + "required": ["name", "presence", "severity", "role", "inference_policy"], + "additionalProperties": false + }), + "schema.field.remove" => json!({ + "type": "object", + "properties": { + "name": { "type": "string", "description": "Project payload field name." }, + "node_classes": { "type": "array", "items": node_class_schema(), "description": "Optional exact node-class scope to remove." } + }, + "required": ["name"], + "additionalProperties": false + }), "project.bind" => json!({ "type": "object", "properties": { @@ -333,9 +414,9 @@ fn input_schema(name: &str) -> Value { "class": node_class_schema(), "frontier_id": { "type": "string" }, "title": { "type": "string" }, - "summary": { "type": "string" }, - "tags": { "type": "array", "items": tag_name_schema() }, - "payload": { "type": "object" }, + "summary": { "type": "string", "description": "Required for `note` and `research` nodes." }, + "tags": { "type": "array", "items": tag_name_schema(), "description": "Required for `note` nodes; optional for other classes." }, + "payload": { "type": "object", "description": "`note` and `research` nodes require a non-empty string `body` field." }, "annotations": { "type": "array", "items": annotation_schema() }, "parents": { "type": "array", "items": { "type": "string" } } }, @@ -393,12 +474,13 @@ fn input_schema(name: &str) -> Value { "properties": { "frontier_id": { "type": "string" }, "title": { "type": "string" }, + "summary": { "type": "string" }, "body": { "type": "string" }, "tags": { "type": "array", "items": tag_name_schema() }, "annotations": { "type": "array", "items": annotation_schema() }, "parents": { "type": "array", "items": { "type": "string" } } }, - "required": ["title", "body", "tags"], + "required": ["title", "summary", "body", "tags"], "additionalProperties": false }), "research.record" => json!({ @@ -408,10 +490,54 @@ fn input_schema(name: &str) -> Value { "title": { "type": "string" }, "summary": { "type": "string" }, "body": { "type": "string" }, + "tags": { "type": "array", "items": tag_name_schema() }, "annotations": { "type": "array", "items": annotation_schema() }, "parents": { "type": "array", "items": { "type": "string" } } }, - "required": ["title", "body"], + "required": ["title", "summary", "body"], + "additionalProperties": false + }), + "metric.define" => json!({ + "type": "object", + "properties": { + "key": { "type": "string" }, + "unit": metric_unit_schema(), + "objective": optimization_objective_schema(), + "description": { "type": "string" } + }, + "required": ["key", "unit", "objective"], + "additionalProperties": false + }), + "run.dimension.define" => json!({ + "type": "object", + "properties": { + "key": { "type": "string" }, + "value_type": field_value_type_schema(), + "description": { "type": "string" } + }, + "required": ["key", "value_type"], + "additionalProperties": false + }), + "metric.keys" => json!({ + "type": "object", + "properties": { + "frontier_id": { "type": "string" }, + "source": metric_source_schema(), + "dimensions": { "type": "object" } + }, + "additionalProperties": false + }), + "metric.best" => json!({ + "type": "object", + "properties": { + "key": { "type": "string" }, + "frontier_id": { "type": "string" }, + "source": metric_source_schema(), + "dimensions": { "type": "object" }, + "order": metric_order_schema(), + "limit": { "type": "integer", "minimum": 1, "maximum": 500 } + }, + "required": ["key"], "additionalProperties": false }), "experiment.close" => json!({ @@ -422,8 +548,8 @@ fn input_schema(name: &str) -> Value { "change_node_id": { "type": "string" }, "candidate_summary": { "type": "string" }, "run": run_schema(), - "primary_metric": metric_observation_schema(), - "supporting_metrics": { "type": "array", "items": metric_observation_schema() }, + "primary_metric": metric_value_schema(), + "supporting_metrics": { "type": "array", "items": metric_value_schema() }, "note": note_schema(), "verdict": verdict_schema(), "decision_title": { "type": "string" }, @@ -461,16 +587,14 @@ fn metric_spec_schema() -> Value { }) } -fn metric_observation_schema() -> Value { +fn metric_value_schema() -> Value { json!({ "type": "object", "properties": { "key": { "type": "string" }, - "unit": metric_unit_schema(), - "objective": optimization_objective_schema(), "value": { "type": "number" } }, - "required": ["key", "unit", "objective", "value"], + "required": ["key", "value"], "additionalProperties": false }) } @@ -509,6 +633,61 @@ fn metric_unit_schema() -> Value { }) } +fn metric_source_schema() -> Value { + json!({ + "type": "string", + "enum": [ + "run_metric", + "change_payload", + "run_payload", + "analysis_payload", + "decision_payload" + ] + }) +} + +fn metric_order_schema() -> Value { + json!({ + "type": "string", + "enum": ["asc", "desc"] + }) +} + +fn field_value_type_schema() -> Value { + json!({ + "type": "string", + "enum": ["string", "numeric", "boolean", "timestamp"] + }) +} + +fn diagnostic_severity_schema() -> Value { + json!({ + "type": "string", + "enum": ["error", "warning", "info"] + }) +} + +fn field_presence_schema() -> Value { + json!({ + "type": "string", + "enum": ["required", "recommended", "optional"] + }) +} + +fn field_role_schema() -> Value { + json!({ + "type": "string", + "enum": ["index", "projection_gate", "render_only", "opaque"] + }) +} + +fn inference_policy_schema() -> Value { + json!({ + "type": "string", + "enum": ["manual_only", "model_may_infer"] + }) +} + fn optimization_objective_schema() -> Value { json!({ "type": "string", @@ -539,7 +718,7 @@ fn run_schema() -> Value { "type": "string", "enum": ["local_process", "worktree_process", "ssh_process"] }, - "benchmark_suite": { "type": "string" }, + "dimensions": { "type": "object" }, "command": { "type": "object", "properties": { @@ -554,7 +733,7 @@ fn run_schema() -> Value { "additionalProperties": false } }, - "required": ["title", "backend", "benchmark_suite", "command"], + "required": ["title", "backend", "dimensions", "command"], "additionalProperties": false }) } diff --git a/crates/fidget-spinner-cli/src/mcp/host/runtime.rs b/crates/fidget-spinner-cli/src/mcp/host/runtime.rs index f84f604..d57a21e 100644 --- a/crates/fidget-spinner-cli/src/mcp/host/runtime.rs +++ b/crates/fidget-spinner-cli/src/mcp/host/runtime.rs @@ -838,20 +838,6 @@ fn system_health_output(health: &HealthSnapshot) -> Result Result { + let args = deserialize::(arguments)?; + let field = self + .store + .upsert_schema_field(UpsertSchemaFieldRequest { + name: NonEmptyText::new(args.name) + .map_err(store_fault("tools/call:schema.field.upsert"))?, + node_classes: args + .node_classes + .unwrap_or_default() + .into_iter() + .map(|class| { + parse_node_class_name(&class) + .map_err(store_fault("tools/call:schema.field.upsert")) + }) + .collect::>()?, + presence: parse_field_presence_name(&args.presence) + .map_err(store_fault("tools/call:schema.field.upsert"))?, + severity: parse_diagnostic_severity_name(&args.severity) + .map_err(store_fault("tools/call:schema.field.upsert"))?, + role: parse_field_role_name(&args.role) + .map_err(store_fault("tools/call:schema.field.upsert"))?, + inference_policy: parse_inference_policy_name(&args.inference_policy) + .map_err(store_fault("tools/call:schema.field.upsert"))?, + value_type: args + .value_type + .as_deref() + .map(parse_field_value_type_name) + .transpose() + .map_err(store_fault("tools/call:schema.field.upsert"))?, + }) + .map_err(store_fault("tools/call:schema.field.upsert"))?; + tool_success( + schema_field_upsert_output(self.store.schema(), &field)?, + presentation, + FaultStage::Worker, + "tools/call:schema.field.upsert", + ) + } + "schema.field.remove" => { + let args = deserialize::(arguments)?; + let removed_count = self + .store + .remove_schema_field(RemoveSchemaFieldRequest { + name: NonEmptyText::new(args.name) + .map_err(store_fault("tools/call:schema.field.remove"))?, + node_classes: args + .node_classes + .map(|node_classes| { + node_classes + .into_iter() + .map(|class| { + parse_node_class_name(&class) + .map_err(store_fault("tools/call:schema.field.remove")) + }) + .collect::>() + }) + .transpose()?, + }) + .map_err(store_fault("tools/call:schema.field.remove"))?; + tool_success( + schema_field_remove_output(self.store.schema(), removed_count)?, + presentation, + FaultStage::Worker, + "tools/call:schema.field.remove", + ) + } "tag.add" => { let args = deserialize::(arguments)?; let tag = self @@ -402,7 +472,10 @@ impl WorkerService { .map_err(store_fault("tools/call:note.quick"))?, title: NonEmptyText::new(args.title) .map_err(store_fault("tools/call:note.quick"))?, - summary: None, + summary: Some( + NonEmptyText::new(args.summary) + .map_err(store_fault("tools/call:note.quick"))?, + ), tags: Some( parse_tag_set(args.tags) .map_err(store_fault("tools/call:note.quick"))?, @@ -439,12 +512,14 @@ impl WorkerService { .map_err(store_fault("tools/call:research.record"))?, title: NonEmptyText::new(args.title) .map_err(store_fault("tools/call:research.record"))?, - summary: args - .summary - .map(NonEmptyText::new) - .transpose() - .map_err(store_fault("tools/call:research.record"))?, - tags: None, + summary: Some( + NonEmptyText::new(args.summary) + .map_err(store_fault("tools/call:research.record"))?, + ), + tags: Some( + parse_tag_set(args.tags) + .map_err(store_fault("tools/call:research.record"))?, + ), payload: NodePayload::with_schema( self.store.schema().schema_ref(), crate::json_object(json!({ "body": args.body })) @@ -463,6 +538,170 @@ impl WorkerService { "tools/call:research.record", ) } + "metric.define" => { + let args = deserialize::(arguments)?; + let metric = self + .store + .define_metric(DefineMetricRequest { + key: NonEmptyText::new(args.key) + .map_err(store_fault("tools/call:metric.define"))?, + unit: parse_metric_unit_name(&args.unit) + .map_err(store_fault("tools/call:metric.define"))?, + objective: crate::parse_optimization_objective(&args.objective) + .map_err(store_fault("tools/call:metric.define"))?, + description: args + .description + .map(NonEmptyText::new) + .transpose() + .map_err(store_fault("tools/call:metric.define"))?, + }) + .map_err(store_fault("tools/call:metric.define"))?; + tool_success( + json_created_output( + "registered metric", + json!({ + "key": metric.key, + "unit": metric_unit_name(metric.unit), + "objective": metric_objective_name(metric.objective), + "description": metric.description, + }), + "tools/call:metric.define", + )?, + presentation, + FaultStage::Worker, + "tools/call:metric.define", + ) + } + "run.dimension.define" => { + let args = deserialize::(arguments)?; + let dimension = self + .store + .define_run_dimension(DefineRunDimensionRequest { + key: NonEmptyText::new(args.key) + .map_err(store_fault("tools/call:run.dimension.define"))?, + value_type: parse_field_value_type_name(&args.value_type) + .map_err(store_fault("tools/call:run.dimension.define"))?, + description: args + .description + .map(NonEmptyText::new) + .transpose() + .map_err(store_fault("tools/call:run.dimension.define"))?, + }) + .map_err(store_fault("tools/call:run.dimension.define"))?; + tool_success( + json_created_output( + "registered run dimension", + json!({ + "key": dimension.key, + "value_type": dimension.value_type.as_str(), + "description": dimension.description, + }), + "tools/call:run.dimension.define", + )?, + presentation, + FaultStage::Worker, + "tools/call:run.dimension.define", + ) + } + "run.dimension.list" => { + let items = self + .store + .list_run_dimensions() + .map_err(store_fault("tools/call:run.dimension.list"))?; + tool_success( + run_dimension_list_output(items.as_slice())?, + presentation, + FaultStage::Worker, + "tools/call:run.dimension.list", + ) + } + "metric.keys" => { + let args = deserialize::(arguments)?; + let keys = self + .store + .list_metric_keys_filtered(MetricKeyQuery { + frontier_id: args + .frontier_id + .as_deref() + .map(crate::parse_frontier_id) + .transpose() + .map_err(store_fault("tools/call:metric.keys"))?, + source: args + .source + .as_deref() + .map(parse_metric_source_name) + .transpose() + .map_err(store_fault("tools/call:metric.keys"))?, + dimensions: coerce_tool_dimensions( + &self.store, + args.dimensions.unwrap_or_default(), + "tools/call:metric.keys", + )?, + }) + .map_err(store_fault("tools/call:metric.keys"))?; + tool_success( + metric_keys_output(keys.as_slice())?, + presentation, + FaultStage::Worker, + "tools/call:metric.keys", + ) + } + "metric.best" => { + let args = deserialize::(arguments)?; + let items = self + .store + .best_metrics(MetricBestQuery { + key: NonEmptyText::new(args.key) + .map_err(store_fault("tools/call:metric.best"))?, + frontier_id: args + .frontier_id + .as_deref() + .map(crate::parse_frontier_id) + .transpose() + .map_err(store_fault("tools/call:metric.best"))?, + source: args + .source + .as_deref() + .map(parse_metric_source_name) + .transpose() + .map_err(store_fault("tools/call:metric.best"))?, + dimensions: coerce_tool_dimensions( + &self.store, + args.dimensions.unwrap_or_default(), + "tools/call:metric.best", + )?, + order: args + .order + .as_deref() + .map(parse_metric_order_name) + .transpose() + .map_err(store_fault("tools/call:metric.best"))?, + limit: args.limit.unwrap_or(10), + }) + .map_err(store_fault("tools/call:metric.best"))?; + tool_success( + metric_best_output(items.as_slice())?, + presentation, + FaultStage::Worker, + "tools/call:metric.best", + ) + } + "metric.migrate" => { + let report = self + .store + .migrate_metric_plane() + .map_err(store_fault("tools/call:metric.migrate"))?; + tool_success( + json_created_output( + "normalized legacy metric plane", + json!(report), + "tools/call:metric.migrate", + )?, + presentation, + FaultStage::Worker, + "tools/call:metric.migrate", + ) + } "experiment.close" => { let args = deserialize::(arguments)?; let frontier_id = crate::parse_frontier_id(&args.frontier_id) @@ -507,8 +746,11 @@ impl WorkerService { .map_err(store_fault("tools/call:experiment.close"))?, backend: parse_backend_name(&args.run.backend) .map_err(store_fault("tools/call:experiment.close"))?, - benchmark_suite: NonEmptyText::new(args.run.benchmark_suite) - .map_err(store_fault("tools/call:experiment.close"))?, + dimensions: coerce_tool_dimensions( + &self.store, + args.run.dimensions, + "tools/call:experiment.close", + )?, command: command_recipe_from_wire( args.run.command, self.store.project_root(), @@ -518,12 +760,12 @@ impl WorkerService { capture_code_snapshot(self.store.project_root()) .map_err(store_fault("tools/call:experiment.close"))?, ), - primary_metric: metric_observation_from_wire(args.primary_metric) + primary_metric: metric_value_from_wire(args.primary_metric) .map_err(store_fault("tools/call:experiment.close"))?, supporting_metrics: args .supporting_metrics .into_iter() - .map(metric_observation_from_wire) + .map(metric_value_from_wire) .collect::, _>>() .map_err(store_fault("tools/call:experiment.close"))?, note: FrontierNote { @@ -547,7 +789,7 @@ impl WorkerService { }) .map_err(store_fault("tools/call:experiment.close"))?; tool_success( - experiment_close_output(&receipt)?, + experiment_close_output(&self.store, &receipt)?, presentation, FaultStage::Worker, "tools/call:experiment.close", @@ -692,8 +934,8 @@ fn project_schema_output(schema: &ProjectSchema) -> Result>() .join(",") }, - format!("{:?}", field.presence).to_ascii_lowercase(), - format!("{:?}", field.role).to_ascii_lowercase(), + field.presence.as_str(), + field.role.as_str(), )); } if schema.fields.len() > 8 { @@ -709,6 +951,59 @@ fn project_schema_output(schema: &ProjectSchema) -> Result Result { + let concise = json!({ + "schema": schema.schema_ref(), + "field": project_schema_field_value(field), + }); + detailed_tool_output( + &concise, + &concise, + format!( + "upserted schema field {}\nschema: {}\nclasses: {}\npresence: {}\nseverity: {}\nrole: {}\ninference: {}{}", + field.name, + schema_label(schema), + render_schema_node_classes(&field.node_classes), + field.presence.as_str(), + field.severity.as_str(), + field.role.as_str(), + field.inference_policy.as_str(), + field + .value_type + .map(|value_type| format!("\nvalue_type: {}", value_type.as_str())) + .unwrap_or_default(), + ), + None, + FaultStage::Worker, + "tools/call:schema.field.upsert", + ) +} + +fn schema_field_remove_output( + schema: &ProjectSchema, + removed_count: u64, +) -> Result { + let concise = json!({ + "schema": schema.schema_ref(), + "removed_count": removed_count, + }); + detailed_tool_output( + &concise, + &concise, + format!( + "removed {} schema field definition(s)\nschema: {}", + removed_count, + schema_label(schema), + ), + None, + FaultStage::Worker, + "tools/call:schema.field.remove", + ) +} + fn tag_add_output(tag: &TagRecord) -> Result { let concise = json!({ "name": tag.name, @@ -892,14 +1187,33 @@ fn node_read_output(node: &fidget_spinner_core::DagNode) -> Result>(); + if !filtered_fields.is_empty() { + let _ = concise.insert( + "payload_field_count".to_owned(), + json!(filtered_fields.len()), + ); + if is_prose_node(node.class) { + let _ = concise.insert( + "payload_fields".to_owned(), + json!( + filtered_fields + .iter() + .take(6) + .map(|(name, _)| (*name).clone()) + .collect::>() + ), + ); + } else { + let payload_preview = payload_preview_value(node.class, &node.payload.fields); + if let Value::Object(object) = &payload_preview + && !object.is_empty() + { + let _ = concise.insert("payload_preview".to_owned(), payload_preview); + } + } + } } if !node.diagnostics.items.is_empty() { let _ = concise.insert( @@ -930,7 +1244,7 @@ fn node_read_output(node: &fidget_spinner_core::DagNode) -> Result Result Result { +fn experiment_close_output( + store: &ProjectStore, + receipt: &ExperimentReceipt, +) -> Result { let concise = json!({ "experiment_id": receipt.experiment.id, "frontier_id": receipt.experiment.frontier_id, @@ -980,7 +1297,8 @@ fn experiment_close_output(receipt: &ExperimentReceipt) -> Result Result Result Value { +fn metric_keys_output(keys: &[MetricKeySummary]) -> Result { + let concise = keys + .iter() + .map(|key| { + json!({ + "key": key.key, + "source": key.source.as_str(), + "experiment_count": key.experiment_count, + "unit": key.unit.map(metric_unit_name), + "objective": key.objective.map(metric_objective_name), + "description": key.description, + "requires_order": key.requires_order, + }) + }) + .collect::>(); + let mut lines = vec![format!("{} metric key(s)", keys.len())]; + lines.extend(keys.iter().map(|key| { + let mut line = format!( + "{} [{}] experiments={}", + key.key, + key.source.as_str(), + key.experiment_count + ); + if let Some(unit) = key.unit { + line.push_str(format!(" unit={}", metric_unit_name(unit)).as_str()); + } + if let Some(objective) = key.objective { + line.push_str(format!(" objective={}", metric_objective_name(objective)).as_str()); + } + if let Some(description) = key.description.as_ref() { + line.push_str(format!(" | {description}").as_str()); + } + if key.requires_order { + line.push_str(" order=required"); + } + line + })); + detailed_tool_output( + &concise, + &keys, + lines.join("\n"), + None, + FaultStage::Worker, + "tools/call:metric.keys", + ) +} + +fn metric_best_output( + items: &[fidget_spinner_store_sqlite::MetricBestEntry], +) -> Result { + let concise = items + .iter() + .enumerate() + .map(|(index, item)| { + json!({ + "rank": index + 1, + "key": item.key, + "source": item.source.as_str(), + "value": item.value, + "order": item.order.as_str(), + "experiment_id": item.experiment_id, + "frontier_id": item.frontier_id, + "change_node_id": item.change_node_id, + "change_title": item.change_title, + "verdict": metric_verdict_name(item.verdict), + "candidate_checkpoint_id": item.candidate_checkpoint_id, + "candidate_commit_hash": item.candidate_commit_hash, + "run_id": item.run_id, + "unit": item.unit.map(metric_unit_name), + "objective": item.objective.map(metric_objective_name), + "dimensions": run_dimensions_value(&item.dimensions), + }) + }) + .collect::>(); + let mut lines = vec![format!("{} ranked experiment(s)", items.len())]; + lines.extend(items.iter().enumerate().map(|(index, item)| { + format!( + "{}. {}={} [{}] {} | verdict={} | commit={} | checkpoint={}", + index + 1, + item.key, + item.value, + item.source.as_str(), + item.change_title, + metric_verdict_name(item.verdict), + item.candidate_commit_hash, + item.candidate_checkpoint_id, + ) + })); + lines.extend( + items + .iter() + .map(|item| format!(" dims: {}", render_dimension_kv(&item.dimensions))), + ); + detailed_tool_output( + &concise, + &items, + lines.join("\n"), + None, + FaultStage::Worker, + "tools/call:metric.best", + ) +} + +fn run_dimension_list_output( + items: &[fidget_spinner_store_sqlite::RunDimensionSummary], +) -> Result { + let concise = items + .iter() + .map(|item| { + json!({ + "key": item.key, + "value_type": item.value_type.as_str(), + "description": item.description, + "observed_run_count": item.observed_run_count, + "distinct_value_count": item.distinct_value_count, + "sample_values": item.sample_values, + }) + }) + .collect::>(); + let mut lines = vec![format!("{} run dimension(s)", items.len())]; + lines.extend(items.iter().map(|item| { + let mut line = format!( + "{} [{}] runs={} distinct={}", + item.key, + item.value_type.as_str(), + item.observed_run_count, + item.distinct_value_count + ); + if let Some(description) = item.description.as_ref() { + line.push_str(format!(" | {description}").as_str()); + } + if !item.sample_values.is_empty() { + line.push_str( + format!( + " | samples={}", + item.sample_values + .iter() + .map(value_summary) + .collect::>() + .join(", ") + ) + .as_str(), + ); + } + line + })); + detailed_tool_output( + &concise, + &items, + lines.join("\n"), + None, + FaultStage::Worker, + "tools/call:run.dimension.list", + ) +} + +fn json_created_output( + headline: &str, + structured: Value, + operation: &'static str, +) -> Result { + detailed_tool_output( + &structured, + &structured, + format!( + "{headline}\n{}", + crate::to_pretty_json(&structured).map_err(store_fault(operation))? + ), + None, + FaultStage::Worker, + operation, + ) +} + +fn project_schema_field_value(field: &ProjectFieldSpec) -> Value { let mut value = Map::new(); let _ = value.insert("name".to_owned(), json!(field.name)); if !field.node_classes.is_empty() { @@ -1023,21 +1519,12 @@ fn project_schema_field_value(field: &fidget_spinner_core::ProjectFieldSpec) -> ), ); } - let _ = value.insert( - "presence".to_owned(), - json!(format!("{:?}", field.presence).to_ascii_lowercase()), - ); - let _ = value.insert( - "severity".to_owned(), - json!(format!("{:?}", field.severity).to_ascii_lowercase()), - ); - let _ = value.insert( - "role".to_owned(), - json!(format!("{:?}", field.role).to_ascii_lowercase()), - ); + let _ = value.insert("presence".to_owned(), json!(field.presence.as_str())); + let _ = value.insert("severity".to_owned(), json!(field.severity.as_str())); + let _ = value.insert("role".to_owned(), json!(field.role.as_str())); let _ = value.insert( "inference_policy".to_owned(), - json!(format!("{:?}", field.inference_policy).to_ascii_lowercase()), + json!(field.inference_policy.as_str()), ); if let Some(value_type) = field.value_type { let _ = value.insert("value_type".to_owned(), json!(value_type.as_str())); @@ -1045,6 +1532,17 @@ fn project_schema_field_value(field: &fidget_spinner_core::ProjectFieldSpec) -> Value::Object(value) } +fn render_schema_node_classes(node_classes: &BTreeSet) -> String { + if node_classes.is_empty() { + return "any".to_owned(); + } + node_classes + .iter() + .map(ToString::to_string) + .collect::>() + .join(", ") +} + fn frontier_projection_summary_value(projection: &FrontierProjection) -> Value { json!({ "frontier_id": projection.frontier.id, @@ -1211,17 +1709,17 @@ fn diagnostic_tally(diagnostics: &fidget_spinner_core::NodeDiagnostics) -> Diagn .fold(DiagnosticTally::default(), |mut tally, item| { tally.total += 1; match item.severity { - fidget_spinner_core::DiagnosticSeverity::Error => tally.errors += 1, - fidget_spinner_core::DiagnosticSeverity::Warning => tally.warnings += 1, - fidget_spinner_core::DiagnosticSeverity::Info => tally.infos += 1, + DiagnosticSeverity::Error => tally.errors += 1, + DiagnosticSeverity::Warning => tally.warnings += 1, + DiagnosticSeverity::Info => tally.infos += 1, } tally }) } -fn payload_preview_value(fields: &Map) -> Value { +fn payload_preview_value(class: NodeClass, fields: &Map) -> Value { let mut preview = Map::new(); - for (index, (name, value)) in fields.iter().enumerate() { + for (index, (name, value)) in filtered_payload_fields(class, fields).enumerate() { if index == 6 { let _ = preview.insert( "...".to_owned(), @@ -1234,14 +1732,33 @@ fn payload_preview_value(fields: &Map) -> Value { Value::Object(preview) } -fn payload_preview_lines(fields: &Map) -> Vec { - if fields.is_empty() { +fn payload_preview_lines(class: NodeClass, fields: &Map) -> Vec { + let filtered = filtered_payload_fields(class, fields).collect::>(); + if filtered.is_empty() { return Vec::new(); } - let mut lines = vec![format!("payload fields: {}", fields.len())]; - for (index, (name, value)) in fields.iter().enumerate() { + if is_prose_node(class) { + let preview_names = filtered + .iter() + .take(6) + .map(|(name, _)| (*name).clone()) + .collect::>(); + let mut lines = vec![format!("payload fields: {}", preview_names.join(", "))]; + if filtered.len() > preview_names.len() { + lines.push(format!( + "payload fields: +{} more field(s)", + filtered.len() - preview_names.len() + )); + } + return lines; + } + let mut lines = vec![format!("payload fields: {}", filtered.len())]; + for (index, (name, value)) in filtered.iter().enumerate() { if index == 6 { - lines.push(format!("payload: +{} more field(s)", fields.len() - index)); + lines.push(format!( + "payload: +{} more field(s)", + filtered.len() - index + )); break; } lines.push(format!( @@ -1253,10 +1770,19 @@ fn payload_preview_lines(fields: &Map) -> Vec { lines } +fn filtered_payload_fields( + class: NodeClass, + fields: &Map, +) -> impl Iterator + '_ { + fields.iter().filter(move |(name, _)| { + !matches!(class, NodeClass::Note | NodeClass::Research) || name.as_str() != "body" + }) +} + fn payload_value_preview(value: &Value) -> Value { match value { Value::Null | Value::Bool(_) | Value::Number(_) => value.clone(), - Value::String(text) => Value::String(libmcp::collapse_inline_whitespace(text)), + Value::String(text) => Value::String(truncated_inline_preview(text, 96)), Value::Array(items) => { let preview = items .iter() @@ -1290,25 +1816,89 @@ fn payload_value_preview(value: &Value) -> Value { } } -fn metric_value(metric: &MetricObservation) -> Value { - json!({ - "key": metric.metric_key, +fn is_prose_node(class: NodeClass) -> bool { + matches!(class, NodeClass::Note | NodeClass::Research) +} + +fn truncated_inline_preview(text: &str, limit: usize) -> String { + let collapsed = libmcp::collapse_inline_whitespace(text); + let truncated = libmcp::render::truncate_chars(&collapsed, Some(limit)); + if truncated.truncated { + format!("{}...", truncated.text) + } else { + truncated.text + } +} + +fn metric_value(store: &ProjectStore, metric: &MetricValue) -> Result { + let definition = metric_definition(store, &metric.key)?; + Ok(json!({ + "key": metric.key, "value": metric.value, - "unit": format!("{:?}", metric.unit).to_ascii_lowercase(), - "objective": format!("{:?}", metric.objective).to_ascii_lowercase(), - }) + "unit": metric_unit_name(definition.unit), + "objective": metric_objective_name(definition.objective), + })) } -fn metric_text(metric: &MetricObservation) -> String { - format!( +fn metric_text(store: &ProjectStore, metric: &MetricValue) -> Result { + let definition = metric_definition(store, &metric.key)?; + Ok(format!( "{}={} {} ({})", - metric.metric_key, + metric.key, metric.value, - format!("{:?}", metric.unit).to_ascii_lowercase(), - format!("{:?}", metric.objective).to_ascii_lowercase(), + metric_unit_name(definition.unit), + metric_objective_name(definition.objective), + )) +} + +fn metric_unit_name(unit: MetricUnit) -> &'static str { + match unit { + MetricUnit::Seconds => "seconds", + MetricUnit::Bytes => "bytes", + MetricUnit::Count => "count", + MetricUnit::Ratio => "ratio", + MetricUnit::Custom => "custom", + } +} + +fn metric_objective_name(objective: fidget_spinner_core::OptimizationObjective) -> &'static str { + match objective { + fidget_spinner_core::OptimizationObjective::Minimize => "minimize", + fidget_spinner_core::OptimizationObjective::Maximize => "maximize", + fidget_spinner_core::OptimizationObjective::Target => "target", + } +} + +fn metric_verdict_name(verdict: FrontierVerdict) -> &'static str { + match verdict { + FrontierVerdict::PromoteToChampion => "promote_to_champion", + FrontierVerdict::KeepOnFrontier => "keep_on_frontier", + FrontierVerdict::RevertToChampion => "revert_to_champion", + FrontierVerdict::ArchiveDeadEnd => "archive_dead_end", + FrontierVerdict::NeedsMoreEvidence => "needs_more_evidence", + } +} + +fn run_dimensions_value(dimensions: &BTreeMap) -> Value { + Value::Object( + dimensions + .iter() + .map(|(key, value)| (key.to_string(), value.as_json())) + .collect::>(), ) } +fn render_dimension_kv(dimensions: &BTreeMap) -> String { + if dimensions.is_empty() { + return "none".to_owned(); + } + dimensions + .iter() + .map(|(key, value)| format!("{key}={}", value_summary(&value.as_json()))) + .collect::>() + .join(", ") +} + fn format_tags(tags: &BTreeSet) -> String { tags.iter() .map(ToString::to_string) @@ -1360,6 +1950,12 @@ fn classify_fault_kind(message: &str) -> FaultKind { || message.contains("empty") || message.contains("already exists") || message.contains("require an explicit tag list") + || message.contains("requires a non-empty summary") + || message.contains("requires a non-empty string payload field `body`") + || message.contains("requires an explicit order") + || message.contains("is ambiguous across sources") + || message.contains("has conflicting semantics") + || message.contains("conflicts with existing definition") { FaultKind::InvalidInput } else { @@ -1414,17 +2010,44 @@ fn metric_spec_from_wire(raw: WireMetricSpec) -> Result }) } -fn metric_observation_from_wire( - raw: WireMetricObservation, -) -> Result { - Ok(MetricObservation { - metric_key: NonEmptyText::new(raw.key)?, - unit: parse_metric_unit_name(&raw.unit)?, - objective: crate::parse_optimization_objective(&raw.objective)?, +fn metric_value_from_wire(raw: WireMetricValue) -> Result { + Ok(MetricValue { + key: NonEmptyText::new(raw.key)?, value: raw.value, }) } +fn metric_definition(store: &ProjectStore, key: &NonEmptyText) -> Result { + store + .list_metric_definitions() + .map_err(store_fault("tools/call:experiment.close"))? + .into_iter() + .find(|definition| definition.key == *key) + .map(|definition| MetricSpec { + metric_key: definition.key, + unit: definition.unit, + objective: definition.objective, + }) + .ok_or_else(|| { + FaultRecord::new( + FaultKind::InvalidInput, + FaultStage::Store, + "tools/call:experiment.close", + format!("metric `{key}` is not registered"), + ) + }) +} + +fn coerce_tool_dimensions( + store: &ProjectStore, + raw_dimensions: BTreeMap, + operation: &'static str, +) -> Result, FaultRecord> { + store + .coerce_run_dimensions(raw_dimensions) + .map_err(store_fault(operation)) +} + fn command_recipe_from_wire( raw: WireRunCommand, project_root: &Utf8Path, @@ -1465,6 +2088,91 @@ fn parse_metric_unit_name(raw: &str) -> Result { crate::parse_metric_unit(raw) } +fn parse_metric_source_name(raw: &str) -> Result { + match raw { + "run_metric" => Ok(MetricFieldSource::RunMetric), + "change_payload" => Ok(MetricFieldSource::ChangePayload), + "run_payload" => Ok(MetricFieldSource::RunPayload), + "analysis_payload" => Ok(MetricFieldSource::AnalysisPayload), + "decision_payload" => Ok(MetricFieldSource::DecisionPayload), + other => Err(StoreError::Json(serde_json::Error::io( + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("unknown metric source `{other}`"), + ), + ))), + } +} + +fn parse_metric_order_name(raw: &str) -> Result { + match raw { + "asc" => Ok(MetricRankOrder::Asc), + "desc" => Ok(MetricRankOrder::Desc), + other => Err(StoreError::Json(serde_json::Error::io( + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("unknown metric order `{other}`"), + ), + ))), + } +} + +fn parse_field_value_type_name(raw: &str) -> Result { + match raw { + "string" => Ok(FieldValueType::String), + "numeric" => Ok(FieldValueType::Numeric), + "boolean" => Ok(FieldValueType::Boolean), + "timestamp" => Ok(FieldValueType::Timestamp), + other => Err(crate::invalid_input(format!( + "unknown field value type `{other}`" + ))), + } +} + +fn parse_diagnostic_severity_name(raw: &str) -> Result { + match raw { + "error" => Ok(DiagnosticSeverity::Error), + "warning" => Ok(DiagnosticSeverity::Warning), + "info" => Ok(DiagnosticSeverity::Info), + other => Err(crate::invalid_input(format!( + "unknown diagnostic severity `{other}`" + ))), + } +} + +fn parse_field_presence_name(raw: &str) -> Result { + match raw { + "required" => Ok(FieldPresence::Required), + "recommended" => Ok(FieldPresence::Recommended), + "optional" => Ok(FieldPresence::Optional), + other => Err(crate::invalid_input(format!( + "unknown field presence `{other}`" + ))), + } +} + +fn parse_field_role_name(raw: &str) -> Result { + match raw { + "index" => Ok(FieldRole::Index), + "projection_gate" => Ok(FieldRole::ProjectionGate), + "render_only" => Ok(FieldRole::RenderOnly), + "opaque" => Ok(FieldRole::Opaque), + other => Err(crate::invalid_input(format!( + "unknown field role `{other}`" + ))), + } +} + +fn parse_inference_policy_name(raw: &str) -> Result { + match raw { + "manual_only" => Ok(InferencePolicy::ManualOnly), + "model_may_infer" => Ok(InferencePolicy::ModelMayInfer), + other => Err(crate::invalid_input(format!( + "unknown inference policy `{other}`" + ))), + } +} + fn parse_backend_name(raw: &str) -> Result { match raw { "local_process" => Ok(ExecutionBackend::LocalProcess), @@ -1574,6 +2282,7 @@ struct NodeArchiveToolArgs { struct QuickNoteToolArgs { frontier_id: Option, title: String, + summary: String, body: String, tags: Vec, #[serde(default)] @@ -1586,14 +2295,65 @@ struct QuickNoteToolArgs { struct ResearchRecordToolArgs { frontier_id: Option, title: String, - summary: Option, + summary: String, body: String, #[serde(default)] + tags: Vec, + #[serde(default)] annotations: Vec, #[serde(default)] parents: Vec, } +#[derive(Debug, Deserialize)] +struct SchemaFieldUpsertToolArgs { + name: String, + node_classes: Option>, + presence: String, + severity: String, + role: String, + inference_policy: String, + value_type: Option, +} + +#[derive(Debug, Deserialize)] +struct SchemaFieldRemoveToolArgs { + name: String, + node_classes: Option>, +} + +#[derive(Debug, Deserialize)] +struct MetricDefineToolArgs { + key: String, + unit: String, + objective: String, + description: Option, +} + +#[derive(Debug, Deserialize)] +struct RunDimensionDefineToolArgs { + key: String, + value_type: String, + description: Option, +} + +#[derive(Debug, Deserialize, Default)] +struct MetricKeysToolArgs { + frontier_id: Option, + source: Option, + dimensions: Option>, +} + +#[derive(Debug, Deserialize)] +struct MetricBestToolArgs { + key: String, + frontier_id: Option, + source: Option, + dimensions: Option>, + order: Option, + limit: Option, +} + #[derive(Debug, Deserialize)] struct ExperimentCloseToolArgs { frontier_id: String, @@ -1601,9 +2361,9 @@ struct ExperimentCloseToolArgs { change_node_id: String, candidate_summary: String, run: WireRun, - primary_metric: WireMetricObservation, + primary_metric: WireMetricValue, #[serde(default)] - supporting_metrics: Vec, + supporting_metrics: Vec, note: WireFrontierNote, verdict: String, decision_title: String, @@ -1627,10 +2387,8 @@ struct WireMetricSpec { } #[derive(Debug, Deserialize)] -struct WireMetricObservation { +struct WireMetricValue { key: String, - unit: String, - objective: String, value: f64, } @@ -1639,7 +2397,8 @@ struct WireRun { title: String, summary: Option, backend: String, - benchmark_suite: String, + #[serde(default)] + dimensions: BTreeMap, command: WireRunCommand, } diff --git a/crates/fidget-spinner-cli/src/mcp/telemetry.rs b/crates/fidget-spinner-cli/src/mcp/telemetry.rs index 7206f76..93fbe71 100644 --- a/crates/fidget-spinner-cli/src/mcp/telemetry.rs +++ b/crates/fidget-spinner-cli/src/mcp/telemetry.rs @@ -36,6 +36,7 @@ impl ServerTelemetry { pub fn record_success(&mut self, operation: &str, latency_ms: u128) { self.successes += 1; + self.last_fault = None; let entry = self.operations.entry(operation.to_owned()).or_default(); entry.successes += 1; entry.last_latency_ms = Some(latency_ms); -- cgit v1.2.3