From f706910944ee8abe7b27a248596f7705059969d9 Mon Sep 17 00:00:00 2001 From: main Date: Thu, 19 Mar 2026 22:28:01 -0400 Subject: Polish MCP ingest and schema surfaces --- crates/fidget-spinner-cli/src/main.rs | 533 ++++++++++- crates/fidget-spinner-cli/src/mcp/catalog.rs | 209 ++++- crates/fidget-spinner-cli/src/mcp/host/runtime.rs | 22 - crates/fidget-spinner-cli/src/mcp/service.rs | 925 ++++++++++++++++-- crates/fidget-spinner-cli/src/mcp/telemetry.rs | 1 + crates/fidget-spinner-cli/tests/mcp_hardening.rs | 1040 ++++++++++++++++++++- 6 files changed, 2567 insertions(+), 163 deletions(-) (limited to 'crates/fidget-spinner-cli') diff --git a/crates/fidget-spinner-cli/src/main.rs b/crates/fidget-spinner-cli/src/main.rs index fe4cb5f..3ad9534 100644 --- a/crates/fidget-spinner-cli/src/main.rs +++ b/crates/fidget-spinner-cli/src/main.rs @@ -10,13 +10,16 @@ use std::path::{Path, PathBuf}; use camino::{Utf8Path, Utf8PathBuf}; use clap::{Args, Parser, Subcommand, ValueEnum}; use fidget_spinner_core::{ - AnnotationVisibility, CodeSnapshotRef, CommandRecipe, ExecutionBackend, FrontierContract, - FrontierNote, FrontierVerdict, GitCommitHash, MetricObservation, MetricSpec, MetricUnit, - NodeAnnotation, NodeClass, NodePayload, NonEmptyText, OptimizationObjective, TagName, + AnnotationVisibility, CodeSnapshotRef, CommandRecipe, DiagnosticSeverity, ExecutionBackend, + FieldPresence, FieldRole, FieldValueType, FrontierContract, FrontierNote, FrontierVerdict, + GitCommitHash, InferencePolicy, MetricSpec, MetricUnit, MetricValue, NodeAnnotation, NodeClass, + NodePayload, NonEmptyText, OptimizationObjective, ProjectFieldSpec, TagName, }; use fidget_spinner_store_sqlite::{ - CloseExperimentRequest, CreateFrontierRequest, CreateNodeRequest, EdgeAttachment, - EdgeAttachmentDirection, ListNodesQuery, ProjectStore, StoreError, + CloseExperimentRequest, CreateFrontierRequest, CreateNodeRequest, DefineMetricRequest, + DefineRunDimensionRequest, EdgeAttachment, EdgeAttachmentDirection, 
ListNodesQuery, + MetricBestQuery, MetricFieldSource, MetricKeyQuery, MetricRankOrder, ProjectStore, + RemoveSchemaFieldRequest, StoreError, UpsertSchemaFieldRequest, }; use serde::Serialize; use serde_json::{Map, Value, json}; @@ -61,6 +64,16 @@ enum Command { }, /// Record off-path research and enabling work. Research(ResearchCommand), + /// Inspect rankable metrics across closed experiments. + Metric { + #[command(subcommand)] + command: MetricCommand, + }, + /// Define and inspect run dimensions used to slice experiment metrics. + Dimension { + #[command(subcommand)] + command: DimensionCommand, + }, /// Close a core-path experiment atomically. Experiment { #[command(subcommand)] @@ -100,6 +113,10 @@ struct InitArgs { enum SchemaCommand { /// Show the current project schema as JSON. Show(ProjectArg), + /// Add or replace one project schema field definition. + UpsertField(SchemaFieldUpsertArgs), + /// Remove one project schema field definition. + RemoveField(SchemaFieldRemoveArgs), } #[derive(Subcommand)] @@ -169,8 +186,10 @@ struct NodeAddArgs { #[arg(long)] title: String, #[arg(long)] + /// Required for `note` and `research` nodes. summary: Option, #[arg(long = "payload-json")] + /// JSON object payload. `note` and `research` nodes require a non-empty `body` string. payload_json: Option, #[arg(long = "payload-file")] payload_file: Option, @@ -270,6 +289,74 @@ enum ResearchSubcommand { Add(QuickResearchArgs), } +#[derive(Subcommand)] +enum MetricCommand { + /// Register a project-level metric definition. + Define(MetricDefineArgs), + /// List rankable numeric keys observed in completed experiments. + Keys(MetricKeysArgs), + /// Rank completed experiments by one numeric key. + Best(MetricBestArgs), + /// Re-run the idempotent legacy metric-plane normalization. + Migrate(ProjectArg), +} + +#[derive(Subcommand)] +enum DimensionCommand { + /// Register a project-level run dimension definition. 
+ Define(DimensionDefineArgs), + /// List run dimensions and sample values observed in completed runs. + List(ProjectArg), +} + +#[derive(Args)] +struct MetricDefineArgs { + #[command(flatten)] + project: ProjectArg, + /// Metric key used in experiment closure and ranking. + #[arg(long)] + key: String, + /// Canonical unit for this metric key. + #[arg(long, value_enum)] + unit: CliMetricUnit, + /// Optimization direction for this metric key. + #[arg(long, value_enum)] + objective: CliOptimizationObjective, + /// Optional human description shown in metric listings. + #[arg(long)] + description: Option, +} + +#[derive(Args)] +struct MetricKeysArgs { + #[command(flatten)] + project: ProjectArg, + /// Restrict results to one frontier. + #[arg(long)] + frontier: Option, + /// Restrict results to one metric source. + #[arg(long, value_enum)] + source: Option, + /// Exact run-dimension filter in the form `key=value`. + #[arg(long = "dimension")] + dimensions: Vec, +} + +#[derive(Args)] +struct DimensionDefineArgs { + #[command(flatten)] + project: ProjectArg, + /// Run-dimension key used to slice experiments. + #[arg(long)] + key: String, + /// Canonical value type for this run dimension. + #[arg(long = "type", value_enum)] + value_type: CliFieldValueType, + /// Optional human description shown in dimension listings. 
+ #[arg(long)] + description: Option, +} + #[derive(Args)] struct QuickNoteArgs { #[command(flatten)] @@ -279,6 +366,8 @@ struct QuickNoteArgs { #[arg(long)] title: String, #[arg(long)] + summary: String, + #[arg(long)] body: String, #[command(flatten)] tag_selection: ExplicitTagSelectionArgs, @@ -305,13 +394,69 @@ struct QuickResearchArgs { #[arg(long)] title: String, #[arg(long)] - body: String, + summary: String, #[arg(long)] - summary: Option, + body: String, + #[command(flatten)] + tag_selection: ExplicitTagSelectionArgs, #[arg(long = "parent")] parents: Vec, } +#[derive(Args)] +struct SchemaFieldUpsertArgs { + #[command(flatten)] + project: ProjectArg, + #[arg(long)] + name: String, + #[arg(long = "class", value_enum)] + classes: Vec, + #[arg(long, value_enum)] + presence: CliFieldPresence, + #[arg(long, value_enum)] + severity: CliDiagnosticSeverity, + #[arg(long, value_enum)] + role: CliFieldRole, + #[arg(long = "inference", value_enum)] + inference_policy: CliInferencePolicy, + #[arg(long = "type", value_enum)] + value_type: Option, +} + +#[derive(Args)] +struct SchemaFieldRemoveArgs { + #[command(flatten)] + project: ProjectArg, + #[arg(long)] + name: String, + #[arg(long = "class", value_enum)] + classes: Vec, +} + +#[derive(Args)] +struct MetricBestArgs { + #[command(flatten)] + project: ProjectArg, + /// Metric key to rank on. + #[arg(long)] + key: String, + /// Restrict results to one frontier. + #[arg(long)] + frontier: Option, + /// Restrict results to one metric source. + #[arg(long, value_enum)] + source: Option, + /// Explicit ordering for sources whose objective cannot be inferred. + #[arg(long, value_enum)] + order: Option, + /// Exact run-dimension filter in the form `key=value`. + #[arg(long = "dimension")] + dimensions: Vec, + /// Maximum number of ranked experiments to return. 
+ #[arg(long, default_value_t = 10)] + limit: u32, +} + #[derive(Subcommand)] enum ExperimentCommand { /// Close a core-path experiment with checkpoint, run, note, and verdict. @@ -348,24 +493,23 @@ struct ExperimentCloseArgs { run_title: String, #[arg(long = "run-summary")] run_summary: Option, - #[arg(long = "benchmark-suite")] - benchmark_suite: String, + /// Repeat for each run dimension as `key=value`. + #[arg(long = "dimension")] + dimensions: Vec, #[arg(long = "backend", value_enum, default_value_t = CliExecutionBackend::Worktree)] backend: CliExecutionBackend, #[arg(long = "cwd")] working_directory: Option, + /// Repeat for each argv token passed to the recorded command. #[arg(long = "argv")] argv: Vec, + /// Repeat for each environment override as `KEY=VALUE`. #[arg(long = "env")] env: Vec, - #[arg(long = "primary-metric-key")] - primary_metric_key: String, - #[arg(long = "primary-metric-unit", value_enum)] - primary_metric_unit: CliMetricUnit, - #[arg(long = "primary-metric-objective", value_enum)] - primary_metric_objective: CliOptimizationObjective, - #[arg(long = "primary-metric-value")] - primary_metric_value: f64, + /// Primary metric in the form `key=value`; key must be preregistered. + #[arg(long = "primary-metric")] + primary_metric: String, + /// Supporting metric in the form `key=value`; repeat as needed. 
#[arg(long = "metric")] metrics: Vec, #[arg(long)] @@ -474,6 +618,57 @@ enum CliExecutionBackend { Ssh, } +#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] +enum CliMetricSource { + RunMetric, + ChangePayload, + RunPayload, + AnalysisPayload, + DecisionPayload, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] +enum CliMetricOrder { + Asc, + Desc, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] +enum CliFieldValueType { + String, + Numeric, + Boolean, + Timestamp, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] +enum CliDiagnosticSeverity { + Error, + Warning, + Info, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] +enum CliFieldPresence { + Required, + Recommended, + Optional, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] +enum CliFieldRole { + Index, + ProjectionGate, + RenderOnly, + Opaque, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] +enum CliInferencePolicy { + ManualOnly, + ModelMayInfer, +} + #[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] enum CliFrontierVerdict { PromoteToChampion, @@ -499,6 +694,8 @@ fn run() -> Result<(), StoreError> { let store = open_store(&project.project)?; print_json(store.schema()) } + SchemaCommand::UpsertField(args) => run_schema_field_upsert(args), + SchemaCommand::RemoveField(args) => run_schema_field_remove(args), }, Command::Frontier { command } => match command { FrontierCommand::Init(args) => run_frontier_init(args), @@ -521,6 +718,16 @@ fn run() -> Result<(), StoreError> { Command::Research(command) => match command.command { ResearchSubcommand::Add(args) => run_quick_research(args), }, + Command::Metric { command } => match command { + MetricCommand::Define(args) => run_metric_define(args), + MetricCommand::Keys(args) => run_metric_keys(args), + MetricCommand::Best(args) => run_metric_best(args), + MetricCommand::Migrate(project) => run_metric_migrate(project), + }, + Command::Dimension { command } => match command { + 
DimensionCommand::Define(args) => run_dimension_define(args), + DimensionCommand::List(project) => run_dimension_list(project), + }, Command::Experiment { command } => match command { ExperimentCommand::Close(args) => run_experiment_close(args), }, @@ -597,27 +804,58 @@ fn run_frontier_status(args: FrontierStatusArgs) -> Result<(), StoreError> { print_json(&frontiers) } +fn run_schema_field_upsert(args: SchemaFieldUpsertArgs) -> Result<(), StoreError> { + let mut store = open_store(&args.project.project)?; + let field = store.upsert_schema_field(UpsertSchemaFieldRequest { + name: NonEmptyText::new(args.name)?, + node_classes: parse_node_class_set(args.classes), + presence: args.presence.into(), + severity: args.severity.into(), + role: args.role.into(), + inference_policy: args.inference_policy.into(), + value_type: args.value_type.map(Into::into), + })?; + print_json(&json!({ + "schema": store.schema().schema_ref(), + "field": schema_field_json(&field), + })) +} + +fn run_schema_field_remove(args: SchemaFieldRemoveArgs) -> Result<(), StoreError> { + let mut store = open_store(&args.project.project)?; + let removed_count = store.remove_schema_field(RemoveSchemaFieldRequest { + name: NonEmptyText::new(args.name)?, + node_classes: (!args.classes.is_empty()).then(|| parse_node_class_set(args.classes)), + })?; + print_json(&json!({ + "schema": store.schema().schema_ref(), + "removed_count": removed_count, + })) +} + fn run_node_add(args: NodeAddArgs) -> Result<(), StoreError> { let mut store = open_store(&args.project.project)?; + let class: NodeClass = args.class.into(); let frontier_id = args .frontier .as_deref() .map(parse_frontier_id) .transpose()?; - let tags = optional_cli_tags(args.tag_selection, args.class == CliNodeClass::Note)?; + let tags = optional_cli_tags(args.tag_selection, class == NodeClass::Note)?; let payload = load_payload( store.schema().schema_ref(), args.payload_json, args.payload_file, args.fields, )?; + validate_cli_prose_payload(class, 
args.summary.as_deref(), &payload)?; let annotations = args .annotations .into_iter() .map(|body| Ok(NodeAnnotation::hidden(NonEmptyText::new(body)?))) .collect::, StoreError>>()?; let node = store.add_node(CreateNodeRequest { - class: args.class.into(), + class, frontier_id, title: NonEmptyText::new(args.title)?, summary: args.summary.map(NonEmptyText::new).transpose()?, @@ -693,7 +931,7 @@ fn run_quick_note(args: QuickNoteArgs) -> Result<(), StoreError> { .map(parse_frontier_id) .transpose()?, title: NonEmptyText::new(args.title)?, - summary: None, + summary: Some(NonEmptyText::new(args.summary)?), tags: Some(explicit_cli_tags(args.tag_selection)?), payload, annotations: Vec::new(), @@ -730,8 +968,8 @@ fn run_quick_research(args: QuickResearchArgs) -> Result<(), StoreError> { .map(parse_frontier_id) .transpose()?, title: NonEmptyText::new(args.title)?, - summary: args.summary.map(NonEmptyText::new).transpose()?, - tags: None, + summary: Some(NonEmptyText::new(args.summary)?), + tags: optional_cli_tags(args.tag_selection, false)?, payload, annotations: Vec::new(), attachments: lineage_attachments(args.parents)?, @@ -739,6 +977,69 @@ fn run_quick_research(args: QuickResearchArgs) -> Result<(), StoreError> { print_json(&node) } +fn run_metric_define(args: MetricDefineArgs) -> Result<(), StoreError> { + let mut store = open_store(&args.project.project)?; + let record = store.define_metric(DefineMetricRequest { + key: NonEmptyText::new(args.key)?, + unit: args.unit.into(), + objective: args.objective.into(), + description: args.description.map(NonEmptyText::new).transpose()?, + })?; + print_json(&record) +} + +fn run_metric_keys(args: MetricKeysArgs) -> Result<(), StoreError> { + let store = open_store(&args.project.project)?; + print_json( + &store.list_metric_keys_filtered(MetricKeyQuery { + frontier_id: args + .frontier + .as_deref() + .map(parse_frontier_id) + .transpose()?, + source: args.source.map(Into::into), + dimensions: coerce_cli_dimension_filters(&store, 
args.dimensions)?, + })?, + ) +} + +fn run_metric_best(args: MetricBestArgs) -> Result<(), StoreError> { + let store = open_store(&args.project.project)?; + let entries = store.best_metrics(MetricBestQuery { + key: NonEmptyText::new(args.key)?, + frontier_id: args + .frontier + .as_deref() + .map(parse_frontier_id) + .transpose()?, + source: args.source.map(Into::into), + dimensions: coerce_cli_dimension_filters(&store, args.dimensions)?, + order: args.order.map(Into::into), + limit: args.limit, + })?; + print_json(&entries) +} + +fn run_metric_migrate(args: ProjectArg) -> Result<(), StoreError> { + let mut store = open_store(&args.project)?; + print_json(&store.migrate_metric_plane()?) +} + +fn run_dimension_define(args: DimensionDefineArgs) -> Result<(), StoreError> { + let mut store = open_store(&args.project.project)?; + let record = store.define_run_dimension(DefineRunDimensionRequest { + key: NonEmptyText::new(args.key)?, + value_type: args.value_type.into(), + description: args.description.map(NonEmptyText::new).transpose()?, + })?; + print_json(&record) +} + +fn run_dimension_list(args: ProjectArg) -> Result<(), StoreError> { + let store = open_store(&args.project)?; + print_json(&store.list_run_dimensions()?) 
+} + fn run_experiment_close(args: ExperimentCloseArgs) -> Result<(), StoreError> { let mut store = open_store(&args.project.project)?; let frontier_id = parse_frontier_id(&args.frontier)?; @@ -764,19 +1065,14 @@ fn run_experiment_close(args: ExperimentCloseArgs) -> Result<(), StoreError> { run_title: NonEmptyText::new(args.run_title)?, run_summary: args.run_summary.map(NonEmptyText::new).transpose()?, backend: args.backend.into(), - benchmark_suite: NonEmptyText::new(args.benchmark_suite)?, + dimensions: coerce_cli_dimension_filters(&store, args.dimensions)?, command, code_snapshot: Some(capture_code_snapshot(store.project_root())?), - primary_metric: MetricObservation { - metric_key: NonEmptyText::new(args.primary_metric_key)?, - unit: args.primary_metric_unit.into(), - objective: args.primary_metric_objective.into(), - value: args.primary_metric_value, - }, + primary_metric: parse_metric_value(args.primary_metric)?, supporting_metrics: args .metrics .into_iter() - .map(parse_metric_observation) + .map(parse_metric_value) .collect::, _>>()?, note: FrontierNote { summary: NonEmptyText::new(args.note)?, @@ -1011,6 +1307,23 @@ fn load_payload( Ok(NodePayload::with_schema(schema, map)) } +fn validate_cli_prose_payload( + class: NodeClass, + summary: Option<&str>, + payload: &NodePayload, +) -> Result<(), StoreError> { + if !matches!(class, NodeClass::Note | NodeClass::Research) { + return Ok(()); + } + if summary.is_none() { + return Err(StoreError::ProseSummaryRequired(class)); + } + match payload.field("body") { + Some(Value::String(body)) if !body.trim().is_empty() => Ok(()), + _ => Err(StoreError::ProseBodyRequired(class)), + } +} + fn json_object(value: Value) -> Result, StoreError> { match value { Value::Object(map) => Ok(map), @@ -1020,6 +1333,22 @@ fn json_object(value: Value) -> Result, StoreError> { } } +fn schema_field_json(field: &ProjectFieldSpec) -> Value { + json!({ + "name": field.name, + "node_classes": 
field.node_classes.iter().map(ToString::to_string).collect::>(), + "presence": field.presence.as_str(), + "severity": field.severity.as_str(), + "role": field.role.as_str(), + "inference_policy": field.inference_policy.as_str(), + "value_type": field.value_type.map(FieldValueType::as_str), + }) +} + +fn parse_node_class_set(classes: Vec) -> BTreeSet { + classes.into_iter().map(Into::into).collect() +} + fn capture_code_snapshot(project_root: &Utf8Path) -> Result { let head_commit = run_git(project_root, &["rev-parse", "HEAD"])?; let dirty_paths = run_git(project_root, &["status", "--porcelain"])? @@ -1084,23 +1413,71 @@ fn maybe_print_gitignore_hint(project_root: &Utf8Path) -> Result<(), StoreError> } } -fn parse_metric_observation(raw: String) -> Result { - let parts = raw.split(':').collect::>(); - if parts.len() != 4 { - return Err(invalid_input( - "metrics must look like key:unit:objective:value", - )); - } - Ok(MetricObservation { - metric_key: NonEmptyText::new(parts[0])?, - unit: parse_metric_unit(parts[1])?, - objective: parse_optimization_objective(parts[2])?, - value: parts[3] +fn parse_metric_value(raw: String) -> Result { + let Some((key, value)) = raw.split_once('=') else { + return Err(invalid_input("metrics must look like key=value")); + }; + Ok(MetricValue { + key: NonEmptyText::new(key)?, + value: value .parse::() .map_err(|error| invalid_input(format!("invalid metric value: {error}")))?, }) } +fn coerce_cli_dimension_filters( + store: &ProjectStore, + raw_dimensions: Vec, +) -> Result, StoreError> { + let definitions = store + .list_run_dimensions()? + .into_iter() + .map(|summary| (summary.key.to_string(), summary.value_type)) + .collect::>(); + let raw_dimensions = parse_dimension_assignments(raw_dimensions)? 
+ .into_iter() + .map(|(key, raw_value)| { + let Some(value_type) = definitions.get(&key) else { + return Err(invalid_input(format!( + "unknown run dimension `{key}`; register it first" + ))); + }; + Ok((key, parse_cli_dimension_value(*value_type, &raw_value)?)) + }) + .collect::, StoreError>>()?; + store.coerce_run_dimensions(raw_dimensions) +} + +fn parse_dimension_assignments( + raw_dimensions: Vec, +) -> Result, StoreError> { + raw_dimensions + .into_iter() + .map(|raw| { + let Some((key, value)) = raw.split_once('=') else { + return Err(invalid_input("dimensions must look like key=value")); + }; + Ok((key.to_owned(), value.to_owned())) + }) + .collect() +} + +fn parse_cli_dimension_value(value_type: FieldValueType, raw: &str) -> Result { + match value_type { + FieldValueType::String | FieldValueType::Timestamp => Ok(Value::String(raw.to_owned())), + FieldValueType::Numeric => Ok(json!(raw.parse::().map_err(|error| { + invalid_input(format!("invalid numeric dimension value: {error}")) + })?)), + FieldValueType::Boolean => match raw { + "true" => Ok(Value::Bool(true)), + "false" => Ok(Value::Bool(false)), + other => Err(invalid_input(format!( + "invalid boolean dimension value `{other}`" + ))), + }, + } +} + fn parse_metric_unit(raw: &str) -> Result { match raw { "seconds" => Ok(MetricUnit::Seconds), @@ -1204,6 +1581,78 @@ impl From for ExecutionBackend { } } +impl From for MetricFieldSource { + fn from(value: CliMetricSource) -> Self { + match value { + CliMetricSource::RunMetric => Self::RunMetric, + CliMetricSource::ChangePayload => Self::ChangePayload, + CliMetricSource::RunPayload => Self::RunPayload, + CliMetricSource::AnalysisPayload => Self::AnalysisPayload, + CliMetricSource::DecisionPayload => Self::DecisionPayload, + } + } +} + +impl From for MetricRankOrder { + fn from(value: CliMetricOrder) -> Self { + match value { + CliMetricOrder::Asc => Self::Asc, + CliMetricOrder::Desc => Self::Desc, + } + } +} + +impl From for FieldValueType { + fn from(value: 
CliFieldValueType) -> Self { + match value { + CliFieldValueType::String => Self::String, + CliFieldValueType::Numeric => Self::Numeric, + CliFieldValueType::Boolean => Self::Boolean, + CliFieldValueType::Timestamp => Self::Timestamp, + } + } +} + +impl From for DiagnosticSeverity { + fn from(value: CliDiagnosticSeverity) -> Self { + match value { + CliDiagnosticSeverity::Error => Self::Error, + CliDiagnosticSeverity::Warning => Self::Warning, + CliDiagnosticSeverity::Info => Self::Info, + } + } +} + +impl From for FieldPresence { + fn from(value: CliFieldPresence) -> Self { + match value { + CliFieldPresence::Required => Self::Required, + CliFieldPresence::Recommended => Self::Recommended, + CliFieldPresence::Optional => Self::Optional, + } + } +} + +impl From for FieldRole { + fn from(value: CliFieldRole) -> Self { + match value { + CliFieldRole::Index => Self::Index, + CliFieldRole::ProjectionGate => Self::ProjectionGate, + CliFieldRole::RenderOnly => Self::RenderOnly, + CliFieldRole::Opaque => Self::Opaque, + } + } +} + +impl From for InferencePolicy { + fn from(value: CliInferencePolicy) -> Self { + match value { + CliInferencePolicy::ManualOnly => Self::ManualOnly, + CliInferencePolicy::ModelMayInfer => Self::ModelMayInfer, + } + } +} + impl From for FrontierVerdict { fn from(value: CliFrontierVerdict) -> Self { match value { diff --git a/crates/fidget-spinner-cli/src/mcp/catalog.rs b/crates/fidget-spinner-cli/src/mcp/catalog.rs index b23cb31..0831ba4 100644 --- a/crates/fidget-spinner-cli/src/mcp/catalog.rs +++ b/crates/fidget-spinner-cli/src/mcp/catalog.rs @@ -67,6 +67,18 @@ pub(crate) fn tool_spec(name: &str) -> Option { dispatch: DispatchTarget::Worker, replay: ReplayContract::Convergent, }), + "schema.field.upsert" => Some(ToolSpec { + name: "schema.field.upsert", + description: "Add or replace one project-local payload schema field definition.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::NeverReplay, + }), + "schema.field.remove" => 
Some(ToolSpec { + name: "schema.field.remove", + description: "Remove one project-local payload schema field definition, optionally narrowed by node-class set.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::NeverReplay, + }), "tag.add" => Some(ToolSpec { name: "tag.add", description: "Register one repo-local tag with a required description. Notes may only reference tags from this registry.", @@ -145,9 +157,45 @@ pub(crate) fn tool_spec(name: &str) -> Option { dispatch: DispatchTarget::Worker, replay: ReplayContract::NeverReplay, }), + "metric.define" => Some(ToolSpec { + name: "metric.define", + description: "Register one project-level metric definition so experiment ingestion only has to send key/value observations.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::NeverReplay, + }), + "run.dimension.define" => Some(ToolSpec { + name: "run.dimension.define", + description: "Register one project-level run dimension used to slice metrics across scenarios, budgets, and flags.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::NeverReplay, + }), + "run.dimension.list" => Some(ToolSpec { + name: "run.dimension.list", + description: "List registered run dimensions together with observed value counts and sample values.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::Convergent, + }), + "metric.keys" => Some(ToolSpec { + name: "metric.keys", + description: "List rankable metric keys, including registered run metrics and observed payload-derived numeric fields.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::Convergent, + }), + "metric.best" => Some(ToolSpec { + name: "metric.best", + description: "Rank completed experiments by one numeric key, with optional run-dimension filters and candidate commit surfacing.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::Convergent, + }), + "metric.migrate" => Some(ToolSpec { + name: "metric.migrate", + description: "Re-run the idempotent legacy 
metric-plane normalization that registers canonical metrics and backfills benchmark_suite dimensions.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::NeverReplay, + }), "experiment.close" => Some(ToolSpec { name: "experiment.close", - description: "Atomically close a core-path experiment with candidate checkpoint capture, measured result, note, and verdict.", + description: "Atomically close a core-path experiment with typed run dimensions, preregistered metric observations, candidate checkpoint capture, note, and verdict.", dispatch: DispatchTarget::Worker, replay: ReplayContract::NeverReplay, }), @@ -212,6 +260,8 @@ pub(crate) fn tool_definitions() -> Vec { "project.bind", "project.status", "project.schema", + "schema.field.upsert", + "schema.field.remove", "tag.add", "tag.list", "frontier.list", @@ -225,6 +275,12 @@ pub(crate) fn tool_definitions() -> Vec { "node.archive", "note.quick", "research.record", + "metric.define", + "run.dimension.define", + "run.dimension.list", + "metric.keys", + "metric.best", + "metric.migrate", "experiment.close", "skill.list", "skill.show", @@ -277,7 +333,32 @@ pub(crate) fn list_resources() -> Vec { fn input_schema(name: &str) -> Value { match name { "project.status" | "project.schema" | "tag.list" | "skill.list" | "system.health" - | "system.telemetry" => json!({"type":"object","additionalProperties":false}), + | "system.telemetry" | "run.dimension.list" | "metric.migrate" => { + json!({"type":"object","additionalProperties":false}) + } + "schema.field.upsert" => json!({ + "type": "object", + "properties": { + "name": { "type": "string", "description": "Project payload field name." }, + "node_classes": { "type": "array", "items": node_class_schema(), "description": "Optional node-class scope. Omit or pass [] for all classes." 
}, + "presence": field_presence_schema(), + "severity": diagnostic_severity_schema(), + "role": field_role_schema(), + "inference_policy": inference_policy_schema(), + "value_type": field_value_type_schema(), + }, + "required": ["name", "presence", "severity", "role", "inference_policy"], + "additionalProperties": false + }), + "schema.field.remove" => json!({ + "type": "object", + "properties": { + "name": { "type": "string", "description": "Project payload field name." }, + "node_classes": { "type": "array", "items": node_class_schema(), "description": "Optional exact node-class scope to remove." } + }, + "required": ["name"], + "additionalProperties": false + }), "project.bind" => json!({ "type": "object", "properties": { @@ -333,9 +414,9 @@ fn input_schema(name: &str) -> Value { "class": node_class_schema(), "frontier_id": { "type": "string" }, "title": { "type": "string" }, - "summary": { "type": "string" }, - "tags": { "type": "array", "items": tag_name_schema() }, - "payload": { "type": "object" }, + "summary": { "type": "string", "description": "Required for `note` and `research` nodes." }, + "tags": { "type": "array", "items": tag_name_schema(), "description": "Required for `note` nodes; optional for other classes." }, + "payload": { "type": "object", "description": "`note` and `research` nodes require a non-empty string `body` field." 
}, "annotations": { "type": "array", "items": annotation_schema() }, "parents": { "type": "array", "items": { "type": "string" } } }, @@ -393,12 +474,13 @@ fn input_schema(name: &str) -> Value { "properties": { "frontier_id": { "type": "string" }, "title": { "type": "string" }, + "summary": { "type": "string" }, "body": { "type": "string" }, "tags": { "type": "array", "items": tag_name_schema() }, "annotations": { "type": "array", "items": annotation_schema() }, "parents": { "type": "array", "items": { "type": "string" } } }, - "required": ["title", "body", "tags"], + "required": ["title", "summary", "body", "tags"], "additionalProperties": false }), "research.record" => json!({ @@ -408,10 +490,54 @@ fn input_schema(name: &str) -> Value { "title": { "type": "string" }, "summary": { "type": "string" }, "body": { "type": "string" }, + "tags": { "type": "array", "items": tag_name_schema() }, "annotations": { "type": "array", "items": annotation_schema() }, "parents": { "type": "array", "items": { "type": "string" } } }, - "required": ["title", "body"], + "required": ["title", "summary", "body"], + "additionalProperties": false + }), + "metric.define" => json!({ + "type": "object", + "properties": { + "key": { "type": "string" }, + "unit": metric_unit_schema(), + "objective": optimization_objective_schema(), + "description": { "type": "string" } + }, + "required": ["key", "unit", "objective"], + "additionalProperties": false + }), + "run.dimension.define" => json!({ + "type": "object", + "properties": { + "key": { "type": "string" }, + "value_type": field_value_type_schema(), + "description": { "type": "string" } + }, + "required": ["key", "value_type"], + "additionalProperties": false + }), + "metric.keys" => json!({ + "type": "object", + "properties": { + "frontier_id": { "type": "string" }, + "source": metric_source_schema(), + "dimensions": { "type": "object" } + }, + "additionalProperties": false + }), + "metric.best" => json!({ + "type": "object", + "properties": 
{ + "key": { "type": "string" }, + "frontier_id": { "type": "string" }, + "source": metric_source_schema(), + "dimensions": { "type": "object" }, + "order": metric_order_schema(), + "limit": { "type": "integer", "minimum": 1, "maximum": 500 } + }, + "required": ["key"], "additionalProperties": false }), "experiment.close" => json!({ @@ -422,8 +548,8 @@ fn input_schema(name: &str) -> Value { "change_node_id": { "type": "string" }, "candidate_summary": { "type": "string" }, "run": run_schema(), - "primary_metric": metric_observation_schema(), - "supporting_metrics": { "type": "array", "items": metric_observation_schema() }, + "primary_metric": metric_value_schema(), + "supporting_metrics": { "type": "array", "items": metric_value_schema() }, "note": note_schema(), "verdict": verdict_schema(), "decision_title": { "type": "string" }, @@ -461,16 +587,14 @@ fn metric_spec_schema() -> Value { }) } -fn metric_observation_schema() -> Value { +fn metric_value_schema() -> Value { json!({ "type": "object", "properties": { "key": { "type": "string" }, - "unit": metric_unit_schema(), - "objective": optimization_objective_schema(), "value": { "type": "number" } }, - "required": ["key", "unit", "objective", "value"], + "required": ["key", "value"], "additionalProperties": false }) } @@ -509,6 +633,61 @@ fn metric_unit_schema() -> Value { }) } +fn metric_source_schema() -> Value { + json!({ + "type": "string", + "enum": [ + "run_metric", + "change_payload", + "run_payload", + "analysis_payload", + "decision_payload" + ] + }) +} + +fn metric_order_schema() -> Value { + json!({ + "type": "string", + "enum": ["asc", "desc"] + }) +} + +fn field_value_type_schema() -> Value { + json!({ + "type": "string", + "enum": ["string", "numeric", "boolean", "timestamp"] + }) +} + +fn diagnostic_severity_schema() -> Value { + json!({ + "type": "string", + "enum": ["error", "warning", "info"] + }) +} + +fn field_presence_schema() -> Value { + json!({ + "type": "string", + "enum": ["required", 
"recommended", "optional"] + }) +} + +fn field_role_schema() -> Value { + json!({ + "type": "string", + "enum": ["index", "projection_gate", "render_only", "opaque"] + }) +} + +fn inference_policy_schema() -> Value { + json!({ + "type": "string", + "enum": ["manual_only", "model_may_infer"] + }) +} + fn optimization_objective_schema() -> Value { json!({ "type": "string", @@ -539,7 +718,7 @@ fn run_schema() -> Value { "type": "string", "enum": ["local_process", "worktree_process", "ssh_process"] }, - "benchmark_suite": { "type": "string" }, + "dimensions": { "type": "object" }, "command": { "type": "object", "properties": { @@ -554,7 +733,7 @@ fn run_schema() -> Value { "additionalProperties": false } }, - "required": ["title", "backend", "benchmark_suite", "command"], + "required": ["title", "backend", "dimensions", "command"], "additionalProperties": false }) } diff --git a/crates/fidget-spinner-cli/src/mcp/host/runtime.rs b/crates/fidget-spinner-cli/src/mcp/host/runtime.rs index f84f604..d57a21e 100644 --- a/crates/fidget-spinner-cli/src/mcp/host/runtime.rs +++ b/crates/fidget-spinner-cli/src/mcp/host/runtime.rs @@ -838,20 +838,6 @@ fn system_health_output(health: &HealthSnapshot) -> Result Result { + let args = deserialize::(arguments)?; + let field = self + .store + .upsert_schema_field(UpsertSchemaFieldRequest { + name: NonEmptyText::new(args.name) + .map_err(store_fault("tools/call:schema.field.upsert"))?, + node_classes: args + .node_classes + .unwrap_or_default() + .into_iter() + .map(|class| { + parse_node_class_name(&class) + .map_err(store_fault("tools/call:schema.field.upsert")) + }) + .collect::>()?, + presence: parse_field_presence_name(&args.presence) + .map_err(store_fault("tools/call:schema.field.upsert"))?, + severity: parse_diagnostic_severity_name(&args.severity) + .map_err(store_fault("tools/call:schema.field.upsert"))?, + role: parse_field_role_name(&args.role) + .map_err(store_fault("tools/call:schema.field.upsert"))?, + inference_policy: 
parse_inference_policy_name(&args.inference_policy) + .map_err(store_fault("tools/call:schema.field.upsert"))?, + value_type: args + .value_type + .as_deref() + .map(parse_field_value_type_name) + .transpose() + .map_err(store_fault("tools/call:schema.field.upsert"))?, + }) + .map_err(store_fault("tools/call:schema.field.upsert"))?; + tool_success( + schema_field_upsert_output(self.store.schema(), &field)?, + presentation, + FaultStage::Worker, + "tools/call:schema.field.upsert", + ) + } + "schema.field.remove" => { + let args = deserialize::(arguments)?; + let removed_count = self + .store + .remove_schema_field(RemoveSchemaFieldRequest { + name: NonEmptyText::new(args.name) + .map_err(store_fault("tools/call:schema.field.remove"))?, + node_classes: args + .node_classes + .map(|node_classes| { + node_classes + .into_iter() + .map(|class| { + parse_node_class_name(&class) + .map_err(store_fault("tools/call:schema.field.remove")) + }) + .collect::>() + }) + .transpose()?, + }) + .map_err(store_fault("tools/call:schema.field.remove"))?; + tool_success( + schema_field_remove_output(self.store.schema(), removed_count)?, + presentation, + FaultStage::Worker, + "tools/call:schema.field.remove", + ) + } "tag.add" => { let args = deserialize::(arguments)?; let tag = self @@ -402,7 +472,10 @@ impl WorkerService { .map_err(store_fault("tools/call:note.quick"))?, title: NonEmptyText::new(args.title) .map_err(store_fault("tools/call:note.quick"))?, - summary: None, + summary: Some( + NonEmptyText::new(args.summary) + .map_err(store_fault("tools/call:note.quick"))?, + ), tags: Some( parse_tag_set(args.tags) .map_err(store_fault("tools/call:note.quick"))?, @@ -439,12 +512,14 @@ impl WorkerService { .map_err(store_fault("tools/call:research.record"))?, title: NonEmptyText::new(args.title) .map_err(store_fault("tools/call:research.record"))?, - summary: args - .summary - .map(NonEmptyText::new) - .transpose() - .map_err(store_fault("tools/call:research.record"))?, - tags: None, + 
summary: Some( + NonEmptyText::new(args.summary) + .map_err(store_fault("tools/call:research.record"))?, + ), + tags: Some( + parse_tag_set(args.tags) + .map_err(store_fault("tools/call:research.record"))?, + ), payload: NodePayload::with_schema( self.store.schema().schema_ref(), crate::json_object(json!({ "body": args.body })) @@ -463,6 +538,170 @@ impl WorkerService { "tools/call:research.record", ) } + "metric.define" => { + let args = deserialize::(arguments)?; + let metric = self + .store + .define_metric(DefineMetricRequest { + key: NonEmptyText::new(args.key) + .map_err(store_fault("tools/call:metric.define"))?, + unit: parse_metric_unit_name(&args.unit) + .map_err(store_fault("tools/call:metric.define"))?, + objective: crate::parse_optimization_objective(&args.objective) + .map_err(store_fault("tools/call:metric.define"))?, + description: args + .description + .map(NonEmptyText::new) + .transpose() + .map_err(store_fault("tools/call:metric.define"))?, + }) + .map_err(store_fault("tools/call:metric.define"))?; + tool_success( + json_created_output( + "registered metric", + json!({ + "key": metric.key, + "unit": metric_unit_name(metric.unit), + "objective": metric_objective_name(metric.objective), + "description": metric.description, + }), + "tools/call:metric.define", + )?, + presentation, + FaultStage::Worker, + "tools/call:metric.define", + ) + } + "run.dimension.define" => { + let args = deserialize::(arguments)?; + let dimension = self + .store + .define_run_dimension(DefineRunDimensionRequest { + key: NonEmptyText::new(args.key) + .map_err(store_fault("tools/call:run.dimension.define"))?, + value_type: parse_field_value_type_name(&args.value_type) + .map_err(store_fault("tools/call:run.dimension.define"))?, + description: args + .description + .map(NonEmptyText::new) + .transpose() + .map_err(store_fault("tools/call:run.dimension.define"))?, + }) + .map_err(store_fault("tools/call:run.dimension.define"))?; + tool_success( + json_created_output( + 
"registered run dimension", + json!({ + "key": dimension.key, + "value_type": dimension.value_type.as_str(), + "description": dimension.description, + }), + "tools/call:run.dimension.define", + )?, + presentation, + FaultStage::Worker, + "tools/call:run.dimension.define", + ) + } + "run.dimension.list" => { + let items = self + .store + .list_run_dimensions() + .map_err(store_fault("tools/call:run.dimension.list"))?; + tool_success( + run_dimension_list_output(items.as_slice())?, + presentation, + FaultStage::Worker, + "tools/call:run.dimension.list", + ) + } + "metric.keys" => { + let args = deserialize::(arguments)?; + let keys = self + .store + .list_metric_keys_filtered(MetricKeyQuery { + frontier_id: args + .frontier_id + .as_deref() + .map(crate::parse_frontier_id) + .transpose() + .map_err(store_fault("tools/call:metric.keys"))?, + source: args + .source + .as_deref() + .map(parse_metric_source_name) + .transpose() + .map_err(store_fault("tools/call:metric.keys"))?, + dimensions: coerce_tool_dimensions( + &self.store, + args.dimensions.unwrap_or_default(), + "tools/call:metric.keys", + )?, + }) + .map_err(store_fault("tools/call:metric.keys"))?; + tool_success( + metric_keys_output(keys.as_slice())?, + presentation, + FaultStage::Worker, + "tools/call:metric.keys", + ) + } + "metric.best" => { + let args = deserialize::(arguments)?; + let items = self + .store + .best_metrics(MetricBestQuery { + key: NonEmptyText::new(args.key) + .map_err(store_fault("tools/call:metric.best"))?, + frontier_id: args + .frontier_id + .as_deref() + .map(crate::parse_frontier_id) + .transpose() + .map_err(store_fault("tools/call:metric.best"))?, + source: args + .source + .as_deref() + .map(parse_metric_source_name) + .transpose() + .map_err(store_fault("tools/call:metric.best"))?, + dimensions: coerce_tool_dimensions( + &self.store, + args.dimensions.unwrap_or_default(), + "tools/call:metric.best", + )?, + order: args + .order + .as_deref() + .map(parse_metric_order_name) + 
.transpose() + .map_err(store_fault("tools/call:metric.best"))?, + limit: args.limit.unwrap_or(10), + }) + .map_err(store_fault("tools/call:metric.best"))?; + tool_success( + metric_best_output(items.as_slice())?, + presentation, + FaultStage::Worker, + "tools/call:metric.best", + ) + } + "metric.migrate" => { + let report = self + .store + .migrate_metric_plane() + .map_err(store_fault("tools/call:metric.migrate"))?; + tool_success( + json_created_output( + "normalized legacy metric plane", + json!(report), + "tools/call:metric.migrate", + )?, + presentation, + FaultStage::Worker, + "tools/call:metric.migrate", + ) + } "experiment.close" => { let args = deserialize::(arguments)?; let frontier_id = crate::parse_frontier_id(&args.frontier_id) @@ -507,8 +746,11 @@ impl WorkerService { .map_err(store_fault("tools/call:experiment.close"))?, backend: parse_backend_name(&args.run.backend) .map_err(store_fault("tools/call:experiment.close"))?, - benchmark_suite: NonEmptyText::new(args.run.benchmark_suite) - .map_err(store_fault("tools/call:experiment.close"))?, + dimensions: coerce_tool_dimensions( + &self.store, + args.run.dimensions, + "tools/call:experiment.close", + )?, command: command_recipe_from_wire( args.run.command, self.store.project_root(), @@ -518,12 +760,12 @@ impl WorkerService { capture_code_snapshot(self.store.project_root()) .map_err(store_fault("tools/call:experiment.close"))?, ), - primary_metric: metric_observation_from_wire(args.primary_metric) + primary_metric: metric_value_from_wire(args.primary_metric) .map_err(store_fault("tools/call:experiment.close"))?, supporting_metrics: args .supporting_metrics .into_iter() - .map(metric_observation_from_wire) + .map(metric_value_from_wire) .collect::, _>>() .map_err(store_fault("tools/call:experiment.close"))?, note: FrontierNote { @@ -547,7 +789,7 @@ impl WorkerService { }) .map_err(store_fault("tools/call:experiment.close"))?; tool_success( - experiment_close_output(&receipt)?, + 
experiment_close_output(&self.store, &receipt)?, presentation, FaultStage::Worker, "tools/call:experiment.close", @@ -692,8 +934,8 @@ fn project_schema_output(schema: &ProjectSchema) -> Result>() .join(",") }, - format!("{:?}", field.presence).to_ascii_lowercase(), - format!("{:?}", field.role).to_ascii_lowercase(), + field.presence.as_str(), + field.role.as_str(), )); } if schema.fields.len() > 8 { @@ -709,6 +951,59 @@ fn project_schema_output(schema: &ProjectSchema) -> Result Result { + let concise = json!({ + "schema": schema.schema_ref(), + "field": project_schema_field_value(field), + }); + detailed_tool_output( + &concise, + &concise, + format!( + "upserted schema field {}\nschema: {}\nclasses: {}\npresence: {}\nseverity: {}\nrole: {}\ninference: {}{}", + field.name, + schema_label(schema), + render_schema_node_classes(&field.node_classes), + field.presence.as_str(), + field.severity.as_str(), + field.role.as_str(), + field.inference_policy.as_str(), + field + .value_type + .map(|value_type| format!("\nvalue_type: {}", value_type.as_str())) + .unwrap_or_default(), + ), + None, + FaultStage::Worker, + "tools/call:schema.field.upsert", + ) +} + +fn schema_field_remove_output( + schema: &ProjectSchema, + removed_count: u64, +) -> Result { + let concise = json!({ + "schema": schema.schema_ref(), + "removed_count": removed_count, + }); + detailed_tool_output( + &concise, + &concise, + format!( + "removed {} schema field definition(s)\nschema: {}", + removed_count, + schema_label(schema), + ), + None, + FaultStage::Worker, + "tools/call:schema.field.remove", + ) +} + fn tag_add_output(tag: &TagRecord) -> Result { let concise = json!({ "name": tag.name, @@ -892,14 +1187,33 @@ fn node_read_output(node: &fidget_spinner_core::DagNode) -> Result>(); + if !filtered_fields.is_empty() { + let _ = concise.insert( + "payload_field_count".to_owned(), + json!(filtered_fields.len()), + ); + if is_prose_node(node.class) { + let _ = concise.insert( + "payload_fields".to_owned(), 
+ json!( + filtered_fields + .iter() + .take(6) + .map(|(name, _)| (*name).clone()) + .collect::>() + ), + ); + } else { + let payload_preview = payload_preview_value(node.class, &node.payload.fields); + if let Value::Object(object) = &payload_preview + && !object.is_empty() + { + let _ = concise.insert("payload_preview".to_owned(), payload_preview); + } + } + } } if !node.diagnostics.items.is_empty() { let _ = concise.insert( @@ -930,7 +1244,7 @@ fn node_read_output(node: &fidget_spinner_core::DagNode) -> Result Result Result { +fn experiment_close_output( + store: &ProjectStore, + receipt: &ExperimentReceipt, +) -> Result { let concise = json!({ "experiment_id": receipt.experiment.id, "frontier_id": receipt.experiment.frontier_id, @@ -980,7 +1297,8 @@ fn experiment_close_output(receipt: &ExperimentReceipt) -> Result Result Result Value { +fn metric_keys_output(keys: &[MetricKeySummary]) -> Result { + let concise = keys + .iter() + .map(|key| { + json!({ + "key": key.key, + "source": key.source.as_str(), + "experiment_count": key.experiment_count, + "unit": key.unit.map(metric_unit_name), + "objective": key.objective.map(metric_objective_name), + "description": key.description, + "requires_order": key.requires_order, + }) + }) + .collect::>(); + let mut lines = vec![format!("{} metric key(s)", keys.len())]; + lines.extend(keys.iter().map(|key| { + let mut line = format!( + "{} [{}] experiments={}", + key.key, + key.source.as_str(), + key.experiment_count + ); + if let Some(unit) = key.unit { + line.push_str(format!(" unit={}", metric_unit_name(unit)).as_str()); + } + if let Some(objective) = key.objective { + line.push_str(format!(" objective={}", metric_objective_name(objective)).as_str()); + } + if let Some(description) = key.description.as_ref() { + line.push_str(format!(" | {description}").as_str()); + } + if key.requires_order { + line.push_str(" order=required"); + } + line + })); + detailed_tool_output( + &concise, + &keys, + lines.join("\n"), + None, + 
FaultStage::Worker, + "tools/call:metric.keys", + ) +} + +fn metric_best_output( + items: &[fidget_spinner_store_sqlite::MetricBestEntry], +) -> Result { + let concise = items + .iter() + .enumerate() + .map(|(index, item)| { + json!({ + "rank": index + 1, + "key": item.key, + "source": item.source.as_str(), + "value": item.value, + "order": item.order.as_str(), + "experiment_id": item.experiment_id, + "frontier_id": item.frontier_id, + "change_node_id": item.change_node_id, + "change_title": item.change_title, + "verdict": metric_verdict_name(item.verdict), + "candidate_checkpoint_id": item.candidate_checkpoint_id, + "candidate_commit_hash": item.candidate_commit_hash, + "run_id": item.run_id, + "unit": item.unit.map(metric_unit_name), + "objective": item.objective.map(metric_objective_name), + "dimensions": run_dimensions_value(&item.dimensions), + }) + }) + .collect::>(); + let mut lines = vec![format!("{} ranked experiment(s)", items.len())]; + lines.extend(items.iter().enumerate().map(|(index, item)| { + format!( + "{}. 
{}={} [{}] {} | verdict={} | commit={} | checkpoint={}", + index + 1, + item.key, + item.value, + item.source.as_str(), + item.change_title, + metric_verdict_name(item.verdict), + item.candidate_commit_hash, + item.candidate_checkpoint_id, + ) + })); + lines.extend( + items + .iter() + .map(|item| format!(" dims: {}", render_dimension_kv(&item.dimensions))), + ); + detailed_tool_output( + &concise, + &items, + lines.join("\n"), + None, + FaultStage::Worker, + "tools/call:metric.best", + ) +} + +fn run_dimension_list_output( + items: &[fidget_spinner_store_sqlite::RunDimensionSummary], +) -> Result { + let concise = items + .iter() + .map(|item| { + json!({ + "key": item.key, + "value_type": item.value_type.as_str(), + "description": item.description, + "observed_run_count": item.observed_run_count, + "distinct_value_count": item.distinct_value_count, + "sample_values": item.sample_values, + }) + }) + .collect::>(); + let mut lines = vec![format!("{} run dimension(s)", items.len())]; + lines.extend(items.iter().map(|item| { + let mut line = format!( + "{} [{}] runs={} distinct={}", + item.key, + item.value_type.as_str(), + item.observed_run_count, + item.distinct_value_count + ); + if let Some(description) = item.description.as_ref() { + line.push_str(format!(" | {description}").as_str()); + } + if !item.sample_values.is_empty() { + line.push_str( + format!( + " | samples={}", + item.sample_values + .iter() + .map(value_summary) + .collect::>() + .join(", ") + ) + .as_str(), + ); + } + line + })); + detailed_tool_output( + &concise, + &items, + lines.join("\n"), + None, + FaultStage::Worker, + "tools/call:run.dimension.list", + ) +} + +fn json_created_output( + headline: &str, + structured: Value, + operation: &'static str, +) -> Result { + detailed_tool_output( + &structured, + &structured, + format!( + "{headline}\n{}", + crate::to_pretty_json(&structured).map_err(store_fault(operation))? 
+ ), + None, + FaultStage::Worker, + operation, + ) +} + +fn project_schema_field_value(field: &ProjectFieldSpec) -> Value { let mut value = Map::new(); let _ = value.insert("name".to_owned(), json!(field.name)); if !field.node_classes.is_empty() { @@ -1023,21 +1519,12 @@ fn project_schema_field_value(field: &fidget_spinner_core::ProjectFieldSpec) -> ), ); } - let _ = value.insert( - "presence".to_owned(), - json!(format!("{:?}", field.presence).to_ascii_lowercase()), - ); - let _ = value.insert( - "severity".to_owned(), - json!(format!("{:?}", field.severity).to_ascii_lowercase()), - ); - let _ = value.insert( - "role".to_owned(), - json!(format!("{:?}", field.role).to_ascii_lowercase()), - ); + let _ = value.insert("presence".to_owned(), json!(field.presence.as_str())); + let _ = value.insert("severity".to_owned(), json!(field.severity.as_str())); + let _ = value.insert("role".to_owned(), json!(field.role.as_str())); let _ = value.insert( "inference_policy".to_owned(), - json!(format!("{:?}", field.inference_policy).to_ascii_lowercase()), + json!(field.inference_policy.as_str()), ); if let Some(value_type) = field.value_type { let _ = value.insert("value_type".to_owned(), json!(value_type.as_str())); @@ -1045,6 +1532,17 @@ fn project_schema_field_value(field: &fidget_spinner_core::ProjectFieldSpec) -> Value::Object(value) } +fn render_schema_node_classes(node_classes: &BTreeSet) -> String { + if node_classes.is_empty() { + return "any".to_owned(); + } + node_classes + .iter() + .map(ToString::to_string) + .collect::>() + .join(", ") +} + fn frontier_projection_summary_value(projection: &FrontierProjection) -> Value { json!({ "frontier_id": projection.frontier.id, @@ -1211,17 +1709,17 @@ fn diagnostic_tally(diagnostics: &fidget_spinner_core::NodeDiagnostics) -> Diagn .fold(DiagnosticTally::default(), |mut tally, item| { tally.total += 1; match item.severity { - fidget_spinner_core::DiagnosticSeverity::Error => tally.errors += 1, - 
fidget_spinner_core::DiagnosticSeverity::Warning => tally.warnings += 1, - fidget_spinner_core::DiagnosticSeverity::Info => tally.infos += 1, + DiagnosticSeverity::Error => tally.errors += 1, + DiagnosticSeverity::Warning => tally.warnings += 1, + DiagnosticSeverity::Info => tally.infos += 1, } tally }) } -fn payload_preview_value(fields: &Map) -> Value { +fn payload_preview_value(class: NodeClass, fields: &Map) -> Value { let mut preview = Map::new(); - for (index, (name, value)) in fields.iter().enumerate() { + for (index, (name, value)) in filtered_payload_fields(class, fields).enumerate() { if index == 6 { let _ = preview.insert( "...".to_owned(), @@ -1234,14 +1732,33 @@ fn payload_preview_value(fields: &Map) -> Value { Value::Object(preview) } -fn payload_preview_lines(fields: &Map) -> Vec { - if fields.is_empty() { +fn payload_preview_lines(class: NodeClass, fields: &Map) -> Vec { + let filtered = filtered_payload_fields(class, fields).collect::>(); + if filtered.is_empty() { return Vec::new(); } - let mut lines = vec![format!("payload fields: {}", fields.len())]; - for (index, (name, value)) in fields.iter().enumerate() { + if is_prose_node(class) { + let preview_names = filtered + .iter() + .take(6) + .map(|(name, _)| (*name).clone()) + .collect::>(); + let mut lines = vec![format!("payload fields: {}", preview_names.join(", "))]; + if filtered.len() > preview_names.len() { + lines.push(format!( + "payload fields: +{} more field(s)", + filtered.len() - preview_names.len() + )); + } + return lines; + } + let mut lines = vec![format!("payload fields: {}", filtered.len())]; + for (index, (name, value)) in filtered.iter().enumerate() { if index == 6 { - lines.push(format!("payload: +{} more field(s)", fields.len() - index)); + lines.push(format!( + "payload: +{} more field(s)", + filtered.len() - index + )); break; } lines.push(format!( @@ -1253,10 +1770,19 @@ fn payload_preview_lines(fields: &Map) -> Vec { lines } +fn filtered_payload_fields( + class: 
NodeClass, + fields: &Map, +) -> impl Iterator + '_ { + fields.iter().filter(move |(name, _)| { + !matches!(class, NodeClass::Note | NodeClass::Research) || name.as_str() != "body" + }) +} + fn payload_value_preview(value: &Value) -> Value { match value { Value::Null | Value::Bool(_) | Value::Number(_) => value.clone(), - Value::String(text) => Value::String(libmcp::collapse_inline_whitespace(text)), + Value::String(text) => Value::String(truncated_inline_preview(text, 96)), Value::Array(items) => { let preview = items .iter() @@ -1290,25 +1816,89 @@ fn payload_value_preview(value: &Value) -> Value { } } -fn metric_value(metric: &MetricObservation) -> Value { - json!({ - "key": metric.metric_key, +fn is_prose_node(class: NodeClass) -> bool { + matches!(class, NodeClass::Note | NodeClass::Research) +} + +fn truncated_inline_preview(text: &str, limit: usize) -> String { + let collapsed = libmcp::collapse_inline_whitespace(text); + let truncated = libmcp::render::truncate_chars(&collapsed, Some(limit)); + if truncated.truncated { + format!("{}...", truncated.text) + } else { + truncated.text + } +} + +fn metric_value(store: &ProjectStore, metric: &MetricValue) -> Result { + let definition = metric_definition(store, &metric.key)?; + Ok(json!({ + "key": metric.key, "value": metric.value, - "unit": format!("{:?}", metric.unit).to_ascii_lowercase(), - "objective": format!("{:?}", metric.objective).to_ascii_lowercase(), - }) + "unit": metric_unit_name(definition.unit), + "objective": metric_objective_name(definition.objective), + })) } -fn metric_text(metric: &MetricObservation) -> String { - format!( +fn metric_text(store: &ProjectStore, metric: &MetricValue) -> Result { + let definition = metric_definition(store, &metric.key)?; + Ok(format!( "{}={} {} ({})", - metric.metric_key, + metric.key, metric.value, - format!("{:?}", metric.unit).to_ascii_lowercase(), - format!("{:?}", metric.objective).to_ascii_lowercase(), + metric_unit_name(definition.unit), + 
metric_objective_name(definition.objective), + )) +} + +fn metric_unit_name(unit: MetricUnit) -> &'static str { + match unit { + MetricUnit::Seconds => "seconds", + MetricUnit::Bytes => "bytes", + MetricUnit::Count => "count", + MetricUnit::Ratio => "ratio", + MetricUnit::Custom => "custom", + } +} + +fn metric_objective_name(objective: fidget_spinner_core::OptimizationObjective) -> &'static str { + match objective { + fidget_spinner_core::OptimizationObjective::Minimize => "minimize", + fidget_spinner_core::OptimizationObjective::Maximize => "maximize", + fidget_spinner_core::OptimizationObjective::Target => "target", + } +} + +fn metric_verdict_name(verdict: FrontierVerdict) -> &'static str { + match verdict { + FrontierVerdict::PromoteToChampion => "promote_to_champion", + FrontierVerdict::KeepOnFrontier => "keep_on_frontier", + FrontierVerdict::RevertToChampion => "revert_to_champion", + FrontierVerdict::ArchiveDeadEnd => "archive_dead_end", + FrontierVerdict::NeedsMoreEvidence => "needs_more_evidence", + } +} + +fn run_dimensions_value(dimensions: &BTreeMap) -> Value { + Value::Object( + dimensions + .iter() + .map(|(key, value)| (key.to_string(), value.as_json())) + .collect::>(), ) } +fn render_dimension_kv(dimensions: &BTreeMap) -> String { + if dimensions.is_empty() { + return "none".to_owned(); + } + dimensions + .iter() + .map(|(key, value)| format!("{key}={}", value_summary(&value.as_json()))) + .collect::>() + .join(", ") +} + fn format_tags(tags: &BTreeSet) -> String { tags.iter() .map(ToString::to_string) @@ -1360,6 +1950,12 @@ fn classify_fault_kind(message: &str) -> FaultKind { || message.contains("empty") || message.contains("already exists") || message.contains("require an explicit tag list") + || message.contains("requires a non-empty summary") + || message.contains("requires a non-empty string payload field `body`") + || message.contains("requires an explicit order") + || message.contains("is ambiguous across sources") + || 
message.contains("has conflicting semantics") + || message.contains("conflicts with existing definition") { FaultKind::InvalidInput } else { @@ -1414,17 +2010,44 @@ fn metric_spec_from_wire(raw: WireMetricSpec) -> Result }) } -fn metric_observation_from_wire( - raw: WireMetricObservation, -) -> Result { - Ok(MetricObservation { - metric_key: NonEmptyText::new(raw.key)?, - unit: parse_metric_unit_name(&raw.unit)?, - objective: crate::parse_optimization_objective(&raw.objective)?, +fn metric_value_from_wire(raw: WireMetricValue) -> Result { + Ok(MetricValue { + key: NonEmptyText::new(raw.key)?, value: raw.value, }) } +fn metric_definition(store: &ProjectStore, key: &NonEmptyText) -> Result { + store + .list_metric_definitions() + .map_err(store_fault("tools/call:experiment.close"))? + .into_iter() + .find(|definition| definition.key == *key) + .map(|definition| MetricSpec { + metric_key: definition.key, + unit: definition.unit, + objective: definition.objective, + }) + .ok_or_else(|| { + FaultRecord::new( + FaultKind::InvalidInput, + FaultStage::Store, + "tools/call:experiment.close", + format!("metric `{key}` is not registered"), + ) + }) +} + +fn coerce_tool_dimensions( + store: &ProjectStore, + raw_dimensions: BTreeMap, + operation: &'static str, +) -> Result, FaultRecord> { + store + .coerce_run_dimensions(raw_dimensions) + .map_err(store_fault(operation)) +} + fn command_recipe_from_wire( raw: WireRunCommand, project_root: &Utf8Path, @@ -1465,6 +2088,91 @@ fn parse_metric_unit_name(raw: &str) -> Result { crate::parse_metric_unit(raw) } +fn parse_metric_source_name(raw: &str) -> Result { + match raw { + "run_metric" => Ok(MetricFieldSource::RunMetric), + "change_payload" => Ok(MetricFieldSource::ChangePayload), + "run_payload" => Ok(MetricFieldSource::RunPayload), + "analysis_payload" => Ok(MetricFieldSource::AnalysisPayload), + "decision_payload" => Ok(MetricFieldSource::DecisionPayload), + other => Err(StoreError::Json(serde_json::Error::io( + 
std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("unknown metric source `{other}`"), + ), + ))), + } +} + +fn parse_metric_order_name(raw: &str) -> Result { + match raw { + "asc" => Ok(MetricRankOrder::Asc), + "desc" => Ok(MetricRankOrder::Desc), + other => Err(StoreError::Json(serde_json::Error::io( + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("unknown metric order `{other}`"), + ), + ))), + } +} + +fn parse_field_value_type_name(raw: &str) -> Result { + match raw { + "string" => Ok(FieldValueType::String), + "numeric" => Ok(FieldValueType::Numeric), + "boolean" => Ok(FieldValueType::Boolean), + "timestamp" => Ok(FieldValueType::Timestamp), + other => Err(crate::invalid_input(format!( + "unknown field value type `{other}`" + ))), + } +} + +fn parse_diagnostic_severity_name(raw: &str) -> Result { + match raw { + "error" => Ok(DiagnosticSeverity::Error), + "warning" => Ok(DiagnosticSeverity::Warning), + "info" => Ok(DiagnosticSeverity::Info), + other => Err(crate::invalid_input(format!( + "unknown diagnostic severity `{other}`" + ))), + } +} + +fn parse_field_presence_name(raw: &str) -> Result { + match raw { + "required" => Ok(FieldPresence::Required), + "recommended" => Ok(FieldPresence::Recommended), + "optional" => Ok(FieldPresence::Optional), + other => Err(crate::invalid_input(format!( + "unknown field presence `{other}`" + ))), + } +} + +fn parse_field_role_name(raw: &str) -> Result { + match raw { + "index" => Ok(FieldRole::Index), + "projection_gate" => Ok(FieldRole::ProjectionGate), + "render_only" => Ok(FieldRole::RenderOnly), + "opaque" => Ok(FieldRole::Opaque), + other => Err(crate::invalid_input(format!( + "unknown field role `{other}`" + ))), + } +} + +fn parse_inference_policy_name(raw: &str) -> Result { + match raw { + "manual_only" => Ok(InferencePolicy::ManualOnly), + "model_may_infer" => Ok(InferencePolicy::ModelMayInfer), + other => Err(crate::invalid_input(format!( + "unknown inference policy `{other}`" 
+ ))), + } +} + fn parse_backend_name(raw: &str) -> Result { match raw { "local_process" => Ok(ExecutionBackend::LocalProcess), @@ -1574,6 +2282,7 @@ struct NodeArchiveToolArgs { struct QuickNoteToolArgs { frontier_id: Option, title: String, + summary: String, body: String, tags: Vec, #[serde(default)] @@ -1586,14 +2295,65 @@ struct QuickNoteToolArgs { struct ResearchRecordToolArgs { frontier_id: Option, title: String, - summary: Option, + summary: String, body: String, #[serde(default)] + tags: Vec, + #[serde(default)] annotations: Vec, #[serde(default)] parents: Vec, } +#[derive(Debug, Deserialize)] +struct SchemaFieldUpsertToolArgs { + name: String, + node_classes: Option>, + presence: String, + severity: String, + role: String, + inference_policy: String, + value_type: Option, +} + +#[derive(Debug, Deserialize)] +struct SchemaFieldRemoveToolArgs { + name: String, + node_classes: Option>, +} + +#[derive(Debug, Deserialize)] +struct MetricDefineToolArgs { + key: String, + unit: String, + objective: String, + description: Option, +} + +#[derive(Debug, Deserialize)] +struct RunDimensionDefineToolArgs { + key: String, + value_type: String, + description: Option, +} + +#[derive(Debug, Deserialize, Default)] +struct MetricKeysToolArgs { + frontier_id: Option, + source: Option, + dimensions: Option>, +} + +#[derive(Debug, Deserialize)] +struct MetricBestToolArgs { + key: String, + frontier_id: Option, + source: Option, + dimensions: Option>, + order: Option, + limit: Option, +} + #[derive(Debug, Deserialize)] struct ExperimentCloseToolArgs { frontier_id: String, @@ -1601,9 +2361,9 @@ struct ExperimentCloseToolArgs { change_node_id: String, candidate_summary: String, run: WireRun, - primary_metric: WireMetricObservation, + primary_metric: WireMetricValue, #[serde(default)] - supporting_metrics: Vec, + supporting_metrics: Vec, note: WireFrontierNote, verdict: String, decision_title: String, @@ -1627,10 +2387,8 @@ struct WireMetricSpec { } #[derive(Debug, Deserialize)] 
-struct WireMetricObservation { +struct WireMetricValue { key: String, - unit: String, - objective: String, value: f64, } @@ -1639,7 +2397,8 @@ struct WireRun { title: String, summary: Option, backend: String, - benchmark_suite: String, + #[serde(default)] + dimensions: BTreeMap, command: WireRunCommand, } diff --git a/crates/fidget-spinner-cli/src/mcp/telemetry.rs b/crates/fidget-spinner-cli/src/mcp/telemetry.rs index 7206f76..93fbe71 100644 --- a/crates/fidget-spinner-cli/src/mcp/telemetry.rs +++ b/crates/fidget-spinner-cli/src/mcp/telemetry.rs @@ -36,6 +36,7 @@ impl ServerTelemetry { pub fn record_success(&mut self, operation: &str, latency_ms: u128) { self.successes += 1; + self.last_fault = None; let entry = self.operations.entry(operation.to_owned()).or_default(); entry.successes += 1; entry.last_latency_ms = Some(latency_ms); diff --git a/crates/fidget-spinner-cli/tests/mcp_hardening.rs b/crates/fidget-spinner-cli/tests/mcp_hardening.rs index 6c81d7f..f076d74 100644 --- a/crates/fidget-spinner-cli/tests/mcp_hardening.rs +++ b/crates/fidget-spinner-cli/tests/mcp_hardening.rs @@ -20,7 +20,10 @@ use uuid as _; type TestResult = Result>; -fn must(result: Result, context: &str) -> TestResult { +fn must( + result: Result, + context: C, +) -> TestResult { result.map_err(|error| io::Error::other(format!("{context}: {error}")).into()) } @@ -54,6 +57,48 @@ fn init_project(root: &Utf8PathBuf) -> TestResult { Ok(()) } +fn run_command(root: &Utf8PathBuf, program: &str, args: &[&str]) -> TestResult { + let output = must( + Command::new(program) + .current_dir(root.as_std_path()) + .args(args) + .output(), + format!("{program} spawn"), + )?; + if !output.status.success() { + return Err(io::Error::other(format!( + "{program} {:?} failed: {}", + args, + String::from_utf8_lossy(&output.stderr) + )) + .into()); + } + Ok(String::from_utf8_lossy(&output.stdout).trim().to_owned()) +} + +fn run_git(root: &Utf8PathBuf, args: &[&str]) -> TestResult { + run_command(root, "git", args) 
+} + +fn init_git_project(root: &Utf8PathBuf) -> TestResult { + let _ = run_git(root, &["init", "-b", "main"])?; + let _ = run_git(root, &["config", "user.name", "main"])?; + let _ = run_git(root, &["config", "user.email", "main@swarm.moe"])?; + let _ = run_git(root, &["add", "-A"])?; + let _ = run_git(root, &["commit", "-m", "initial state"])?; + run_git(root, &["rev-parse", "HEAD"]) +} + +fn commit_project_state(root: &Utf8PathBuf, marker: &str, message: &str) -> TestResult { + must( + fs::write(root.join(marker).as_std_path(), message), + format!("write marker {marker}"), + )?; + let _ = run_git(root, &["add", "-A"])?; + let _ = run_git(root, &["commit", "-m", message])?; + run_git(root, &["rev-parse", "HEAD"]) +} + fn binary_path() -> PathBuf { PathBuf::from(env!("CARGO_BIN_EXE_fidget-spinner-cli")) } @@ -172,6 +217,10 @@ fn tool_text(response: &Value) -> Option<&str> { .and_then(|entry| entry["text"].as_str()) } +fn fault_message(response: &Value) -> Option<&str> { + response["result"]["structuredContent"]["message"].as_str() +} + #[test] fn cold_start_exposes_health_and_telemetry() -> TestResult { let project_root = temp_project_root("cold_start")?; @@ -361,6 +410,7 @@ fn side_effecting_request_is_not_replayed_after_worker_crash() -> TestResult { "research.record", json!({ "title": "should not duplicate", + "summary": "dedupe check", "body": "host crash before worker execution", }), )?; @@ -462,6 +512,57 @@ fn bind_rejects_nonempty_uninitialized_root() -> TestResult { Ok(()) } +#[test] +fn successful_bind_clears_stale_fault_from_health() -> TestResult { + let bad_root = temp_project_root("bind_fault_bad")?; + must( + fs::write(bad_root.join("README.txt").as_std_path(), "occupied"), + "seed bad bind root", + )?; + let good_root = temp_project_root("bind_fault_good")?; + init_project(&good_root)?; + + let mut harness = McpHarness::spawn(None, &[])?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + + let failed_bind = harness.bind_project(301, 
&bad_root)?; + assert_eq!(failed_bind["result"]["isError"].as_bool(), Some(true)); + + let failed_health = harness.call_tool(302, "system.health", json!({ "detail": "full" }))?; + assert_eq!( + tool_content(&failed_health)["last_fault"]["operation"].as_str(), + Some("tools/call:project.bind") + ); + + let good_bind = harness.bind_project(303, &good_root)?; + assert_eq!(good_bind["result"]["isError"].as_bool(), Some(false)); + + let recovered_health = harness.call_tool(304, "system.health", json!({}))?; + assert_eq!(recovered_health["result"]["isError"].as_bool(), Some(false)); + assert!(tool_content(&recovered_health).get("last_fault").is_none()); + assert!(!must_some(tool_text(&recovered_health), "recovered health text")?.contains("fault:")); + + let recovered_health_full = + harness.call_tool(306, "system.health", json!({ "detail": "full" }))?; + assert_eq!( + tool_content(&recovered_health_full)["last_fault"], + Value::Null, + ); + + let recovered_telemetry = harness.call_tool(305, "system.telemetry", json!({}))?; + assert_eq!( + recovered_telemetry["result"]["isError"].as_bool(), + Some(false) + ); + assert_eq!( + tool_content(&recovered_telemetry)["errors"].as_u64(), + Some(1) + ); + assert!(tool_content(&recovered_telemetry)["last_fault"].is_null()); + Ok(()) +} + #[test] fn bind_retargets_writes_to_sibling_project_root() -> TestResult { let spinner_root = temp_project_root("spinner_root")?; @@ -502,6 +603,7 @@ fn bind_retargets_writes_to_sibling_project_root() -> TestResult { "note.quick", json!({ "title": "libgrid dogfood note", + "summary": "rebind summary", "body": "rebind should redirect writes", "tags": [], }), @@ -545,6 +647,7 @@ fn tag_registry_drives_note_creation_and_lookup() -> TestResult { "note.quick", json!({ "title": "untagged", + "summary": "should fail without explicit tags", "body": "should fail", }), )?; @@ -570,6 +673,7 @@ fn tag_registry_drives_note_creation_and_lookup() -> TestResult { "note.quick", json!({ "title": "tagged note", + 
"summary": "tagged lookup summary", "body": "tagged lookup should work", "tags": ["dogfood/mcp"], }), @@ -582,3 +686,937 @@ fn tag_registry_drives_note_creation_and_lookup() -> TestResult { assert_eq!(nodes[0]["tags"][0].as_str(), Some("dogfood/mcp")); Ok(()) } + +#[test] +fn research_record_accepts_tags_and_filtering() -> TestResult { + let project_root = temp_project_root("research_tags")?; + init_project(&project_root)?; + + let mut harness = McpHarness::spawn(None, &[])?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + let bind = harness.bind_project(451, &project_root)?; + assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); + + let tag = harness.call_tool( + 452, + "tag.add", + json!({ + "name": "campaign/libgrid", + "description": "libgrid migration campaign", + }), + )?; + assert_eq!(tag["result"]["isError"].as_bool(), Some(false)); + + let research = harness.call_tool( + 453, + "research.record", + json!({ + "title": "ingest tranche", + "summary": "Import the next libgrid tranche.", + "body": "Full import notes live here.", + "tags": ["campaign/libgrid"], + }), + )?; + assert_eq!(research["result"]["isError"].as_bool(), Some(false)); + + let filtered = harness.call_tool(454, "node.list", json!({"tags": ["campaign/libgrid"]}))?; + let nodes = must_some( + tool_content(&filtered).as_array(), + "filtered research nodes", + )?; + assert_eq!(nodes.len(), 1); + assert_eq!(nodes[0]["class"].as_str(), Some("research")); + assert_eq!(nodes[0]["tags"][0].as_str(), Some("campaign/libgrid")); + Ok(()) +} + +#[test] +fn prose_tools_reject_invalid_shapes_over_mcp() -> TestResult { + let project_root = temp_project_root("prose_invalid")?; + init_project(&project_root)?; + + let mut harness = McpHarness::spawn(None, &[])?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + let bind = harness.bind_project(46, &project_root)?; + assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); + + let missing_note_summary = 
harness.call_tool(
+ 47,
+ "note.quick",
+ json!({
+ "title": "untagged",
+ "body": "body only",
+ "tags": [],
+ }),
+ )?;
+ assert_eq!(
+ missing_note_summary["result"]["isError"].as_bool(),
+ Some(true)
+ );
+ assert!(
+ fault_message(&missing_note_summary)
+ .is_some_and(|message| message.contains("summary") || message.contains("missing field"))
+ );
+
+ let missing_research_summary = harness.call_tool(
+ 48,
+ "research.record",
+ json!({
+ "title": "research only",
+ "body": "body only",
+ }),
+ )?;
+ assert_eq!(
+ missing_research_summary["result"]["isError"].as_bool(),
+ Some(true)
+ );
+ assert!(
+ fault_message(&missing_research_summary)
+ .is_some_and(|message| message.contains("summary") || message.contains("missing field"))
+ );
+
+ let note_without_body = harness.call_tool(
+ 49,
+ "node.create",
+ json!({
+ "class": "note",
+ "title": "missing body",
+ "summary": "triage layer",
+ "tags": [],
+ "payload": {},
+ }),
+ )?;
+ assert_eq!(note_without_body["result"]["isError"].as_bool(), Some(true));
+ assert!(
+ fault_message(&note_without_body)
+ .is_some_and(|message| message.contains("payload field `body`"))
+ );
+
+ let research_without_summary = harness.call_tool(
+ 50,
+ "node.create",
+ json!({
+ "class": "research",
+ "title": "missing summary",
+ "payload": { "body": "full research body" },
+ }),
+ )?;
+ assert_eq!(
+ research_without_summary["result"]["isError"].as_bool(),
+ Some(true)
+ );
+ assert!(
+ fault_message(&research_without_summary)
+ .is_some_and(|message| message.contains("non-empty summary"))
+ );
+ Ok(())
+}
+
+#[test]
+fn concise_note_reads_do_not_leak_body_text() -> TestResult {
+ let project_root = temp_project_root("concise_note_read")?;
+ init_project(&project_root)?;
+
+ let mut harness = McpHarness::spawn(None, &[])?;
+ let _ = harness.initialize()?;
+ harness.notify_initialized()?;
+ let bind = harness.bind_project(50, &project_root)?;
+ assert_eq!(bind["result"]["isError"].as_bool(), Some(false));
+
+ let note = 
harness.call_tool(
+ 51,
+ "note.quick",
+ json!({
+ "title": "tagged note",
+ "summary": "triage layer",
+ "body": "full note body should stay out of concise reads",
+ "tags": [],
+ }),
+ )?;
+ assert_eq!(note["result"]["isError"].as_bool(), Some(false));
+ let node_id = must_some(tool_content(&note)["id"].as_str(), "created note id")?.to_owned();
+
+ let concise = harness.call_tool(52, "node.read", json!({ "node_id": node_id }))?;
+ let concise_structured = tool_content(&concise);
+ assert_eq!(concise_structured["summary"].as_str(), Some("triage layer"));
+ assert!(concise_structured["payload_preview"].get("body").is_none());
+ assert!(
+ !must_some(tool_text(&concise), "concise note.read text")?
+ .contains("full note body should stay out of concise reads")
+ );
+
+ let full = harness.call_tool(
+ 53,
+ "node.read",
+ json!({ "node_id": node_id, "detail": "full" }),
+ )?;
+ assert_eq!(
+ tool_content(&full)["payload"]["fields"]["body"].as_str(),
+ Some("full note body should stay out of concise reads")
+ );
+ Ok(())
+}
+
+#[test]
+fn concise_prose_reads_only_surface_payload_field_names() -> TestResult {
+ let project_root = temp_project_root("concise_prose_field_names")?;
+ init_project(&project_root)?;
+
+ let mut harness = McpHarness::spawn(None, &[])?;
+ let _ = harness.initialize()?;
+ harness.notify_initialized()?;
+ let bind = harness.bind_project(531, &project_root)?;
+ assert_eq!(bind["result"]["isError"].as_bool(), Some(false));
+
+ let research = harness.call_tool(
+ 532,
+ "node.create",
+ json!({
+ "class": "research",
+ "title": "rich import",
+ "summary": "triage layer only",
+ "payload": {
+ "body": "Body stays out of concise output.",
+ "source_excerpt": "This imported excerpt is intentionally long and should never reappear in concise node reads as a value preview.",
+ "verbatim_snippet": "Another long snippet that belongs in full payload inspection only, not in triage surfaces." 
+ } + }), + )?; + assert_eq!(research["result"]["isError"].as_bool(), Some(false)); + let node_id = must_some( + tool_content(&research)["id"].as_str(), + "created research id", + )? + .to_owned(); + + let concise = harness.call_tool(533, "node.read", json!({ "node_id": node_id }))?; + let concise_structured = tool_content(&concise); + assert_eq!(concise_structured["payload_field_count"].as_u64(), Some(2)); + let payload_fields = must_some( + concise_structured["payload_fields"].as_array(), + "concise prose payload fields", + )?; + assert!( + payload_fields + .iter() + .any(|field| field.as_str() == Some("source_excerpt")) + ); + assert!(concise_structured.get("payload_preview").is_none()); + let concise_text = must_some(tool_text(&concise), "concise prose read text")?; + assert!(!concise_text.contains("This imported excerpt is intentionally long")); + assert!(concise_text.contains("payload fields: source_excerpt, verbatim_snippet")); + Ok(()) +} + +#[test] +fn node_list_does_not_enumerate_full_prose_bodies() -> TestResult { + let project_root = temp_project_root("node_list_no_body_leak")?; + init_project(&project_root)?; + + let mut harness = McpHarness::spawn(None, &[])?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + let bind = harness.bind_project(54, &project_root)?; + assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); + + let note = harness.call_tool( + 55, + "note.quick", + json!({ + "title": "tagged note", + "summary": "triage summary", + "body": "full note body should never appear in list-like surfaces", + "tags": [], + }), + )?; + assert_eq!(note["result"]["isError"].as_bool(), Some(false)); + + let listed = harness.call_tool(56, "node.list", json!({ "class": "note" }))?; + let listed_rows = must_some(tool_content(&listed).as_array(), "listed note rows")?; + assert_eq!(listed_rows.len(), 1); + assert_eq!(listed_rows[0]["summary"].as_str(), Some("triage summary")); + assert!(listed_rows[0].get("body").is_none()); + assert!( 
+ !must_some(tool_text(&listed), "node.list text")?
+ .contains("full note body should never appear in list-like surfaces")
+ );
+ Ok(())
+}
+
+#[test]
+fn metric_tools_are_listed_for_discovery() -> TestResult {
+ let project_root = temp_project_root("metric_tool_list")?;
+ init_project(&project_root)?;
+
+ let mut harness = McpHarness::spawn(Some(&project_root), &[])?;
+ let _ = harness.initialize()?;
+ harness.notify_initialized()?;
+ let tools = harness.tools_list()?;
+ let names = must_some(tools["result"]["tools"].as_array(), "tool list")?
+ .iter()
+ .filter_map(|tool| tool["name"].as_str())
+ .collect::<Vec<_>>();
+ assert!(names.contains(&"metric.define"));
+ assert!(names.contains(&"metric.keys"));
+ assert!(names.contains(&"metric.best"));
+ assert!(names.contains(&"metric.migrate"));
+ assert!(names.contains(&"run.dimension.define"));
+ assert!(names.contains(&"run.dimension.list"));
+ assert!(names.contains(&"schema.field.upsert"));
+ assert!(names.contains(&"schema.field.remove"));
+ Ok(())
+}
+
+#[test]
+fn schema_field_tools_mutate_project_schema() -> TestResult {
+ let project_root = temp_project_root("schema_field_tools")?;
+ init_project(&project_root)?;
+
+ let mut harness = McpHarness::spawn(Some(&project_root), &[])?;
+ let _ = harness.initialize()?;
+ harness.notify_initialized()?;
+
+ let upsert = harness.call_tool(
+ 861,
+ "schema.field.upsert",
+ json!({
+ "name": "scenario",
+ "node_classes": ["change", "analysis"],
+ "presence": "recommended",
+ "severity": "warning",
+ "role": "projection_gate",
+ "inference_policy": "manual_only",
+ "value_type": "string"
+ }),
+ )?;
+ assert_eq!(upsert["result"]["isError"].as_bool(), Some(false));
+ assert_eq!(
+ tool_content(&upsert)["field"]["name"].as_str(),
+ Some("scenario")
+ );
+ assert_eq!(
+ tool_content(&upsert)["field"]["node_classes"],
+ json!(["change", "analysis"])
+ );
+
+ let schema = harness.call_tool(862, "project.schema", json!({ "detail": "full" }))?;
+ 
assert_eq!(schema["result"]["isError"].as_bool(), Some(false)); + let fields = must_some(tool_content(&schema)["fields"].as_array(), "schema fields")?; + assert!(fields.iter().any(|field| { + field["name"].as_str() == Some("scenario") && field["value_type"].as_str() == Some("string") + })); + + let remove = harness.call_tool( + 863, + "schema.field.remove", + json!({ + "name": "scenario", + "node_classes": ["change", "analysis"] + }), + )?; + assert_eq!(remove["result"]["isError"].as_bool(), Some(false)); + assert_eq!(tool_content(&remove)["removed_count"].as_u64(), Some(1)); + + let schema_after = harness.call_tool(864, "project.schema", json!({ "detail": "full" }))?; + let fields_after = must_some( + tool_content(&schema_after)["fields"].as_array(), + "schema fields after remove", + )?; + assert!( + !fields_after + .iter() + .any(|field| field["name"].as_str() == Some("scenario")) + ); + Ok(()) +} + +#[test] +fn bind_open_backfills_legacy_missing_summary() -> TestResult { + let project_root = temp_project_root("bind_backfill")?; + init_project(&project_root)?; + + let node_id = { + let mut store = must(ProjectStore::open(&project_root), "open project store")?; + let node = must( + store.add_node(fidget_spinner_store_sqlite::CreateNodeRequest { + class: fidget_spinner_core::NodeClass::Research, + frontier_id: None, + title: must(NonEmptyText::new("legacy research"), "legacy title")?, + summary: Some(must( + NonEmptyText::new("temporary summary"), + "temporary summary", + )?), + tags: None, + payload: fidget_spinner_core::NodePayload::with_schema( + store.schema().schema_ref(), + serde_json::from_value(json!({ + "body": "Derived summary first paragraph.\n\nLonger body follows." 
+ })) + .map_err(|error| io::Error::other(format!("payload object: {error}")))?, + ), + annotations: Vec::new(), + attachments: Vec::new(), + }), + "create legacy research node", + )?; + node.id.to_string() + }; + + let database_path = project_root.join(".fidget_spinner").join("state.sqlite"); + let clear_output = must( + Command::new("sqlite3") + .current_dir(project_root.as_std_path()) + .arg(database_path.as_str()) + .arg(format!( + "UPDATE nodes SET summary = NULL WHERE id = '{node_id}';" + )) + .output(), + "spawn sqlite3 for direct summary clear", + )?; + if !clear_output.status.success() { + return Err(io::Error::other(format!( + "sqlite3 summary clear failed: {}", + String::from_utf8_lossy(&clear_output.stderr) + )) + .into()); + } + + let mut harness = McpHarness::spawn(None, &[])?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + let bind = harness.bind_project(60, &project_root)?; + assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); + + let read = harness.call_tool(61, "node.read", json!({ "node_id": node_id }))?; + assert_eq!(read["result"]["isError"].as_bool(), Some(false)); + assert_eq!( + tool_content(&read)["summary"].as_str(), + Some("Derived summary first paragraph.") + ); + + let listed = harness.call_tool(62, "node.list", json!({ "class": "research" }))?; + let items = must_some(tool_content(&listed).as_array(), "research node list")?; + assert_eq!(items.len(), 1); + assert_eq!( + items[0]["summary"].as_str(), + Some("Derived summary first paragraph.") + ); + Ok(()) +} + +#[test] +fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResult { + let project_root = temp_project_root("metric_rank_e2e")?; + init_project(&project_root)?; + let _initial_head = init_git_project(&project_root)?; + + let mut harness = McpHarness::spawn(Some(&project_root), &[])?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + + let frontier = harness.call_tool( + 70, + "frontier.init", + json!({ + 
"label": "metric frontier", + "objective": "exercise metric ranking", + "contract_title": "metric contract", + "benchmark_suites": ["smoke"], + "promotion_criteria": ["rank by one key"], + "primary_metric": { + "key": "wall_clock_s", + "unit": "seconds", + "objective": "minimize" + } + }), + )?; + assert_eq!(frontier["result"]["isError"].as_bool(), Some(false)); + let frontier_id = must_some( + tool_content(&frontier)["frontier_id"].as_str(), + "frontier id", + )? + .to_owned(); + let base_checkpoint_id = must_some( + tool_content(&frontier)["champion_checkpoint_id"].as_str(), + "base checkpoint id", + )? + .to_owned(); + let metric_define = harness.call_tool( + 701, + "metric.define", + json!({ + "key": "wall_clock_s", + "unit": "seconds", + "objective": "minimize", + "description": "elapsed wall time" + }), + )?; + assert_eq!(metric_define["result"]["isError"].as_bool(), Some(false)); + + let scenario_dimension = harness.call_tool( + 702, + "run.dimension.define", + json!({ + "key": "scenario", + "value_type": "string", + "description": "workload family" + }), + )?; + assert_eq!( + scenario_dimension["result"]["isError"].as_bool(), + Some(false) + ); + + let duration_dimension = harness.call_tool( + 703, + "run.dimension.define", + json!({ + "key": "duration_s", + "value_type": "numeric", + "description": "time budget in seconds" + }), + )?; + assert_eq!( + duration_dimension["result"]["isError"].as_bool(), + Some(false) + ); + + let dimensions = harness.call_tool(704, "run.dimension.list", json!({}))?; + assert_eq!(dimensions["result"]["isError"].as_bool(), Some(false)); + let dimension_rows = must_some(tool_content(&dimensions).as_array(), "run dimension rows")?; + assert!(dimension_rows.iter().any(|row| { + row["key"].as_str() == Some("benchmark_suite") + && row["value_type"].as_str() == Some("string") + })); + assert!(dimension_rows.iter().any(|row| { + row["key"].as_str() == Some("scenario") + && row["description"].as_str() == Some("workload family") + })); 
+ assert!(dimension_rows.iter().any(|row| { + row["key"].as_str() == Some("duration_s") && row["value_type"].as_str() == Some("numeric") + })); + + let first_change = harness.call_tool( + 71, + "node.create", + json!({ + "class": "change", + "frontier_id": frontier_id, + "title": "first change", + "summary": "first change summary", + "payload": { + "body": "first change body", + "wall_clock_s": 14.0 + } + }), + )?; + assert_eq!(first_change["result"]["isError"].as_bool(), Some(false)); + let first_change_id = must_some( + tool_content(&first_change)["id"].as_str(), + "first change id", + )?; + let _first_commit = commit_project_state(&project_root, "candidate-one.txt", "candidate one")?; + + let first_close = harness.call_tool( + 72, + "experiment.close", + json!({ + "frontier_id": frontier_id, + "base_checkpoint_id": base_checkpoint_id, + "change_node_id": first_change_id, + "candidate_summary": "candidate one", + "run": { + "title": "first run", + "summary": "first run summary", + "backend": "worktree_process", + "dimensions": { + "benchmark_suite": "smoke", + "scenario": "belt_4x5", + "duration_s": 20.0 + }, + "command": { + "working_directory": project_root.as_str(), + "argv": ["true"] + } + }, + "primary_metric": { + "key": "wall_clock_s", + "value": 10.0 + }, + "note": { + "summary": "first run note" + }, + "verdict": "keep_on_frontier", + "decision_title": "first decision", + "decision_rationale": "keep first candidate around" + }), + )?; + assert_eq!(first_close["result"]["isError"].as_bool(), Some(false)); + + let first_candidate_checkpoint_id = must_some( + tool_content(&first_close)["candidate_checkpoint_id"].as_str(), + "first candidate checkpoint id", + )? 
+ .to_owned(); + + let second_change = harness.call_tool( + 73, + "node.create", + json!({ + "class": "change", + "frontier_id": frontier_id, + "title": "second change", + "summary": "second change summary", + "payload": { + "body": "second change body", + "wall_clock_s": 7.0 + } + }), + )?; + assert_eq!(second_change["result"]["isError"].as_bool(), Some(false)); + let second_change_id = must_some( + tool_content(&second_change)["id"].as_str(), + "second change id", + )?; + let second_commit = commit_project_state(&project_root, "candidate-two.txt", "candidate two")?; + + let second_close = harness.call_tool( + 74, + "experiment.close", + json!({ + "frontier_id": frontier_id, + "base_checkpoint_id": base_checkpoint_id, + "change_node_id": second_change_id, + "candidate_summary": "candidate two", + "run": { + "title": "second run", + "summary": "second run summary", + "backend": "worktree_process", + "dimensions": { + "benchmark_suite": "smoke", + "scenario": "belt_4x5", + "duration_s": 60.0 + }, + "command": { + "working_directory": project_root.as_str(), + "argv": ["true"] + } + }, + "primary_metric": { + "key": "wall_clock_s", + "value": 5.0 + }, + "note": { + "summary": "second run note" + }, + "verdict": "keep_on_frontier", + "decision_title": "second decision", + "decision_rationale": "second candidate looks stronger" + }), + )?; + assert_eq!(second_close["result"]["isError"].as_bool(), Some(false)); + let second_candidate_checkpoint_id = must_some( + tool_content(&second_close)["candidate_checkpoint_id"].as_str(), + "second candidate checkpoint id", + )? 
+ .to_owned(); + + let second_frontier = harness.call_tool( + 80, + "frontier.init", + json!({ + "label": "metric frontier two", + "objective": "exercise frontier filtering", + "contract_title": "metric contract two", + "benchmark_suites": ["smoke"], + "promotion_criteria": ["frontier filters should isolate rankings"], + "primary_metric": { + "key": "wall_clock_s", + "unit": "seconds", + "objective": "minimize" + } + }), + )?; + assert_eq!(second_frontier["result"]["isError"].as_bool(), Some(false)); + let second_frontier_id = must_some( + tool_content(&second_frontier)["frontier_id"].as_str(), + "second frontier id", + )? + .to_owned(); + let second_base_checkpoint_id = must_some( + tool_content(&second_frontier)["champion_checkpoint_id"].as_str(), + "second frontier base checkpoint id", + )? + .to_owned(); + + let third_change = harness.call_tool( + 81, + "node.create", + json!({ + "class": "change", + "frontier_id": second_frontier_id, + "title": "third change", + "summary": "third change summary", + "payload": { + "body": "third change body", + "wall_clock_s": 3.0 + } + }), + )?; + assert_eq!(third_change["result"]["isError"].as_bool(), Some(false)); + let third_change_id = must_some( + tool_content(&third_change)["id"].as_str(), + "third change id", + )?; + let third_commit = + commit_project_state(&project_root, "candidate-three.txt", "candidate three")?; + + let third_close = harness.call_tool( + 82, + "experiment.close", + json!({ + "frontier_id": second_frontier_id, + "base_checkpoint_id": second_base_checkpoint_id, + "change_node_id": third_change_id, + "candidate_summary": "candidate three", + "run": { + "title": "third run", + "summary": "third run summary", + "backend": "worktree_process", + "dimensions": { + "benchmark_suite": "smoke", + "scenario": "belt_4x5_alt", + "duration_s": 60.0 + }, + "command": { + "working_directory": project_root.as_str(), + "argv": ["true"] + } + }, + "primary_metric": { + "key": "wall_clock_s", + "value": 3.0 + }, + 
"note": { + "summary": "third run note" + }, + "verdict": "keep_on_frontier", + "decision_title": "third decision", + "decision_rationale": "third candidate is best overall but not in the first frontier" + }), + )?; + assert_eq!(third_close["result"]["isError"].as_bool(), Some(false)); + let third_candidate_checkpoint_id = must_some( + tool_content(&third_close)["candidate_checkpoint_id"].as_str(), + "third candidate checkpoint id", + )? + .to_owned(); + + let keys = harness.call_tool(75, "metric.keys", json!({}))?; + assert_eq!(keys["result"]["isError"].as_bool(), Some(false)); + let key_rows = must_some(tool_content(&keys).as_array(), "metric keys array")?; + assert!(key_rows.iter().any(|row| { + row["key"].as_str() == Some("wall_clock_s") && row["source"].as_str() == Some("run_metric") + })); + assert!(key_rows.iter().any(|row| { + row["key"].as_str() == Some("wall_clock_s") + && row["source"].as_str() == Some("run_metric") + && row["description"].as_str() == Some("elapsed wall time") + && row["requires_order"].as_bool() == Some(false) + })); + assert!(key_rows.iter().any(|row| { + row["key"].as_str() == Some("wall_clock_s") + && row["source"].as_str() == Some("change_payload") + })); + + let filtered_keys = harness.call_tool( + 750, + "metric.keys", + json!({ + "source": "run_metric", + "dimensions": { + "scenario": "belt_4x5", + "duration_s": 60.0 + } + }), + )?; + assert_eq!(filtered_keys["result"]["isError"].as_bool(), Some(false)); + let filtered_key_rows = must_some( + tool_content(&filtered_keys).as_array(), + "filtered metric keys array", + )?; + assert_eq!(filtered_key_rows.len(), 1); + assert_eq!(filtered_key_rows[0]["key"].as_str(), Some("wall_clock_s")); + assert_eq!(filtered_key_rows[0]["experiment_count"].as_u64(), Some(1)); + + let ambiguous = harness.call_tool(76, "metric.best", json!({ "key": "wall_clock_s" }))?; + assert_eq!(ambiguous["result"]["isError"].as_bool(), Some(true)); + assert!( + fault_message(&ambiguous) + .is_some_and(|message| 
message.contains("ambiguous across sources")) + ); + + let run_metric_best = harness.call_tool( + 77, + "metric.best", + json!({ + "key": "wall_clock_s", + "source": "run_metric", + "dimensions": { + "scenario": "belt_4x5", + "duration_s": 60.0 + }, + "limit": 5 + }), + )?; + assert_eq!(run_metric_best["result"]["isError"].as_bool(), Some(false)); + let run_best_rows = must_some( + tool_content(&run_metric_best).as_array(), + "run metric best array", + )?; + assert_eq!(run_best_rows[0]["value"].as_f64(), Some(5.0)); + assert_eq!(run_best_rows.len(), 1); + assert_eq!( + run_best_rows[0]["candidate_checkpoint_id"].as_str(), + Some(second_candidate_checkpoint_id.as_str()) + ); + assert_eq!( + run_best_rows[0]["candidate_commit_hash"].as_str(), + Some(second_commit.as_str()) + ); + assert_eq!( + run_best_rows[0]["dimensions"]["scenario"].as_str(), + Some("belt_4x5") + ); + assert_eq!( + run_best_rows[0]["dimensions"]["duration_s"].as_f64(), + Some(60.0) + ); + assert!(must_some(tool_text(&run_metric_best), "run metric best text")?.contains("commit=")); + assert!(must_some(tool_text(&run_metric_best), "run metric best text")?.contains("dims:")); + + let payload_requires_order = harness.call_tool( + 78, + "metric.best", + json!({ + "key": "wall_clock_s", + "source": "change_payload" + }), + )?; + assert_eq!( + payload_requires_order["result"]["isError"].as_bool(), + Some(true) + ); + assert!( + fault_message(&payload_requires_order) + .is_some_and(|message| message.contains("explicit order")) + ); + + let payload_best = harness.call_tool( + 79, + "metric.best", + json!({ + "key": "wall_clock_s", + "source": "change_payload", + "dimensions": { + "scenario": "belt_4x5", + "duration_s": 60.0 + }, + "order": "asc" + }), + )?; + assert_eq!(payload_best["result"]["isError"].as_bool(), Some(false)); + let payload_best_rows = must_some( + tool_content(&payload_best).as_array(), + "payload metric best array", + )?; + assert_eq!(payload_best_rows[0]["value"].as_f64(), Some(7.0)); 
+ assert_eq!(payload_best_rows.len(), 1); + assert_eq!( + payload_best_rows[0]["candidate_checkpoint_id"].as_str(), + Some(second_candidate_checkpoint_id.as_str()) + ); + assert_eq!( + payload_best_rows[0]["candidate_commit_hash"].as_str(), + Some(second_commit.as_str()) + ); + + let filtered_best = harness.call_tool( + 83, + "metric.best", + json!({ + "key": "wall_clock_s", + "source": "run_metric", + "frontier_id": frontier_id, + "dimensions": { + "scenario": "belt_4x5" + }, + "limit": 5 + }), + )?; + assert_eq!(filtered_best["result"]["isError"].as_bool(), Some(false)); + let filtered_rows = must_some( + tool_content(&filtered_best).as_array(), + "filtered metric best array", + )?; + assert_eq!(filtered_rows.len(), 2); + assert_eq!( + filtered_rows[0]["candidate_checkpoint_id"].as_str(), + Some(second_candidate_checkpoint_id.as_str()) + ); + assert!( + filtered_rows + .iter() + .all(|row| row["frontier_id"].as_str() == Some(frontier_id.as_str())) + ); + + let global_best = harness.call_tool( + 84, + "metric.best", + json!({ + "key": "wall_clock_s", + "source": "run_metric", + "limit": 5 + }), + )?; + assert_eq!(global_best["result"]["isError"].as_bool(), Some(false)); + let global_rows = must_some( + tool_content(&global_best).as_array(), + "global metric best array", + )?; + assert_eq!( + global_rows[0]["candidate_checkpoint_id"].as_str(), + Some(third_candidate_checkpoint_id.as_str()) + ); + assert_eq!( + global_rows[0]["candidate_commit_hash"].as_str(), + Some(third_commit.as_str()) + ); + + let migrate = harness.call_tool(85, "metric.migrate", json!({}))?; + assert_eq!(migrate["result"]["isError"].as_bool(), Some(false)); + assert_eq!( + tool_content(&migrate)["inserted_metric_definitions"].as_u64(), + Some(0) + ); + assert_eq!( + tool_content(&migrate)["inserted_dimension_definitions"].as_u64(), + Some(0) + ); + assert_eq!( + tool_content(&migrate)["inserted_dimension_values"].as_u64(), + Some(0) + ); + + assert_ne!( + first_candidate_checkpoint_id, + 
second_candidate_checkpoint_id + ); + assert_ne!( + second_candidate_checkpoint_id, + third_candidate_checkpoint_id + ); + Ok(()) +} -- cgit v1.2.3