From e15fd4966e21bd8d31dbf580ede8a309c994816d Mon Sep 17 00:00:00 2001 From: main Date: Sat, 21 Mar 2026 19:19:49 -0400 Subject: Sharpen frontier grounding and experiment comparators --- crates/fidget-spinner-cli/src/main.rs | 109 +++++++++- crates/fidget-spinner-cli/src/mcp/catalog.rs | 55 ++++- crates/fidget-spinner-cli/src/mcp/projection.rs | 67 ++++++- crates/fidget-spinner-cli/src/mcp/service.rs | 241 ++++++++++++++++++---- crates/fidget-spinner-cli/src/ui.rs | 126 ++++++++++-- crates/fidget-spinner-cli/tests/mcp_hardening.rs | 243 +++++++++++++++++++++++ 6 files changed, 759 insertions(+), 82 deletions(-) (limited to 'crates/fidget-spinner-cli') diff --git a/crates/fidget-spinner-cli/src/main.rs b/crates/fidget-spinner-cli/src/main.rs index 7482794..63a5180 100644 --- a/crates/fidget-spinner-cli/src/main.rs +++ b/crates/fidget-spinner-cli/src/main.rs @@ -21,8 +21,8 @@ use fidget_spinner_store_sqlite::{ ExperimentOutcomePatch, FrontierRoadmapItemDraft, ListArtifactsQuery, ListExperimentsQuery, ListHypothesesQuery, MetricBestQuery, MetricKeysQuery, MetricRankOrder, MetricScope, OpenExperimentRequest, ProjectStore, STORE_DIR_NAME, StoreError, TextPatch, - UpdateArtifactRequest, UpdateExperimentRequest, UpdateFrontierBriefRequest, - UpdateHypothesisRequest, VertexSelector, + UpdateArtifactRequest, UpdateExperimentRequest, UpdateFrontierRequest, UpdateHypothesisRequest, + VertexSelector, }; #[cfg(test)] use libmcp_testkit as _; @@ -126,7 +126,7 @@ enum FrontierCommand { List(ProjectArg), Read(FrontierSelectorArgs), Open(FrontierSelectorArgs), - UpdateBrief(FrontierBriefUpdateArgs), + Update(FrontierUpdateArgs), History(FrontierSelectorArgs), } @@ -146,6 +146,7 @@ enum ExperimentCommand { Read(ExperimentSelectorArgs), Update(ExperimentUpdateArgs), Close(ExperimentCloseArgs), + Nearest(ExperimentNearestArgs), History(ExperimentSelectorArgs), } @@ -226,21 +227,55 @@ struct FrontierSelectorArgs { } #[derive(Args)] -struct FrontierBriefUpdateArgs { +struct FrontierUpdateArgs { #[command(flatten)] project: ProjectArg, #[arg(long)] frontier: String, #[arg(long)] expected_revision: Option, + #[arg(long)] + objective: Option, + #[command(flatten)] + situation: FrontierSituationPatchArgs, + #[command(flatten)] + unknowns: FrontierUnknownsPatchArgs, + #[command(flatten)] + roadmap: FrontierRoadmapPatchArgs, + #[command(flatten)] + scoreboard: FrontierScoreboardPatchArgs, +} + +#[derive(Args)] +struct FrontierSituationPatchArgs { #[arg(long)] situation: Option, #[arg(long)] clear_situation: bool, +} + +#[derive(Args)] +struct FrontierUnknownsPatchArgs { #[arg(long = "unknown")] unknowns: Vec, + #[arg(long = "clear-unknowns")] + clear_unknowns: bool, +} + +#[derive(Args)] +struct FrontierRoadmapPatchArgs { #[arg(long = "roadmap")] roadmap: Vec, + #[arg(long = "clear-roadmap")] + clear_roadmap: bool, +} + +#[derive(Args)] +struct FrontierScoreboardPatchArgs { + #[arg(long = "scoreboard-metric")] + scoreboard_metric_keys: Vec, + #[arg(long = "clear-scoreboard")] + clear_scoreboard_metric_keys: bool, } #[derive(Args)] @@ -417,6 +452,26 @@ struct ExperimentCloseArgs { analysis_body: Option, } +#[derive(Args)] +struct ExperimentNearestArgs { + #[command(flatten)] + project: ProjectArg, + #[arg(long)] + frontier: Option, + #[arg(long)] + hypothesis: Option, + #[arg(long)] + experiment: Option, + #[arg(long)] + metric: Option, + #[arg(long = "dimension")] + dimensions: Vec, + #[arg(long = "tag")] + tags: Vec, + #[arg(long, value_enum)] + order: Option, +} + #[derive(Args)] struct ArtifactRecordArgs { #[command(flatten)] @@ -599,6 +654,7 @@ enum CliMetricVisibility { #[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] enum CliMetricScope { Live, + Scoreboard, Visible, All, } @@ -679,7 +735,7 @@ fn main() -> Result<(), StoreError> { FrontierCommand::Open(args) => { print_json(&open_store(&args.project.project)?.frontier_open(&args.frontier)?) } - FrontierCommand::UpdateBrief(args) => run_frontier_brief_update(args), + FrontierCommand::Update(args) => run_frontier_update(args), FrontierCommand::History(args) => { print_json(&open_store(&args.project.project)?.frontier_history(&args.frontier)?) } @@ -703,6 +759,7 @@ fn main() -> Result<(), StoreError> { } ExperimentCommand::Update(args) => run_experiment_update(args), ExperimentCommand::Close(args) => run_experiment_close(args), + ExperimentCommand::Nearest(args) => run_experiment_nearest(args), ExperimentCommand::History(args) => print_json( &open_store(&args.project.project)?.experiment_history(&args.experiment)?, ), @@ -776,29 +833,43 @@ fn run_frontier_create(args: FrontierCreateArgs) -> Result<(), StoreError> { })?) } -fn run_frontier_brief_update(args: FrontierBriefUpdateArgs) -> Result<(), StoreError> { +fn run_frontier_update(args: FrontierUpdateArgs) -> Result<(), StoreError> { let mut store = open_store(&args.project.project)?; - let roadmap = if args.roadmap.is_empty() { + let roadmap = if args.roadmap.clear_roadmap { + Some(Vec::new()) + } else if args.roadmap.roadmap.is_empty() { None } else { Some( args.roadmap + .roadmap .into_iter() .map(parse_roadmap_item) .collect::, _>>()?, ) }; - let unknowns = if args.unknowns.is_empty() { + let unknowns = if args.unknowns.clear_unknowns { + Some(Vec::new()) + } else if args.unknowns.unknowns.is_empty() { None } else { - Some(to_non_empty_texts(args.unknowns)?) + Some(to_non_empty_texts(args.unknowns.unknowns)?) }; - print_json(&store.update_frontier_brief(UpdateFrontierBriefRequest { + let scoreboard_metric_keys = if args.scoreboard.clear_scoreboard_metric_keys { + Some(Vec::new()) + } else if args.scoreboard.scoreboard_metric_keys.is_empty() { + None + } else { + Some(to_non_empty_texts(args.scoreboard.scoreboard_metric_keys)?) + }; + print_json(&store.update_frontier(UpdateFrontierRequest { frontier: args.frontier, expected_revision: args.expected_revision, - situation: cli_text_patch(args.situation, args.clear_situation)?, + objective: args.objective.map(NonEmptyText::new).transpose()?, + situation: cli_text_patch(args.situation.situation, args.situation.clear_situation)?, roadmap, unknowns, + scoreboard_metric_keys, })?) } @@ -935,6 +1006,21 @@ fn run_experiment_close(args: ExperimentCloseArgs) -> Result<(), StoreError> { ) } +fn run_experiment_nearest(args: ExperimentNearestArgs) -> Result<(), StoreError> { + let store = open_store(&args.project.project)?; + print_json( + &store.experiment_nearest(fidget_spinner_store_sqlite::ExperimentNearestQuery { + frontier: args.frontier, + hypothesis: args.hypothesis, + experiment: args.experiment, + metric: args.metric.map(NonEmptyText::new).transpose()?, + dimensions: parse_dimension_assignments(args.dimensions)?, + tags: parse_tag_set(args.tags)?, + order: args.order.map(Into::into), + })?, + ) +} + fn run_artifact_record(args: ArtifactRecordArgs) -> Result<(), StoreError> { let mut store = open_store(&args.project.project)?; print_json(&store.create_artifact(CreateArtifactRequest { @@ -1414,6 +1500,7 @@ impl From for MetricScope { fn from(value: CliMetricScope) -> Self { match value { CliMetricScope::Live => Self::Live, + CliMetricScope::Scoreboard => Self::Scoreboard, CliMetricScope::Visible => Self::Visible, CliMetricScope::All => Self::All, } diff --git a/crates/fidget-spinner-cli/src/mcp/catalog.rs b/crates/fidget-spinner-cli/src/mcp/catalog.rs index d6c8171..e741e09 100644 --- a/crates/fidget-spinner-cli/src/mcp/catalog.rs +++ b/crates/fidget-spinner-cli/src/mcp/catalog.rs @@ -96,8 +96,8 @@ const TOOL_SPECS: &[ToolSpec] = &[ replay: ReplayContract::Convergent, }, ToolSpec { - name: "frontier.brief.update", - description: "Replace or patch the singleton frontier brief.", + name: "frontier.update", + description: "Patch frontier objective and grounding state.", dispatch: DispatchTarget::Worker, replay: ReplayContract::NeverReplay, }, @@ -167,6 +167,12 @@ const TOOL_SPECS: &[ToolSpec] = &[ dispatch: DispatchTarget::Worker, replay: ReplayContract::NeverReplay, }, + ToolSpec { + name: "experiment.nearest", + description: "Find the nearest accepted, kept, rejected, and champion comparators for one slice.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::Convergent, + }, ToolSpec { name: "experiment.history", description: "Read the revision history for one experiment.", @@ -353,13 +359,17 @@ fn tool_input_schema(name: &str) -> Value { &[("frontier", selector_schema("Frontier UUID or slug."))], &["frontier"], ), - "frontier.brief.update" => object_schema( + "frontier.update" => object_schema( &[ ("frontier", selector_schema("Frontier UUID or slug.")), ( "expected_revision", integer_schema("Optimistic concurrency guard."), ), + ( + "objective", + string_schema("Optional replacement frontier objective."), + ), ( "situation", nullable_string_schema("Optional frontier situation text."), @@ -369,6 +379,10 @@ fn tool_input_schema(name: &str) -> Value { "unknowns", string_array_schema("Ordered frontier unknowns."), ), + ( + "scoreboard_metric_keys", + string_array_schema("Ordered frontier scoreboard metric keys."), + ), ], &["frontier"], ), @@ -517,6 +531,36 @@ fn tool_input_schema(name: &str) -> Value { "rationale", ], ), + "experiment.nearest" => object_schema( + &[ + ( + "frontier", + selector_schema("Optional frontier UUID or slug."), + ), + ( + "hypothesis", + selector_schema("Optional hypothesis UUID or slug."), + ), + ( + "experiment", + selector_schema("Optional experiment UUID or slug used as an anchor."), + ), + ( + "metric", + string_schema("Optional metric key used to choose the champion."), + ), + ("dimensions", run_dimensions_schema()), + ("tags", string_array_schema("Require all listed tags.")), + ( + "order", + enum_string_schema( + &["asc", "desc"], + "Optional explicit champion ranking direction.", + ), + ), + ], + &[], + ), "artifact.record" => object_schema( &[ ( @@ -631,7 +675,10 @@ fn tool_input_schema(name: &str) -> Value { ), ( "scope", - enum_string_schema(&["live", "visible", "all"], "Registry slice to enumerate."), + enum_string_schema( + &["live", "scoreboard", "visible", "all"], + "Registry slice to enumerate.", + ), ), ], &[], diff --git a/crates/fidget-spinner-cli/src/mcp/projection.rs b/crates/fidget-spinner-cli/src/mcp/projection.rs index a36e915..c93d3ec 100644 --- a/crates/fidget-spinner-cli/src/mcp/projection.rs +++ b/crates/fidget-spinner-cli/src/mcp/projection.rs @@ -6,10 +6,10 @@ use fidget_spinner_core::{ RunDimensionValue, TagRecord, }; use fidget_spinner_store_sqlite::{ - ArtifactDetail, ArtifactSummary, EntityHistoryEntry, ExperimentDetail, ExperimentSummary, - FrontierOpenProjection, FrontierSummary, HypothesisCurrentState, HypothesisDetail, - MetricBestEntry, MetricKeySummary, MetricObservationSummary, ProjectStore, StoreError, - VertexSummary, + ArtifactDetail, ArtifactSummary, EntityHistoryEntry, ExperimentDetail, ExperimentNearestHit, + ExperimentNearestResult, ExperimentSummary, FrontierOpenProjection, FrontierSummary, + HypothesisCurrentState, HypothesisDetail, MetricBestEntry, MetricKeySummary, + MetricObservationSummary, ProjectStore, StoreError, VertexSummary, }; use libmcp::{ ProjectionError, SelectorProjection, StructuredProjection, SurfaceKind, SurfacePolicy, @@ -56,6 +56,7 @@ pub(crate) struct FrontierBriefProjection { pub(crate) situation: Option, pub(crate) roadmap: Vec, pub(crate) unknowns: Vec, + pub(crate) scoreboard_metric_keys: Vec, pub(crate) revision: u64, #[serde(skip_serializing_if = "Option::is_none")] pub(crate) updated_at: Option, @@ -106,6 +107,7 @@ pub(crate) struct FrontierListOutput { pub(crate) struct FrontierOpenOutput { pub(crate) frontier: FrontierOpenFrontierProjection, pub(crate) active_tags: Vec, + pub(crate) scoreboard_metrics: Vec, pub(crate) active_metric_keys: Vec, pub(crate) active_hypotheses: Vec, pub(crate) open_experiments: Vec, @@ -518,6 +520,32 @@ pub(crate) struct MetricBestOutput { pub(crate) entries: Vec, } +#[derive(Clone, Serialize)] +pub(crate) struct ExperimentNearestHitProjection { + pub(crate) experiment: ExperimentSummaryProjection, + pub(crate) hypothesis: HypothesisSummaryProjection, + pub(crate) dimensions: BTreeMap, + pub(crate) reasons: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) metric_value: Option, +} + +#[derive(Clone, Serialize, libmcp::ToolProjection)] +#[libmcp(kind = "read")] +pub(crate) struct ExperimentNearestOutput { + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) metric: Option, + pub(crate) target_dimensions: BTreeMap, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) accepted: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) kept: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) rejected: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) champion: Option, +} + #[derive(Clone, Serialize)] pub(crate) struct TagRecordProjection { pub(crate) name: String, @@ -649,6 +677,11 @@ pub(crate) fn frontier_open(projection: &FrontierOpenProjection) -> FrontierOpen .iter() .map(ToString::to_string) .collect(), + scoreboard_metrics: projection + .scoreboard_metric_keys + .iter() + .map(metric_key_summary) + .collect(), active_metric_keys: projection .active_metric_keys .iter() @@ -863,6 +896,17 @@ pub(crate) fn metric_best(entries: &[MetricBestEntry]) -> MetricBestOutput { } } +pub(crate) fn experiment_nearest(result: &ExperimentNearestResult) -> ExperimentNearestOutput { + ExperimentNearestOutput { + metric: result.metric.as_ref().map(metric_key_summary), + target_dimensions: dimension_map(&result.target_dimensions), + accepted: result.accepted.as_ref().map(experiment_nearest_hit), + kept: result.kept.as_ref().map(experiment_nearest_hit), + rejected: result.rejected.as_ref().map(experiment_nearest_hit), + champion: result.champion.as_ref().map(experiment_nearest_hit), + } +} + pub(crate) fn tag_record(tag: &TagRecord) -> TagRecordOutput { TagRecordOutput { record: tag_record_projection(tag), @@ -963,6 +1007,11 @@ fn frontier_brief_projection( situation: brief.situation.as_ref().map(ToString::to_string), roadmap, unknowns: brief.unknowns.iter().map(ToString::to_string).collect(), + scoreboard_metric_keys: brief + .scoreboard_metric_keys + .iter() + .map(ToString::to_string) + .collect(), revision: brief.revision, updated_at: brief.updated_at.map(timestamp_value), } @@ -1142,6 +1191,16 @@ fn metric_best_entry(entry: &MetricBestEntry) -> MetricBestEntryProjection { } } +fn experiment_nearest_hit(hit: &ExperimentNearestHit) -> ExperimentNearestHitProjection { + ExperimentNearestHitProjection { + experiment: experiment_summary(&hit.experiment), + hypothesis: hypothesis_summary(&hit.hypothesis), + dimensions: dimension_map(&hit.dimensions), + reasons: hit.reasons.iter().map(ToString::to_string).collect(), + metric_value: hit.metric_value.as_ref().map(metric_observation_summary), + } +} + fn metric_observation_summary( metric: &MetricObservationSummary, ) -> MetricObservationSummaryProjection { diff --git a/crates/fidget-spinner-cli/src/mcp/service.rs b/crates/fidget-spinner-cli/src/mcp/service.rs index 7c649aa..70f4751 100644 --- a/crates/fidget-spinner-cli/src/mcp/service.rs +++ b/crates/fidget-spinner-cli/src/mcp/service.rs @@ -14,11 +14,11 @@ use fidget_spinner_core::{ use fidget_spinner_store_sqlite::{ AttachmentSelector, CloseExperimentRequest, CreateArtifactRequest, CreateFrontierRequest, CreateHypothesisRequest, DefineMetricRequest, DefineRunDimensionRequest, EntityHistoryEntry, - ExperimentOutcomePatch, FrontierOpenProjection, FrontierRoadmapItemDraft, FrontierSummary, - ListArtifactsQuery, ListExperimentsQuery, ListHypothesesQuery, MetricBestEntry, - MetricBestQuery, MetricKeySummary, MetricKeysQuery, MetricRankOrder, MetricScope, - OpenExperimentRequest, ProjectStatus, ProjectStore, StoreError, TextPatch, - UpdateArtifactRequest, UpdateExperimentRequest, UpdateFrontierBriefRequest, + ExperimentNearestQuery, ExperimentOutcomePatch, FrontierOpenProjection, + FrontierRoadmapItemDraft, FrontierSummary, ListArtifactsQuery, ListExperimentsQuery, + ListHypothesesQuery, MetricBestEntry, MetricBestQuery, MetricKeySummary, MetricKeysQuery, + MetricRankOrder, MetricScope, OpenExperimentRequest, ProjectStatus, ProjectStore, StoreError, + TextPatch, UpdateArtifactRequest, UpdateExperimentRequest, UpdateFrontierRequest, UpdateHypothesisRequest, VertexSelector, VertexSummary, }; use serde::Deserialize; @@ -137,44 +137,58 @@ impl WorkerService { let args = deserialize::(arguments)?; frontier_open_output(&lift!(self.store.frontier_open(&args.frontier)), &operation)? } - "frontier.brief.update" => { - let args = deserialize::(arguments)?; + "frontier.update" => { + let args = deserialize::(arguments)?; let frontier = lift!( - self.store - .update_frontier_brief(UpdateFrontierBriefRequest { - frontier: args.frontier, - expected_revision: args.expected_revision, - situation: nullable_text_patch_from_wire(args.situation, &operation)?, - roadmap: args - .roadmap - .map(|items| { - items - .into_iter() - .map(|item| { - Ok(FrontierRoadmapItemDraft { - rank: item.rank, - hypothesis: item.hypothesis, - summary: item - .summary - .map(NonEmptyText::new) - .transpose() - .map_err(store_fault(&operation))?, - }) + self.store.update_frontier(UpdateFrontierRequest { + frontier: args.frontier, + expected_revision: args.expected_revision, + objective: args + .objective + .map(NonEmptyText::new) + .transpose() + .map_err(store_fault(&operation))?, + situation: nullable_text_patch_from_wire(args.situation, &operation)?, + roadmap: args + .roadmap + .map(|items| { + items + .into_iter() + .map(|item| { + Ok(FrontierRoadmapItemDraft { + rank: item.rank, + hypothesis: item.hypothesis, + summary: item + .summary + .map(NonEmptyText::new) + .transpose() + .map_err(store_fault(&operation))?, }) - .collect::, FaultRecord>>() - }) - .transpose()?, - unknowns: args - .unknowns - .map(|items| { - items - .into_iter() - .map(NonEmptyText::new) - .collect::, _>>() - .map_err(store_fault(&operation)) - }) - .transpose()?, - }) + }) + .collect::, FaultRecord>>() + }) + .transpose()?, + unknowns: args + .unknowns + .map(|items| { + items + .into_iter() + .map(NonEmptyText::new) + .collect::, _>>() + .map_err(store_fault(&operation)) + }) + .transpose()?, + scoreboard_metric_keys: args + .scoreboard_metric_keys + .map(|items| { + items + .into_iter() + .map(NonEmptyText::new) + .collect::, _>>() + .map_err(store_fault(&operation)) + }) + .transpose()?, + }) ); frontier_record_output(&self.store, &frontier, &operation)? } @@ -366,6 +380,32 @@ impl WorkerService { ); experiment_record_output(&experiment, &operation)? } + "experiment.nearest" => { + let args = deserialize::(arguments)?; + experiment_nearest_output( + &lift!( + self.store.experiment_nearest(ExperimentNearestQuery { + frontier: args.frontier, + hypothesis: args.hypothesis, + experiment: args.experiment, + metric: args + .metric + .map(NonEmptyText::new) + .transpose() + .map_err(store_fault(&operation))?, + dimensions: dimension_map_from_wire(args.dimensions)?, + tags: args + .tags + .map(tags_to_set) + .transpose() + .map_err(store_fault(&operation))? + .unwrap_or_default(), + order: args.order, + }) + ), + &operation, + )? + } "experiment.history" => { let args = deserialize::(arguments)?; history_output( @@ -583,12 +623,14 @@ struct FrontierSelectorArgs { } #[derive(Debug, Deserialize)] -struct FrontierBriefUpdateArgs { +struct FrontierUpdateArgs { frontier: String, expected_revision: Option, + objective: Option, situation: Option, roadmap: Option>, unknowns: Option>, + scoreboard_metric_keys: Option>, } #[derive(Debug, Deserialize)] @@ -685,6 +727,17 @@ struct ExperimentCloseArgs { analysis: Option, } +#[derive(Debug, Deserialize)] +struct ExperimentNearestArgs { + frontier: Option, + hypothesis: Option, + experiment: Option, + metric: Option, + dimensions: Option>, + tags: Option>, + order: Option, +} + #[derive(Debug, Deserialize)] struct ExperimentOutcomeWire { backend: ExecutionBackend, @@ -835,6 +888,7 @@ where | StoreError::UnknownRoadmapHypothesis(_) | StoreError::ManualExperimentRequiresCommand | StoreError::MetricOrderRequired { .. } + | StoreError::MetricScopeRequiresFrontier { .. } | StoreError::UnknownDimensionFilter(_) | StoreError::DuplicateTag(_) | StoreError::DuplicateMetricDefinition(_) @@ -1005,6 +1059,14 @@ fn json_value_to_dimension(value: Value) -> Result String { + match value { + RunDimensionValue::String(value) | RunDimensionValue::Timestamp(value) => value.to_string(), + RunDimensionValue::Numeric(value) => value.to_string(), + RunDimensionValue::Boolean(value) => value.to_string(), + } +} + fn project_status_output( status: &ProjectStatus, operation: &str, @@ -1144,6 +1206,18 @@ fn frontier_record_output( .join("; ") )); } + if !frontier.brief.scoreboard_metric_keys.is_empty() { + lines.push(format!( + "scoreboard metrics: {}", + frontier + .brief + .scoreboard_metric_keys + .iter() + .map(ToString::to_string) + .collect::>() + .join(", ") + )); + } projected_tool_output( &projection, lines.join("\n"), @@ -1187,6 +1261,17 @@ fn frontier_open_output( .join(", ") )); } + if !projection.scoreboard_metric_keys.is_empty() { + lines.push(format!( + "scoreboard metrics: {}", + projection + .scoreboard_metric_keys + .iter() + .map(|metric| metric.key.to_string()) + .collect::>() + .join(", ") + )); + } if !projection.active_hypotheses.is_empty() { lines.push("active hypotheses:".to_owned()); for state in &projection.active_hypotheses { @@ -1567,6 +1652,71 @@ fn metric_best_output( ) } +fn experiment_nearest_output( + result: &fidget_spinner_store_sqlite::ExperimentNearestResult, + operation: &str, +) -> Result { + let projection = projection::experiment_nearest(result); + let mut lines = Vec::new(); + if !result.target_dimensions.is_empty() { + lines.push(format!( + "target slice: {}", + result + .target_dimensions + .iter() + .map(|(key, value)| format!("{key}={}", run_dimension_value_text(value))) + .collect::>() + .join(", ") + )); + } + if let Some(metric) = result.metric.as_ref() { + lines.push(format!( + "champion metric: {} [{} {}]", + metric.key, + metric.unit.as_str(), + metric.objective.as_str() + )); + } + for (label, hit) in [ + ("accepted", result.accepted.as_ref()), + ("kept", result.kept.as_ref()), + ("rejected", result.rejected.as_ref()), + ("champion", result.champion.as_ref()), + ] { + if let Some(hit) = hit { + let suffix = hit + .metric_value + .as_ref() + .map_or_else(String::new, |metric| { + format!(" | {}={}", metric.key, metric.value) + }); + lines.push(format!( + "{}: {} / {}{}", + label, hit.experiment.slug, hit.hypothesis.slug, suffix + )); + lines.push(format!( + " why: {}", + hit.reasons + .iter() + .map(ToString::to_string) + .collect::>() + .join("; ") + )); + } + } + projected_tool_output( + &projection, + if lines.is_empty() { + "no comparator candidates".to_owned() + } else { + lines.join("\n") + }, + None, + FaultStage::Worker, + operation, + ) +} + fn run_dimension_definition_output( dimension: &fidget_spinner_core::RunDimensionDefinition, operation: &str, @@ -1704,6 +1854,7 @@ mod legacy_projection_values { "situation": frontier.brief.situation, "roadmap": roadmap, "unknowns": frontier.brief.unknowns, + "scoreboard_metric_keys": frontier.brief.scoreboard_metric_keys, "revision": frontier.brief.revision, "updated_at": frontier.brief.updated_at.map(timestamp_value), }, @@ -1749,11 +1900,17 @@ mod legacy_projection_values { "situation": projection.frontier.brief.situation, "roadmap": roadmap, "unknowns": projection.frontier.brief.unknowns, + "scoreboard_metric_keys": projection.frontier.brief.scoreboard_metric_keys, "revision": projection.frontier.brief.revision, "updated_at": projection.frontier.brief.updated_at.map(timestamp_value), }, }, "active_tags": projection.active_tags, + "scoreboard_metrics": projection + .scoreboard_metric_keys + .iter() + .map(metric_key_summary_value) + .collect::>(), "active_metric_keys": projection .active_metric_keys .iter() diff --git a/crates/fidget-spinner-cli/src/ui.rs b/crates/fidget-spinner-cli/src/ui.rs index 0b05b29..cd067ab 100644 --- a/crates/fidget-spinner-cli/src/ui.rs +++ b/crates/fidget-spinner-cli/src/ui.rs @@ -428,19 +428,35 @@ fn render_frontier_tab_content( }) } FrontierTab::Metrics => { - let metric_keys = if projection.active_metric_keys.is_empty() { + let other_metric_keys = if projection.active_metric_keys.is_empty() { store.metric_keys(MetricKeysQuery { frontier: Some(projection.frontier.slug.to_string()), scope: MetricScope::Visible, })? } else { - projection.active_metric_keys.clone() + projection + .active_metric_keys + .iter() + .filter(|metric| { + !projection + .scoreboard_metric_keys + .iter() + .any(|scoreboard| scoreboard.key == metric.key) + }) + .cloned() + .collect() }; let selected_metric = query .metric .as_deref() .and_then(|selector| NonEmptyText::new(selector.to_owned()).ok()) - .or_else(|| metric_keys.first().map(|metric| metric.key.clone())); + .or_else(|| { + projection + .scoreboard_metric_keys + .first() + .or_else(|| other_metric_keys.first()) + .map(|metric| metric.key.clone()) + }); let series = selected_metric .as_ref() .map(|metric| { @@ -452,7 +468,8 @@ fn render_frontier_tab_content( (render_frontier_header(&projection.frontier)) (render_metric_series_section( &projection.frontier.slug, - &metric_keys, + &projection.scoreboard_metric_keys, + &other_metric_keys, selected_metric.as_ref(), series.as_ref(), &dimension_filters, @@ -534,7 +551,8 @@ fn render_closed_hypothesis_grid( fn render_metric_series_section( frontier_slug: &Slug, - metric_keys: &[fidget_spinner_store_sqlite::MetricKeySummary], + scoreboard_metric_keys: &[fidget_spinner_store_sqlite::MetricKeySummary], + other_metric_keys: &[fidget_spinner_store_sqlite::MetricKeySummary], selected_metric: Option<&NonEmptyText>, series: Option<&FrontierMetricSeries>, dimension_filters: &BTreeMap, @@ -552,24 +570,53 @@ fn render_metric_series_section( p.prose { "Server-rendered SVG over the frontier’s closed experiment ledger. Choose a live metric, then walk to the underlying experiments deliberately." } - @if metric_keys.is_empty() { + @if scoreboard_metric_keys.is_empty() && other_metric_keys.is_empty() { p.muted { "No visible metrics registered for this frontier." } } @else { - div.metric-picker { - @for metric in metric_keys { - @let href = frontier_tab_href(frontier_slug, FrontierTab::Metrics, Some(metric.key.as_str())); - a - href=(href) - class={(if selected_metric.is_some_and(|selected| selected == &metric.key) { - "metric-choice active" - } else { - "metric-choice" - })} - { - span.metric-choice-key { (metric.key) } - span.metric-choice-meta { - (metric.objective.as_str()) " · " - (metric.unit.as_str()) + @if !scoreboard_metric_keys.is_empty() { + div.metric-picker-group { + h3 { "Scoreboard" } + div.metric-picker { + @for metric in scoreboard_metric_keys { + @let href = frontier_tab_href(frontier_slug, FrontierTab::Metrics, Some(metric.key.as_str())); + a + href=(href) + class={(if selected_metric.is_some_and(|selected| selected == &metric.key) { + "metric-choice active" + } else { + "metric-choice" + })} + { + span.metric-choice-key { (metric.key) } + span.metric-choice-meta { + (metric.objective.as_str()) " · " + (metric.unit.as_str()) + } + } + } + } + } + } + @if !other_metric_keys.is_empty() { + div.metric-picker-group { + h3 { "Other Live Metrics" } + div.metric-picker { + @for metric in other_metric_keys { + @let href = frontier_tab_href(frontier_slug, FrontierTab::Metrics, Some(metric.key.as_str())); + a + href=(href) + class={(if selected_metric.is_some_and(|selected| selected == &metric.key) { + "metric-choice active" + } else { + "metric-choice" + })} + { + span.metric-choice-key { (metric.key) } + span.metric-choice-meta { + (metric.objective.as_str()) " · " + (metric.unit.as_str()) + } + } } } } @@ -992,6 +1039,43 @@ fn render_frontier_active_sets(projection: &FrontierOpenProjection) -> Markup { } } } + div.subcard { + h3 { "Scoreboard Metrics" } + @if projection.scoreboard_metric_keys.is_empty() { + p.muted { "No frontier scoreboard metrics configured." } + } @else { + div.table-scroll { + table.metric-table { + thead { + tr { + th { "Key" } + th { "Unit" } + th { "Objective" } + th { "Refs" } + } + } + tbody { + @for metric in &projection.scoreboard_metric_keys { + tr { + td { + a href=(frontier_tab_href( + &projection.frontier.slug, + FrontierTab::Metrics, + Some(metric.key.as_str()), + )) { + (metric.key) + } + } + td { (metric.unit.as_str()) } + td { (metric.objective.as_str()) } + td { (metric.reference_count) } + } + } + } + } + } + } + } div.subcard { h3 { "Live Metrics" } @if projection.active_metric_keys.is_empty() { diff --git a/crates/fidget-spinner-cli/tests/mcp_hardening.rs b/crates/fidget-spinner-cli/tests/mcp_hardening.rs index 86b6719..4fb80ab 100644 --- a/crates/fidget-spinner-cli/tests/mcp_hardening.rs +++ b/crates/fidget-spinner-cli/tests/mcp_hardening.rs @@ -226,11 +226,14 @@ fn cold_start_exposes_bound_surface_and_new_toolset() -> TestResult { let tools = harness.tools_list()?; let tool_names = tool_names(&tools); assert!(tool_names.contains(&"frontier.open")); + assert!(tool_names.contains(&"frontier.update")); assert!(tool_names.contains(&"hypothesis.record")); assert!(tool_names.contains(&"experiment.close")); + assert!(tool_names.contains(&"experiment.nearest")); assert!(tool_names.contains(&"artifact.record")); assert!(!tool_names.contains(&"node.list")); assert!(!tool_names.contains(&"research.record")); + assert!(!tool_names.contains(&"frontier.brief.update")); let health = harness.call_tool(3, "system.health", json!({}))?; assert_tool_ok(&health); @@ -392,6 +395,246 @@ fn frontier_open_is_the_grounding_surface_for_live_state() -> TestResult { Ok(()) } +#[test] +fn frontier_update_mutates_objective_and_scoreboard_grounding() -> TestResult { + let project_root = temp_project_root("frontier_update")?; + init_project(&project_root)?; + + let mut harness = McpHarness::spawn(Some(&project_root))?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + + assert_tool_ok(&harness.call_tool( + 70, + "metric.define", + json!({ + "key": "nodes_solved", + "unit": "count", + "objective": "maximize", + "visibility": "canonical", + }), + )?); + assert_tool_ok(&harness.call_tool( + 71, + "frontier.create", + json!({ + "label": "LP root frontier", + "objective": "Initial root push", + "slug": "lp-root", + }), + )?); + + let updated = harness.call_tool_full( + 72, + "frontier.update", + json!({ + "frontier": "lp-root", + "objective": "Drive structural LP cash-out on parity rails", + "situation": "Structural LP churn is the active hill.", + "unknowns": ["How far queued structural reuse can cash out below root."], + "scoreboard_metric_keys": ["nodes_solved"], + }), + )?; + assert_tool_ok(&updated); + let updated_content = tool_content(&updated); + assert_eq!( + updated_content["record"]["objective"].as_str(), + Some("Drive structural LP cash-out on parity rails") + ); + assert_eq!( + must_some( + updated_content["record"]["brief"]["scoreboard_metric_keys"] + .as_array() + .and_then(|items| items.first()) + .and_then(Value::as_str), + "frontier scoreboard metric key", + )?, + "nodes_solved" + ); + + let frontier_open = + harness.call_tool_full(73, "frontier.open", json!({ "frontier": "lp-root" }))?; + assert_tool_ok(&frontier_open); + let open_content = tool_content(&frontier_open); + assert_eq!( + open_content["frontier"]["objective"].as_str(), + Some("Drive structural LP cash-out on parity rails") + ); + assert_eq!( + must_some( + open_content["scoreboard_metrics"] + .as_array() + .and_then(|items| items.first()), + "frontier scoreboard metrics entry", + )?["key"] + .as_str(), + Some("nodes_solved") + ); + + let scoreboard = harness.call_tool_full( + 74, + "metric.keys", + json!({ + "frontier": "lp-root", + "scope": "scoreboard", + }), + )?; + assert_tool_ok(&scoreboard); + assert_eq!( + must_some( + tool_content(&scoreboard)["metrics"] + .as_array() + .and_then(|items| items.first()), + "scoreboard metric entry", + )?["key"] + .as_str(), + Some("nodes_solved") + ); + + Ok(()) +} + +#[test] +fn experiment_nearest_finds_structural_buckets_and_champion() -> TestResult { + let project_root = temp_project_root("experiment_nearest")?; + init_project(&project_root)?; + + let mut harness = McpHarness::spawn(Some(&project_root))?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + + assert_tool_ok(&harness.call_tool( + 80, + "metric.define", + json!({ + "key": "nodes_solved", + "unit": "count", + "objective": "maximize", + "visibility": "canonical", + }), + )?); + assert_tool_ok(&harness.call_tool( + 81, + "run.dimension.define", + json!({"key": "instance", "value_type": "string"}), + )?); + assert_tool_ok(&harness.call_tool( + 82, + "run.dimension.define", + json!({"key": "profile", "value_type": "string"}), + )?); + assert_tool_ok(&harness.call_tool( + 83, + "run.dimension.define", + json!({"key": "duration_s", "value_type": "numeric"}), + )?); + assert_tool_ok(&harness.call_tool( + 84, + "frontier.create", + json!({ + "label": "Comparator frontier", + "objective": "Keep exact-slice comparators cheap to find", + "slug": "comparators", + }), + )?); + assert_tool_ok(&harness.call_tool( + 85, + "frontier.update", + json!({ + "frontier": "comparators", + "scoreboard_metric_keys": ["nodes_solved"], + }), + )?); + assert_tool_ok(&harness.call_tool( + 86, + "hypothesis.record", + json!({ + "frontier": "comparators", + "slug": "structural-loop", + "title": "Structural loop", + "summary": "Compare exact-slice structural LP lines.", + "body": "Thread structural LP reuse through the same 4x5 parity slice so exact-slice comparators remain easy to recover and dead branches stay visible before the next iteration starts.", + }), + )?); + + for (id, slug, verdict, value, duration_s) in [ + (87_u64, "exact-kept", "kept", 111.0, 60), + (89_u64, "exact-accepted", "accepted", 125.0, 60), + (91_u64, "exact-rejected", "rejected", 98.0, 60), + (93_u64, "different-duration", "accepted", 140.0, 20), + ] { + assert_tool_ok(&harness.call_tool( + id, + "experiment.open", + json!({ + "hypothesis": "structural-loop", + "slug": slug, + "title": format!("{slug} rail"), + "summary": format!("{slug} summary"), + }), + )?); + assert_tool_ok(&harness.call_tool( + id + 1, + "experiment.close", + json!({ + "experiment": slug, + "backend": "manual", + "command": {"argv": [slug]}, + "dimensions": { + "instance": "4x5", + "profile": "parity", + "duration_s": duration_s, + }, + "primary_metric": {"key": "nodes_solved", "value": value}, + "verdict": verdict, + "rationale": format!("{slug} outcome"), + }), + )?); + } + + let nearest = harness.call_tool_full( + 95, + "experiment.nearest", + json!({ + "frontier": "comparators", + "dimensions": { + "instance": "4x5", + "profile": "parity", + "duration_s": 60, + }, + }), + )?; + assert_tool_ok(&nearest); + let content = tool_content(&nearest); + assert_eq!(content["metric"]["key"].as_str(), Some("nodes_solved")); + assert_eq!( + content["accepted"]["experiment"]["slug"].as_str(), + Some("exact-accepted") + ); + assert_eq!( + content["kept"]["experiment"]["slug"].as_str(), + Some("exact-kept") + ); + assert_eq!( + content["rejected"]["experiment"]["slug"].as_str(), + Some("exact-rejected") + ); + assert_eq!( + content["champion"]["experiment"]["slug"].as_str(), + Some("exact-accepted") + ); + assert!( + must_some( + content["accepted"]["reasons"].as_array(), + "accepted comparator reasons", + )? + .iter() + .any(|reason| reason.as_str() == Some("exact dimension match")) + ); + + Ok(()) +} + #[test] fn registry_and_history_surfaces_render_timestamps_as_strings() -> TestResult { let project_root = temp_project_root("timestamp_text")?; -- cgit v1.2.3