diff options
| author | main <main@swarm.moe> | 2026-03-21 19:19:49 -0400 |
|---|---|---|
| committer | main <main@swarm.moe> | 2026-03-21 19:19:49 -0400 |
| commit | e15fd4966e21bd8d31dbf580ede8a309c994816d (patch) | |
| tree | 691d38d549959a59c02b982dd96cab9542dc3d85 /crates | |
| parent | 3a523c3c8ac1bf9094dbe65a6f53b71085438c0c (diff) | |
| download | fidget_spinner-main.zip | |
Diffstat (limited to 'crates')
| -rw-r--r-- | crates/fidget-spinner-cli/src/main.rs | 109 | ||||
| -rw-r--r-- | crates/fidget-spinner-cli/src/mcp/catalog.rs | 55 | ||||
| -rw-r--r-- | crates/fidget-spinner-cli/src/mcp/projection.rs | 67 | ||||
| -rw-r--r-- | crates/fidget-spinner-cli/src/mcp/service.rs | 241 | ||||
| -rw-r--r-- | crates/fidget-spinner-cli/src/ui.rs | 126 | ||||
| -rw-r--r-- | crates/fidget-spinner-cli/tests/mcp_hardening.rs | 243 | ||||
| -rw-r--r-- | crates/fidget-spinner-core/src/model.rs | 2 | ||||
| -rw-r--r-- | crates/fidget-spinner-store-sqlite/src/lib.rs | 515 |
8 files changed, 1266 insertions, 92 deletions
diff --git a/crates/fidget-spinner-cli/src/main.rs b/crates/fidget-spinner-cli/src/main.rs index 7482794..63a5180 100644 --- a/crates/fidget-spinner-cli/src/main.rs +++ b/crates/fidget-spinner-cli/src/main.rs @@ -21,8 +21,8 @@ use fidget_spinner_store_sqlite::{ ExperimentOutcomePatch, FrontierRoadmapItemDraft, ListArtifactsQuery, ListExperimentsQuery, ListHypothesesQuery, MetricBestQuery, MetricKeysQuery, MetricRankOrder, MetricScope, OpenExperimentRequest, ProjectStore, STORE_DIR_NAME, StoreError, TextPatch, - UpdateArtifactRequest, UpdateExperimentRequest, UpdateFrontierBriefRequest, - UpdateHypothesisRequest, VertexSelector, + UpdateArtifactRequest, UpdateExperimentRequest, UpdateFrontierRequest, UpdateHypothesisRequest, + VertexSelector, }; #[cfg(test)] use libmcp_testkit as _; @@ -126,7 +126,7 @@ enum FrontierCommand { List(ProjectArg), Read(FrontierSelectorArgs), Open(FrontierSelectorArgs), - UpdateBrief(FrontierBriefUpdateArgs), + Update(FrontierUpdateArgs), History(FrontierSelectorArgs), } @@ -146,6 +146,7 @@ enum ExperimentCommand { Read(ExperimentSelectorArgs), Update(ExperimentUpdateArgs), Close(ExperimentCloseArgs), + Nearest(ExperimentNearestArgs), History(ExperimentSelectorArgs), } @@ -226,7 +227,7 @@ struct FrontierSelectorArgs { } #[derive(Args)] -struct FrontierBriefUpdateArgs { +struct FrontierUpdateArgs { #[command(flatten)] project: ProjectArg, #[arg(long)] @@ -234,13 +235,47 @@ struct FrontierBriefUpdateArgs { #[arg(long)] expected_revision: Option<u64>, #[arg(long)] + objective: Option<String>, + #[command(flatten)] + situation: FrontierSituationPatchArgs, + #[command(flatten)] + unknowns: FrontierUnknownsPatchArgs, + #[command(flatten)] + roadmap: FrontierRoadmapPatchArgs, + #[command(flatten)] + scoreboard: FrontierScoreboardPatchArgs, +} + +#[derive(Args)] +struct FrontierSituationPatchArgs { + #[arg(long)] situation: Option<String>, #[arg(long)] clear_situation: bool, +} + +#[derive(Args)] +struct FrontierUnknownsPatchArgs { #[arg(long = "unknown")] unknowns: Vec<String>, + #[arg(long = "clear-unknowns")] + clear_unknowns: bool, +} + +#[derive(Args)] +struct FrontierRoadmapPatchArgs { #[arg(long = "roadmap")] roadmap: Vec<String>, + #[arg(long = "clear-roadmap")] + clear_roadmap: bool, +} + +#[derive(Args)] +struct FrontierScoreboardPatchArgs { + #[arg(long = "scoreboard-metric")] + scoreboard_metric_keys: Vec<String>, + #[arg(long = "clear-scoreboard")] + clear_scoreboard_metric_keys: bool, } #[derive(Args)] @@ -418,6 +453,26 @@ struct ExperimentCloseArgs { } #[derive(Args)] +struct ExperimentNearestArgs { + #[command(flatten)] + project: ProjectArg, + #[arg(long)] + frontier: Option<String>, + #[arg(long)] + hypothesis: Option<String>, + #[arg(long)] + experiment: Option<String>, + #[arg(long)] + metric: Option<String>, + #[arg(long = "dimension")] + dimensions: Vec<String>, + #[arg(long = "tag")] + tags: Vec<String>, + #[arg(long, value_enum)] + order: Option<CliMetricRankOrder>, +} + +#[derive(Args)] struct ArtifactRecordArgs { #[command(flatten)] project: ProjectArg, @@ -599,6 +654,7 @@ enum CliMetricVisibility { #[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] enum CliMetricScope { Live, + Scoreboard, Visible, All, } @@ -679,7 +735,7 @@ fn main() -> Result<(), StoreError> { FrontierCommand::Open(args) => { print_json(&open_store(&args.project.project)?.frontier_open(&args.frontier)?) } - FrontierCommand::UpdateBrief(args) => run_frontier_brief_update(args), + FrontierCommand::Update(args) => run_frontier_update(args), FrontierCommand::History(args) => { print_json(&open_store(&args.project.project)?.frontier_history(&args.frontier)?) } @@ -703,6 +759,7 @@ fn main() -> Result<(), StoreError> { } ExperimentCommand::Update(args) => run_experiment_update(args), ExperimentCommand::Close(args) => run_experiment_close(args), + ExperimentCommand::Nearest(args) => run_experiment_nearest(args), ExperimentCommand::History(args) => print_json( &open_store(&args.project.project)?.experiment_history(&args.experiment)?, ), @@ -776,29 +833,43 @@ fn run_frontier_create(args: FrontierCreateArgs) -> Result<(), StoreError> { })?) } -fn run_frontier_brief_update(args: FrontierBriefUpdateArgs) -> Result<(), StoreError> { +fn run_frontier_update(args: FrontierUpdateArgs) -> Result<(), StoreError> { let mut store = open_store(&args.project.project)?; - let roadmap = if args.roadmap.is_empty() { + let roadmap = if args.roadmap.clear_roadmap { + Some(Vec::new()) + } else if args.roadmap.roadmap.is_empty() { None } else { Some( args.roadmap + .roadmap .into_iter() .map(parse_roadmap_item) .collect::<Result<Vec<_>, _>>()?, ) }; - let unknowns = if args.unknowns.is_empty() { + let unknowns = if args.unknowns.clear_unknowns { + Some(Vec::new()) + } else if args.unknowns.unknowns.is_empty() { None } else { - Some(to_non_empty_texts(args.unknowns)?) + Some(to_non_empty_texts(args.unknowns.unknowns)?) }; - print_json(&store.update_frontier_brief(UpdateFrontierBriefRequest { + let scoreboard_metric_keys = if args.scoreboard.clear_scoreboard_metric_keys { + Some(Vec::new()) + } else if args.scoreboard.scoreboard_metric_keys.is_empty() { + None + } else { + Some(to_non_empty_texts(args.scoreboard.scoreboard_metric_keys)?) + }; + print_json(&store.update_frontier(UpdateFrontierRequest { frontier: args.frontier, expected_revision: args.expected_revision, - situation: cli_text_patch(args.situation, args.clear_situation)?, + objective: args.objective.map(NonEmptyText::new).transpose()?, + situation: cli_text_patch(args.situation.situation, args.situation.clear_situation)?, roadmap, unknowns, + scoreboard_metric_keys, })?) } @@ -935,6 +1006,21 @@ fn run_experiment_close(args: ExperimentCloseArgs) -> Result<(), StoreError> { ) } +fn run_experiment_nearest(args: ExperimentNearestArgs) -> Result<(), StoreError> { + let store = open_store(&args.project.project)?; + print_json( + &store.experiment_nearest(fidget_spinner_store_sqlite::ExperimentNearestQuery { + frontier: args.frontier, + hypothesis: args.hypothesis, + experiment: args.experiment, + metric: args.metric.map(NonEmptyText::new).transpose()?, + dimensions: parse_dimension_assignments(args.dimensions)?, + tags: parse_tag_set(args.tags)?, + order: args.order.map(Into::into), + })?, + ) +} + fn run_artifact_record(args: ArtifactRecordArgs) -> Result<(), StoreError> { let mut store = open_store(&args.project.project)?; print_json(&store.create_artifact(CreateArtifactRequest { @@ -1414,6 +1500,7 @@ impl From<CliMetricScope> for MetricScope { fn from(value: CliMetricScope) -> Self { match value { CliMetricScope::Live => Self::Live, + CliMetricScope::Scoreboard => Self::Scoreboard, CliMetricScope::Visible => Self::Visible, CliMetricScope::All => Self::All, } diff --git a/crates/fidget-spinner-cli/src/mcp/catalog.rs b/crates/fidget-spinner-cli/src/mcp/catalog.rs index d6c8171..e741e09 100644 --- a/crates/fidget-spinner-cli/src/mcp/catalog.rs +++ b/crates/fidget-spinner-cli/src/mcp/catalog.rs @@ -96,8 +96,8 @@ const TOOL_SPECS: &[ToolSpec] = &[ replay: ReplayContract::Convergent, }, ToolSpec { - name: "frontier.brief.update", - description: "Replace or patch the singleton frontier brief.", + name: "frontier.update", + description: "Patch frontier objective and grounding state.", dispatch: DispatchTarget::Worker, replay: ReplayContract::NeverReplay, }, @@ -168,6 +168,12 @@ const TOOL_SPECS: &[ToolSpec] = &[ replay: ReplayContract::NeverReplay, }, ToolSpec { + name: "experiment.nearest", + description: "Find the nearest accepted, kept, rejected, and champion comparators for one slice.", + dispatch: DispatchTarget::Worker, + replay: ReplayContract::Convergent, + }, + ToolSpec { name: "experiment.history", description: "Read the revision history for one experiment.", dispatch: DispatchTarget::Worker, @@ -353,7 +359,7 @@ fn tool_input_schema(name: &str) -> Value { &[("frontier", selector_schema("Frontier UUID or slug."))], &["frontier"], ), - "frontier.brief.update" => object_schema( + "frontier.update" => object_schema( &[ ("frontier", selector_schema("Frontier UUID or slug.")), ( @@ -361,6 +367,10 @@ fn tool_input_schema(name: &str) -> Value { integer_schema("Optimistic concurrency guard."), ), ( + "objective", + string_schema("Optional replacement frontier objective."), + ), + ( "situation", nullable_string_schema("Optional frontier situation text."), ), @@ -369,6 +379,10 @@ fn tool_input_schema(name: &str) -> Value { "unknowns", string_array_schema("Ordered frontier unknowns."), ), + ( + "scoreboard_metric_keys", + string_array_schema("Ordered frontier scoreboard metric keys."), + ), ], &["frontier"], ), @@ -517,6 +531,36 @@ fn tool_input_schema(name: &str) -> Value { "rationale", ], ), + "experiment.nearest" => object_schema( + &[ + ( + "frontier", + selector_schema("Optional frontier UUID or slug."), + ), + ( + "hypothesis", + selector_schema("Optional hypothesis UUID or slug."), + ), + ( + "experiment", + selector_schema("Optional experiment UUID or slug used as an anchor."), + ), + ( + "metric", + string_schema("Optional metric key used to choose the champion."), + ), + ("dimensions", run_dimensions_schema()), + ("tags", string_array_schema("Require all listed tags.")), + ( + "order", + enum_string_schema( + &["asc", "desc"], + "Optional explicit champion ranking direction.", + ), + ), + ], + &[], + ), "artifact.record" => object_schema( &[ ( @@ -631,7 +675,10 @@ fn tool_input_schema(name: &str) -> Value { ), ( "scope", - enum_string_schema(&["live", "visible", "all"], "Registry slice to enumerate."), + enum_string_schema( + &["live", "scoreboard", "visible", "all"], + "Registry slice to enumerate.", + ), ), ], &[], diff --git a/crates/fidget-spinner-cli/src/mcp/projection.rs b/crates/fidget-spinner-cli/src/mcp/projection.rs index a36e915..c93d3ec 100644 --- a/crates/fidget-spinner-cli/src/mcp/projection.rs +++ b/crates/fidget-spinner-cli/src/mcp/projection.rs @@ -6,10 +6,10 @@ use fidget_spinner_core::{ RunDimensionValue, TagRecord, }; use fidget_spinner_store_sqlite::{ - ArtifactDetail, ArtifactSummary, EntityHistoryEntry, ExperimentDetail, ExperimentSummary, - FrontierOpenProjection, FrontierSummary, HypothesisCurrentState, HypothesisDetail, - MetricBestEntry, MetricKeySummary, MetricObservationSummary, ProjectStore, StoreError, - VertexSummary, + ArtifactDetail, ArtifactSummary, EntityHistoryEntry, ExperimentDetail, ExperimentNearestHit, + ExperimentNearestResult, ExperimentSummary, FrontierOpenProjection, FrontierSummary, + HypothesisCurrentState, HypothesisDetail, MetricBestEntry, MetricKeySummary, + MetricObservationSummary, ProjectStore, StoreError, VertexSummary, }; use libmcp::{ ProjectionError, SelectorProjection, StructuredProjection, SurfaceKind, SurfacePolicy, @@ -56,6 +56,7 @@ pub(crate) struct FrontierBriefProjection { pub(crate) situation: Option<String>, pub(crate) roadmap: Vec<RoadmapItemProjection>, pub(crate) unknowns: Vec<String>, + pub(crate) scoreboard_metric_keys: Vec<String>, pub(crate) revision: u64, #[serde(skip_serializing_if = "Option::is_none")] pub(crate) updated_at: Option<TimestampText>, @@ -106,6 +107,7 @@ pub(crate) struct FrontierListOutput { pub(crate) struct FrontierOpenOutput { pub(crate) frontier: FrontierOpenFrontierProjection, pub(crate) active_tags: Vec<String>, + pub(crate) scoreboard_metrics: Vec<MetricKeySummaryProjection>, pub(crate) active_metric_keys: Vec<MetricKeySummaryProjection>, pub(crate) active_hypotheses: Vec<HypothesisCurrentStateProjection>, pub(crate) open_experiments: Vec<ExperimentSummaryProjection>, @@ -519,6 +521,32 @@ pub(crate) struct MetricBestOutput { } #[derive(Clone, Serialize)] +pub(crate) struct ExperimentNearestHitProjection { + pub(crate) experiment: ExperimentSummaryProjection, + pub(crate) hypothesis: HypothesisSummaryProjection, + pub(crate) dimensions: BTreeMap<String, Value>, + pub(crate) reasons: Vec<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) metric_value: Option<MetricObservationSummaryProjection>, +} + +#[derive(Clone, Serialize, libmcp::ToolProjection)] +#[libmcp(kind = "read")] +pub(crate) struct ExperimentNearestOutput { + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) metric: Option<MetricKeySummaryProjection>, + pub(crate) target_dimensions: BTreeMap<String, Value>, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) accepted: Option<ExperimentNearestHitProjection>, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) kept: Option<ExperimentNearestHitProjection>, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) rejected: Option<ExperimentNearestHitProjection>, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) champion: Option<ExperimentNearestHitProjection>, +} + +#[derive(Clone, Serialize)] pub(crate) struct TagRecordProjection { pub(crate) name: String, pub(crate) description: String, @@ -649,6 +677,11 @@ pub(crate) fn frontier_open(projection: &FrontierOpenProjection) -> FrontierOpen .iter() .map(ToString::to_string) .collect(), + scoreboard_metrics: projection + .scoreboard_metric_keys + .iter() + .map(metric_key_summary) + .collect(), active_metric_keys: projection .active_metric_keys .iter() @@ -863,6 +896,17 @@ pub(crate) fn metric_best(entries: &[MetricBestEntry]) -> MetricBestOutput { } } +pub(crate) fn experiment_nearest(result: &ExperimentNearestResult) -> ExperimentNearestOutput { + ExperimentNearestOutput { + metric: result.metric.as_ref().map(metric_key_summary), + target_dimensions: dimension_map(&result.target_dimensions), + accepted: result.accepted.as_ref().map(experiment_nearest_hit), + kept: result.kept.as_ref().map(experiment_nearest_hit), + rejected: result.rejected.as_ref().map(experiment_nearest_hit), + champion: result.champion.as_ref().map(experiment_nearest_hit), + } +} + pub(crate) fn tag_record(tag: &TagRecord) -> TagRecordOutput { TagRecordOutput { record: tag_record_projection(tag), @@ -963,6 +1007,11 @@ fn frontier_brief_projection( situation: brief.situation.as_ref().map(ToString::to_string), roadmap, unknowns: brief.unknowns.iter().map(ToString::to_string).collect(), + scoreboard_metric_keys: brief + .scoreboard_metric_keys + .iter() + .map(ToString::to_string) + .collect(), revision: brief.revision, updated_at: brief.updated_at.map(timestamp_value), } @@ -1142,6 +1191,16 @@ fn metric_best_entry(entry: &MetricBestEntry) -> MetricBestEntryProjection { } } +fn experiment_nearest_hit(hit: &ExperimentNearestHit) -> ExperimentNearestHitProjection { + ExperimentNearestHitProjection { + experiment: experiment_summary(&hit.experiment), + hypothesis: hypothesis_summary(&hit.hypothesis), + dimensions: dimension_map(&hit.dimensions), + reasons: hit.reasons.iter().map(ToString::to_string).collect(), + metric_value: hit.metric_value.as_ref().map(metric_observation_summary), + } +} + fn metric_observation_summary( metric: &MetricObservationSummary, ) -> MetricObservationSummaryProjection { diff --git a/crates/fidget-spinner-cli/src/mcp/service.rs b/crates/fidget-spinner-cli/src/mcp/service.rs index 7c649aa..70f4751 100644 --- a/crates/fidget-spinner-cli/src/mcp/service.rs +++ b/crates/fidget-spinner-cli/src/mcp/service.rs @@ -14,11 +14,11 @@ use fidget_spinner_core::{ use fidget_spinner_store_sqlite::{ AttachmentSelector, CloseExperimentRequest, CreateArtifactRequest, CreateFrontierRequest, CreateHypothesisRequest, DefineMetricRequest, DefineRunDimensionRequest, EntityHistoryEntry, - ExperimentOutcomePatch, FrontierOpenProjection, FrontierRoadmapItemDraft, FrontierSummary, - ListArtifactsQuery, ListExperimentsQuery, ListHypothesesQuery, MetricBestEntry, - MetricBestQuery, MetricKeySummary, MetricKeysQuery, MetricRankOrder, MetricScope, - OpenExperimentRequest, ProjectStatus, ProjectStore, StoreError, TextPatch, - UpdateArtifactRequest, UpdateExperimentRequest, UpdateFrontierBriefRequest, + ExperimentNearestQuery, ExperimentOutcomePatch, FrontierOpenProjection, + FrontierRoadmapItemDraft, FrontierSummary, ListArtifactsQuery, ListExperimentsQuery, + ListHypothesesQuery, MetricBestEntry, MetricBestQuery, MetricKeySummary, MetricKeysQuery, + MetricRankOrder, MetricScope, OpenExperimentRequest, ProjectStatus, ProjectStore, StoreError, + TextPatch, UpdateArtifactRequest, UpdateExperimentRequest, UpdateFrontierRequest, UpdateHypothesisRequest, VertexSelector, VertexSummary, }; use serde::Deserialize; @@ -137,44 +137,58 @@ impl WorkerService { let args = deserialize::<FrontierSelectorArgs>(arguments)?; frontier_open_output(&lift!(self.store.frontier_open(&args.frontier)), &operation)? } - "frontier.brief.update" => { - let args = deserialize::<FrontierBriefUpdateArgs>(arguments)?; + "frontier.update" => { + let args = deserialize::<FrontierUpdateArgs>(arguments)?; let frontier = lift!( - self.store - .update_frontier_brief(UpdateFrontierBriefRequest { - frontier: args.frontier, - expected_revision: args.expected_revision, - situation: nullable_text_patch_from_wire(args.situation, &operation)?, - roadmap: args - .roadmap - .map(|items| { - items - .into_iter() - .map(|item| { - Ok(FrontierRoadmapItemDraft { - rank: item.rank, - hypothesis: item.hypothesis, - summary: item - .summary - .map(NonEmptyText::new) - .transpose() - .map_err(store_fault(&operation))?, - }) + self.store.update_frontier(UpdateFrontierRequest { + frontier: args.frontier, + expected_revision: args.expected_revision, + objective: args + .objective + .map(NonEmptyText::new) + .transpose() + .map_err(store_fault(&operation))?, + situation: nullable_text_patch_from_wire(args.situation, &operation)?, + roadmap: args + .roadmap + .map(|items| { + items + .into_iter() + .map(|item| { + Ok(FrontierRoadmapItemDraft { + rank: item.rank, + hypothesis: item.hypothesis, + summary: item + .summary + .map(NonEmptyText::new) + .transpose() + .map_err(store_fault(&operation))?, }) - .collect::<Result<Vec<_>, FaultRecord>>() - }) - .transpose()?, - unknowns: args - .unknowns - .map(|items| { - items - .into_iter() - .map(NonEmptyText::new) - .collect::<Result<Vec<_>, _>>() - .map_err(store_fault(&operation)) - }) - .transpose()?, - }) + }) + .collect::<Result<Vec<_>, FaultRecord>>() + }) + .transpose()?, + unknowns: args + .unknowns + .map(|items| { + items + .into_iter() + .map(NonEmptyText::new) + .collect::<Result<Vec<_>, _>>() + .map_err(store_fault(&operation)) + }) + .transpose()?, + scoreboard_metric_keys: args + .scoreboard_metric_keys + .map(|items| { + items + .into_iter() + .map(NonEmptyText::new) + .collect::<Result<Vec<_>, _>>() + .map_err(store_fault(&operation)) + }) + .transpose()?, + }) ); frontier_record_output(&self.store, &frontier, &operation)? } @@ -366,6 +380,32 @@ impl WorkerService { ); experiment_record_output(&experiment, &operation)? } + "experiment.nearest" => { + let args = deserialize::<ExperimentNearestArgs>(arguments)?; + experiment_nearest_output( + &lift!( + self.store.experiment_nearest(ExperimentNearestQuery { + frontier: args.frontier, + hypothesis: args.hypothesis, + experiment: args.experiment, + metric: args + .metric + .map(NonEmptyText::new) + .transpose() + .map_err(store_fault(&operation))?, + dimensions: dimension_map_from_wire(args.dimensions)?, + tags: args + .tags + .map(tags_to_set) + .transpose() + .map_err(store_fault(&operation))? + .unwrap_or_default(), + order: args.order, + }) + ), + &operation, + )? + } "experiment.history" => { let args = deserialize::<ExperimentSelectorArgs>(arguments)?; history_output( @@ -583,12 +623,14 @@ struct FrontierSelectorArgs { } #[derive(Debug, Deserialize)] -struct FrontierBriefUpdateArgs { +struct FrontierUpdateArgs { frontier: String, expected_revision: Option<u64>, + objective: Option<String>, situation: Option<NullableStringArg>, roadmap: Option<Vec<FrontierRoadmapItemWire>>, unknowns: Option<Vec<String>>, + scoreboard_metric_keys: Option<Vec<String>>, } #[derive(Debug, Deserialize)] @@ -686,6 +728,17 @@ struct ExperimentCloseArgs { } #[derive(Debug, Deserialize)] +struct ExperimentNearestArgs { + frontier: Option<String>, + hypothesis: Option<String>, + experiment: Option<String>, + metric: Option<String>, + dimensions: Option<Map<String, Value>>, + tags: Option<Vec<String>>, + order: Option<MetricRankOrder>, +} + +#[derive(Debug, Deserialize)] struct ExperimentOutcomeWire { backend: ExecutionBackend, command: CommandRecipe, @@ -835,6 +888,7 @@ where | StoreError::UnknownRoadmapHypothesis(_) | StoreError::ManualExperimentRequiresCommand | StoreError::MetricOrderRequired { .. } + | StoreError::MetricScopeRequiresFrontier { .. } | StoreError::UnknownDimensionFilter(_) | StoreError::DuplicateTag(_) | StoreError::DuplicateMetricDefinition(_) @@ -1005,6 +1059,14 @@ fn json_value_to_dimension(value: Value) -> Result<RunDimensionValue, FaultRecor } } +fn run_dimension_value_text(value: &RunDimensionValue) -> String { + match value { + RunDimensionValue::String(value) | RunDimensionValue::Timestamp(value) => value.to_string(), + RunDimensionValue::Numeric(value) => value.to_string(), + RunDimensionValue::Boolean(value) => value.to_string(), + } +} + fn project_status_output( status: &ProjectStatus, operation: &str, @@ -1144,6 +1206,18 @@ fn frontier_record_output( .join("; ") )); } + if !frontier.brief.scoreboard_metric_keys.is_empty() { + lines.push(format!( + "scoreboard metrics: {}", + frontier + .brief + .scoreboard_metric_keys + .iter() + .map(ToString::to_string) + .collect::<Vec<_>>() + .join(", ") + )); + } projected_tool_output( &projection, lines.join("\n"), @@ -1187,6 +1261,17 @@ fn frontier_open_output( .join(", ") )); } + if !projection.scoreboard_metric_keys.is_empty() { + lines.push(format!( + "scoreboard metrics: {}", + projection + .scoreboard_metric_keys + .iter() + .map(|metric| metric.key.to_string()) + .collect::<Vec<_>>() + .join(", ") + )); + } if !projection.active_hypotheses.is_empty() { lines.push("active hypotheses:".to_owned()); for state in &projection.active_hypotheses { @@ -1567,6 +1652,71 @@ fn metric_best_output( ) } +fn experiment_nearest_output( + result: &fidget_spinner_store_sqlite::ExperimentNearestResult, + operation: &str, +) -> Result<ToolOutput, FaultRecord> { + let projection = projection::experiment_nearest(result); + let mut lines = Vec::new(); + if !result.target_dimensions.is_empty() { + lines.push(format!( + "target slice: {}", + result + .target_dimensions + .iter() + .map(|(key, value)| format!("{key}={}", run_dimension_value_text(value))) + .collect::<Vec<_>>() + .join(", ") + )); + } + if let Some(metric) = result.metric.as_ref() { + lines.push(format!( + "champion metric: {} [{} {}]", + metric.key, + metric.unit.as_str(), + metric.objective.as_str() + )); + } + for (label, hit) in [ + ("accepted", result.accepted.as_ref()), + ("kept", result.kept.as_ref()), + ("rejected", result.rejected.as_ref()), + ("champion", result.champion.as_ref()), + ] { + if let Some(hit) = hit { + let suffix = hit + .metric_value + .as_ref() + .map_or_else(String::new, |metric| { + format!(" | {}={}", metric.key, metric.value) + }); + lines.push(format!( + "{}: {} / {}{}", + label, hit.experiment.slug, hit.hypothesis.slug, suffix + )); + lines.push(format!( + " why: {}", + hit.reasons + .iter() + .map(ToString::to_string) + .collect::<Vec<_>>() + .join("; ") + )); + } + } + projected_tool_output( + &projection, + if lines.is_empty() { + "no comparator candidates".to_owned() + } else { + lines.join("\n") + }, + None, + FaultStage::Worker, + operation, + ) +} + fn run_dimension_definition_output( dimension: &fidget_spinner_core::RunDimensionDefinition, operation: &str, @@ -1704,6 +1854,7 @@ mod legacy_projection_values { "situation": frontier.brief.situation, "roadmap": roadmap, "unknowns": frontier.brief.unknowns, + "scoreboard_metric_keys": frontier.brief.scoreboard_metric_keys, "revision": frontier.brief.revision, "updated_at": frontier.brief.updated_at.map(timestamp_value), }, @@ -1749,11 +1900,17 @@ mod legacy_projection_values { "situation": projection.frontier.brief.situation, "roadmap": roadmap, "unknowns": projection.frontier.brief.unknowns, + "scoreboard_metric_keys": projection.frontier.brief.scoreboard_metric_keys, "revision": projection.frontier.brief.revision, "updated_at": projection.frontier.brief.updated_at.map(timestamp_value), }, }, "active_tags": projection.active_tags, + "scoreboard_metrics": projection + .scoreboard_metric_keys + .iter() + .map(metric_key_summary_value) + .collect::<Vec<_>>(), "active_metric_keys": projection .active_metric_keys .iter() diff --git a/crates/fidget-spinner-cli/src/ui.rs b/crates/fidget-spinner-cli/src/ui.rs index 0b05b29..cd067ab 100644 --- a/crates/fidget-spinner-cli/src/ui.rs +++ b/crates/fidget-spinner-cli/src/ui.rs @@ -428,19 +428,35 @@ fn render_frontier_tab_content( }) } FrontierTab::Metrics => { - let metric_keys = if projection.active_metric_keys.is_empty() { + let other_metric_keys = if projection.active_metric_keys.is_empty() { store.metric_keys(MetricKeysQuery { frontier: Some(projection.frontier.slug.to_string()), scope: MetricScope::Visible, })? } else { - projection.active_metric_keys.clone() + projection + .active_metric_keys + .iter() + .filter(|metric| { + !projection + .scoreboard_metric_keys + .iter() + .any(|scoreboard| scoreboard.key == metric.key) + }) + .cloned() + .collect() }; let selected_metric = query .metric .as_deref() .and_then(|selector| NonEmptyText::new(selector.to_owned()).ok()) - .or_else(|| metric_keys.first().map(|metric| metric.key.clone())); + .or_else(|| { + projection + .scoreboard_metric_keys + .first() + .or_else(|| other_metric_keys.first()) + .map(|metric| metric.key.clone()) + }); let series = selected_metric .as_ref() .map(|metric| { @@ -452,7 +468,8 @@ fn render_frontier_tab_content( (render_frontier_header(&projection.frontier)) (render_metric_series_section( &projection.frontier.slug, - &metric_keys, + &projection.scoreboard_metric_keys, + &other_metric_keys, selected_metric.as_ref(), series.as_ref(), &dimension_filters, @@ -534,7 +551,8 @@ fn render_closed_hypothesis_grid( fn render_metric_series_section( frontier_slug: &Slug, - metric_keys: &[fidget_spinner_store_sqlite::MetricKeySummary], + scoreboard_metric_keys: &[fidget_spinner_store_sqlite::MetricKeySummary], + other_metric_keys: &[fidget_spinner_store_sqlite::MetricKeySummary], selected_metric: Option<&NonEmptyText>, series: Option<&FrontierMetricSeries>, dimension_filters: &BTreeMap<String, String>, @@ -552,24 +570,53 @@ fn render_metric_series_section( p.prose { "Server-rendered SVG over the frontier’s closed experiment ledger. Choose a live metric, then walk to the underlying experiments deliberately." } - @if metric_keys.is_empty() { + @if scoreboard_metric_keys.is_empty() && other_metric_keys.is_empty() { p.muted { "No visible metrics registered for this frontier." } } @else { - div.metric-picker { - @for metric in metric_keys { - @let href = frontier_tab_href(frontier_slug, FrontierTab::Metrics, Some(metric.key.as_str())); - a - href=(href) - class={(if selected_metric.is_some_and(|selected| selected == &metric.key) { - "metric-choice active" - } else { - "metric-choice" - })} - { - span.metric-choice-key { (metric.key) } - span.metric-choice-meta { - (metric.objective.as_str()) " · " - (metric.unit.as_str()) + @if !scoreboard_metric_keys.is_empty() { + div.metric-picker-group { + h3 { "Scoreboard" } + div.metric-picker { + @for metric in scoreboard_metric_keys { + @let href = frontier_tab_href(frontier_slug, FrontierTab::Metrics, Some(metric.key.as_str())); + a + href=(href) + class={(if selected_metric.is_some_and(|selected| selected == &metric.key) { + "metric-choice active" + } else { + "metric-choice" + })} + { + span.metric-choice-key { (metric.key) } + span.metric-choice-meta { + (metric.objective.as_str()) " · " + (metric.unit.as_str()) + } + } + } + } + } + } + @if !other_metric_keys.is_empty() { + div.metric-picker-group { + h3 { "Other Live Metrics" } + div.metric-picker { + @for metric in other_metric_keys { + @let href = frontier_tab_href(frontier_slug, FrontierTab::Metrics, Some(metric.key.as_str())); + a + href=(href) + class={(if selected_metric.is_some_and(|selected| selected == &metric.key) { + "metric-choice active" + } else { + "metric-choice" + })} + { + span.metric-choice-key { (metric.key) } + span.metric-choice-meta { + (metric.objective.as_str()) " · " + (metric.unit.as_str()) + } + } } } } @@ -993,6 +1040,43 @@ fn render_frontier_active_sets(projection: &FrontierOpenProjection) -> Markup { } } div.subcard { + h3 { "Scoreboard Metrics" } + @if projection.scoreboard_metric_keys.is_empty() { + p.muted { "No frontier scoreboard metrics configured." } + } @else { + div.table-scroll { + table.metric-table { + thead { + tr { + th { "Key" } + th { "Unit" } + th { "Objective" } + th { "Refs" } + } + } + tbody { + @for metric in &projection.scoreboard_metric_keys { + tr { + td { + a href=(frontier_tab_href( + &projection.frontier.slug, + FrontierTab::Metrics, + Some(metric.key.as_str()), + )) { + (metric.key) + } + } + td { (metric.unit.as_str()) } + td { (metric.objective.as_str()) } + td { (metric.reference_count) } + } + } + } + } + } + } + } + div.subcard { h3 { "Live Metrics" } @if projection.active_metric_keys.is_empty() { p.muted { "No live metrics." } diff --git a/crates/fidget-spinner-cli/tests/mcp_hardening.rs b/crates/fidget-spinner-cli/tests/mcp_hardening.rs index 86b6719..4fb80ab 100644 --- a/crates/fidget-spinner-cli/tests/mcp_hardening.rs +++ b/crates/fidget-spinner-cli/tests/mcp_hardening.rs @@ -226,11 +226,14 @@ fn cold_start_exposes_bound_surface_and_new_toolset() -> TestResult { let tools = harness.tools_list()?; let tool_names = tool_names(&tools); assert!(tool_names.contains(&"frontier.open")); + assert!(tool_names.contains(&"frontier.update")); assert!(tool_names.contains(&"hypothesis.record")); assert!(tool_names.contains(&"experiment.close")); + assert!(tool_names.contains(&"experiment.nearest")); assert!(tool_names.contains(&"artifact.record")); assert!(!tool_names.contains(&"node.list")); assert!(!tool_names.contains(&"research.record")); + assert!(!tool_names.contains(&"frontier.brief.update")); let health = harness.call_tool(3, "system.health", json!({}))?; assert_tool_ok(&health); @@ -393,6 +396,246 @@ fn frontier_open_is_the_grounding_surface_for_live_state() -> TestResult { } #[test] +fn frontier_update_mutates_objective_and_scoreboard_grounding() -> TestResult { + let project_root = temp_project_root("frontier_update")?; + init_project(&project_root)?; + + let mut harness = McpHarness::spawn(Some(&project_root))?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + + assert_tool_ok(&harness.call_tool( + 70, + "metric.define", + json!({ + "key": "nodes_solved", + "unit": "count", + "objective": "maximize", + "visibility": "canonical", + }), + )?); + assert_tool_ok(&harness.call_tool( + 71, + "frontier.create", + json!({ + "label": "LP root frontier", + "objective": "Initial root push", + "slug": "lp-root", + }), + )?); + + let updated = harness.call_tool_full( + 72, + "frontier.update", + json!({ + "frontier": "lp-root", + "objective": "Drive structural LP cash-out on parity rails", + "situation": "Structural LP churn is the active hill.", + "unknowns": ["How far queued structural reuse can cash out below root."], + "scoreboard_metric_keys": ["nodes_solved"], + }), + )?; + assert_tool_ok(&updated); + let updated_content = tool_content(&updated); + assert_eq!( + updated_content["record"]["objective"].as_str(), + Some("Drive structural LP cash-out on parity rails") + ); + assert_eq!( + must_some( + updated_content["record"]["brief"]["scoreboard_metric_keys"] + .as_array() + .and_then(|items| items.first()) + .and_then(Value::as_str), + "frontier scoreboard metric key", + )?, + "nodes_solved" + ); + + let frontier_open = + harness.call_tool_full(73, "frontier.open", json!({ "frontier": "lp-root" }))?; + assert_tool_ok(&frontier_open); + let open_content = tool_content(&frontier_open); + assert_eq!( + open_content["frontier"]["objective"].as_str(), + Some("Drive structural LP cash-out on parity rails") + ); + assert_eq!( + must_some( + open_content["scoreboard_metrics"] + .as_array() + .and_then(|items| items.first()), + "frontier scoreboard metrics entry", + )?["key"] + .as_str(), + Some("nodes_solved") + ); + + let scoreboard = harness.call_tool_full( + 74, + "metric.keys", + json!({ + "frontier": "lp-root", + "scope": "scoreboard", + }), + )?; + assert_tool_ok(&scoreboard); + assert_eq!( + must_some( + tool_content(&scoreboard)["metrics"] + .as_array() + .and_then(|items| items.first()), + "scoreboard metric entry", + )?["key"] + .as_str(), + Some("nodes_solved") + ); + + Ok(()) +} + +#[test] +fn experiment_nearest_finds_structural_buckets_and_champion() -> TestResult { + let project_root = temp_project_root("experiment_nearest")?; + init_project(&project_root)?; + + let mut harness = McpHarness::spawn(Some(&project_root))?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + + assert_tool_ok(&harness.call_tool( + 80, + "metric.define", + json!({ + "key": "nodes_solved", + "unit": "count", + "objective": "maximize", + "visibility": "canonical", + }), + )?); + assert_tool_ok(&harness.call_tool( + 81, + "run.dimension.define", + json!({"key": "instance", "value_type": "string"}), + )?); + assert_tool_ok(&harness.call_tool( + 82, + "run.dimension.define", + json!({"key": "profile", "value_type": "string"}), + )?); + assert_tool_ok(&harness.call_tool( + 83, + "run.dimension.define", + json!({"key": "duration_s", "value_type": "numeric"}), + )?); + assert_tool_ok(&harness.call_tool( + 84, + "frontier.create", + json!({ + "label": "Comparator frontier", + "objective": "Keep exact-slice comparators cheap to find", + "slug": "comparators", + }), + )?); + assert_tool_ok(&harness.call_tool( + 85, + "frontier.update", + json!({ + "frontier": "comparators", + "scoreboard_metric_keys": ["nodes_solved"], + }), + )?); + assert_tool_ok(&harness.call_tool( + 86, + "hypothesis.record", + json!({ + "frontier": "comparators", + "slug": "structural-loop", + "title": "Structural loop", + "summary": "Compare exact-slice structural LP lines.", + "body": "Thread structural LP reuse through the same 4x5 parity slice so exact-slice comparators remain easy to recover and dead branches stay visible before the next iteration starts.", + }), + )?); + + for (id, slug, verdict, value, duration_s) in [ + (87_u64, "exact-kept", "kept", 111.0, 60), + (89_u64, "exact-accepted", "accepted", 125.0, 60), + (91_u64, "exact-rejected", "rejected", 98.0, 60), + (93_u64, "different-duration", "accepted", 140.0, 20), + ] { + assert_tool_ok(&harness.call_tool( + id, + "experiment.open", + json!({ + "hypothesis": "structural-loop", + "slug": slug, + "title": format!("{slug} rail"), + "summary": format!("{slug} summary"), + }), + )?); + assert_tool_ok(&harness.call_tool( + id + 1, + "experiment.close", + json!({ + "experiment": slug, + "backend": "manual", + "command": {"argv": [slug]}, + "dimensions": { + "instance": "4x5", + "profile": "parity", + "duration_s": duration_s, + }, + "primary_metric": {"key": "nodes_solved", "value": value}, + "verdict": verdict, + "rationale": format!("{slug} outcome"), + }), + )?); + } + + let nearest = harness.call_tool_full( + 95, + "experiment.nearest", + json!({ + "frontier": "comparators", + "dimensions": { + "instance": "4x5", + "profile": "parity", + "duration_s": 60, + }, + }), + )?; + assert_tool_ok(&nearest); + let content = tool_content(&nearest); + assert_eq!(content["metric"]["key"].as_str(), Some("nodes_solved")); + assert_eq!( + content["accepted"]["experiment"]["slug"].as_str(), + Some("exact-accepted") + ); + assert_eq!( + content["kept"]["experiment"]["slug"].as_str(), + Some("exact-kept") + ); + assert_eq!( + content["rejected"]["experiment"]["slug"].as_str(), + Some("exact-rejected") + ); + assert_eq!( + content["champion"]["experiment"]["slug"].as_str(), + Some("exact-accepted") + ); + assert!( + must_some( + content["accepted"]["reasons"].as_array(), + "accepted comparator reasons", + )? + .iter() + .any(|reason| reason.as_str() == Some("exact dimension match")) + ); + + Ok(()) +} + +#[test] fn registry_and_history_surfaces_render_timestamps_as_strings() -> TestResult { let project_root = temp_project_root("timestamp_text")?; init_project(&project_root)?; diff --git a/crates/fidget-spinner-core/src/model.rs b/crates/fidget-spinner-core/src/model.rs index 5f4bdeb..5adc5a3 100644 --- a/crates/fidget-spinner-core/src/model.rs +++ b/crates/fidget-spinner-core/src/model.rs @@ -595,6 +595,8 @@ pub struct FrontierBrief { pub situation: Option<NonEmptyText>, pub roadmap: Vec<FrontierRoadmapItem>, pub unknowns: Vec<NonEmptyText>, + #[serde(default)] + pub scoreboard_metric_keys: Vec<NonEmptyText>, pub revision: u64, pub updated_at: Option<OffsetDateTime>, } diff --git a/crates/fidget-spinner-store-sqlite/src/lib.rs b/crates/fidget-spinner-store-sqlite/src/lib.rs index 253929e..8a80bbc 100644 --- a/crates/fidget-spinner-store-sqlite/src/lib.rs +++ b/crates/fidget-spinner-store-sqlite/src/lib.rs @@ -105,6 +105,8 @@ pub enum StoreError { MetricOrderRequired { key: String }, #[error("dimension filter references unknown run dimension `{0}`")] UnknownDimensionFilter(String), + #[error("metric scope `{scope}` requires a frontier selector")] + MetricScopeRequiresFrontier { scope: &'static str }, } #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] @@ -141,6 +143,7 @@ pub struct ProjectStatus { #[serde(rename_all = "snake_case")] pub enum MetricScope { Live, + Scoreboard, Visible, All, } @@ -200,12 +203,14 @@ pub enum TextPatch<T> { } #[derive(Clone, Debug)] -pub struct UpdateFrontierBriefRequest { +pub struct UpdateFrontierRequest { pub frontier: String, pub expected_revision: Option<u64>, + pub objective: Option<NonEmptyText>, pub situation: Option<TextPatch<NonEmptyText>>, pub roadmap: Option<Vec<FrontierRoadmapItemDraft>>, pub unknowns: Option<Vec<NonEmptyText>>, + pub scoreboard_metric_keys: Option<Vec<NonEmptyText>>, } #[derive(Clone, Debug)] @@ -458,6 +463,36 @@ pub struct MetricBestQuery { pub order: Option<MetricRankOrder>, } +#[derive(Clone, Debug)] +pub struct ExperimentNearestQuery { + pub frontier: Option<String>, + pub hypothesis: Option<String>, + pub experiment: Option<String>, + pub metric: Option<NonEmptyText>, + pub dimensions: BTreeMap<NonEmptyText, RunDimensionValue>, + pub tags: BTreeSet<TagName>, + pub order: Option<MetricRankOrder>, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct ExperimentNearestHit { + pub experiment: ExperimentSummary, + pub hypothesis: HypothesisSummary, + pub dimensions: BTreeMap<NonEmptyText, RunDimensionValue>, + pub reasons: Vec<NonEmptyText>, + pub metric_value: Option<MetricObservationSummary>, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct ExperimentNearestResult { + pub metric: Option<MetricKeySummary>, + pub target_dimensions: BTreeMap<NonEmptyText, RunDimensionValue>, + pub accepted: Option<ExperimentNearestHit>, + pub kept: Option<ExperimentNearestHit>, + pub rejected: Option<ExperimentNearestHit>, + pub champion: Option<ExperimentNearestHit>, +} + #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct MetricBestEntry { pub experiment: ExperimentSummary, @@ -486,6 +521,7 @@ pub struct FrontierOpenProjection { pub frontier: FrontierRecord, pub active_tags: Vec<TagName>, pub active_metric_keys: Vec<MetricKeySummary>, + pub scoreboard_metric_keys: Vec<MetricKeySummary>, pub active_hypotheses: Vec<HypothesisCurrentState>, pub open_experiments: Vec<ExperimentSummary>, } @@ -793,9 +829,9 @@ impl ProjectStore { self.resolve_frontier(selector) } - pub fn update_frontier_brief( + pub fn update_frontier( &mut self, - request: UpdateFrontierBriefRequest, + request: UpdateFrontierRequest, ) -> Result<FrontierRecord, StoreError> { let frontier = self.resolve_frontier(&request.frontier)?; enforce_revision( @@ -805,6 +841,17 @@ impl ProjectStore { frontier.revision, )?; let now = OffsetDateTime::now_utc(); + if let Some(metric_keys) = request.scoreboard_metric_keys.as_ref() { + for metric_key in metric_keys { + let _ = self + .metric_definition(metric_key)? + .ok_or_else(|| StoreError::UnknownMetricDefinition(metric_key.clone()))?; + } + } + let brief_changed = request.situation.is_some() + || request.roadmap.is_some() + || request.unknowns.is_some() + || request.scoreboard_metric_keys.is_some(); let brief = FrontierBrief { situation: apply_optional_text_patch( request.situation, @@ -824,23 +871,35 @@ impl ProjectStore { None => frontier.brief.roadmap.clone(), }, unknowns: request.unknowns.unwrap_or(frontier.brief.unknowns.clone()), - revision: frontier.brief.revision.saturating_add(1), - updated_at: Some(now), + scoreboard_metric_keys: request + .scoreboard_metric_keys + .unwrap_or(frontier.brief.scoreboard_metric_keys.clone()), + revision: if brief_changed { + frontier.brief.revision.saturating_add(1) + } else { + frontier.brief.revision + }, + updated_at: if brief_changed { + Some(now) + } else { + frontier.brief.updated_at + }, }; let updated = FrontierRecord { + objective: request.objective.unwrap_or(frontier.objective.clone()), brief, revision: frontier.revision.saturating_add(1), updated_at: now, ..frontier }; let transaction = self.connection.transaction()?; - update_frontier(&transaction, &updated)?; + update_frontier_row(&transaction, &updated)?; record_event( &transaction, "frontier", &updated.id.to_string(), updated.revision, - "brief_updated", + "updated", &updated, )?; transaction.commit()?; @@ -1389,10 +1448,12 @@ impl ProjectStore { let active_tags = derive_active_tags(&active_hypotheses, &open_experiments); let active_metric_keys = self.live_metric_keys(frontier.id, &active_hypotheses, &open_experiments)?; + let scoreboard_metric_keys = self.frontier_scoreboard_metric_keys(&frontier)?; Ok(FrontierOpenProjection { frontier, active_tags, active_metric_keys, + scoreboard_metric_keys, active_hypotheses, open_experiments, }) @@ -1451,11 +1512,25 @@ impl ProjectStore { } pub fn metric_keys(&self, query: MetricKeysQuery) -> Result<Vec<MetricKeySummary>, StoreError> { - let frontier_id = query + let frontier = query .frontier .as_deref() - .map(|selector| self.resolve_frontier(selector).map(|frontier| frontier.id)) + .map(|selector| self.resolve_frontier(selector)) .transpose()?; + let frontier_id = frontier.as_ref().map(|frontier| frontier.id); + if query.scope == MetricScope::Scoreboard && frontier.is_none() { + return Err(StoreError::MetricScopeRequiresFrontier { + scope: "scoreboard", + }); + } + if query.scope == MetricScope::Scoreboard { + return match frontier.as_ref() { + Some(frontier) => self.frontier_scoreboard_metric_keys(frontier), + None => Err(StoreError::MetricScopeRequiresFrontier { + scope: "scoreboard", + }), + }; + } let definitions = self.list_metric_definitions()?; let live_keys = frontier_id .map(|frontier_id| self.live_metric_key_names(frontier_id)) @@ -1465,6 +1540,7 @@ impl ProjectStore { .into_iter() .filter(|definition| match query.scope { MetricScope::Live => live_keys.contains(definition.key.as_str()), + MetricScope::Scoreboard => unreachable!("handled above"), MetricScope::Visible => definition.visibility.is_default_visible(), MetricScope::All => true, }) @@ -1548,6 +1624,187 @@ impl ProjectStore { Ok(apply_limit(entries, query.limit)) } + pub fn experiment_nearest( + &self, + query: ExperimentNearestQuery, + ) -> Result<ExperimentNearestResult, StoreError> { + let anchor_experiment = query + .experiment + .as_deref() + .map(|selector| self.resolve_experiment(selector)) + .transpose()?; + let anchor_hypothesis = query + .hypothesis + .as_deref() + .map(|selector| self.resolve_hypothesis(selector)) + .transpose()?; + let frontier = match query.frontier.as_deref() { + Some(selector) => Some(self.resolve_frontier(selector)?), + None => anchor_experiment + .as_ref() + .map(|experiment| self.resolve_frontier(&experiment.frontier_id.to_string())) + .transpose()? + .or(anchor_hypothesis + .as_ref() + .map(|hypothesis| self.resolve_frontier(&hypothesis.frontier_id.to_string())) + .transpose()?), + }; + let frontier_id = frontier.as_ref().map(|frontier| frontier.id); + let anchor_hypothesis_id = anchor_hypothesis + .as_ref() + .map(|hypothesis| hypothesis.id) + .or_else(|| { + anchor_experiment + .as_ref() + .map(|experiment| experiment.hypothesis_id) + }); + let target_dimensions = if query.dimensions.is_empty() { + anchor_experiment + .as_ref() + .and_then(|experiment| { + experiment + .outcome + .as_ref() + .map(|outcome| outcome.dimensions.clone()) + }) + .unwrap_or_default() + } else { + query.dimensions + }; + let metric_definition = match query.metric.as_ref() { + Some(key) => Some( + self.metric_definition(key)? + .ok_or_else(|| StoreError::UnknownMetricDefinition(key.clone()))?, + ), + None => frontier + .as_ref() + .and_then(|frontier| frontier.brief.scoreboard_metric_keys.first()) + .map(|key| { + self.metric_definition(key)? + .ok_or_else(|| StoreError::UnknownMetricDefinition(key.clone())) + }) + .transpose()?, + }; + let champion_order = metric_definition.as_ref().and_then(|definition| { + query.order.or(match definition.objective { + OptimizationObjective::Minimize => Some(MetricRankOrder::Asc), + OptimizationObjective::Maximize => Some(MetricRankOrder::Desc), + OptimizationObjective::Target => None, + }) + }); + let influence_neighborhood = + self.influence_neighborhood(anchor_experiment.as_ref(), anchor_hypothesis_id)?; + let candidates = self + .load_experiment_records(frontier_id, None, false)? + .into_iter() + .filter(|record| record.status == ExperimentStatus::Closed) + .filter(|record| { + anchor_experiment + .as_ref() + .is_none_or(|anchor| record.id != anchor.id) + }) + .filter(|record| { + anchor_hypothesis_id.is_none_or(|hypothesis_id| { + anchor_hypothesis.is_none() || record.hypothesis_id == hypothesis_id + }) + }) + .map(|record| { + let Some(outcome) = record.outcome.clone() else { + return Ok(None); + }; + let hypothesis_record = self.hypothesis_by_id(record.hypothesis_id)?; + if !query.tags.is_empty() { + let candidate_tags = record + .tags + .iter() + .cloned() + .chain(hypothesis_record.tags.iter().cloned()) + .collect::<BTreeSet<_>>(); + if !query.tags.iter().all(|tag| candidate_tags.contains(tag)) { + return Ok(None); + } + } + let structural_rank = comparator_rank( + &target_dimensions, + &outcome.dimensions, + anchor_hypothesis_id, + hypothesis_record.id, + record.id, + &influence_neighborhood, + ); + let metric_value = metric_definition.as_ref().and_then(|definition| { + all_metrics(&outcome) + .into_iter() + .find(|metric| metric.key == definition.key) + .map(|metric| MetricObservationSummary { + key: metric.key, + value: metric.value, + unit: definition.unit.clone(), + objective: definition.objective, + }) + }); + Ok(Some(NearestComparatorCandidate { + closed_at: outcome.closed_at, + verdict: outcome.verdict, + experiment: self.experiment_summary_from_record(record)?, + hypothesis: self.hypothesis_summary_from_record(hypothesis_record)?, + dimensions: outcome.dimensions, + structural_rank, + metric_value, + })) + }) + .collect::<Result<Vec<_>, StoreError>>()? + .into_iter() + .flatten() + .collect::<Vec<_>>(); + let metric = if let Some(definition) = metric_definition.as_ref() { + Some(MetricKeySummary { + reference_count: self.metric_reference_count(frontier_id, &definition.key)?, + key: definition.key.clone(), + unit: definition.unit.clone(), + objective: definition.objective, + visibility: definition.visibility, + description: definition.description.clone(), + }) + } else { + None + }; + Ok(ExperimentNearestResult { + metric, + target_dimensions, + accepted: pick_nearest_bucket( + &candidates, + FrontierVerdict::Accepted, + metric_definition + .as_ref() + .map(|definition| definition.key.as_str()), + ), + kept: pick_nearest_bucket( + &candidates, + FrontierVerdict::Kept, + metric_definition + .as_ref() + .map(|definition| definition.key.as_str()), + ), + rejected: pick_nearest_bucket( + &candidates, + FrontierVerdict::Rejected, + metric_definition + .as_ref() + .map(|definition| definition.key.as_str()), + ), + champion: champion_order.and_then(|order| { + pick_champion_candidate( + &candidates, + order, + metric_definition + .as_ref() + .map(|definition| definition.key.as_str()), + ) + }), + }) + } + pub fn frontier_history(&self, selector: &str) -> Result<Vec<EntityHistoryEntry>, StoreError> { let frontier = self.resolve_frontier(selector)?; self.entity_history("frontier", &frontier.id.to_string()) @@ -1784,6 +2041,41 @@ impl ProjectStore { } } + fn influence_neighborhood( + &self, + anchor_experiment: Option<&ExperimentRecord>, + anchor_hypothesis_id: Option<HypothesisId>, + ) -> Result<Vec<VertexRef>, StoreError> { + let mut neighborhood = Vec::new(); + if let Some(hypothesis_id) = anchor_hypothesis_id { + let anchor = VertexRef::Hypothesis(hypothesis_id); + neighborhood.extend( + self.load_vertex_parents(anchor)? + .into_iter() + .map(|summary| summary.vertex), + ); + neighborhood.extend( + self.load_vertex_children(anchor)? + .into_iter() + .map(|summary| summary.vertex), + ); + } + if let Some(experiment) = anchor_experiment { + let anchor = VertexRef::Experiment(experiment.id); + neighborhood.extend( + self.load_vertex_parents(anchor)? + .into_iter() + .map(|summary| summary.vertex), + ); + neighborhood.extend( + self.load_vertex_children(anchor)? + .into_iter() + .map(|summary| summary.vertex), + ); + } + Ok(neighborhood) + } + fn load_hypothesis_records( &self, frontier_id: Option<FrontierId>, @@ -2178,6 +2470,31 @@ impl ProjectStore { Ok(keys) } + fn frontier_scoreboard_metric_keys( + &self, + frontier: &FrontierRecord, + ) -> Result<Vec<MetricKeySummary>, StoreError> { + frontier + .brief + .scoreboard_metric_keys + .iter() + .map(|key| { + let definition = self + .metric_definition(key)? + .ok_or_else(|| StoreError::UnknownMetricDefinition(key.clone()))?; + Ok(MetricKeySummary { + reference_count: self + .metric_reference_count(Some(frontier.id), &definition.key)?, + key: definition.key, + unit: definition.unit, + objective: definition.objective, + visibility: definition.visibility, + description: definition.description, + }) + }) + .collect() + } + fn live_metric_key_names( &self, frontier_id: FrontierId, @@ -2592,7 +2909,7 @@ fn insert_frontier( Ok(()) } -fn update_frontier( +fn update_frontier_row( transaction: &Transaction<'_>, frontier: &FrontierRecord, ) -> Result<(), StoreError> { @@ -3248,6 +3565,184 @@ fn all_metrics(outcome: &ExperimentOutcome) -> Vec<MetricValue> { .collect() } +#[derive(Clone)] +struct ComparatorRank { + exact_dimension_match: bool, + core_dimension_matches: usize, + matched_dimension_count: usize, + same_hypothesis: bool, + neighborhood_match: bool, +} + +#[derive(Clone)] +struct NearestComparatorCandidate { + experiment: ExperimentSummary, + hypothesis: HypothesisSummary, + dimensions: BTreeMap<NonEmptyText, RunDimensionValue>, + verdict: FrontierVerdict, + closed_at: OffsetDateTime, + structural_rank: ComparatorRank, + metric_value: Option<MetricObservationSummary>, +} + +fn comparator_rank( + target_dimensions: &BTreeMap<NonEmptyText, RunDimensionValue>, + candidate_dimensions: &BTreeMap<NonEmptyText, RunDimensionValue>, + anchor_hypothesis_id: Option<HypothesisId>, + candidate_hypothesis_id: HypothesisId, + candidate_experiment_id: ExperimentId, + influence_neighborhood: &[VertexRef], +) -> ComparatorRank { + let matched_dimension_keys = target_dimensions + .iter() + .filter(|(key, value)| { + candidate_dimensions + .get(*key) + .is_some_and(|candidate| candidate == *value) + }) + .map(|(key, _)| key.as_str()) + .collect::<Vec<_>>(); + let core_dimension_matches = matched_dimension_keys + .iter() + .filter(|key| { + matches!( + **key, + "instance" | "profile" | "family" | "duration_s" | "budget_s" + ) + }) + .count(); + let exact_dimension_match = !target_dimensions.is_empty() + && target_dimensions.len() == candidate_dimensions.len() + && dimension_subset_matches(target_dimensions, candidate_dimensions); + let same_hypothesis = anchor_hypothesis_id == Some(candidate_hypothesis_id); + let neighborhood_match = influence_neighborhood.iter().any(|vertex| { + *vertex == VertexRef::Hypothesis(candidate_hypothesis_id) + || *vertex == VertexRef::Experiment(candidate_experiment_id) + }); + ComparatorRank { + exact_dimension_match, + core_dimension_matches, + matched_dimension_count: matched_dimension_keys.len(), + same_hypothesis, + neighborhood_match, + } +} + +fn compare_structural_rank(left: &ComparatorRank, right: &ComparatorRank) -> std::cmp::Ordering { + ( + left.exact_dimension_match, + left.core_dimension_matches, + left.matched_dimension_count, + left.same_hypothesis, + left.neighborhood_match, + ) + .cmp(&( + right.exact_dimension_match, + right.core_dimension_matches, + right.matched_dimension_count, + right.same_hypothesis, + right.neighborhood_match, + )) +} + +fn preferred_metric_ordering(left: f64, right: f64, order: MetricRankOrder) -> std::cmp::Ordering { + compare_metric_values(left, right, order).reverse() +} + +fn pick_nearest_bucket( + candidates: &[NearestComparatorCandidate], + verdict: FrontierVerdict, + metric_key: Option<&str>, +) -> Option<ExperimentNearestHit> { + candidates + .iter() + .filter(|candidate| candidate.verdict == verdict) + .max_by(|left, right| { + compare_structural_rank(&left.structural_rank, &right.structural_rank) + .then_with(|| left.closed_at.cmp(&right.closed_at)) + }) + .map(|candidate| nearest_hit(candidate, metric_key, false)) +} + +fn pick_champion_candidate( + candidates: &[NearestComparatorCandidate], + order: MetricRankOrder, + metric_key: Option<&str>, +) -> Option<ExperimentNearestHit> { + candidates + .iter() + .filter(|candidate| { + matches!( + candidate.verdict, + FrontierVerdict::Accepted | FrontierVerdict::Kept + ) && candidate.metric_value.is_some() + }) + .max_by(|left, right| { + compare_structural_rank(&left.structural_rank, &right.structural_rank) + .then_with(|| match (&left.metric_value, &right.metric_value) { + (Some(left_metric), Some(right_metric)) => { + preferred_metric_ordering(left_metric.value, right_metric.value, order) + } + (Some(_), None) => std::cmp::Ordering::Greater, + (None, Some(_)) => std::cmp::Ordering::Less, + (None, None) => std::cmp::Ordering::Equal, + }) + .then_with(|| left.closed_at.cmp(&right.closed_at)) + }) + .map(|candidate| nearest_hit(candidate, metric_key, true)) +} + +fn nearest_hit( + candidate: &NearestComparatorCandidate, + metric_key: Option<&str>, + is_champion: bool, +) -> ExperimentNearestHit { + let mut reasons = Vec::new(); + if candidate.structural_rank.exact_dimension_match { + reasons.push(must_non_empty_reason("exact dimension match")); + } else if candidate.structural_rank.core_dimension_matches > 0 { + reasons.push(must_non_empty_reason(format!( + "matched {} core slice keys", + candidate.structural_rank.core_dimension_matches + ))); + } else if candidate.structural_rank.matched_dimension_count > 0 { + reasons.push(must_non_empty_reason(format!( + "matched {} requested dimensions", + candidate.structural_rank.matched_dimension_count + ))); + } + if candidate.structural_rank.same_hypothesis { + reasons.push(must_non_empty_reason("same owning hypothesis")); + } else if candidate.structural_rank.neighborhood_match { + reasons.push(must_non_empty_reason("same influence neighborhood")); + } + if is_champion { + reasons.push(must_non_empty_reason(format!( + "best closed non-rejected result{}", + metric_key.map_or_else(String::new, |key| format!(" for {key}")) + ))); + } else { + reasons.push(must_non_empty_reason(format!( + "nearest {} comparator", + candidate.verdict.as_str() + ))); + } + ExperimentNearestHit { + experiment: candidate.experiment.clone(), + hypothesis: candidate.hypothesis.clone(), + dimensions: candidate.dimensions.clone(), + reasons, + metric_value: candidate.metric_value.clone(), + } +} + +fn must_non_empty_reason(text: impl Into<String>) -> NonEmptyText { + match NonEmptyText::new(text) { + Ok(text) => text, + Err(_) => unreachable!("comparator reasons must never be empty"), + } +} + fn bool_to_sql(value: bool) -> i64 { i64::from(value) } |