diff options
Diffstat (limited to 'crates/fidget-spinner-cli')
| -rw-r--r-- | crates/fidget-spinner-cli/src/main.rs | 80 | ||||
| -rw-r--r-- | crates/fidget-spinner-cli/src/mcp/catalog.rs | 24 | ||||
| -rw-r--r-- | crates/fidget-spinner-cli/src/mcp/service.rs | 124 | ||||
| -rw-r--r-- | crates/fidget-spinner-cli/tests/mcp_hardening.rs | 161 |
4 files changed, 93 insertions, 296 deletions
diff --git a/crates/fidget-spinner-cli/src/main.rs b/crates/fidget-spinner-cli/src/main.rs index 491e30d..f56e751 100644 --- a/crates/fidget-spinner-cli/src/main.rs +++ b/crates/fidget-spinner-cli/src/main.rs @@ -10,10 +10,10 @@ use std::path::{Path, PathBuf}; use camino::{Utf8Path, Utf8PathBuf}; use clap::{Args, Parser, Subcommand, ValueEnum}; use fidget_spinner_core::{ - AnnotationVisibility, CodeSnapshotRef, CommandRecipe, DiagnosticSeverity, ExecutionBackend, - FieldPresence, FieldRole, FieldValueType, FrontierContract, FrontierNote, FrontierVerdict, - GitCommitHash, InferencePolicy, MetricSpec, MetricUnit, MetricValue, NodeAnnotation, NodeClass, - NodePayload, NonEmptyText, OptimizationObjective, ProjectFieldSpec, TagName, + AnnotationVisibility, CommandRecipe, DiagnosticSeverity, ExecutionBackend, FieldPresence, + FieldRole, FieldValueType, FrontierContract, FrontierNote, FrontierVerdict, InferencePolicy, + MetricSpec, MetricUnit, MetricValue, NodeAnnotation, NodeClass, NodePayload, NonEmptyText, + OptimizationObjective, ProjectFieldSpec, TagName, }; use fidget_spinner_store_sqlite::{ CloseExperimentRequest, CreateFrontierRequest, CreateNodeRequest, DefineMetricRequest, @@ -152,8 +152,6 @@ struct FrontierInitArgs { primary_metric_unit: CliMetricUnit, #[arg(long = "primary-metric-objective", value_enum)] primary_metric_objective: CliOptimizationObjective, - #[arg(long = "seed-summary", default_value = "initial champion checkpoint")] - seed_summary: String, } #[derive(Args)] @@ -490,11 +488,11 @@ struct MetricBestArgs { #[derive(Subcommand)] enum ExperimentCommand { - /// Open a stateful experiment against one hypothesis and base checkpoint. + /// Open a stateful experiment against one hypothesis. Open(ExperimentOpenArgs), /// List open experiments, optionally narrowed to one frontier. List(ExperimentListArgs), - /// Close a core-path experiment with checkpoint, run, note, and verdict. + /// Close a core-path experiment with run data, note, and verdict. Close(Box<ExperimentCloseArgs>), } @@ -518,8 +516,6 @@ struct ExperimentCloseArgs { project: ProjectArg, #[arg(long = "experiment")] experiment_id: String, - #[arg(long = "candidate-summary")] - candidate_summary: String, #[arg(long = "run-title")] run_title: String, #[arg(long = "run-summary")] @@ -567,8 +563,6 @@ struct ExperimentOpenArgs { project: ProjectArg, #[arg(long)] frontier: String, - #[arg(long = "base-checkpoint")] - base_checkpoint: String, #[arg(long = "hypothesis-node")] hypothesis_node: String, #[arg(long)] @@ -733,11 +727,10 @@ enum CliInferencePolicy { #[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] enum CliFrontierVerdict { - PromoteToChampion, - KeepOnFrontier, - RevertToChampion, - ArchiveDeadEnd, - NeedsMoreEvidence, + Accepted, + Kept, + Parked, + Rejected, } fn main() { @@ -834,8 +827,6 @@ fn run_init(args: InitArgs) -> Result<(), StoreError> { fn run_frontier_init(args: FrontierInitArgs) -> Result<(), StoreError> { let mut store = open_store(&args.project.project)?; - let initial_checkpoint = - store.auto_capture_checkpoint(NonEmptyText::new(args.seed_summary)?)?; let projection = store.create_frontier(CreateFrontierRequest { label: NonEmptyText::new(args.label)?, contract_title: NonEmptyText::new(args.contract_title)?, @@ -853,7 +844,6 @@ fn run_frontier_init(args: FrontierInitArgs) -> Result<(), StoreError> { }, promotion_criteria: to_text_vec(args.promotion_criteria)?, }, - initial_checkpoint, })?; print_json(&projection) } @@ -1131,7 +1121,6 @@ fn run_experiment_open(args: ExperimentOpenArgs) -> Result<(), StoreError> { let summary = args.summary.map(NonEmptyText::new).transpose()?; let experiment = store.open_experiment(OpenExperimentRequest { frontier_id: parse_frontier_id(&args.frontier)?, - base_checkpoint_id: parse_checkpoint_id(&args.base_checkpoint)?, hypothesis_node_id: parse_node_id(&args.hypothesis_node)?, title: NonEmptyText::new(args.title)?, summary, @@ -1151,12 +1140,6 @@ fn run_experiment_list(args: ExperimentListArgs) -> Result<(), StoreError> { fn run_experiment_close(args: ExperimentCloseArgs) -> Result<(), StoreError> { let mut store = open_store(&args.project.project)?; - let snapshot = store - .auto_capture_checkpoint(NonEmptyText::new(args.candidate_summary.clone())?)? - .map(|seed| seed.snapshot) - .ok_or(StoreError::GitInspectionFailed( - store.project_root().to_path_buf(), - ))?; let command = CommandRecipe::new( args.working_directory .map(utf8_path) @@ -1186,14 +1169,11 @@ fn run_experiment_close(args: ExperimentCloseArgs) -> Result<(), StoreError> { }; let receipt = store.close_experiment(CloseExperimentRequest { experiment_id: parse_experiment_id(&args.experiment_id)?, - candidate_summary: NonEmptyText::new(args.candidate_summary)?, - candidate_snapshot: snapshot, run_title: NonEmptyText::new(args.run_title)?, run_summary: args.run_summary.map(NonEmptyText::new).transpose()?, backend: args.backend.into(), dimensions: coerce_cli_dimension_filters(&store, args.dimensions)?, command, - code_snapshot: Some(capture_code_snapshot(store.project_root())?), primary_metric: parse_metric_value(args.primary_metric)?, supporting_metrics: args .metrics @@ -1539,31 +1519,6 @@ fn parse_node_class_set(classes: Vec<CliNodeClass>) -> BTreeSet<NodeClass> { classes.into_iter().map(Into::into).collect() } -fn capture_code_snapshot(project_root: &Utf8Path) -> Result<CodeSnapshotRef, StoreError> { - let head_commit = run_git(project_root, &["rev-parse", "HEAD"])?; - let dirty_paths = run_git(project_root, &["status", "--porcelain"])? - .map(|status| { - status - .lines() - .filter_map(|line| line.get(3..).map(str::trim)) - .filter(|line| !line.is_empty()) - .map(Utf8PathBuf::from) - .collect::<BTreeSet<_>>() - }) - .unwrap_or_default(); - Ok(CodeSnapshotRef { - repo_root: run_git(project_root, &["rev-parse", "--show-toplevel"])? - .map(Utf8PathBuf::from) - .unwrap_or_else(|| project_root.to_path_buf()), - worktree_root: project_root.to_path_buf(), - worktree_name: run_git(project_root, &["rev-parse", "--abbrev-ref", "HEAD"])? - .map(NonEmptyText::new) - .transpose()?, - head_commit: head_commit.map(GitCommitHash::new).transpose()?, - dirty_paths, - }) -} - fn run_git(project_root: &Utf8Path, args: &[&str]) -> Result<Option<String>, StoreError> { let output = std::process::Command::new("git") .arg("-C") @@ -1702,12 +1657,6 @@ fn parse_frontier_id(raw: &str) -> Result<fidget_spinner_core::FrontierId, Store )?)) } -fn parse_checkpoint_id(raw: &str) -> Result<fidget_spinner_core::CheckpointId, StoreError> { - Ok(fidget_spinner_core::CheckpointId::from_uuid( - Uuid::parse_str(raw)?, - )) -} - fn parse_experiment_id(raw: &str) -> Result<fidget_spinner_core::ExperimentId, StoreError> { Ok(fidget_spinner_core::ExperimentId::from_uuid( Uuid::parse_str(raw)?, @@ -1851,11 +1800,10 @@ impl From<CliInferencePolicy> for InferencePolicy { impl From<CliFrontierVerdict> for FrontierVerdict { fn from(value: CliFrontierVerdict) -> Self { match value { - CliFrontierVerdict::PromoteToChampion => Self::PromoteToChampion, - CliFrontierVerdict::KeepOnFrontier => Self::KeepOnFrontier, - CliFrontierVerdict::RevertToChampion => Self::RevertToChampion, - CliFrontierVerdict::ArchiveDeadEnd => Self::ArchiveDeadEnd, - CliFrontierVerdict::NeedsMoreEvidence => Self::NeedsMoreEvidence, + CliFrontierVerdict::Accepted => Self::Accepted, + CliFrontierVerdict::Kept => Self::Kept, + CliFrontierVerdict::Parked => Self::Parked, + CliFrontierVerdict::Rejected => Self::Rejected, } } } diff --git a/crates/fidget-spinner-cli/src/mcp/catalog.rs b/crates/fidget-spinner-cli/src/mcp/catalog.rs index 3b8abcc..ae3ca78 100644 --- a/crates/fidget-spinner-cli/src/mcp/catalog.rs +++ b/crates/fidget-spinner-cli/src/mcp/catalog.rs @@ -99,13 +99,13 @@ pub(crate) fn tool_spec(name: &str) -> Option<ToolSpec> { }), "frontier.status" => Some(ToolSpec { name: "frontier.status", - description: "Read one frontier projection, including champion and active candidates.", + description: "Read one frontier projection, including open/completed experiment counts and verdict totals.", dispatch: DispatchTarget::Worker, replay: ReplayContract::Convergent, }), "frontier.init" => Some(ToolSpec { name: "frontier.init", - description: "Create a new frontier rooted in a contract node. If the project is a git repo, the current HEAD becomes the initial champion when possible.", + description: "Create a new frontier rooted in a contract node.", dispatch: DispatchTarget::Worker, replay: ReplayContract::NeverReplay, }), @@ -183,7 +183,7 @@ pub(crate) fn tool_spec(name: &str) -> Option<ToolSpec> { }), "metric.best" => Some(ToolSpec { name: "metric.best", - description: "Rank completed experiments by one numeric key, with optional run-dimension filters and candidate commit surfacing.", + description: "Rank completed experiments by one numeric key, with optional run-dimension filters.", dispatch: DispatchTarget::Worker, replay: ReplayContract::Convergent, }), @@ -195,7 +195,7 @@ pub(crate) fn tool_spec(name: &str) -> Option<ToolSpec> { }), "experiment.open" => Some(ToolSpec { name: "experiment.open", - description: "Open a stateful experiment against one hypothesis and one base checkpoint.", + description: "Open a stateful experiment against one hypothesis.", dispatch: DispatchTarget::Worker, replay: ReplayContract::NeverReplay, }), @@ -213,7 +213,7 @@ pub(crate) fn tool_spec(name: &str) -> Option<ToolSpec> { }), "experiment.close" => Some(ToolSpec { name: "experiment.close", - description: "Close one open experiment with typed run dimensions, preregistered metric observations, candidate checkpoint capture, optional analysis, note, and verdict.", + description: "Close one open experiment with typed run dimensions, preregistered metric observations, optional analysis, note, and verdict.", dispatch: DispatchTarget::Worker, replay: ReplayContract::NeverReplay, }), @@ -562,12 +562,11 @@ fn input_schema(name: &str) -> Value { "type": "object", "properties": { "frontier_id": { "type": "string" }, - "base_checkpoint_id": { "type": "string" }, "hypothesis_node_id": { "type": "string" }, "title": { "type": "string" }, "summary": { "type": "string" } }, - "required": ["frontier_id", "base_checkpoint_id", "hypothesis_node_id", "title"], + "required": ["frontier_id", "hypothesis_node_id", "title"], "additionalProperties": false }), "experiment.list" => json!({ @@ -589,7 +588,6 @@ fn input_schema(name: &str) -> Value { "type": "object", "properties": { "experiment_id": { "type": "string" }, - "candidate_summary": { "type": "string" }, "run": run_schema(), "primary_metric": metric_value_schema(), "supporting_metrics": { "type": "array", "items": metric_value_schema() }, @@ -601,7 +599,6 @@ fn input_schema(name: &str) -> Value { }, "required": [ "experiment_id", - "candidate_summary", "run", "primary_metric", "note", @@ -753,11 +750,10 @@ fn verdict_schema() -> Value { json!({ "type": "string", "enum": [ - "promote_to_champion", - "keep_on_frontier", - "revert_to_champion", - "archive_dead_end", - "needs_more_evidence" + "accepted", + "kept", + "parked", + "rejected" ] }) } diff --git a/crates/fidget-spinner-cli/src/mcp/service.rs b/crates/fidget-spinner-cli/src/mcp/service.rs index 05f2382..f0cca1e 100644 --- a/crates/fidget-spinner-cli/src/mcp/service.rs +++ b/crates/fidget-spinner-cli/src/mcp/service.rs @@ -3,11 +3,11 @@ use std::fs; use camino::{Utf8Path, Utf8PathBuf}; use fidget_spinner_core::{ - AdmissionState, AnnotationVisibility, CodeSnapshotRef, CommandRecipe, DiagnosticSeverity, - ExecutionBackend, FieldPresence, FieldRole, FieldValueType, FrontierContract, FrontierNote, - FrontierProjection, FrontierRecord, FrontierVerdict, InferencePolicy, MetricSpec, MetricUnit, - MetricValue, NodeAnnotation, NodeClass, NodePayload, NonEmptyText, ProjectFieldSpec, - ProjectSchema, RunDimensionValue, TagName, TagRecord, + AdmissionState, AnnotationVisibility, CommandRecipe, DiagnosticSeverity, ExecutionBackend, + FieldPresence, FieldRole, FieldValueType, FrontierContract, FrontierNote, FrontierProjection, + FrontierRecord, FrontierVerdict, InferencePolicy, MetricSpec, MetricUnit, MetricValue, + NodeAnnotation, NodeClass, NodePayload, NonEmptyText, ProjectFieldSpec, ProjectSchema, + RunDimensionValue, TagName, TagRecord, }; use fidget_spinner_store_sqlite::{ CloseExperimentRequest, CreateFrontierRequest, CreateNodeRequest, DefineMetricRequest, @@ -203,16 +203,6 @@ impl WorkerService { } "frontier.init" => { let args = deserialize::<FrontierInitToolArgs>(arguments)?; - let initial_checkpoint = self - .store - .auto_capture_checkpoint( - NonEmptyText::new( - args.seed_summary - .unwrap_or_else(|| "initial champion checkpoint".to_owned()), - ) - .map_err(store_fault("tools/call:frontier.init"))?, - ) - .map_err(store_fault("tools/call:frontier.init"))?; let projection = self .store .create_frontier(CreateFrontierRequest { @@ -251,7 +241,6 @@ impl WorkerService { promotion_criteria: crate::to_text_vec(args.promotion_criteria) .map_err(store_fault("tools/call:frontier.init"))?, }, - initial_checkpoint, }) .map_err(store_fault("tools/call:frontier.init"))?; tool_success( @@ -702,8 +691,6 @@ impl WorkerService { .open_experiment(OpenExperimentRequest { frontier_id: crate::parse_frontier_id(&args.frontier_id) .map_err(store_fault("tools/call:experiment.open"))?, - base_checkpoint_id: crate::parse_checkpoint_id(&args.base_checkpoint_id) - .map_err(store_fault("tools/call:experiment.open"))?, hypothesis_node_id: crate::parse_node_id(&args.hypothesis_node_id) .map_err(store_fault("tools/call:experiment.open"))?, title: NonEmptyText::new(args.title) @@ -763,33 +750,11 @@ impl WorkerService { } "experiment.close" => { let args = deserialize::<ExperimentCloseToolArgs>(arguments)?; - let snapshot = self - .store - .auto_capture_checkpoint( - NonEmptyText::new(args.candidate_summary.clone()) - .map_err(store_fault("tools/call:experiment.close"))?, - ) - .map_err(store_fault("tools/call:experiment.close"))? - .map(|seed| seed.snapshot) - .ok_or_else(|| { - FaultRecord::new( - FaultKind::Internal, - FaultStage::Store, - "tools/call:experiment.close", - format!( - "git repository inspection failed for {}", - self.store.project_root() - ), - ) - })?; let receipt = self .store .close_experiment(CloseExperimentRequest { experiment_id: crate::parse_experiment_id(&args.experiment_id) .map_err(store_fault("tools/call:experiment.close"))?, - candidate_summary: NonEmptyText::new(args.candidate_summary) - .map_err(store_fault("tools/call:experiment.close"))?, - candidate_snapshot: snapshot, run_title: NonEmptyText::new(args.run.title) .map_err(store_fault("tools/call:experiment.close"))?, run_summary: args @@ -810,10 +775,6 @@ impl WorkerService { self.store.project_root(), ) .map_err(store_fault("tools/call:experiment.close"))?, - code_snapshot: Some( - capture_code_snapshot(self.store.project_root()) - .map_err(store_fault("tools/call:experiment.close"))?, - ), primary_metric: metric_value_from_wire(args.primary_metric) .map_err(store_fault("tools/call:experiment.close"))?, supporting_metrics: args @@ -1346,8 +1307,8 @@ fn experiment_close_output( let concise = json!({ "experiment_id": receipt.experiment.id, "frontier_id": receipt.experiment.frontier_id, - "candidate_checkpoint_id": receipt.experiment.candidate_checkpoint_id, - "verdict": format!("{:?}", receipt.experiment.verdict).to_ascii_lowercase(), + "experiment_title": receipt.experiment.title, + "verdict": metric_verdict_name(receipt.experiment.verdict), "run_id": receipt.run.run_id, "hypothesis_node_id": receipt.experiment.hypothesis_node_id, "decision_node_id": receipt.decision_node.id, @@ -1362,11 +1323,11 @@ fn experiment_close_output( "closed experiment {} on frontier {}", receipt.experiment.id, receipt.experiment.frontier_id ), + format!("title: {}", receipt.experiment.title), format!("hypothesis: {}", receipt.experiment.hypothesis_node_id), - format!("candidate: {}", receipt.experiment.candidate_checkpoint_id), format!( "verdict: {}", - format!("{:?}", receipt.experiment.verdict).to_ascii_lowercase() + metric_verdict_name(receipt.experiment.verdict) ), format!( "primary metric: {}", @@ -1393,7 +1354,6 @@ fn experiment_open_output( let concise = json!({ "experiment_id": item.id, "frontier_id": item.frontier_id, - "base_checkpoint_id": item.base_checkpoint_id, "hypothesis_node_id": item.hypothesis_node_id, "title": item.title, "summary": item.summary, @@ -1405,7 +1365,6 @@ fn experiment_open_output( format!("{action} {}", item.id), format!("frontier: {}", item.frontier_id), format!("hypothesis: {}", item.hypothesis_node_id), - format!("base checkpoint: {}", item.base_checkpoint_id), format!("title: {}", item.title), item.summary .as_ref() @@ -1426,7 +1385,6 @@ fn experiment_list_output(items: &[OpenExperimentSummary]) -> Result<ToolOutput, json!({ "experiment_id": item.id, "frontier_id": item.frontier_id, - "base_checkpoint_id": item.base_checkpoint_id, "hypothesis_node_id": item.hypothesis_node_id, "title": item.title, "summary": item.summary, @@ -1436,8 +1394,8 @@ fn experiment_list_output(items: &[OpenExperimentSummary]) -> Result<ToolOutput, let mut lines = vec![format!("{} open experiment(s)", items.len())]; lines.extend(items.iter().map(|item| { format!( - "{} {} | hypothesis={} | checkpoint={}", - item.id, item.title, item.hypothesis_node_id, item.base_checkpoint_id, + "{} {} | hypothesis={}", + item.id, item.title, item.hypothesis_node_id, ) })); detailed_tool_output( @@ -1511,12 +1469,11 @@ fn metric_best_output( "value": item.value, "order": item.order.as_str(), "experiment_id": item.experiment_id, + "experiment_title": item.experiment_title, "frontier_id": item.frontier_id, "hypothesis_node_id": item.hypothesis_node_id, "hypothesis_title": item.hypothesis_title, "verdict": metric_verdict_name(item.verdict), - "candidate_checkpoint_id": item.candidate_checkpoint_id, - "candidate_commit_hash": item.candidate_commit_hash, "run_id": item.run_id, "unit": item.unit.map(metric_unit_name), "objective": item.objective.map(metric_objective_name), @@ -1527,15 +1484,14 @@ fn metric_best_output( let mut lines = vec![format!("{} ranked experiment(s)", items.len())]; lines.extend(items.iter().enumerate().map(|(index, item)| { format!( - "{}. {}={} [{}] {} | verdict={} | commit={} | checkpoint={}", + "{}. {}={} [{}] {} | verdict={} | hypothesis={}", index + 1, item.key, item.value, item.source.as_str(), - item.hypothesis_title, + item.experiment_title, metric_verdict_name(item.verdict), - item.candidate_commit_hash, - item.candidate_checkpoint_id, + item.hypothesis_title, ) })); lines.extend( @@ -1668,17 +1624,13 @@ fn frontier_projection_summary_value(projection: &FrontierProjection) -> Value { "frontier_id": projection.frontier.id, "label": projection.frontier.label, "status": format!("{:?}", projection.frontier.status).to_ascii_lowercase(), - "champion_checkpoint_id": projection.champion_checkpoint_id, - "candidate_checkpoint_ids": projection.candidate_checkpoint_ids, - "experiment_count": projection.experiment_count, + "open_experiment_count": projection.open_experiment_count, + "completed_experiment_count": projection.completed_experiment_count, + "verdict_counts": projection.verdict_counts, }) } fn frontier_projection_text(prefix: &str, projection: &FrontierProjection) -> String { - let champion = projection - .champion_checkpoint_id - .map(|value| value.to_string()) - .unwrap_or_else(|| "none".to_owned()); [ format!( "{prefix} {} {}", @@ -1688,9 +1640,18 @@ fn frontier_projection_text(prefix: &str, projection: &FrontierProjection) -> St "status: {}", format!("{:?}", projection.frontier.status).to_ascii_lowercase() ), - format!("champion: {champion}"), - format!("candidates: {}", projection.candidate_checkpoint_ids.len()), - format!("experiments: {}", projection.experiment_count), + format!("open experiments: {}", projection.open_experiment_count), + format!( + "completed experiments: {}", + projection.completed_experiment_count + ), + format!( + "verdicts: accepted={} kept={} parked={} rejected={}", + projection.verdict_counts.accepted, + projection.verdict_counts.kept, + projection.verdict_counts.parked, + projection.verdict_counts.rejected, + ), ] .join("\n") } @@ -1991,11 +1952,10 @@ fn metric_objective_name(objective: fidget_spinner_core::OptimizationObjective) fn metric_verdict_name(verdict: FrontierVerdict) -> &'static str { match verdict { - FrontierVerdict::PromoteToChampion => "promote_to_champion", - FrontierVerdict::KeepOnFrontier => "keep_on_frontier", - FrontierVerdict::RevertToChampion => "revert_to_champion", - FrontierVerdict::ArchiveDeadEnd => "archive_dead_end", - FrontierVerdict::NeedsMoreEvidence => "needs_more_evidence", + FrontierVerdict::Accepted => "accepted", + FrontierVerdict::Kept => "kept", + FrontierVerdict::Parked => "parked", + FrontierVerdict::Rejected => "rejected", } } @@ -2192,10 +2152,6 @@ fn command_recipe_from_wire( .map_err(StoreError::from) } -fn capture_code_snapshot(project_root: &Utf8Path) -> Result<CodeSnapshotRef, StoreError> { - crate::capture_code_snapshot(project_root) -} - fn parse_node_class_name(raw: &str) -> Result<NodeClass, StoreError> { match raw { "contract" => Ok(NodeClass::Contract), @@ -2311,11 +2267,10 @@ fn parse_backend_name(raw: &str) -> Result<ExecutionBackend, StoreError> { fn parse_verdict_name(raw: &str) -> Result<FrontierVerdict, StoreError> { match raw { - "promote_to_champion" => Ok(FrontierVerdict::PromoteToChampion), - "keep_on_frontier" => Ok(FrontierVerdict::KeepOnFrontier), - "revert_to_champion" => Ok(FrontierVerdict::RevertToChampion), - "archive_dead_end" => Ok(FrontierVerdict::ArchiveDeadEnd), - "needs_more_evidence" => Ok(FrontierVerdict::NeedsMoreEvidence), + "accepted" => Ok(FrontierVerdict::Accepted), + "kept" => Ok(FrontierVerdict::Kept), + "parked" => Ok(FrontierVerdict::Parked), + "rejected" => Ok(FrontierVerdict::Rejected), other => Err(crate::invalid_input(format!("unknown verdict `{other}`"))), } } @@ -2342,7 +2297,6 @@ struct FrontierInitToolArgs { primary_metric: WireMetricSpec, #[serde(default)] supporting_metrics: Vec<WireMetricSpec>, - seed_summary: Option<String>, } #[derive(Debug, Deserialize)] @@ -2480,7 +2434,6 @@ struct MetricBestToolArgs { #[derive(Debug, Deserialize)] struct ExperimentOpenToolArgs { frontier_id: String, - base_checkpoint_id: String, hypothesis_node_id: String, title: String, summary: Option<String>, @@ -2499,7 +2452,6 @@ struct ExperimentReadToolArgs { #[derive(Debug, Deserialize)] struct ExperimentCloseToolArgs { experiment_id: String, - candidate_summary: String, run: WireRun, primary_metric: WireMetricValue, #[serde(default)] diff --git a/crates/fidget-spinner-cli/tests/mcp_hardening.rs b/crates/fidget-spinner-cli/tests/mcp_hardening.rs index 0142b77..21a3d04 100644 --- a/crates/fidget-spinner-cli/tests/mcp_hardening.rs +++ b/crates/fidget-spinner-cli/tests/mcp_hardening.rs @@ -57,48 +57,6 @@ fn init_project(root: &Utf8PathBuf) -> TestResult { Ok(()) } -fn run_command(root: &Utf8PathBuf, program: &str, args: &[&str]) -> TestResult<String> { - let output = must( - Command::new(program) - .current_dir(root.as_std_path()) - .args(args) - .output(), - format!("{program} spawn"), - )?; - if !output.status.success() { - return Err(io::Error::other(format!( - "{program} {:?} failed: {}", - args, - String::from_utf8_lossy(&output.stderr) - )) - .into()); - } - Ok(String::from_utf8_lossy(&output.stdout).trim().to_owned()) -} - -fn run_git(root: &Utf8PathBuf, args: &[&str]) -> TestResult<String> { - run_command(root, "git", args) -} - -fn init_git_project(root: &Utf8PathBuf) -> TestResult<String> { - let _ = run_git(root, &["init", "-b", "main"])?; - let _ = run_git(root, &["config", "user.name", "main"])?; - let _ = run_git(root, &["config", "user.email", "main@swarm.moe"])?; - let _ = run_git(root, &["add", "-A"])?; - let _ = run_git(root, &["commit", "-m", "initial state"])?; - run_git(root, &["rev-parse", "HEAD"]) -} - -fn commit_project_state(root: &Utf8PathBuf, marker: &str, message: &str) -> TestResult<String> { - must( - fs::write(root.join(marker).as_std_path(), message), - format!("write marker {marker}"), - )?; - let _ = run_git(root, &["add", "-A"])?; - let _ = run_git(root, &["commit", "-m", message])?; - run_git(root, &["rev-parse", "HEAD"]) -} - fn binary_path() -> PathBuf { PathBuf::from(env!("CARGO_BIN_EXE_fidget-spinner-cli")) } @@ -688,7 +646,7 @@ fn tag_registry_drives_note_creation_and_lookup() -> TestResult { } #[test] -fn research_record_accepts_tags_and_filtering() -> TestResult { +fn source_record_accepts_tags_and_filtering() -> TestResult { let project_root = temp_project_root("research_tags")?; init_project(&project_root)?; @@ -721,10 +679,7 @@ fn research_record_accepts_tags_and_filtering() -> TestResult { assert_eq!(research["result"]["isError"].as_bool(), Some(false)); let filtered = harness.call_tool(454, "node.list", json!({"tags": ["campaign/libgrid"]}))?; - let nodes = must_some( - tool_content(&filtered).as_array(), - "filtered research nodes", - )?; + let nodes = must_some(tool_content(&filtered).as_array(), "filtered source nodes")?; assert_eq!(nodes.len(), 1); assert_eq!(nodes[0]["class"].as_str(), Some("source")); assert_eq!(nodes[0]["tags"][0].as_str(), Some("campaign/libgrid")); @@ -760,20 +715,20 @@ fn prose_tools_reject_invalid_shapes_over_mcp() -> TestResult { .is_some_and(|message| message.contains("summary") || message.contains("missing field")) ); - let missing_research_summary = harness.call_tool( + let missing_source_summary = harness.call_tool( 48, "source.record", json!({ - "title": "research only", + "title": "source only", "body": "body only", }), )?; assert_eq!( - missing_research_summary["result"]["isError"].as_bool(), + missing_source_summary["result"]["isError"].as_bool(), Some(true) ); assert!( - fault_message(&missing_research_summary) + fault_message(&missing_source_summary) .is_some_and(|message| message.contains("summary") || message.contains("missing field")) ); @@ -794,7 +749,7 @@ fn prose_tools_reject_invalid_shapes_over_mcp() -> TestResult { .is_some_and(|message| message.contains("payload field `body`")) ); - let research_without_summary = harness.call_tool( + let source_without_summary = harness.call_tool( 50, "node.create", json!({ @@ -804,11 +759,11 @@ fn prose_tools_reject_invalid_shapes_over_mcp() -> TestResult { }), )?; assert_eq!( - research_without_summary["result"]["isError"].as_bool(), + source_without_summary["result"]["isError"].as_bool(), Some(true) ); assert!( - fault_message(&research_without_summary) + fault_message(&source_without_summary) .is_some_and(|message| message.contains("non-empty summary")) ); Ok(()) @@ -885,11 +840,8 @@ fn concise_prose_reads_only_surface_payload_field_names() -> TestResult { }), )?; assert_eq!(research["result"]["isError"].as_bool(), Some(false)); - let node_id = must_some( - tool_content(&research)["id"].as_str(), - "created research id", - )? - .to_owned(); + let node_id = + must_some(tool_content(&research)["id"].as_str(), "created source id")?.to_owned(); let concise = harness.call_tool(533, "node.read", json!({ "node_id": node_id }))?; let concise_structured = tool_content(&concise); @@ -1043,7 +995,7 @@ fn bind_open_backfills_legacy_missing_summary() -> TestResult { store.add_node(fidget_spinner_store_sqlite::CreateNodeRequest { class: fidget_spinner_core::NodeClass::Source, frontier_id: None, - title: must(NonEmptyText::new("legacy research"), "legacy title")?, + title: must(NonEmptyText::new("legacy source"), "legacy title")?, summary: Some(must( NonEmptyText::new("temporary summary"), "temporary summary", @@ -1059,7 +1011,7 @@ fn bind_open_backfills_legacy_missing_summary() -> TestResult { annotations: Vec::new(), attachments: Vec::new(), }), - "create legacy research node", + "create legacy source node", )?; node.id.to_string() }; @@ -1097,7 +1049,7 @@ fn bind_open_backfills_legacy_missing_summary() -> TestResult { ); let listed = harness.call_tool(62, "node.list", json!({ "class": "source" }))?; - let items = must_some(tool_content(&listed).as_array(), "research node list")?; + let items = must_some(tool_content(&listed).as_array(), "source node list")?; assert_eq!(items.len(), 1); assert_eq!( items[0]["summary"].as_str(), @@ -1110,7 +1062,6 @@ fn bind_open_backfills_legacy_missing_summary() -> TestResult { fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResult { let project_root = temp_project_root("metric_rank_e2e")?; init_project(&project_root)?; - let _initial_head = init_git_project(&project_root)?; let mut harness = McpHarness::spawn(Some(&project_root), &[])?; let _ = harness.initialize()?; @@ -1138,11 +1089,6 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu "frontier id", )? .to_owned(); - let base_checkpoint_id = must_some( - tool_content(&frontier)["champion_checkpoint_id"].as_str(), - "base checkpoint id", - )? - .to_owned(); let metric_define = harness.call_tool( 701, "metric.define", @@ -1222,7 +1168,6 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu "experiment.open", json!({ "frontier_id": frontier_id, - "base_checkpoint_id": base_checkpoint_id, "hypothesis_node_id": first_change_id, "title": "first experiment", "summary": "first experiment summary" @@ -1233,14 +1178,12 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu tool_content(&first_experiment)["experiment_id"].as_str(), "first experiment id", )?; - let _first_commit = commit_project_state(&project_root, "candidate-one.txt", "candidate one")?; let first_close = harness.call_tool( 72, "experiment.close", json!({ "experiment_id": first_experiment_id, - "candidate_summary": "candidate one", "run": { "title": "first run", "summary": "first run summary", @@ -1262,19 +1205,13 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu "note": { "summary": "first run note" }, - "verdict": "keep_on_frontier", + "verdict": "kept", "decision_title": "first decision", "decision_rationale": "keep first candidate around" }), )?; assert_eq!(first_close["result"]["isError"].as_bool(), Some(false)); - let first_candidate_checkpoint_id = must_some( - tool_content(&first_close)["candidate_checkpoint_id"].as_str(), - "first candidate checkpoint id", - )? - .to_owned(); - let second_change = harness.call_tool( 73, "node.create", @@ -1299,7 +1236,6 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu "experiment.open", json!({ "frontier_id": frontier_id, - "base_checkpoint_id": base_checkpoint_id, "hypothesis_node_id": second_change_id, "title": "second experiment", "summary": "second experiment summary" @@ -1313,14 +1249,12 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu tool_content(&second_experiment)["experiment_id"].as_str(), "second experiment id", )?; - let second_commit = commit_project_state(&project_root, "candidate-two.txt", "candidate two")?; let second_close = harness.call_tool( 74, "experiment.close", json!({ "experiment_id": second_experiment_id, - "candidate_summary": "candidate two", "run": { "title": "second run", "summary": "second run summary", @@ -1342,17 +1276,12 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu "note": { "summary": "second run note" }, - "verdict": "keep_on_frontier", + "verdict": "kept", "decision_title": "second decision", "decision_rationale": "second candidate looks stronger" }), )?; assert_eq!(second_close["result"]["isError"].as_bool(), Some(false)); - let second_candidate_checkpoint_id = must_some( - tool_content(&second_close)["candidate_checkpoint_id"].as_str(), - "second candidate checkpoint id", - )? - .to_owned(); let second_frontier = harness.call_tool( 80, @@ -1376,11 +1305,6 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu "second frontier id", )? .to_owned(); - let second_base_checkpoint_id = must_some( - tool_content(&second_frontier)["champion_checkpoint_id"].as_str(), - "second frontier base checkpoint id", - )? - .to_owned(); let third_change = harness.call_tool( 81, @@ -1406,7 +1330,6 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu "experiment.open", json!({ "frontier_id": second_frontier_id, - "base_checkpoint_id": second_base_checkpoint_id, "hypothesis_node_id": third_change_id, "title": "third experiment", "summary": "third experiment summary" @@ -1417,15 +1340,12 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu tool_content(&third_experiment)["experiment_id"].as_str(), "third experiment id", )?; - let third_commit = - commit_project_state(&project_root, "candidate-three.txt", "candidate three")?; let third_close = harness.call_tool( 82, "experiment.close", json!({ "experiment_id": third_experiment_id, - "candidate_summary": "candidate three", "run": { "title": "third run", "summary": "third run summary", @@ -1447,17 +1367,12 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu "note": { "summary": "third run note" }, - "verdict": "keep_on_frontier", + "verdict": "kept", "decision_title": "third decision", "decision_rationale": "third candidate is best overall but not in the first frontier" }), )?; assert_eq!(third_close["result"]["isError"].as_bool(), Some(false)); - let third_candidate_checkpoint_id = must_some( - tool_content(&third_close)["candidate_checkpoint_id"].as_str(), - "third candidate checkpoint id", - )? - .to_owned(); let keys = harness.call_tool(75, "metric.keys", json!({}))?; assert_eq!(keys["result"]["isError"].as_bool(), Some(false)); @@ -1524,13 +1439,10 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu assert_eq!(run_best_rows[0]["value"].as_f64(), Some(5.0)); assert_eq!(run_best_rows.len(), 1); assert_eq!( - run_best_rows[0]["candidate_checkpoint_id"].as_str(), - Some(second_candidate_checkpoint_id.as_str()) - ); - assert_eq!( - run_best_rows[0]["candidate_commit_hash"].as_str(), - Some(second_commit.as_str()) + run_best_rows[0]["experiment_title"].as_str(), + Some("second experiment") ); + assert_eq!(run_best_rows[0]["verdict"].as_str(), Some("kept")); assert_eq!( run_best_rows[0]["dimensions"]["scenario"].as_str(), Some("belt_4x5") @@ -1539,7 +1451,9 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu run_best_rows[0]["dimensions"]["duration_s"].as_f64(), Some(60.0) ); - assert!(must_some(tool_text(&run_metric_best), "run metric best text")?.contains("commit=")); + assert!( + must_some(tool_text(&run_metric_best), "run metric best text")?.contains("hypothesis=") + ); assert!(must_some(tool_text(&run_metric_best), "run metric best text")?.contains("dims:")); let payload_requires_order = harness.call_tool( @@ -1580,12 +1494,8 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu assert_eq!(payload_best_rows[0]["value"].as_f64(), Some(7.0)); assert_eq!(payload_best_rows.len(), 1); assert_eq!( - payload_best_rows[0]["candidate_checkpoint_id"].as_str(), - Some(second_candidate_checkpoint_id.as_str()) - ); - assert_eq!( - payload_best_rows[0]["candidate_commit_hash"].as_str(), - Some(second_commit.as_str()) + payload_best_rows[0]["experiment_title"].as_str(), + Some("second experiment") ); let filtered_best = harness.call_tool( @@ -1608,8 +1518,8 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu )?; assert_eq!(filtered_rows.len(), 2); assert_eq!( - filtered_rows[0]["candidate_checkpoint_id"].as_str(), - Some(second_candidate_checkpoint_id.as_str()) + filtered_rows[0]["experiment_title"].as_str(), + Some("second experiment") ); assert!( filtered_rows @@ -1632,12 +1542,12 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu "global metric best array", )?; assert_eq!( - global_rows[0]["candidate_checkpoint_id"].as_str(), - Some(third_candidate_checkpoint_id.as_str()) + global_rows[0]["experiment_title"].as_str(), + Some("third experiment") ); assert_eq!( - global_rows[0]["candidate_commit_hash"].as_str(), - Some(third_commit.as_str()) + global_rows[0]["frontier_id"].as_str(), + Some(second_frontier_id.as_str()) ); let migrate = harness.call_tool(85, "metric.migrate", json!({}))?; @@ -1654,14 +1564,5 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu tool_content(&migrate)["inserted_dimension_values"].as_u64(), Some(0) ); - - assert_ne!( - first_candidate_checkpoint_id, - second_candidate_checkpoint_id - ); - assert_ne!( - second_candidate_checkpoint_id, - third_candidate_checkpoint_id - ); Ok(()) } |