swarm repositories / source
aboutsummaryrefslogtreecommitdiff
path: root/crates/fidget-spinner-cli
diff options
context:
space:
mode:
author main <main@swarm.moe> 2026-03-20 01:11:39 -0400
committer main <main@swarm.moe> 2026-03-20 01:11:39 -0400
commit 22fe3d2ce7478450a1d7443c4ecbd85fd4c46716 (patch)
tree d534d4585a804081b53fcf2f3bbb3a8fc5d29190 /crates/fidget-spinner-cli
parent ce41a229dcd57f9a2c35359fe77d9f54f603e985 (diff)
download fidget_spinner-22fe3d2ce7478450a1d7443c4ecbd85fd4c46716.zip
Excise git provenance from experiment ledger
Diffstat (limited to 'crates/fidget-spinner-cli')
-rw-r--r-- crates/fidget-spinner-cli/src/main.rs | 80
-rw-r--r-- crates/fidget-spinner-cli/src/mcp/catalog.rs | 24
-rw-r--r-- crates/fidget-spinner-cli/src/mcp/service.rs | 124
-rw-r--r-- crates/fidget-spinner-cli/tests/mcp_hardening.rs | 161
4 files changed, 93 insertions, 296 deletions
diff --git a/crates/fidget-spinner-cli/src/main.rs b/crates/fidget-spinner-cli/src/main.rs
index 491e30d..f56e751 100644
--- a/crates/fidget-spinner-cli/src/main.rs
+++ b/crates/fidget-spinner-cli/src/main.rs
@@ -10,10 +10,10 @@ use std::path::{Path, PathBuf};
use camino::{Utf8Path, Utf8PathBuf};
use clap::{Args, Parser, Subcommand, ValueEnum};
use fidget_spinner_core::{
- AnnotationVisibility, CodeSnapshotRef, CommandRecipe, DiagnosticSeverity, ExecutionBackend,
- FieldPresence, FieldRole, FieldValueType, FrontierContract, FrontierNote, FrontierVerdict,
- GitCommitHash, InferencePolicy, MetricSpec, MetricUnit, MetricValue, NodeAnnotation, NodeClass,
- NodePayload, NonEmptyText, OptimizationObjective, ProjectFieldSpec, TagName,
+ AnnotationVisibility, CommandRecipe, DiagnosticSeverity, ExecutionBackend, FieldPresence,
+ FieldRole, FieldValueType, FrontierContract, FrontierNote, FrontierVerdict, InferencePolicy,
+ MetricSpec, MetricUnit, MetricValue, NodeAnnotation, NodeClass, NodePayload, NonEmptyText,
+ OptimizationObjective, ProjectFieldSpec, TagName,
};
use fidget_spinner_store_sqlite::{
CloseExperimentRequest, CreateFrontierRequest, CreateNodeRequest, DefineMetricRequest,
@@ -152,8 +152,6 @@ struct FrontierInitArgs {
primary_metric_unit: CliMetricUnit,
#[arg(long = "primary-metric-objective", value_enum)]
primary_metric_objective: CliOptimizationObjective,
- #[arg(long = "seed-summary", default_value = "initial champion checkpoint")]
- seed_summary: String,
}
#[derive(Args)]
@@ -490,11 +488,11 @@ struct MetricBestArgs {
#[derive(Subcommand)]
enum ExperimentCommand {
- /// Open a stateful experiment against one hypothesis and base checkpoint.
+ /// Open a stateful experiment against one hypothesis.
Open(ExperimentOpenArgs),
/// List open experiments, optionally narrowed to one frontier.
List(ExperimentListArgs),
- /// Close a core-path experiment with checkpoint, run, note, and verdict.
+ /// Close a core-path experiment with run data, note, and verdict.
Close(Box<ExperimentCloseArgs>),
}
@@ -518,8 +516,6 @@ struct ExperimentCloseArgs {
project: ProjectArg,
#[arg(long = "experiment")]
experiment_id: String,
- #[arg(long = "candidate-summary")]
- candidate_summary: String,
#[arg(long = "run-title")]
run_title: String,
#[arg(long = "run-summary")]
@@ -567,8 +563,6 @@ struct ExperimentOpenArgs {
project: ProjectArg,
#[arg(long)]
frontier: String,
- #[arg(long = "base-checkpoint")]
- base_checkpoint: String,
#[arg(long = "hypothesis-node")]
hypothesis_node: String,
#[arg(long)]
@@ -733,11 +727,10 @@ enum CliInferencePolicy {
#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)]
enum CliFrontierVerdict {
- PromoteToChampion,
- KeepOnFrontier,
- RevertToChampion,
- ArchiveDeadEnd,
- NeedsMoreEvidence,
+ Accepted,
+ Kept,
+ Parked,
+ Rejected,
}
fn main() {
@@ -834,8 +827,6 @@ fn run_init(args: InitArgs) -> Result<(), StoreError> {
fn run_frontier_init(args: FrontierInitArgs) -> Result<(), StoreError> {
let mut store = open_store(&args.project.project)?;
- let initial_checkpoint =
- store.auto_capture_checkpoint(NonEmptyText::new(args.seed_summary)?)?;
let projection = store.create_frontier(CreateFrontierRequest {
label: NonEmptyText::new(args.label)?,
contract_title: NonEmptyText::new(args.contract_title)?,
@@ -853,7 +844,6 @@ fn run_frontier_init(args: FrontierInitArgs) -> Result<(), StoreError> {
},
promotion_criteria: to_text_vec(args.promotion_criteria)?,
},
- initial_checkpoint,
})?;
print_json(&projection)
}
@@ -1131,7 +1121,6 @@ fn run_experiment_open(args: ExperimentOpenArgs) -> Result<(), StoreError> {
let summary = args.summary.map(NonEmptyText::new).transpose()?;
let experiment = store.open_experiment(OpenExperimentRequest {
frontier_id: parse_frontier_id(&args.frontier)?,
- base_checkpoint_id: parse_checkpoint_id(&args.base_checkpoint)?,
hypothesis_node_id: parse_node_id(&args.hypothesis_node)?,
title: NonEmptyText::new(args.title)?,
summary,
@@ -1151,12 +1140,6 @@ fn run_experiment_list(args: ExperimentListArgs) -> Result<(), StoreError> {
fn run_experiment_close(args: ExperimentCloseArgs) -> Result<(), StoreError> {
let mut store = open_store(&args.project.project)?;
- let snapshot = store
- .auto_capture_checkpoint(NonEmptyText::new(args.candidate_summary.clone())?)?
- .map(|seed| seed.snapshot)
- .ok_or(StoreError::GitInspectionFailed(
- store.project_root().to_path_buf(),
- ))?;
let command = CommandRecipe::new(
args.working_directory
.map(utf8_path)
@@ -1186,14 +1169,11 @@ fn run_experiment_close(args: ExperimentCloseArgs) -> Result<(), StoreError> {
};
let receipt = store.close_experiment(CloseExperimentRequest {
experiment_id: parse_experiment_id(&args.experiment_id)?,
- candidate_summary: NonEmptyText::new(args.candidate_summary)?,
- candidate_snapshot: snapshot,
run_title: NonEmptyText::new(args.run_title)?,
run_summary: args.run_summary.map(NonEmptyText::new).transpose()?,
backend: args.backend.into(),
dimensions: coerce_cli_dimension_filters(&store, args.dimensions)?,
command,
- code_snapshot: Some(capture_code_snapshot(store.project_root())?),
primary_metric: parse_metric_value(args.primary_metric)?,
supporting_metrics: args
.metrics
@@ -1539,31 +1519,6 @@ fn parse_node_class_set(classes: Vec<CliNodeClass>) -> BTreeSet<NodeClass> {
classes.into_iter().map(Into::into).collect()
}
-fn capture_code_snapshot(project_root: &Utf8Path) -> Result<CodeSnapshotRef, StoreError> {
- let head_commit = run_git(project_root, &["rev-parse", "HEAD"])?;
- let dirty_paths = run_git(project_root, &["status", "--porcelain"])?
- .map(|status| {
- status
- .lines()
- .filter_map(|line| line.get(3..).map(str::trim))
- .filter(|line| !line.is_empty())
- .map(Utf8PathBuf::from)
- .collect::<BTreeSet<_>>()
- })
- .unwrap_or_default();
- Ok(CodeSnapshotRef {
- repo_root: run_git(project_root, &["rev-parse", "--show-toplevel"])?
- .map(Utf8PathBuf::from)
- .unwrap_or_else(|| project_root.to_path_buf()),
- worktree_root: project_root.to_path_buf(),
- worktree_name: run_git(project_root, &["rev-parse", "--abbrev-ref", "HEAD"])?
- .map(NonEmptyText::new)
- .transpose()?,
- head_commit: head_commit.map(GitCommitHash::new).transpose()?,
- dirty_paths,
- })
-}
-
fn run_git(project_root: &Utf8Path, args: &[&str]) -> Result<Option<String>, StoreError> {
let output = std::process::Command::new("git")
.arg("-C")
@@ -1702,12 +1657,6 @@ fn parse_frontier_id(raw: &str) -> Result<fidget_spinner_core::FrontierId, Store
)?))
}
-fn parse_checkpoint_id(raw: &str) -> Result<fidget_spinner_core::CheckpointId, StoreError> {
- Ok(fidget_spinner_core::CheckpointId::from_uuid(
- Uuid::parse_str(raw)?,
- ))
-}
-
fn parse_experiment_id(raw: &str) -> Result<fidget_spinner_core::ExperimentId, StoreError> {
Ok(fidget_spinner_core::ExperimentId::from_uuid(
Uuid::parse_str(raw)?,
@@ -1851,11 +1800,10 @@ impl From<CliInferencePolicy> for InferencePolicy {
impl From<CliFrontierVerdict> for FrontierVerdict {
fn from(value: CliFrontierVerdict) -> Self {
match value {
- CliFrontierVerdict::PromoteToChampion => Self::PromoteToChampion,
- CliFrontierVerdict::KeepOnFrontier => Self::KeepOnFrontier,
- CliFrontierVerdict::RevertToChampion => Self::RevertToChampion,
- CliFrontierVerdict::ArchiveDeadEnd => Self::ArchiveDeadEnd,
- CliFrontierVerdict::NeedsMoreEvidence => Self::NeedsMoreEvidence,
+ CliFrontierVerdict::Accepted => Self::Accepted,
+ CliFrontierVerdict::Kept => Self::Kept,
+ CliFrontierVerdict::Parked => Self::Parked,
+ CliFrontierVerdict::Rejected => Self::Rejected,
}
}
}
diff --git a/crates/fidget-spinner-cli/src/mcp/catalog.rs b/crates/fidget-spinner-cli/src/mcp/catalog.rs
index 3b8abcc..ae3ca78 100644
--- a/crates/fidget-spinner-cli/src/mcp/catalog.rs
+++ b/crates/fidget-spinner-cli/src/mcp/catalog.rs
@@ -99,13 +99,13 @@ pub(crate) fn tool_spec(name: &str) -> Option<ToolSpec> {
}),
"frontier.status" => Some(ToolSpec {
name: "frontier.status",
- description: "Read one frontier projection, including champion and active candidates.",
+ description: "Read one frontier projection, including open/completed experiment counts and verdict totals.",
dispatch: DispatchTarget::Worker,
replay: ReplayContract::Convergent,
}),
"frontier.init" => Some(ToolSpec {
name: "frontier.init",
- description: "Create a new frontier rooted in a contract node. If the project is a git repo, the current HEAD becomes the initial champion when possible.",
+ description: "Create a new frontier rooted in a contract node.",
dispatch: DispatchTarget::Worker,
replay: ReplayContract::NeverReplay,
}),
@@ -183,7 +183,7 @@ pub(crate) fn tool_spec(name: &str) -> Option<ToolSpec> {
}),
"metric.best" => Some(ToolSpec {
name: "metric.best",
- description: "Rank completed experiments by one numeric key, with optional run-dimension filters and candidate commit surfacing.",
+ description: "Rank completed experiments by one numeric key, with optional run-dimension filters.",
dispatch: DispatchTarget::Worker,
replay: ReplayContract::Convergent,
}),
@@ -195,7 +195,7 @@ pub(crate) fn tool_spec(name: &str) -> Option<ToolSpec> {
}),
"experiment.open" => Some(ToolSpec {
name: "experiment.open",
- description: "Open a stateful experiment against one hypothesis and one base checkpoint.",
+ description: "Open a stateful experiment against one hypothesis.",
dispatch: DispatchTarget::Worker,
replay: ReplayContract::NeverReplay,
}),
@@ -213,7 +213,7 @@ pub(crate) fn tool_spec(name: &str) -> Option<ToolSpec> {
}),
"experiment.close" => Some(ToolSpec {
name: "experiment.close",
- description: "Close one open experiment with typed run dimensions, preregistered metric observations, candidate checkpoint capture, optional analysis, note, and verdict.",
+ description: "Close one open experiment with typed run dimensions, preregistered metric observations, optional analysis, note, and verdict.",
dispatch: DispatchTarget::Worker,
replay: ReplayContract::NeverReplay,
}),
@@ -562,12 +562,11 @@ fn input_schema(name: &str) -> Value {
"type": "object",
"properties": {
"frontier_id": { "type": "string" },
- "base_checkpoint_id": { "type": "string" },
"hypothesis_node_id": { "type": "string" },
"title": { "type": "string" },
"summary": { "type": "string" }
},
- "required": ["frontier_id", "base_checkpoint_id", "hypothesis_node_id", "title"],
+ "required": ["frontier_id", "hypothesis_node_id", "title"],
"additionalProperties": false
}),
"experiment.list" => json!({
@@ -589,7 +588,6 @@ fn input_schema(name: &str) -> Value {
"type": "object",
"properties": {
"experiment_id": { "type": "string" },
- "candidate_summary": { "type": "string" },
"run": run_schema(),
"primary_metric": metric_value_schema(),
"supporting_metrics": { "type": "array", "items": metric_value_schema() },
@@ -601,7 +599,6 @@ fn input_schema(name: &str) -> Value {
},
"required": [
"experiment_id",
- "candidate_summary",
"run",
"primary_metric",
"note",
@@ -753,11 +750,10 @@ fn verdict_schema() -> Value {
json!({
"type": "string",
"enum": [
- "promote_to_champion",
- "keep_on_frontier",
- "revert_to_champion",
- "archive_dead_end",
- "needs_more_evidence"
+ "accepted",
+ "kept",
+ "parked",
+ "rejected"
]
})
}
diff --git a/crates/fidget-spinner-cli/src/mcp/service.rs b/crates/fidget-spinner-cli/src/mcp/service.rs
index 05f2382..f0cca1e 100644
--- a/crates/fidget-spinner-cli/src/mcp/service.rs
+++ b/crates/fidget-spinner-cli/src/mcp/service.rs
@@ -3,11 +3,11 @@ use std::fs;
use camino::{Utf8Path, Utf8PathBuf};
use fidget_spinner_core::{
- AdmissionState, AnnotationVisibility, CodeSnapshotRef, CommandRecipe, DiagnosticSeverity,
- ExecutionBackend, FieldPresence, FieldRole, FieldValueType, FrontierContract, FrontierNote,
- FrontierProjection, FrontierRecord, FrontierVerdict, InferencePolicy, MetricSpec, MetricUnit,
- MetricValue, NodeAnnotation, NodeClass, NodePayload, NonEmptyText, ProjectFieldSpec,
- ProjectSchema, RunDimensionValue, TagName, TagRecord,
+ AdmissionState, AnnotationVisibility, CommandRecipe, DiagnosticSeverity, ExecutionBackend,
+ FieldPresence, FieldRole, FieldValueType, FrontierContract, FrontierNote, FrontierProjection,
+ FrontierRecord, FrontierVerdict, InferencePolicy, MetricSpec, MetricUnit, MetricValue,
+ NodeAnnotation, NodeClass, NodePayload, NonEmptyText, ProjectFieldSpec, ProjectSchema,
+ RunDimensionValue, TagName, TagRecord,
};
use fidget_spinner_store_sqlite::{
CloseExperimentRequest, CreateFrontierRequest, CreateNodeRequest, DefineMetricRequest,
@@ -203,16 +203,6 @@ impl WorkerService {
}
"frontier.init" => {
let args = deserialize::<FrontierInitToolArgs>(arguments)?;
- let initial_checkpoint = self
- .store
- .auto_capture_checkpoint(
- NonEmptyText::new(
- args.seed_summary
- .unwrap_or_else(|| "initial champion checkpoint".to_owned()),
- )
- .map_err(store_fault("tools/call:frontier.init"))?,
- )
- .map_err(store_fault("tools/call:frontier.init"))?;
let projection = self
.store
.create_frontier(CreateFrontierRequest {
@@ -251,7 +241,6 @@ impl WorkerService {
promotion_criteria: crate::to_text_vec(args.promotion_criteria)
.map_err(store_fault("tools/call:frontier.init"))?,
},
- initial_checkpoint,
})
.map_err(store_fault("tools/call:frontier.init"))?;
tool_success(
@@ -702,8 +691,6 @@ impl WorkerService {
.open_experiment(OpenExperimentRequest {
frontier_id: crate::parse_frontier_id(&args.frontier_id)
.map_err(store_fault("tools/call:experiment.open"))?,
- base_checkpoint_id: crate::parse_checkpoint_id(&args.base_checkpoint_id)
- .map_err(store_fault("tools/call:experiment.open"))?,
hypothesis_node_id: crate::parse_node_id(&args.hypothesis_node_id)
.map_err(store_fault("tools/call:experiment.open"))?,
title: NonEmptyText::new(args.title)
@@ -763,33 +750,11 @@ impl WorkerService {
}
"experiment.close" => {
let args = deserialize::<ExperimentCloseToolArgs>(arguments)?;
- let snapshot = self
- .store
- .auto_capture_checkpoint(
- NonEmptyText::new(args.candidate_summary.clone())
- .map_err(store_fault("tools/call:experiment.close"))?,
- )
- .map_err(store_fault("tools/call:experiment.close"))?
- .map(|seed| seed.snapshot)
- .ok_or_else(|| {
- FaultRecord::new(
- FaultKind::Internal,
- FaultStage::Store,
- "tools/call:experiment.close",
- format!(
- "git repository inspection failed for {}",
- self.store.project_root()
- ),
- )
- })?;
let receipt = self
.store
.close_experiment(CloseExperimentRequest {
experiment_id: crate::parse_experiment_id(&args.experiment_id)
.map_err(store_fault("tools/call:experiment.close"))?,
- candidate_summary: NonEmptyText::new(args.candidate_summary)
- .map_err(store_fault("tools/call:experiment.close"))?,
- candidate_snapshot: snapshot,
run_title: NonEmptyText::new(args.run.title)
.map_err(store_fault("tools/call:experiment.close"))?,
run_summary: args
@@ -810,10 +775,6 @@ impl WorkerService {
self.store.project_root(),
)
.map_err(store_fault("tools/call:experiment.close"))?,
- code_snapshot: Some(
- capture_code_snapshot(self.store.project_root())
- .map_err(store_fault("tools/call:experiment.close"))?,
- ),
primary_metric: metric_value_from_wire(args.primary_metric)
.map_err(store_fault("tools/call:experiment.close"))?,
supporting_metrics: args
@@ -1346,8 +1307,8 @@ fn experiment_close_output(
let concise = json!({
"experiment_id": receipt.experiment.id,
"frontier_id": receipt.experiment.frontier_id,
- "candidate_checkpoint_id": receipt.experiment.candidate_checkpoint_id,
- "verdict": format!("{:?}", receipt.experiment.verdict).to_ascii_lowercase(),
+ "experiment_title": receipt.experiment.title,
+ "verdict": metric_verdict_name(receipt.experiment.verdict),
"run_id": receipt.run.run_id,
"hypothesis_node_id": receipt.experiment.hypothesis_node_id,
"decision_node_id": receipt.decision_node.id,
@@ -1362,11 +1323,11 @@ fn experiment_close_output(
"closed experiment {} on frontier {}",
receipt.experiment.id, receipt.experiment.frontier_id
),
+ format!("title: {}", receipt.experiment.title),
format!("hypothesis: {}", receipt.experiment.hypothesis_node_id),
- format!("candidate: {}", receipt.experiment.candidate_checkpoint_id),
format!(
"verdict: {}",
- format!("{:?}", receipt.experiment.verdict).to_ascii_lowercase()
+ metric_verdict_name(receipt.experiment.verdict)
),
format!(
"primary metric: {}",
@@ -1393,7 +1354,6 @@ fn experiment_open_output(
let concise = json!({
"experiment_id": item.id,
"frontier_id": item.frontier_id,
- "base_checkpoint_id": item.base_checkpoint_id,
"hypothesis_node_id": item.hypothesis_node_id,
"title": item.title,
"summary": item.summary,
@@ -1405,7 +1365,6 @@ fn experiment_open_output(
format!("{action} {}", item.id),
format!("frontier: {}", item.frontier_id),
format!("hypothesis: {}", item.hypothesis_node_id),
- format!("base checkpoint: {}", item.base_checkpoint_id),
format!("title: {}", item.title),
item.summary
.as_ref()
@@ -1426,7 +1385,6 @@ fn experiment_list_output(items: &[OpenExperimentSummary]) -> Result<ToolOutput,
json!({
"experiment_id": item.id,
"frontier_id": item.frontier_id,
- "base_checkpoint_id": item.base_checkpoint_id,
"hypothesis_node_id": item.hypothesis_node_id,
"title": item.title,
"summary": item.summary,
@@ -1436,8 +1394,8 @@ fn experiment_list_output(items: &[OpenExperimentSummary]) -> Result<ToolOutput,
let mut lines = vec![format!("{} open experiment(s)", items.len())];
lines.extend(items.iter().map(|item| {
format!(
- "{} {} | hypothesis={} | checkpoint={}",
- item.id, item.title, item.hypothesis_node_id, item.base_checkpoint_id,
+ "{} {} | hypothesis={}",
+ item.id, item.title, item.hypothesis_node_id,
)
}));
detailed_tool_output(
@@ -1511,12 +1469,11 @@ fn metric_best_output(
"value": item.value,
"order": item.order.as_str(),
"experiment_id": item.experiment_id,
+ "experiment_title": item.experiment_title,
"frontier_id": item.frontier_id,
"hypothesis_node_id": item.hypothesis_node_id,
"hypothesis_title": item.hypothesis_title,
"verdict": metric_verdict_name(item.verdict),
- "candidate_checkpoint_id": item.candidate_checkpoint_id,
- "candidate_commit_hash": item.candidate_commit_hash,
"run_id": item.run_id,
"unit": item.unit.map(metric_unit_name),
"objective": item.objective.map(metric_objective_name),
@@ -1527,15 +1484,14 @@ fn metric_best_output(
let mut lines = vec![format!("{} ranked experiment(s)", items.len())];
lines.extend(items.iter().enumerate().map(|(index, item)| {
format!(
- "{}. {}={} [{}] {} | verdict={} | commit={} | checkpoint={}",
+ "{}. {}={} [{}] {} | verdict={} | hypothesis={}",
index + 1,
item.key,
item.value,
item.source.as_str(),
- item.hypothesis_title,
+ item.experiment_title,
metric_verdict_name(item.verdict),
- item.candidate_commit_hash,
- item.candidate_checkpoint_id,
+ item.hypothesis_title,
)
}));
lines.extend(
@@ -1668,17 +1624,13 @@ fn frontier_projection_summary_value(projection: &FrontierProjection) -> Value {
"frontier_id": projection.frontier.id,
"label": projection.frontier.label,
"status": format!("{:?}", projection.frontier.status).to_ascii_lowercase(),
- "champion_checkpoint_id": projection.champion_checkpoint_id,
- "candidate_checkpoint_ids": projection.candidate_checkpoint_ids,
- "experiment_count": projection.experiment_count,
+ "open_experiment_count": projection.open_experiment_count,
+ "completed_experiment_count": projection.completed_experiment_count,
+ "verdict_counts": projection.verdict_counts,
})
}
fn frontier_projection_text(prefix: &str, projection: &FrontierProjection) -> String {
- let champion = projection
- .champion_checkpoint_id
- .map(|value| value.to_string())
- .unwrap_or_else(|| "none".to_owned());
[
format!(
"{prefix} {} {}",
@@ -1688,9 +1640,18 @@ fn frontier_projection_text(prefix: &str, projection: &FrontierProjection) -> St
"status: {}",
format!("{:?}", projection.frontier.status).to_ascii_lowercase()
),
- format!("champion: {champion}"),
- format!("candidates: {}", projection.candidate_checkpoint_ids.len()),
- format!("experiments: {}", projection.experiment_count),
+ format!("open experiments: {}", projection.open_experiment_count),
+ format!(
+ "completed experiments: {}",
+ projection.completed_experiment_count
+ ),
+ format!(
+ "verdicts: accepted={} kept={} parked={} rejected={}",
+ projection.verdict_counts.accepted,
+ projection.verdict_counts.kept,
+ projection.verdict_counts.parked,
+ projection.verdict_counts.rejected,
+ ),
]
.join("\n")
}
@@ -1991,11 +1952,10 @@ fn metric_objective_name(objective: fidget_spinner_core::OptimizationObjective)
fn metric_verdict_name(verdict: FrontierVerdict) -> &'static str {
match verdict {
- FrontierVerdict::PromoteToChampion => "promote_to_champion",
- FrontierVerdict::KeepOnFrontier => "keep_on_frontier",
- FrontierVerdict::RevertToChampion => "revert_to_champion",
- FrontierVerdict::ArchiveDeadEnd => "archive_dead_end",
- FrontierVerdict::NeedsMoreEvidence => "needs_more_evidence",
+ FrontierVerdict::Accepted => "accepted",
+ FrontierVerdict::Kept => "kept",
+ FrontierVerdict::Parked => "parked",
+ FrontierVerdict::Rejected => "rejected",
}
}
@@ -2192,10 +2152,6 @@ fn command_recipe_from_wire(
.map_err(StoreError::from)
}
-fn capture_code_snapshot(project_root: &Utf8Path) -> Result<CodeSnapshotRef, StoreError> {
- crate::capture_code_snapshot(project_root)
-}
-
fn parse_node_class_name(raw: &str) -> Result<NodeClass, StoreError> {
match raw {
"contract" => Ok(NodeClass::Contract),
@@ -2311,11 +2267,10 @@ fn parse_backend_name(raw: &str) -> Result<ExecutionBackend, StoreError> {
fn parse_verdict_name(raw: &str) -> Result<FrontierVerdict, StoreError> {
match raw {
- "promote_to_champion" => Ok(FrontierVerdict::PromoteToChampion),
- "keep_on_frontier" => Ok(FrontierVerdict::KeepOnFrontier),
- "revert_to_champion" => Ok(FrontierVerdict::RevertToChampion),
- "archive_dead_end" => Ok(FrontierVerdict::ArchiveDeadEnd),
- "needs_more_evidence" => Ok(FrontierVerdict::NeedsMoreEvidence),
+ "accepted" => Ok(FrontierVerdict::Accepted),
+ "kept" => Ok(FrontierVerdict::Kept),
+ "parked" => Ok(FrontierVerdict::Parked),
+ "rejected" => Ok(FrontierVerdict::Rejected),
other => Err(crate::invalid_input(format!("unknown verdict `{other}`"))),
}
}
@@ -2342,7 +2297,6 @@ struct FrontierInitToolArgs {
primary_metric: WireMetricSpec,
#[serde(default)]
supporting_metrics: Vec<WireMetricSpec>,
- seed_summary: Option<String>,
}
#[derive(Debug, Deserialize)]
@@ -2480,7 +2434,6 @@ struct MetricBestToolArgs {
#[derive(Debug, Deserialize)]
struct ExperimentOpenToolArgs {
frontier_id: String,
- base_checkpoint_id: String,
hypothesis_node_id: String,
title: String,
summary: Option<String>,
@@ -2499,7 +2452,6 @@ struct ExperimentReadToolArgs {
#[derive(Debug, Deserialize)]
struct ExperimentCloseToolArgs {
experiment_id: String,
- candidate_summary: String,
run: WireRun,
primary_metric: WireMetricValue,
#[serde(default)]
diff --git a/crates/fidget-spinner-cli/tests/mcp_hardening.rs b/crates/fidget-spinner-cli/tests/mcp_hardening.rs
index 0142b77..21a3d04 100644
--- a/crates/fidget-spinner-cli/tests/mcp_hardening.rs
+++ b/crates/fidget-spinner-cli/tests/mcp_hardening.rs
@@ -57,48 +57,6 @@ fn init_project(root: &Utf8PathBuf) -> TestResult {
Ok(())
}
-fn run_command(root: &Utf8PathBuf, program: &str, args: &[&str]) -> TestResult<String> {
- let output = must(
- Command::new(program)
- .current_dir(root.as_std_path())
- .args(args)
- .output(),
- format!("{program} spawn"),
- )?;
- if !output.status.success() {
- return Err(io::Error::other(format!(
- "{program} {:?} failed: {}",
- args,
- String::from_utf8_lossy(&output.stderr)
- ))
- .into());
- }
- Ok(String::from_utf8_lossy(&output.stdout).trim().to_owned())
-}
-
-fn run_git(root: &Utf8PathBuf, args: &[&str]) -> TestResult<String> {
- run_command(root, "git", args)
-}
-
-fn init_git_project(root: &Utf8PathBuf) -> TestResult<String> {
- let _ = run_git(root, &["init", "-b", "main"])?;
- let _ = run_git(root, &["config", "user.name", "main"])?;
- let _ = run_git(root, &["config", "user.email", "main@swarm.moe"])?;
- let _ = run_git(root, &["add", "-A"])?;
- let _ = run_git(root, &["commit", "-m", "initial state"])?;
- run_git(root, &["rev-parse", "HEAD"])
-}
-
-fn commit_project_state(root: &Utf8PathBuf, marker: &str, message: &str) -> TestResult<String> {
- must(
- fs::write(root.join(marker).as_std_path(), message),
- format!("write marker {marker}"),
- )?;
- let _ = run_git(root, &["add", "-A"])?;
- let _ = run_git(root, &["commit", "-m", message])?;
- run_git(root, &["rev-parse", "HEAD"])
-}
-
fn binary_path() -> PathBuf {
PathBuf::from(env!("CARGO_BIN_EXE_fidget-spinner-cli"))
}
@@ -688,7 +646,7 @@ fn tag_registry_drives_note_creation_and_lookup() -> TestResult {
}
#[test]
-fn research_record_accepts_tags_and_filtering() -> TestResult {
+fn source_record_accepts_tags_and_filtering() -> TestResult {
let project_root = temp_project_root("research_tags")?;
init_project(&project_root)?;
@@ -721,10 +679,7 @@ fn research_record_accepts_tags_and_filtering() -> TestResult {
assert_eq!(research["result"]["isError"].as_bool(), Some(false));
let filtered = harness.call_tool(454, "node.list", json!({"tags": ["campaign/libgrid"]}))?;
- let nodes = must_some(
- tool_content(&filtered).as_array(),
- "filtered research nodes",
- )?;
+ let nodes = must_some(tool_content(&filtered).as_array(), "filtered source nodes")?;
assert_eq!(nodes.len(), 1);
assert_eq!(nodes[0]["class"].as_str(), Some("source"));
assert_eq!(nodes[0]["tags"][0].as_str(), Some("campaign/libgrid"));
@@ -760,20 +715,20 @@ fn prose_tools_reject_invalid_shapes_over_mcp() -> TestResult {
.is_some_and(|message| message.contains("summary") || message.contains("missing field"))
);
- let missing_research_summary = harness.call_tool(
+ let missing_source_summary = harness.call_tool(
48,
"source.record",
json!({
- "title": "research only",
+ "title": "source only",
"body": "body only",
}),
)?;
assert_eq!(
- missing_research_summary["result"]["isError"].as_bool(),
+ missing_source_summary["result"]["isError"].as_bool(),
Some(true)
);
assert!(
- fault_message(&missing_research_summary)
+ fault_message(&missing_source_summary)
.is_some_and(|message| message.contains("summary") || message.contains("missing field"))
);
@@ -794,7 +749,7 @@ fn prose_tools_reject_invalid_shapes_over_mcp() -> TestResult {
.is_some_and(|message| message.contains("payload field `body`"))
);
- let research_without_summary = harness.call_tool(
+ let source_without_summary = harness.call_tool(
50,
"node.create",
json!({
@@ -804,11 +759,11 @@ fn prose_tools_reject_invalid_shapes_over_mcp() -> TestResult {
}),
)?;
assert_eq!(
- research_without_summary["result"]["isError"].as_bool(),
+ source_without_summary["result"]["isError"].as_bool(),
Some(true)
);
assert!(
- fault_message(&research_without_summary)
+ fault_message(&source_without_summary)
.is_some_and(|message| message.contains("non-empty summary"))
);
Ok(())
@@ -885,11 +840,8 @@ fn concise_prose_reads_only_surface_payload_field_names() -> TestResult {
}),
)?;
assert_eq!(research["result"]["isError"].as_bool(), Some(false));
- let node_id = must_some(
- tool_content(&research)["id"].as_str(),
- "created research id",
- )?
- .to_owned();
+ let node_id =
+ must_some(tool_content(&research)["id"].as_str(), "created source id")?.to_owned();
let concise = harness.call_tool(533, "node.read", json!({ "node_id": node_id }))?;
let concise_structured = tool_content(&concise);
@@ -1043,7 +995,7 @@ fn bind_open_backfills_legacy_missing_summary() -> TestResult {
store.add_node(fidget_spinner_store_sqlite::CreateNodeRequest {
class: fidget_spinner_core::NodeClass::Source,
frontier_id: None,
- title: must(NonEmptyText::new("legacy research"), "legacy title")?,
+ title: must(NonEmptyText::new("legacy source"), "legacy title")?,
summary: Some(must(
NonEmptyText::new("temporary summary"),
"temporary summary",
@@ -1059,7 +1011,7 @@ fn bind_open_backfills_legacy_missing_summary() -> TestResult {
annotations: Vec::new(),
attachments: Vec::new(),
}),
- "create legacy research node",
+ "create legacy source node",
)?;
node.id.to_string()
};
@@ -1097,7 +1049,7 @@ fn bind_open_backfills_legacy_missing_summary() -> TestResult {
);
let listed = harness.call_tool(62, "node.list", json!({ "class": "source" }))?;
- let items = must_some(tool_content(&listed).as_array(), "research node list")?;
+ let items = must_some(tool_content(&listed).as_array(), "source node list")?;
assert_eq!(items.len(), 1);
assert_eq!(
items[0]["summary"].as_str(),
@@ -1110,7 +1062,6 @@ fn bind_open_backfills_legacy_missing_summary() -> TestResult {
fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResult {
let project_root = temp_project_root("metric_rank_e2e")?;
init_project(&project_root)?;
- let _initial_head = init_git_project(&project_root)?;
let mut harness = McpHarness::spawn(Some(&project_root), &[])?;
let _ = harness.initialize()?;
@@ -1138,11 +1089,6 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
"frontier id",
)?
.to_owned();
- let base_checkpoint_id = must_some(
- tool_content(&frontier)["champion_checkpoint_id"].as_str(),
- "base checkpoint id",
- )?
- .to_owned();
let metric_define = harness.call_tool(
701,
"metric.define",
@@ -1222,7 +1168,6 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
"experiment.open",
json!({
"frontier_id": frontier_id,
- "base_checkpoint_id": base_checkpoint_id,
"hypothesis_node_id": first_change_id,
"title": "first experiment",
"summary": "first experiment summary"
@@ -1233,14 +1178,12 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
tool_content(&first_experiment)["experiment_id"].as_str(),
"first experiment id",
)?;
- let _first_commit = commit_project_state(&project_root, "candidate-one.txt", "candidate one")?;
let first_close = harness.call_tool(
72,
"experiment.close",
json!({
"experiment_id": first_experiment_id,
- "candidate_summary": "candidate one",
"run": {
"title": "first run",
"summary": "first run summary",
@@ -1262,19 +1205,13 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
"note": {
"summary": "first run note"
},
- "verdict": "keep_on_frontier",
+ "verdict": "kept",
"decision_title": "first decision",
"decision_rationale": "keep first candidate around"
}),
)?;
assert_eq!(first_close["result"]["isError"].as_bool(), Some(false));
- let first_candidate_checkpoint_id = must_some(
- tool_content(&first_close)["candidate_checkpoint_id"].as_str(),
- "first candidate checkpoint id",
- )?
- .to_owned();
-
let second_change = harness.call_tool(
73,
"node.create",
@@ -1299,7 +1236,6 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
"experiment.open",
json!({
"frontier_id": frontier_id,
- "base_checkpoint_id": base_checkpoint_id,
"hypothesis_node_id": second_change_id,
"title": "second experiment",
"summary": "second experiment summary"
@@ -1313,14 +1249,12 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
tool_content(&second_experiment)["experiment_id"].as_str(),
"second experiment id",
)?;
- let second_commit = commit_project_state(&project_root, "candidate-two.txt", "candidate two")?;
let second_close = harness.call_tool(
74,
"experiment.close",
json!({
"experiment_id": second_experiment_id,
- "candidate_summary": "candidate two",
"run": {
"title": "second run",
"summary": "second run summary",
@@ -1342,17 +1276,12 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
"note": {
"summary": "second run note"
},
- "verdict": "keep_on_frontier",
+ "verdict": "kept",
"decision_title": "second decision",
"decision_rationale": "second candidate looks stronger"
}),
)?;
assert_eq!(second_close["result"]["isError"].as_bool(), Some(false));
- let second_candidate_checkpoint_id = must_some(
- tool_content(&second_close)["candidate_checkpoint_id"].as_str(),
- "second candidate checkpoint id",
- )?
- .to_owned();
let second_frontier = harness.call_tool(
80,
@@ -1376,11 +1305,6 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
"second frontier id",
)?
.to_owned();
- let second_base_checkpoint_id = must_some(
- tool_content(&second_frontier)["champion_checkpoint_id"].as_str(),
- "second frontier base checkpoint id",
- )?
- .to_owned();
let third_change = harness.call_tool(
81,
@@ -1406,7 +1330,6 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
"experiment.open",
json!({
"frontier_id": second_frontier_id,
- "base_checkpoint_id": second_base_checkpoint_id,
"hypothesis_node_id": third_change_id,
"title": "third experiment",
"summary": "third experiment summary"
@@ -1417,15 +1340,12 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
tool_content(&third_experiment)["experiment_id"].as_str(),
"third experiment id",
)?;
- let third_commit =
- commit_project_state(&project_root, "candidate-three.txt", "candidate three")?;
let third_close = harness.call_tool(
82,
"experiment.close",
json!({
"experiment_id": third_experiment_id,
- "candidate_summary": "candidate three",
"run": {
"title": "third run",
"summary": "third run summary",
@@ -1447,17 +1367,12 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
"note": {
"summary": "third run note"
},
- "verdict": "keep_on_frontier",
+ "verdict": "kept",
"decision_title": "third decision",
"decision_rationale": "third candidate is best overall but not in the first frontier"
}),
)?;
assert_eq!(third_close["result"]["isError"].as_bool(), Some(false));
- let third_candidate_checkpoint_id = must_some(
- tool_content(&third_close)["candidate_checkpoint_id"].as_str(),
- "third candidate checkpoint id",
- )?
- .to_owned();
let keys = harness.call_tool(75, "metric.keys", json!({}))?;
assert_eq!(keys["result"]["isError"].as_bool(), Some(false));
@@ -1524,13 +1439,10 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
assert_eq!(run_best_rows[0]["value"].as_f64(), Some(5.0));
assert_eq!(run_best_rows.len(), 1);
assert_eq!(
- run_best_rows[0]["candidate_checkpoint_id"].as_str(),
- Some(second_candidate_checkpoint_id.as_str())
- );
- assert_eq!(
- run_best_rows[0]["candidate_commit_hash"].as_str(),
- Some(second_commit.as_str())
+ run_best_rows[0]["experiment_title"].as_str(),
+ Some("second experiment")
);
+ assert_eq!(run_best_rows[0]["verdict"].as_str(), Some("kept"));
assert_eq!(
run_best_rows[0]["dimensions"]["scenario"].as_str(),
Some("belt_4x5")
@@ -1539,7 +1451,9 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
run_best_rows[0]["dimensions"]["duration_s"].as_f64(),
Some(60.0)
);
- assert!(must_some(tool_text(&run_metric_best), "run metric best text")?.contains("commit="));
+ assert!(
+ must_some(tool_text(&run_metric_best), "run metric best text")?.contains("hypothesis=")
+ );
assert!(must_some(tool_text(&run_metric_best), "run metric best text")?.contains("dims:"));
let payload_requires_order = harness.call_tool(
@@ -1580,12 +1494,8 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
assert_eq!(payload_best_rows[0]["value"].as_f64(), Some(7.0));
assert_eq!(payload_best_rows.len(), 1);
assert_eq!(
- payload_best_rows[0]["candidate_checkpoint_id"].as_str(),
- Some(second_candidate_checkpoint_id.as_str())
- );
- assert_eq!(
- payload_best_rows[0]["candidate_commit_hash"].as_str(),
- Some(second_commit.as_str())
+ payload_best_rows[0]["experiment_title"].as_str(),
+ Some("second experiment")
);
let filtered_best = harness.call_tool(
@@ -1608,8 +1518,8 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
)?;
assert_eq!(filtered_rows.len(), 2);
assert_eq!(
- filtered_rows[0]["candidate_checkpoint_id"].as_str(),
- Some(second_candidate_checkpoint_id.as_str())
+ filtered_rows[0]["experiment_title"].as_str(),
+ Some("second experiment")
);
assert!(
filtered_rows
@@ -1632,12 +1542,12 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
"global metric best array",
)?;
assert_eq!(
- global_rows[0]["candidate_checkpoint_id"].as_str(),
- Some(third_candidate_checkpoint_id.as_str())
+ global_rows[0]["experiment_title"].as_str(),
+ Some("third experiment")
);
assert_eq!(
- global_rows[0]["candidate_commit_hash"].as_str(),
- Some(third_commit.as_str())
+ global_rows[0]["frontier_id"].as_str(),
+ Some(second_frontier_id.as_str())
);
let migrate = harness.call_tool(85, "metric.migrate", json!({}))?;
@@ -1654,14 +1564,5 @@ fn metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResu
tool_content(&migrate)["inserted_dimension_values"].as_u64(),
Some(0)
);
-
- assert_ne!(
- first_candidate_checkpoint_id,
- second_candidate_checkpoint_id
- );
- assert_ne!(
- second_candidate_checkpoint_id,
- third_candidate_checkpoint_id
- );
Ok(())
}