From 9d63844f3a28fde70b19500422f17379e99e588a Mon Sep 17 00:00:00 2001 From: main Date: Fri, 20 Mar 2026 16:00:30 -0400 Subject: Refound Spinner as an austere frontier ledger --- crates/fidget-spinner-cli/tests/mcp_hardening.rs | 1574 +++++----------------- 1 file changed, 302 insertions(+), 1272 deletions(-) (limited to 'crates/fidget-spinner-cli/tests') diff --git a/crates/fidget-spinner-cli/tests/mcp_hardening.rs b/crates/fidget-spinner-cli/tests/mcp_hardening.rs index 21a3d04..fad4937 100644 --- a/crates/fidget-spinner-cli/tests/mcp_hardening.rs +++ b/crates/fidget-spinner-cli/tests/mcp_hardening.rs @@ -1,22 +1,21 @@ use axum as _; +use clap as _; +use dirs as _; use std::fs; use std::io::{self, BufRead, BufReader, Write}; use std::path::PathBuf; use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; use camino::Utf8PathBuf; -use clap as _; -use dirs as _; use fidget_spinner_core::NonEmptyText; -use fidget_spinner_store_sqlite::{ListNodesQuery, ProjectStore}; +use fidget_spinner_store_sqlite::ProjectStore; use libmcp as _; -use linkify as _; use maud as _; +use percent_encoding as _; use serde as _; use serde_json::{Value, json}; use time as _; use tokio as _; -use uuid as _; type TestResult = Result>; @@ -50,7 +49,6 @@ fn init_project(root: &Utf8PathBuf) -> TestResult { ProjectStore::init( root, must(NonEmptyText::new("mcp test project"), "display name")?, - must(NonEmptyText::new("local.mcp.test"), "namespace")?, ), "init project store", )?; @@ -68,7 +66,7 @@ struct McpHarness { } impl McpHarness { - fn spawn(project_root: Option<&Utf8PathBuf>, envs: &[(&str, String)]) -> TestResult { + fn spawn(project_root: Option<&Utf8PathBuf>) -> TestResult { let mut command = Command::new(binary_path()); let _ = command .arg("mcp") @@ -79,9 +77,6 @@ impl McpHarness { if let Some(project_root) = project_root { let _ = command.arg("--project").arg(project_root.as_str()); } - for (key, value) in envs { - let _ = command.env(key, value); - } let mut child = 
must(command.spawn(), "spawn mcp host")?; let stdin = must_some(child.stdin.take(), "host stdin")?; let stdout = BufReader::new(must_some(child.stdout.take(), "host stdout")?); @@ -137,6 +132,13 @@ impl McpHarness { })) } + fn call_tool_full(&mut self, id: u64, name: &str, arguments: Value) -> TestResult { + let mut arguments = arguments.as_object().cloned().unwrap_or_default(); + let _ = arguments.insert("render".to_owned(), json!("json")); + let _ = arguments.insert("detail".to_owned(), json!("full")); + self.call_tool(id, name, Value::Object(arguments)) + } + fn request(&mut self, message: Value) -> TestResult { let encoded = must(serde_json::to_string(&message), "request json")?; must(writeln!(self.stdin, "{encoded}"), "write request")?; @@ -168,1401 +170,429 @@ fn tool_content(response: &Value) -> &Value { &response["result"]["structuredContent"] } -fn tool_text(response: &Value) -> Option<&str> { - response["result"]["content"] - .as_array() - .and_then(|content| content.first()) - .and_then(|entry| entry["text"].as_str()) -} - -fn fault_message(response: &Value) -> Option<&str> { +fn tool_error_message(response: &Value) -> Option<&str> { response["result"]["structuredContent"]["message"].as_str() } -#[test] -fn cold_start_exposes_health_and_telemetry() -> TestResult { - let project_root = temp_project_root("cold_start")?; - init_project(&project_root)?; - - let mut harness = McpHarness::spawn(None, &[])?; - let initialize = harness.initialize()?; - assert_eq!( - initialize["result"]["protocolVersion"].as_str(), - Some("2025-11-25") - ); - harness.notify_initialized()?; - - let tools = harness.tools_list()?; - let tool_count = must_some(tools["result"]["tools"].as_array(), "tools array")?.len(); - assert!(tool_count >= 20); - - let health = harness.call_tool(3, "system.health", json!({}))?; - assert_eq!(tool_content(&health)["ready"].as_bool(), Some(true)); - assert_eq!(tool_content(&health)["bound"].as_bool(), Some(false)); - - let telemetry = 
harness.call_tool(4, "system.telemetry", json!({}))?; - assert!(tool_content(&telemetry)["requests"].as_u64().unwrap_or(0) >= 3); - - let skills = harness.call_tool(15, "skill.list", json!({}))?; - let skill_names = must_some( - tool_content(&skills)["skills"].as_array(), - "bundled skills array", - )? - .iter() - .filter_map(|skill| skill["name"].as_str()) - .collect::>(); - assert!(skill_names.contains(&"fidget-spinner")); - assert!(skill_names.contains(&"frontier-loop")); - - let base_skill = harness.call_tool(16, "skill.show", json!({"name": "fidget-spinner"}))?; +fn assert_tool_ok(response: &Value) { assert_eq!( - tool_content(&base_skill)["name"].as_str(), - Some("fidget-spinner") + response["result"]["isError"].as_bool(), + Some(false), + "tool response unexpectedly errored: {response:#}" ); - Ok(()) } -#[test] -fn tool_output_defaults_to_porcelain_and_supports_json_render() -> TestResult { - let project_root = temp_project_root("render_modes")?; - init_project(&project_root)?; - - let mut harness = McpHarness::spawn(None, &[])?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - let bind = harness.bind_project(21, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); - - let porcelain = harness.call_tool(22, "project.status", json!({}))?; - let porcelain_text = must_some(tool_text(&porcelain), "porcelain project.status text")?; - assert!(porcelain_text.contains("root:")); - assert!(!porcelain_text.contains("\"project_root\":")); - - let health = harness.call_tool(23, "system.health", json!({}))?; - let health_text = must_some(tool_text(&health), "porcelain system.health text")?; - assert!(health_text.contains("ready | bound")); - assert!(health_text.contains("binary:")); - - let frontier = harness.call_tool( - 24, - "frontier.init", - json!({ - "label": "render frontier", - "objective": "exercise porcelain output", - "contract_title": "render contract", - "benchmark_suites": ["smoke"], - "promotion_criteria": 
["retain key fields in porcelain"], - "primary_metric": { - "key": "score", - "unit": "count", - "objective": "maximize" - } - }), - )?; - assert_eq!(frontier["result"]["isError"].as_bool(), Some(false)); - - let frontier_list = harness.call_tool(25, "frontier.list", json!({}))?; - let frontier_text = must_some(tool_text(&frontier_list), "porcelain frontier.list text")?; - assert!(frontier_text.contains("render frontier")); - assert!(!frontier_text.contains("root_contract_node_id")); - - let json_render = harness.call_tool(26, "project.status", json!({"render": "json"}))?; - let json_text = must_some(tool_text(&json_render), "json project.status text")?; - assert!(json_text.contains("\"project_root\":")); - assert!(json_text.trim_start().starts_with('{')); - - let json_full = harness.call_tool( - 27, - "project.status", - json!({"render": "json", "detail": "full"}), - )?; - let json_full_text = must_some(tool_text(&json_full), "json full project.status text")?; - assert!(json_full_text.contains("\"schema\": {")); - Ok(()) -} - -#[test] -fn safe_request_retries_after_worker_crash() -> TestResult { - let project_root = temp_project_root("crash_retry")?; - init_project(&project_root)?; - - let mut harness = McpHarness::spawn( - None, - &[( - "FIDGET_SPINNER_MCP_TEST_HOST_CRASH_ONCE_KEY", - "tools/call:project.status".to_owned(), - )], - )?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - let bind = harness.bind_project(3, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); - - let response = harness.call_tool(5, "project.status", json!({}))?; - assert_eq!(response["result"]["isError"].as_bool(), Some(false)); - - let telemetry = harness.call_tool(6, "system.telemetry", json!({}))?; - assert_eq!(tool_content(&telemetry)["retries"].as_u64(), Some(1)); +fn assert_tool_error(response: &Value) { assert_eq!( - tool_content(&telemetry)["worker_restarts"].as_u64(), - Some(1) + response["result"]["isError"].as_bool(), + 
Some(true), + "tool response unexpectedly succeeded: {response:#}" ); - Ok(()) } -#[test] -fn safe_request_retries_after_worker_transient_fault() -> TestResult { - let project_root = temp_project_root("transient_retry")?; - init_project(&project_root)?; - let marker = project_root.join("transient_once.marker"); - - let mut harness = McpHarness::spawn( - None, - &[ - ( - "FIDGET_SPINNER_MCP_TEST_WORKER_TRANSIENT_ONCE_KEY", - "tools/call:project.status".to_owned(), - ), - ( - "FIDGET_SPINNER_MCP_TEST_WORKER_TRANSIENT_ONCE_MARKER", - marker.to_string(), - ), - ], - )?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - let bind = harness.bind_project(12, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); - - let response = harness.call_tool(13, "project.status", json!({}))?; - assert_eq!(response["result"]["isError"].as_bool(), Some(false)); - - let telemetry = harness.call_tool(14, "system.telemetry", json!({}))?; - assert_eq!(tool_content(&telemetry)["retries"].as_u64(), Some(1)); - assert_eq!( - tool_content(&telemetry)["worker_restarts"].as_u64(), - Some(1) - ); - Ok(()) -} - -#[test] -fn side_effecting_request_is_not_replayed_after_worker_crash() -> TestResult { - let project_root = temp_project_root("no_replay")?; - init_project(&project_root)?; - - let mut harness = McpHarness::spawn( - None, - &[( - "FIDGET_SPINNER_MCP_TEST_HOST_CRASH_ONCE_KEY", - "tools/call:source.record".to_owned(), - )], - )?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - let bind = harness.bind_project(6, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); - - let response = harness.call_tool( - 7, - "source.record", - json!({ - "title": "should not duplicate", - "summary": "dedupe check", - "body": "host crash before worker execution", - }), - )?; - assert_eq!(response["result"]["isError"].as_bool(), Some(true)); - - let nodes = harness.call_tool(8, "node.list", json!({}))?; - assert_eq!( - 
must_some(tool_content(&nodes).as_array(), "node list")?.len(), - 0 - ); - - let telemetry = harness.call_tool(9, "system.telemetry", json!({}))?; - assert_eq!(tool_content(&telemetry)["retries"].as_u64(), Some(0)); - Ok(()) +fn tool_names(response: &Value) -> Vec<&str> { + response["result"]["tools"] + .as_array() + .into_iter() + .flatten() + .filter_map(|tool| tool["name"].as_str()) + .collect() } #[test] -fn forced_rollout_preserves_initialized_state() -> TestResult { - let project_root = temp_project_root("rollout")?; +fn cold_start_exposes_bound_surface_and_new_toolset() -> TestResult { + let project_root = temp_project_root("cold_start")?; init_project(&project_root)?; - let mut harness = McpHarness::spawn( - None, - &[( - "FIDGET_SPINNER_MCP_TEST_FORCE_ROLLOUT_KEY", - "tools/call:project.status".to_owned(), - )], - )?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - let bind = harness.bind_project(9, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); - - let first = harness.call_tool(10, "project.status", json!({}))?; - assert_eq!(first["result"]["isError"].as_bool(), Some(false)); - - let second = harness.call_tool(11, "project.status", json!({}))?; - assert_eq!(second["result"]["isError"].as_bool(), Some(false)); - - let telemetry = harness.call_tool(12, "system.telemetry", json!({}))?; - assert_eq!(tool_content(&telemetry)["host_rollouts"].as_u64(), Some(1)); - Ok(()) -} - -#[test] -fn unbound_project_tools_fail_with_bind_hint() -> TestResult { - let mut harness = McpHarness::spawn(None, &[])?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - - let response = harness.call_tool(20, "project.status", json!({}))?; - assert_eq!(response["result"]["isError"].as_bool(), Some(true)); - let message = response["result"]["structuredContent"]["message"].as_str(); - assert!(message.is_some_and(|message| message.contains("project.bind"))); - Ok(()) -} - -#[test] -fn 
bind_bootstraps_empty_project_root() -> TestResult { - let project_root = temp_project_root("bind_bootstrap")?; - - let mut harness = McpHarness::spawn(None, &[])?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - - let bind = harness.bind_project(28, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); - assert_eq!( - tool_content(&bind)["project_root"].as_str(), - Some(project_root.as_str()) - ); - - let status = harness.call_tool(29, "project.status", json!({}))?; - assert_eq!(status["result"]["isError"].as_bool(), Some(false)); - assert_eq!( - tool_content(&status)["project_root"].as_str(), - Some(project_root.as_str()) - ); - - let store = must(ProjectStore::open(&project_root), "open bootstrapped store")?; - assert_eq!(store.project_root().as_str(), project_root.as_str()); - Ok(()) -} - -#[test] -fn bind_rejects_nonempty_uninitialized_root() -> TestResult { - let project_root = temp_project_root("bind_nonempty")?; - must( - fs::write(project_root.join("README.txt").as_std_path(), "occupied"), - "seed nonempty directory", - )?; - - let mut harness = McpHarness::spawn(None, &[])?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - - let bind = harness.bind_project(30, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(true)); - Ok(()) -} - -#[test] -fn successful_bind_clears_stale_fault_from_health() -> TestResult { - let bad_root = temp_project_root("bind_fault_bad")?; - must( - fs::write(bad_root.join("README.txt").as_std_path(), "occupied"), - "seed bad bind root", - )?; - let good_root = temp_project_root("bind_fault_good")?; - init_project(&good_root)?; - - let mut harness = McpHarness::spawn(None, &[])?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - - let failed_bind = harness.bind_project(301, &bad_root)?; - assert_eq!(failed_bind["result"]["isError"].as_bool(), Some(true)); - - let failed_health = harness.call_tool(302, "system.health", json!({ 
"detail": "full" }))?; - assert_eq!( - tool_content(&failed_health)["last_fault"]["operation"].as_str(), - Some("tools/call:project.bind") - ); - - let good_bind = harness.bind_project(303, &good_root)?; - assert_eq!(good_bind["result"]["isError"].as_bool(), Some(false)); - - let recovered_health = harness.call_tool(304, "system.health", json!({}))?; - assert_eq!(recovered_health["result"]["isError"].as_bool(), Some(false)); - assert!(tool_content(&recovered_health).get("last_fault").is_none()); - assert!(!must_some(tool_text(&recovered_health), "recovered health text")?.contains("fault:")); - - let recovered_health_full = - harness.call_tool(306, "system.health", json!({ "detail": "full" }))?; - assert_eq!( - tool_content(&recovered_health_full)["last_fault"], - Value::Null, - ); - - let recovered_telemetry = harness.call_tool(305, "system.telemetry", json!({}))?; - assert_eq!( - recovered_telemetry["result"]["isError"].as_bool(), - Some(false) - ); + let mut harness = McpHarness::spawn(None)?; + let initialize = harness.initialize()?; assert_eq!( - tool_content(&recovered_telemetry)["errors"].as_u64(), - Some(1) + initialize["result"]["protocolVersion"].as_str(), + Some("2025-11-25") ); - assert!(tool_content(&recovered_telemetry)["last_fault"].is_null()); - Ok(()) -} - -#[test] -fn bind_retargets_writes_to_sibling_project_root() -> TestResult { - let spinner_root = temp_project_root("spinner_root")?; - let libgrid_root = temp_project_root("libgrid_root")?; - init_project(&spinner_root)?; - init_project(&libgrid_root)?; - let notes_dir = libgrid_root.join("notes"); - must( - fs::create_dir_all(notes_dir.as_std_path()), - "create nested notes dir", - )?; - - let mut harness = McpHarness::spawn(Some(&spinner_root), &[])?; - let _ = harness.initialize()?; harness.notify_initialized()?; - let initial_status = harness.call_tool(31, "project.status", json!({}))?; - assert_eq!( - tool_content(&initial_status)["project_root"].as_str(), - Some(spinner_root.as_str()) - ); 
+ let tools = harness.tools_list()?; + let tool_names = tool_names(&tools); + assert!(tool_names.contains(&"frontier.open")); + assert!(tool_names.contains(&"hypothesis.record")); + assert!(tool_names.contains(&"experiment.close")); + assert!(tool_names.contains(&"artifact.record")); + assert!(!tool_names.contains(&"node.list")); + assert!(!tool_names.contains(&"research.record")); - let rebind = harness.bind_project(32, &notes_dir)?; - assert_eq!(rebind["result"]["isError"].as_bool(), Some(false)); - assert_eq!( - tool_content(&rebind)["project_root"].as_str(), - Some(libgrid_root.as_str()) - ); + let health = harness.call_tool(3, "system.health", json!({}))?; + assert_tool_ok(&health); + assert_eq!(tool_content(&health)["bound"].as_bool(), Some(false)); - let status = harness.call_tool(33, "project.status", json!({}))?; + let bind = harness.bind_project(4, &project_root)?; + assert_tool_ok(&bind); assert_eq!( - tool_content(&status)["project_root"].as_str(), - Some(libgrid_root.as_str()) + tool_content(&bind)["display_name"].as_str(), + Some("mcp test project") ); - let note = harness.call_tool( - 34, - "note.quick", - json!({ - "title": "libgrid dogfood note", - "summary": "rebind summary", - "body": "rebind should redirect writes", - "tags": [], - }), - )?; - assert_eq!(note["result"]["isError"].as_bool(), Some(false)); - - let spinner_store = must(ProjectStore::open(&spinner_root), "open spinner store")?; - let libgrid_store = must(ProjectStore::open(&libgrid_root), "open libgrid store")?; - assert_eq!( - must( - spinner_store.list_nodes(ListNodesQuery::default()), - "list spinner nodes after rebind" - )? - .len(), - 0 - ); - assert_eq!( - must( - libgrid_store.list_nodes(ListNodesQuery::default()), - "list libgrid nodes after rebind" - )?
- .len(), - 1 - ); + let rebound_health = harness.call_tool(5, "system.health", json!({}))?; + assert_tool_ok(&rebound_health); + assert_eq!(tool_content(&rebound_health)["bound"].as_bool(), Some(true)); Ok(()) } #[test] -fn tag_registry_drives_note_creation_and_lookup() -> TestResult { - let project_root = temp_project_root("tag_registry")?; +fn frontier_open_is_the_grounding_surface_for_live_state() -> TestResult { + let project_root = temp_project_root("frontier_open")?; init_project(&project_root)?; - let mut harness = McpHarness::spawn(None, &[])?; + let mut harness = McpHarness::spawn(Some(&project_root))?; let _ = harness.initialize()?; harness.notify_initialized()?; - let bind = harness.bind_project(40, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); - let missing_tags = harness.call_tool( - 41, - "note.quick", - json!({ - "title": "untagged", - "summary": "should fail without explicit tags", - "body": "should fail", - }), - )?; - assert_eq!(missing_tags["result"]["isError"].as_bool(), Some(true)); - - let tag = harness.call_tool( - 42, + assert_tool_ok(&harness.call_tool( + 10, "tag.add", + json!({"name": "root-conquest", "description": "root work"}), + )?); + assert_tool_ok(&harness.call_tool( + 11, + "metric.define", json!({ - "name": "dogfood/mcp", - "description": "MCP dogfood observations", + "key": "nodes_solved", + "unit": "count", + "objective": "maximize", + "visibility": "canonical", }), - )?; - assert_eq!(tag["result"]["isError"].as_bool(), Some(false)); - - let tag_list = harness.call_tool(43, "tag.list", json!({}))?; - let tags = must_some(tool_content(&tag_list).as_array(), "tag list")?; - assert_eq!(tags.len(), 1); - assert_eq!(tags[0]["name"].as_str(), Some("dogfood/mcp")); - - let note = harness.call_tool( - 44, - "note.quick", + )?); + assert_tool_ok(&harness.call_tool( + 12, + "run.dimension.define", + json!({"key": "instance", "value_type": "string"}), + )?); + assert_tool_ok(&harness.call_tool( + 13, + 
"frontier.create", + json!({ + "label": "LP root frontier", + "objective": "Drive root cash-out on braid rails", + "slug": "lp-root", + }), + )?); + assert_tool_ok(&harness.call_tool( + 14, + "hypothesis.record", + json!({ + "frontier": "lp-root", + "slug": "node-local-loop", + "title": "Node-local logical cut loop", + "summary": "Push cut cash-out below root.", + "body": "Thread node-local logical cuts through native LP reoptimization so the same intervention can cash out below root on parity rails without corrupting root ownership semantics.", + "tags": ["root-conquest"], + }), + )?); + assert_tool_ok(&harness.call_tool( + 15, + "experiment.open", json!({ - "title": "tagged note", - "summary": "tagged lookup summary", - "body": "tagged lookup should work", - "tags": ["dogfood/mcp"], + "hypothesis": "node-local-loop", + "slug": "baseline-20s", + "title": "Baseline parity 20s", + "summary": "Reference rail.", + "tags": ["root-conquest"], }), - )?; - assert_eq!(note["result"]["isError"].as_bool(), Some(false)); - - let filtered = harness.call_tool(45, "node.list", json!({"tags": ["dogfood/mcp"]}))?; - let nodes = must_some(tool_content(&filtered).as_array(), "filtered nodes")?; - assert_eq!(nodes.len(), 1); - assert_eq!(nodes[0]["tags"][0].as_str(), Some("dogfood/mcp")); - Ok(()) -} - -#[test] -fn source_record_accepts_tags_and_filtering() -> TestResult { - let project_root = temp_project_root("research_tags")?; - init_project(&project_root)?; - - let mut harness = McpHarness::spawn(None, &[])?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - let bind = harness.bind_project(451, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); - - let tag = harness.call_tool( - 452, - "tag.add", + )?); + assert_tool_ok(&harness.call_tool( + 16, + "experiment.close", json!({ - "name": "campaign/libgrid", - "description": "libgrid migration campaign", + "experiment": "baseline-20s", + "backend": "manual", + "command": {"argv": 
["baseline-20s"]}, + "dimensions": {"instance": "4x5-braid"}, + "primary_metric": {"key": "nodes_solved", "value": 220.0}, + "verdict": "kept", + "rationale": "Baseline retained as the current comparison line for the slice." }), - )?; - assert_eq!(tag["result"]["isError"].as_bool(), Some(false)); - - let research = harness.call_tool( - 453, - "source.record", + )?); + assert_tool_ok(&harness.call_tool( + 17, + "experiment.open", json!({ - "title": "ingest tranche", - "summary": "Import the next libgrid tranche.", - "body": "Full import notes live here.", - "tags": ["campaign/libgrid"], + "hypothesis": "node-local-loop", + "slug": "loop-20s", + "title": "Loop parity 20s", + "summary": "Live challenger.", + "tags": ["root-conquest"], + "parents": [{"kind": "experiment", "selector": "baseline-20s"}], }), - )?; - assert_eq!(research["result"]["isError"].as_bool(), Some(false)); - - let filtered = harness.call_tool(454, "node.list", json!({"tags": ["campaign/libgrid"]}))?; - let nodes = must_some(tool_content(&filtered).as_array(), "filtered source nodes")?; - assert_eq!(nodes.len(), 1); - assert_eq!(nodes[0]["class"].as_str(), Some("source")); - assert_eq!(nodes[0]["tags"][0].as_str(), Some("campaign/libgrid")); - Ok(()) -} - -#[test] -fn prose_tools_reject_invalid_shapes_over_mcp() -> TestResult { - let project_root = temp_project_root("prose_invalid")?; - init_project(&project_root)?; + )?); - let mut harness = McpHarness::spawn(None, &[])?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - let bind = harness.bind_project(46, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); - - let missing_note_summary = harness.call_tool( - 47, - "note.quick", - json!({ - "title": "untagged", - "body": "body only", - "tags": [], - }), - )?; + let frontier_open = + harness.call_tool_full(18, "frontier.open", json!({"frontier": "lp-root"}))?; + assert_tool_ok(&frontier_open); + let content = tool_content(&frontier_open); + 
assert_eq!(content["frontier"]["slug"].as_str(), Some("lp-root")); assert_eq!( - missing_note_summary["result"]["isError"].as_bool(), - Some(true) + must_some(content["active_tags"].as_array(), "active tags array")? + .iter() + .filter_map(Value::as_str) + .collect::>(), + vec!["root-conquest"] ); assert!( - fault_message(&missing_note_summary) - .is_some_and(|message| message.contains("summary") || message.contains("missing field")) + must_some( + content["active_metric_keys"].as_array(), + "active metric keys array" + )? + .iter() + .any(|metric| metric["key"].as_str() == Some("nodes_solved")) ); - - let missing_source_summary = harness.call_tool( - 48, - "source.record", - json!({ - "title": "source only", - "body": "body only", - }), + let active_hypotheses = must_some( + content["active_hypotheses"].as_array(), + "active hypotheses array", )?; + assert_eq!(active_hypotheses.len(), 1); assert_eq!( - missing_source_summary["result"]["isError"].as_bool(), - Some(true) - ); - assert!( - fault_message(&missing_source_summary) - .is_some_and(|message| message.contains("summary") || message.contains("missing field")) + active_hypotheses[0]["hypothesis"]["slug"].as_str(), + Some("node-local-loop") ); - - let note_without_body = harness.call_tool( - 49, - "node.create", - json!({ - "class": "note", - "title": "missing body", - "summary": "triage layer", - "tags": [], - "payload": {}, - }), - )?; - assert_eq!(note_without_body["result"]["isError"].as_bool(), Some(true)); - assert!( - fault_message(&note_without_body) - .is_some_and(|message| message.contains("payload field `body`")) - ); - - let source_without_summary = harness.call_tool( - 50, - "node.create", - json!({ - "class": "source", - "title": "missing summary", - "payload": { "body": "full research body" }, - }), - )?; assert_eq!( - source_without_summary["result"]["isError"].as_bool(), - Some(true) + active_hypotheses[0]["latest_closed_experiment"]["slug"].as_str(), + Some("baseline-20s") ); - assert!( -
fault_message(&source_without_summary) - .is_some_and(|message| message.contains("non-empty summary")) + assert_eq!( + must_some( + content["open_experiments"].as_array(), + "open experiments array" + )?[0]["slug"] + .as_str(), + Some("loop-20s") ); + assert!(content.get("artifacts").is_none()); + assert!(active_hypotheses[0]["hypothesis"].get("body").is_none()); Ok(()) } #[test] -fn concise_note_reads_do_not_leak_body_text() -> TestResult { - let project_root = temp_project_root("concise_note_read")?; +fn hypothesis_body_discipline_is_enforced_over_mcp() -> TestResult { + let project_root = temp_project_root("single_paragraph")?; init_project(&project_root)?; - let mut harness = McpHarness::spawn(None, &[])?; + let mut harness = McpHarness::spawn(Some(&project_root))?; let _ = harness.initialize()?; harness.notify_initialized()?; - let bind = harness.bind_project(50, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); - let note = harness.call_tool( - 51, - "note.quick", + assert_tool_ok(&harness.call_tool( + 20, + "frontier.create", json!({ - "title": "tagged note", - "summary": "triage layer", - "body": "full note body should stay out of concise reads", - "tags": [], + "label": "Import frontier", + "objective": "Stress hypothesis discipline", + "slug": "discipline", }), - )?; - assert_eq!(note["result"]["isError"].as_bool(), Some(false)); - let node_id = must_some(tool_content(&note)["id"].as_str(), "created note id")?.to_owned(); - - let concise = harness.call_tool(52, "node.read", json!({ "node_id": node_id }))?; - let concise_structured = tool_content(&concise); - assert_eq!(concise_structured["summary"].as_str(), Some("triage layer")); - assert!(concise_structured["payload_preview"].get("body").is_none()); - assert!( - !must_some(tool_text(&concise), "concise note.read text")?
- .contains("full note body should stay out of concise reads") - ); + )?); - let full = harness.call_tool( - 53, - "node.read", - json!({ "node_id": node_id, "detail": "full" }), - )?; - assert_eq!( - tool_content(&full)["payload"]["fields"]["body"].as_str(), - Some("full note body should stay out of concise reads") - ); - Ok(()) -} - -#[test] -fn concise_prose_reads_only_surface_payload_field_names() -> TestResult { - let project_root = temp_project_root("concise_prose_field_names")?; - init_project(&project_root)?; - - let mut harness = McpHarness::spawn(None, &[])?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - let bind = harness.bind_project(531, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); - - let research = harness.call_tool( - 532, - "node.create", + let response = harness.call_tool( + 21, + "hypothesis.record", json!({ - "class": "source", - "title": "rich import", - "summary": "triage layer only", - "payload": { - "body": "Body stays out of concise output.", - "source_excerpt": "This imported excerpt is intentionally long and should never reappear in concise node reads as a value preview.", - "verbatim_snippet": "Another long snippet that belongs in full payload inspection only, not in triage surfaces." 
- } + "frontier": "discipline", + "title": "Paragraph discipline", + "summary": "Should reject multi-paragraph bodies.", + "body": "first paragraph\n\nsecond paragraph", }), )?; - assert_eq!(research["result"]["isError"].as_bool(), Some(false)); - let node_id = - must_some(tool_content(&research)["id"].as_str(), "created source id")?.to_owned(); - - let concise = harness.call_tool(533, "node.read", json!({ "node_id": node_id }))?; - let concise_structured = tool_content(&concise); - assert_eq!(concise_structured["payload_field_count"].as_u64(), Some(2)); - let payload_fields = must_some( - concise_structured["payload_fields"].as_array(), - "concise prose payload fields", - )?; - assert!( - payload_fields - .iter() - .any(|field| field.as_str() == Some("source_excerpt")) - ); - assert!(concise_structured.get("payload_preview").is_none()); - let concise_text = must_some(tool_text(&concise), "concise prose read text")?; - assert!(!concise_text.contains("This imported excerpt is intentionally long")); - assert!(concise_text.contains("payload fields: source_excerpt, verbatim_snippet")); + assert_tool_error(&response); + assert!(must_some(tool_error_message(&response), "fault message")?.contains("paragraph")); Ok(()) } #[test] -fn node_list_does_not_enumerate_full_prose_bodies() -> TestResult { - let project_root = temp_project_root("node_list_no_body_leak")?; +fn artifact_surface_preserves_reference_only() -> TestResult { + let project_root = temp_project_root("artifact_reference")?; init_project(&project_root)?; - let mut harness = McpHarness::spawn(None, &[])?; + let mut harness = McpHarness::spawn(Some(&project_root))?; let _ = harness.initialize()?; harness.notify_initialized()?; - let bind = harness.bind_project(54, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); - let note = harness.call_tool( - 55, - "note.quick", + assert_tool_ok(&harness.call_tool( + 30, + "frontier.create", json!({ - "title": "tagged note", - "summary": "triage 
summary", - "body": "full note body should never appear in list-like surfaces", - "tags": [], + "label": "Artifacts frontier", + "objective": "Keep dumps out of the token hot path", + "slug": "artifacts", }), - )?; - assert_eq!(note["result"]["isError"].as_bool(), Some(false)); - - let listed = harness.call_tool(56, "node.list", json!({ "class": "note" }))?; - let listed_rows = must_some(tool_content(&listed).as_array(), "listed note rows")?; - assert_eq!(listed_rows.len(), 1); - assert_eq!(listed_rows[0]["summary"].as_str(), Some("triage summary")); - assert!(listed_rows[0].get("body").is_none()); - assert!( - !must_some(tool_text(&listed), "node.list text")? - .contains("full note body should never appear in list-like surfaces") - ); - Ok(()) -} - -#[test] -fn metric_tools_are_listed_for_discovery() -> TestResult { - let project_root = temp_project_root("metric_tool_list")?; - init_project(&project_root)?; - - let mut harness = McpHarness::spawn(Some(&project_root), &[])?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - let tools = harness.tools_list()?; - let names = must_some(tools["result"]["tools"].as_array(), "tool list")? 
- .iter() - .filter_map(|tool| tool["name"].as_str()) - .collect::>(); - assert!(names.contains(&"metric.define")); - assert!(names.contains(&"metric.keys")); - assert!(names.contains(&"metric.best")); - assert!(names.contains(&"metric.migrate")); - assert!(names.contains(&"run.dimension.define")); - assert!(names.contains(&"run.dimension.list")); - assert!(names.contains(&"schema.field.upsert")); - assert!(names.contains(&"schema.field.remove")); - Ok(()) -} - -#[test] -fn schema_field_tools_mutate_project_schema() -> TestResult { - let project_root = temp_project_root("schema_field_tools")?; - init_project(&project_root)?; - - let mut harness = McpHarness::spawn(Some(&project_root), &[])?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - - let upsert = harness.call_tool( - 861, - "schema.field.upsert", + )?); + assert_tool_ok(&harness.call_tool( + 31, + "hypothesis.record", json!({ - "name": "scenario", - "node_classes": ["hypothesis", "analysis"], - "presence": "recommended", - "severity": "warning", - "role": "projection_gate", - "inference_policy": "manual_only", - "value_type": "string" + "frontier": "artifacts", + "slug": "sourced-hypothesis", + "title": "Sourced hypothesis", + "summary": "Attach a large external source by reference only.", + "body": "Treat large external writeups as artifact references rather than inline context so the ledger stays scientifically austere.", }), - )?; - assert_eq!(upsert["result"]["isError"].as_bool(), Some(false)); - assert_eq!( - tool_content(&upsert)["field"]["name"].as_str(), - Some("scenario") - ); - assert_eq!( - tool_content(&upsert)["field"]["node_classes"], - json!(["hypothesis", "analysis"]) - ); - - let schema = harness.call_tool(862, "project.schema", json!({ "detail": "full" }))?; - assert_eq!(schema["result"]["isError"].as_bool(), Some(false)); - let fields = must_some(tool_content(&schema)["fields"].as_array(), "schema fields")?; - assert!(fields.iter().any(|field| { - field["name"].as_str() 
== Some("scenario") && field["value_type"].as_str() == Some("string") - })); - - let remove = harness.call_tool( - 863, - "schema.field.remove", + )?); + assert_tool_ok(&harness.call_tool( + 32, + "artifact.record", json!({ - "name": "scenario", - "node_classes": ["hypothesis", "analysis"] + "kind": "document", + "slug": "lp-review-doc", + "label": "LP review tranche", + "summary": "External markdown tranche.", + "locator": "/tmp/lp-review.md", + "attachments": [{"kind": "hypothesis", "selector": "sourced-hypothesis"}], }), - )?; - assert_eq!(remove["result"]["isError"].as_bool(), Some(false)); - assert_eq!(tool_content(&remove)["removed_count"].as_u64(), Some(1)); - - let schema_after = harness.call_tool(864, "project.schema", json!({ "detail": "full" }))?; - let fields_after = must_some( - tool_content(&schema_after)["fields"].as_array(), - "schema fields after remove", - )?; - assert!( - !fields_after - .iter() - .any(|field| field["name"].as_str() == Some("scenario")) - ); - Ok(()) -} - -#[test] -fn bind_open_backfills_legacy_missing_summary() -> TestResult { - let project_root = temp_project_root("bind_backfill")?; - init_project(&project_root)?; - - let node_id = { - let mut store = must(ProjectStore::open(&project_root), "open project store")?; - let node = must( - store.add_node(fidget_spinner_store_sqlite::CreateNodeRequest { - class: fidget_spinner_core::NodeClass::Source, - frontier_id: None, - title: must(NonEmptyText::new("legacy source"), "legacy title")?, - summary: Some(must( - NonEmptyText::new("temporary summary"), - "temporary summary", - )?), - tags: None, - payload: fidget_spinner_core::NodePayload::with_schema( - store.schema().schema_ref(), - serde_json::from_value(json!({ - "body": "Derived summary first paragraph.\n\nLonger body follows." 
- })) - .map_err(|error| io::Error::other(format!("payload object: {error}")))?, - ), - annotations: Vec::new(), - attachments: Vec::new(), - }), - "create legacy source node", - )?; - node.id.to_string() - }; - - let database_path = project_root.join(".fidget_spinner").join("state.sqlite"); - let clear_output = must( - Command::new("sqlite3") - .current_dir(project_root.as_std_path()) - .arg(database_path.as_str()) - .arg(format!( - "UPDATE nodes SET summary = NULL WHERE id = '{node_id}';" - )) - .output(), - "spawn sqlite3 for direct summary clear", - )?; - if !clear_output.status.success() { - return Err(io::Error::other(format!( - "sqlite3 summary clear failed: {}", - String::from_utf8_lossy(&clear_output.stderr) - )) - .into()); - } - - let mut harness = McpHarness::spawn(None, &[])?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - let bind = harness.bind_project(60, &project_root)?; - assert_eq!(bind["result"]["isError"].as_bool(), Some(false)); + )?); - let read = harness.call_tool(61, "node.read", json!({ "node_id": node_id }))?; - assert_eq!(read["result"]["isError"].as_bool(), Some(false)); + let artifact = + harness.call_tool_full(33, "artifact.read", json!({"artifact": "lp-review-doc"}))?; + assert_tool_ok(&artifact); + let content = tool_content(&artifact); assert_eq!( - tool_content(&read)["summary"].as_str(), - Some("Derived summary first paragraph.") + content["record"]["locator"].as_str(), + Some("/tmp/lp-review.md") ); - - let listed = harness.call_tool(62, "node.list", json!({ "class": "source" }))?; - let items = must_some(tool_content(&listed).as_array(), "source node list")?; - assert_eq!(items.len(), 1); + assert!(content["record"].get("body").is_none()); assert_eq!( - items[0]["summary"].as_str(), - Some("Derived summary first paragraph.") + must_some(content["attachments"].as_array(), "artifact attachments")?[0]["kind"].as_str(), + Some("hypothesis") ); Ok(()) } #[test] -fn 
metric_tools_rank_closed_experiments_and_enforce_disambiguation() -> TestResult { - let project_root = temp_project_root("metric_rank_e2e")?; +fn experiment_close_drives_metric_best_and_analysis() -> TestResult { + let project_root = temp_project_root("metric_best")?; init_project(&project_root)?; - let mut harness = McpHarness::spawn(Some(&project_root), &[])?; + let mut harness = McpHarness::spawn(Some(&project_root))?; let _ = harness.initialize()?; harness.notify_initialized()?; - let frontier = harness.call_tool( - 70, - "frontier.init", - json!({ - "label": "metric frontier", - "objective": "exercise metric ranking", - "contract_title": "metric contract", - "benchmark_suites": ["smoke"], - "promotion_criteria": ["rank by one key"], - "primary_metric": { - "key": "wall_clock_s", - "unit": "seconds", - "objective": "minimize" - } - }), - )?; - assert_eq!(frontier["result"]["isError"].as_bool(), Some(false)); - let frontier_id = must_some( - tool_content(&frontier)["frontier_id"].as_str(), - "frontier id", - )? 
- .to_owned(); - let metric_define = harness.call_tool( - 701, + assert_tool_ok(&harness.call_tool( + 40, "metric.define", json!({ - "key": "wall_clock_s", - "unit": "seconds", - "objective": "minimize", - "description": "elapsed wall time" + "key": "nodes_solved", + "unit": "count", + "objective": "maximize", + "visibility": "canonical", }), - )?; - assert_eq!(metric_define["result"]["isError"].as_bool(), Some(false)); - - let scenario_dimension = harness.call_tool( - 702, - "run.dimension.define", - json!({ - "key": "scenario", - "value_type": "string", - "description": "workload family" - }), - )?; - assert_eq!( - scenario_dimension["result"]["isError"].as_bool(), - Some(false) - ); - - let duration_dimension = harness.call_tool( - 703, + )?); + assert_tool_ok(&harness.call_tool( + 41, "run.dimension.define", + json!({"key": "instance", "value_type": "string"}), + )?); + assert_tool_ok(&harness.call_tool( + 42, + "frontier.create", json!({ - "key": "duration_s", - "value_type": "numeric", - "description": "time budget in seconds" - }), - )?; - assert_eq!( - duration_dimension["result"]["isError"].as_bool(), - Some(false) - ); - - let dimensions = harness.call_tool(704, "run.dimension.list", json!({}))?; - assert_eq!(dimensions["result"]["isError"].as_bool(), Some(false)); - let dimension_rows = must_some(tool_content(&dimensions).as_array(), "run dimension rows")?; - assert!(dimension_rows.iter().any(|row| { - row["key"].as_str() == Some("benchmark_suite") - && row["value_type"].as_str() == Some("string") - })); - assert!(dimension_rows.iter().any(|row| { - row["key"].as_str() == Some("scenario") - && row["description"].as_str() == Some("workload family") - })); - assert!(dimension_rows.iter().any(|row| { - row["key"].as_str() == Some("duration_s") && row["value_type"].as_str() == Some("numeric") - })); - - let first_change = harness.call_tool( - 71, - "node.create", - json!({ - "class": "hypothesis", - "frontier_id": frontier_id, - "title": "first change", - 
"summary": "first change summary", - "payload": { - "body": "first change body", - "wall_clock_s": 14.0 - } - }), - )?; - assert_eq!(first_change["result"]["isError"].as_bool(), Some(false)); - let first_change_id = must_some( - tool_content(&first_change)["id"].as_str(), - "first change id", - )?; - let first_experiment = harness.call_tool( - 711, - "experiment.open", - json!({ - "frontier_id": frontier_id, - "hypothesis_node_id": first_change_id, - "title": "first experiment", - "summary": "first experiment summary" - }), - )?; - assert_eq!(first_experiment["result"]["isError"].as_bool(), Some(false)); - let first_experiment_id = must_some( - tool_content(&first_experiment)["experiment_id"].as_str(), - "first experiment id", - )?; - - let first_close = harness.call_tool( - 72, - "experiment.close", - json!({ - "experiment_id": first_experiment_id, - "run": { - "title": "first run", - "summary": "first run summary", - "backend": "worktree_process", - "dimensions": { - "benchmark_suite": "smoke", - "scenario": "belt_4x5", - "duration_s": 20.0 - }, - "command": { - "working_directory": project_root.as_str(), - "argv": ["true"] - } - }, - "primary_metric": { - "key": "wall_clock_s", - "value": 10.0 - }, - "note": { - "summary": "first run note" - }, - "verdict": "kept", - "decision_title": "first decision", - "decision_rationale": "keep first candidate around" + "label": "Metric frontier", + "objective": "Test best-of ranking", + "slug": "metric-frontier", }), - )?; - assert_eq!(first_close["result"]["isError"].as_bool(), Some(false)); - - let second_change = harness.call_tool( - 73, - "node.create", + )?); + assert_tool_ok(&harness.call_tool( + 43, + "hypothesis.record", json!({ - "class": "hypothesis", - "frontier_id": frontier_id, - "title": "second change", - "summary": "second change summary", - "payload": { - "body": "second change body", - "wall_clock_s": 7.0 - } + "frontier": "metric-frontier", + "slug": "reopt-dominance", + "title": "Node reopt dominates 
native LP spend", + "summary": "Track node LP wallclock concentration on braid rails.", + "body": "Matched LP site traces indicate native LP spend is dominated by node reoptimization on the braid rails, so the next interventions should target node-local LP churn instead of root-only machinery.", }), - )?; - assert_eq!(second_change["result"]["isError"].as_bool(), Some(false)); - let second_change_id = must_some( - tool_content(&second_change)["id"].as_str(), - "second change id", - )?; - let second_experiment = harness.call_tool( - 712, + )?); + assert_tool_ok(&harness.call_tool( + 44, "experiment.open", json!({ - "frontier_id": frontier_id, - "hypothesis_node_id": second_change_id, - "title": "second experiment", - "summary": "second experiment summary" + "hypothesis": "reopt-dominance", + "slug": "trace-baseline", + "title": "Trace baseline", + "summary": "First matched trace.", }), - )?; - assert_eq!( - second_experiment["result"]["isError"].as_bool(), - Some(false) - ); - let second_experiment_id = must_some( - tool_content(&second_experiment)["experiment_id"].as_str(), - "second experiment id", - )?; - - let second_close = harness.call_tool( - 74, + )?); + assert_tool_ok(&harness.call_tool( + 45, "experiment.close", json!({ - "experiment_id": second_experiment_id, - "run": { - "title": "second run", - "summary": "second run summary", - "backend": "worktree_process", - "dimensions": { - "benchmark_suite": "smoke", - "scenario": "belt_4x5", - "duration_s": 60.0 - }, - "command": { - "working_directory": project_root.as_str(), - "argv": ["true"] - } - }, - "primary_metric": { - "key": "wall_clock_s", - "value": 5.0 - }, - "note": { - "summary": "second run note" - }, + "experiment": "trace-baseline", + "backend": "manual", + "command": {"argv": ["trace-baseline"]}, + "dimensions": {"instance": "4x5-braid"}, + "primary_metric": {"key": "nodes_solved", "value": 217.0}, "verdict": "kept", - "decision_title": "second decision", - "decision_rationale": "second 
candidate looks stronger" + "rationale": "Baseline trace is real but not dominant.", }), - )?; - assert_eq!(second_close["result"]["isError"].as_bool(), Some(false)); - - let second_frontier = harness.call_tool( - 80, - "frontier.init", - json!({ - "label": "metric frontier two", - "objective": "exercise frontier filtering", - "contract_title": "metric contract two", - "benchmark_suites": ["smoke"], - "promotion_criteria": ["frontier filters should isolate rankings"], - "primary_metric": { - "key": "wall_clock_s", - "unit": "seconds", - "objective": "minimize" - } - }), - )?; - assert_eq!(second_frontier["result"]["isError"].as_bool(), Some(false)); - let second_frontier_id = must_some( - tool_content(&second_frontier)["frontier_id"].as_str(), - "second frontier id", - )? - .to_owned(); - - let third_change = harness.call_tool( - 81, - "node.create", - json!({ - "class": "hypothesis", - "frontier_id": second_frontier_id, - "title": "third change", - "summary": "third change summary", - "payload": { - "body": "third change body", - "wall_clock_s": 3.0 - } - }), - )?; - assert_eq!(third_change["result"]["isError"].as_bool(), Some(false)); - let third_change_id = must_some( - tool_content(&third_change)["id"].as_str(), - "third change id", - )?; - let third_experiment = harness.call_tool( - 811, + )?); + assert_tool_ok(&harness.call_tool( + 46, "experiment.open", json!({ - "frontier_id": second_frontier_id, - "hypothesis_node_id": third_change_id, - "title": "third experiment", - "summary": "third experiment summary" + "hypothesis": "reopt-dominance", + "slug": "trace-node-reopt", + "title": "Trace node reopt", + "summary": "Matched LP site traces with node focus.", + "parents": [{"kind": "experiment", "selector": "trace-baseline"}], }), - )?; - assert_eq!(third_experiment["result"]["isError"].as_bool(), Some(false)); - let third_experiment_id = must_some( - tool_content(&third_experiment)["experiment_id"].as_str(), - "third experiment id", - )?; - - let third_close = 
harness.call_tool( - 82, + )?); + assert_tool_ok(&harness.call_tool( + 47, "experiment.close", json!({ - "experiment_id": third_experiment_id, - "run": { - "title": "third run", - "summary": "third run summary", - "backend": "worktree_process", - "dimensions": { - "benchmark_suite": "smoke", - "scenario": "belt_4x5_alt", - "duration_s": 60.0 - }, - "command": { - "working_directory": project_root.as_str(), - "argv": ["true"] - } - }, - "primary_metric": { - "key": "wall_clock_s", - "value": 3.0 - }, - "note": { - "summary": "third run note" - }, - "verdict": "kept", - "decision_title": "third decision", - "decision_rationale": "third candidate is best overall but not in the first frontier" - }), - )?; - assert_eq!(third_close["result"]["isError"].as_bool(), Some(false)); - - let keys = harness.call_tool(75, "metric.keys", json!({}))?; - assert_eq!(keys["result"]["isError"].as_bool(), Some(false)); - let key_rows = must_some(tool_content(&keys).as_array(), "metric keys array")?; - assert!(key_rows.iter().any(|row| { - row["key"].as_str() == Some("wall_clock_s") && row["source"].as_str() == Some("run_metric") - })); - assert!(key_rows.iter().any(|row| { - row["key"].as_str() == Some("wall_clock_s") - && row["source"].as_str() == Some("run_metric") - && row["description"].as_str() == Some("elapsed wall time") - && row["requires_order"].as_bool() == Some(false) - })); - assert!(key_rows.iter().any(|row| { - row["key"].as_str() == Some("wall_clock_s") - && row["source"].as_str() == Some("hypothesis_payload") - })); - - let filtered_keys = harness.call_tool( - 750, - "metric.keys", - json!({ - "source": "run_metric", - "dimensions": { - "scenario": "belt_4x5", - "duration_s": 60.0 + "experiment": "trace-node-reopt", + "backend": "manual", + "command": {"argv": ["matched-lp-site-traces"]}, + "dimensions": {"instance": "4x5-braid"}, + "primary_metric": {"key": "nodes_solved", "value": 273.0}, + "verdict": "accepted", + "rationale": "Matched LP site traces show node 
reoptimization as the dominant sink.", + "analysis": { + "summary": "Node LP work is now the primary native sink.", + "body": "The differential traces isolate node reoptimization as the dominant native LP wallclock site on the matched braid rail, which justifies prioritizing node-local LP control work over further root-only tuning." } }), - )?; - assert_eq!(filtered_keys["result"]["isError"].as_bool(), Some(false)); - let filtered_key_rows = must_some( - tool_content(&filtered_keys).as_array(), - "filtered metric keys array", - )?; - assert_eq!(filtered_key_rows.len(), 1); - assert_eq!(filtered_key_rows[0]["key"].as_str(), Some("wall_clock_s")); - assert_eq!(filtered_key_rows[0]["experiment_count"].as_u64(), Some(1)); - - let ambiguous = harness.call_tool(76, "metric.best", json!({ "key": "wall_clock_s" }))?; - assert_eq!(ambiguous["result"]["isError"].as_bool(), Some(true)); - assert!( - fault_message(&ambiguous) - .is_some_and(|message| message.contains("ambiguous across sources")) - ); - - let run_metric_best = harness.call_tool( - 77, - "metric.best", - json!({ - "key": "wall_clock_s", - "source": "run_metric", - "dimensions": { - "scenario": "belt_4x5", - "duration_s": 60.0 - }, - "limit": 5 - }), - )?; - assert_eq!(run_metric_best["result"]["isError"].as_bool(), Some(false)); - let run_best_rows = must_some( - tool_content(&run_metric_best).as_array(), - "run metric best array", - )?; - assert_eq!(run_best_rows[0]["value"].as_f64(), Some(5.0)); - assert_eq!(run_best_rows.len(), 1); - assert_eq!( - run_best_rows[0]["experiment_title"].as_str(), - Some("second experiment") - ); - assert_eq!(run_best_rows[0]["verdict"].as_str(), Some("kept")); - assert_eq!( - run_best_rows[0]["dimensions"]["scenario"].as_str(), - Some("belt_4x5") - ); - assert_eq!( - run_best_rows[0]["dimensions"]["duration_s"].as_f64(), - Some(60.0) - ); - assert!( - must_some(tool_text(&run_metric_best), "run metric best text")?.contains("hypothesis=") - ); - 
assert!(must_some(tool_text(&run_metric_best), "run metric best text")?.contains("dims:")); - - let payload_requires_order = harness.call_tool( - 78, - "metric.best", - json!({ - "key": "wall_clock_s", - "source": "hypothesis_payload" - }), - )?; - assert_eq!( - payload_requires_order["result"]["isError"].as_bool(), - Some(true) - ); - assert!( - fault_message(&payload_requires_order) - .is_some_and(|message| message.contains("explicit order")) - ); - - let payload_best = harness.call_tool( - 79, - "metric.best", - json!({ - "key": "wall_clock_s", - "source": "hypothesis_payload", - "dimensions": { - "scenario": "belt_4x5", - "duration_s": 60.0 - }, - "order": "asc" - }), - )?; - assert_eq!(payload_best["result"]["isError"].as_bool(), Some(false)); - let payload_best_rows = must_some( - tool_content(&payload_best).as_array(), - "payload metric best array", - )?; - assert_eq!(payload_best_rows[0]["value"].as_f64(), Some(7.0)); - assert_eq!(payload_best_rows.len(), 1); - assert_eq!( - payload_best_rows[0]["experiment_title"].as_str(), - Some("second experiment") - ); + )?); - let filtered_best = harness.call_tool( - 83, + let best = harness.call_tool_full( + 48, "metric.best", json!({ - "key": "wall_clock_s", - "source": "run_metric", - "frontier_id": frontier_id, - "dimensions": { - "scenario": "belt_4x5" - }, - "limit": 5 + "frontier": "metric-frontier", + "hypothesis": "reopt-dominance", + "key": "nodes_solved", }), )?; - assert_eq!(filtered_best["result"]["isError"].as_bool(), Some(false)); - let filtered_rows = must_some( - tool_content(&filtered_best).as_array(), - "filtered metric best array", + assert_tool_ok(&best); + let entries = must_some( + tool_content(&best)["entries"].as_array(), + "metric best entries", )?; - assert_eq!(filtered_rows.len(), 2); assert_eq!( - filtered_rows[0]["experiment_title"].as_str(), - Some("second experiment") - ); - assert!( - filtered_rows - .iter() - .all(|row| row["frontier_id"].as_str() == Some(frontier_id.as_str())) + 
entries[0]["experiment"]["slug"].as_str(), + Some("trace-node-reopt") ); + assert_eq!(entries[0]["value"].as_f64(), Some(273.0)); - let global_best = harness.call_tool( - 84, - "metric.best", - json!({ - "key": "wall_clock_s", - "source": "run_metric", - "limit": 5 - }), - )?; - assert_eq!(global_best["result"]["isError"].as_bool(), Some(false)); - let global_rows = must_some( - tool_content(&global_best).as_array(), - "global metric best array", + let detail = harness.call_tool_full( + 49, + "experiment.read", + json!({"experiment": "trace-node-reopt"}), )?; + assert_tool_ok(&detail); + let content = tool_content(&detail); assert_eq!( - global_rows[0]["experiment_title"].as_str(), - Some("third experiment") - ); - assert_eq!( - global_rows[0]["frontier_id"].as_str(), - Some(second_frontier_id.as_str()) - ); - - let migrate = harness.call_tool(85, "metric.migrate", json!({}))?; - assert_eq!(migrate["result"]["isError"].as_bool(), Some(false)); - assert_eq!( - tool_content(&migrate)["inserted_metric_definitions"].as_u64(), - Some(0) - ); - assert_eq!( - tool_content(&migrate)["inserted_dimension_definitions"].as_u64(), - Some(0) + content["record"]["outcome"]["verdict"].as_str(), + Some("accepted") ); assert_eq!( - tool_content(&migrate)["inserted_dimension_values"].as_u64(), - Some(0) + content["record"]["outcome"]["analysis"]["summary"].as_str(), + Some("Node LP work is now the primary native sink.") ); Ok(()) } -- cgit v1.2.3