Reuse consult context per cwd by default

author: main <main@swarm.moe> 2026-03-24 13:17:59 -0400
committer: main <main@swarm.moe> 2026-03-24 13:17:59 -0400
commit: 53797d1f9bbaf73778cbb9dd6ad2f857ba1a88e2 (patch)
tree: 69b17b86e72b5f292bde42adf839a8ed8cf8005c /crates/phone-opus/tests/mcp_hardening.rs
parent: 690b4851ea0afd8b214ddaa5450eec3a8c3a7ec9 (diff)
download: phone_opus-53797d1f9bbaf73778cbb9dd6ad2f857ba1a88e2.zip
1 files changed, 171 insertions, 90 deletions
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs
index 06861f8..f6e0e73 100644
--- a/crates/phone-opus/tests/mcp_hardening.rs
+++ b/crates/phone-opus/tests/mcp_hardening.rs
@@ -280,6 +280,40 @@ fn seed_caller_claude_home(home: &Path) -> TestResult {
     Ok(())
 }
 
+fn write_fake_claude_stdout(path: &Path, result: &str, session_id: &str, uuid: &str) -> TestResult {
+    must(
+        fs::write(
+            path,
+            serde_json::to_string(&json!({
+                "type": "result",
+                "subtype": "success",
+                "is_error": false,
+                "duration_ms": 1234,
+                "duration_api_ms": 1200,
+                "num_turns": 2,
+                "result": result,
+                "stop_reason": "end_turn",
+                "session_id": session_id,
+                "total_cost_usd": 0.125,
+                "usage": {
+                    "input_tokens": 10,
+                    "output_tokens": 5
+                },
+                "modelUsage": {
+                    "claude-opus-4-6": {
+                        "inputTokens": 10,
+                        "outputTokens": 5
+                    }
+                },
+                "permission_denials": [],
+                "fast_mode_state": "off",
+                "uuid": uuid
+            }))?,
+        ),
+        "write fake stdout",
+    )
+}
+
 #[test]
 fn cold_start_exposes_consult_and_ops_tools() -> TestResult {
     let root = temp_root("cold_start")?;
@@ -314,6 +348,14 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult {
         consult_tool["inputSchema"]["properties"]["background"].is_null(),
         "consult schema should not advertise background: {consult_tool:#}"
     );
+    assert!(
+        consult_tool["inputSchema"]["properties"]["session_id"].is_null(),
+        "consult schema should not advertise session_id: {consult_tool:#}"
+    );
+    assert_eq!(
+        consult_tool["inputSchema"]["properties"]["fresh_context"]["type"].as_str(),
+        Some("boolean")
+    );
 
     let health = harness.call_tool(3, "health_snapshot", json!({}))?;
     assert_tool_ok(&health);
@@ -322,14 +364,18 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult {
 }
 
 #[test]
-fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_working_directory()
--> TestResult {
+fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> TestResult {
     let root = temp_root("consult_success")?;
     let state_home = root.join("state-home");
     let sandbox = root.join("sandbox");
+    let sibling_sandbox = root.join("sibling-sandbox");
     let caller_home = root.join("caller-home");
     must(fs::create_dir_all(&state_home), "create state home")?;
     must(fs::create_dir_all(&sandbox), "create sandbox")?;
+    must(
+        fs::create_dir_all(&sibling_sandbox),
+        "create sibling sandbox",
+    )?;
     must(fs::create_dir_all(&caller_home), "create caller home")?;
     seed_caller_claude_home(&caller_home)?;
 
@@ -342,39 +388,11 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki
     let cwd_probe_error_file = root.join("cwd-write-probe.err");
     let credential_probe_file = root.join("credential-write-probe.txt");
     let credential_probe_error_file = root.join("credential-write-probe.err");
-    let resumed_session = "81f218eb-568b-409b-871b-f6e86d8f666f";
+    let remembered_session = "81f218eb-568b-409b-871b-f6e86d8f666f";
+    let fresh_session = "dbd3b6c2-4757-4b45-a8f0-f3d877e1a13f";
+    let sibling_session = "d9a9a472-a091-4268-a7dd-9f31cf61f87e";
     write_fake_claude_script(&fake_claude)?;
-    must(
-        fs::write(
-            &stdout_file,
-            serde_json::to_string(&json!({
-                "type": "result",
-                "subtype": "success",
-                "is_error": false,
-                "duration_ms": 1234,
-                "duration_api_ms": 1200,
-                "num_turns": 2,
-                "result": "oracle",
-                "stop_reason": "end_turn",
-                "session_id": resumed_session,
-                "total_cost_usd": 0.125,
-                "usage": {
-                    "input_tokens": 10,
-                    "output_tokens": 5
-                },
-                "modelUsage": {
-                    "claude-opus-4-6": {
-                        "inputTokens": 10,
-                        "outputTokens": 5
-                    }
-                },
-                "permission_denials": [],
-                "fast_mode_state": "off",
-                "uuid": "uuid-123"
-            }))?,
-        ),
-        "write fake stdout",
-    )?;
+    write_fake_claude_stdout(&stdout_file, "oracle", remembered_session, "uuid-123")?;
 
     let claude_bin = fake_claude.display().to_string();
     let stdout_path = stdout_file.display().to_string();
@@ -420,43 +438,135 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki
         json!({
             "prompt": "say oracle",
             "cwd": sandbox.display().to_string(),
-            "session_id": resumed_session,
+            "session_id": "not-a-uuid",
             "background": true
         }),
     )?;
     assert_tool_ok(&consult);
     assert_eq!(tool_content(&consult)["response"].as_str(), Some("oracle"));
-    assert!(tool_content(&consult)["mode"].is_null());
-    assert!(tool_content(&consult)["job_id"].is_null());
     assert_eq!(
-        tool_content(&consult)["session_mode"].as_str(),
-        Some("resumed")
+        tool_content(&consult)["context_mode"].as_str(),
+        Some("fresh")
     );
+    assert!(tool_content(&consult)["reused_session_id"].is_null());
     assert_eq!(
-        tool_content(&consult)["requested_session_id"].as_str(),
-        Some(resumed_session)
+        tool_content(&consult)["session_id"].as_str(),
+        Some(remembered_session)
     );
+    let first_args = must(fs::read_to_string(&args_file), "read first fake args file")?;
+    assert!(!first_args.contains("--resume"));
+    assert!(!first_args.contains("not-a-uuid"));
+
+    write_fake_claude_stdout(
+        &stdout_file,
+        "oracle reused",
+        remembered_session,
+        "uuid-124",
+    )?;
+    let reused = harness.call_tool(
+        4,
+        "consult",
+        json!({
+            "prompt": "say oracle reused",
+            "cwd": sandbox.display().to_string()
+        }),
+    )?;
+    assert_tool_ok(&reused);
     assert_eq!(
-        tool_content(&consult)["prompt_prefix_injected"].as_bool(),
-        Some(true)
+        tool_content(&reused)["response"].as_str(),
+        Some("oracle reused")
     );
     assert_eq!(
-        tool_content(&consult)["cwd"].as_str(),
-        Some(sandbox.display().to_string().as_str())
+        tool_content(&reused)["context_mode"].as_str(),
+        Some("reused")
     );
-    assert_eq!(tool_content(&consult)["num_turns"].as_u64(), Some(2));
     assert_eq!(
-        tool_content(&consult)["session_id"].as_str(),
-        Some(resumed_session)
+        tool_content(&reused)["reused_session_id"].as_str(),
+        Some(remembered_session)
+    );
+    let reused_args = must(fs::read_to_string(&args_file), "read reused fake args file")?;
+    assert!(reused_args.contains("--resume"));
+    assert!(reused_args.contains(remembered_session));
+
+    write_fake_claude_stdout(&stdout_file, "oracle fresh", fresh_session, "uuid-125")?;
+    let fresh = harness.call_tool(
+        5,
+        "consult",
+        json!({
+            "prompt": "say oracle fresh",
+            "cwd": sandbox.display().to_string(),
+            "fresh_context": true
+        }),
+    )?;
+    assert_tool_ok(&fresh);
+    assert_eq!(
+        tool_content(&fresh)["response"].as_str(),
+        Some("oracle fresh")
     );
+    assert_eq!(tool_content(&fresh)["context_mode"].as_str(), Some("fresh"));
+    assert!(tool_content(&fresh)["reused_session_id"].is_null());
+    let fresh_args = must(fs::read_to_string(&args_file), "read fresh fake args file")?;
+    assert!(!fresh_args.contains("--resume"));
+
+    write_fake_claude_stdout(
+        &stdout_file,
+        "oracle after fresh",
+        fresh_session,
+        "uuid-126",
+    )?;
+    let after_fresh = harness.call_tool(
+        6,
+        "consult",
+        json!({
+            "prompt": "say oracle after fresh",
+            "cwd": sandbox.display().to_string()
+        }),
+    )?;
+    assert_tool_ok(&after_fresh);
+    assert_eq!(
+        tool_content(&after_fresh)["context_mode"].as_str(),
+        Some("reused")
+    );
+    assert_eq!(
+        tool_content(&after_fresh)["reused_session_id"].as_str(),
+        Some(fresh_session)
+    );
+    let after_fresh_args = must(
+        fs::read_to_string(&args_file),
+        "read after-fresh fake args file",
+    )?;
+    assert!(after_fresh_args.contains("--resume"));
+    assert!(after_fresh_args.contains(fresh_session));
+
+    write_fake_claude_stdout(&stdout_file, "oracle sibling", sibling_session, "uuid-127")?;
+    let sibling = harness.call_tool(
+        7,
+        "consult",
+        json!({
+            "prompt": "say oracle sibling",
+            "cwd": sibling_sandbox.display().to_string()
+        }),
+    )?;
+    assert_tool_ok(&sibling);
+    assert_eq!(
+        tool_content(&sibling)["context_mode"].as_str(),
+        Some("fresh")
+    );
+    assert!(tool_content(&sibling)["reused_session_id"].is_null());
+    let sibling_args = must(
+        fs::read_to_string(&args_file),
+        "read sibling fake args file",
+    )?;
+    assert!(!sibling_args.contains("--resume"));
+
     let persisted_output_path = must_some(
-        tool_content(&consult)["persisted_output_path"]
+        tool_content(&after_fresh)["persisted_output_path"]
             .as_str()
             .map(str::to_owned),
         "persisted output path",
     )?;
     assert!(persisted_output_path.starts_with("/tmp/phone_opus-consults/"));
-    assert!(persisted_output_path.contains(resumed_session));
+    assert!(persisted_output_path.contains(fresh_session));
     let persisted_output = must(
         fs::read_to_string(&persisted_output_path),
         "read persisted consult output",
@@ -465,14 +575,18 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki
         serde_json::from_str(&persisted_output),
         "parse persisted consult output",
     )?;
-    assert_eq!(persisted_output["response"].as_str(), Some("oracle"));
     assert_eq!(
-        persisted_output["requested_session_id"].as_str(),
-        Some(resumed_session)
+        persisted_output["response"].as_str(),
+        Some("oracle after fresh")
+    );
+    assert_eq!(persisted_output["context_mode"].as_str(), Some("reused"));
+    assert_eq!(
+        persisted_output["reused_session_id"].as_str(),
+        Some(fresh_session)
     );
 
     let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?;
-    assert_eq!(pwd.trim(), sandbox.display().to_string());
+    assert_eq!(pwd.trim(), sibling_sandbox.display().to_string());
 
     let args = must(fs::read_to_string(&args_file), "read fake args file")?;
     let lines = args.lines().collect::<Vec<_>>();
@@ -493,13 +607,11 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki
     assert!(lines.contains(&"--dangerously-skip-permissions"));
     assert!(!lines.contains(&"--permission-mode"));
     assert!(!lines.contains(&"dontAsk"));
-    assert!(lines.contains(&"--resume"));
-    assert!(lines.contains(&resumed_session));
+    assert!(!lines.contains(&"--resume"));
     assert!(!lines.contains(&"--max-turns"));
     assert!(args.contains(PROMPT_PREFIX));
-    assert!(args.contains("The real prompt follows."));
     let prefix_index = must_some(args.find(PROMPT_PREFIX), "prefixed consult prompt")?;
-    let user_prompt_index = must_some(args.find("say oracle"), "user prompt inside args")?;
+    let user_prompt_index = must_some(args.find("say oracle sibling"), "user prompt inside args")?;
     assert!(prefix_index < user_prompt_index);
 
     let env_dump = must(fs::read_to_string(&env_file), "read fake env file")?;
@@ -572,7 +684,7 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki
     )?;
     assert_eq!(credential_probe.trim(), "write_succeeded");
 
-    let telemetry = harness.call_tool(4, "telemetry_snapshot", json!({}))?;
+    let telemetry = harness.call_tool(8, "telemetry_snapshot", json!({}))?;
     assert_tool_ok(&telemetry);
     let hot_methods = tool_content(&telemetry)["hot_methods"]
         .as_array()
@@ -644,37 +756,6 @@ fn background_surfaces_are_hidden_from_public_mcp() -> TestResult {
 }
 
 #[test]
-fn consult_rejects_invalid_session_handles() -> TestResult {
-    let root = temp_root("consult_invalid_session")?;
-    let state_home = root.join("state-home");
-    must(fs::create_dir_all(&state_home), "create state home")?;
-
-    let mut harness = McpHarness::spawn(&state_home, &[])?;
-    let _ = harness.initialize()?;
-    harness.notify_initialized()?;
-
-    let consult = harness.call_tool(
-        3,
-        "consult",
-        json!({
-            "prompt": "fail",
-            "session_id": "not-a-uuid"
-        }),
-    )?;
-    assert_tool_error(&consult);
-    assert_eq!(
-        tool_content(&consult)["fault"]["class"].as_str(),
-        Some("protocol")
-    );
-    assert!(
-        tool_content(&consult)["fault"]["detail"]
-            .as_str()
-            .is_some_and(|value| value.contains("session_id must be a valid UUID"))
-    );
-    Ok(())
-}
-
-#[test]
 fn consult_surfaces_downstream_cli_failures() -> TestResult {
     let root = temp_root("consult_failure")?;
     let state_home = root.join("state-home");
author	main <main@swarm.moe>	2026-03-24 13:17:59 -0400
committer	main <main@swarm.moe>	2026-03-24 13:17:59 -0400
commit	53797d1f9bbaf73778cbb9dd6ad2f857ba1a88e2 (patch)
tree	69b17b86e72b5f292bde42adf839a8ed8cf8005c /crates/phone-opus/tests/mcp_hardening.rs
parent	690b4851ea0afd8b214ddaa5450eec3a8c3a7ec9 (diff)
download	phone_opus-53797d1f9bbaf73778cbb9dd6ad2f857ba1a88e2.zip