Disable public resume behavior

author: main <main@swarm.moe> 2026-03-25 00:43:57 -0400
committer: main <main@swarm.moe> 2026-03-25 00:43:57 -0400
commit: 2c219204d627634442d46c38d1b5df806f77f4c1 (patch)
tree: 02b6c0108a698a422aef88ed8bb7e602212bd36d /crates/phone-opus/tests
parent: 481aaa4ee150671d86655d566f52aa1bd7254c16 (diff)
download: phone_opus-2c219204d627634442d46c38d1b5df806f77f4c1.zip
1 file changed, 118 insertions, 194 deletions
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs
index 0b32442..b35e687 100644
--- a/crates/phone-opus/tests/mcp_hardening.rs
+++ b/crates/phone-opus/tests/mcp_hardening.rs
@@ -378,8 +378,8 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult {
         "consult schema should not advertise session_id: {consult_tool:#}"
     );
     assert_eq!(
-        consult_tool["inputSchema"]["properties"]["fresh_context"]["type"].as_str(),
-        Some("boolean")
+        consult_tool["inputSchema"]["properties"]["fresh_context"],
+        Value::Null
     );
 
     let health = harness.call_tool(3, "health_snapshot", json!({}))?;
@@ -389,18 +389,13 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult {
 }
 
 #[test]
-fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> TestResult {
+fn consult_is_one_shot_and_hides_session_state() -> TestResult {
     let root = temp_root("consult_success")?;
     let state_home = root.join("state-home");
     let sandbox = root.join("sandbox");
-    let sibling_sandbox = root.join("sibling-sandbox");
     let caller_home = root.join("caller-home");
     must(fs::create_dir_all(&state_home), "create state home")?;
     must(fs::create_dir_all(&sandbox), "create sandbox")?;
-    must(
-        fs::create_dir_all(&sibling_sandbox),
-        "create sibling sandbox",
-    )?;
     must(fs::create_dir_all(&caller_home), "create caller home")?;
     seed_caller_claude_home(&caller_home)?;
 
@@ -413,11 +408,10 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes
     let cwd_probe_error_file = root.join("cwd-write-probe.err");
     let credential_probe_file = root.join("credential-write-probe.txt");
     let credential_probe_error_file = root.join("credential-write-probe.err");
-    let remembered_session = "81f218eb-568b-409b-871b-f6e86d8f666f";
-    let fresh_session = "dbd3b6c2-4757-4b45-a8f0-f3d877e1a13f";
-    let sibling_session = "d9a9a472-a091-4268-a7dd-9f31cf61f87e";
+    let first_observed_session = "81f218eb-568b-409b-871b-f6e86d8f666f";
+    let second_observed_session = "dbd3b6c2-4757-4b45-a8f0-f3d877e1a13f";
     write_fake_claude_script(&fake_claude)?;
-    write_fake_claude_stream_success(&stdout_file, "oracle", remembered_session, "uuid-123")?;
+    write_fake_claude_stream_success(&stdout_file, "oracle", first_observed_session, "uuid-123")?;
 
     let claude_bin = fake_claude.display().to_string();
     let stdout_path = stdout_file.display().to_string();
@@ -469,144 +463,73 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes
     )?;
     assert_tool_ok(&consult);
     assert_eq!(tool_content(&consult)["response"].as_str(), Some("oracle"));
-    assert_eq!(
-        tool_content(&consult)["context_mode"].as_str(),
-        Some("fresh")
-    );
-    assert!(
-        tool_content(&consult)["planned_session_id"]
-            .as_str()
-            .is_some_and(|value| !value.is_empty())
-    );
+    assert!(tool_content(&consult)["context_mode"].is_null());
+    assert!(tool_content(&consult)["planned_session_id"].is_null());
     assert!(tool_content(&consult)["reused_session_id"].is_null());
-    assert_eq!(
-        tool_content(&consult)["observed_session_id"].as_str(),
-        Some(remembered_session)
-    );
-    assert_eq!(
-        tool_content(&consult)["session_id"].as_str(),
-        Some(remembered_session)
-    );
+    assert!(tool_content(&consult)["observed_session_id"].is_null());
+    assert!(tool_content(&consult)["session_id"].is_null());
     let first_args = must(fs::read_to_string(&args_file), "read first fake args file")?;
-    assert!(first_args.contains("--session-id"));
-    assert!(
-        tool_content(&consult)["planned_session_id"]
-            .as_str()
-            .is_some_and(|value| first_args.contains(value))
-    );
+    let first_lines = first_args.lines().collect::<Vec<_>>();
+    assert!(first_lines.contains(&"--session-id"));
     assert!(!first_args.contains("--resume"));
     assert!(!first_args.contains("not-a-uuid"));
+    let first_session_id = must_some(
+        first_lines
+            .windows(2)
+            .find_map(|window| (window[0] == "--session-id").then_some(window[1].to_owned())),
+        "first one-shot session id",
+    )?;
+    assert!(uuid::Uuid::parse_str(&first_session_id).is_ok());
 
     write_fake_claude_stream_success(
         &stdout_file,
-        "oracle reused",
-        remembered_session,
+        "oracle again",
+        second_observed_session,
         "uuid-124",
     )?;
-    let reused = harness.call_tool(
+    let repeated = harness.call_tool(
         4,
         "consult",
         json!({
-            "prompt": "say oracle reused",
+            "prompt": "say oracle again",
             "cwd": sandbox.display().to_string()
         }),
     )?;
-    assert_tool_ok(&reused);
+    assert_tool_ok(&repeated);
     assert_eq!(
-        tool_content(&reused)["response"].as_str(),
-        Some("oracle reused")
+        tool_content(&repeated)["response"].as_str(),
+        Some("oracle again")
     );
-    assert_eq!(
-        tool_content(&reused)["context_mode"].as_str(),
-        Some("reused")
-    );
-    assert_eq!(
-        tool_content(&reused)["reused_session_id"].as_str(),
-        Some(remembered_session)
-    );
-    let reused_args = must(fs::read_to_string(&args_file), "read reused fake args file")?;
-    assert!(reused_args.contains("--resume"));
-    assert!(reused_args.contains(remembered_session));
-
-    write_fake_claude_stream_success(&stdout_file, "oracle fresh", fresh_session, "uuid-125")?;
-    let fresh = harness.call_tool(
-        5,
-        "consult",
-        json!({
-            "prompt": "say oracle fresh",
-            "cwd": sandbox.display().to_string(),
-            "fresh_context": true
-        }),
-    )?;
-    assert_tool_ok(&fresh);
-    assert_eq!(
-        tool_content(&fresh)["response"].as_str(),
-        Some("oracle fresh")
-    );
-    assert_eq!(tool_content(&fresh)["context_mode"].as_str(), Some("fresh"));
-    assert!(tool_content(&fresh)["reused_session_id"].is_null());
-    let fresh_args = must(fs::read_to_string(&args_file), "read fresh fake args file")?;
-    assert!(!fresh_args.contains("--resume"));
-
-    write_fake_claude_stream_success(
-        &stdout_file,
-        "oracle after fresh",
-        fresh_session,
-        "uuid-126",
-    )?;
-    let after_fresh = harness.call_tool(
-        6,
-        "consult",
-        json!({
-            "prompt": "say oracle after fresh",
-            "cwd": sandbox.display().to_string()
-        }),
-    )?;
-    assert_tool_ok(&after_fresh);
-    assert_eq!(
-        tool_content(&after_fresh)["context_mode"].as_str(),
-        Some("reused")
-    );
-    assert_eq!(
-        tool_content(&after_fresh)["reused_session_id"].as_str(),
-        Some(fresh_session)
-    );
-    let after_fresh_args = must(
+    assert!(tool_content(&repeated)["context_mode"].is_null());
+    assert!(tool_content(&repeated)["planned_session_id"].is_null());
+    assert!(tool_content(&repeated)["reused_session_id"].is_null());
+    assert!(tool_content(&repeated)["observed_session_id"].is_null());
+    assert!(tool_content(&repeated)["session_id"].is_null());
+    let repeated_args = must(
         fs::read_to_string(&args_file),
-        "read after-fresh fake args file",
+        "read repeated fake args file",
     )?;
-    assert!(after_fresh_args.contains("--resume"));
-    assert!(after_fresh_args.contains(fresh_session));
-
-    write_fake_claude_stream_success(&stdout_file, "oracle sibling", sibling_session, "uuid-127")?;
-    let sibling = harness.call_tool(
-        7,
-        "consult",
-        json!({
-            "prompt": "say oracle sibling",
-            "cwd": sibling_sandbox.display().to_string()
-        }),
+    let repeated_lines = repeated_args.lines().collect::<Vec<_>>();
+    assert!(repeated_lines.contains(&"--session-id"));
+    assert!(!repeated_args.contains("--resume"));
+    let repeated_session_id = must_some(
+        repeated_lines
+            .windows(2)
+            .find_map(|window| (window[0] == "--session-id").then_some(window[1].to_owned())),
+        "repeated one-shot session id",
     )?;
-    assert_tool_ok(&sibling);
-    assert_eq!(
-        tool_content(&sibling)["context_mode"].as_str(),
-        Some("fresh")
-    );
-    assert!(tool_content(&sibling)["reused_session_id"].is_null());
-    let sibling_args = must(
-        fs::read_to_string(&args_file),
-        "read sibling fake args file",
-    )?;
-    assert!(!sibling_args.contains("--resume"));
+    assert!(uuid::Uuid::parse_str(&repeated_session_id).is_ok());
+    assert_ne!(repeated_session_id, first_session_id);
 
     let persisted_output_path = must_some(
-        tool_content(&after_fresh)["persisted_output_path"]
+        tool_content(&repeated)["persisted_output_path"]
             .as_str()
             .map(str::to_owned),
         "persisted output path",
     )?;
     assert!(persisted_output_path.starts_with("/tmp/phone_opus-consults/"));
-    assert!(persisted_output_path.contains(fresh_session));
+    assert!(!persisted_output_path.contains(first_observed_session));
+    assert!(!persisted_output_path.contains(second_observed_session));
     let persisted_output = must(
         fs::read_to_string(&persisted_output_path),
         "read persisted consult output",
@@ -615,18 +538,37 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes
         serde_json::from_str(&persisted_output),
         "parse persisted consult output",
     )?;
+    assert_eq!(persisted_output["response"].as_str(), Some("oracle again"));
+    assert!(persisted_output["context_mode"].is_null());
+    assert!(persisted_output["planned_session_id"].is_null());
+    assert!(persisted_output["reused_session_id"].is_null());
+    assert!(persisted_output["session_id"].is_null());
+    assert!(persisted_output["observed_session_id"].is_null());
+
+    let consult_context_index = must(
+        fs::read_to_string(
+            state_home
+                .join("phone_opus")
+                .join("mcp")
+                .join("consult_contexts.json"),
+        ),
+        "read consult context index",
+    )?;
+    let consult_context_index: Value = must(
+        serde_json::from_str(&consult_context_index),
+        "parse consult context index",
+    )?;
     assert_eq!(
-        persisted_output["response"].as_str(),
-        Some("oracle after fresh")
+        consult_context_index["by_cwd"][sandbox.display().to_string()]["session_id"].as_str(),
+        Some(second_observed_session)
     );
-    assert_eq!(persisted_output["context_mode"].as_str(), Some("reused"));
     assert_eq!(
-        persisted_output["reused_session_id"].as_str(),
-        Some(fresh_session)
+        consult_context_index["by_cwd"][sandbox.display().to_string()]["state"].as_str(),
+        Some("confirmed")
     );
 
     let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?;
-    assert_eq!(pwd.trim(), sibling_sandbox.display().to_string());
+    assert_eq!(pwd.trim(), sandbox.display().to_string());
 
     let args = must(fs::read_to_string(&args_file), "read fake args file")?;
     let lines = args.lines().collect::<Vec<_>>();
@@ -653,7 +595,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes
     assert!(!lines.contains(&"--max-turns"));
     assert!(args.contains(PROMPT_PREFIX));
     let prefix_index = must_some(args.find(PROMPT_PREFIX), "prefixed consult prompt")?;
-    let user_prompt_index = must_some(args.find("say oracle sibling"), "user prompt inside args")?;
+    let user_prompt_index = must_some(args.find("say oracle again"), "user prompt inside args")?;
     assert!(prefix_index < user_prompt_index);
 
     let env_dump = must(fs::read_to_string(&env_file), "read fake env file")?;
@@ -726,7 +668,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes
     )?;
     assert_eq!(credential_probe.trim(), "write_succeeded");
 
-    let telemetry = harness.call_tool(8, "telemetry_snapshot", json!({}))?;
+    let telemetry = harness.call_tool(5, "telemetry_snapshot", json!({}))?;
     assert_tool_ok(&telemetry);
     let hot_methods = tool_content(&telemetry)["hot_methods"]
         .as_array()
@@ -831,10 +773,10 @@ fn consult_surfaces_downstream_cli_failures() -> TestResult {
             .is_some_and(|value| value.contains("permission denied by fake claude"))
     );
     assert_eq!(
-        tool_content(&consult)["context"]["consult"]["context_mode"].as_str(),
-        Some("fresh")
+        tool_content(&consult)["context"]["consult"]["cwd"].as_str(),
+        Some(std::env::current_dir()?.display().to_string().as_str())
     );
-    assert!(tool_content(&consult)["context"]["consult"]["reused_session_id"].is_null());
+    assert!(tool_content(&consult)["context"]["consult"]["planned_session_id"].is_null());
     Ok(())
 }
 
@@ -876,20 +818,16 @@ fn silent_claude_processes_fail_fast_instead_of_wedging() -> TestResult {
             .is_some_and(|value| value.contains("produced no stream output within 100 ms"))
     );
     assert!(elapsed < std::time::Duration::from_secs(3));
-    assert_eq!(
-        tool_content(&consult)["context"]["consult"]["context_mode"].as_str(),
-        Some("fresh")
-    );
     assert!(
-        tool_content(&consult)["context"]["consult"]["planned_session_id"]
+        tool_content(&consult)["context"]["consult"]["retry_hint"]
             .as_str()
-            .is_some_and(|value| !value.is_empty())
+            .is_some_and(|value| value.contains("fresh one-shot call"))
     );
     Ok(())
 }
 
 #[test]
-fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult {
+fn quota_failures_hide_session_state_on_public_surface() -> TestResult {
     let root = temp_root("consult_quota_failure")?;
     let state_home = root.join("state-home");
     let sandbox = root.join("sandbox");
@@ -926,10 +864,7 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult {
         }),
     )?;
     assert_tool_ok(&first);
-    assert_eq!(
-        tool_content(&first)["session_id"].as_str(),
-        Some(remembered_session)
-    );
+    assert!(tool_content(&first)["session_id"].is_null());
 
     let quota_env = [
         ("HOME", caller_home_path.as_str()),
@@ -962,26 +897,11 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult {
         tool_content(&failed)["context"]["consult"]["cwd"].as_str(),
         Some(sandbox.display().to_string().as_str())
     );
-    assert_eq!(
-        tool_content(&failed)["context"]["consult"]["context_mode"].as_str(),
-        Some("reused")
-    );
-    assert_eq!(
-        tool_content(&failed)["context"]["consult"]["planned_session_id"].as_str(),
-        Some(remembered_session)
-    );
-    assert_eq!(
-        tool_content(&failed)["context"]["consult"]["reused_session_id"].as_str(),
-        Some(remembered_session)
-    );
-    assert_eq!(
-        tool_content(&failed)["context"]["consult"]["observed_session_id"].as_str(),
-        Some(remembered_session)
-    );
-    assert_eq!(
-        tool_content(&failed)["context"]["consult"]["resume_session_id"].as_str(),
-        Some(remembered_session)
-    );
+    assert!(tool_content(&failed)["context"]["consult"]["context_mode"].is_null());
+    assert!(tool_content(&failed)["context"]["consult"]["planned_session_id"].is_null());
+    assert!(tool_content(&failed)["context"]["consult"]["reused_session_id"].is_null());
+    assert!(tool_content(&failed)["context"]["consult"]["observed_session_id"].is_null());
+    assert!(tool_content(&failed)["context"]["consult"]["resume_session_id"].is_null());
     assert_eq!(
         tool_content(&failed)["context"]["consult"]["quota_limited"].as_bool(),
         Some(true)
@@ -993,7 +913,15 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult {
     assert!(
         tool_content(&failed)["context"]["consult"]["retry_hint"]
             .as_str()
-            .is_some_and(|value| value.contains(remembered_session))
+            .is_some_and(|value| value.contains("retry the consult"))
+    );
+    assert!(
+        failed["result"]["content"]
+            .as_array()
+            .into_iter()
+            .flatten()
+            .filter_map(|entry| entry["text"].as_str())
+            .any(|text| text.contains("quota_reset: 4pm (America/New_York)"))
     );
     assert!(
         failed["result"]["content"]
@@ -1001,16 +929,13 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult {
             .into_iter()
             .flatten()
             .filter_map(|entry| entry["text"].as_str())
-            .any(|text| {
-                text.contains("resume_session: 84b9d462-5af9-4a4e-8e44-379a8d0c46d7")
-                    && text.contains("quota_reset: 4pm (America/New_York)")
-            })
+            .all(|text| !text.contains("session"))
     );
     Ok(())
 }
 
 #[test]
-fn fresh_failures_capture_streamed_session_ids_eagerly() -> TestResult {
+fn fresh_failures_keep_internal_session_state_without_public_leakage() -> TestResult {
     let root = temp_root("consult_fresh_stream_failure")?;
     let state_home = root.join("state-home");
     let sandbox = root.join("sandbox");
@@ -1054,42 +979,41 @@ fn fresh_failures_capture_streamed_session_ids_eagerly() -> TestResult {
         }),
     )?;
     assert_tool_error(&failed);
-    assert_eq!(
-        tool_content(&failed)["context"]["consult"]["context_mode"].as_str(),
-        Some("fresh")
-    );
-    assert_eq!(
-        tool_content(&failed)["context"]["consult"]["observed_session_id"].as_str(),
-        Some(init_session)
-    );
-    assert_eq!(
-        tool_content(&failed)["context"]["consult"]["resume_session_id"].as_str(),
-        Some(init_session)
-    );
+    assert!(tool_content(&failed)["context"]["consult"]["context_mode"].is_null());
+    assert!(tool_content(&failed)["context"]["consult"]["observed_session_id"].is_null());
+    assert!(tool_content(&failed)["context"]["consult"]["resume_session_id"].is_null());
     assert_eq!(
         tool_content(&failed)["context"]["consult"]["quota_reset_hint"].as_str(),
         Some("9pm (America/New_York)")
     );
-    let planned_session = must_some(
-        tool_content(&failed)["context"]["consult"]["planned_session_id"]
-            .as_str()
-            .map(str::to_owned),
-        "planned session id on failure",
-    )?;
+    assert!(tool_content(&failed)["context"]["consult"]["planned_session_id"].is_null());
     let args = must(fs::read_to_string(&args_file), "read fresh failure args")?;
     assert!(args.contains("--session-id"));
-    assert!(args.contains(&planned_session));
     assert!(!args.contains("--resume"));
+    let consult_context_index = must(
+        fs::read_to_string(
+            state_home
+                .join("phone_opus")
+                .join("mcp")
+                .join("consult_contexts.json"),
+        ),
+        "read consult context index after failure",
+    )?;
+    let consult_context_index: Value = must(
+        serde_json::from_str(&consult_context_index),
+        "parse consult context index after failure",
+    )?;
+    assert_eq!(
+        consult_context_index["by_cwd"][sandbox.display().to_string()]["session_id"].as_str(),
+        Some(init_session)
+    );
     assert!(
         failed["result"]["content"]
             .as_array()
             .into_iter()
             .flatten()
             .filter_map(|entry| entry["text"].as_str())
-            .any(|text| {
-                text.contains("observed_session: 550e8400-e29b-41d4-a716-446655440000")
-                    && text.contains("resume_session: 550e8400-e29b-41d4-a716-446655440000")
-            })
+            .all(|text| !text.contains("session"))
     );
     Ok(())
 }
author	main <main@swarm.moe>	2026-03-25 00:43:57 -0400
committer	main <main@swarm.moe>	2026-03-25 00:43:57 -0400
commit	2c219204d627634442d46c38d1b5df806f77f4c1 (patch)
tree	02b6c0108a698a422aef88ed8bb7e602212bd36d /crates/phone-opus/tests
parent	481aaa4ee150671d86655d566f52aa1bd7254c16 (diff)
download	phone_opus-2c219204d627634442d46c38d1b5df806f77f4c1.zip