Excise hidden consult machinery

author: main <main@swarm.moe> 2026-03-31 13:21:05 -0400
committer: main <main@swarm.moe> 2026-03-31 13:21:05 -0400
commit: 2fc866a3ce50b6ba9c5e84e0ad2f8c77517361ff (patch)
tree: dab1bc9d653794944369ec51c2ed250aaf680f44 /crates/phone-opus/tests
parent: 2160224b7ef21e3319a93d057165712aabe8cbe2 (diff)
download: phone_opus-2fc866a3ce50b6ba9c5e84e0ad2f8c77517361ff.zip
1 file changed, 17 insertions, 274 deletions
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs
index 754ee79..c826092 100644
--- a/crates/phone-opus/tests/mcp_hardening.rs
+++ b/crates/phone-opus/tests/mcp_hardening.rs
@@ -423,7 +423,7 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult {
 }
 
 #[test]
-fn consult_is_one_shot_and_hides_session_state() -> TestResult {
+fn consult_runs_blocking_in_sandbox() -> TestResult {
     let root = temp_root("consult_success")?;
     let state_home = root.join("state-home");
     let sandbox = root.join("sandbox");
@@ -442,10 +442,9 @@ fn consult_is_one_shot_and_hides_session_state() -> TestResult {
     let cwd_probe_error_file = root.join("cwd-write-probe.err");
     let credential_probe_file = root.join("credential-write-probe.txt");
     let credential_probe_error_file = root.join("credential-write-probe.err");
-    let first_observed_session = "81f218eb-568b-409b-871b-f6e86d8f666f";
-    let second_observed_session = "dbd3b6c2-4757-4b45-a8f0-f3d877e1a13f";
+    let observed_session = "81f218eb-568b-409b-871b-f6e86d8f666f";
     write_fake_claude_script(&fake_claude)?;
-    write_fake_claude_json_success(&stdout_file, "oracle", first_observed_session, "uuid-123")?;
+    write_fake_claude_json_success(&stdout_file, "oracle", observed_session, "uuid-123")?;
 
     let claude_bin = fake_claude.display().to_string();
     let stdout_path = stdout_file.display().to_string();
@@ -490,80 +489,31 @@ fn consult_is_one_shot_and_hides_session_state() -> TestResult {
         "consult",
         json!({
             "prompt": "say oracle",
-            "cwd": sandbox.display().to_string(),
-            "session_id": "not-a-uuid",
-            "background": true
+            "cwd": sandbox.display().to_string()
         }),
     )?;
     assert_tool_ok(&consult);
     assert_eq!(tool_content(&consult)["response"].as_str(), Some("oracle"));
-    assert!(tool_content(&consult)["context_mode"].is_null());
-    assert!(tool_content(&consult)["planned_session_id"].is_null());
-    assert!(tool_content(&consult)["reused_session_id"].is_null());
-    assert!(tool_content(&consult)["observed_session_id"].is_null());
-    assert!(tool_content(&consult)["session_id"].is_null());
-    let first_args = must(fs::read_to_string(&args_file), "read first fake args file")?;
-    let first_lines = first_args.lines().collect::<Vec<_>>();
-    assert!(first_lines.contains(&"--session-id"));
-    assert!(!first_args.contains("--resume"));
-    assert!(!first_args.contains("not-a-uuid"));
-    let first_session_id = must_some(
-        first_lines
-            .windows(2)
-            .find_map(|window| (window[0] == "--session-id").then_some(window[1].to_owned())),
-        "first one-shot session id",
-    )?;
-    assert!(uuid::Uuid::parse_str(&first_session_id).is_ok());
-
-    write_fake_claude_json_success(
-        &stdout_file,
-        "oracle again",
-        second_observed_session,
-        "uuid-124",
-    )?;
-    let repeated = harness.call_tool(
-        4,
-        "consult",
-        json!({
-            "prompt": "say oracle again",
-            "cwd": sandbox.display().to_string()
-        }),
-    )?;
-    assert_tool_ok(&repeated);
-    assert_eq!(
-        tool_content(&repeated)["response"].as_str(),
-        Some("oracle again")
-    );
-    assert!(tool_content(&repeated)["context_mode"].is_null());
-    assert!(tool_content(&repeated)["planned_session_id"].is_null());
-    assert!(tool_content(&repeated)["reused_session_id"].is_null());
-    assert!(tool_content(&repeated)["observed_session_id"].is_null());
-    assert!(tool_content(&repeated)["session_id"].is_null());
-    let repeated_args = must(
-        fs::read_to_string(&args_file),
-        "read repeated fake args file",
-    )?;
-    let repeated_lines = repeated_args.lines().collect::<Vec<_>>();
-    assert!(repeated_lines.contains(&"--session-id"));
-    assert!(!repeated_args.contains("--resume"));
-    let repeated_session_id = must_some(
-        repeated_lines
+    let args = must(fs::read_to_string(&args_file), "read fake args file")?;
+    let lines = args.lines().collect::<Vec<_>>();
+    assert!(lines.contains(&"--session-id"));
+    assert!(!args.contains("--resume"));
+    let session_id = must_some(
+        lines
             .windows(2)
             .find_map(|window| (window[0] == "--session-id").then_some(window[1].to_owned())),
-        "repeated one-shot session id",
+        "one-shot session id",
     )?;
-    assert!(uuid::Uuid::parse_str(&repeated_session_id).is_ok());
-    assert_ne!(repeated_session_id, first_session_id);
+    assert!(uuid::Uuid::parse_str(&session_id).is_ok());
 
     let persisted_output_path = must_some(
-        tool_content(&repeated)["persisted_output_path"]
+        tool_content(&consult)["persisted_output_path"]
             .as_str()
             .map(str::to_owned),
         "persisted output path",
     )?;
     assert!(persisted_output_path.starts_with("/tmp/phone_opus-consults/"));
-    assert!(!persisted_output_path.contains(first_observed_session));
-    assert!(!persisted_output_path.contains(second_observed_session));
+    assert!(!persisted_output_path.contains(observed_session));
     let persisted_output = must(
         fs::read_to_string(&persisted_output_path),
         "read persisted consult output",
@@ -572,40 +522,11 @@ fn consult_is_one_shot_and_hides_session_state() -> TestResult {
         serde_json::from_str(&persisted_output),
         "parse persisted consult output",
     )?;
-    assert_eq!(persisted_output["response"].as_str(), Some("oracle again"));
-    assert!(persisted_output["context_mode"].is_null());
-    assert!(persisted_output["planned_session_id"].is_null());
-    assert!(persisted_output["reused_session_id"].is_null());
-    assert!(persisted_output["session_id"].is_null());
-    assert!(persisted_output["observed_session_id"].is_null());
-
-    let consult_context_index = must(
-        fs::read_to_string(
-            state_home
-                .join("phone_opus")
-                .join("mcp")
-                .join("consult_contexts.json"),
-        ),
-        "read consult context index",
-    )?;
-    let consult_context_index: Value = must(
-        serde_json::from_str(&consult_context_index),
-        "parse consult context index",
-    )?;
-    assert_eq!(
-        consult_context_index["by_cwd"][sandbox.display().to_string()]["session_id"].as_str(),
-        Some(second_observed_session)
-    );
-    assert_eq!(
-        consult_context_index["by_cwd"][sandbox.display().to_string()]["state"].as_str(),
-        Some("confirmed")
-    );
+    assert_eq!(persisted_output["response"].as_str(), Some("oracle"));
 
     let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?;
     assert_eq!(pwd.trim(), sandbox.display().to_string());
 
-    let args = must(fs::read_to_string(&args_file), "read fake args file")?;
-    let lines = args.lines().collect::<Vec<_>>();
     assert!(lines.contains(&"-p"));
     assert!(lines.contains(&"--output-format"));
     assert!(lines.contains(&"json"));
@@ -628,7 +549,7 @@ fn consult_is_one_shot_and_hides_session_state() -> TestResult {
     assert!(!lines.contains(&"--max-turns"));
     assert!(args.contains(PROMPT_PREFIX));
     let prefix_index = must_some(args.find(PROMPT_PREFIX), "prefixed consult prompt")?;
-    let user_prompt_index = must_some(args.find("say oracle again"), "user prompt inside args")?;
+    let user_prompt_index = must_some(args.find("say oracle"), "user prompt inside args")?;
     assert!(prefix_index < user_prompt_index);
 
     let env_dump = must(fs::read_to_string(&env_file), "read fake env file")?;
@@ -803,63 +724,6 @@ fn transcript_progress_prevents_false_stall_timeout() -> TestResult {
 }
 
 #[test]
-fn background_surfaces_are_hidden_from_public_mcp() -> TestResult {
-    let root = temp_root("consult_hidden_background")?;
-    let state_home = root.join("state-home");
-    must(fs::create_dir_all(&state_home), "create state home")?;
-
-    let mut harness = McpHarness::spawn(&state_home, &[])?;
-    let _ = harness.initialize()?;
-    harness.notify_initialized()?;
-
-    let consult_job = harness.call_tool(
-        3,
-        "consult_job",
-        json!({
-            "job_id": "00000000-0000-0000-0000-000000000000"
-        }),
-    )?;
-    assert_tool_error(&consult_job);
-    assert!(
-        consult_job["result"]["content"]
-            .as_array()
-            .into_iter()
-            .flatten()
-            .filter_map(|entry| entry["text"].as_str())
-            .any(|text| text.contains("unknown tool `consult_job`"))
-    );
-
-    let consult_wait = harness.call_tool(
-        4,
-        "consult_wait",
-        json!({
-            "job_id": "00000000-0000-0000-0000-000000000000"
-        }),
-    )?;
-    assert_tool_error(&consult_wait);
-    assert!(
-        consult_wait["result"]["content"]
-            .as_array()
-            .into_iter()
-            .flatten()
-            .filter_map(|entry| entry["text"].as_str())
-            .any(|text| text.contains("unknown tool `consult_wait`"))
-    );
-
-    let consult_jobs = harness.call_tool(5, "consult_jobs", json!({}))?;
-    assert_tool_error(&consult_jobs);
-    assert!(
-        consult_jobs["result"]["content"]
-            .as_array()
-            .into_iter()
-            .flatten()
-            .filter_map(|entry| entry["text"].as_str())
-            .any(|text| text.contains("unknown tool `consult_jobs`"))
-    );
-    Ok(())
-}
-
-#[test]
 fn consult_surfaces_downstream_cli_failures() -> TestResult {
     let root = temp_root("consult_failure")?;
     let state_home = root.join("state-home");
@@ -896,12 +760,11 @@ fn consult_surfaces_downstream_cli_failures() -> TestResult {
         tool_content(&consult)["context"]["consult"]["cwd"].as_str(),
         Some(std::env::current_dir()?.display().to_string().as_str())
     );
-    assert!(tool_content(&consult)["context"]["consult"]["planned_session_id"].is_null());
     Ok(())
 }
 
 #[test]
-fn quota_failures_hide_session_state_on_public_surface() -> TestResult {
+fn quota_failures_surface_reset_hint() -> TestResult {
     let root = temp_root("consult_quota_failure")?;
     let state_home = root.join("state-home");
     let sandbox = root.join("sandbox");
@@ -912,34 +775,10 @@ fn quota_failures_hide_session_state_on_public_surface() -> TestResult {
     seed_caller_claude_home(&caller_home)?;
 
     let fake_claude = root.join("claude");
-    let stdout_file = root.join("stdout.json");
-    let remembered_session = "84b9d462-5af9-4a4e-8e44-379a8d0c46d7";
     write_fake_claude_script(&fake_claude)?;
-    write_fake_claude_json_success(&stdout_file, "ok", remembered_session, "uuid-remembered")?;
 
     let claude_bin = fake_claude.display().to_string();
-    let stdout_path = stdout_file.display().to_string();
     let caller_home_path = caller_home.display().to_string();
-    let env = [
-        ("HOME", caller_home_path.as_str()),
-        ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
-        ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()),
-    ];
-    let mut harness = McpHarness::spawn(&state_home, &env)?;
-    let _ = harness.initialize()?;
-    harness.notify_initialized()?;
-
-    let first = harness.call_tool(
-        3,
-        "consult",
-        json!({
-            "prompt": "seed remembered session",
-            "cwd": sandbox.display().to_string()
-        }),
-    )?;
-    assert_tool_ok(&first);
-    assert!(tool_content(&first)["session_id"].is_null());
-
     let quota_env = [
         ("HOME", caller_home_path.as_str()),
         ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
@@ -949,7 +788,6 @@ fn quota_failures_hide_session_state_on_public_surface() -> TestResult {
             "You've hit your limit · resets 4pm (America/New_York)",
         ),
     ];
-    drop(harness);
     let mut harness = McpHarness::spawn(&state_home, &quota_env)?;
     let _ = harness.initialize()?;
     harness.notify_initialized()?;
@@ -971,11 +809,6 @@ fn quota_failures_hide_session_state_on_public_surface() -> TestResult {
         tool_content(&failed)["context"]["consult"]["cwd"].as_str(),
         Some(sandbox.display().to_string().as_str())
     );
-    assert!(tool_content(&failed)["context"]["consult"]["context_mode"].is_null());
-    assert!(tool_content(&failed)["context"]["consult"]["planned_session_id"].is_null());
-    assert!(tool_content(&failed)["context"]["consult"]["reused_session_id"].is_null());
-    assert!(tool_content(&failed)["context"]["consult"]["observed_session_id"].is_null());
-    assert!(tool_content(&failed)["context"]["consult"]["resume_session_id"].is_null());
     assert_eq!(
         tool_content(&failed)["context"]["consult"]["quota_limited"].as_bool(),
         Some(true)
@@ -1009,96 +842,6 @@ fn quota_failures_hide_session_state_on_public_surface() -> TestResult {
 }
 
 #[test]
-fn fresh_failures_keep_internal_session_state_without_public_leakage() -> TestResult {
-    let root = temp_root("consult_fresh_json_failure")?;
-    let state_home = root.join("state-home");
-    let sandbox = root.join("sandbox");
-    let caller_home = root.join("caller-home");
-    let fake_claude = root.join("claude");
-    let stdout_file = root.join("stdout.json");
-    let args_file = root.join("args.txt");
-    must(fs::create_dir_all(&state_home), "create state home")?;
-    must(fs::create_dir_all(&sandbox), "create sandbox")?;
-    must(fs::create_dir_all(&caller_home), "create caller home")?;
-    seed_caller_claude_home(&caller_home)?;
-    write_fake_claude_script(&fake_claude)?;
-    must(fs::write(&stdout_file, ""), "write empty fake stdout")?;
-
-    let claude_bin = fake_claude.display().to_string();
-    let stdout_path = stdout_file.display().to_string();
-    let args_path = args_file.display().to_string();
-    let caller_home_path = caller_home.display().to_string();
-    let env = [
-        ("HOME", caller_home_path.as_str()),
-        ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
-        ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()),
-        ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()),
-        ("PHONE_OPUS_TEST_EXIT_CODE", "17"),
-        (
-            "PHONE_OPUS_TEST_STDERR",
-            "You've hit your limit · resets 9pm (America/New_York)",
-        ),
-    ];
-    let mut harness = McpHarness::spawn(&state_home, &env)?;
-    let _ = harness.initialize()?;
-    harness.notify_initialized()?;
-
-    let failed = harness.call_tool(
-        3,
-        "consult",
-        json!({
-            "prompt": "fresh expensive audit",
-            "cwd": sandbox.display().to_string()
-        }),
-    )?;
-    assert_tool_error(&failed);
-    assert!(tool_content(&failed)["context"]["consult"]["context_mode"].is_null());
-    assert!(tool_content(&failed)["context"]["consult"]["observed_session_id"].is_null());
-    assert!(tool_content(&failed)["context"]["consult"]["resume_session_id"].is_null());
-    assert_eq!(
-        tool_content(&failed)["context"]["consult"]["quota_reset_hint"].as_str(),
-        Some("9pm (America/New_York)")
-    );
-    assert!(tool_content(&failed)["context"]["consult"]["planned_session_id"].is_null());
-    let args = must(fs::read_to_string(&args_file), "read fresh failure args")?;
-    assert!(args.contains("--session-id"));
-    assert!(!args.contains("--resume"));
-    let planned_session_id = must_some(
-        args.lines()
-            .collect::<Vec<_>>()
-            .windows(2)
-            .find_map(|window| (window[0] == "--session-id").then_some(window[1].to_owned())),
-        "planned session id",
-    )?;
-    let consult_context_index = must(
-        fs::read_to_string(
-            state_home
-                .join("phone_opus")
-                .join("mcp")
-                .join("consult_contexts.json"),
-        ),
-        "read consult context index after failure",
-    )?;
-    let consult_context_index: Value = must(
-        serde_json::from_str(&consult_context_index),
-        "parse consult context index after failure",
-    )?;
-    assert_eq!(
-        consult_context_index["by_cwd"][sandbox.display().to_string()]["session_id"].as_str(),
-        Some(planned_session_id.as_str())
-    );
-    assert!(
-        failed["result"]["content"]
-            .as_array()
-            .into_iter()
-            .flatten()
-            .filter_map(|entry| entry["text"].as_str())
-            .all(|text| !text.contains("session"))
-    );
-    Ok(())
-}
-
-#[test]
 fn consult_never_replays_after_worker_transport_failure() -> TestResult {
     let root = temp_root("consult_no_replay")?;
     let state_home = root.join("state-home");
author	main <main@swarm.moe>	2026-03-31 13:21:05 -0400
committer	main <main@swarm.moe>	2026-03-31 13:21:05 -0400
commit	2fc866a3ce50b6ba9c5e84e0ad2f8c77517361ff (patch)
tree	dab1bc9d653794944369ec51c2ed250aaf680f44 /crates/phone-opus/tests
parent	2160224b7ef21e3319a93d057165712aabe8cbe2 (diff)
download	phone_opus-2fc866a3ce50b6ba9c5e84e0ad2f8c77517361ff.zip