diff options
| author | main <main@swarm.moe> | 2026-03-24 19:26:58 -0400 |
|---|---|---|
| committer | main <main@swarm.moe> | 2026-03-24 19:26:58 -0400 |
| commit | 57db4dc94dbf571ac8a393f61549def5afaa0209 (patch) | |
| tree | e625a2af169a7397c34339e6150fc7bee1f900a2 /crates/phone-opus/tests/mcp_hardening.rs | |
| parent | 8b090c3d0daf8b336aab9074b0d8aa31a688e232 (diff) | |
| download | phone_opus-57db4dc94dbf571ac8a393f61549def5afaa0209.zip | |
Predeclare and stream consult session ids
Diffstat (limited to 'crates/phone-opus/tests/mcp_hardening.rs')
| -rw-r--r-- | crates/phone-opus/tests/mcp_hardening.rs | 202 |
1 files changed, 168 insertions, 34 deletions
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs index e9a664b..0d53c33 100644 --- a/crates/phone-opus/tests/mcp_hardening.rs +++ b/crates/phone-opus/tests/mcp_hardening.rs @@ -280,37 +280,62 @@ fn seed_caller_claude_home(home: &Path) -> TestResult { Ok(()) } -fn write_fake_claude_stdout(path: &Path, result: &str, session_id: &str, uuid: &str) -> TestResult { +fn write_fake_claude_stream_success( + path: &Path, + result: &str, + session_id: &str, + uuid: &str, +) -> TestResult { + let payload = [ + serde_json::to_string(&json!({ + "type": "system", + "subtype": "init", + "session_id": session_id, + }))?, + serde_json::to_string(&json!({ + "type": "result", + "subtype": "success", + "is_error": false, + "duration_ms": 1234, + "duration_api_ms": 1200, + "num_turns": 2, + "result": result, + "stop_reason": "end_turn", + "session_id": session_id, + "total_cost_usd": 0.125, + "usage": { + "input_tokens": 10, + "output_tokens": 5 + }, + "modelUsage": { + "claude-opus-4-6": { + "inputTokens": 10, + "outputTokens": 5 + } + }, + "permission_denials": [], + "fast_mode_state": "off", + "uuid": uuid + }))?, + ] + .join("\n"); + must(fs::write(path, format!("{payload}\n")), "write fake stdout") +} + +fn write_fake_claude_stream_init(path: &Path, session_id: &str) -> TestResult { must( fs::write( path, - serde_json::to_string(&json!({ - "type": "result", - "subtype": "success", - "is_error": false, - "duration_ms": 1234, - "duration_api_ms": 1200, - "num_turns": 2, - "result": result, - "stop_reason": "end_turn", - "session_id": session_id, - "total_cost_usd": 0.125, - "usage": { - "input_tokens": 10, - "output_tokens": 5 - }, - "modelUsage": { - "claude-opus-4-6": { - "inputTokens": 10, - "outputTokens": 5 - } - }, - "permission_denials": [], - "fast_mode_state": "off", - "uuid": uuid - }))?, + format!( + "{}\n", + serde_json::to_string(&json!({ + "type": "system", + "subtype": "init", + "session_id": session_id, + }))? 
+ ), ), - "write fake stdout", + "write fake init stream", ) } @@ -392,7 +417,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes let fresh_session = "dbd3b6c2-4757-4b45-a8f0-f3d877e1a13f"; let sibling_session = "d9a9a472-a091-4268-a7dd-9f31cf61f87e"; write_fake_claude_script(&fake_claude)?; - write_fake_claude_stdout(&stdout_file, "oracle", remembered_session, "uuid-123")?; + write_fake_claude_stream_success(&stdout_file, "oracle", remembered_session, "uuid-123")?; let claude_bin = fake_claude.display().to_string(); let stdout_path = stdout_file.display().to_string(); @@ -448,16 +473,31 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes tool_content(&consult)["context_mode"].as_str(), Some("fresh") ); + assert!( + tool_content(&consult)["planned_session_id"] + .as_str() + .is_some_and(|value| !value.is_empty()) + ); assert!(tool_content(&consult)["reused_session_id"].is_null()); assert_eq!( + tool_content(&consult)["observed_session_id"].as_str(), + Some(remembered_session) + ); + assert_eq!( tool_content(&consult)["session_id"].as_str(), Some(remembered_session) ); let first_args = must(fs::read_to_string(&args_file), "read first fake args file")?; + assert!(first_args.contains("--session-id")); + assert!( + tool_content(&consult)["planned_session_id"] + .as_str() + .is_some_and(|value| first_args.contains(value)) + ); assert!(!first_args.contains("--resume")); assert!(!first_args.contains("not-a-uuid")); - write_fake_claude_stdout( + write_fake_claude_stream_success( &stdout_file, "oracle reused", remembered_session, @@ -488,7 +528,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes assert!(reused_args.contains("--resume")); assert!(reused_args.contains(remembered_session)); - write_fake_claude_stdout(&stdout_file, "oracle fresh", fresh_session, "uuid-125")?; + write_fake_claude_stream_success(&stdout_file, "oracle fresh", fresh_session, "uuid-125")?; let fresh = 
harness.call_tool( 5, "consult", @@ -508,7 +548,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes let fresh_args = must(fs::read_to_string(&args_file), "read fresh fake args file")?; assert!(!fresh_args.contains("--resume")); - write_fake_claude_stdout( + write_fake_claude_stream_success( &stdout_file, "oracle after fresh", fresh_session, @@ -538,7 +578,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes assert!(after_fresh_args.contains("--resume")); assert!(after_fresh_args.contains(fresh_session)); - write_fake_claude_stdout(&stdout_file, "oracle sibling", sibling_session, "uuid-127")?; + write_fake_claude_stream_success(&stdout_file, "oracle sibling", sibling_session, "uuid-127")?; let sibling = harness.call_tool( 7, "consult", @@ -592,7 +632,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes let lines = args.lines().collect::<Vec<_>>(); assert!(lines.contains(&"-p")); assert!(lines.contains(&"--output-format")); - assert!(lines.contains(&"json")); + assert!(lines.contains(&"stream-json")); assert!(lines.contains(&"--strict-mcp-config")); assert!(lines.contains(&"--mcp-config")); assert!(lines.contains(&"{\"mcpServers\":{}}")); @@ -605,6 +645,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes assert!(lines.contains(&"--tools")); assert!(lines.contains(&"Bash,Read,Grep,Glob,LS,WebFetch")); assert!(lines.contains(&"--dangerously-skip-permissions")); + assert!(lines.contains(&"--session-id")); assert!(!lines.contains(&"--permission-mode")); assert!(!lines.contains(&"dontAsk")); assert!(!lines.contains(&"--resume")); @@ -811,7 +852,7 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult { let stdout_file = root.join("stdout.json"); let remembered_session = "84b9d462-5af9-4a4e-8e44-379a8d0c46d7"; write_fake_claude_script(&fake_claude)?; - write_fake_claude_stdout(&stdout_file, "ok", remembered_session, 
"uuid-remembered")?; + write_fake_claude_stream_success(&stdout_file, "ok", remembered_session, "uuid-remembered")?; let claude_bin = fake_claude.display().to_string(); let stdout_path = stdout_file.display().to_string(); @@ -875,10 +916,18 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult { Some("reused") ); assert_eq!( + tool_content(&failed)["context"]["consult"]["planned_session_id"].as_str(), + Some(remembered_session) + ); + assert_eq!( tool_content(&failed)["context"]["consult"]["reused_session_id"].as_str(), Some(remembered_session) ); assert_eq!( + tool_content(&failed)["context"]["consult"]["observed_session_id"].as_str(), + Some(remembered_session) + ); + assert_eq!( tool_content(&failed)["context"]["consult"]["resume_session_id"].as_str(), Some(remembered_session) ); @@ -910,6 +959,91 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult { } #[test] +fn fresh_failures_capture_streamed_session_ids_eagerly() -> TestResult { + let root = temp_root("consult_fresh_stream_failure")?; + let state_home = root.join("state-home"); + let sandbox = root.join("sandbox"); + let caller_home = root.join("caller-home"); + let fake_claude = root.join("claude"); + let stdout_file = root.join("stdout.json"); + let args_file = root.join("args.txt"); + let init_session = "550e8400-e29b-41d4-a716-446655440000"; + must(fs::create_dir_all(&state_home), "create state home")?; + must(fs::create_dir_all(&sandbox), "create sandbox")?; + must(fs::create_dir_all(&caller_home), "create caller home")?; + seed_caller_claude_home(&caller_home)?; + write_fake_claude_script(&fake_claude)?; + write_fake_claude_stream_init(&stdout_file, init_session)?; + + let claude_bin = fake_claude.display().to_string(); + let stdout_path = stdout_file.display().to_string(); + let args_path = args_file.display().to_string(); + let caller_home_path = caller_home.display().to_string(); + let env = [ + ("HOME", caller_home_path.as_str()), + ("PHONE_OPUS_CLAUDE_BIN", 
claude_bin.as_str()), + ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()), + ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()), + ("PHONE_OPUS_TEST_EXIT_CODE", "17"), + ( + "PHONE_OPUS_TEST_STDERR", + "You've hit your limit · resets 9pm (America/New_York)", + ), + ]; + let mut harness = McpHarness::spawn(&state_home, &env)?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + + let failed = harness.call_tool( + 3, + "consult", + json!({ + "prompt": "fresh expensive audit", + "cwd": sandbox.display().to_string() + }), + )?; + assert_tool_error(&failed); + assert_eq!( + tool_content(&failed)["context"]["consult"]["context_mode"].as_str(), + Some("fresh") + ); + assert_eq!( + tool_content(&failed)["context"]["consult"]["observed_session_id"].as_str(), + Some(init_session) + ); + assert_eq!( + tool_content(&failed)["context"]["consult"]["resume_session_id"].as_str(), + Some(init_session) + ); + assert_eq!( + tool_content(&failed)["context"]["consult"]["quota_reset_hint"].as_str(), + Some("9pm (America/New_York)") + ); + let planned_session = must_some( + tool_content(&failed)["context"]["consult"]["planned_session_id"] + .as_str() + .map(str::to_owned), + "planned session id on failure", + )?; + let args = must(fs::read_to_string(&args_file), "read fresh failure args")?; + assert!(args.contains("--session-id")); + assert!(args.contains(&planned_session)); + assert!(!args.contains("--resume")); + assert!( + failed["result"]["content"] + .as_array() + .into_iter() + .flatten() + .filter_map(|entry| entry["text"].as_str()) + .any(|text| { + text.contains("observed_session: 550e8400-e29b-41d4-a716-446655440000") + && text.contains("resume_session: 550e8400-e29b-41d4-a716-446655440000") + }) + ); + Ok(()) +} + +#[test] fn consult_never_replays_after_worker_transport_failure() -> TestResult { let root = temp_root("consult_no_replay")?; let state_home = root.join("state-home"); |