swarm repositories / source
about | summary | refs | log | tree | commit | diff
path: root/crates/phone-opus/tests
diff options
context:
space:
mode:
author: main <main@swarm.moe> 2026-03-24 19:26:58 -0400
committer: main <main@swarm.moe> 2026-03-24 19:26:58 -0400
commit: 57db4dc94dbf571ac8a393f61549def5afaa0209 (patch)
tree: e625a2af169a7397c34339e6150fc7bee1f900a2 /crates/phone-opus/tests
parent: 8b090c3d0daf8b336aab9074b0d8aa31a688e232 (diff)
download: phone_opus-57db4dc94dbf571ac8a393f61549def5afaa0209.zip
Predeclare and stream consult session ids
Diffstat (limited to 'crates/phone-opus/tests')
-rw-r--r-- crates/phone-opus/tests/mcp_hardening.rs 202
1 files changed, 168 insertions, 34 deletions
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs
index e9a664b..0d53c33 100644
--- a/crates/phone-opus/tests/mcp_hardening.rs
+++ b/crates/phone-opus/tests/mcp_hardening.rs
@@ -280,37 +280,62 @@ fn seed_caller_claude_home(home: &Path) -> TestResult {
Ok(())
}
-fn write_fake_claude_stdout(path: &Path, result: &str, session_id: &str, uuid: &str) -> TestResult {
+fn write_fake_claude_stream_success(
+ path: &Path,
+ result: &str,
+ session_id: &str,
+ uuid: &str,
+) -> TestResult {
+ let payload = [
+ serde_json::to_string(&json!({
+ "type": "system",
+ "subtype": "init",
+ "session_id": session_id,
+ }))?,
+ serde_json::to_string(&json!({
+ "type": "result",
+ "subtype": "success",
+ "is_error": false,
+ "duration_ms": 1234,
+ "duration_api_ms": 1200,
+ "num_turns": 2,
+ "result": result,
+ "stop_reason": "end_turn",
+ "session_id": session_id,
+ "total_cost_usd": 0.125,
+ "usage": {
+ "input_tokens": 10,
+ "output_tokens": 5
+ },
+ "modelUsage": {
+ "claude-opus-4-6": {
+ "inputTokens": 10,
+ "outputTokens": 5
+ }
+ },
+ "permission_denials": [],
+ "fast_mode_state": "off",
+ "uuid": uuid
+ }))?,
+ ]
+ .join("\n");
+ must(fs::write(path, format!("{payload}\n")), "write fake stdout")
+}
+
+fn write_fake_claude_stream_init(path: &Path, session_id: &str) -> TestResult {
must(
fs::write(
path,
- serde_json::to_string(&json!({
- "type": "result",
- "subtype": "success",
- "is_error": false,
- "duration_ms": 1234,
- "duration_api_ms": 1200,
- "num_turns": 2,
- "result": result,
- "stop_reason": "end_turn",
- "session_id": session_id,
- "total_cost_usd": 0.125,
- "usage": {
- "input_tokens": 10,
- "output_tokens": 5
- },
- "modelUsage": {
- "claude-opus-4-6": {
- "inputTokens": 10,
- "outputTokens": 5
- }
- },
- "permission_denials": [],
- "fast_mode_state": "off",
- "uuid": uuid
- }))?,
+ format!(
+ "{}\n",
+ serde_json::to_string(&json!({
+ "type": "system",
+ "subtype": "init",
+ "session_id": session_id,
+ }))?
+ ),
),
- "write fake stdout",
+ "write fake init stream",
)
}
@@ -392,7 +417,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes
let fresh_session = "dbd3b6c2-4757-4b45-a8f0-f3d877e1a13f";
let sibling_session = "d9a9a472-a091-4268-a7dd-9f31cf61f87e";
write_fake_claude_script(&fake_claude)?;
- write_fake_claude_stdout(&stdout_file, "oracle", remembered_session, "uuid-123")?;
+ write_fake_claude_stream_success(&stdout_file, "oracle", remembered_session, "uuid-123")?;
let claude_bin = fake_claude.display().to_string();
let stdout_path = stdout_file.display().to_string();
@@ -448,16 +473,31 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes
tool_content(&consult)["context_mode"].as_str(),
Some("fresh")
);
+ assert!(
+ tool_content(&consult)["planned_session_id"]
+ .as_str()
+ .is_some_and(|value| !value.is_empty())
+ );
assert!(tool_content(&consult)["reused_session_id"].is_null());
assert_eq!(
+ tool_content(&consult)["observed_session_id"].as_str(),
+ Some(remembered_session)
+ );
+ assert_eq!(
tool_content(&consult)["session_id"].as_str(),
Some(remembered_session)
);
let first_args = must(fs::read_to_string(&args_file), "read first fake args file")?;
+ assert!(first_args.contains("--session-id"));
+ assert!(
+ tool_content(&consult)["planned_session_id"]
+ .as_str()
+ .is_some_and(|value| first_args.contains(value))
+ );
assert!(!first_args.contains("--resume"));
assert!(!first_args.contains("not-a-uuid"));
- write_fake_claude_stdout(
+ write_fake_claude_stream_success(
&stdout_file,
"oracle reused",
remembered_session,
@@ -488,7 +528,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes
assert!(reused_args.contains("--resume"));
assert!(reused_args.contains(remembered_session));
- write_fake_claude_stdout(&stdout_file, "oracle fresh", fresh_session, "uuid-125")?;
+ write_fake_claude_stream_success(&stdout_file, "oracle fresh", fresh_session, "uuid-125")?;
let fresh = harness.call_tool(
5,
"consult",
@@ -508,7 +548,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes
let fresh_args = must(fs::read_to_string(&args_file), "read fresh fake args file")?;
assert!(!fresh_args.contains("--resume"));
- write_fake_claude_stdout(
+ write_fake_claude_stream_success(
&stdout_file,
"oracle after fresh",
fresh_session,
@@ -538,7 +578,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes
assert!(after_fresh_args.contains("--resume"));
assert!(after_fresh_args.contains(fresh_session));
- write_fake_claude_stdout(&stdout_file, "oracle sibling", sibling_session, "uuid-127")?;
+ write_fake_claude_stream_success(&stdout_file, "oracle sibling", sibling_session, "uuid-127")?;
let sibling = harness.call_tool(
7,
"consult",
@@ -592,7 +632,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes
let lines = args.lines().collect::<Vec<_>>();
assert!(lines.contains(&"-p"));
assert!(lines.contains(&"--output-format"));
- assert!(lines.contains(&"json"));
+ assert!(lines.contains(&"stream-json"));
assert!(lines.contains(&"--strict-mcp-config"));
assert!(lines.contains(&"--mcp-config"));
assert!(lines.contains(&"{\"mcpServers\":{}}"));
@@ -605,6 +645,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes
assert!(lines.contains(&"--tools"));
assert!(lines.contains(&"Bash,Read,Grep,Glob,LS,WebFetch"));
assert!(lines.contains(&"--dangerously-skip-permissions"));
+ assert!(lines.contains(&"--session-id"));
assert!(!lines.contains(&"--permission-mode"));
assert!(!lines.contains(&"dontAsk"));
assert!(!lines.contains(&"--resume"));
@@ -811,7 +852,7 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult {
let stdout_file = root.join("stdout.json");
let remembered_session = "84b9d462-5af9-4a4e-8e44-379a8d0c46d7";
write_fake_claude_script(&fake_claude)?;
- write_fake_claude_stdout(&stdout_file, "ok", remembered_session, "uuid-remembered")?;
+ write_fake_claude_stream_success(&stdout_file, "ok", remembered_session, "uuid-remembered")?;
let claude_bin = fake_claude.display().to_string();
let stdout_path = stdout_file.display().to_string();
@@ -875,10 +916,18 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult {
Some("reused")
);
assert_eq!(
+ tool_content(&failed)["context"]["consult"]["planned_session_id"].as_str(),
+ Some(remembered_session)
+ );
+ assert_eq!(
tool_content(&failed)["context"]["consult"]["reused_session_id"].as_str(),
Some(remembered_session)
);
assert_eq!(
+ tool_content(&failed)["context"]["consult"]["observed_session_id"].as_str(),
+ Some(remembered_session)
+ );
+ assert_eq!(
tool_content(&failed)["context"]["consult"]["resume_session_id"].as_str(),
Some(remembered_session)
);
@@ -910,6 +959,91 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult {
}
#[test]
+fn fresh_failures_capture_streamed_session_ids_eagerly() -> TestResult {
+ let root = temp_root("consult_fresh_stream_failure")?;
+ let state_home = root.join("state-home");
+ let sandbox = root.join("sandbox");
+ let caller_home = root.join("caller-home");
+ let fake_claude = root.join("claude");
+ let stdout_file = root.join("stdout.json");
+ let args_file = root.join("args.txt");
+ let init_session = "550e8400-e29b-41d4-a716-446655440000";
+ must(fs::create_dir_all(&state_home), "create state home")?;
+ must(fs::create_dir_all(&sandbox), "create sandbox")?;
+ must(fs::create_dir_all(&caller_home), "create caller home")?;
+ seed_caller_claude_home(&caller_home)?;
+ write_fake_claude_script(&fake_claude)?;
+ write_fake_claude_stream_init(&stdout_file, init_session)?;
+
+ let claude_bin = fake_claude.display().to_string();
+ let stdout_path = stdout_file.display().to_string();
+ let args_path = args_file.display().to_string();
+ let caller_home_path = caller_home.display().to_string();
+ let env = [
+ ("HOME", caller_home_path.as_str()),
+ ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
+ ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()),
+ ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()),
+ ("PHONE_OPUS_TEST_EXIT_CODE", "17"),
+ (
+ "PHONE_OPUS_TEST_STDERR",
+ "You've hit your limit · resets 9pm (America/New_York)",
+ ),
+ ];
+ let mut harness = McpHarness::spawn(&state_home, &env)?;
+ let _ = harness.initialize()?;
+ harness.notify_initialized()?;
+
+ let failed = harness.call_tool(
+ 3,
+ "consult",
+ json!({
+ "prompt": "fresh expensive audit",
+ "cwd": sandbox.display().to_string()
+ }),
+ )?;
+ assert_tool_error(&failed);
+ assert_eq!(
+ tool_content(&failed)["context"]["consult"]["context_mode"].as_str(),
+ Some("fresh")
+ );
+ assert_eq!(
+ tool_content(&failed)["context"]["consult"]["observed_session_id"].as_str(),
+ Some(init_session)
+ );
+ assert_eq!(
+ tool_content(&failed)["context"]["consult"]["resume_session_id"].as_str(),
+ Some(init_session)
+ );
+ assert_eq!(
+ tool_content(&failed)["context"]["consult"]["quota_reset_hint"].as_str(),
+ Some("9pm (America/New_York)")
+ );
+ let planned_session = must_some(
+ tool_content(&failed)["context"]["consult"]["planned_session_id"]
+ .as_str()
+ .map(str::to_owned),
+ "planned session id on failure",
+ )?;
+ let args = must(fs::read_to_string(&args_file), "read fresh failure args")?;
+ assert!(args.contains("--session-id"));
+ assert!(args.contains(&planned_session));
+ assert!(!args.contains("--resume"));
+ assert!(
+ failed["result"]["content"]
+ .as_array()
+ .into_iter()
+ .flatten()
+ .filter_map(|entry| entry["text"].as_str())
+ .any(|text| {
+ text.contains("observed_session: 550e8400-e29b-41d4-a716-446655440000")
+ && text.contains("resume_session: 550e8400-e29b-41d4-a716-446655440000")
+ })
+ );
+ Ok(())
+}
+
+#[test]
fn consult_never_replays_after_worker_transport_failure() -> TestResult {
let root = temp_root("consult_no_replay")?;
let state_home = root.join("state-home");