diff options
Diffstat (limited to 'crates/phone-opus/tests/mcp_hardening.rs')
| -rw-r--r-- | crates/phone-opus/tests/mcp_hardening.rs | 188 |
1 files changed, 43 insertions, 145 deletions
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs index cbe3354..107a578 100644 --- a/crates/phone-opus/tests/mcp_hardening.rs +++ b/crates/phone-opus/tests/mcp_hardening.rs @@ -288,11 +288,23 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult { let tools = harness.tools_list()?; let tool_names = tool_names(&tools); assert!(tool_names.contains(&"consult")); - assert!(tool_names.contains(&"consult_job")); - assert!(tool_names.contains(&"consult_wait")); - assert!(tool_names.contains(&"consult_jobs")); + assert!(!tool_names.contains(&"consult_job")); + assert!(!tool_names.contains(&"consult_wait")); + assert!(!tool_names.contains(&"consult_jobs")); assert!(tool_names.contains(&"health_snapshot")); assert!(tool_names.contains(&"telemetry_snapshot")); + let consult_tool = must_some( + tools["result"]["tools"] + .as_array() + .into_iter() + .flatten() + .find(|tool| tool["name"] == "consult"), + "consult tool definition", + )?; + assert!( + consult_tool["inputSchema"]["properties"]["background"].is_null(), + "consult schema should not advertise background: {consult_tool:#}" + ); let health = harness.call_tool(3, "health_snapshot", json!({}))?; assert_tool_ok(&health); @@ -387,11 +399,14 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki json!({ "prompt": "say oracle", "cwd": sandbox.display().to_string(), - "session_id": resumed_session + "session_id": resumed_session, + "background": true }), )?; assert_tool_ok(&consult); assert_eq!(tool_content(&consult)["response"].as_str(), Some("oracle")); + assert!(tool_content(&consult)["mode"].is_null()); + assert!(tool_content(&consult)["job_id"].is_null()); assert_eq!( tool_content(&consult)["session_mode"].as_str(), Some("resumed") @@ -539,176 +554,59 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki } #[test] -fn consult_can_run_in_background_and_be_waited_on_or_polled() -> TestResult { - let root = temp_root("consult_background")?; +fn background_surfaces_are_hidden_from_public_mcp() -> TestResult { + let root = temp_root("consult_hidden_background")?; let state_home = root.join("state-home"); - let sandbox = root.join("sandbox"); - let caller_home = root.join("caller-home"); must(fs::create_dir_all(&state_home), "create state home")?; - must(fs::create_dir_all(&sandbox), "create sandbox")?; - must(fs::create_dir_all(&caller_home), "create caller home")?; - let fake_claude = root.join("claude"); - let stdout_file = root.join("stdout.json"); - let args_file = root.join("args.txt"); - let pwd_file = root.join("pwd.txt"); - write_fake_claude_script(&fake_claude)?; - must( - fs::write( - &stdout_file, - serde_json::to_string(&json!({ - "type": "result", - "subtype": "success", - "is_error": false, - "duration_ms": 4321, - "duration_api_ms": 4200, - "num_turns": 3, - "result": "background oracle", - "stop_reason": "end_turn", - "session_id": "3fc69f58-7752-4d9d-a95d-19a217814b6a", - "total_cost_usd": 0.25, - "usage": { - "input_tokens": 11, - "output_tokens": 7 - }, - "modelUsage": { - "claude-opus-4-6": { - "inputTokens": 11, - "outputTokens": 7 - } - }, - "permission_denials": [], - "fast_mode_state": "off", - "uuid": "uuid-456" - }))?, - ), - "write fake stdout", - )?; - - let claude_bin = fake_claude.display().to_string(); - let stdout_path = stdout_file.display().to_string(); - let args_path = args_file.display().to_string(); - let pwd_path = pwd_file.display().to_string(); - let caller_home_path = caller_home.display().to_string(); - let env = [ - ("HOME", caller_home_path.as_str()), - ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), - ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()), - ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()), - ("PHONE_OPUS_TEST_PWD_FILE", pwd_path.as_str()), - ("PHONE_OPUS_TEST_SLEEP_MS", "100"), - ]; - let mut harness = McpHarness::spawn(&state_home, &env)?; + let mut harness = McpHarness::spawn(&state_home, &[])?; let _ = harness.initialize()?; harness.notify_initialized()?; - let submit = harness.call_tool( + let consult_job = harness.call_tool( 3, - "consult", + "consult_job", json!({ - "prompt": "background oracle", - "cwd": sandbox.display().to_string(), - "background": true + "job_id": "00000000-0000-0000-0000-000000000000" }), )?; - assert_tool_ok(&submit); - assert_eq!(tool_content(&submit)["mode"].as_str(), Some("background")); + assert_tool_error(&consult_job); assert!( - tool_content(&submit)["follow_up_tools"] + consult_job["result"]["content"] .as_array() .into_iter() .flatten() - .any(|value| value == "consult_wait") + .filter_map(|entry| entry["text"].as_str()) + .any(|text| text.contains("unknown tool `consult_job`")) ); - let job_id = must_some( - tool_content(&submit)["job_id"].as_str().map(str::to_owned), - "background job id", - )?; - let _ = uuid::Uuid::parse_str(&job_id) - .map_err(|error| io::Error::other(format!("job id uuid parse: {error}")))?; - let timed_out = harness.call_tool( + let consult_wait = harness.call_tool( 4, "consult_wait", json!({ - "job_id": job_id, - "timeout_ms": 0, - "render": "json" + "job_id": "00000000-0000-0000-0000-000000000000" }), )?; - assert_tool_ok(&timed_out); - assert_eq!(tool_content(&timed_out)["timed_out"].as_bool(), Some(true)); - assert_eq!(tool_content(&timed_out)["done"].as_bool(), Some(false)); - - let waited = harness.call_tool( - 5, - "consult_wait", - json!({ - "job_id": job_id, - "timeout_ms": 5_000, - "poll_interval_ms": 10, - "render": "json" - }), - )?; - assert_tool_ok(&waited); - assert_eq!(tool_content(&waited)["timed_out"].as_bool(), Some(false)); - assert_eq!(tool_content(&waited)["status"].as_str(), Some("succeeded")); + assert_tool_error(&consult_wait); assert!( - tool_content(&waited)["waited_ms"] - .as_u64() - .is_some_and(|value| value >= 50) - ); - assert_eq!( - tool_content(&waited)["result"]["response"].as_str(), - Some("background oracle") - ); - let persisted_output_path = must_some( - tool_content(&waited)["result"]["persisted_output_path"] - .as_str() - .map(str::to_owned), - "background persisted output path", - )?; - assert!(persisted_output_path.starts_with("/tmp/phone_opus-consults/")); - assert!(persisted_output_path.contains("3fc69f58-7752-4d9d-a95d-19a217814b6a")); - let persisted_output = must( - fs::read_to_string(&persisted_output_path), - "read background persisted consult output", - )?; - let persisted_output: Value = must( - serde_json::from_str(&persisted_output), - "parse background persisted consult output", - )?; - assert_eq!( - persisted_output["response"].as_str(), - Some("background oracle") + consult_wait["result"]["content"] + .as_array() + .into_iter() + .flatten() + .filter_map(|entry| entry["text"].as_str()) + .any(|text| text.contains("unknown tool `consult_wait`")) ); - let job = harness.call_tool( - 6, - "consult_job", - json!({ - "job_id": job_id, - "render": "json" - }), - )?; - assert_tool_ok(&job); - assert_eq!(tool_content(&job)["status"].as_str(), Some("succeeded")); - - let jobs = harness.call_tool(7, "consult_jobs", json!({ "render": "json" }))?; - assert_tool_ok(&jobs); + let consult_jobs = harness.call_tool(5, "consult_jobs", json!({}))?; + assert_tool_error(&consult_jobs); assert!( - tool_content(&jobs)["jobs"] + consult_jobs["result"]["content"] .as_array() .into_iter() .flatten() - .any(|value| value["job_id"] == job_id) + .filter_map(|entry| entry["text"].as_str()) + .any(|text| text.contains("unknown tool `consult_jobs`")) ); - - let args = must(fs::read_to_string(&args_file), "read fake args file")?; - assert!(args.contains(PROMPT_PREFIX)); - assert!(args.contains("background oracle")); - let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?; - assert_eq!(pwd.trim(), sandbox.display().to_string()); Ok(()) } |