swarm repositories / source
aboutsummaryrefslogtreecommitdiff
path: root/crates/phone-opus/tests/mcp_hardening.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/phone-opus/tests/mcp_hardening.rs')
-rw-r--r--crates/phone-opus/tests/mcp_hardening.rs188
1 files changed, 43 insertions, 145 deletions
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs
index cbe3354..107a578 100644
--- a/crates/phone-opus/tests/mcp_hardening.rs
+++ b/crates/phone-opus/tests/mcp_hardening.rs
@@ -288,11 +288,23 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult {
let tools = harness.tools_list()?;
let tool_names = tool_names(&tools);
assert!(tool_names.contains(&"consult"));
- assert!(tool_names.contains(&"consult_job"));
- assert!(tool_names.contains(&"consult_wait"));
- assert!(tool_names.contains(&"consult_jobs"));
+ assert!(!tool_names.contains(&"consult_job"));
+ assert!(!tool_names.contains(&"consult_wait"));
+ assert!(!tool_names.contains(&"consult_jobs"));
assert!(tool_names.contains(&"health_snapshot"));
assert!(tool_names.contains(&"telemetry_snapshot"));
+ let consult_tool = must_some(
+ tools["result"]["tools"]
+ .as_array()
+ .into_iter()
+ .flatten()
+ .find(|tool| tool["name"] == "consult"),
+ "consult tool definition",
+ )?;
+ assert!(
+ consult_tool["inputSchema"]["properties"]["background"].is_null(),
+ "consult schema should not advertise background: {consult_tool:#}"
+ );
let health = harness.call_tool(3, "health_snapshot", json!({}))?;
assert_tool_ok(&health);
@@ -387,11 +399,14 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki
json!({
"prompt": "say oracle",
"cwd": sandbox.display().to_string(),
- "session_id": resumed_session
+ "session_id": resumed_session,
+ "background": true
}),
)?;
assert_tool_ok(&consult);
assert_eq!(tool_content(&consult)["response"].as_str(), Some("oracle"));
+ assert!(tool_content(&consult)["mode"].is_null());
+ assert!(tool_content(&consult)["job_id"].is_null());
assert_eq!(
tool_content(&consult)["session_mode"].as_str(),
Some("resumed")
@@ -539,176 +554,59 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki
}
#[test]
-fn consult_can_run_in_background_and_be_waited_on_or_polled() -> TestResult {
- let root = temp_root("consult_background")?;
+fn background_surfaces_are_hidden_from_public_mcp() -> TestResult {
+ let root = temp_root("consult_hidden_background")?;
let state_home = root.join("state-home");
- let sandbox = root.join("sandbox");
- let caller_home = root.join("caller-home");
must(fs::create_dir_all(&state_home), "create state home")?;
- must(fs::create_dir_all(&sandbox), "create sandbox")?;
- must(fs::create_dir_all(&caller_home), "create caller home")?;
- let fake_claude = root.join("claude");
- let stdout_file = root.join("stdout.json");
- let args_file = root.join("args.txt");
- let pwd_file = root.join("pwd.txt");
- write_fake_claude_script(&fake_claude)?;
- must(
- fs::write(
- &stdout_file,
- serde_json::to_string(&json!({
- "type": "result",
- "subtype": "success",
- "is_error": false,
- "duration_ms": 4321,
- "duration_api_ms": 4200,
- "num_turns": 3,
- "result": "background oracle",
- "stop_reason": "end_turn",
- "session_id": "3fc69f58-7752-4d9d-a95d-19a217814b6a",
- "total_cost_usd": 0.25,
- "usage": {
- "input_tokens": 11,
- "output_tokens": 7
- },
- "modelUsage": {
- "claude-opus-4-6": {
- "inputTokens": 11,
- "outputTokens": 7
- }
- },
- "permission_denials": [],
- "fast_mode_state": "off",
- "uuid": "uuid-456"
- }))?,
- ),
- "write fake stdout",
- )?;
-
- let claude_bin = fake_claude.display().to_string();
- let stdout_path = stdout_file.display().to_string();
- let args_path = args_file.display().to_string();
- let pwd_path = pwd_file.display().to_string();
- let caller_home_path = caller_home.display().to_string();
- let env = [
- ("HOME", caller_home_path.as_str()),
- ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
- ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()),
- ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()),
- ("PHONE_OPUS_TEST_PWD_FILE", pwd_path.as_str()),
- ("PHONE_OPUS_TEST_SLEEP_MS", "100"),
- ];
- let mut harness = McpHarness::spawn(&state_home, &env)?;
+ let mut harness = McpHarness::spawn(&state_home, &[])?;
let _ = harness.initialize()?;
harness.notify_initialized()?;
- let submit = harness.call_tool(
+ let consult_job = harness.call_tool(
3,
- "consult",
+ "consult_job",
json!({
- "prompt": "background oracle",
- "cwd": sandbox.display().to_string(),
- "background": true
+ "job_id": "00000000-0000-0000-0000-000000000000"
}),
)?;
- assert_tool_ok(&submit);
- assert_eq!(tool_content(&submit)["mode"].as_str(), Some("background"));
+ assert_tool_error(&consult_job);
assert!(
- tool_content(&submit)["follow_up_tools"]
+ consult_job["result"]["content"]
.as_array()
.into_iter()
.flatten()
- .any(|value| value == "consult_wait")
+ .filter_map(|entry| entry["text"].as_str())
+ .any(|text| text.contains("unknown tool `consult_job`"))
);
- let job_id = must_some(
- tool_content(&submit)["job_id"].as_str().map(str::to_owned),
- "background job id",
- )?;
- let _ = uuid::Uuid::parse_str(&job_id)
- .map_err(|error| io::Error::other(format!("job id uuid parse: {error}")))?;
- let timed_out = harness.call_tool(
+ let consult_wait = harness.call_tool(
4,
"consult_wait",
json!({
- "job_id": job_id,
- "timeout_ms": 0,
- "render": "json"
+ "job_id": "00000000-0000-0000-0000-000000000000"
}),
)?;
- assert_tool_ok(&timed_out);
- assert_eq!(tool_content(&timed_out)["timed_out"].as_bool(), Some(true));
- assert_eq!(tool_content(&timed_out)["done"].as_bool(), Some(false));
-
- let waited = harness.call_tool(
- 5,
- "consult_wait",
- json!({
- "job_id": job_id,
- "timeout_ms": 5_000,
- "poll_interval_ms": 10,
- "render": "json"
- }),
- )?;
- assert_tool_ok(&waited);
- assert_eq!(tool_content(&waited)["timed_out"].as_bool(), Some(false));
- assert_eq!(tool_content(&waited)["status"].as_str(), Some("succeeded"));
+ assert_tool_error(&consult_wait);
assert!(
- tool_content(&waited)["waited_ms"]
- .as_u64()
- .is_some_and(|value| value >= 50)
- );
- assert_eq!(
- tool_content(&waited)["result"]["response"].as_str(),
- Some("background oracle")
- );
- let persisted_output_path = must_some(
- tool_content(&waited)["result"]["persisted_output_path"]
- .as_str()
- .map(str::to_owned),
- "background persisted output path",
- )?;
- assert!(persisted_output_path.starts_with("/tmp/phone_opus-consults/"));
- assert!(persisted_output_path.contains("3fc69f58-7752-4d9d-a95d-19a217814b6a"));
- let persisted_output = must(
- fs::read_to_string(&persisted_output_path),
- "read background persisted consult output",
- )?;
- let persisted_output: Value = must(
- serde_json::from_str(&persisted_output),
- "parse background persisted consult output",
- )?;
- assert_eq!(
- persisted_output["response"].as_str(),
- Some("background oracle")
+ consult_wait["result"]["content"]
+ .as_array()
+ .into_iter()
+ .flatten()
+ .filter_map(|entry| entry["text"].as_str())
+ .any(|text| text.contains("unknown tool `consult_wait`"))
);
- let job = harness.call_tool(
- 6,
- "consult_job",
- json!({
- "job_id": job_id,
- "render": "json"
- }),
- )?;
- assert_tool_ok(&job);
- assert_eq!(tool_content(&job)["status"].as_str(), Some("succeeded"));
-
- let jobs = harness.call_tool(7, "consult_jobs", json!({ "render": "json" }))?;
- assert_tool_ok(&jobs);
+ let consult_jobs = harness.call_tool(5, "consult_jobs", json!({}))?;
+ assert_tool_error(&consult_jobs);
assert!(
- tool_content(&jobs)["jobs"]
+ consult_jobs["result"]["content"]
.as_array()
.into_iter()
.flatten()
- .any(|value| value["job_id"] == job_id)
+ .filter_map(|entry| entry["text"].as_str())
+ .any(|text| text.contains("unknown tool `consult_jobs`"))
);
-
- let args = must(fs::read_to_string(&args_file), "read fake args file")?;
- assert!(args.contains(PROMPT_PREFIX));
- assert!(args.contains("background oracle"));
- let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?;
- assert_eq!(pwd.trim(), sandbox.display().to_string());
Ok(())
}