swarm repositories / source
about | summary | refs | log | tree | commit | diff
path: root/crates/phone-opus/tests/mcp_hardening.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/phone-opus/tests/mcp_hardening.rs')
-rw-r--r-- crates/phone-opus/tests/mcp_hardening.rs | 121
1 files changed, 121 insertions, 0 deletions
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs
index e9ee06b..a1fb6ae 100644
--- a/crates/phone-opus/tests/mcp_hardening.rs
+++ b/crates/phone-opus/tests/mcp_hardening.rs
@@ -6,6 +6,7 @@ use std::io::{self, BufRead, BufReader, Write};
use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf};
use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio};
+use std::time::Duration;
use libmcp_testkit::read_json_lines;
use serde as _;
@@ -221,6 +222,8 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult {
let tools = harness.tools_list()?;
let tool_names = tool_names(&tools);
assert!(tool_names.contains(&"consult"));
+ assert!(tool_names.contains(&"consult_job"));
+ assert!(tool_names.contains(&"consult_jobs"));
assert!(tool_names.contains(&"health_snapshot"));
assert!(tool_names.contains(&"telemetry_snapshot"));
@@ -371,6 +374,124 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki
}
#[test]
+fn consult_can_run_in_background_and_be_polled() -> TestResult { // submits a background consult, polls the job until it succeeds, then checks the job listing and the fixture files
+ let root = temp_root("consult_background")?;
+ let state_home = root.join("state-home");
+ let sandbox = root.join("sandbox");
+ must(fs::create_dir_all(&state_home), "create state home")?;
+ must(fs::create_dir_all(&sandbox), "create sandbox")?;
+ // Fixture paths under the temp root. NOTE(review): presumably the fake claude script replays stdout.json and records its argv/cwd into args.txt/pwd.txt; confirm in write_fake_claude_script.
+ let fake_claude = root.join("claude");
+ let stdout_file = root.join("stdout.json");
+ let args_file = root.join("args.txt");
+ let pwd_file = root.join("pwd.txt");
+ write_fake_claude_script(&fake_claude)?;
+ must(
+ fs::write(
+ &stdout_file,
+ serde_json::to_string(&json!({
+ "type": "result",
+ "subtype": "success",
+ "is_error": false,
+ "duration_ms": 4321,
+ "duration_api_ms": 4200,
+ "num_turns": 3,
+ "result": "background oracle",
+ "stop_reason": "end_turn",
+ "session_id": "3fc69f58-7752-4d9d-a95d-19a217814b6a",
+ "total_cost_usd": 0.25,
+ "usage": {
+ "input_tokens": 11,
+ "output_tokens": 7
+ },
+ "modelUsage": {
+ "claude-opus-4-6": {
+ "inputTokens": 11,
+ "outputTokens": 7
+ }
+ },
+ "permission_denials": [],
+ "fast_mode_state": "off",
+ "uuid": "uuid-456"
+ }))?,
+ ),
+ "write fake stdout",
+ )?;
+ // Hand the fixture paths to the harness as PHONE_OPUS_* environment variables, spawn it, then run initialize followed by notify_initialized.
+ let claude_bin = fake_claude.display().to_string();
+ let stdout_path = stdout_file.display().to_string();
+ let args_path = args_file.display().to_string();
+ let pwd_path = pwd_file.display().to_string();
+ let env = [
+ ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
+ ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()),
+ ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()),
+ ("PHONE_OPUS_TEST_PWD_FILE", pwd_path.as_str()),
+ ];
+ let mut harness = McpHarness::spawn(&state_home, &env)?;
+ let _ = harness.initialize()?;
+ harness.notify_initialized()?;
+ // Submit with "background": true; the reply must report mode "background" and carry a job_id string that parses as a UUID.
+ let submit = harness.call_tool(
+ 3,
+ "consult",
+ json!({
+ "prompt": "background oracle",
+ "cwd": sandbox.display().to_string(),
+ "background": true
+ }),
+ )?;
+ assert_tool_ok(&submit);
+ assert_eq!(tool_content(&submit)["mode"].as_str(), Some("background"));
+ let job_id = must_some(
+ tool_content(&submit)["job_id"].as_str().map(str::to_owned),
+ "background job id",
+ )?;
+ let _ = uuid::Uuid::parse_str(&job_id)
+ .map_err(|error| io::Error::other(format!("job id uuid parse: {error}")))?;
+ // Poll consult_job at most 100 times, sleeping 10 ms after every reply whose status is not "succeeded"; each reply must itself pass assert_tool_ok.
+ let mut job = Value::Null;
+ for _ in 0..100 {
+ job = harness.call_tool(
+ 4,
+ "consult_job",
+ json!({
+ "job_id": job_id,
+ "render": "json"
+ }),
+ )?;
+ assert_tool_ok(&job);
+ if tool_content(&job)["status"].as_str() == Some("succeeded") {
+ break;
+ }
+ std::thread::sleep(Duration::from_millis(10));
+ }
+ // If the loop ran out of attempts, `job` holds the last reply and the status assertion below fails.
+ assert_eq!(tool_content(&job)["status"].as_str(), Some("succeeded"));
+ assert_eq!(
+ tool_content(&job)["result"]["response"].as_str(),
+ Some("background oracle")
+ );
+ // The consult_jobs listing must contain an entry whose job_id equals the submitted one; a missing or non-array "jobs" value yields no entries and fails the assert.
+ let jobs = harness.call_tool(5, "consult_jobs", json!({ "render": "json" }))?;
+ assert_tool_ok(&jobs);
+ assert!(
+ tool_content(&jobs)["jobs"]
+ .as_array()
+ .into_iter()
+ .flatten()
+ .any(|value| value["job_id"] == job_id)
+ );
+ // Both fixture files must be readable by now: args must contain PROMPT_PREFIX and the prompt text, and the trimmed pwd must equal the sandbox path.
+ let args = must(fs::read_to_string(&args_file), "read fake args file")?;
+ assert!(args.contains(PROMPT_PREFIX));
+ assert!(args.contains("background oracle"));
+ let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?;
+ assert_eq!(pwd.trim(), sandbox.display().to_string());
+ Ok(())
+}
+
+#[test]
fn consult_rejects_invalid_session_handles() -> TestResult {
let root = temp_root("consult_invalid_session")?;
let state_home = root.join("state-home");