diff options
Diffstat (limited to 'crates/phone-opus/tests')
| -rw-r--r-- | crates/phone-opus/tests/mcp_hardening.rs | 121 |
1 files changed, 121 insertions, 0 deletions
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs index e9ee06b..a1fb6ae 100644 --- a/crates/phone-opus/tests/mcp_hardening.rs +++ b/crates/phone-opus/tests/mcp_hardening.rs @@ -6,6 +6,7 @@ use std::io::{self, BufRead, BufReader, Write}; use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; +use std::time::Duration; use libmcp_testkit::read_json_lines; use serde as _; @@ -221,6 +222,8 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult { let tools = harness.tools_list()?; let tool_names = tool_names(&tools); assert!(tool_names.contains(&"consult")); + assert!(tool_names.contains(&"consult_job")); + assert!(tool_names.contains(&"consult_jobs")); assert!(tool_names.contains(&"health_snapshot")); assert!(tool_names.contains(&"telemetry_snapshot")); @@ -371,6 +374,124 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki } #[test] +fn consult_can_run_in_background_and_be_polled() -> TestResult { + /* Submits a `consult` call with `background: true`, polls `consult_job` until the job reports `succeeded`, then checks that `consult_jobs` lists the job and that the args and pwd files contain the prompt and the sandbox path. */ let root = temp_root("consult_background")?; + let state_home = root.join("state-home"); + let sandbox = root.join("sandbox"); + must(fs::create_dir_all(&state_home), "create state home")?; + must(fs::create_dir_all(&sandbox), "create sandbox")?; + + let fake_claude = root.join("claude"); + let stdout_file = root.join("stdout.json"); + let args_file = root.join("args.txt"); + let pwd_file = root.join("pwd.txt"); + write_fake_claude_script(&fake_claude)?; + /* Canned result payload written to stdout.json. Presumably the fake claude script replays it through PHONE_OPUS_TEST_STDOUT_FILE; confirm against write_fake_claude_script. */ must( + fs::write( + &stdout_file, + serde_json::to_string(&json!({ + "type": "result", + "subtype": "success", + "is_error": false, + "duration_ms": 4321, + "duration_api_ms": 4200, + "num_turns": 3, + "result": "background oracle", + "stop_reason": "end_turn", + "session_id": "3fc69f58-7752-4d9d-a95d-19a217814b6a", + "total_cost_usd": 0.25, + "usage": { + "input_tokens": 11, + "output_tokens": 7 + }, + "modelUsage": { + "claude-opus-4-6": { 
"inputTokens": 11, + "outputTokens": 7 + } + }, + "permission_denials": [], + "fast_mode_state": "off", + "uuid": "uuid-456" + }))?, + ), + "write fake stdout", + )?; + + /* The paths are rendered into owned Strings first so that the &str entries of `env` can borrow from them. */ let claude_bin = fake_claude.display().to_string(); + let stdout_path = stdout_file.display().to_string(); + let args_path = args_file.display().to_string(); + let pwd_path = pwd_file.display().to_string(); + let env = [ + ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), + ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()), + ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()), + ("PHONE_OPUS_TEST_PWD_FILE", pwd_path.as_str()), + ]; + let mut harness = McpHarness::spawn(&state_home, &env)?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + + let submit = harness.call_tool( + 3, + "consult", + json!({ + "prompt": "background oracle", + "cwd": sandbox.display().to_string(), + "background": true + }), + )?; + assert_tool_ok(&submit); + assert_eq!(tool_content(&submit)["mode"].as_str(), Some("background")); + let job_id = must_some( + tool_content(&submit)["job_id"].as_str().map(str::to_owned), + "background job id", + )?; + /* The job id must parse as a UUID; the parsed value itself is discarded and a parse failure is turned into an io::Error. */ let _ = uuid::Uuid::parse_str(&job_id) + .map_err(|error| io::Error::other(format!("job id uuid parse: {error}")))?; + + /* Poll up to 100 times with a 10 ms sleep between attempts (about one second in total). Every response must pass assert_tool_ok, and the loop exits early once the status is `succeeded`. NOTE(review): each poll reuses request id 4; confirm the harness accepts repeated ids. */ let mut job = Value::Null; + for _ in 0..100 { + job = harness.call_tool( + 4, + "consult_job", + json!({ + "job_id": job_id, + "render": "json" + }), + )?; + assert_tool_ok(&job); + if tool_content(&job)["status"].as_str() == Some("succeeded") { + break; + } + std::thread::sleep(Duration::from_millis(10)); + } + + /* Fails here when the job never reached `succeeded` within the polling budget. */ assert_eq!(tool_content(&job)["status"].as_str(), Some("succeeded")); + assert_eq!( + tool_content(&job)["result"]["response"].as_str(), + Some("background oracle") + ); + + let jobs = harness.call_tool(5, "consult_jobs", json!({ "render": "json" }))?; + assert_tool_ok(&jobs); + /* as_array() returns an Option; into_iter().flatten() iterates nothing when it is None, so `any` is false and the assertion fails. */ assert!( + tool_content(&jobs)["jobs"] + .as_array() + .into_iter() + .flatten() + .any(|value| value["job_id"] == job_id) + ); + + /* The args and pwd files are expected to hold the invocation arguments and working directory, presumably recorded by the fake claude script; confirm against write_fake_claude_script. */ let args = 
must(fs::read_to_string(&args_file), "read fake args file")?; + assert!(args.contains(PROMPT_PREFIX)); + assert!(args.contains("background oracle")); + let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?; + assert_eq!(pwd.trim(), sandbox.display().to_string()); + Ok(()) +} + +#[test] fn consult_rejects_invalid_session_handles() -> TestResult { let root = temp_root("consult_invalid_session")?; let state_home = root.join("state-home"); |