diff options
Diffstat (limited to 'crates/phone-opus/tests')
| -rw-r--r-- | crates/phone-opus/tests/mcp_hardening.rs | 79 |
1 files changed, 57 insertions, 22 deletions
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs index f4a8a89..cbe3354 100644 --- a/crates/phone-opus/tests/mcp_hardening.rs +++ b/crates/phone-opus/tests/mcp_hardening.rs @@ -6,7 +6,6 @@ use std::io::{self, BufRead, BufReader, Write}; use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; -use std::time::Duration; use libmcp_testkit::read_json_lines; use serde as _; @@ -197,6 +196,9 @@ fi if [ -n "${PHONE_OPUS_TEST_ARGS_FILE:-}" ]; then printf '%s\n' "$@" >"$PHONE_OPUS_TEST_ARGS_FILE" fi +if [ -n "${PHONE_OPUS_TEST_SLEEP_MS:-}" ]; then + python3 -c 'import os,time; time.sleep(int(os.environ["PHONE_OPUS_TEST_SLEEP_MS"]) / 1000.0)' +fi if [ -n "${PHONE_OPUS_TEST_CWD_WRITE_PROBE_FILE:-}" ]; then probe_target="${PWD}/.phone_opus_write_probe" probe_error="${PHONE_OPUS_TEST_CWD_WRITE_ERROR_FILE:-/tmp/phone-opus-write.err}" @@ -287,6 +289,7 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult { let tool_names = tool_names(&tools); assert!(tool_names.contains(&"consult")); assert!(tool_names.contains(&"consult_job")); + assert!(tool_names.contains(&"consult_wait")); assert!(tool_names.contains(&"consult_jobs")); assert!(tool_names.contains(&"health_snapshot")); assert!(tool_names.contains(&"telemetry_snapshot")); @@ -536,7 +539,7 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki } #[test] -fn consult_can_run_in_background_and_be_polled() -> TestResult { +fn consult_can_run_in_background_and_be_waited_on_or_polled() -> TestResult { let root = temp_root("consult_background")?; let state_home = root.join("state-home"); let sandbox = root.join("sandbox"); @@ -593,6 +596,7 @@ fn consult_can_run_in_background_and_be_polled() -> TestResult { ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()), ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()), ("PHONE_OPUS_TEST_PWD_FILE", pwd_path.as_str()), + ("PHONE_OPUS_TEST_SLEEP_MS", "100"), ]; let mut harness = McpHarness::spawn(&state_home, &env)?; let _ = harness.initialize()?; @@ -609,6 +613,13 @@ fn consult_can_run_in_background_and_be_polled() -> TestResult { )?; assert_tool_ok(&submit); assert_eq!(tool_content(&submit)["mode"].as_str(), Some("background")); + assert!( + tool_content(&submit)["follow_up_tools"] + .as_array() + .into_iter() + .flatten() + .any(|value| value == "consult_wait") + ); let job_id = must_some( tool_content(&submit)["job_id"].as_str().map(str::to_owned), "background job id", @@ -616,30 +627,43 @@ fn consult_can_run_in_background_and_be_polled() -> TestResult { let _ = uuid::Uuid::parse_str(&job_id) .map_err(|error| io::Error::other(format!("job id uuid parse: {error}")))?; - let mut job = Value::Null; - for _ in 0..100 { - job = harness.call_tool( - 4, - "consult_job", - json!({ - "job_id": job_id, - "render": "json" - }), - )?; - assert_tool_ok(&job); - if tool_content(&job)["status"].as_str() == Some("succeeded") { - break; - } - std::thread::sleep(Duration::from_millis(10)); - } + let timed_out = harness.call_tool( + 4, + "consult_wait", + json!({ + "job_id": job_id, + "timeout_ms": 0, + "render": "json" + }), + )?; + assert_tool_ok(&timed_out); + assert_eq!(tool_content(&timed_out)["timed_out"].as_bool(), Some(true)); + assert_eq!(tool_content(&timed_out)["done"].as_bool(), Some(false)); - assert_eq!(tool_content(&job)["status"].as_str(), Some("succeeded")); + let waited = harness.call_tool( + 5, + "consult_wait", + json!({ + "job_id": job_id, + "timeout_ms": 5_000, + "poll_interval_ms": 10, + "render": "json" + }), + )?; + assert_tool_ok(&waited); + assert_eq!(tool_content(&waited)["timed_out"].as_bool(), Some(false)); + assert_eq!(tool_content(&waited)["status"].as_str(), Some("succeeded")); + assert!( + tool_content(&waited)["waited_ms"] + .as_u64() + .is_some_and(|value| value >= 50) + ); assert_eq!( - tool_content(&job)["result"]["response"].as_str(), + tool_content(&waited)["result"]["response"].as_str(), Some("background oracle") ); let persisted_output_path = must_some( - tool_content(&job)["result"]["persisted_output_path"] + tool_content(&waited)["result"]["persisted_output_path"] .as_str() .map(str::to_owned), "background persisted output path", @@ -659,7 +683,18 @@ fn consult_can_run_in_background_and_be_polled() -> TestResult { Some("background oracle") ); - let jobs = harness.call_tool(5, "consult_jobs", json!({ "render": "json" }))?; + let job = harness.call_tool( + 6, + "consult_job", + json!({ + "job_id": job_id, + "render": "json" + }), + )?; + assert_tool_ok(&job); + assert_eq!(tool_content(&job)["status"].as_str(), Some("succeeded")); + + let jobs = harness.call_tool(7, "consult_jobs", json!({ "render": "json" }))?; assert_tool_ok(&jobs); assert!( tool_content(&jobs)["jobs"] |