diff options
| author | main <main@swarm.moe> | 2026-03-25 12:49:33 -0400 |
|---|---|---|
| committer | main <main@swarm.moe> | 2026-03-25 12:49:33 -0400 |
| commit | 7f28cb7356092934a25ab5fd277dce38b91eb8c0 (patch) | |
| tree | 054bcf33708f253e5aaa8668105dc066cc7aa73c /crates/phone-opus/tests/mcp_hardening.rs | |
| parent | da6410fd33148e7dd0fec9190a1624e34f745b96 (diff) | |
| download | phone_opus-7f28cb7356092934a25ab5fd277dce38b91eb8c0.zip | |
Fail fast on inert Claude consults
Diffstat (limited to 'crates/phone-opus/tests/mcp_hardening.rs')
| -rw-r--r-- | crates/phone-opus/tests/mcp_hardening.rs | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs index ed996db..754ee79 100644 --- a/crates/phone-opus/tests/mcp_hardening.rs +++ b/crates/phone-opus/tests/mcp_hardening.rs @@ -6,6 +6,7 @@ use std::io::{self, BufRead, BufReader, Write}; use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; +use std::time::{Duration, Instant}; use libmcp_testkit::read_json_lines; use serde as _; @@ -196,6 +197,56 @@ fi if [ -n "${PHONE_OPUS_TEST_ARGS_FILE:-}" ]; then printf '%s\n' "$@" >"$PHONE_OPUS_TEST_ARGS_FILE" fi +if [ -n "${PHONE_OPUS_TEST_TRANSCRIPT_HEARTBEAT_MS:-}" ]; then + session_id="" + previous="" + for argument in "$@"; do + if [ "$previous" = "--session-id" ]; then + session_id="$argument" + break + fi + previous="$argument" + done + if [ -n "$session_id" ]; then + SESSION_ID="$session_id" python3 - <<'PY' +import os +import time +from pathlib import Path + +session_id = os.environ["SESSION_ID"] +cwd = Path.cwd() +slug = [] +last_dash = False +for ch in str(cwd): + nxt = ch.lower() if ch.isalnum() else "-" + if nxt == "-": + if not slug: + slug.append("-") + last_dash = True + continue + if last_dash: + continue + last_dash = True + else: + last_dash = False + slug.append(nxt) +transcript = ( + Path(os.environ["HOME"]) + / ".claude" + / "projects" + / "".join(slug) + / f"{session_id}.jsonl" +) +transcript.parent.mkdir(parents=True, exist_ok=True) +interval = int(os.environ["PHONE_OPUS_TEST_TRANSCRIPT_HEARTBEAT_MS"]) / 1000.0 +count = int(os.environ.get("PHONE_OPUS_TEST_TRANSCRIPT_HEARTBEAT_COUNT", "1")) +for index in range(count): + with transcript.open("a", encoding="utf-8") as handle: + handle.write(f'{{"kind":"heartbeat","index":{index}}}\n') + time.sleep(interval) +PY + fi +fi if [ -n "${PHONE_OPUS_TEST_SLEEP_MS:-}" ]; then python3 -c 'import os,time; time.sleep(int(os.environ["PHONE_OPUS_TEST_SLEEP_MS"]) / 1000.0)' fi @@ -665,6 +716,93 @@ fn consult_is_one_shot_and_hides_session_state() -> TestResult { } #[test] +fn silent_claude_is_failed_fast_when_progress_stalls() -> TestResult { + let root = temp_root("consult_stall_timeout")?; + let state_home = root.join("state-home"); + let caller_home = root.join("caller-home"); + let fake_claude = root.join("claude"); + must(fs::create_dir_all(&state_home), "create state home")?; + must(fs::create_dir_all(&caller_home), "create caller home")?; + seed_caller_claude_home(&caller_home)?; + write_fake_claude_script(&fake_claude)?; + + let claude_bin = fake_claude.display().to_string(); + let caller_home_path = caller_home.display().to_string(); + let env = [ + ("HOME", caller_home_path.as_str()), + ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), + ("PHONE_OPUS_TEST_SLEEP_MS", "5000"), + ("PHONE_OPUS_CLAUDE_STALL_TIMEOUT_MS", "750"), + ]; + let mut harness = McpHarness::spawn(&state_home, &env)?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + + let started = Instant::now(); + let stalled = harness.call_tool(3, "consult", json!({ "prompt": "stall" }))?; + assert_tool_error(&stalled); + assert!(started.elapsed() < Duration::from_secs(4)); + assert_eq!( + tool_content(&stalled)["fault"]["class"].as_str(), + Some("downstream") + ); + assert!( + tool_content(&stalled)["fault"]["detail"] + .as_str() + .is_some_and(|value| value.contains("no observable progress")) + ); + assert_eq!( + tool_content(&stalled)["context"]["consult"]["retry_hint"].as_str(), + Some("Claude stopped making observable progress; retry the consult") + ); + Ok(()) +} + +#[test] +fn transcript_progress_prevents_false_stall_timeout() -> TestResult { + let root = temp_root("consult_transcript_progress")?; + let state_home = root.join("state-home"); + let caller_home = root.join("caller-home"); + let fake_claude = root.join("claude"); + let stdout_file = root.join("stdout.json"); + must(fs::create_dir_all(&state_home), "create state home")?; + must(fs::create_dir_all(&caller_home), "create caller home")?; + seed_caller_claude_home(&caller_home)?; + write_fake_claude_script(&fake_claude)?; + write_fake_claude_json_success( + &stdout_file, + "heartbeat oracle", + "1bfb2c8a-c6d8-42f6-8f18-6b3c70ad2e11", + "uuid-heartbeat", + )?; + + let claude_bin = fake_claude.display().to_string(); + let stdout_path = stdout_file.display().to_string(); + let caller_home_path = caller_home.display().to_string(); + let env = [ + ("HOME", caller_home_path.as_str()), + ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), + ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()), + ("PHONE_OPUS_TEST_TRANSCRIPT_HEARTBEAT_MS", "200"), + ("PHONE_OPUS_TEST_TRANSCRIPT_HEARTBEAT_COUNT", "5"), + ("PHONE_OPUS_CLAUDE_STALL_TIMEOUT_MS", "500"), + ]; + let mut harness = McpHarness::spawn(&state_home, &env)?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + + let started = Instant::now(); + let consult = harness.call_tool(3, "consult", json!({ "prompt": "heartbeat" }))?; + assert_tool_ok(&consult); + assert!(started.elapsed() >= Duration::from_millis(800)); + assert_eq!( + tool_content(&consult)["response"].as_str(), + Some("heartbeat oracle") + ); + Ok(()) +} + +#[test] fn background_surfaces_are_hidden_from_public_mcp() -> TestResult { let root = temp_root("consult_hidden_background")?; let state_home = root.join("state-home"); |