use clap as _; use dirs as _; use libmcp as _; use std::fs; use std::io::{self, BufRead, BufReader, Write}; use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; use std::time::{Duration, Instant}; use libmcp_testkit::read_json_lines; use serde as _; use serde_json::{Value, json}; use thiserror as _; use time as _; use users as _; use uuid as _; use phone_opus_test_support::PROMPT_PREFIX; mod phone_opus_test_support { pub(super) const PROMPT_PREFIX: &str = "You are being invoked in a read-only consultation mode by another model."; } type TestResult = Result>; fn must( result: Result, context: C, ) -> TestResult { result.map_err(|error| io::Error::other(format!("{context}: {error}")).into()) } fn must_some(value: Option, context: &str) -> TestResult { value.ok_or_else(|| io::Error::other(context).into()) } fn temp_root(name: &str) -> TestResult { let root = std::env::temp_dir().join(format!( "phone_opus_{name}_{}_{}", std::process::id(), must( std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH), "current time after unix epoch", )? .as_nanos() )); must(fs::create_dir_all(&root), "create temp root")?; Ok(root) } fn binary_path() -> PathBuf { PathBuf::from(env!("CARGO_BIN_EXE_phone-opus")) } struct McpHarness { child: Child, stdin: ChildStdin, stdout: BufReader, } impl McpHarness { fn spawn(state_home: &Path, extra_env: &[(&str, &str)]) -> TestResult { let mut command = Command::new(binary_path()); let _ = command .arg("mcp") .arg("serve") .env("XDG_STATE_HOME", state_home) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .stderr(Stdio::inherit()); for (key, value) in extra_env { let _ = command.env(key, value); } let mut child = must(command.spawn(), "spawn mcp host")?; let stdin = must_some(child.stdin.take(), "host stdin")?; let stdout = BufReader::new(must_some(child.stdout.take(), "host stdout")?); Ok(Self { child, stdin, stdout, }) } fn initialize(&mut self) -> TestResult { self.request(json!({ "jsonrpc": "2.0", "id": 1, "method": "initialize", "params": { "protocolVersion": "2025-11-25", "capabilities": {}, "clientInfo": { "name": "mcp-hardening-test", "version": "0" } } })) } fn notify_initialized(&mut self) -> TestResult { self.notify(json!({ "jsonrpc": "2.0", "method": "notifications/initialized", })) } fn tools_list(&mut self) -> TestResult { self.request(json!({ "jsonrpc": "2.0", "id": 2, "method": "tools/list", "params": {}, })) } fn call_tool(&mut self, id: u64, name: &str, arguments: Value) -> TestResult { self.request(json!({ "jsonrpc": "2.0", "id": id, "method": "tools/call", "params": { "name": name, "arguments": arguments, } })) } fn request(&mut self, message: Value) -> TestResult { let encoded = must(serde_json::to_string(&message), "request json")?; must(writeln!(self.stdin, "{encoded}"), "write request")?; must(self.stdin.flush(), "flush request")?; let mut line = String::new(); let byte_count = must(self.stdout.read_line(&mut line), "read response")?; if byte_count == 0 { return Err(io::Error::other("unexpected EOF reading response").into()); } must(serde_json::from_str(&line), "response json") } fn notify(&mut self, message: Value) -> TestResult { let encoded = must(serde_json::to_string(&message), "notify json")?; must(writeln!(self.stdin, "{encoded}"), "write notify")?; must(self.stdin.flush(), "flush notify")?; Ok(()) } } impl Drop for McpHarness { fn drop(&mut self) { let _ = self.child.kill(); let _ = self.child.wait(); } } fn assert_tool_ok(response: &Value) { assert_eq!( response["result"]["isError"].as_bool(), Some(false), "tool response unexpectedly errored: {response:#}" ); } fn assert_tool_error(response: &Value) { assert_eq!( response["result"]["isError"].as_bool(), Some(true), "tool response unexpectedly succeeded: {response:#}" ); } fn tool_content(response: &Value) -> &Value { &response["result"]["structuredContent"] } fn tool_names(response: &Value) -> Vec<&str> { response["result"]["tools"] .as_array() .into_iter() .flatten() .filter_map(|tool| tool["name"].as_str()) .collect() } fn write_fake_claude_script(path: &Path) -> TestResult { let script = r#"#!/bin/sh set -eu if [ -n "${PHONE_OPUS_TEST_PWD_FILE:-}" ]; then pwd >"$PHONE_OPUS_TEST_PWD_FILE" fi if [ -n "${PHONE_OPUS_TEST_ENV_FILE:-}" ]; then { printf 'HOME=%s\n' "${HOME:-}" printf 'XDG_CONFIG_HOME=%s\n' "${XDG_CONFIG_HOME:-}" printf 'XDG_CACHE_HOME=%s\n' "${XDG_CACHE_HOME:-}" printf 'XDG_STATE_HOME=%s\n' "${XDG_STATE_HOME:-}" } >"$PHONE_OPUS_TEST_ENV_FILE" fi if [ -n "${PHONE_OPUS_TEST_ARGS_FILE:-}" ]; then printf '%s\n' "$@" >"$PHONE_OPUS_TEST_ARGS_FILE" fi if [ -n "${PHONE_OPUS_TEST_TRANSCRIPT_HEARTBEAT_MS:-}" ]; then session_id="" previous="" for argument in "$@"; do if [ "$previous" = "--session-id" ]; then session_id="$argument" break fi previous="$argument" done if [ -n "$session_id" ]; then SESSION_ID="$session_id" python3 - <<'PY' import os import time from pathlib import Path session_id = os.environ["SESSION_ID"] cwd = Path.cwd() slug = [] last_dash = False for ch in str(cwd): nxt = ch.lower() if ch.isalnum() else "-" if nxt == "-": if not slug: slug.append("-") last_dash = True continue if last_dash: continue last_dash = True else: last_dash = False slug.append(nxt) transcript = ( Path(os.environ["HOME"]) / ".claude" / "projects" / "".join(slug) / f"{session_id}.jsonl" ) transcript.parent.mkdir(parents=True, exist_ok=True) interval = int(os.environ["PHONE_OPUS_TEST_TRANSCRIPT_HEARTBEAT_MS"]) / 1000.0 count = int(os.environ.get("PHONE_OPUS_TEST_TRANSCRIPT_HEARTBEAT_COUNT", "1")) for index in range(count): with transcript.open("a", encoding="utf-8") as handle: handle.write(f'{{"kind":"heartbeat","index":{index}}}\n') time.sleep(interval) PY fi fi if [ -n "${PHONE_OPUS_TEST_SLEEP_MS:-}" ]; then python3 -c 'import os,time; time.sleep(int(os.environ["PHONE_OPUS_TEST_SLEEP_MS"]) / 1000.0)' fi if [ -n "${PHONE_OPUS_TEST_CWD_WRITE_PROBE_FILE:-}" ]; then probe_target="${PWD}/.phone_opus_write_probe" probe_error="${PHONE_OPUS_TEST_CWD_WRITE_ERROR_FILE:-/tmp/phone-opus-write.err}" if printf probe >"$probe_target" 2>"$probe_error"; then printf 'write_succeeded\n' >"$PHONE_OPUS_TEST_CWD_WRITE_PROBE_FILE" rm -f "$probe_target" else printf 'write_failed\n' >"$PHONE_OPUS_TEST_CWD_WRITE_PROBE_FILE" fi fi if [ -n "${PHONE_OPUS_TEST_CREDENTIAL_WRITE_PROBE_FILE:-}" ]; then credentials_target="${HOME}/.claude/.credentials.json" credentials_error="${PHONE_OPUS_TEST_CREDENTIAL_WRITE_ERROR_FILE:-/tmp/phone-opus-credentials.err}" if : >>"$credentials_target" 2>"$credentials_error"; then printf 'write_succeeded\n' >"$PHONE_OPUS_TEST_CREDENTIAL_WRITE_PROBE_FILE" else printf 'write_failed\n' >"$PHONE_OPUS_TEST_CREDENTIAL_WRITE_PROBE_FILE" fi fi if [ -n "${PHONE_OPUS_TEST_STDERR:-}" ]; then printf '%s\n' "$PHONE_OPUS_TEST_STDERR" >&2 fi if [ -n "${PHONE_OPUS_TEST_STDOUT_FILE:-}" ]; then cat "$PHONE_OPUS_TEST_STDOUT_FILE" fi exit "${PHONE_OPUS_TEST_EXIT_CODE:-0}" "#; must(fs::write(path, script), "write fake claude script")?; let mut permissions = must(fs::metadata(path), "fake claude metadata")?.permissions(); permissions.set_mode(0o755); must( fs::set_permissions(path, permissions), "chmod fake claude script", )?; Ok(()) } fn seed_caller_claude_home(home: &Path) -> TestResult { let claude_root = home.join(".claude"); must( fs::create_dir_all(claude_root.join(".claude")), "create caller .claude tree", )?; must( fs::write( claude_root.join(".credentials.json"), "{\n \"auth\": \"token\"\n}\n", ), "write caller credentials", )?; must( fs::write( claude_root.join("settings.json"), "{\n \"theme\": \"default\"\n}\n", ), "write caller settings", )?; must( fs::write( claude_root.join("settings.local.json"), "{\n \"profile\": \"local\"\n}\n", ), "write caller local settings", )?; must( fs::write( claude_root.join(".claude").join("settings.local.json"), "{\n \"sandbox\": \"read-only\"\n}\n", ), "write nested caller local settings", )?; must( fs::write( claude_root.join("CLAUDE.md"), "Global Claude instructions for phone_opus tests.\n", ), "write caller CLAUDE.md", )?; Ok(()) } fn write_fake_claude_json_success( path: &Path, result: &str, session_id: &str, uuid: &str, ) -> TestResult { must( fs::write( path, format!( "{}\n", serde_json::to_string(&json!({ "type": "result", "subtype": "success", "is_error": false, "duration_ms": 1234, "duration_api_ms": 1200, "num_turns": 2, "result": result, "stop_reason": "end_turn", "session_id": session_id, "total_cost_usd": 0.125, "usage": { "input_tokens": 10, "output_tokens": 5 }, "modelUsage": { "claude-opus-4-6": { "inputTokens": 10, "outputTokens": 5 } }, "permission_denials": [], "fast_mode_state": "off", "uuid": uuid }))? ), ), "write fake stdout", ) } #[test] fn cold_start_exposes_consult_and_ops_tools() -> TestResult { let root = temp_root("cold_start")?; let state_home = root.join("state-home"); must(fs::create_dir_all(&state_home), "create state home")?; let mut harness = McpHarness::spawn(&state_home, &[])?; let initialize = harness.initialize()?; assert_eq!( initialize["result"]["protocolVersion"].as_str(), Some("2025-11-25") ); harness.notify_initialized()?; let tools = harness.tools_list()?; let tool_names = tool_names(&tools); assert!(tool_names.contains(&"consult")); assert!(!tool_names.contains(&"consult_job")); assert!(!tool_names.contains(&"consult_wait")); assert!(!tool_names.contains(&"consult_jobs")); assert!(tool_names.contains(&"health_snapshot")); assert!(tool_names.contains(&"telemetry_snapshot")); let consult_tool = must_some( tools["result"]["tools"] .as_array() .into_iter() .flatten() .find(|tool| tool["name"] == "consult"), "consult tool definition", )?; assert!( consult_tool["inputSchema"]["properties"]["background"].is_null(), "consult schema should not advertise background: {consult_tool:#}" ); assert!( consult_tool["inputSchema"]["properties"]["session_id"].is_null(), "consult schema should not advertise session_id: {consult_tool:#}" ); assert_eq!( consult_tool["inputSchema"]["properties"]["fresh_context"], Value::Null ); let health = harness.call_tool(3, "health_snapshot", json!({}))?; assert_tool_ok(&health); assert_eq!(tool_content(&health)["worker_generation"].as_u64(), Some(1)); Ok(()) } #[test] fn consult_is_one_shot_and_hides_session_state() -> TestResult { let root = temp_root("consult_success")?; let state_home = root.join("state-home"); let sandbox = root.join("sandbox"); let caller_home = root.join("caller-home"); must(fs::create_dir_all(&state_home), "create state home")?; must(fs::create_dir_all(&sandbox), "create sandbox")?; must(fs::create_dir_all(&caller_home), "create caller home")?; seed_caller_claude_home(&caller_home)?; let fake_claude = root.join("claude"); let stdout_file = root.join("stdout.json"); let args_file = root.join("args.txt"); let pwd_file = root.join("pwd.txt"); let env_file = root.join("env.txt"); let cwd_probe_file = root.join("cwd-write-probe.txt"); let cwd_probe_error_file = root.join("cwd-write-probe.err"); let credential_probe_file = root.join("credential-write-probe.txt"); let credential_probe_error_file = root.join("credential-write-probe.err"); let first_observed_session = "81f218eb-568b-409b-871b-f6e86d8f666f"; let second_observed_session = "dbd3b6c2-4757-4b45-a8f0-f3d877e1a13f"; write_fake_claude_script(&fake_claude)?; write_fake_claude_json_success(&stdout_file, "oracle", first_observed_session, "uuid-123")?; let claude_bin = fake_claude.display().to_string(); let stdout_path = stdout_file.display().to_string(); let args_path = args_file.display().to_string(); let pwd_path = pwd_file.display().to_string(); let env_path = env_file.display().to_string(); let cwd_probe_path = cwd_probe_file.display().to_string(); let cwd_probe_error_path = cwd_probe_error_file.display().to_string(); let credential_probe_path = credential_probe_file.display().to_string(); let credential_probe_error_path = credential_probe_error_file.display().to_string(); let caller_home_path = caller_home.display().to_string(); let env = [ ("HOME", caller_home_path.as_str()), ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()), ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()), ("PHONE_OPUS_TEST_PWD_FILE", pwd_path.as_str()), ("PHONE_OPUS_TEST_ENV_FILE", env_path.as_str()), ( "PHONE_OPUS_TEST_CWD_WRITE_PROBE_FILE", cwd_probe_path.as_str(), ), ( "PHONE_OPUS_TEST_CWD_WRITE_ERROR_FILE", cwd_probe_error_path.as_str(), ), ( "PHONE_OPUS_TEST_CREDENTIAL_WRITE_PROBE_FILE", credential_probe_path.as_str(), ), ( "PHONE_OPUS_TEST_CREDENTIAL_WRITE_ERROR_FILE", credential_probe_error_path.as_str(), ), ]; let mut harness = McpHarness::spawn(&state_home, &env)?; let _ = harness.initialize()?; harness.notify_initialized()?; let consult = harness.call_tool( 3, "consult", json!({ "prompt": "say oracle", "cwd": sandbox.display().to_string(), "session_id": "not-a-uuid", "background": true }), )?; assert_tool_ok(&consult); assert_eq!(tool_content(&consult)["response"].as_str(), Some("oracle")); assert!(tool_content(&consult)["context_mode"].is_null()); assert!(tool_content(&consult)["planned_session_id"].is_null()); assert!(tool_content(&consult)["reused_session_id"].is_null()); assert!(tool_content(&consult)["observed_session_id"].is_null()); assert!(tool_content(&consult)["session_id"].is_null()); let first_args = must(fs::read_to_string(&args_file), "read first fake args file")?; let first_lines = first_args.lines().collect::>(); assert!(first_lines.contains(&"--session-id")); assert!(!first_args.contains("--resume")); assert!(!first_args.contains("not-a-uuid")); let first_session_id = must_some( first_lines .windows(2) .find_map(|window| (window[0] == "--session-id").then_some(window[1].to_owned())), "first one-shot session id", )?; assert!(uuid::Uuid::parse_str(&first_session_id).is_ok()); write_fake_claude_json_success( &stdout_file, "oracle again", second_observed_session, "uuid-124", )?; let repeated = harness.call_tool( 4, "consult", json!({ "prompt": "say oracle again", "cwd": sandbox.display().to_string() }), )?; assert_tool_ok(&repeated); assert_eq!( tool_content(&repeated)["response"].as_str(), Some("oracle again") ); assert!(tool_content(&repeated)["context_mode"].is_null()); assert!(tool_content(&repeated)["planned_session_id"].is_null()); assert!(tool_content(&repeated)["reused_session_id"].is_null()); assert!(tool_content(&repeated)["observed_session_id"].is_null()); assert!(tool_content(&repeated)["session_id"].is_null()); let repeated_args = must( fs::read_to_string(&args_file), "read repeated fake args file", )?; let repeated_lines = repeated_args.lines().collect::>(); assert!(repeated_lines.contains(&"--session-id")); assert!(!repeated_args.contains("--resume")); let repeated_session_id = must_some( repeated_lines .windows(2) .find_map(|window| (window[0] == "--session-id").then_some(window[1].to_owned())), "repeated one-shot session id", )?; assert!(uuid::Uuid::parse_str(&repeated_session_id).is_ok()); assert_ne!(repeated_session_id, first_session_id); let persisted_output_path = must_some( tool_content(&repeated)["persisted_output_path"] .as_str() .map(str::to_owned), "persisted output path", )?; assert!(persisted_output_path.starts_with("/tmp/phone_opus-consults/")); assert!(!persisted_output_path.contains(first_observed_session)); assert!(!persisted_output_path.contains(second_observed_session)); let persisted_output = must( fs::read_to_string(&persisted_output_path), "read persisted consult output", )?; let persisted_output: Value = must( serde_json::from_str(&persisted_output), "parse persisted consult output", )?; assert_eq!(persisted_output["response"].as_str(), Some("oracle again")); assert!(persisted_output["context_mode"].is_null()); assert!(persisted_output["planned_session_id"].is_null()); assert!(persisted_output["reused_session_id"].is_null()); assert!(persisted_output["session_id"].is_null()); assert!(persisted_output["observed_session_id"].is_null()); let consult_context_index = must( fs::read_to_string( state_home .join("phone_opus") .join("mcp") .join("consult_contexts.json"), ), "read consult context index", )?; let consult_context_index: Value = must( serde_json::from_str(&consult_context_index), "parse consult context index", )?; assert_eq!( consult_context_index["by_cwd"][sandbox.display().to_string()]["session_id"].as_str(), Some(second_observed_session) ); assert_eq!( consult_context_index["by_cwd"][sandbox.display().to_string()]["state"].as_str(), Some("confirmed") ); let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?; assert_eq!(pwd.trim(), sandbox.display().to_string()); let args = must(fs::read_to_string(&args_file), "read fake args file")?; let lines = args.lines().collect::>(); assert!(lines.contains(&"-p")); assert!(lines.contains(&"--output-format")); assert!(lines.contains(&"json")); assert!(lines.contains(&"--strict-mcp-config")); assert!(lines.contains(&"--mcp-config")); assert!(lines.contains(&"{\"mcpServers\":{}}")); assert!(lines.contains(&"--disable-slash-commands")); assert!(lines.contains(&"--no-chrome")); assert!(lines.contains(&"--model")); assert!(lines.contains(&"claude-opus-4-6")); assert!(lines.contains(&"--effort")); assert!(lines.contains(&"max")); assert!(lines.contains(&"--tools")); assert!(lines.contains(&"Bash,Read,Grep,Glob,LS,WebFetch")); assert!(lines.contains(&"--dangerously-skip-permissions")); assert!(lines.contains(&"--session-id")); assert!(!lines.contains(&"--permission-mode")); assert!(!lines.contains(&"dontAsk")); assert!(!lines.contains(&"--resume")); assert!(!lines.contains(&"--max-turns")); assert!(args.contains(PROMPT_PREFIX)); let prefix_index = must_some(args.find(PROMPT_PREFIX), "prefixed consult prompt")?; let user_prompt_index = must_some(args.find("say oracle again"), "user prompt inside args")?; assert!(prefix_index < user_prompt_index); let env_dump = must(fs::read_to_string(&env_file), "read fake env file")?; let state_root = state_home.join("phone_opus"); let claude_home = state_root.join("claude-home"); let xdg_config_home = state_root.join("xdg-config"); let xdg_cache_home = state_root.join("xdg-cache"); let xdg_state_home = state_root.join("xdg-state"); assert!(env_dump.contains(format!("HOME={}", claude_home.display()).as_str())); assert!(env_dump.contains(format!("XDG_CONFIG_HOME={}", xdg_config_home.display()).as_str())); assert!(env_dump.contains(format!("XDG_CACHE_HOME={}", xdg_cache_home.display()).as_str())); assert!(env_dump.contains(format!("XDG_STATE_HOME={}", xdg_state_home.display()).as_str())); assert_eq!( must( fs::read_link(claude_home.join(".claude").join(".credentials.json")), "read credentials symlink" )?, caller_home.join(".claude").join(".credentials.json") ); assert_eq!( must( fs::read_to_string(claude_home.join(".claude").join(".credentials.json")), "read linked credentials" )?, "{\n \"auth\": \"token\"\n}\n" ); assert_eq!( must( fs::read_to_string(claude_home.join(".claude").join("settings.json")), "read mirrored settings" )?, "{\n \"theme\": \"default\"\n}\n" ); assert_eq!( must( fs::read_to_string(claude_home.join(".claude").join("settings.local.json")), "read mirrored local settings" )?, "{\n \"profile\": \"local\"\n}\n" ); assert_eq!( must( fs::read_to_string( claude_home .join(".claude") .join(".claude") .join("settings.local.json") ), "read mirrored nested local settings" )?, "{\n \"sandbox\": \"read-only\"\n}\n" ); assert_eq!( must( fs::read_to_string(claude_home.join(".claude").join("CLAUDE.md")), "read mirrored CLAUDE.md" )?, "Global Claude instructions for phone_opus tests.\n" ); let cwd_probe = must( fs::read_to_string(&cwd_probe_file), "read cwd write probe result", )?; assert_eq!(cwd_probe.trim(), "write_failed"); let credential_probe = must( fs::read_to_string(&credential_probe_file), "read credential write probe result", )?; assert_eq!(credential_probe.trim(), "write_succeeded"); let telemetry = harness.call_tool(5, "telemetry_snapshot", json!({}))?; assert_tool_ok(&telemetry); let hot_methods = tool_content(&telemetry)["hot_methods"] .as_array() .cloned() .unwrap_or_default(); assert!( hot_methods .iter() .any(|value| value["method"] == "tools/call:consult") ); Ok(()) } #[test] fn silent_claude_is_failed_fast_when_progress_stalls() -> TestResult { let root = temp_root("consult_stall_timeout")?; let state_home = root.join("state-home"); let caller_home = root.join("caller-home"); let fake_claude = root.join("claude"); must(fs::create_dir_all(&state_home), "create state home")?; must(fs::create_dir_all(&caller_home), "create caller home")?; seed_caller_claude_home(&caller_home)?; write_fake_claude_script(&fake_claude)?; let claude_bin = fake_claude.display().to_string(); let caller_home_path = caller_home.display().to_string(); let env = [ ("HOME", caller_home_path.as_str()), ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), ("PHONE_OPUS_TEST_SLEEP_MS", "5000"), ("PHONE_OPUS_CLAUDE_STALL_TIMEOUT_MS", "750"), ]; let mut harness = McpHarness::spawn(&state_home, &env)?; let _ = harness.initialize()?; harness.notify_initialized()?; let started = Instant::now(); let stalled = harness.call_tool(3, "consult", json!({ "prompt": "stall" }))?; assert_tool_error(&stalled); assert!(started.elapsed() < Duration::from_secs(4)); assert_eq!( tool_content(&stalled)["fault"]["class"].as_str(), Some("downstream") ); assert!( tool_content(&stalled)["fault"]["detail"] .as_str() .is_some_and(|value| value.contains("no observable progress")) ); assert_eq!( tool_content(&stalled)["context"]["consult"]["retry_hint"].as_str(), Some("Claude stopped making observable progress; retry the consult") ); Ok(()) } #[test] fn transcript_progress_prevents_false_stall_timeout() -> TestResult { let root = temp_root("consult_transcript_progress")?; let state_home = root.join("state-home"); let caller_home = root.join("caller-home"); let fake_claude = root.join("claude"); let stdout_file = root.join("stdout.json"); must(fs::create_dir_all(&state_home), "create state home")?; must(fs::create_dir_all(&caller_home), "create caller home")?; seed_caller_claude_home(&caller_home)?; write_fake_claude_script(&fake_claude)?; write_fake_claude_json_success( &stdout_file, "heartbeat oracle", "1bfb2c8a-c6d8-42f6-8f18-6b3c70ad2e11", "uuid-heartbeat", )?; let claude_bin = fake_claude.display().to_string(); let stdout_path = stdout_file.display().to_string(); let caller_home_path = caller_home.display().to_string(); let env = [ ("HOME", caller_home_path.as_str()), ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()), ("PHONE_OPUS_TEST_TRANSCRIPT_HEARTBEAT_MS", "200"), ("PHONE_OPUS_TEST_TRANSCRIPT_HEARTBEAT_COUNT", "5"), ("PHONE_OPUS_CLAUDE_STALL_TIMEOUT_MS", "500"), ]; let mut harness = McpHarness::spawn(&state_home, &env)?; let _ = harness.initialize()?; harness.notify_initialized()?; let started = Instant::now(); let consult = harness.call_tool(3, "consult", json!({ "prompt": "heartbeat" }))?; assert_tool_ok(&consult); assert!(started.elapsed() >= Duration::from_millis(800)); assert_eq!( tool_content(&consult)["response"].as_str(), Some("heartbeat oracle") ); Ok(()) } #[test] fn background_surfaces_are_hidden_from_public_mcp() -> TestResult { let root = temp_root("consult_hidden_background")?; let state_home = root.join("state-home"); must(fs::create_dir_all(&state_home), "create state home")?; let mut harness = McpHarness::spawn(&state_home, &[])?; let _ = harness.initialize()?; harness.notify_initialized()?; let consult_job = harness.call_tool( 3, "consult_job", json!({ "job_id": "00000000-0000-0000-0000-000000000000" }), )?; assert_tool_error(&consult_job); assert!( consult_job["result"]["content"] .as_array() .into_iter() .flatten() .filter_map(|entry| entry["text"].as_str()) .any(|text| text.contains("unknown tool `consult_job`")) ); let consult_wait = harness.call_tool( 4, "consult_wait", json!({ "job_id": "00000000-0000-0000-0000-000000000000" }), )?; assert_tool_error(&consult_wait); assert!( consult_wait["result"]["content"] .as_array() .into_iter() .flatten() .filter_map(|entry| entry["text"].as_str()) .any(|text| text.contains("unknown tool `consult_wait`")) ); let consult_jobs = harness.call_tool(5, "consult_jobs", json!({}))?; assert_tool_error(&consult_jobs); assert!( consult_jobs["result"]["content"] .as_array() .into_iter() .flatten() .filter_map(|entry| entry["text"].as_str()) .any(|text| text.contains("unknown tool `consult_jobs`")) ); Ok(()) } #[test] fn consult_surfaces_downstream_cli_failures() -> TestResult { let root = temp_root("consult_failure")?; let state_home = root.join("state-home"); let fake_claude = root.join("claude"); let caller_home = root.join("caller-home"); must(fs::create_dir_all(&state_home), "create state home")?; must(fs::create_dir_all(&caller_home), "create caller home")?; write_fake_claude_script(&fake_claude)?; let claude_bin = fake_claude.display().to_string(); let caller_home_path = caller_home.display().to_string(); let env = [ ("HOME", caller_home_path.as_str()), ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), ("PHONE_OPUS_TEST_EXIT_CODE", "17"), ("PHONE_OPUS_TEST_STDERR", "permission denied by fake claude"), ]; let mut harness = McpHarness::spawn(&state_home, &env)?; let _ = harness.initialize()?; harness.notify_initialized()?; let consult = harness.call_tool(3, "consult", json!({ "prompt": "fail" }))?; assert_tool_error(&consult); assert_eq!( tool_content(&consult)["fault"]["class"].as_str(), Some("downstream") ); assert!( tool_content(&consult)["fault"]["detail"] .as_str() .is_some_and(|value| value.contains("permission denied by fake claude")) ); assert_eq!( tool_content(&consult)["context"]["consult"]["cwd"].as_str(), Some(std::env::current_dir()?.display().to_string().as_str()) ); assert!(tool_content(&consult)["context"]["consult"]["planned_session_id"].is_null()); Ok(()) } #[test] fn quota_failures_hide_session_state_on_public_surface() -> TestResult { let root = temp_root("consult_quota_failure")?; let state_home = root.join("state-home"); let sandbox = root.join("sandbox"); let caller_home = root.join("caller-home"); must(fs::create_dir_all(&state_home), "create state home")?; must(fs::create_dir_all(&sandbox), "create sandbox")?; must(fs::create_dir_all(&caller_home), "create caller home")?; seed_caller_claude_home(&caller_home)?; let fake_claude = root.join("claude"); let stdout_file = root.join("stdout.json"); let remembered_session = "84b9d462-5af9-4a4e-8e44-379a8d0c46d7"; write_fake_claude_script(&fake_claude)?; write_fake_claude_json_success(&stdout_file, "ok", remembered_session, "uuid-remembered")?; let claude_bin = fake_claude.display().to_string(); let stdout_path = stdout_file.display().to_string(); let caller_home_path = caller_home.display().to_string(); let env = [ ("HOME", caller_home_path.as_str()), ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()), ]; let mut harness = McpHarness::spawn(&state_home, &env)?; let _ = harness.initialize()?; harness.notify_initialized()?; let first = harness.call_tool( 3, "consult", json!({ "prompt": "seed remembered session", "cwd": sandbox.display().to_string() }), )?; assert_tool_ok(&first); assert!(tool_content(&first)["session_id"].is_null()); let quota_env = [ ("HOME", caller_home_path.as_str()), ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), ("PHONE_OPUS_TEST_EXIT_CODE", "17"), ( "PHONE_OPUS_TEST_STDERR", "You've hit your limit · resets 4pm (America/New_York)", ), ]; drop(harness); let mut harness = McpHarness::spawn(&state_home, "a_env)?; let _ = harness.initialize()?; harness.notify_initialized()?; let failed = harness.call_tool( 4, "consult", json!({ "prompt": "quota me", "cwd": sandbox.display().to_string() }), )?; assert_tool_error(&failed); assert_eq!( tool_content(&failed)["fault"]["detail"].as_str(), Some("You've hit your limit · resets 4pm (America/New_York)") ); assert_eq!( tool_content(&failed)["context"]["consult"]["cwd"].as_str(), Some(sandbox.display().to_string().as_str()) ); assert!(tool_content(&failed)["context"]["consult"]["context_mode"].is_null()); assert!(tool_content(&failed)["context"]["consult"]["planned_session_id"].is_null()); assert!(tool_content(&failed)["context"]["consult"]["reused_session_id"].is_null()); assert!(tool_content(&failed)["context"]["consult"]["observed_session_id"].is_null()); assert!(tool_content(&failed)["context"]["consult"]["resume_session_id"].is_null()); assert_eq!( tool_content(&failed)["context"]["consult"]["quota_limited"].as_bool(), Some(true) ); assert_eq!( tool_content(&failed)["context"]["consult"]["quota_reset_hint"].as_str(), Some("4pm (America/New_York)") ); assert!( tool_content(&failed)["context"]["consult"]["retry_hint"] .as_str() .is_some_and(|value| value.contains("retry the consult")) ); assert!( failed["result"]["content"] .as_array() .into_iter() .flatten() .filter_map(|entry| entry["text"].as_str()) .any(|text| text.contains("quota_reset: 4pm (America/New_York)")) ); assert!( failed["result"]["content"] .as_array() .into_iter() .flatten() .filter_map(|entry| entry["text"].as_str()) .all(|text| !text.contains("session")) ); Ok(()) } #[test] fn fresh_failures_keep_internal_session_state_without_public_leakage() -> TestResult { let root = temp_root("consult_fresh_json_failure")?; let state_home = root.join("state-home"); let sandbox = root.join("sandbox"); let caller_home = root.join("caller-home"); let fake_claude = root.join("claude"); let stdout_file = root.join("stdout.json"); let args_file = root.join("args.txt"); must(fs::create_dir_all(&state_home), "create state home")?; must(fs::create_dir_all(&sandbox), "create sandbox")?; must(fs::create_dir_all(&caller_home), "create caller home")?; seed_caller_claude_home(&caller_home)?; write_fake_claude_script(&fake_claude)?; must(fs::write(&stdout_file, ""), "write empty fake stdout")?; let claude_bin = fake_claude.display().to_string(); let stdout_path = stdout_file.display().to_string(); let args_path = args_file.display().to_string(); let caller_home_path = caller_home.display().to_string(); let env = [ ("HOME", caller_home_path.as_str()), ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()), ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()), ("PHONE_OPUS_TEST_EXIT_CODE", "17"), ( "PHONE_OPUS_TEST_STDERR", "You've hit your limit · resets 9pm (America/New_York)", ), ]; let mut harness = McpHarness::spawn(&state_home, &env)?; let _ = harness.initialize()?; harness.notify_initialized()?; let failed = harness.call_tool( 3, "consult", json!({ "prompt": "fresh expensive audit", "cwd": sandbox.display().to_string() }), )?; assert_tool_error(&failed); assert!(tool_content(&failed)["context"]["consult"]["context_mode"].is_null()); assert!(tool_content(&failed)["context"]["consult"]["observed_session_id"].is_null()); assert!(tool_content(&failed)["context"]["consult"]["resume_session_id"].is_null()); assert_eq!( tool_content(&failed)["context"]["consult"]["quota_reset_hint"].as_str(), Some("9pm (America/New_York)") ); assert!(tool_content(&failed)["context"]["consult"]["planned_session_id"].is_null()); let args = must(fs::read_to_string(&args_file), "read fresh failure args")?; assert!(args.contains("--session-id")); assert!(!args.contains("--resume")); let planned_session_id = must_some( args.lines() .collect::>() .windows(2) .find_map(|window| (window[0] == "--session-id").then_some(window[1].to_owned())), "planned session id", )?; let consult_context_index = must( fs::read_to_string( state_home .join("phone_opus") .join("mcp") .join("consult_contexts.json"), ), "read consult context index after failure", )?; let consult_context_index: Value = must( serde_json::from_str(&consult_context_index), "parse consult context index after failure", )?; assert_eq!( consult_context_index["by_cwd"][sandbox.display().to_string()]["session_id"].as_str(), Some(planned_session_id.as_str()) ); assert!( failed["result"]["content"] .as_array() .into_iter() .flatten() .filter_map(|entry| entry["text"].as_str()) .all(|text| !text.contains("session")) ); Ok(()) } #[test] fn consult_never_replays_after_worker_transport_failure() -> TestResult { let root = temp_root("consult_no_replay")?; let state_home = root.join("state-home"); let fake_claude = root.join("claude"); let caller_home = root.join("caller-home"); must(fs::create_dir_all(&state_home), "create state home")?; must(fs::create_dir_all(&caller_home), "create caller home")?; write_fake_claude_script(&fake_claude)?; let claude_bin = fake_claude.display().to_string(); let caller_home_path = caller_home.display().to_string(); let env = [ ("HOME", caller_home_path.as_str()), ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), ( "PHONE_OPUS_MCP_TEST_WORKER_CRASH_ONCE_KEY", "tools/call:consult", ), ]; let mut harness = McpHarness::spawn(&state_home, &env)?; let _ = harness.initialize()?; harness.notify_initialized()?; let consult = harness.call_tool(3, "consult", json!({ "prompt": "crash once" }))?; assert_tool_error(&consult); assert_eq!( tool_content(&consult)["fault"]["class"].as_str(), Some("transport") ); assert_eq!(tool_content(&consult)["retryable"].as_bool(), Some(true)); assert_eq!(tool_content(&consult)["retried"].as_bool(), Some(false)); let telemetry = harness.call_tool(4, "telemetry_snapshot", json!({ "render": "json" }))?; assert_tool_ok(&telemetry); assert_eq!(tool_content(&telemetry)["retries"].as_u64(), Some(0)); let hot_methods = tool_content(&telemetry)["hot_methods"] .as_array() .cloned() .unwrap_or_default(); let consult_method = hot_methods .iter() .find(|value| value["method"] == "tools/call:consult") .cloned() .unwrap_or_default(); assert_eq!(consult_method["transport_faults"].as_u64(), Some(1)); let telemetry_log_path = state_home .join("phone_opus") .join("mcp") .join("telemetry.jsonl"); let telemetry_rows = read_json_lines::(&telemetry_log_path)?; assert!( telemetry_rows .iter() .any(|row| row["event"] == "tool_call" && row["tool_name"] == "consult") ); Ok(()) }