From d986442e8e4bc2d716c9d63159a1cfa7b1e6ed76 Mon Sep 17 00:00:00 2001 From: main Date: Sun, 22 Mar 2026 22:20:17 -0400 Subject: Bootstrap consultative Claude Code MCP --- crates/phone-opus/tests/mcp_hardening.rs | 422 +++++++++++++++++++++++++++++++ 1 file changed, 422 insertions(+) create mode 100644 crates/phone-opus/tests/mcp_hardening.rs (limited to 'crates/phone-opus/tests') diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs new file mode 100644 index 0000000..b47b365 --- /dev/null +++ b/crates/phone-opus/tests/mcp_hardening.rs @@ -0,0 +1,422 @@ +use clap as _; +use dirs as _; +use libmcp as _; +use std::fs; +use std::io::{self, BufRead, BufReader, Write}; +use std::os::unix::fs::PermissionsExt; +use std::path::{Path, PathBuf}; +use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; + +use libmcp_testkit::read_json_lines; +use serde as _; +use serde_json::{Value, json}; +use thiserror as _; + +type TestResult = Result>; + +fn must( + result: Result, + context: C, +) -> TestResult { + result.map_err(|error| io::Error::other(format!("{context}: {error}")).into()) +} + +fn must_some(value: Option, context: &str) -> TestResult { + value.ok_or_else(|| io::Error::other(context).into()) +} + +fn temp_root(name: &str) -> TestResult { + let root = std::env::temp_dir().join(format!( + "phone_opus_{name}_{}_{}", + std::process::id(), + must( + std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH), + "current time after unix epoch", + )? + .as_nanos() + )); + must(fs::create_dir_all(&root), "create temp root")?; + Ok(root) +} + +fn binary_path() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_phone-opus")) +} + +struct McpHarness { + child: Child, + stdin: ChildStdin, + stdout: BufReader, +} + +impl McpHarness { + fn spawn(state_home: &Path, extra_env: &[(&str, &str)]) -> TestResult { + let mut command = Command::new(binary_path()); + let _ = command + .arg("mcp") + .arg("serve") + .env("XDG_STATE_HOME", state_home) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()); + for (key, value) in extra_env { + let _ = command.env(key, value); + } + let mut child = must(command.spawn(), "spawn mcp host")?; + let stdin = must_some(child.stdin.take(), "host stdin")?; + let stdout = BufReader::new(must_some(child.stdout.take(), "host stdout")?); + Ok(Self { + child, + stdin, + stdout, + }) + } + + fn initialize(&mut self) -> TestResult { + self.request(json!({ + "jsonrpc": "2.0", + "id": 1, + "method": "initialize", + "params": { + "protocolVersion": "2025-11-25", + "capabilities": {}, + "clientInfo": { "name": "mcp-hardening-test", "version": "0" } + } + })) + } + + fn notify_initialized(&mut self) -> TestResult { + self.notify(json!({ + "jsonrpc": "2.0", + "method": "notifications/initialized", + })) + } + + fn tools_list(&mut self) -> TestResult { + self.request(json!({ + "jsonrpc": "2.0", + "id": 2, + "method": "tools/list", + "params": {}, + })) + } + + fn call_tool(&mut self, id: u64, name: &str, arguments: Value) -> TestResult { + self.request(json!({ + "jsonrpc": "2.0", + "id": id, + "method": "tools/call", + "params": { + "name": name, + "arguments": arguments, + } + })) + } + + fn request(&mut self, message: Value) -> TestResult { + let encoded = must(serde_json::to_string(&message), "request json")?; + must(writeln!(self.stdin, "{encoded}"), "write request")?; + must(self.stdin.flush(), "flush request")?; + let mut line = String::new(); + let byte_count = must(self.stdout.read_line(&mut line), "read response")?; + if byte_count == 0 { + return Err(io::Error::other("unexpected EOF reading response").into()); + } + must(serde_json::from_str(&line), "response json") + } + + fn notify(&mut self, message: Value) -> TestResult { + let encoded = must(serde_json::to_string(&message), "notify json")?; + must(writeln!(self.stdin, "{encoded}"), "write notify")?; + must(self.stdin.flush(), "flush notify")?; + Ok(()) + } +} + +impl Drop for McpHarness { + fn drop(&mut self) { + let _ = self.child.kill(); + let _ = self.child.wait(); + } +} + +fn assert_tool_ok(response: &Value) { + assert_eq!( + response["result"]["isError"].as_bool(), + Some(false), + "tool response unexpectedly errored: {response:#}" + ); +} + +fn assert_tool_error(response: &Value) { + assert_eq!( + response["result"]["isError"].as_bool(), + Some(true), + "tool response unexpectedly succeeded: {response:#}" + ); +} + +fn tool_content(response: &Value) -> &Value { + &response["result"]["structuredContent"] +} + +fn tool_names(response: &Value) -> Vec<&str> { + response["result"]["tools"] + .as_array() + .into_iter() + .flatten() + .filter_map(|tool| tool["name"].as_str()) + .collect() +} + +fn write_fake_claude_script(path: &Path) -> TestResult { + let script = r#"#!/bin/sh +set -eu +if [ -n "${PHONE_OPUS_TEST_PWD_FILE:-}" ]; then + pwd >"$PHONE_OPUS_TEST_PWD_FILE" +fi +if [ -n "${PHONE_OPUS_TEST_ARGS_FILE:-}" ]; then + printf '%s\n' "$@" >"$PHONE_OPUS_TEST_ARGS_FILE" +fi +if [ -n "${PHONE_OPUS_TEST_STDERR:-}" ]; then + printf '%s\n' "$PHONE_OPUS_TEST_STDERR" >&2 +fi +if [ -n "${PHONE_OPUS_TEST_STDOUT_FILE:-}" ]; then + cat "$PHONE_OPUS_TEST_STDOUT_FILE" +fi +exit "${PHONE_OPUS_TEST_EXIT_CODE:-0}" +"#; + must(fs::write(path, script), "write fake claude script")?; + let mut permissions = must(fs::metadata(path), "fake claude metadata")?.permissions(); + permissions.set_mode(0o755); + must( + fs::set_permissions(path, permissions), + "chmod fake claude script", + )?; + Ok(()) +} + +#[test] +fn cold_start_exposes_consult_and_ops_tools() -> TestResult { + let root = temp_root("cold_start")?; + let state_home = root.join("state-home"); + must(fs::create_dir_all(&state_home), "create state home")?; + + let mut harness = McpHarness::spawn(&state_home, &[])?; + let initialize = harness.initialize()?; + assert_eq!( + initialize["result"]["protocolVersion"].as_str(), + Some("2025-11-25") + ); + harness.notify_initialized()?; + + let tools = harness.tools_list()?; + let tool_names = tool_names(&tools); + assert!(tool_names.contains(&"consult")); + assert!(tool_names.contains(&"health_snapshot")); + assert!(tool_names.contains(&"telemetry_snapshot")); + + let health = harness.call_tool(3, "health_snapshot", json!({}))?; + assert_tool_ok(&health); + assert_eq!(tool_content(&health)["worker_generation"].as_u64(), Some(1)); + Ok(()) +} + +#[test] +fn consult_uses_read_only_toolset_and_requested_working_directory() -> TestResult { + let root = temp_root("consult_success")?; + let state_home = root.join("state-home"); + let sandbox = root.join("sandbox"); + must(fs::create_dir_all(&state_home), "create state home")?; + must(fs::create_dir_all(&sandbox), "create sandbox")?; + + let fake_claude = root.join("claude"); + let stdout_file = root.join("stdout.json"); + let args_file = root.join("args.txt"); + let pwd_file = root.join("pwd.txt"); + write_fake_claude_script(&fake_claude)?; + must( + fs::write( + &stdout_file, + serde_json::to_string(&json!({ + "type": "result", + "subtype": "success", + "is_error": false, + "duration_ms": 1234, + "duration_api_ms": 1200, + "num_turns": 2, + "result": "oracle", + "stop_reason": "end_turn", + "session_id": "session-123", + "total_cost_usd": 0.125, + "usage": { + "input_tokens": 10, + "output_tokens": 5 + }, + "modelUsage": { + "claude-sonnet-4-6": { + "inputTokens": 10, + "outputTokens": 5 + } + }, + "permission_denials": [], + "fast_mode_state": "off", + "uuid": "uuid-123" + }))?, + ), + "write fake stdout", + )?; + + let claude_bin = fake_claude.display().to_string(); + let stdout_path = stdout_file.display().to_string(); + let args_path = args_file.display().to_string(); + let pwd_path = pwd_file.display().to_string(); + let env = [ + ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), + ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()), + ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()), + ("PHONE_OPUS_TEST_PWD_FILE", pwd_path.as_str()), + ]; + let mut harness = McpHarness::spawn(&state_home, &env)?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + + let consult = harness.call_tool( + 3, + "consult", + json!({ + "prompt": "say oracle", + "cwd": sandbox.display().to_string(), + "max_turns": 7 + }), + )?; + assert_tool_ok(&consult); + assert_eq!(tool_content(&consult)["response"].as_str(), Some("oracle")); + assert_eq!( + tool_content(&consult)["cwd"].as_str(), + Some(sandbox.display().to_string().as_str()) + ); + assert_eq!(tool_content(&consult)["num_turns"].as_u64(), Some(2)); + + let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?; + assert_eq!(pwd.trim(), sandbox.display().to_string()); + + let args = must(fs::read_to_string(&args_file), "read fake args file")?; + let lines = args.lines().collect::>(); + assert!(lines.contains(&"-p")); + assert!(lines.contains(&"--output-format")); + assert!(lines.contains(&"json")); + assert!(lines.contains(&"--strict-mcp-config")); + assert!(lines.contains(&"--mcp-config")); + assert!(lines.contains(&"{\"mcpServers\":{}}")); + assert!(lines.contains(&"--disable-slash-commands")); + assert!(lines.contains(&"--no-chrome")); + assert!(lines.contains(&"--tools")); + assert!(lines.contains(&"Bash,Read,Grep,Glob,LS,WebFetch,WebSearch")); + assert!(lines.contains(&"--permission-mode")); + assert!(lines.contains(&"dontAsk")); + assert!(lines.contains(&"--max-turns")); + assert!(lines.contains(&"7")); + assert_eq!(lines.last().copied(), Some("say oracle")); + + let telemetry = harness.call_tool(4, "telemetry_snapshot", json!({}))?; + assert_tool_ok(&telemetry); + let hot_methods = tool_content(&telemetry)["hot_methods"] + .as_array() + .cloned() + .unwrap_or_default(); + assert!( + hot_methods + .iter() + .any(|value| value["method"] == "tools/call:consult") + ); + Ok(()) +} + +#[test] +fn consult_surfaces_downstream_cli_failures() -> TestResult { + let root = temp_root("consult_failure")?; + let state_home = root.join("state-home"); + let fake_claude = root.join("claude"); + must(fs::create_dir_all(&state_home), "create state home")?; + write_fake_claude_script(&fake_claude)?; + + let claude_bin = fake_claude.display().to_string(); + let env = [ + ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), + ("PHONE_OPUS_TEST_EXIT_CODE", "17"), + ("PHONE_OPUS_TEST_STDERR", "permission denied by fake claude"), + ]; + let mut harness = McpHarness::spawn(&state_home, &env)?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + + let consult = harness.call_tool(3, "consult", json!({ "prompt": "fail" }))?; + assert_tool_error(&consult); + assert_eq!( + tool_content(&consult)["fault"]["class"].as_str(), + Some("downstream") + ); + assert!( + tool_content(&consult)["fault"]["detail"] + .as_str() + .is_some_and(|value| value.contains("permission denied by fake claude")) + ); + Ok(()) +} + +#[test] +fn consult_never_replays_after_worker_transport_failure() -> TestResult { + let root = temp_root("consult_no_replay")?; + let state_home = root.join("state-home"); + let fake_claude = root.join("claude"); + must(fs::create_dir_all(&state_home), "create state home")?; + write_fake_claude_script(&fake_claude)?; + + let claude_bin = fake_claude.display().to_string(); + let env = [ + ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), + ( + "PHONE_OPUS_MCP_TEST_WORKER_CRASH_ONCE_KEY", + "tools/call:consult", + ), + ]; + let mut harness = McpHarness::spawn(&state_home, &env)?; + let _ = harness.initialize()?; + harness.notify_initialized()?; + + let consult = harness.call_tool(3, "consult", json!({ "prompt": "crash once" }))?; + assert_tool_error(&consult); + assert_eq!( + tool_content(&consult)["fault"]["class"].as_str(), + Some("transport") + ); + assert_eq!(tool_content(&consult)["retryable"].as_bool(), Some(true)); + assert_eq!(tool_content(&consult)["retried"].as_bool(), Some(false)); + + let telemetry = harness.call_tool(4, "telemetry_snapshot", json!({ "render": "json" }))?; + assert_tool_ok(&telemetry); + assert_eq!(tool_content(&telemetry)["retries"].as_u64(), Some(0)); + let hot_methods = tool_content(&telemetry)["hot_methods"] + .as_array() + .cloned() + .unwrap_or_default(); + let consult_method = hot_methods + .iter() + .find(|value| value["method"] == "tools/call:consult") + .cloned() + .unwrap_or_default(); + assert_eq!(consult_method["transport_faults"].as_u64(), Some(1)); + + let telemetry_log_path = state_home + .join("phone_opus") + .join("mcp") + .join("telemetry.jsonl"); + let telemetry_rows = read_json_lines::(&telemetry_log_path)?; + assert!( + telemetry_rows + .iter() + .any(|row| row["event"] == "tool_call" && row["tool_name"] == "consult") + ); + Ok(()) +} -- cgit v1.2.3