Bootstrap consultative Claude Code MCP

author: main <main@swarm.moe> 2026-03-22 22:20:17 -0400
committer: main <main@swarm.moe> 2026-03-22 22:20:17 -0400
commit: d986442e8e4bc2d716c9d63159a1cfa7b1e6ed76 (patch)
tree: b9ca3d0cb62b5c59e614abfb6f74ac5310c69c2f /crates/phone-opus/tests
download: phone_opus-d986442e8e4bc2d716c9d63159a1cfa7b1e6ed76.zip
1 files changed, 422 insertions, 0 deletions
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs
new file mode 100644
index 0000000..b47b365
--- /dev/null
+++ b/crates/phone-opus/tests/mcp_hardening.rs
@@ -0,0 +1,422 @@
+use clap as _;
+use dirs as _;
+use libmcp as _;
+use std::fs;
+use std::io::{self, BufRead, BufReader, Write};
+use std::os::unix::fs::PermissionsExt;
+use std::path::{Path, PathBuf};
+use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio};
+
+use libmcp_testkit::read_json_lines;
+use serde as _;
+use serde_json::{Value, json};
+use thiserror as _;
+
+/// Result alias used by every helper and test in this file: the error side is
+/// a boxed `std::error::Error`, and the success type defaults to `()`.
+type TestResult<T = ()> = Result<T, Box<dyn std::error::Error>>;
+
+fn must<T, E: std::fmt::Display, C: std::fmt::Display>(
+    result: Result<T, E>,
+    context: C,
+) -> TestResult<T> {
+    result.map_err(|error| io::Error::other(format!("{context}: {error}")).into())
+}
+
+fn must_some<T>(value: Option<T>, context: &str) -> TestResult<T> {
+    value.ok_or_else(|| io::Error::other(context).into())
+}
+
+/// Creates a fresh scratch directory for one test and returns its canonical
+/// path.
+///
+/// The directory lives under `std::env::temp_dir()` and is named
+/// `phone_opus_{name}_{pid}_{nanos}`, where `nanos` is the current time since
+/// the unix epoch in nanoseconds.
+///
+/// # Errors
+///
+/// Fails if the system clock is before the unix epoch, or if the directory
+/// cannot be created or canonicalized.
+fn temp_root(name: &str) -> TestResult<PathBuf> {
+    let nanos = must(
+        std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH),
+        "current time after unix epoch",
+    )?
+    .as_nanos();
+    let root = std::env::temp_dir().join(format!(
+        "phone_opus_{name}_{}_{nanos}",
+        std::process::id(),
+    ));
+    must(fs::create_dir_all(&root), "create temp root")?;
+    // The temp dir may sit behind a symlink (e.g. `/var` -> `/private/var`).
+    // Tests compare paths derived from this root with the physical directory
+    // reported by a child process, so hand out the resolved path.
+    must(fs::canonicalize(&root), "canonicalize temp root")
+}
+
+/// Path of the `phone-opus` binary under test, read at compile time from the
+/// `CARGO_BIN_EXE_phone-opus` environment variable.
+fn binary_path() -> PathBuf {
+    let exe: &str = env!("CARGO_BIN_EXE_phone-opus");
+    PathBuf::from(exe)
+}
+
+/// Handle on a spawned `phone-opus mcp serve` process plus the pipes used to
+/// exchange newline-delimited JSON messages with it.
+struct McpHarness {
+    /// The spawned host process.
+    child: Child,
+    /// Write end of the host's stdin; one JSON message is written per line.
+    stdin: ChildStdin,
+    /// Buffered read end of the host's stdout; responses are read line by line.
+    stdout: BufReader<ChildStdout>,
+}
+
+impl McpHarness {
+    /// Launches `phone-opus mcp serve` as a child process.
+    ///
+    /// The child gets `XDG_STATE_HOME` set to `state_home`, followed by every
+    /// `(key, value)` pair in `extra_env`. Stdin and stdout are piped for the
+    /// JSON-line exchange; stderr is inherited from the test process.
+    fn spawn(state_home: &Path, extra_env: &[(&str, &str)]) -> TestResult<Self> {
+        let mut command = Command::new(binary_path());
+        let _ = command.args(["mcp", "serve"]);
+        let _ = command.env("XDG_STATE_HOME", state_home);
+        let _ = command
+            .stdin(Stdio::piped())
+            .stdout(Stdio::piped())
+            .stderr(Stdio::inherit());
+        // Applied after `XDG_STATE_HOME`, so a caller-supplied entry wins.
+        let _ = command.envs(extra_env.iter().copied());
+
+        let mut host = must(command.spawn(), "spawn mcp host")?;
+        let stdin = must_some(host.stdin.take(), "host stdin")?;
+        let reader = must_some(host.stdout.take(), "host stdout")?;
+        Ok(Self {
+            child: host,
+            stdin,
+            stdout: BufReader::new(reader),
+        })
+    }
+
+    /// Sends the `initialize` request (id 1, protocol version `2025-11-25`)
+    /// and returns the host's response.
+    fn initialize(&mut self) -> TestResult<Value> {
+        let handshake = json!({
+            "jsonrpc": "2.0",
+            "id": 1,
+            "method": "initialize",
+            "params": {
+                "protocolVersion": "2025-11-25",
+                "capabilities": {},
+                "clientInfo": { "name": "mcp-hardening-test", "version": "0" }
+            }
+        });
+        self.request(handshake)
+    }
+
+    /// Sends the `notifications/initialized` notification; no reply is read.
+    fn notify_initialized(&mut self) -> TestResult {
+        let notification = json!({
+            "jsonrpc": "2.0",
+            "method": "notifications/initialized",
+        });
+        self.notify(notification)
+    }
+
+    /// Sends a `tools/list` request (id 2) and returns the host's response.
+    fn tools_list(&mut self) -> TestResult<Value> {
+        let listing = json!({
+            "jsonrpc": "2.0",
+            "id": 2,
+            "method": "tools/list",
+            "params": {},
+        });
+        self.request(listing)
+    }
+
+    /// Sends a `tools/call` request for tool `name` with the given request
+    /// `id` and `arguments`, returning the host's response.
+    fn call_tool(&mut self, id: u64, name: &str, arguments: Value) -> TestResult<Value> {
+        let params = json!({
+            "name": name,
+            "arguments": arguments,
+        });
+        self.request(json!({
+            "jsonrpc": "2.0",
+            "id": id,
+            "method": "tools/call",
+            "params": params,
+        }))
+    }
+
+    /// Writes `message` as one JSON line, then reads and parses exactly one
+    /// line from the host's stdout.
+    ///
+    /// The reply is returned as-is; its `id` is not matched against the
+    /// request. Reading zero bytes (EOF) is reported as an error.
+    fn request(&mut self, message: Value) -> TestResult<Value> {
+        let payload = must(serde_json::to_string(&message), "request json")?;
+        must(writeln!(self.stdin, "{payload}"), "write request")?;
+        must(self.stdin.flush(), "flush request")?;
+
+        let mut reply = String::new();
+        match must(self.stdout.read_line(&mut reply), "read response")? {
+            0 => Err(io::Error::other("unexpected EOF reading response").into()),
+            _ => must(serde_json::from_str(&reply), "response json"),
+        }
+    }
+
+    /// Writes `message` as one JSON line and flushes, without reading a reply.
+    fn notify(&mut self, message: Value) -> TestResult {
+        let payload = must(serde_json::to_string(&message), "notify json")?;
+        must(writeln!(self.stdin, "{payload}"), "write notify")?;
+        must(self.stdin.flush(), "flush notify")
+    }
+}
+
+/// Tears the host process down when the harness goes out of scope.
+impl Drop for McpHarness {
+    fn drop(&mut self) {
+        // Best-effort cleanup: the child may already have exited, so failures
+        // from `kill` and `wait` are deliberately discarded.
+        drop(self.child.kill());
+        drop(self.child.wait());
+    }
+}
+
+/// Panics unless `result.isError` in `response` is present and `false`.
+fn assert_tool_ok(response: &Value) {
+    let is_error = response["result"]["isError"].as_bool();
+    assert_eq!(
+        is_error,
+        Some(false),
+        "tool response unexpectedly errored: {response:#}"
+    );
+}
+
+/// Panics unless `result.isError` in `response` is present and `true`.
+fn assert_tool_error(response: &Value) {
+    let is_error = response["result"]["isError"].as_bool();
+    assert_eq!(
+        is_error,
+        Some(true),
+        "tool response unexpectedly succeeded: {response:#}"
+    );
+}
+
+/// Borrows `result.structuredContent` from a tool response; indexing a
+/// missing key yields JSON `null` rather than panicking.
+fn tool_content(response: &Value) -> &Value {
+    let result = &response["result"];
+    &result["structuredContent"]
+}
+
+/// Collects the string `name` of every entry in `result.tools`.
+///
+/// Returns an empty list when `result.tools` is not an array; entries without
+/// a string `name` are skipped.
+fn tool_names(response: &Value) -> Vec<&str> {
+    let Some(tools) = response["result"]["tools"].as_array() else {
+        return Vec::new();
+    };
+    tools
+        .iter()
+        .filter_map(|tool| tool["name"].as_str())
+        .collect()
+}
+
+/// Writes an executable `/bin/sh` stand-in for the Claude CLI to `path`.
+///
+/// The script is driven entirely by environment variables:
+/// - `PHONE_OPUS_TEST_PWD_FILE`: file that receives the output of `pwd`.
+/// - `PHONE_OPUS_TEST_ARGS_FILE`: file that receives the arguments, one per line.
+/// - `PHONE_OPUS_TEST_STDERR`: text echoed to stderr.
+/// - `PHONE_OPUS_TEST_STDOUT_FILE`: file whose contents are copied to stdout.
+/// - `PHONE_OPUS_TEST_EXIT_CODE`: exit status (defaults to `0`).
+///
+/// The file mode is set to `0o755` after writing.
+fn write_fake_claude_script(path: &Path) -> TestResult {
+    const SCRIPT: &str = r#"#!/bin/sh
+set -eu
+if [ -n "${PHONE_OPUS_TEST_PWD_FILE:-}" ]; then
+  pwd >"$PHONE_OPUS_TEST_PWD_FILE"
+fi
+if [ -n "${PHONE_OPUS_TEST_ARGS_FILE:-}" ]; then
+  printf '%s\n' "$@" >"$PHONE_OPUS_TEST_ARGS_FILE"
+fi
+if [ -n "${PHONE_OPUS_TEST_STDERR:-}" ]; then
+  printf '%s\n' "$PHONE_OPUS_TEST_STDERR" >&2
+fi
+if [ -n "${PHONE_OPUS_TEST_STDOUT_FILE:-}" ]; then
+  cat "$PHONE_OPUS_TEST_STDOUT_FILE"
+fi
+exit "${PHONE_OPUS_TEST_EXIT_CODE:-0}"
+"#;
+    must(fs::write(path, SCRIPT), "write fake claude script")?;
+
+    // Start from the existing permissions and overwrite the mode bits.
+    let metadata = must(fs::metadata(path), "fake claude metadata")?;
+    let mut executable = metadata.permissions();
+    executable.set_mode(0o755);
+    must(
+        fs::set_permissions(path, executable),
+        "chmod fake claude script",
+    )
+}
+
+/// A freshly started host must complete the handshake, advertise the
+/// `consult`, `health_snapshot` and `telemetry_snapshot` tools, and report
+/// `worker_generation` 1 from `health_snapshot`.
+#[test]
+fn cold_start_exposes_consult_and_ops_tools() -> TestResult {
+    let state_home = temp_root("cold_start")?.join("state-home");
+    must(fs::create_dir_all(&state_home), "create state home")?;
+
+    let mut harness = McpHarness::spawn(&state_home, &[])?;
+    let handshake = harness.initialize()?;
+    let negotiated = handshake["result"]["protocolVersion"].as_str();
+    assert_eq!(negotiated, Some("2025-11-25"));
+    harness.notify_initialized()?;
+
+    let listing = harness.tools_list()?;
+    let advertised = tool_names(&listing);
+    assert!(advertised.contains(&"consult"));
+    assert!(advertised.contains(&"health_snapshot"));
+    assert!(advertised.contains(&"telemetry_snapshot"));
+
+    let health = harness.call_tool(3, "health_snapshot", json!({}))?;
+    assert_tool_ok(&health);
+    let generation = tool_content(&health)["worker_generation"].as_u64();
+    assert_eq!(generation, Some(1));
+    Ok(())
+}
+
+/// A successful `consult` call must run the configured Claude binary in the
+/// requested working directory, pass the expected CLI flags (each value
+/// directly after its flag, prompt last), surface the canned result, and show
+/// up in the telemetry snapshot.
+#[test]
+fn consult_uses_read_only_toolset_and_requested_working_directory() -> TestResult {
+    let root = temp_root("consult_success")?;
+    let state_home = root.join("state-home");
+    let sandbox = root.join("sandbox");
+    must(fs::create_dir_all(&state_home), "create state home")?;
+    must(fs::create_dir_all(&sandbox), "create sandbox")?;
+
+    let fake_claude = root.join("claude");
+    let stdout_file = root.join("stdout.json");
+    let args_file = root.join("args.txt");
+    let pwd_file = root.join("pwd.txt");
+    write_fake_claude_script(&fake_claude)?;
+
+    // Canned CLI output that the fake script copies to its stdout.
+    let fake_stdout = must(
+        serde_json::to_string(&json!({
+            "type": "result",
+            "subtype": "success",
+            "is_error": false,
+            "duration_ms": 1234,
+            "duration_api_ms": 1200,
+            "num_turns": 2,
+            "result": "oracle",
+            "stop_reason": "end_turn",
+            "session_id": "session-123",
+            "total_cost_usd": 0.125,
+            "usage": {
+                "input_tokens": 10,
+                "output_tokens": 5
+            },
+            "modelUsage": {
+                "claude-sonnet-4-6": {
+                    "inputTokens": 10,
+                    "outputTokens": 5
+                }
+            },
+            "permission_denials": [],
+            "fast_mode_state": "off",
+            "uuid": "uuid-123"
+        })),
+        "encode fake stdout",
+    )?;
+    must(fs::write(&stdout_file, fake_stdout), "write fake stdout")?;
+
+    let claude_bin = fake_claude.display().to_string();
+    let stdout_path = stdout_file.display().to_string();
+    let args_path = args_file.display().to_string();
+    let pwd_path = pwd_file.display().to_string();
+    let sandbox_path = sandbox.display().to_string();
+    let env = [
+        ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
+        ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()),
+        ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()),
+        ("PHONE_OPUS_TEST_PWD_FILE", pwd_path.as_str()),
+    ];
+    let mut harness = McpHarness::spawn(&state_home, &env)?;
+    let _ = harness.initialize()?;
+    harness.notify_initialized()?;
+
+    let consult = harness.call_tool(
+        3,
+        "consult",
+        json!({
+            "prompt": "say oracle",
+            "cwd": sandbox_path.as_str(),
+            "max_turns": 7
+        }),
+    )?;
+    assert_tool_ok(&consult);
+    let content = tool_content(&consult);
+    assert_eq!(content["response"].as_str(), Some("oracle"));
+    assert_eq!(content["cwd"].as_str(), Some(sandbox_path.as_str()));
+    assert_eq!(content["num_turns"].as_u64(), Some(2));
+
+    let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?;
+    assert_eq!(pwd.trim(), sandbox_path);
+
+    // The fake script records one argument per line.
+    let args = must(fs::read_to_string(&args_file), "read fake args file")?;
+    let lines = args.lines().collect::<Vec<_>>();
+    for flag in [
+        "-p",
+        "--strict-mcp-config",
+        "--disable-slash-commands",
+        "--no-chrome",
+    ] {
+        assert!(
+            lines.contains(&flag),
+            "missing flag {flag:?} in {lines:?}"
+        );
+    }
+    // A value only counts when it directly follows its flag; a bare
+    // `contains("7")` would also be satisfied by an unrelated argument.
+    for (flag, value) in [
+        ("--output-format", "json"),
+        ("--mcp-config", "{\"mcpServers\":{}}"),
+        ("--tools", "Bash,Read,Grep,Glob,LS,WebFetch,WebSearch"),
+        ("--permission-mode", "dontAsk"),
+        ("--max-turns", "7"),
+    ] {
+        assert!(
+            lines.windows(2).any(|pair| pair == [flag, value]),
+            "expected {flag:?} immediately followed by {value:?} in {lines:?}"
+        );
+    }
+    assert_eq!(lines.last().copied(), Some("say oracle"));
+
+    let telemetry = harness.call_tool(4, "telemetry_snapshot", json!({}))?;
+    assert_tool_ok(&telemetry);
+    let consult_recorded = tool_content(&telemetry)["hot_methods"]
+        .as_array()
+        .is_some_and(|methods| {
+            methods
+                .iter()
+                .any(|value| value["method"] == "tools/call:consult")
+        });
+    assert!(
+        consult_recorded,
+        "telemetry has no tools/call:consult entry: {telemetry:#}"
+    );
+    Ok(())
+}
+
+/// When the fake Claude binary exits with status 17 and writes to stderr,
+/// `consult` must return a tool error whose fault class is `downstream` and
+/// whose detail contains the stderr text.
+#[test]
+fn consult_surfaces_downstream_cli_failures() -> TestResult {
+    let root = temp_root("consult_failure")?;
+    let state_home = root.join("state-home");
+    must(fs::create_dir_all(&state_home), "create state home")?;
+    let fake_claude = root.join("claude");
+    write_fake_claude_script(&fake_claude)?;
+
+    let claude_bin = fake_claude.display().to_string();
+    let mut harness = McpHarness::spawn(
+        &state_home,
+        &[
+            ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
+            ("PHONE_OPUS_TEST_EXIT_CODE", "17"),
+            ("PHONE_OPUS_TEST_STDERR", "permission denied by fake claude"),
+        ],
+    )?;
+    let _ = harness.initialize()?;
+    harness.notify_initialized()?;
+
+    let outcome = harness.call_tool(3, "consult", json!({ "prompt": "fail" }))?;
+    assert_tool_error(&outcome);
+    let fault = &tool_content(&outcome)["fault"];
+    assert_eq!(fault["class"].as_str(), Some("downstream"));
+    let detail = fault["detail"].as_str();
+    assert!(detail.is_some_and(|text| text.contains("permission denied by fake claude")));
+    Ok(())
+}
+
+/// With `PHONE_OPUS_MCP_TEST_WORKER_CRASH_ONCE_KEY` set to
+/// `tools/call:consult`, the `consult` call must come back as a `transport`
+/// fault marked retryable but not retried. Telemetry must then show zero
+/// retries, one transport fault for the method, and a `tool_call` row for
+/// `consult` in the on-disk log.
+#[test]
+fn consult_never_replays_after_worker_transport_failure() -> TestResult {
+    let root = temp_root("consult_no_replay")?;
+    let state_home = root.join("state-home");
+    must(fs::create_dir_all(&state_home), "create state home")?;
+    let fake_claude = root.join("claude");
+    write_fake_claude_script(&fake_claude)?;
+
+    let claude_bin = fake_claude.display().to_string();
+    let mut harness = McpHarness::spawn(
+        &state_home,
+        &[
+            ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
+            (
+                "PHONE_OPUS_MCP_TEST_WORKER_CRASH_ONCE_KEY",
+                "tools/call:consult",
+            ),
+        ],
+    )?;
+    let _ = harness.initialize()?;
+    harness.notify_initialized()?;
+
+    let outcome = harness.call_tool(3, "consult", json!({ "prompt": "crash once" }))?;
+    assert_tool_error(&outcome);
+    let failure = tool_content(&outcome);
+    assert_eq!(failure["fault"]["class"].as_str(), Some("transport"));
+    assert_eq!(failure["retryable"].as_bool(), Some(true));
+    assert_eq!(failure["retried"].as_bool(), Some(false));
+
+    let telemetry = harness.call_tool(4, "telemetry_snapshot", json!({ "render": "json" }))?;
+    assert_tool_ok(&telemetry);
+    let snapshot = tool_content(&telemetry);
+    assert_eq!(snapshot["retries"].as_u64(), Some(0));
+    // A missing `hot_methods` array or missing consult entry yields `None`,
+    // which fails the comparison below.
+    let transport_faults = snapshot["hot_methods"]
+        .as_array()
+        .and_then(|methods| {
+            methods
+                .iter()
+                .find(|entry| entry["method"] == "tools/call:consult")
+        })
+        .and_then(|entry| entry["transport_faults"].as_u64());
+    assert_eq!(transport_faults, Some(1));
+
+    let log_path = state_home
+        .join("phone_opus")
+        .join("mcp")
+        .join("telemetry.jsonl");
+    let rows = read_json_lines::<Value>(&log_path)?;
+    let logged_consult = rows
+        .iter()
+        .any(|row| row["event"] == "tool_call" && row["tool_name"] == "consult");
+    assert!(logged_consult);
+    Ok(())
+}
author	main <main@swarm.moe>	2026-03-22 22:20:17 -0400
committer	main <main@swarm.moe>	2026-03-22 22:20:17 -0400
commit	d986442e8e4bc2d716c9d63159a1cfa7b1e6ed76 (patch)
tree	b9ca3d0cb62b5c59e614abfb6f74ac5310c69c2f /crates/phone-opus/tests
download	phone_opus-d986442e8e4bc2d716c9d63159a1cfa7b1e6ed76.zip