swarm repositories / source
about | summary | refs | log | tree | commit | diff
path: root/crates/phone-opus/tests
diff options
context:
space:
mode:
author main <main@swarm.moe> 2026-03-22 22:20:17 -0400
committer main <main@swarm.moe> 2026-03-22 22:20:17 -0400
commit d986442e8e4bc2d716c9d63159a1cfa7b1e6ed76 (patch)
tree b9ca3d0cb62b5c59e614abfb6f74ac5310c69c2f /crates/phone-opus/tests
download phone_opus-d986442e8e4bc2d716c9d63159a1cfa7b1e6ed76.zip
Bootstrap consultative Claude Code MCP
Diffstat (limited to 'crates/phone-opus/tests')
-rw-r--r-- crates/phone-opus/tests/mcp_hardening.rs 422
1 files changed, 422 insertions, 0 deletions
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs
new file mode 100644
index 0000000..b47b365
--- /dev/null
+++ b/crates/phone-opus/tests/mcp_hardening.rs
@@ -0,0 +1,422 @@
+use clap as _;
+use dirs as _;
+use libmcp as _;
+use std::fs;
+use std::io::{self, BufRead, BufReader, Write};
+use std::os::unix::fs::PermissionsExt;
+use std::path::{Path, PathBuf};
+use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio};
+
+use libmcp_testkit::read_json_lines;
+use serde as _;
+use serde_json::{Value, json};
+use thiserror as _;
+
+type TestResult<T = ()> = Result<T, Box<dyn std::error::Error>>;
+
+/// Passes an `Ok` value through unchanged; on `Err`, produces a boxed
+/// `io::Error` whose message is `"{context}: {error}"`.
+fn must<T, E: std::fmt::Display, C: std::fmt::Display>(
+    result: Result<T, E>,
+    context: C,
+) -> TestResult<T> {
+    match result {
+        Ok(value) => Ok(value),
+        Err(error) => Err(io::Error::other(format!("{context}: {error}")).into()),
+    }
+}
+
+fn must_some<T>(value: Option<T>, context: &str) -> TestResult<T> {
+ value.ok_or_else(|| io::Error::other(context).into())
+}
+
+/// Creates and returns a fresh directory under the system temp dir.
+///
+/// The directory name is `phone_opus_{name}_{pid}_{nanos}`, where `pid` is the
+/// current process id and `nanos` is the time since the Unix epoch in
+/// nanoseconds. Nothing in this function removes the directory afterwards.
+fn temp_root(name: &str) -> TestResult<PathBuf> {
+    let since_epoch = must(
+        std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH),
+        "current time after unix epoch",
+    )?;
+    let dir_name = format!(
+        "phone_opus_{name}_{}_{}",
+        std::process::id(),
+        since_epoch.as_nanos()
+    );
+    let root = std::env::temp_dir().join(dir_name);
+    must(fs::create_dir_all(&root), "create temp root")?;
+    Ok(root)
+}
+
+/// Path of the binary under test, taken at compile time from the
+/// `CARGO_BIN_EXE_phone-opus` environment variable.
+fn binary_path() -> PathBuf {
+    let executable: &str = env!("CARGO_BIN_EXE_phone-opus");
+    PathBuf::from(executable)
+}
+
+/// Test driver for a spawned `mcp serve` child process, exchanging one JSON
+/// document per line over the child's stdin/stdout pipes.
+struct McpHarness {
+    /// Handle to the spawned host process.
+    child: Child,
+    /// Write end of the host's stdin pipe.
+    stdin: ChildStdin,
+    /// Buffered read end of the host's stdout pipe.
+    stdout: BufReader<ChildStdout>,
+}
+
+impl McpHarness {
+    /// Launches the binary under test with the arguments `mcp serve`, piped
+    /// stdin/stdout and inherited stderr.
+    ///
+    /// `XDG_STATE_HOME` is set to `state_home` first; the `extra_env` pairs
+    /// are applied afterwards, in order.
+    fn spawn(state_home: &Path, extra_env: &[(&str, &str)]) -> TestResult<Self> {
+        let mut command = Command::new(binary_path());
+        let _ = command.args(["mcp", "serve"]);
+        let _ = command.env("XDG_STATE_HOME", state_home);
+        let _ = command.envs(extra_env.iter().copied());
+        let _ = command
+            .stdin(Stdio::piped())
+            .stdout(Stdio::piped())
+            .stderr(Stdio::inherit());
+
+        let mut child = must(command.spawn(), "spawn mcp host")?;
+        let stdin = must_some(child.stdin.take(), "host stdin")?;
+        let host_stdout = must_some(child.stdout.take(), "host stdout")?;
+        Ok(Self {
+            child,
+            stdin,
+            stdout: BufReader::new(host_stdout),
+        })
+    }
+
+    /// Sends the `initialize` request (id 1) and returns the host's reply.
+    fn initialize(&mut self) -> TestResult<Value> {
+        let handshake = json!({
+            "jsonrpc": "2.0",
+            "id": 1,
+            "method": "initialize",
+            "params": {
+                "protocolVersion": "2025-11-25",
+                "capabilities": {},
+                "clientInfo": { "name": "mcp-hardening-test", "version": "0" }
+            }
+        });
+        self.request(handshake)
+    }
+
+    /// Sends the `notifications/initialized` notification; no reply is read.
+    fn notify_initialized(&mut self) -> TestResult {
+        let notification = json!({
+            "jsonrpc": "2.0",
+            "method": "notifications/initialized",
+        });
+        self.notify(notification)
+    }
+
+    /// Sends a `tools/list` request (id 2) and returns the host's reply.
+    fn tools_list(&mut self) -> TestResult<Value> {
+        let listing_request = json!({
+            "jsonrpc": "2.0",
+            "id": 2,
+            "method": "tools/list",
+            "params": {},
+        });
+        self.request(listing_request)
+    }
+
+    /// Sends a `tools/call` request for tool `name` with the given request
+    /// `id` and `arguments`, and returns the host's reply.
+    fn call_tool(&mut self, id: u64, name: &str, arguments: Value) -> TestResult<Value> {
+        let invocation = json!({
+            "jsonrpc": "2.0",
+            "id": id,
+            "method": "tools/call",
+            "params": {
+                "name": name,
+                "arguments": arguments,
+            }
+        });
+        self.request(invocation)
+    }
+
+    /// Writes `message` as a single line to the host, then reads exactly one
+    /// line back and parses it as JSON.
+    ///
+    /// Fails when the host's stdout reaches EOF before a line arrives. The
+    /// reply's `id` is not compared with the request's.
+    fn request(&mut self, message: Value) -> TestResult<Value> {
+        let payload = must(serde_json::to_string(&message), "request json")?;
+        must(writeln!(self.stdin, "{payload}"), "write request")?;
+        must(self.stdin.flush(), "flush request")?;
+
+        let mut reply = String::new();
+        match must(self.stdout.read_line(&mut reply), "read response")? {
+            0 => Err(io::Error::other("unexpected EOF reading response").into()),
+            _ => must(serde_json::from_str(&reply), "response json"),
+        }
+    }
+
+    /// Writes `message` as a single line to the host without reading a reply.
+    fn notify(&mut self, message: Value) -> TestResult {
+        let payload = must(serde_json::to_string(&message), "notify json")?;
+        must(writeln!(self.stdin, "{payload}"), "write notify")?;
+        must(self.stdin.flush(), "flush notify")
+    }
+}
+
+impl Drop for McpHarness {
+    /// Kills the host process and then waits on it; both outcomes are
+    /// deliberately discarded because a destructor has nowhere to report them.
+    fn drop(&mut self) {
+        drop(self.child.kill());
+        drop(self.child.wait());
+    }
+}
+
+/// Panics unless `response["result"]["isError"]` is the boolean `false`.
+fn assert_tool_ok(response: &Value) {
+    let is_error = response["result"]["isError"].as_bool();
+    assert_eq!(
+        is_error,
+        Some(false),
+        "tool response unexpectedly errored: {response:#}"
+    );
+}
+
+/// Panics unless `response["result"]["isError"]` is the boolean `true`.
+fn assert_tool_error(response: &Value) {
+    let is_error = response["result"]["isError"].as_bool();
+    assert_eq!(
+        is_error,
+        Some(true),
+        "tool response unexpectedly succeeded: {response:#}"
+    );
+}
+
+/// Borrows `response["result"]["structuredContent"]`.
+fn tool_content(response: &Value) -> &Value {
+    let result = &response["result"];
+    &result["structuredContent"]
+}
+
+/// Collects the string `name` of every entry in `response["result"]["tools"]`.
+///
+/// Returns an empty list when that value is not an array; entries without a
+/// string `name` are skipped.
+fn tool_names(response: &Value) -> Vec<&str> {
+    let Some(tools) = response["result"]["tools"].as_array() else {
+        return Vec::new();
+    };
+    tools
+        .iter()
+        .filter_map(|tool| tool["name"].as_str())
+        .collect()
+}
+
+/// Writes a stand-in `claude` shell script to `path` and sets its mode to
+/// `0o755`.
+///
+/// The script is steered by `PHONE_OPUS_TEST_*` environment variables: it can
+/// record its working directory and its arguments to files, echo a message on
+/// stderr, print the contents of a file on stdout, and exit with a chosen code
+/// (0 when unset).
+fn write_fake_claude_script(path: &Path) -> TestResult {
+    const FAKE_CLAUDE_SCRIPT: &str = r#"#!/bin/sh
+set -eu
+if [ -n "${PHONE_OPUS_TEST_PWD_FILE:-}" ]; then
+ pwd >"$PHONE_OPUS_TEST_PWD_FILE"
+fi
+if [ -n "${PHONE_OPUS_TEST_ARGS_FILE:-}" ]; then
+ printf '%s\n' "$@" >"$PHONE_OPUS_TEST_ARGS_FILE"
+fi
+if [ -n "${PHONE_OPUS_TEST_STDERR:-}" ]; then
+ printf '%s\n' "$PHONE_OPUS_TEST_STDERR" >&2
+fi
+if [ -n "${PHONE_OPUS_TEST_STDOUT_FILE:-}" ]; then
+ cat "$PHONE_OPUS_TEST_STDOUT_FILE"
+fi
+exit "${PHONE_OPUS_TEST_EXIT_CODE:-0}"
+"#;
+    must(fs::write(path, FAKE_CLAUDE_SCRIPT), "write fake claude script")?;
+
+    let metadata = must(fs::metadata(path), "fake claude metadata")?;
+    let mut permissions = metadata.permissions();
+    permissions.set_mode(0o755);
+    must(
+        fs::set_permissions(path, permissions),
+        "chmod fake claude script",
+    )
+}
+
+/// A freshly spawned host negotiates protocol version `2025-11-25`, lists the
+/// `consult`, `health_snapshot` and `telemetry_snapshot` tools, and reports
+/// `worker_generation` 1 from `health_snapshot`.
+#[test]
+fn cold_start_exposes_consult_and_ops_tools() -> TestResult {
+    let root = temp_root("cold_start")?;
+    let state_home = root.join("state-home");
+    must(fs::create_dir_all(&state_home), "create state home")?;
+
+    let mut harness = McpHarness::spawn(&state_home, &[])?;
+    let handshake = harness.initialize()?;
+    let negotiated = handshake["result"]["protocolVersion"].as_str();
+    assert_eq!(negotiated, Some("2025-11-25"));
+    harness.notify_initialized()?;
+
+    let listing = harness.tools_list()?;
+    let advertised = tool_names(&listing);
+    assert!(advertised.contains(&"consult"));
+    assert!(advertised.contains(&"health_snapshot"));
+    assert!(advertised.contains(&"telemetry_snapshot"));
+
+    let health = harness.call_tool(3, "health_snapshot", json!({}))?;
+    assert_tool_ok(&health);
+    let generation = tool_content(&health)["worker_generation"].as_u64();
+    assert_eq!(generation, Some(1));
+    Ok(())
+}
+
+/// Runs a successful `consult` call against the fake `claude` script and
+/// checks four things: the structured tool response, the working directory
+/// the script ran in, the command-line arguments it received, and that
+/// telemetry lists a `tools/call:consult` hot method.
+#[test]
+fn consult_uses_read_only_toolset_and_requested_working_directory() -> TestResult {
+    let root = temp_root("consult_success")?;
+    let state_home = root.join("state-home");
+    let sandbox = root.join("sandbox");
+    must(fs::create_dir_all(&state_home), "create state home")?;
+    must(fs::create_dir_all(&sandbox), "create sandbox")?;
+
+    let fake_claude = root.join("claude");
+    let stdout_file = root.join("stdout.json");
+    let args_file = root.join("args.txt");
+    let pwd_file = root.join("pwd.txt");
+    write_fake_claude_script(&fake_claude)?;
+
+    // Canned CLI result that the fake script prints on stdout.
+    let canned_result = json!({
+        "type": "result",
+        "subtype": "success",
+        "is_error": false,
+        "duration_ms": 1234,
+        "duration_api_ms": 1200,
+        "num_turns": 2,
+        "result": "oracle",
+        "stop_reason": "end_turn",
+        "session_id": "session-123",
+        "total_cost_usd": 0.125,
+        "usage": {
+            "input_tokens": 10,
+            "output_tokens": 5
+        },
+        "modelUsage": {
+            "claude-sonnet-4-6": {
+                "inputTokens": 10,
+                "outputTokens": 5
+            }
+        },
+        "permission_denials": [],
+        "fast_mode_state": "off",
+        "uuid": "uuid-123"
+    });
+    // Routed through `must` like every other fallible step, so a failure
+    // names the stage that broke.
+    let canned_stdout = must(serde_json::to_string(&canned_result), "encode fake stdout")?;
+    must(fs::write(&stdout_file, canned_stdout), "write fake stdout")?;
+
+    let claude_bin = fake_claude.display().to_string();
+    let stdout_path = stdout_file.display().to_string();
+    let args_path = args_file.display().to_string();
+    let pwd_path = pwd_file.display().to_string();
+    let env = [
+        ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
+        ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()),
+        ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()),
+        ("PHONE_OPUS_TEST_PWD_FILE", pwd_path.as_str()),
+    ];
+    let mut harness = McpHarness::spawn(&state_home, &env)?;
+    let _ = harness.initialize()?;
+    harness.notify_initialized()?;
+
+    // Rendered once; used for the request and for both comparisons below.
+    let sandbox_path = sandbox.display().to_string();
+    let consult = harness.call_tool(
+        3,
+        "consult",
+        json!({
+            "prompt": "say oracle",
+            "cwd": sandbox_path.as_str(),
+            "max_turns": 7
+        }),
+    )?;
+    assert_tool_ok(&consult);
+    let content = tool_content(&consult);
+    assert_eq!(content["response"].as_str(), Some("oracle"));
+    assert_eq!(content["cwd"].as_str(), Some(sandbox_path.as_str()));
+    assert_eq!(content["num_turns"].as_u64(), Some(2));
+
+    let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?;
+    assert_eq!(pwd.trim(), sandbox_path);
+
+    // The fake script records one argument per line.
+    let args = must(fs::read_to_string(&args_file), "read fake args file")?;
+    let lines = args.lines().collect::<Vec<_>>();
+    let expected_args = [
+        "-p",
+        "--output-format",
+        "json",
+        "--strict-mcp-config",
+        "--mcp-config",
+        "{\"mcpServers\":{}}",
+        "--disable-slash-commands",
+        "--no-chrome",
+        "--tools",
+        "Bash,Read,Grep,Glob,LS,WebFetch,WebSearch",
+        "--permission-mode",
+        "dontAsk",
+        "--max-turns",
+        "7",
+    ];
+    for expected in expected_args {
+        assert!(
+            lines.contains(&expected),
+            "missing CLI argument {expected:?} in {lines:?}"
+        );
+    }
+    assert_eq!(lines.last().copied(), Some("say oracle"));
+
+    let telemetry = harness.call_tool(4, "telemetry_snapshot", json!({}))?;
+    assert_tool_ok(&telemetry);
+    // A missing or non-array `hot_methods` counts as "not recorded".
+    let consult_recorded = tool_content(&telemetry)["hot_methods"]
+        .as_array()
+        .is_some_and(|methods| {
+            methods
+                .iter()
+                .any(|entry| entry["method"] == "tools/call:consult")
+        });
+    assert!(
+        consult_recorded,
+        "telemetry has no tools/call:consult hot method: {telemetry:#}"
+    );
+    Ok(())
+}
+
+/// When the fake `claude` exits with code 17 and writes to stderr, `consult`
+/// returns a tool error whose fault class is `downstream` and whose detail
+/// contains the stderr text.
+#[test]
+fn consult_surfaces_downstream_cli_failures() -> TestResult {
+    let root = temp_root("consult_failure")?;
+    let state_home = root.join("state-home");
+    let fake_claude = root.join("claude");
+    must(fs::create_dir_all(&state_home), "create state home")?;
+    write_fake_claude_script(&fake_claude)?;
+
+    let claude_bin = fake_claude.display().to_string();
+    let env = [
+        ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
+        ("PHONE_OPUS_TEST_EXIT_CODE", "17"),
+        ("PHONE_OPUS_TEST_STDERR", "permission denied by fake claude"),
+    ];
+    let mut harness = McpHarness::spawn(&state_home, &env)?;
+    let _ = harness.initialize()?;
+    harness.notify_initialized()?;
+
+    let outcome = harness.call_tool(3, "consult", json!({ "prompt": "fail" }))?;
+    assert_tool_error(&outcome);
+
+    let fault = &tool_content(&outcome)["fault"];
+    assert_eq!(fault["class"].as_str(), Some("downstream"));
+    let detail_carries_stderr = fault["detail"]
+        .as_str()
+        .is_some_and(|detail| detail.contains("permission denied by fake claude"));
+    assert!(detail_carries_stderr);
+    Ok(())
+}
+
+/// With `PHONE_OPUS_MCP_TEST_WORKER_CRASH_ONCE_KEY` set to
+/// `tools/call:consult`, the `consult` call must come back as a `transport`
+/// fault marked retryable but not retried. Telemetry must then report zero
+/// retries and one transport fault for the method, and the on-disk telemetry
+/// log must contain a `tool_call` row for `consult`.
+#[test]
+fn consult_never_replays_after_worker_transport_failure() -> TestResult {
+    let root = temp_root("consult_no_replay")?;
+    let state_home = root.join("state-home");
+    let fake_claude = root.join("claude");
+    must(fs::create_dir_all(&state_home), "create state home")?;
+    write_fake_claude_script(&fake_claude)?;
+
+    let claude_bin = fake_claude.display().to_string();
+    let env = [
+        ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
+        (
+            "PHONE_OPUS_MCP_TEST_WORKER_CRASH_ONCE_KEY",
+            "tools/call:consult",
+        ),
+    ];
+    let mut harness = McpHarness::spawn(&state_home, &env)?;
+    let _ = harness.initialize()?;
+    harness.notify_initialized()?;
+
+    let faulted = harness.call_tool(3, "consult", json!({ "prompt": "crash once" }))?;
+    assert_tool_error(&faulted);
+    let fault_report = tool_content(&faulted);
+    assert_eq!(fault_report["fault"]["class"].as_str(), Some("transport"));
+    assert_eq!(fault_report["retryable"].as_bool(), Some(true));
+    assert_eq!(fault_report["retried"].as_bool(), Some(false));
+
+    let telemetry = harness.call_tool(4, "telemetry_snapshot", json!({ "render": "json" }))?;
+    assert_tool_ok(&telemetry);
+    let snapshot = tool_content(&telemetry);
+    assert_eq!(snapshot["retries"].as_u64(), Some(0));
+    // Yields `None` when `hot_methods` is not an array, when there is no
+    // consult entry, or when the entry lacks an integer `transport_faults`.
+    let consult_transport_faults = snapshot["hot_methods"]
+        .as_array()
+        .and_then(|methods| {
+            methods
+                .iter()
+                .find(|entry| entry["method"] == "tools/call:consult")
+        })
+        .and_then(|entry| entry["transport_faults"].as_u64());
+    assert_eq!(consult_transport_faults, Some(1));
+
+    let mut telemetry_log_path = state_home.clone();
+    telemetry_log_path.extend(["phone_opus", "mcp", "telemetry.jsonl"]);
+    let telemetry_rows = read_json_lines::<Value>(&telemetry_log_path)?;
+    let logged_consult_call = telemetry_rows
+        .iter()
+        .any(|row| row["event"] == "tool_call" && row["tool_name"] == "consult");
+    assert!(logged_consult_call);
+    Ok(())
+}