diff options
Diffstat (limited to 'crates')
| -rw-r--r-- | crates/phone-opus/src/mcp/catalog.rs | 64 | ||||
| -rw-r--r-- | crates/phone-opus/src/mcp/service.rs | 55 | ||||
| -rw-r--r-- | crates/phone-opus/tests/mcp_hardening.rs | 188 |
3 files changed, 51 insertions, 256 deletions
diff --git a/crates/phone-opus/src/mcp/catalog.rs b/crates/phone-opus/src/mcp/catalog.rs index a4a1780..3570b1f 100644 --- a/crates/phone-opus/src/mcp/catalog.rs +++ b/crates/phone-opus/src/mcp/catalog.rs @@ -41,29 +41,11 @@ impl ToolSpec { const TOOL_SPECS: &[ToolSpec] = &[ ToolSpec { name: "consult", - description: "Run a consult against the system Claude Code install using a read-only built-in toolset, optionally resume a prior Claude session by session_id, optionally queue the consult in the background, and return the response or job handle.", + description: "Run a blocking consult against the system Claude Code install using a read-only built-in toolset, optionally resume a prior Claude session by session_id, and return the response plus execution metadata.", dispatch: DispatchTarget::Worker, replay: ReplayContract::NeverReplay, }, ToolSpec { - name: "consult_job", - description: "Read the status of one background consult job by job_id. When the job has finished, the final Claude response or failure is included.", - dispatch: DispatchTarget::Host, - replay: ReplayContract::Convergent, - }, - ToolSpec { - name: "consult_wait", - description: "Block until one background consult job finishes or a timeout elapses. When the job has finished, the final Claude response or failure is included.", - dispatch: DispatchTarget::Host, - replay: ReplayContract::Convergent, - }, - ToolSpec { - name: "consult_jobs", - description: "List recent background consult jobs. Defaults to render=porcelain; use render=json for structured output.", - dispatch: DispatchTarget::Host, - replay: ReplayContract::Convergent, - }, - ToolSpec { name: "health_snapshot", description: "Read host lifecycle, worker generation, rollout state, and latest fault. Defaults to render=porcelain; use render=json for structured output.", dispatch: DispatchTarget::Host, @@ -111,54 +93,10 @@ fn tool_schema(name: &str) -> Value { "session_id": { "type": "string", "description": "Optional Claude session handle returned by a previous consult call. When set, phone_opus resumes that conversation instead of starting a fresh one." - }, - "background": { - "type": "boolean", - "description": "When true, queue the consult as a background job and return immediately with a job handle. The default is false, which keeps consult synchronous." } }, "required": ["prompt"] })), - "consult_job" => with_common_presentation(json!({ - "type": "object", - "properties": { - "job_id": { - "type": "string", - "description": "Background consult job handle returned by consult with background=true." - } - }, - "required": ["job_id"] - })), - "consult_wait" => with_common_presentation(json!({ - "type": "object", - "properties": { - "job_id": { - "type": "string", - "description": "Background consult job handle returned by consult with background=true." - }, - "timeout_ms": { - "type": "integer", - "minimum": 0, - "description": "Maximum time to wait for completion before returning the current job state. Defaults to 1800000 (30 minutes)." - }, - "poll_interval_ms": { - "type": "integer", - "minimum": 10, - "description": "Polling interval used while waiting. Defaults to 1000." - } - }, - "required": ["job_id"] - })), - "consult_jobs" => with_common_presentation(json!({ - "type": "object", - "properties": { - "limit": { - "type": "integer", - "minimum": 1, - "description": "Maximum number of recent background jobs to return. Defaults to 10." - } - } - })), "health_snapshot" | "telemetry_snapshot" => with_common_presentation(json!({ "type": "object", "properties": {} diff --git a/crates/phone-opus/src/mcp/service.rs b/crates/phone-opus/src/mcp/service.rs index 773a516..64cb778 100644 --- a/crates/phone-opus/src/mcp/service.rs +++ b/crates/phone-opus/src/mcp/service.rs @@ -86,19 +86,9 @@ impl WorkerService { let args = deserialize::<ConsultArgs>(arguments, &operation, self.generation)?; let request = ConsultRequest::parse(args) .map_err(|error| invalid_consult_request(self.generation, &operation, error))?; - match request.mode() { - ConsultMode::Sync => { - let response = invoke_claude(&request) - .map_err(|error| consult_fault(self.generation, &operation, error))?; - consult_output(&request, &response, self.generation, &operation)? - } - ConsultMode::Background => submit_background_consult( - &request, - self.generation, - FaultStage::Worker, - &operation, - )?, - } + let response = invoke_claude(&request) + .map_err(|error| consult_fault(self.generation, &operation, error))?; + consult_output(&request, &response, self.generation, &operation)? } other => { return Err(FaultRecord::invalid_input( @@ -124,7 +114,6 @@ struct ConsultArgs { prompt: String, cwd: Option<String>, session_id: Option<String>, - background: Option<bool>, } #[derive(Debug, Deserialize)] @@ -153,7 +142,6 @@ struct ConsultRequest { prompt: PromptText, cwd: WorkingDirectory, session: Option<SessionHandle>, - mode: ConsultMode, } impl ConsultRequest { @@ -162,14 +150,9 @@ impl ConsultRequest { prompt: PromptText::parse(args.prompt)?, cwd: WorkingDirectory::resolve(args.cwd)?, session: args.session_id.map(SessionHandle::parse).transpose()?, - mode: ConsultMode::from_background(args.background), }) } - fn mode(&self) -> ConsultMode { - self.mode - } - fn session_mode(&self) -> &'static str { if self.session.is_some() { "resumed" @@ -182,6 +165,7 @@ impl ConsultRequest { self.session.as_ref().map(SessionHandle::display) } + #[allow(dead_code, reason = "background submission is parked but not exposed")] fn background_request(&self) -> BackgroundConsultRequest { BackgroundConsultRequest { prompt: self.prompt.as_str().to_owned(), @@ -191,29 +175,6 @@ impl ConsultRequest { } } -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -enum ConsultMode { - Sync, - Background, -} - -impl ConsultMode { - fn from_background(raw: Option<bool>) -> Self { - if raw.unwrap_or(false) { - Self::Background - } else { - Self::Sync - } - } - - fn as_str(self) -> &'static str { - match self { - Self::Sync => "sync", - Self::Background => "background", - } - } -} - #[derive(Debug, Clone)] struct PromptText { original: String, @@ -305,7 +266,6 @@ impl BackgroundConsultRequest { prompt: self.prompt, cwd: Some(self.cwd), session_id: self.session_id, - background: Some(false), }) } } @@ -895,6 +855,7 @@ pub(crate) fn consult_jobs_tool_output( ) } +#[allow(dead_code, reason = "background submission is parked but not exposed")] fn submit_background_consult( request: &ConsultRequest, generation: Generation, @@ -929,7 +890,7 @@ fn submit_background_consult( .map_err(|error| FaultRecord::process(generation, stage, operation, error.to_string()))?; let concise = json!({ - "mode": request.mode().as_str(), + "mode": "background", "job_id": record.job_id.display(), "status": record.status, "done": false, @@ -939,7 +900,7 @@ fn submit_background_consult( "follow_up_tools": ["consult_wait", "consult_job", "consult_jobs"], }); let full = json!({ - "mode": request.mode().as_str(), + "mode": "background", "job_id": record.job_id.display(), "status": record.status, "done": false, @@ -1495,7 +1456,6 @@ fn consult_output( operation: &str, ) -> Result<ToolOutput, FaultRecord> { let concise = json!({ - "mode": request.mode().as_str(), "response": response.result, "cwd": response.cwd.display(), "persisted_output_path": response.persisted_output_path.display(), @@ -1511,7 +1471,6 @@ fn consult_output( "permission_denial_count": response.permission_denials.len(), }); let full = json!({ - "mode": request.mode().as_str(), "response": response.result, "cwd": response.cwd.display(), "persisted_output_path": response.persisted_output_path.display(), diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs index cbe3354..107a578 100644 --- a/crates/phone-opus/tests/mcp_hardening.rs +++ b/crates/phone-opus/tests/mcp_hardening.rs @@ -288,11 +288,23 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult { let tools = harness.tools_list()?; let tool_names = tool_names(&tools); assert!(tool_names.contains(&"consult")); - assert!(tool_names.contains(&"consult_job")); - assert!(tool_names.contains(&"consult_wait")); - assert!(tool_names.contains(&"consult_jobs")); + assert!(!tool_names.contains(&"consult_job")); + assert!(!tool_names.contains(&"consult_wait")); + assert!(!tool_names.contains(&"consult_jobs")); assert!(tool_names.contains(&"health_snapshot")); assert!(tool_names.contains(&"telemetry_snapshot")); + let consult_tool = must_some( + tools["result"]["tools"] + .as_array() + .into_iter() + .flatten() + .find(|tool| tool["name"] == "consult"), + "consult tool definition", + )?; + assert!( + consult_tool["inputSchema"]["properties"]["background"].is_null(), + "consult schema should not advertise background: {consult_tool:#}" + ); let health = harness.call_tool(3, "health_snapshot", json!({}))?; assert_tool_ok(&health); @@ -387,11 +399,14 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki json!({ "prompt": "say oracle", "cwd": sandbox.display().to_string(), - "session_id": resumed_session + "session_id": resumed_session, + "background": true }), )?; assert_tool_ok(&consult); assert_eq!(tool_content(&consult)["response"].as_str(), Some("oracle")); + assert!(tool_content(&consult)["mode"].is_null()); + assert!(tool_content(&consult)["job_id"].is_null()); assert_eq!( tool_content(&consult)["session_mode"].as_str(), Some("resumed") @@ -539,176 +554,59 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki } #[test] -fn consult_can_run_in_background_and_be_waited_on_or_polled() -> TestResult { - let root = temp_root("consult_background")?; +fn background_surfaces_are_hidden_from_public_mcp() -> TestResult { + let root = temp_root("consult_hidden_background")?; let state_home = root.join("state-home"); - let sandbox = root.join("sandbox"); - let caller_home = root.join("caller-home"); must(fs::create_dir_all(&state_home), "create state home")?; - must(fs::create_dir_all(&sandbox), "create sandbox")?; - must(fs::create_dir_all(&caller_home), "create caller home")?; - let fake_claude = root.join("claude"); - let stdout_file = root.join("stdout.json"); - let args_file = root.join("args.txt"); - let pwd_file = root.join("pwd.txt"); - write_fake_claude_script(&fake_claude)?; - must( - fs::write( - &stdout_file, - serde_json::to_string(&json!({ - "type": "result", - "subtype": "success", - "is_error": false, - "duration_ms": 4321, - "duration_api_ms": 4200, - "num_turns": 3, - "result": "background oracle", - "stop_reason": "end_turn", - "session_id": "3fc69f58-7752-4d9d-a95d-19a217814b6a", - "total_cost_usd": 0.25, - "usage": { - "input_tokens": 11, - "output_tokens": 7 - }, - "modelUsage": { - "claude-opus-4-6": { - "inputTokens": 11, - "outputTokens": 7 - } - }, - "permission_denials": [], - "fast_mode_state": "off", - "uuid": "uuid-456" - }))?, - ), - "write fake stdout", - )?; - - let claude_bin = fake_claude.display().to_string(); - let stdout_path = stdout_file.display().to_string(); - let args_path = args_file.display().to_string(); - let pwd_path = pwd_file.display().to_string(); - let caller_home_path = caller_home.display().to_string(); - let env = [ - ("HOME", caller_home_path.as_str()), - ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()), - ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()), - ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()), - ("PHONE_OPUS_TEST_PWD_FILE", pwd_path.as_str()), - ("PHONE_OPUS_TEST_SLEEP_MS", "100"), - ]; - let mut harness = McpHarness::spawn(&state_home, &env)?; + let mut harness = McpHarness::spawn(&state_home, &[])?; let _ = harness.initialize()?; harness.notify_initialized()?; - let submit = harness.call_tool( + let consult_job = harness.call_tool( 3, - "consult", + "consult_job", json!({ - "prompt": "background oracle", - "cwd": sandbox.display().to_string(), - "background": true + "job_id": "00000000-0000-0000-0000-000000000000" }), )?; - assert_tool_ok(&submit); - assert_eq!(tool_content(&submit)["mode"].as_str(), Some("background")); + assert_tool_error(&consult_job); assert!( - tool_content(&submit)["follow_up_tools"] + consult_job["result"]["content"] .as_array() .into_iter() .flatten() - .any(|value| value == "consult_wait") + .filter_map(|entry| entry["text"].as_str()) + .any(|text| text.contains("unknown tool `consult_job`")) ); - let job_id = must_some( - tool_content(&submit)["job_id"].as_str().map(str::to_owned), - "background job id", - )?; - let _ = uuid::Uuid::parse_str(&job_id) - .map_err(|error| io::Error::other(format!("job id uuid parse: {error}")))?; - let timed_out = harness.call_tool( + let consult_wait = harness.call_tool( 4, "consult_wait", json!({ - "job_id": job_id, - "timeout_ms": 0, - "render": "json" + "job_id": "00000000-0000-0000-0000-000000000000" }), )?; - assert_tool_ok(&timed_out); - assert_eq!(tool_content(&timed_out)["timed_out"].as_bool(), Some(true)); - assert_eq!(tool_content(&timed_out)["done"].as_bool(), Some(false)); - - let waited = harness.call_tool( - 5, - "consult_wait", - json!({ - "job_id": job_id, - "timeout_ms": 5_000, - "poll_interval_ms": 10, - "render": "json" - }), - )?; - assert_tool_ok(&waited); - assert_eq!(tool_content(&waited)["timed_out"].as_bool(), Some(false)); - assert_eq!(tool_content(&waited)["status"].as_str(), Some("succeeded")); + assert_tool_error(&consult_wait); assert!( - tool_content(&waited)["waited_ms"] - .as_u64() - .is_some_and(|value| value >= 50) - ); - assert_eq!( - tool_content(&waited)["result"]["response"].as_str(), - Some("background oracle") - ); - let persisted_output_path = must_some( - tool_content(&waited)["result"]["persisted_output_path"] - .as_str() - .map(str::to_owned), - "background persisted output path", - )?; - assert!(persisted_output_path.starts_with("/tmp/phone_opus-consults/")); - assert!(persisted_output_path.contains("3fc69f58-7752-4d9d-a95d-19a217814b6a")); - let persisted_output = must( - fs::read_to_string(&persisted_output_path), - "read background persisted consult output", - )?; - let persisted_output: Value = must( - serde_json::from_str(&persisted_output), - "parse background persisted consult output", - )?; - assert_eq!( - persisted_output["response"].as_str(), - Some("background oracle") + consult_wait["result"]["content"] + .as_array() + .into_iter() + .flatten() + .filter_map(|entry| entry["text"].as_str()) + .any(|text| text.contains("unknown tool `consult_wait`")) ); - let job = harness.call_tool( - 6, - "consult_job", - json!({ - "job_id": job_id, - "render": "json" - }), - )?; - assert_tool_ok(&job); - assert_eq!(tool_content(&job)["status"].as_str(), Some("succeeded")); - - let jobs = harness.call_tool(7, "consult_jobs", json!({ "render": "json" }))?; - assert_tool_ok(&jobs); + let consult_jobs = harness.call_tool(5, "consult_jobs", json!({}))?; + assert_tool_error(&consult_jobs); assert!( - tool_content(&jobs)["jobs"] + consult_jobs["result"]["content"] .as_array() .into_iter() .flatten() - .any(|value| value["job_id"] == job_id) + .filter_map(|entry| entry["text"].as_str()) + .any(|text| text.contains("unknown tool `consult_jobs`")) ); - - let args = must(fs::read_to_string(&args_file), "read fake args file")?; - assert!(args.contains(PROMPT_PREFIX)); - assert!(args.contains("background oracle")); - let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?; - assert_eq!(pwd.trim(), sandbox.display().to_string()); Ok(()) } |