Hide background consult surfaces

author: main <main@swarm.moe> 2026-03-23 22:32:57 -0400
committer: main <main@swarm.moe> 2026-03-23 22:32:57 -0400
commit: d64d1bd730aec23bcc5b01a78a8945863ea4d5a7 (patch)
tree: b757569797231838509f74051c2fdb828ab57857 /crates/phone-opus
parent: 00949559a8a4757e1198e1ea582ebfcf7268fec4 (diff)
download: phone_opus-d64d1bd730aec23bcc5b01a78a8945863ea4d5a7.zip
3 files changed, 51 insertions, 256 deletions
diff --git a/crates/phone-opus/src/mcp/catalog.rs b/crates/phone-opus/src/mcp/catalog.rs
index a4a1780..3570b1f 100644
--- a/crates/phone-opus/src/mcp/catalog.rs
+++ b/crates/phone-opus/src/mcp/catalog.rs
@@ -41,29 +41,11 @@ impl ToolSpec {
 const TOOL_SPECS: &[ToolSpec] = &[
     ToolSpec {
         name: "consult",
-        description: "Run a consult against the system Claude Code install using a read-only built-in toolset, optionally resume a prior Claude session by session_id, optionally queue the consult in the background, and return the response or job handle.",
+        description: "Run a blocking consult against the system Claude Code install using a read-only built-in toolset, optionally resume a prior Claude session by session_id, and return the response plus execution metadata.",
         dispatch: DispatchTarget::Worker,
         replay: ReplayContract::NeverReplay,
     },
     ToolSpec {
-        name: "consult_job",
-        description: "Read the status of one background consult job by job_id. When the job has finished, the final Claude response or failure is included.",
-        dispatch: DispatchTarget::Host,
-        replay: ReplayContract::Convergent,
-    },
-    ToolSpec {
-        name: "consult_wait",
-        description: "Block until one background consult job finishes or a timeout elapses. When the job has finished, the final Claude response or failure is included.",
-        dispatch: DispatchTarget::Host,
-        replay: ReplayContract::Convergent,
-    },
-    ToolSpec {
-        name: "consult_jobs",
-        description: "List recent background consult jobs. Defaults to render=porcelain; use render=json for structured output.",
-        dispatch: DispatchTarget::Host,
-        replay: ReplayContract::Convergent,
-    },
-    ToolSpec {
         name: "health_snapshot",
         description: "Read host lifecycle, worker generation, rollout state, and latest fault. Defaults to render=porcelain; use render=json for structured output.",
         dispatch: DispatchTarget::Host,
@@ -111,54 +93,10 @@ fn tool_schema(name: &str) -> Value {
                 "session_id": {
                     "type": "string",
                     "description": "Optional Claude session handle returned by a previous consult call. When set, phone_opus resumes that conversation instead of starting a fresh one."
-                },
-                "background": {
-                    "type": "boolean",
-                    "description": "When true, queue the consult as a background job and return immediately with a job handle. The default is false, which keeps consult synchronous."
                 }
             },
             "required": ["prompt"]
         })),
-        "consult_job" => with_common_presentation(json!({
-            "type": "object",
-            "properties": {
-                "job_id": {
-                    "type": "string",
-                    "description": "Background consult job handle returned by consult with background=true."
-                }
-            },
-            "required": ["job_id"]
-        })),
-        "consult_wait" => with_common_presentation(json!({
-            "type": "object",
-            "properties": {
-                "job_id": {
-                    "type": "string",
-                    "description": "Background consult job handle returned by consult with background=true."
-                },
-                "timeout_ms": {
-                    "type": "integer",
-                    "minimum": 0,
-                    "description": "Maximum time to wait for completion before returning the current job state. Defaults to 1800000 (30 minutes)."
-                },
-                "poll_interval_ms": {
-                    "type": "integer",
-                    "minimum": 10,
-                    "description": "Polling interval used while waiting. Defaults to 1000."
-                }
-            },
-            "required": ["job_id"]
-        })),
-        "consult_jobs" => with_common_presentation(json!({
-            "type": "object",
-            "properties": {
-                "limit": {
-                    "type": "integer",
-                    "minimum": 1,
-                    "description": "Maximum number of recent background jobs to return. Defaults to 10."
-                }
-            }
-        })),
         "health_snapshot" | "telemetry_snapshot" => with_common_presentation(json!({
             "type": "object",
             "properties": {}
diff --git a/crates/phone-opus/src/mcp/service.rs b/crates/phone-opus/src/mcp/service.rs
index 773a516..64cb778 100644
--- a/crates/phone-opus/src/mcp/service.rs
+++ b/crates/phone-opus/src/mcp/service.rs
@@ -86,19 +86,9 @@ impl WorkerService {
                 let args = deserialize::<ConsultArgs>(arguments, &operation, self.generation)?;
                 let request = ConsultRequest::parse(args)
                     .map_err(|error| invalid_consult_request(self.generation, &operation, error))?;
-                match request.mode() {
-                    ConsultMode::Sync => {
-                        let response = invoke_claude(&request)
-                            .map_err(|error| consult_fault(self.generation, &operation, error))?;
-                        consult_output(&request, &response, self.generation, &operation)?
-                    }
-                    ConsultMode::Background => submit_background_consult(
-                        &request,
-                        self.generation,
-                        FaultStage::Worker,
-                        &operation,
-                    )?,
-                }
+                let response = invoke_claude(&request)
+                    .map_err(|error| consult_fault(self.generation, &operation, error))?;
+                consult_output(&request, &response, self.generation, &operation)?
             }
             other => {
                 return Err(FaultRecord::invalid_input(
@@ -124,7 +114,6 @@ struct ConsultArgs {
     prompt: String,
     cwd: Option<String>,
     session_id: Option<String>,
-    background: Option<bool>,
 }
 
 #[derive(Debug, Deserialize)]
@@ -153,7 +142,6 @@ struct ConsultRequest {
     prompt: PromptText,
     cwd: WorkingDirectory,
     session: Option<SessionHandle>,
-    mode: ConsultMode,
 }
 
 impl ConsultRequest {
@@ -162,14 +150,9 @@ impl ConsultRequest {
             prompt: PromptText::parse(args.prompt)?,
             cwd: WorkingDirectory::resolve(args.cwd)?,
             session: args.session_id.map(SessionHandle::parse).transpose()?,
-            mode: ConsultMode::from_background(args.background),
         })
     }
 
-    fn mode(&self) -> ConsultMode {
-        self.mode
-    }
-
     fn session_mode(&self) -> &'static str {
         if self.session.is_some() {
             "resumed"
@@ -182,6 +165,7 @@ impl ConsultRequest {
         self.session.as_ref().map(SessionHandle::display)
     }
 
+    #[allow(dead_code, reason = "background submission is parked but not exposed")]
     fn background_request(&self) -> BackgroundConsultRequest {
         BackgroundConsultRequest {
             prompt: self.prompt.as_str().to_owned(),
@@ -191,29 +175,6 @@ impl ConsultRequest {
     }
 }
 
-#[derive(Debug, Clone, Copy, Eq, PartialEq)]
-enum ConsultMode {
-    Sync,
-    Background,
-}
-
-impl ConsultMode {
-    fn from_background(raw: Option<bool>) -> Self {
-        if raw.unwrap_or(false) {
-            Self::Background
-        } else {
-            Self::Sync
-        }
-    }
-
-    fn as_str(self) -> &'static str {
-        match self {
-            Self::Sync => "sync",
-            Self::Background => "background",
-        }
-    }
-}
-
 #[derive(Debug, Clone)]
 struct PromptText {
     original: String,
@@ -305,7 +266,6 @@ impl BackgroundConsultRequest {
             prompt: self.prompt,
             cwd: Some(self.cwd),
             session_id: self.session_id,
-            background: Some(false),
         })
     }
 }
@@ -895,6 +855,7 @@ pub(crate) fn consult_jobs_tool_output(
     )
 }
 
+#[allow(dead_code, reason = "background submission is parked but not exposed")]
 fn submit_background_consult(
     request: &ConsultRequest,
     generation: Generation,
@@ -929,7 +890,7 @@ fn submit_background_consult(
         .map_err(|error| FaultRecord::process(generation, stage, operation, error.to_string()))?;
 
     let concise = json!({
-        "mode": request.mode().as_str(),
+        "mode": "background",
         "job_id": record.job_id.display(),
         "status": record.status,
         "done": false,
@@ -939,7 +900,7 @@ fn submit_background_consult(
         "follow_up_tools": ["consult_wait", "consult_job", "consult_jobs"],
     });
     let full = json!({
-        "mode": request.mode().as_str(),
+        "mode": "background",
         "job_id": record.job_id.display(),
         "status": record.status,
         "done": false,
@@ -1495,7 +1456,6 @@ fn consult_output(
     operation: &str,
 ) -> Result<ToolOutput, FaultRecord> {
     let concise = json!({
-        "mode": request.mode().as_str(),
         "response": response.result,
         "cwd": response.cwd.display(),
         "persisted_output_path": response.persisted_output_path.display(),
@@ -1511,7 +1471,6 @@ fn consult_output(
         "permission_denial_count": response.permission_denials.len(),
     });
     let full = json!({
-        "mode": request.mode().as_str(),
         "response": response.result,
         "cwd": response.cwd.display(),
         "persisted_output_path": response.persisted_output_path.display(),
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs
index cbe3354..107a578 100644
--- a/crates/phone-opus/tests/mcp_hardening.rs
+++ b/crates/phone-opus/tests/mcp_hardening.rs
@@ -288,11 +288,23 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult {
     let tools = harness.tools_list()?;
     let tool_names = tool_names(&tools);
     assert!(tool_names.contains(&"consult"));
-    assert!(tool_names.contains(&"consult_job"));
-    assert!(tool_names.contains(&"consult_wait"));
-    assert!(tool_names.contains(&"consult_jobs"));
+    assert!(!tool_names.contains(&"consult_job"));
+    assert!(!tool_names.contains(&"consult_wait"));
+    assert!(!tool_names.contains(&"consult_jobs"));
     assert!(tool_names.contains(&"health_snapshot"));
     assert!(tool_names.contains(&"telemetry_snapshot"));
+    let consult_tool = must_some(
+        tools["result"]["tools"]
+            .as_array()
+            .into_iter()
+            .flatten()
+            .find(|tool| tool["name"] == "consult"),
+        "consult tool definition",
+    )?;
+    assert!(
+        consult_tool["inputSchema"]["properties"]["background"].is_null(),
+        "consult schema should not advertise background: {consult_tool:#}"
+    );
 
     let health = harness.call_tool(3, "health_snapshot", json!({}))?;
     assert_tool_ok(&health);
@@ -387,11 +399,14 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki
         json!({
             "prompt": "say oracle",
             "cwd": sandbox.display().to_string(),
-            "session_id": resumed_session
+            "session_id": resumed_session,
+            "background": true
         }),
     )?;
     assert_tool_ok(&consult);
     assert_eq!(tool_content(&consult)["response"].as_str(), Some("oracle"));
+    assert!(tool_content(&consult)["mode"].is_null());
+    assert!(tool_content(&consult)["job_id"].is_null());
     assert_eq!(
         tool_content(&consult)["session_mode"].as_str(),
         Some("resumed")
@@ -539,176 +554,59 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki
 }
 
 #[test]
-fn consult_can_run_in_background_and_be_waited_on_or_polled() -> TestResult {
-    let root = temp_root("consult_background")?;
+fn background_surfaces_are_hidden_from_public_mcp() -> TestResult {
+    let root = temp_root("consult_hidden_background")?;
     let state_home = root.join("state-home");
-    let sandbox = root.join("sandbox");
-    let caller_home = root.join("caller-home");
     must(fs::create_dir_all(&state_home), "create state home")?;
-    must(fs::create_dir_all(&sandbox), "create sandbox")?;
-    must(fs::create_dir_all(&caller_home), "create caller home")?;
 
-    let fake_claude = root.join("claude");
-    let stdout_file = root.join("stdout.json");
-    let args_file = root.join("args.txt");
-    let pwd_file = root.join("pwd.txt");
-    write_fake_claude_script(&fake_claude)?;
-    must(
-        fs::write(
-            &stdout_file,
-            serde_json::to_string(&json!({
-                "type": "result",
-                "subtype": "success",
-                "is_error": false,
-                "duration_ms": 4321,
-                "duration_api_ms": 4200,
-                "num_turns": 3,
-                "result": "background oracle",
-                "stop_reason": "end_turn",
-                "session_id": "3fc69f58-7752-4d9d-a95d-19a217814b6a",
-                "total_cost_usd": 0.25,
-                "usage": {
-                    "input_tokens": 11,
-                    "output_tokens": 7
-                },
-                "modelUsage": {
-                    "claude-opus-4-6": {
-                        "inputTokens": 11,
-                        "outputTokens": 7
-                    }
-                },
-                "permission_denials": [],
-                "fast_mode_state": "off",
-                "uuid": "uuid-456"
-            }))?,
-        ),
-        "write fake stdout",
-    )?;
-
-    let claude_bin = fake_claude.display().to_string();
-    let stdout_path = stdout_file.display().to_string();
-    let args_path = args_file.display().to_string();
-    let pwd_path = pwd_file.display().to_string();
-    let caller_home_path = caller_home.display().to_string();
-    let env = [
-        ("HOME", caller_home_path.as_str()),
-        ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
-        ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()),
-        ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()),
-        ("PHONE_OPUS_TEST_PWD_FILE", pwd_path.as_str()),
-        ("PHONE_OPUS_TEST_SLEEP_MS", "100"),
-    ];
-    let mut harness = McpHarness::spawn(&state_home, &env)?;
+    let mut harness = McpHarness::spawn(&state_home, &[])?;
     let _ = harness.initialize()?;
     harness.notify_initialized()?;
 
-    let submit = harness.call_tool(
+    let consult_job = harness.call_tool(
         3,
-        "consult",
+        "consult_job",
         json!({
-            "prompt": "background oracle",
-            "cwd": sandbox.display().to_string(),
-            "background": true
+            "job_id": "00000000-0000-0000-0000-000000000000"
         }),
     )?;
-    assert_tool_ok(&submit);
-    assert_eq!(tool_content(&submit)["mode"].as_str(), Some("background"));
+    assert_tool_error(&consult_job);
     assert!(
-        tool_content(&submit)["follow_up_tools"]
+        consult_job["result"]["content"]
             .as_array()
             .into_iter()
             .flatten()
-            .any(|value| value == "consult_wait")
+            .filter_map(|entry| entry["text"].as_str())
+            .any(|text| text.contains("unknown tool `consult_job`"))
     );
-    let job_id = must_some(
-        tool_content(&submit)["job_id"].as_str().map(str::to_owned),
-        "background job id",
-    )?;
-    let _ = uuid::Uuid::parse_str(&job_id)
-        .map_err(|error| io::Error::other(format!("job id uuid parse: {error}")))?;
 
-    let timed_out = harness.call_tool(
+    let consult_wait = harness.call_tool(
         4,
         "consult_wait",
         json!({
-            "job_id": job_id,
-            "timeout_ms": 0,
-            "render": "json"
+            "job_id": "00000000-0000-0000-0000-000000000000"
         }),
     )?;
-    assert_tool_ok(&timed_out);
-    assert_eq!(tool_content(&timed_out)["timed_out"].as_bool(), Some(true));
-    assert_eq!(tool_content(&timed_out)["done"].as_bool(), Some(false));
-
-    let waited = harness.call_tool(
-        5,
-        "consult_wait",
-        json!({
-            "job_id": job_id,
-            "timeout_ms": 5_000,
-            "poll_interval_ms": 10,
-            "render": "json"
-        }),
-    )?;
-    assert_tool_ok(&waited);
-    assert_eq!(tool_content(&waited)["timed_out"].as_bool(), Some(false));
-    assert_eq!(tool_content(&waited)["status"].as_str(), Some("succeeded"));
+    assert_tool_error(&consult_wait);
     assert!(
-        tool_content(&waited)["waited_ms"]
-            .as_u64()
-            .is_some_and(|value| value >= 50)
-    );
-    assert_eq!(
-        tool_content(&waited)["result"]["response"].as_str(),
-        Some("background oracle")
-    );
-    let persisted_output_path = must_some(
-        tool_content(&waited)["result"]["persisted_output_path"]
-            .as_str()
-            .map(str::to_owned),
-        "background persisted output path",
-    )?;
-    assert!(persisted_output_path.starts_with("/tmp/phone_opus-consults/"));
-    assert!(persisted_output_path.contains("3fc69f58-7752-4d9d-a95d-19a217814b6a"));
-    let persisted_output = must(
-        fs::read_to_string(&persisted_output_path),
-        "read background persisted consult output",
-    )?;
-    let persisted_output: Value = must(
-        serde_json::from_str(&persisted_output),
-        "parse background persisted consult output",
-    )?;
-    assert_eq!(
-        persisted_output["response"].as_str(),
-        Some("background oracle")
+        consult_wait["result"]["content"]
+            .as_array()
+            .into_iter()
+            .flatten()
+            .filter_map(|entry| entry["text"].as_str())
+            .any(|text| text.contains("unknown tool `consult_wait`"))
     );
 
-    let job = harness.call_tool(
-        6,
-        "consult_job",
-        json!({
-            "job_id": job_id,
-            "render": "json"
-        }),
-    )?;
-    assert_tool_ok(&job);
-    assert_eq!(tool_content(&job)["status"].as_str(), Some("succeeded"));
-
-    let jobs = harness.call_tool(7, "consult_jobs", json!({ "render": "json" }))?;
-    assert_tool_ok(&jobs);
+    let consult_jobs = harness.call_tool(5, "consult_jobs", json!({}))?;
+    assert_tool_error(&consult_jobs);
     assert!(
-        tool_content(&jobs)["jobs"]
+        consult_jobs["result"]["content"]
             .as_array()
             .into_iter()
             .flatten()
-            .any(|value| value["job_id"] == job_id)
+            .filter_map(|entry| entry["text"].as_str())
+            .any(|text| text.contains("unknown tool `consult_jobs`"))
     );
-
-    let args = must(fs::read_to_string(&args_file), "read fake args file")?;
-    assert!(args.contains(PROMPT_PREFIX));
-    assert!(args.contains("background oracle"));
-    let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?;
-    assert_eq!(pwd.trim(), sandbox.display().to_string());
     Ok(())
 }
author	main <main@swarm.moe>	2026-03-23 22:32:57 -0400
committer	main <main@swarm.moe>	2026-03-23 22:32:57 -0400
commit	d64d1bd730aec23bcc5b01a78a8945863ea4d5a7 (patch)
tree	b757569797231838509f74051c2fdb828ab57857 /crates/phone-opus
parent	00949559a8a4757e1198e1ea582ebfcf7268fec4 (diff)
download	phone_opus-d64d1bd730aec23bcc5b01a78a8945863ea4d5a7.zip