swarm repositories / source
aboutsummaryrefslogtreecommitdiff
path: root/crates/phone-opus
diff options
context:
space:
mode:
authormain <main@swarm.moe>2026-03-23 22:32:57 -0400
committermain <main@swarm.moe>2026-03-23 22:32:57 -0400
commitd64d1bd730aec23bcc5b01a78a8945863ea4d5a7 (patch)
treeb757569797231838509f74051c2fdb828ab57857 /crates/phone-opus
parent00949559a8a4757e1198e1ea582ebfcf7268fec4 (diff)
downloadphone_opus-d64d1bd730aec23bcc5b01a78a8945863ea4d5a7.zip
Hide background consult surfaces
Diffstat (limited to 'crates/phone-opus')
-rw-r--r--crates/phone-opus/src/mcp/catalog.rs64
-rw-r--r--crates/phone-opus/src/mcp/service.rs55
-rw-r--r--crates/phone-opus/tests/mcp_hardening.rs188
3 files changed, 51 insertions, 256 deletions
diff --git a/crates/phone-opus/src/mcp/catalog.rs b/crates/phone-opus/src/mcp/catalog.rs
index a4a1780..3570b1f 100644
--- a/crates/phone-opus/src/mcp/catalog.rs
+++ b/crates/phone-opus/src/mcp/catalog.rs
@@ -41,29 +41,11 @@ impl ToolSpec {
const TOOL_SPECS: &[ToolSpec] = &[
ToolSpec {
name: "consult",
- description: "Run a consult against the system Claude Code install using a read-only built-in toolset, optionally resume a prior Claude session by session_id, optionally queue the consult in the background, and return the response or job handle.",
+ description: "Run a blocking consult against the system Claude Code install using a read-only built-in toolset, optionally resume a prior Claude session by session_id, and return the response plus execution metadata.",
dispatch: DispatchTarget::Worker,
replay: ReplayContract::NeverReplay,
},
ToolSpec {
- name: "consult_job",
- description: "Read the status of one background consult job by job_id. When the job has finished, the final Claude response or failure is included.",
- dispatch: DispatchTarget::Host,
- replay: ReplayContract::Convergent,
- },
- ToolSpec {
- name: "consult_wait",
- description: "Block until one background consult job finishes or a timeout elapses. When the job has finished, the final Claude response or failure is included.",
- dispatch: DispatchTarget::Host,
- replay: ReplayContract::Convergent,
- },
- ToolSpec {
- name: "consult_jobs",
- description: "List recent background consult jobs. Defaults to render=porcelain; use render=json for structured output.",
- dispatch: DispatchTarget::Host,
- replay: ReplayContract::Convergent,
- },
- ToolSpec {
name: "health_snapshot",
description: "Read host lifecycle, worker generation, rollout state, and latest fault. Defaults to render=porcelain; use render=json for structured output.",
dispatch: DispatchTarget::Host,
@@ -111,54 +93,10 @@ fn tool_schema(name: &str) -> Value {
"session_id": {
"type": "string",
"description": "Optional Claude session handle returned by a previous consult call. When set, phone_opus resumes that conversation instead of starting a fresh one."
- },
- "background": {
- "type": "boolean",
- "description": "When true, queue the consult as a background job and return immediately with a job handle. The default is false, which keeps consult synchronous."
}
},
"required": ["prompt"]
})),
- "consult_job" => with_common_presentation(json!({
- "type": "object",
- "properties": {
- "job_id": {
- "type": "string",
- "description": "Background consult job handle returned by consult with background=true."
- }
- },
- "required": ["job_id"]
- })),
- "consult_wait" => with_common_presentation(json!({
- "type": "object",
- "properties": {
- "job_id": {
- "type": "string",
- "description": "Background consult job handle returned by consult with background=true."
- },
- "timeout_ms": {
- "type": "integer",
- "minimum": 0,
- "description": "Maximum time to wait for completion before returning the current job state. Defaults to 1800000 (30 minutes)."
- },
- "poll_interval_ms": {
- "type": "integer",
- "minimum": 10,
- "description": "Polling interval used while waiting. Defaults to 1000."
- }
- },
- "required": ["job_id"]
- })),
- "consult_jobs" => with_common_presentation(json!({
- "type": "object",
- "properties": {
- "limit": {
- "type": "integer",
- "minimum": 1,
- "description": "Maximum number of recent background jobs to return. Defaults to 10."
- }
- }
- })),
"health_snapshot" | "telemetry_snapshot" => with_common_presentation(json!({
"type": "object",
"properties": {}
diff --git a/crates/phone-opus/src/mcp/service.rs b/crates/phone-opus/src/mcp/service.rs
index 773a516..64cb778 100644
--- a/crates/phone-opus/src/mcp/service.rs
+++ b/crates/phone-opus/src/mcp/service.rs
@@ -86,19 +86,9 @@ impl WorkerService {
let args = deserialize::<ConsultArgs>(arguments, &operation, self.generation)?;
let request = ConsultRequest::parse(args)
.map_err(|error| invalid_consult_request(self.generation, &operation, error))?;
- match request.mode() {
- ConsultMode::Sync => {
- let response = invoke_claude(&request)
- .map_err(|error| consult_fault(self.generation, &operation, error))?;
- consult_output(&request, &response, self.generation, &operation)?
- }
- ConsultMode::Background => submit_background_consult(
- &request,
- self.generation,
- FaultStage::Worker,
- &operation,
- )?,
- }
+ let response = invoke_claude(&request)
+ .map_err(|error| consult_fault(self.generation, &operation, error))?;
+ consult_output(&request, &response, self.generation, &operation)?
}
other => {
return Err(FaultRecord::invalid_input(
@@ -124,7 +114,6 @@ struct ConsultArgs {
prompt: String,
cwd: Option<String>,
session_id: Option<String>,
- background: Option<bool>,
}
#[derive(Debug, Deserialize)]
@@ -153,7 +142,6 @@ struct ConsultRequest {
prompt: PromptText,
cwd: WorkingDirectory,
session: Option<SessionHandle>,
- mode: ConsultMode,
}
impl ConsultRequest {
@@ -162,14 +150,9 @@ impl ConsultRequest {
prompt: PromptText::parse(args.prompt)?,
cwd: WorkingDirectory::resolve(args.cwd)?,
session: args.session_id.map(SessionHandle::parse).transpose()?,
- mode: ConsultMode::from_background(args.background),
})
}
- fn mode(&self) -> ConsultMode {
- self.mode
- }
-
fn session_mode(&self) -> &'static str {
if self.session.is_some() {
"resumed"
@@ -182,6 +165,7 @@ impl ConsultRequest {
self.session.as_ref().map(SessionHandle::display)
}
+ #[allow(dead_code, reason = "background submission is parked but not exposed")]
fn background_request(&self) -> BackgroundConsultRequest {
BackgroundConsultRequest {
prompt: self.prompt.as_str().to_owned(),
@@ -191,29 +175,6 @@ impl ConsultRequest {
}
}
-#[derive(Debug, Clone, Copy, Eq, PartialEq)]
-enum ConsultMode {
- Sync,
- Background,
-}
-
-impl ConsultMode {
- fn from_background(raw: Option<bool>) -> Self {
- if raw.unwrap_or(false) {
- Self::Background
- } else {
- Self::Sync
- }
- }
-
- fn as_str(self) -> &'static str {
- match self {
- Self::Sync => "sync",
- Self::Background => "background",
- }
- }
-}
-
#[derive(Debug, Clone)]
struct PromptText {
original: String,
@@ -305,7 +266,6 @@ impl BackgroundConsultRequest {
prompt: self.prompt,
cwd: Some(self.cwd),
session_id: self.session_id,
- background: Some(false),
})
}
}
@@ -895,6 +855,7 @@ pub(crate) fn consult_jobs_tool_output(
)
}
+#[allow(dead_code, reason = "background submission is parked but not exposed")]
fn submit_background_consult(
request: &ConsultRequest,
generation: Generation,
@@ -929,7 +890,7 @@ fn submit_background_consult(
.map_err(|error| FaultRecord::process(generation, stage, operation, error.to_string()))?;
let concise = json!({
- "mode": request.mode().as_str(),
+ "mode": "background",
"job_id": record.job_id.display(),
"status": record.status,
"done": false,
@@ -939,7 +900,7 @@ fn submit_background_consult(
"follow_up_tools": ["consult_wait", "consult_job", "consult_jobs"],
});
let full = json!({
- "mode": request.mode().as_str(),
+ "mode": "background",
"job_id": record.job_id.display(),
"status": record.status,
"done": false,
@@ -1495,7 +1456,6 @@ fn consult_output(
operation: &str,
) -> Result<ToolOutput, FaultRecord> {
let concise = json!({
- "mode": request.mode().as_str(),
"response": response.result,
"cwd": response.cwd.display(),
"persisted_output_path": response.persisted_output_path.display(),
@@ -1511,7 +1471,6 @@ fn consult_output(
"permission_denial_count": response.permission_denials.len(),
});
let full = json!({
- "mode": request.mode().as_str(),
"response": response.result,
"cwd": response.cwd.display(),
"persisted_output_path": response.persisted_output_path.display(),
diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs
index cbe3354..107a578 100644
--- a/crates/phone-opus/tests/mcp_hardening.rs
+++ b/crates/phone-opus/tests/mcp_hardening.rs
@@ -288,11 +288,23 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult {
let tools = harness.tools_list()?;
let tool_names = tool_names(&tools);
assert!(tool_names.contains(&"consult"));
- assert!(tool_names.contains(&"consult_job"));
- assert!(tool_names.contains(&"consult_wait"));
- assert!(tool_names.contains(&"consult_jobs"));
+ assert!(!tool_names.contains(&"consult_job"));
+ assert!(!tool_names.contains(&"consult_wait"));
+ assert!(!tool_names.contains(&"consult_jobs"));
assert!(tool_names.contains(&"health_snapshot"));
assert!(tool_names.contains(&"telemetry_snapshot"));
+ let consult_tool = must_some(
+ tools["result"]["tools"]
+ .as_array()
+ .into_iter()
+ .flatten()
+ .find(|tool| tool["name"] == "consult"),
+ "consult tool definition",
+ )?;
+ assert!(
+ consult_tool["inputSchema"]["properties"]["background"].is_null(),
+ "consult schema should not advertise background: {consult_tool:#}"
+ );
let health = harness.call_tool(3, "health_snapshot", json!({}))?;
assert_tool_ok(&health);
@@ -387,11 +399,14 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki
json!({
"prompt": "say oracle",
"cwd": sandbox.display().to_string(),
- "session_id": resumed_session
+ "session_id": resumed_session,
+ "background": true
}),
)?;
assert_tool_ok(&consult);
assert_eq!(tool_content(&consult)["response"].as_str(), Some("oracle"));
+ assert!(tool_content(&consult)["mode"].is_null());
+ assert!(tool_content(&consult)["job_id"].is_null());
assert_eq!(
tool_content(&consult)["session_mode"].as_str(),
Some("resumed")
@@ -539,176 +554,59 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki
}
#[test]
-fn consult_can_run_in_background_and_be_waited_on_or_polled() -> TestResult {
- let root = temp_root("consult_background")?;
+fn background_surfaces_are_hidden_from_public_mcp() -> TestResult {
+ let root = temp_root("consult_hidden_background")?;
let state_home = root.join("state-home");
- let sandbox = root.join("sandbox");
- let caller_home = root.join("caller-home");
must(fs::create_dir_all(&state_home), "create state home")?;
- must(fs::create_dir_all(&sandbox), "create sandbox")?;
- must(fs::create_dir_all(&caller_home), "create caller home")?;
- let fake_claude = root.join("claude");
- let stdout_file = root.join("stdout.json");
- let args_file = root.join("args.txt");
- let pwd_file = root.join("pwd.txt");
- write_fake_claude_script(&fake_claude)?;
- must(
- fs::write(
- &stdout_file,
- serde_json::to_string(&json!({
- "type": "result",
- "subtype": "success",
- "is_error": false,
- "duration_ms": 4321,
- "duration_api_ms": 4200,
- "num_turns": 3,
- "result": "background oracle",
- "stop_reason": "end_turn",
- "session_id": "3fc69f58-7752-4d9d-a95d-19a217814b6a",
- "total_cost_usd": 0.25,
- "usage": {
- "input_tokens": 11,
- "output_tokens": 7
- },
- "modelUsage": {
- "claude-opus-4-6": {
- "inputTokens": 11,
- "outputTokens": 7
- }
- },
- "permission_denials": [],
- "fast_mode_state": "off",
- "uuid": "uuid-456"
- }))?,
- ),
- "write fake stdout",
- )?;
-
- let claude_bin = fake_claude.display().to_string();
- let stdout_path = stdout_file.display().to_string();
- let args_path = args_file.display().to_string();
- let pwd_path = pwd_file.display().to_string();
- let caller_home_path = caller_home.display().to_string();
- let env = [
- ("HOME", caller_home_path.as_str()),
- ("PHONE_OPUS_CLAUDE_BIN", claude_bin.as_str()),
- ("PHONE_OPUS_TEST_STDOUT_FILE", stdout_path.as_str()),
- ("PHONE_OPUS_TEST_ARGS_FILE", args_path.as_str()),
- ("PHONE_OPUS_TEST_PWD_FILE", pwd_path.as_str()),
- ("PHONE_OPUS_TEST_SLEEP_MS", "100"),
- ];
- let mut harness = McpHarness::spawn(&state_home, &env)?;
+ let mut harness = McpHarness::spawn(&state_home, &[])?;
let _ = harness.initialize()?;
harness.notify_initialized()?;
- let submit = harness.call_tool(
+ let consult_job = harness.call_tool(
3,
- "consult",
+ "consult_job",
json!({
- "prompt": "background oracle",
- "cwd": sandbox.display().to_string(),
- "background": true
+ "job_id": "00000000-0000-0000-0000-000000000000"
}),
)?;
- assert_tool_ok(&submit);
- assert_eq!(tool_content(&submit)["mode"].as_str(), Some("background"));
+ assert_tool_error(&consult_job);
assert!(
- tool_content(&submit)["follow_up_tools"]
+ consult_job["result"]["content"]
.as_array()
.into_iter()
.flatten()
- .any(|value| value == "consult_wait")
+ .filter_map(|entry| entry["text"].as_str())
+ .any(|text| text.contains("unknown tool `consult_job`"))
);
- let job_id = must_some(
- tool_content(&submit)["job_id"].as_str().map(str::to_owned),
- "background job id",
- )?;
- let _ = uuid::Uuid::parse_str(&job_id)
- .map_err(|error| io::Error::other(format!("job id uuid parse: {error}")))?;
- let timed_out = harness.call_tool(
+ let consult_wait = harness.call_tool(
4,
"consult_wait",
json!({
- "job_id": job_id,
- "timeout_ms": 0,
- "render": "json"
+ "job_id": "00000000-0000-0000-0000-000000000000"
}),
)?;
- assert_tool_ok(&timed_out);
- assert_eq!(tool_content(&timed_out)["timed_out"].as_bool(), Some(true));
- assert_eq!(tool_content(&timed_out)["done"].as_bool(), Some(false));
-
- let waited = harness.call_tool(
- 5,
- "consult_wait",
- json!({
- "job_id": job_id,
- "timeout_ms": 5_000,
- "poll_interval_ms": 10,
- "render": "json"
- }),
- )?;
- assert_tool_ok(&waited);
- assert_eq!(tool_content(&waited)["timed_out"].as_bool(), Some(false));
- assert_eq!(tool_content(&waited)["status"].as_str(), Some("succeeded"));
+ assert_tool_error(&consult_wait);
assert!(
- tool_content(&waited)["waited_ms"]
- .as_u64()
- .is_some_and(|value| value >= 50)
- );
- assert_eq!(
- tool_content(&waited)["result"]["response"].as_str(),
- Some("background oracle")
- );
- let persisted_output_path = must_some(
- tool_content(&waited)["result"]["persisted_output_path"]
- .as_str()
- .map(str::to_owned),
- "background persisted output path",
- )?;
- assert!(persisted_output_path.starts_with("/tmp/phone_opus-consults/"));
- assert!(persisted_output_path.contains("3fc69f58-7752-4d9d-a95d-19a217814b6a"));
- let persisted_output = must(
- fs::read_to_string(&persisted_output_path),
- "read background persisted consult output",
- )?;
- let persisted_output: Value = must(
- serde_json::from_str(&persisted_output),
- "parse background persisted consult output",
- )?;
- assert_eq!(
- persisted_output["response"].as_str(),
- Some("background oracle")
+ consult_wait["result"]["content"]
+ .as_array()
+ .into_iter()
+ .flatten()
+ .filter_map(|entry| entry["text"].as_str())
+ .any(|text| text.contains("unknown tool `consult_wait`"))
);
- let job = harness.call_tool(
- 6,
- "consult_job",
- json!({
- "job_id": job_id,
- "render": "json"
- }),
- )?;
- assert_tool_ok(&job);
- assert_eq!(tool_content(&job)["status"].as_str(), Some("succeeded"));
-
- let jobs = harness.call_tool(7, "consult_jobs", json!({ "render": "json" }))?;
- assert_tool_ok(&jobs);
+ let consult_jobs = harness.call_tool(5, "consult_jobs", json!({}))?;
+ assert_tool_error(&consult_jobs);
assert!(
- tool_content(&jobs)["jobs"]
+ consult_jobs["result"]["content"]
.as_array()
.into_iter()
.flatten()
- .any(|value| value["job_id"] == job_id)
+ .filter_map(|entry| entry["text"].as_str())
+ .any(|text| text.contains("unknown tool `consult_jobs`"))
);
-
- let args = must(fs::read_to_string(&args_file), "read fake args file")?;
- assert!(args.contains(PROMPT_PREFIX));
- assert!(args.contains("background oracle"));
- let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?;
- assert_eq!(pwd.trim(), sandbox.display().to_string());
Ok(())
}