From 53797d1f9bbaf73778cbb9dd6ad2f857ba1a88e2 Mon Sep 17 00:00:00 2001 From: main Date: Tue, 24 Mar 2026 13:17:59 -0400 Subject: Reuse consult context per cwd by default --- README.md | 3 +- assets/codex-skills/phone-opus/SKILL.md | 9 +- crates/phone-opus/src/mcp/catalog.rs | 8 +- crates/phone-opus/src/mcp/service.rs | 168 +++++++++++++++----- crates/phone-opus/tests/mcp_hardening.rs | 261 ++++++++++++++++++++----------- 5 files changed, 312 insertions(+), 137 deletions(-) diff --git a/README.md b/README.md index 1cdde02..ff456c5 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,8 @@ It exposes one blocking domain tool: - `consult`: run the system `claude` install in print mode, wait for the answer, and return the response plus execution metadata - - pass `session_id` from a previous response to resume that Claude Code conversation + - by default, phone_opus reuses the remembered Claude context for the consulted `cwd` + - pass `fresh_context: true` to opt out and start a fresh Claude context for that `cwd` - a fixed consult prefix is prepended before the caller-supplied prompt The server keeps the public MCP session in a durable host, isolates the actual diff --git a/assets/codex-skills/phone-opus/SKILL.md b/assets/codex-skills/phone-opus/SKILL.md index 1b087a4..3750531 100644 --- a/assets/codex-skills/phone-opus/SKILL.md +++ b/assets/codex-skills/phone-opus/SKILL.md @@ -15,13 +15,14 @@ should be taken as authoritative or final. It is a pure consultant. - Ask for a second opinion on code, architecture, debugging, or design. - Point Claude at a specific repository with `cwd`. -- Reuse `session_id` from an earlier call when you want Claude to continue the same conversation. +- By default, phone_opus reuses the remembered Claude context for that `cwd`. +- Set `fresh_context: true` when you explicitly want a fresh Claude context instead of the remembered `cwd`-scoped one. ## Tool surface - `consult` - required: `prompt` - - optional: `cwd`, `session_id`, `render`, `detail` + - optional: `cwd`, `fresh_context`, `render`, `detail` - `health_snapshot` - `telemetry_snapshot` @@ -32,9 +33,9 @@ should be taken as authoritative or final. It is a pure consultant. - Uses `--dangerously-skip-permissions`, but wraps Claude in an external `systemd-run --user` sandbox. - The sandbox keeps the filesystem globally read-only, gives Claude a separate persistent home under phone-opus state, leaves `/tmp` and `/var/tmp` writable, and forces the consulted `cwd` read-only when that tree would otherwise be writable. - Previous consult outputs can be found in `/tmp/phone_opus-consults`. -- For related follow-ups, strongly prefer reusing `session_id`; cold-start Opus burns quota rereading files, while session reuse is much cheaper. +- For related follow-ups on the same repository, keep using the same `cwd`; phone-opus will reuse that remembered context by default, which is much cheaper than cold-starting Opus. - This surface is consultative only. Edit tools are unavailable. -- The returned `session_id` is reusable: pass it back into a later `consult` call to continue that Claude conversation. +- The returned `session_id` is still surfaced for traceability, but context reuse is now automatic per `cwd` instead of caller-managed. ## Example diff --git a/crates/phone-opus/src/mcp/catalog.rs b/crates/phone-opus/src/mcp/catalog.rs index 3570b1f..f17a3c5 100644 --- a/crates/phone-opus/src/mcp/catalog.rs +++ b/crates/phone-opus/src/mcp/catalog.rs @@ -41,7 +41,7 @@ impl ToolSpec { const TOOL_SPECS: &[ToolSpec] = &[ ToolSpec { name: "consult", - description: "Run a blocking consult against the system Claude Code install using a read-only built-in toolset, optionally resume a prior Claude session by session_id, and return the response plus execution metadata.", + description: "Run a blocking consult against the system Claude Code install using a read-only built-in toolset, automatically reuse the remembered context for the current cwd by default, optionally opt out with fresh_context, and return the response plus execution metadata.", dispatch: DispatchTarget::Worker, replay: ReplayContract::NeverReplay, }, @@ -90,9 +90,9 @@ fn tool_schema(name: &str) -> Value { "type": "string", "description": "Optional working directory for the Claude Code session. Relative paths resolve against the MCP host working directory." }, - "session_id": { - "type": "string", - "description": "Optional Claude session handle returned by a previous consult call. When set, phone_opus resumes that conversation instead of starting a fresh one." + "fresh_context": { + "type": "boolean", + "description": "When true, start a fresh Claude context instead of reusing the remembered context for this cwd. Defaults to false." } }, "required": ["prompt"] diff --git a/crates/phone-opus/src/mcp/service.rs b/crates/phone-opus/src/mcp/service.rs index d958a81..993a0e4 100644 --- a/crates/phone-opus/src/mcp/service.rs +++ b/crates/phone-opus/src/mcp/service.rs @@ -115,7 +115,7 @@ impl WorkerService { struct ConsultArgs { prompt: String, cwd: Option, - session_id: Option, + fresh_context: Option, } #[derive(Debug, Deserialize)] @@ -143,36 +143,53 @@ const MIN_CONSULT_WAIT_POLL_INTERVAL_MS: u64 = 10; struct ConsultRequest { prompt: PromptText, cwd: WorkingDirectory, + context_key: ConsultContextKey, + fresh_context: bool, session: Option, } impl ConsultRequest { fn parse(args: ConsultArgs) -> Result { + let prompt = PromptText::parse(args.prompt)?; + let cwd = WorkingDirectory::resolve(args.cwd)?; + let context_key = ConsultContextKey::from_cwd(&cwd); + let fresh_context = args.fresh_context.unwrap_or(false); Ok(Self { - prompt: PromptText::parse(args.prompt)?, - cwd: WorkingDirectory::resolve(args.cwd)?, - session: args.session_id.map(SessionHandle::parse).transpose()?, + prompt, + cwd, + session: if fresh_context { + None + } else { + load_consult_context(&context_key) + .map_err(|source| ConsultRequestError::ContextIndex { source })? + }, + context_key, + fresh_context, }) } - fn session_mode(&self) -> &'static str { + fn context_mode(&self) -> &'static str { if self.session.is_some() { - "resumed" + "reused" } else { - "new" + "fresh" } } - fn requested_session_id(&self) -> Option { + fn reused_session_id(&self) -> Option { self.session.as_ref().map(SessionHandle::display) } + fn remember_context(&self, session_id: Option<&str>) -> io::Result<()> { + remember_consult_context(&self.context_key, session_id) + } + #[allow(dead_code, reason = "background submission is parked but not exposed")] fn background_request(&self) -> BackgroundConsultRequest { BackgroundConsultRequest { prompt: self.prompt.as_str().to_owned(), cwd: self.cwd.display(), - session_id: self.requested_session_id(), + fresh_context: self.fresh_context, } } } @@ -244,10 +261,8 @@ impl WorkingDirectory { struct SessionHandle(Uuid); impl SessionHandle { - fn parse(raw: String) -> Result { - Uuid::parse_str(&raw) - .map(Self) - .map_err(|_| ConsultRequestError::InvalidSessionHandle(raw)) + fn parse(raw: &str) -> Option { + Uuid::parse_str(raw).ok().map(Self) } fn display(&self) -> String { @@ -255,11 +270,53 @@ impl SessionHandle { } } +#[derive(Debug, Clone, Eq, Ord, PartialEq, PartialOrd)] +struct ConsultContextKey(String); + +impl ConsultContextKey { + fn from_cwd(cwd: &WorkingDirectory) -> Self { + Self(cwd.display()) + } + + fn as_str(&self) -> &str { + self.0.as_str() + } +} + +#[derive(Debug, Clone, Deserialize, Serialize)] +struct StoredConsultContext { + session_id: String, + updated_unix_ms: u64, +} + +#[derive(Debug, Default, Deserialize, Serialize)] +struct ConsultContextIndex { + by_cwd: BTreeMap, +} + +impl ConsultContextIndex { + fn session_for(&self, key: &ConsultContextKey) -> Option { + self.by_cwd + .get(key.as_str()) + .and_then(|entry| SessionHandle::parse(entry.session_id.as_str())) + } + + fn remember(&mut self, key: &ConsultContextKey, session: &SessionHandle) { + let _ = self.by_cwd.insert( + key.as_str().to_owned(), + StoredConsultContext { + session_id: session.display(), + updated_unix_ms: unix_ms_now(), + }, + ); + } +} + #[derive(Debug, Clone, Deserialize, Eq, PartialEq, Serialize)] struct BackgroundConsultRequest { prompt: String, cwd: String, - session_id: Option, + fresh_context: bool, } impl BackgroundConsultRequest { @@ -267,7 +324,7 @@ impl BackgroundConsultRequest { ConsultRequest::parse(ConsultArgs { prompt: self.prompt, cwd: Some(self.cwd), - session_id: self.session_id, + fresh_context: Some(self.fresh_context), }) } } @@ -454,7 +511,7 @@ impl BackgroundConsultJobRecord { "finished_unix_ms": self.finished_unix_ms, "runner_pid": self.runner_pid, "cwd": self.request.cwd, - "requested_session_id": self.request.session_id, + "fresh_context": self.request.fresh_context, "prompt_prefix_injected": self.prompt_prefix_injected, }) } @@ -470,8 +527,8 @@ enum ConsultRequestError { Canonicalize { path: String, source: io::Error }, #[error("working directory `{0}` is not a directory")] NotDirectory(String), - #[error("session_id must be a valid UUID, got `{0}`")] - InvalidSessionHandle(String), + #[error("failed to resolve consult context state: {source}")] + ContextIndex { source: io::Error }, #[error("job_id must be a valid UUID, got `{0}`")] InvalidJobHandle(String), } @@ -513,6 +570,8 @@ struct ConsultResponse { cwd: WorkingDirectory, result: String, persisted_output_path: PersistedConsultPath, + context_mode: &'static str, + reused_session_id: Option, duration_ms: u64, duration_api_ms: Option, num_turns: u64, @@ -535,6 +594,7 @@ impl ConsultResponse { const SYSTEMD_RUN_BINARY: &str = "systemd-run"; const DEFAULT_PATH: &str = "/usr/local/bin:/usr/bin:/bin"; const PHONE_OPUS_STATE_ROOT_NAME: &str = "phone_opus"; +const CONSULT_CONTEXT_INDEX_FILE_NAME: &str = "consult_contexts.json"; const CLAUDE_HOME_DIR_NAME: &str = "claude-home"; const XDG_CONFIG_DIR_NAME: &str = "xdg-config"; const XDG_CACHE_DIR_NAME: &str = "xdg-cache"; @@ -921,8 +981,8 @@ fn submit_background_consult( "job_id": record.job_id.display(), "status": record.status, "done": false, - "requested_session_id": request.requested_session_id(), - "session_mode": request.session_mode(), + "reused_session_id": request.reused_session_id(), + "context_mode": request.context_mode(), "prompt_prefix_injected": true, "follow_up_tools": ["consult_wait", "consult_job", "consult_jobs"], }); @@ -931,8 +991,8 @@ fn submit_background_consult( "job_id": record.job_id.display(), "status": record.status, "done": false, - "requested_session_id": request.requested_session_id(), - "session_mode": request.session_mode(), + "reused_session_id": request.reused_session_id(), + "context_mode": request.context_mode(), "prompt_prefix_injected": true, "prompt": request.prompt.as_str(), "effective_prompt": request.prompt.rendered(), @@ -1266,12 +1326,17 @@ fn invoke_claude(request: &ConsultRequest) -> Result io::Result { Ok(root) } +fn consult_context_index_path() -> io::Result { + let root = phone_opus_state_root()?.join("mcp"); + fs::create_dir_all(&root)?; + Ok(root.join(CONSULT_CONTEXT_INDEX_FILE_NAME)) +} + +fn load_consult_context_index() -> io::Result { + let path = consult_context_index_path()?; + match read_json_file::(path.as_path()) { + Ok(index) => Ok(index), + Err(error) if error.kind() == io::ErrorKind::NotFound => Ok(ConsultContextIndex::default()), + Err(error) => Err(error), + } +} + +fn load_consult_context(key: &ConsultContextKey) -> io::Result> { + Ok(load_consult_context_index()?.session_for(key)) +} + +fn remember_consult_context(key: &ConsultContextKey, session_id: Option<&str>) -> io::Result<()> { + let Some(session_id) = session_id.and_then(SessionHandle::parse) else { + return Ok(()); + }; + let mut index = load_consult_context_index()?; + index.remember(key, &session_id); + write_json_file(consult_context_index_path()?.as_path(), &index) +} + fn caller_home_dir() -> Option { std::env::var_os("HOME") .filter(|value| !value.is_empty()) @@ -1396,8 +1489,8 @@ fn persist_consult_output( "prompt": request.prompt.as_str(), "prompt_prefix": CLAUDE_CONSULT_PREFIX, "effective_prompt": request.prompt.rendered(), - "session_mode": request.session_mode(), - "requested_session_id": request.requested_session_id(), + "context_mode": request.context_mode(), + "reused_session_id": request.reused_session_id(), "response": result, "model": model_name(envelope.model_usage.as_ref()), "duration_ms": envelope.duration_ms.unwrap_or(0), @@ -1495,8 +1588,8 @@ fn consult_output( "response": response.result, "cwd": response.cwd.display(), "persisted_output_path": response.persisted_output_path.display(), - "session_mode": request.session_mode(), - "requested_session_id": request.requested_session_id(), + "context_mode": response.context_mode, + "reused_session_id": response.reused_session_id, "prompt_prefix_injected": true, "model": response.model_name(), "duration_ms": response.duration_ms, @@ -1513,8 +1606,8 @@ fn consult_output( "prompt": request.prompt.as_str(), "prompt_prefix": CLAUDE_CONSULT_PREFIX, "effective_prompt": request.prompt.rendered(), - "session_mode": request.session_mode(), - "requested_session_id": request.requested_session_id(), + "context_mode": response.context_mode, + "reused_session_id": response.reused_session_id, "duration_ms": response.duration_ms, "duration_api_ms": response.duration_api_ms, "num_turns": response.num_turns, @@ -1539,10 +1632,10 @@ fn consult_output( ) } -fn concise_text(request: &ConsultRequest, response: &ConsultResponse) -> String { +fn concise_text(_request: &ConsultRequest, response: &ConsultResponse) -> String { let mut status = vec![ "consult ok".to_owned(), - format!("session={}", request.session_mode()), + format!("context={}", response.context_mode), format!("turns={}", response.num_turns), format!("duration={}", render_duration_ms(response.duration_ms)), ]; @@ -1558,8 +1651,8 @@ fn concise_text(request: &ConsultRequest, response: &ConsultResponse) -> String let mut lines = vec![status.join(" ")]; lines.push(format!("cwd: {}", response.cwd.display())); - if let Some(session_id) = request.requested_session_id() { - lines.push(format!("requested_session: {session_id}")); + if let Some(session_id) = response.reused_session_id.as_deref() { + lines.push(format!("reused_session: {session_id}")); } if let Some(session_id) = response.session_id.as_deref() { lines.push(format!("session: {session_id}")); @@ -1579,18 +1672,17 @@ fn concise_text(request: &ConsultRequest, response: &ConsultResponse) -> String lines.join("\n") } -fn full_text(request: &ConsultRequest, response: &ConsultResponse) -> String { +fn full_text(_request: &ConsultRequest, response: &ConsultResponse) -> String { let mut lines = vec![ format!( - "consult ok session={} turns={}", - request.session_mode(), - response.num_turns + "consult ok context={} turns={}", + response.context_mode, response.num_turns ), format!("cwd: {}", response.cwd.display()), format!("duration: {}", render_duration_ms(response.duration_ms)), ]; - if let Some(session_id) = request.requested_session_id() { - lines.push(format!("requested_session: {session_id}")); + if let Some(session_id) = response.reused_session_id.as_deref() { + lines.push(format!("reused_session: {session_id}")); } if let Some(duration_api_ms) = response.duration_api_ms { lines.push(format!( diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs index 06861f8..f6e0e73 100644 --- a/crates/phone-opus/tests/mcp_hardening.rs +++ b/crates/phone-opus/tests/mcp_hardening.rs @@ -280,6 +280,40 @@ fn seed_caller_claude_home(home: &Path) -> TestResult { Ok(()) } +fn write_fake_claude_stdout(path: &Path, result: &str, session_id: &str, uuid: &str) -> TestResult { + must( + fs::write( + path, + serde_json::to_string(&json!({ + "type": "result", + "subtype": "success", + "is_error": false, + "duration_ms": 1234, + "duration_api_ms": 1200, + "num_turns": 2, + "result": result, + "stop_reason": "end_turn", + "session_id": session_id, + "total_cost_usd": 0.125, + "usage": { + "input_tokens": 10, + "output_tokens": 5 + }, + "modelUsage": { + "claude-opus-4-6": { + "inputTokens": 10, + "outputTokens": 5 + } + }, + "permission_denials": [], + "fast_mode_state": "off", + "uuid": uuid + }))?, + ), + "write fake stdout", + ) +} + #[test] fn cold_start_exposes_consult_and_ops_tools() -> TestResult { let root = temp_root("cold_start")?; @@ -314,6 +348,14 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult { consult_tool["inputSchema"]["properties"]["background"].is_null(), "consult schema should not advertise background: {consult_tool:#}" ); + assert!( + consult_tool["inputSchema"]["properties"]["session_id"].is_null(), + "consult schema should not advertise session_id: {consult_tool:#}" + ); + assert_eq!( + consult_tool["inputSchema"]["properties"]["fresh_context"]["type"].as_str(), + Some("boolean") + ); let health = harness.call_tool(3, "health_snapshot", json!({}))?; assert_tool_ok(&health); @@ -322,14 +364,18 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult { } #[test] -fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_working_directory() --> TestResult { +fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> TestResult { let root = temp_root("consult_success")?; let state_home = root.join("state-home"); let sandbox = root.join("sandbox"); + let sibling_sandbox = root.join("sibling-sandbox"); let caller_home = root.join("caller-home"); must(fs::create_dir_all(&state_home), "create state home")?; must(fs::create_dir_all(&sandbox), "create sandbox")?; + must( + fs::create_dir_all(&sibling_sandbox), + "create sibling sandbox", + )?; must(fs::create_dir_all(&caller_home), "create caller home")?; seed_caller_claude_home(&caller_home)?; @@ -342,39 +388,11 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki let cwd_probe_error_file = root.join("cwd-write-probe.err"); let credential_probe_file = root.join("credential-write-probe.txt"); let credential_probe_error_file = root.join("credential-write-probe.err"); - let resumed_session = "81f218eb-568b-409b-871b-f6e86d8f666f"; + let remembered_session = "81f218eb-568b-409b-871b-f6e86d8f666f"; + let fresh_session = "dbd3b6c2-4757-4b45-a8f0-f3d877e1a13f"; + let sibling_session = "d9a9a472-a091-4268-a7dd-9f31cf61f87e"; write_fake_claude_script(&fake_claude)?; - must( - fs::write( - &stdout_file, - serde_json::to_string(&json!({ - "type": "result", - "subtype": "success", - "is_error": false, - "duration_ms": 1234, - "duration_api_ms": 1200, - "num_turns": 2, - "result": "oracle", - "stop_reason": "end_turn", - "session_id": resumed_session, - "total_cost_usd": 0.125, - "usage": { - "input_tokens": 10, - "output_tokens": 5 - }, - "modelUsage": { - "claude-opus-4-6": { - "inputTokens": 10, - "outputTokens": 5 - } - }, - "permission_denials": [], - "fast_mode_state": "off", - "uuid": "uuid-123" - }))?, - ), - "write fake stdout", - )?; + write_fake_claude_stdout(&stdout_file, "oracle", remembered_session, "uuid-123")?; let claude_bin = fake_claude.display().to_string(); let stdout_path = stdout_file.display().to_string(); @@ -420,43 +438,135 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki json!({ "prompt": "say oracle", "cwd": sandbox.display().to_string(), - "session_id": resumed_session, + "session_id": "not-a-uuid", "background": true }), )?; assert_tool_ok(&consult); assert_eq!(tool_content(&consult)["response"].as_str(), Some("oracle")); - assert!(tool_content(&consult)["mode"].is_null()); - assert!(tool_content(&consult)["job_id"].is_null()); assert_eq!( - tool_content(&consult)["session_mode"].as_str(), - Some("resumed") + tool_content(&consult)["context_mode"].as_str(), + Some("fresh") ); + assert!(tool_content(&consult)["reused_session_id"].is_null()); assert_eq!( - tool_content(&consult)["requested_session_id"].as_str(), - Some(resumed_session) + tool_content(&consult)["session_id"].as_str(), + Some(remembered_session) ); + let first_args = must(fs::read_to_string(&args_file), "read first fake args file")?; + assert!(!first_args.contains("--resume")); + assert!(!first_args.contains("not-a-uuid")); + + write_fake_claude_stdout( + &stdout_file, + "oracle reused", + remembered_session, + "uuid-124", + )?; + let reused = harness.call_tool( + 4, + "consult", + json!({ + "prompt": "say oracle reused", + "cwd": sandbox.display().to_string() + }), + )?; + assert_tool_ok(&reused); assert_eq!( - tool_content(&consult)["prompt_prefix_injected"].as_bool(), - Some(true) + tool_content(&reused)["response"].as_str(), + Some("oracle reused") ); assert_eq!( - tool_content(&consult)["cwd"].as_str(), - Some(sandbox.display().to_string().as_str()) + tool_content(&reused)["context_mode"].as_str(), + Some("reused") ); - assert_eq!(tool_content(&consult)["num_turns"].as_u64(), Some(2)); assert_eq!( - tool_content(&consult)["session_id"].as_str(), - Some(resumed_session) + tool_content(&reused)["reused_session_id"].as_str(), + Some(remembered_session) + ); + let reused_args = must(fs::read_to_string(&args_file), "read reused fake args file")?; + assert!(reused_args.contains("--resume")); + assert!(reused_args.contains(remembered_session)); + + write_fake_claude_stdout(&stdout_file, "oracle fresh", fresh_session, "uuid-125")?; + let fresh = harness.call_tool( + 5, + "consult", + json!({ + "prompt": "say oracle fresh", + "cwd": sandbox.display().to_string(), + "fresh_context": true + }), + )?; + assert_tool_ok(&fresh); + assert_eq!( + tool_content(&fresh)["response"].as_str(), + Some("oracle fresh") ); + assert_eq!(tool_content(&fresh)["context_mode"].as_str(), Some("fresh")); + assert!(tool_content(&fresh)["reused_session_id"].is_null()); + let fresh_args = must(fs::read_to_string(&args_file), "read fresh fake args file")?; + assert!(!fresh_args.contains("--resume")); + + write_fake_claude_stdout( + &stdout_file, + "oracle after fresh", + fresh_session, + "uuid-126", + )?; + let after_fresh = harness.call_tool( + 6, + "consult", + json!({ + "prompt": "say oracle after fresh", + "cwd": sandbox.display().to_string() + }), + )?; + assert_tool_ok(&after_fresh); + assert_eq!( + tool_content(&after_fresh)["context_mode"].as_str(), + Some("reused") + ); + assert_eq!( + tool_content(&after_fresh)["reused_session_id"].as_str(), + Some(fresh_session) + ); + let after_fresh_args = must( + fs::read_to_string(&args_file), + "read after-fresh fake args file", + )?; + assert!(after_fresh_args.contains("--resume")); + assert!(after_fresh_args.contains(fresh_session)); + + write_fake_claude_stdout(&stdout_file, "oracle sibling", sibling_session, "uuid-127")?; + let sibling = harness.call_tool( + 7, + "consult", + json!({ + "prompt": "say oracle sibling", + "cwd": sibling_sandbox.display().to_string() + }), + )?; + assert_tool_ok(&sibling); + assert_eq!( + tool_content(&sibling)["context_mode"].as_str(), + Some("fresh") + ); + assert!(tool_content(&sibling)["reused_session_id"].is_null()); + let sibling_args = must( + fs::read_to_string(&args_file), + "read sibling fake args file", + )?; + assert!(!sibling_args.contains("--resume")); + let persisted_output_path = must_some( - tool_content(&consult)["persisted_output_path"] + tool_content(&after_fresh)["persisted_output_path"] .as_str() .map(str::to_owned), "persisted output path", )?; assert!(persisted_output_path.starts_with("/tmp/phone_opus-consults/")); - assert!(persisted_output_path.contains(resumed_session)); + assert!(persisted_output_path.contains(fresh_session)); let persisted_output = must( fs::read_to_string(&persisted_output_path), "read persisted consult output", @@ -465,14 +575,18 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki serde_json::from_str(&persisted_output), "parse persisted consult output", )?; - assert_eq!(persisted_output["response"].as_str(), Some("oracle")); assert_eq!( - persisted_output["requested_session_id"].as_str(), - Some(resumed_session) + persisted_output["response"].as_str(), + Some("oracle after fresh") + ); + assert_eq!(persisted_output["context_mode"].as_str(), Some("reused")); + assert_eq!( + persisted_output["reused_session_id"].as_str(), + Some(fresh_session) ); let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?; - assert_eq!(pwd.trim(), sandbox.display().to_string()); + assert_eq!(pwd.trim(), sibling_sandbox.display().to_string()); let args = must(fs::read_to_string(&args_file), "read fake args file")?; let lines = args.lines().collect::>(); @@ -493,13 +607,11 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki assert!(lines.contains(&"--dangerously-skip-permissions")); assert!(!lines.contains(&"--permission-mode")); assert!(!lines.contains(&"dontAsk")); - assert!(lines.contains(&"--resume")); - assert!(lines.contains(&resumed_session)); + assert!(!lines.contains(&"--resume")); assert!(!lines.contains(&"--max-turns")); assert!(args.contains(PROMPT_PREFIX)); - assert!(args.contains("The real prompt follows.")); let prefix_index = must_some(args.find(PROMPT_PREFIX), "prefixed consult prompt")?; - let user_prompt_index = must_some(args.find("say oracle"), "user prompt inside args")?; + let user_prompt_index = must_some(args.find("say oracle sibling"), "user prompt inside args")?; assert!(prefix_index < user_prompt_index); let env_dump = must(fs::read_to_string(&env_file), "read fake env file")?; @@ -572,7 +684,7 @@ fn consult_can_resume_a_prior_session_with_read_only_toolset_and_requested_worki )?; assert_eq!(credential_probe.trim(), "write_succeeded"); - let telemetry = harness.call_tool(4, "telemetry_snapshot", json!({}))?; + let telemetry = harness.call_tool(8, "telemetry_snapshot", json!({}))?; assert_tool_ok(&telemetry); let hot_methods = tool_content(&telemetry)["hot_methods"] .as_array() @@ -643,37 +755,6 @@ fn background_surfaces_are_hidden_from_public_mcp() -> TestResult { Ok(()) } -#[test] -fn consult_rejects_invalid_session_handles() -> TestResult { - let root = temp_root("consult_invalid_session")?; - let state_home = root.join("state-home"); - must(fs::create_dir_all(&state_home), "create state home")?; - - let mut harness = McpHarness::spawn(&state_home, &[])?; - let _ = harness.initialize()?; - harness.notify_initialized()?; - - let consult = harness.call_tool( - 3, - "consult", - json!({ - "prompt": "fail", - "session_id": "not-a-uuid" - }), - )?; - assert_tool_error(&consult); - assert_eq!( - tool_content(&consult)["fault"]["class"].as_str(), - Some("protocol") - ); - assert!( - tool_content(&consult)["fault"]["detail"] - .as_str() - .is_some_and(|value| value.contains("session_id must be a valid UUID")) - ); - Ok(()) -} - #[test] fn consult_surfaces_downstream_cli_failures() -> TestResult { let root = temp_root("consult_failure")?; -- cgit v1.2.3