From 2c219204d627634442d46c38d1b5df806f77f4c1 Mon Sep 17 00:00:00 2001 From: main Date: Wed, 25 Mar 2026 00:43:57 -0400 Subject: Disable public resume behavior --- README.md | 6 +- assets/codex-skills/phone-opus/SKILL.md | 10 +- crates/phone-opus/src/mcp/catalog.rs | 6 +- crates/phone-opus/src/mcp/fault.rs | 27 +-- crates/phone-opus/src/mcp/service.rs | 183 ++++++------------ crates/phone-opus/tests/mcp_hardening.rs | 312 ++++++++++++------------------- 6 files changed, 187 insertions(+), 357 deletions(-) diff --git a/README.md b/README.md index c2aaefa..cdc97ae 100644 --- a/README.md +++ b/README.md @@ -7,10 +7,8 @@ It exposes one blocking domain tool: - `consult`: run the system `claude` install in print mode, wait for the answer, and return the response plus execution metadata - - by default, phone_opus reuses the remembered Claude context for the consulted `cwd` - - pass `fresh_context: true` to opt out and start a fresh Claude context for that `cwd` - a fixed consult prefix is prepended before the caller-supplied prompt - - fresh consults predeclare a UUID session id and stream-confirm it eagerly, so downstream failures still surface a reusable context handle for that `cwd` + - each consult is a fresh one-shot invocation The server keeps the public MCP session in a durable host, isolates the actual Claude invocation in a disposable worker, and ships standard health and @@ -31,7 +29,7 @@ Each `consult` call runs Claude Code with: - a read-only built-in toolset: - `Bash,Read,Grep,Glob,LS,WebFetch` - `--dangerously-skip-permissions` -- `--session-id ` on fresh consults, or `--resume ` when reusing remembered `cwd` context +- `--session-id ` on each fresh consult - `--output-format stream-json` so phone_opus can capture the init/result session metadata eagerly instead of waiting for the terminal result blob - an external `systemd-run --user` sandbox instead of Claude's internal permission gate - the filesystem stays globally read-only under `ProtectSystem=strict` diff --git a/assets/codex-skills/phone-opus/SKILL.md b/assets/codex-skills/phone-opus/SKILL.md index 953a71b..1ddec2a 100644 --- a/assets/codex-skills/phone-opus/SKILL.md +++ b/assets/codex-skills/phone-opus/SKILL.md @@ -15,14 +15,13 @@ should be taken as authoritative or final. It is a pure consultant. - Ask for a second opinion on code, architecture, debugging, or design. - Point Claude at a specific repository with `cwd`. -- By default, phone_opus reuses the remembered Claude context for that `cwd`. -- Set `fresh_context: true` when you explicitly want a fresh Claude context instead of the remembered `cwd`-scoped one. +- Treat each consult as a fresh one-shot pass. ## Tool surface - `consult` - required: `prompt` - - optional: `cwd`, `fresh_context`, `render`, `detail` + - optional: `cwd`, `render`, `detail` - `health_snapshot` - `telemetry_snapshot` @@ -34,10 +33,7 @@ should be taken as authoritative or final. It is a pure consultant. - The sandbox keeps the filesystem globally read-only, gives Claude a separate persistent home under phone-opus state, leaves `/tmp` and `/var/tmp` writable, and forces the consulted `cwd` read-only when that tree would otherwise be writable. - Web search is disabled; keep Opus focused on local inspection, reasoning, and any direct web fetches that are truly necessary. - Previous consult outputs can be found in `/tmp/phone_opus-consults`. -- For related follow-ups on the same repository, keep using the same `cwd`; phone-opus will reuse that remembered context by default, which is much cheaper than cold-starting Opus. -- Fresh consults predeclare a UUID session id and stream-confirm it eagerly; if a consult fails downstream, inspect the returned error metadata for the reusable handle. - This surface is consultative only. Edit tools are unavailable. -- The returned `session_id` is a resumable handle, but context reuse is automatic per `cwd` instead of caller-managed. ## Example @@ -46,4 +42,4 @@ Call `phone_opus.consult` with: - `prompt`: `Inspect the Rust workspace and tell me where the retry logic is brittle.` - `cwd`: `/absolute/path/to/repo` -Expect a blocking response containing Claude's answer plus execution metadata such as duration, turn count, session id, and cost when available. +Expect a blocking response containing Claude's answer plus execution metadata such as duration, turn count, and cost when available. diff --git a/crates/phone-opus/src/mcp/catalog.rs b/crates/phone-opus/src/mcp/catalog.rs index f17a3c5..cf18fc3 100644 --- a/crates/phone-opus/src/mcp/catalog.rs +++ b/crates/phone-opus/src/mcp/catalog.rs @@ -41,7 +41,7 @@ impl ToolSpec { const TOOL_SPECS: &[ToolSpec] = &[ ToolSpec { name: "consult", - description: "Run a blocking consult against the system Claude Code install using a read-only built-in toolset, automatically reuse the remembered context for the current cwd by default, optionally opt out with fresh_context, and return the response plus execution metadata.", + description: "Run a blocking one-shot consult against the system Claude Code install using a read-only built-in toolset and return the response plus execution metadata.", dispatch: DispatchTarget::Worker, replay: ReplayContract::NeverReplay, }, @@ -89,10 +89,6 @@ fn tool_schema(name: &str) -> Value { "cwd": { "type": "string", "description": "Optional working directory for the Claude Code session. Relative paths resolve against the MCP host working directory." - }, - "fresh_context": { - "type": "boolean", - "description": "When true, start a fresh Claude context instead of reusing the remembered context for this cwd. Defaults to false." } }, "required": ["prompt"] diff --git a/crates/phone-opus/src/mcp/fault.rs b/crates/phone-opus/src/mcp/fault.rs index c2d4a6c..b0b1e28 100644 --- a/crates/phone-opus/src/mcp/fault.rs +++ b/crates/phone-opus/src/mcp/fault.rs @@ -13,14 +13,6 @@ pub(crate) struct FaultContext { #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub(crate) struct ConsultFaultContext { pub(crate) cwd: String, - pub(crate) context_mode: String, - pub(crate) planned_session_id: String, - #[serde(skip_serializing_if = "Option::is_none")] - pub(crate) reused_session_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub(crate) observed_session_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub(crate) resume_session_id: Option, #[serde(default, skip_serializing_if = "is_false")] pub(crate) quota_limited: bool, #[serde(skip_serializing_if = "Option::is_none")] @@ -200,23 +192,8 @@ impl FaultRecord { else { return lines.join("\n"); }; - let mut fields: BTreeMap = BTreeMap::from([ - ("cwd".to_owned(), consult.cwd.clone()), - ("context_mode".to_owned(), consult.context_mode.clone()), - ( - "planned_session".to_owned(), - consult.planned_session_id.clone(), - ), - ]); - if let Some(session_id) = consult.reused_session_id.as_ref() { - let _ = fields.insert("reused_session".to_owned(), session_id.clone()); - } - if let Some(session_id) = consult.observed_session_id.as_ref() { - let _ = fields.insert("observed_session".to_owned(), session_id.clone()); - } - if let Some(session_id) = consult.resume_session_id.as_ref() { - let _ = fields.insert("resume_session".to_owned(), session_id.clone()); - } + let mut fields: BTreeMap = + BTreeMap::from([("cwd".to_owned(), consult.cwd.clone())]); if consult.quota_limited { let _ = fields.insert("quota_limited".to_owned(), "true".to_owned()); } diff --git a/crates/phone-opus/src/mcp/service.rs b/crates/phone-opus/src/mcp/service.rs index bb56ad3..e152fa7 100644 --- a/crates/phone-opus/src/mcp/service.rs +++ b/crates/phone-opus/src/mcp/service.rs @@ -156,14 +156,7 @@ impl ConsultRequest { let cwd = WorkingDirectory::resolve(args.cwd)?; let context_key = ConsultContextKey::from_cwd(&cwd); let fresh_context = args.fresh_context.unwrap_or(false); - let session_plan = if fresh_context { - ConsultSessionPlan::fresh() - } else { - load_consult_context(&context_key) - .map_err(|source| ConsultRequestError::ContextIndex { source })? - .and_then(ConsultSessionPlan::from_stored) - .unwrap_or_else(ConsultSessionPlan::fresh) - }; + let session_plan = ConsultSessionPlan::fresh(); Ok(Self { prompt, cwd, @@ -173,6 +166,10 @@ impl ConsultRequest { }) } + fn planned_session_id(&self) -> String { + self.session_plan.planned_session().display() + } + fn context_mode(&self) -> &'static str { self.session_plan.context_mode() } @@ -181,16 +178,6 @@ impl ConsultRequest { self.session_plan.reused_session_id() } - fn planned_session_id(&self) -> String { - self.session_plan.planned_session().display() - } - - fn launch_resume_session(&self) -> Option { - self.session_plan - .resume_session() - .map(SessionHandle::display) - } - fn launch_session_id(&self) -> Option { match self.session_plan { ConsultSessionPlan::Start { .. } => Some(self.planned_session_id()), @@ -214,14 +201,6 @@ impl ConsultRequest { } } - fn current_context_session_id(&self) -> Option { - load_consult_context(&self.context_key) - .ok() - .flatten() - .and_then(ConsultSessionPlan::from_stored) - .map(|plan| plan.planned_session().display()) - } - #[allow(dead_code, reason = "background submission is parked but not exposed")] fn background_request(&self) -> BackgroundConsultRequest { BackgroundConsultRequest { @@ -351,6 +330,10 @@ struct ConsultContextIndex { } impl ConsultContextIndex { + #[allow( + dead_code, + reason = "context lookup is parked while session reuse stays disabled" + )] fn context_for(&self, key: &ConsultContextKey) -> Option { self.by_cwd.get(key.as_str()).cloned() } @@ -378,6 +361,10 @@ enum ConsultSessionPlan { session: SessionHandle, reused: bool, }, + #[allow( + dead_code, + reason = "resume plans are parked while one-shot consults are enforced" + )] Resume(SessionHandle), } @@ -389,6 +376,10 @@ impl ConsultSessionPlan { } } + #[allow( + dead_code, + reason = "stored-session revival is parked while one-shot consults are enforced" + )] fn from_stored(context: StoredConsultContext) -> Option { let session = SessionHandle::parse(context.session_id.as_str())?; Some(match context.state { @@ -406,6 +397,10 @@ impl ConsultSessionPlan { } } + #[allow( + dead_code, + reason = "resume paths are parked while one-shot consults are enforced" + )] fn resume_session(&self) -> Option<&SessionHandle> { match self { Self::Resume(session) => Some(session), @@ -647,6 +642,10 @@ enum ConsultRequestError { Canonicalize { path: String, source: io::Error }, #[error("working directory `{0}` is not a directory")] NotDirectory(String), + #[allow( + dead_code, + reason = "context index loading is parked while one-shot consults are enforced" + )] #[error("failed to resolve consult context state: {source}")] ContextIndex { source: io::Error }, #[error("job_id must be a valid UUID, got `{0}`")] @@ -705,14 +704,30 @@ struct ConsultResponse { cwd: WorkingDirectory, result: String, persisted_output_path: PersistedConsultPath, + #[allow( + dead_code, + reason = "session metadata is retained internally but hidden from the public surface" + )] context_mode: &'static str, + #[allow( + dead_code, + reason = "session metadata is retained internally but hidden from the public surface" + )] planned_session_id: String, + #[allow( + dead_code, + reason = "session metadata is retained internally but hidden from the public surface" + )] reused_session_id: Option, duration_ms: u64, duration_api_ms: Option, num_turns: u64, stop_reason: Option, session_id: Option, + #[allow( + dead_code, + reason = "session metadata is retained internally but hidden from the public surface" + )] observed_session_id: Option, total_cost_usd: Option, usage: Option, @@ -932,18 +947,14 @@ impl ClaudeSandbox { struct PersistedConsultPath(PathBuf); impl PersistedConsultPath { - fn new(request: &ConsultRequest, session_id: Option<&str>) -> io::Result { + fn new(request: &ConsultRequest) -> io::Result { fs::create_dir_all(CONSULT_OUTPUT_ROOT)?; let timestamp = OffsetDateTime::now_utc() .format(CONSULT_TIMESTAMP_FORMAT) .map_err(|error| io::Error::other(error.to_string()))?; let slug = consult_slug(request.prompt.as_str()); - let session_slug = session_id.map_or_else( - || "session-none".to_owned(), - |session_id| format!("session-{}", consult_slug(session_id)), - ); Ok(Self(Path::new(CONSULT_OUTPUT_ROOT).join(format!( - "{timestamp}-{slug}-{session_slug}-{}.json", + "{timestamp}-{slug}-{}.json", Uuid::new_v4() )))) } @@ -1010,27 +1021,12 @@ fn consult_fault_context(request: &ConsultRequest, error: &ConsultInvocationErro | ConsultInvocationError::Stalled(detail) | ConsultInvocationError::Downstream(detail) => Some(detail.as_str()), }; - let reused_session_id = request.reused_session_id(); - let planned_session_id = request.planned_session_id(); - let observed_session_id = detail - .and_then(downstream_session_id) - .clone() - .or_else(|| request.current_context_session_id()); - let resume_session_id = observed_session_id - .clone() - .or_else(|| reused_session_id.clone()) - .or_else(|| Some(planned_session_id.clone())); let quota_reset_hint = detail.and_then(quota_reset_hint); let quota_limited = quota_reset_hint.is_some(); - let retry_hint = consult_retry_hint(quota_limited, resume_session_id.as_deref()); + let retry_hint = consult_retry_hint(quota_limited, error); FaultContext { consult: Some(ConsultFaultContext { cwd: request.cwd.display(), - context_mode: request.context_mode().to_owned(), - planned_session_id, - reused_session_id, - observed_session_id, - resume_session_id, quota_limited, quota_reset_hint, retry_hint, @@ -1038,34 +1034,23 @@ fn consult_fault_context(request: &ConsultRequest, error: &ConsultInvocationErro } } -fn downstream_session_id(detail: &str) -> Option { - let value = serde_json::from_str::(detail).ok()?; - let session_id = value.get("session_id")?.as_str()?; - SessionHandle::parse(session_id).map(|session| session.display()) -} - fn quota_reset_hint(detail: &str) -> Option { let (_, suffix) = detail.split_once("resets ")?; let hint = suffix.trim(); (!hint.is_empty()).then(|| hint.to_owned()) } -fn consult_retry_hint(quota_limited: bool, resume_session_id: Option<&str>) -> Option { +fn consult_retry_hint(quota_limited: bool, error: &ConsultInvocationError) -> Option { if quota_limited { - return Some(match resume_session_id { - Some(session_id) => format!( - "wait for the quota window to reset, then retry consult on the same cwd; phone_opus will reuse resume_session {session_id} automatically" - ), - None => { - "wait for the quota window to reset, then retry consult on the same cwd".to_owned() - } - }); + return Some("wait for the quota window to reset, then retry the consult".to_owned()); + } + match error { + ConsultInvocationError::Stalled(_) => Some( + "Claude stalled before producing output; retry the consult as a fresh one-shot call" + .to_owned(), + ), + _ => None, } - resume_session_id.map(|session_id| { - format!( - "retry consult on the same cwd; phone_opus will reuse resume_session {session_id} automatically" - ) - }) } pub(crate) fn consult_job_tool_output( @@ -1502,9 +1487,6 @@ fn invoke_claude(request: &ConsultRequest) -> Result Result io::Result { } } +#[allow( + dead_code, + reason = "context lookup is parked while one-shot consults are enforced" +)] fn load_consult_context(key: &ConsultContextKey) -> io::Result> { Ok(load_consult_context_index()?.context_for(key)) } @@ -1886,10 +1866,8 @@ fn persist_consult_output( request: &ConsultRequest, result: &str, envelope: &ClaudeJsonEnvelope, - session_id: Option<&str>, - observed_session_id: Option<&str>, ) -> io::Result { - let path = PersistedConsultPath::new(request, session_id)?; + let path = PersistedConsultPath::new(request)?; let saved_at = OffsetDateTime::now_utc() .format(&Rfc3339) .map_err(|error| io::Error::other(error.to_string()))?; @@ -1901,17 +1879,12 @@ fn persist_consult_output( "prompt": request.prompt.as_str(), "prompt_prefix": CLAUDE_CONSULT_PREFIX, "effective_prompt": request.prompt.rendered(), - "context_mode": request.context_mode(), - "planned_session_id": request.planned_session_id(), - "reused_session_id": request.reused_session_id(), "response": result, "model": model_name(envelope.model_usage.as_ref()), "duration_ms": envelope.duration_ms.unwrap_or(0), "duration_api_ms": envelope.duration_api_ms, "num_turns": envelope.num_turns.unwrap_or(0), "stop_reason": envelope.stop_reason, - "session_id": session_id, - "observed_session_id": observed_session_id, "total_cost_usd": envelope.total_cost_usd, "usage": envelope.usage, "model_usage": envelope.model_usage, @@ -2002,16 +1975,11 @@ fn consult_output( "response": response.result, "cwd": response.cwd.display(), "persisted_output_path": response.persisted_output_path.display(), - "context_mode": response.context_mode, - "planned_session_id": response.planned_session_id, - "reused_session_id": response.reused_session_id, "prompt_prefix_injected": true, "model": response.model_name(), "duration_ms": response.duration_ms, "num_turns": response.num_turns, "stop_reason": response.stop_reason, - "session_id": response.session_id, - "observed_session_id": response.observed_session_id, "total_cost_usd": response.total_cost_usd, "permission_denial_count": response.permission_denials.len(), }); @@ -2022,15 +1990,10 @@ fn consult_output( "prompt": request.prompt.as_str(), "prompt_prefix": CLAUDE_CONSULT_PREFIX, "effective_prompt": request.prompt.rendered(), - "context_mode": response.context_mode, - "planned_session_id": response.planned_session_id, - "reused_session_id": response.reused_session_id, "duration_ms": response.duration_ms, "duration_api_ms": response.duration_api_ms, "num_turns": response.num_turns, "stop_reason": response.stop_reason, - "session_id": response.session_id, - "observed_session_id": response.observed_session_id, "total_cost_usd": response.total_cost_usd, "usage": response.usage, "model_usage": response.model_usage, @@ -2053,7 +2016,6 @@ fn consult_output( fn concise_text(_request: &ConsultRequest, response: &ConsultResponse) -> String { let mut status = vec![ "consult ok".to_owned(), - format!("context={}", response.context_mode), format!("turns={}", response.num_turns), format!("duration={}", render_duration_ms(response.duration_ms)), ]; @@ -2069,16 +2031,6 @@ fn concise_text(_request: &ConsultRequest, response: &ConsultResponse) -> String let mut lines = vec![status.join(" ")]; lines.push(format!("cwd: {}", response.cwd.display())); - lines.push(format!("planned_session: {}", response.planned_session_id)); - if let Some(session_id) = response.reused_session_id.as_deref() { - lines.push(format!("reused_session: {session_id}")); - } - if let Some(session_id) = response.observed_session_id.as_deref() { - lines.push(format!("observed_session: {session_id}")); - } - if let Some(session_id) = response.session_id.as_deref() { - lines.push(format!("session: {session_id}")); - } lines.push(format!( "saved: {}", response.persisted_output_path.display() @@ -2096,20 +2048,10 @@ fn concise_text(_request: &ConsultRequest, response: &ConsultResponse) -> String fn full_text(_request: &ConsultRequest, response: &ConsultResponse) -> String { let mut lines = vec![ - format!( - "consult ok context={} turns={}", - response.context_mode, response.num_turns - ), + format!("consult ok turns={}", response.num_turns), format!("cwd: {}", response.cwd.display()), - format!("planned_session: {}", response.planned_session_id), format!("duration: {}", render_duration_ms(response.duration_ms)), ]; - if let Some(session_id) = response.reused_session_id.as_deref() { - lines.push(format!("reused_session: {session_id}")); - } - if let Some(session_id) = response.observed_session_id.as_deref() { - lines.push(format!("observed_session: {session_id}")); - } if let Some(duration_api_ms) = response.duration_api_ms { lines.push(format!( "api_duration: {}", @@ -2122,9 +2064,6 @@ fn full_text(_request: &ConsultRequest, response: &ConsultResponse) -> String { if let Some(stop_reason) = response.stop_reason.as_deref() { lines.push(format!("stop: {stop_reason}")); } - if let Some(session_id) = response.session_id.as_deref() { - lines.push(format!("session: {session_id}")); - } lines.push(format!( "saved: {}", response.persisted_output_path.display() diff --git a/crates/phone-opus/tests/mcp_hardening.rs b/crates/phone-opus/tests/mcp_hardening.rs index 0b32442..b35e687 100644 --- a/crates/phone-opus/tests/mcp_hardening.rs +++ b/crates/phone-opus/tests/mcp_hardening.rs @@ -378,8 +378,8 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult { "consult schema should not advertise session_id: {consult_tool:#}" ); assert_eq!( - consult_tool["inputSchema"]["properties"]["fresh_context"]["type"].as_str(), - Some("boolean") + consult_tool["inputSchema"]["properties"]["fresh_context"], + Value::Null ); let health = harness.call_tool(3, "health_snapshot", json!({}))?; @@ -389,18 +389,13 @@ fn cold_start_exposes_consult_and_ops_tools() -> TestResult { } #[test] -fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> TestResult { +fn consult_is_one_shot_and_hides_session_state() -> TestResult { let root = temp_root("consult_success")?; let state_home = root.join("state-home"); let sandbox = root.join("sandbox"); - let sibling_sandbox = root.join("sibling-sandbox"); let caller_home = root.join("caller-home"); must(fs::create_dir_all(&state_home), "create state home")?; must(fs::create_dir_all(&sandbox), "create sandbox")?; - must( - fs::create_dir_all(&sibling_sandbox), - "create sibling sandbox", - )?; must(fs::create_dir_all(&caller_home), "create caller home")?; seed_caller_claude_home(&caller_home)?; @@ -413,11 +408,10 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes let cwd_probe_error_file = root.join("cwd-write-probe.err"); let credential_probe_file = root.join("credential-write-probe.txt"); let credential_probe_error_file = root.join("credential-write-probe.err"); - let remembered_session = "81f218eb-568b-409b-871b-f6e86d8f666f"; - let fresh_session = "dbd3b6c2-4757-4b45-a8f0-f3d877e1a13f"; - let sibling_session = "d9a9a472-a091-4268-a7dd-9f31cf61f87e"; + let first_observed_session = "81f218eb-568b-409b-871b-f6e86d8f666f"; + let second_observed_session = "dbd3b6c2-4757-4b45-a8f0-f3d877e1a13f"; write_fake_claude_script(&fake_claude)?; - write_fake_claude_stream_success(&stdout_file, "oracle", remembered_session, "uuid-123")?; + write_fake_claude_stream_success(&stdout_file, "oracle", first_observed_session, "uuid-123")?; let claude_bin = fake_claude.display().to_string(); let stdout_path = stdout_file.display().to_string(); @@ -469,144 +463,73 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes )?; assert_tool_ok(&consult); assert_eq!(tool_content(&consult)["response"].as_str(), Some("oracle")); - assert_eq!( - tool_content(&consult)["context_mode"].as_str(), - Some("fresh") - ); - assert!( - tool_content(&consult)["planned_session_id"] - .as_str() - .is_some_and(|value| !value.is_empty()) - ); + assert!(tool_content(&consult)["context_mode"].is_null()); + assert!(tool_content(&consult)["planned_session_id"].is_null()); assert!(tool_content(&consult)["reused_session_id"].is_null()); - assert_eq!( - tool_content(&consult)["observed_session_id"].as_str(), - Some(remembered_session) - ); - assert_eq!( - tool_content(&consult)["session_id"].as_str(), - Some(remembered_session) - ); + assert!(tool_content(&consult)["observed_session_id"].is_null()); + assert!(tool_content(&consult)["session_id"].is_null()); let first_args = must(fs::read_to_string(&args_file), "read first fake args file")?; - assert!(first_args.contains("--session-id")); - assert!( - tool_content(&consult)["planned_session_id"] - .as_str() - .is_some_and(|value| first_args.contains(value)) - ); + let first_lines = first_args.lines().collect::>(); + assert!(first_lines.contains(&"--session-id")); assert!(!first_args.contains("--resume")); assert!(!first_args.contains("not-a-uuid")); + let first_session_id = must_some( + first_lines + .windows(2) + .find_map(|window| (window[0] == "--session-id").then_some(window[1].to_owned())), + "first one-shot session id", + )?; + assert!(uuid::Uuid::parse_str(&first_session_id).is_ok()); write_fake_claude_stream_success( &stdout_file, - "oracle reused", - remembered_session, + "oracle again", + second_observed_session, "uuid-124", )?; - let reused = harness.call_tool( + let repeated = harness.call_tool( 4, "consult", json!({ - "prompt": "say oracle reused", + "prompt": "say oracle again", "cwd": sandbox.display().to_string() }), )?; - assert_tool_ok(&reused); + assert_tool_ok(&repeated); assert_eq!( - tool_content(&reused)["response"].as_str(), - Some("oracle reused") + tool_content(&repeated)["response"].as_str(), + Some("oracle again") ); - assert_eq!( - tool_content(&reused)["context_mode"].as_str(), - Some("reused") - ); - assert_eq!( - tool_content(&reused)["reused_session_id"].as_str(), - Some(remembered_session) - ); - let reused_args = must(fs::read_to_string(&args_file), "read reused fake args file")?; - assert!(reused_args.contains("--resume")); - assert!(reused_args.contains(remembered_session)); - - write_fake_claude_stream_success(&stdout_file, "oracle fresh", fresh_session, "uuid-125")?; - let fresh = harness.call_tool( - 5, - "consult", - json!({ - "prompt": "say oracle fresh", - "cwd": sandbox.display().to_string(), - "fresh_context": true - }), - )?; - assert_tool_ok(&fresh); - assert_eq!( - tool_content(&fresh)["response"].as_str(), - Some("oracle fresh") - ); - assert_eq!(tool_content(&fresh)["context_mode"].as_str(), Some("fresh")); - assert!(tool_content(&fresh)["reused_session_id"].is_null()); - let fresh_args = must(fs::read_to_string(&args_file), "read fresh fake args file")?; - assert!(!fresh_args.contains("--resume")); - - write_fake_claude_stream_success( - &stdout_file, - "oracle after fresh", - fresh_session, - "uuid-126", - )?; - let after_fresh = harness.call_tool( - 6, - "consult", - json!({ - "prompt": "say oracle after fresh", - "cwd": sandbox.display().to_string() - }), - )?; - assert_tool_ok(&after_fresh); - assert_eq!( - tool_content(&after_fresh)["context_mode"].as_str(), - Some("reused") - ); - assert_eq!( - tool_content(&after_fresh)["reused_session_id"].as_str(), - Some(fresh_session) - ); - let after_fresh_args = must( + assert!(tool_content(&repeated)["context_mode"].is_null()); + assert!(tool_content(&repeated)["planned_session_id"].is_null()); + assert!(tool_content(&repeated)["reused_session_id"].is_null()); + assert!(tool_content(&repeated)["observed_session_id"].is_null()); + assert!(tool_content(&repeated)["session_id"].is_null()); + let repeated_args = must( fs::read_to_string(&args_file), - "read after-fresh fake args file", + "read repeated fake args file", )?; - assert!(after_fresh_args.contains("--resume")); - assert!(after_fresh_args.contains(fresh_session)); - - write_fake_claude_stream_success(&stdout_file, "oracle sibling", sibling_session, "uuid-127")?; - let sibling = harness.call_tool( - 7, - "consult", - json!({ - "prompt": "say oracle sibling", - "cwd": sibling_sandbox.display().to_string() - }), + let repeated_lines = repeated_args.lines().collect::>(); + assert!(repeated_lines.contains(&"--session-id")); + assert!(!repeated_args.contains("--resume")); + let repeated_session_id = must_some( + repeated_lines + .windows(2) + .find_map(|window| (window[0] == "--session-id").then_some(window[1].to_owned())), + "repeated one-shot session id", )?; - assert_tool_ok(&sibling); - assert_eq!( - tool_content(&sibling)["context_mode"].as_str(), - Some("fresh") - ); - assert!(tool_content(&sibling)["reused_session_id"].is_null()); - let sibling_args = must( - fs::read_to_string(&args_file), - "read sibling fake args file", - )?; - assert!(!sibling_args.contains("--resume")); + assert!(uuid::Uuid::parse_str(&repeated_session_id).is_ok()); + assert_ne!(repeated_session_id, first_session_id); let persisted_output_path = must_some( - tool_content(&after_fresh)["persisted_output_path"] + tool_content(&repeated)["persisted_output_path"] .as_str() .map(str::to_owned), "persisted output path", )?; assert!(persisted_output_path.starts_with("/tmp/phone_opus-consults/")); - assert!(persisted_output_path.contains(fresh_session)); + assert!(!persisted_output_path.contains(first_observed_session)); + assert!(!persisted_output_path.contains(second_observed_session)); let persisted_output = must( fs::read_to_string(&persisted_output_path), "read persisted consult output", @@ -615,18 +538,37 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes serde_json::from_str(&persisted_output), "parse persisted consult output", )?; + assert_eq!(persisted_output["response"].as_str(), Some("oracle again")); + assert!(persisted_output["context_mode"].is_null()); + assert!(persisted_output["planned_session_id"].is_null()); + assert!(persisted_output["reused_session_id"].is_null()); + assert!(persisted_output["session_id"].is_null()); + assert!(persisted_output["observed_session_id"].is_null()); + + let consult_context_index = must( + fs::read_to_string( + state_home + .join("phone_opus") + .join("mcp") + .join("consult_contexts.json"), + ), + "read consult context index", + )?; + let consult_context_index: Value = must( + serde_json::from_str(&consult_context_index), + "parse consult context index", + )?; assert_eq!( - persisted_output["response"].as_str(), - Some("oracle after fresh") + consult_context_index["by_cwd"][sandbox.display().to_string()]["session_id"].as_str(), + Some(second_observed_session) ); - assert_eq!(persisted_output["context_mode"].as_str(), Some("reused")); assert_eq!( - persisted_output["reused_session_id"].as_str(), - Some(fresh_session) + consult_context_index["by_cwd"][sandbox.display().to_string()]["state"].as_str(), + Some("confirmed") ); let pwd = must(fs::read_to_string(&pwd_file), "read fake pwd file")?; - assert_eq!(pwd.trim(), sibling_sandbox.display().to_string()); + assert_eq!(pwd.trim(), sandbox.display().to_string()); let args = must(fs::read_to_string(&args_file), "read fake args file")?; let lines = args.lines().collect::>(); @@ -653,7 +595,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes assert!(!lines.contains(&"--max-turns")); assert!(args.contains(PROMPT_PREFIX)); let prefix_index = must_some(args.find(PROMPT_PREFIX), "prefixed consult prompt")?; - let user_prompt_index = must_some(args.find("say oracle sibling"), "user prompt inside args")?; + let user_prompt_index = must_some(args.find("say oracle again"), "user prompt inside args")?; assert!(prefix_index < user_prompt_index); let env_dump = must(fs::read_to_string(&env_file), "read fake env file")?; @@ -726,7 +668,7 @@ fn consult_reuses_context_per_cwd_by_default_and_fresh_context_opts_out() -> Tes )?; assert_eq!(credential_probe.trim(), "write_succeeded"); - let telemetry = harness.call_tool(8, "telemetry_snapshot", json!({}))?; + let telemetry = harness.call_tool(5, "telemetry_snapshot", json!({}))?; assert_tool_ok(&telemetry); let hot_methods = tool_content(&telemetry)["hot_methods"] .as_array() @@ -831,10 +773,10 @@ fn consult_surfaces_downstream_cli_failures() -> TestResult { .is_some_and(|value| value.contains("permission denied by fake claude")) ); assert_eq!( - tool_content(&consult)["context"]["consult"]["context_mode"].as_str(), - Some("fresh") + tool_content(&consult)["context"]["consult"]["cwd"].as_str(), + Some(std::env::current_dir()?.display().to_string().as_str()) ); - assert!(tool_content(&consult)["context"]["consult"]["reused_session_id"].is_null()); + assert!(tool_content(&consult)["context"]["consult"]["planned_session_id"].is_null()); Ok(()) } @@ -876,20 +818,16 @@ fn silent_claude_processes_fail_fast_instead_of_wedging() -> TestResult { .is_some_and(|value| value.contains("produced no stream output within 100 ms")) ); assert!(elapsed < std::time::Duration::from_secs(3)); - assert_eq!( - tool_content(&consult)["context"]["consult"]["context_mode"].as_str(), - Some("fresh") - ); assert!( - tool_content(&consult)["context"]["consult"]["planned_session_id"] + tool_content(&consult)["context"]["consult"]["retry_hint"] .as_str() - .is_some_and(|value| !value.is_empty()) + .is_some_and(|value| value.contains("fresh one-shot call")) ); Ok(()) } #[test] -fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult { +fn quota_failures_hide_session_state_on_public_surface() -> TestResult { let root = temp_root("consult_quota_failure")?; let state_home = root.join("state-home"); let sandbox = root.join("sandbox"); @@ -926,10 +864,7 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult { }), )?; assert_tool_ok(&first); - assert_eq!( - tool_content(&first)["session_id"].as_str(), - Some(remembered_session) - ); + assert!(tool_content(&first)["session_id"].is_null()); let quota_env = [ ("HOME", caller_home_path.as_str()), @@ -962,26 +897,11 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult { tool_content(&failed)["context"]["consult"]["cwd"].as_str(), Some(sandbox.display().to_string().as_str()) ); - assert_eq!( - tool_content(&failed)["context"]["consult"]["context_mode"].as_str(), - Some("reused") - ); - assert_eq!( - tool_content(&failed)["context"]["consult"]["planned_session_id"].as_str(), - Some(remembered_session) - ); - assert_eq!( - tool_content(&failed)["context"]["consult"]["reused_session_id"].as_str(), - Some(remembered_session) - ); - assert_eq!( - tool_content(&failed)["context"]["consult"]["observed_session_id"].as_str(), - Some(remembered_session) - ); - assert_eq!( - tool_content(&failed)["context"]["consult"]["resume_session_id"].as_str(), - Some(remembered_session) - ); + assert!(tool_content(&failed)["context"]["consult"]["context_mode"].is_null()); + assert!(tool_content(&failed)["context"]["consult"]["planned_session_id"].is_null()); + assert!(tool_content(&failed)["context"]["consult"]["reused_session_id"].is_null()); + assert!(tool_content(&failed)["context"]["consult"]["observed_session_id"].is_null()); + assert!(tool_content(&failed)["context"]["consult"]["resume_session_id"].is_null()); assert_eq!( tool_content(&failed)["context"]["consult"]["quota_limited"].as_bool(), Some(true) @@ -993,7 +913,15 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult { assert!( tool_content(&failed)["context"]["consult"]["retry_hint"] .as_str() - .is_some_and(|value| value.contains(remembered_session)) + .is_some_and(|value| value.contains("retry the consult")) + ); + assert!( + failed["result"]["content"] + .as_array() + .into_iter() + .flatten() + .filter_map(|entry| entry["text"].as_str()) + .any(|text| text.contains("quota_reset: 4pm (America/New_York)")) ); assert!( failed["result"]["content"] @@ -1001,16 +929,13 @@ fn quota_failures_surface_resume_context_for_same_cwd() -> TestResult { .into_iter() .flatten() .filter_map(|entry| entry["text"].as_str()) - .any(|text| { - text.contains("resume_session: 84b9d462-5af9-4a4e-8e44-379a8d0c46d7") - && text.contains("quota_reset: 4pm (America/New_York)") - }) + .all(|text| !text.contains("session")) ); Ok(()) } #[test] -fn fresh_failures_capture_streamed_session_ids_eagerly() -> TestResult { +fn fresh_failures_keep_internal_session_state_without_public_leakage() -> TestResult { let root = temp_root("consult_fresh_stream_failure")?; let state_home = root.join("state-home"); let sandbox = root.join("sandbox"); @@ -1054,42 +979,41 @@ fn fresh_failures_capture_streamed_session_ids_eagerly() -> TestResult { }), )?; assert_tool_error(&failed); - assert_eq!( - tool_content(&failed)["context"]["consult"]["context_mode"].as_str(), - Some("fresh") - ); - assert_eq!( - tool_content(&failed)["context"]["consult"]["observed_session_id"].as_str(), - Some(init_session) - ); - assert_eq!( - tool_content(&failed)["context"]["consult"]["resume_session_id"].as_str(), - Some(init_session) - ); + assert!(tool_content(&failed)["context"]["consult"]["context_mode"].is_null()); + assert!(tool_content(&failed)["context"]["consult"]["observed_session_id"].is_null()); + assert!(tool_content(&failed)["context"]["consult"]["resume_session_id"].is_null()); assert_eq!( tool_content(&failed)["context"]["consult"]["quota_reset_hint"].as_str(), Some("9pm (America/New_York)") ); - let planned_session = must_some( - tool_content(&failed)["context"]["consult"]["planned_session_id"] - .as_str() - .map(str::to_owned), - "planned session id on failure", - )?; + assert!(tool_content(&failed)["context"]["consult"]["planned_session_id"].is_null()); let args = must(fs::read_to_string(&args_file), "read fresh failure args")?; assert!(args.contains("--session-id")); - assert!(args.contains(&planned_session)); assert!(!args.contains("--resume")); + let consult_context_index = must( + fs::read_to_string( + state_home + .join("phone_opus") + .join("mcp") + .join("consult_contexts.json"), + ), + "read consult context index after failure", + )?; + let consult_context_index: Value = must( + serde_json::from_str(&consult_context_index), + "parse consult context index after failure", + )?; + assert_eq!( + consult_context_index["by_cwd"][sandbox.display().to_string()]["session_id"].as_str(), + Some(init_session) + ); assert!( failed["result"]["content"] .as_array() .into_iter() .flatten() .filter_map(|entry| entry["text"].as_str()) - .any(|text| { - text.contains("observed_session: 550e8400-e29b-41d4-a716-446655440000") - && text.contains("resume_session: 550e8400-e29b-41d4-a716-446655440000") - }) + .all(|text| !text.contains("session")) ); Ok(()) } -- cgit v1.2.3