diff --git a/Cargo.lock b/Cargo.lock index 92bc18499..ad7efabc9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3717,6 +3717,7 @@ dependencies = [ "openshell-driver-podman", "openshell-ocsf", "openshell-policy", + "openshell-prover", "openshell-providers", "openshell-router", "openshell-server-macros", diff --git a/architecture/security-policy.md b/architecture/security-policy.md index bc7b0c7a8..ad8d7d0ec 100644 --- a/architecture/security-policy.md +++ b/architecture/security-policy.md @@ -89,21 +89,71 @@ because it changes the effective access model for every sandbox on the gateway. ## Policy Advisor The policy advisor pipeline turns observed denials into draft policy -recommendations: - -1. The sandbox aggregates denied network events. -2. A mechanistic mapper proposes minimal endpoint, binary, or rule additions. -3. The gateway validates and stores draft recommendations. -4. A human or admin workflow approves or rejects drafts. -5. Approved drafts merge into the target sandbox policy. +recommendations. There are two proposers (sandbox-side mechanistic mapper, +agent-authored via `policy.local`); the gateway is the single referee. +When enabled, L7 `policy_denied` responses include both structured +`next_steps` and a short `agent_guidance` string so generic agents can continue +through the proposal loop instead of treating the denial as terminal. + +1. **Submit.** Both proposers POST through the same `SubmitPolicyAnalysis` + path. Each chunk is persisted with its `analysis_mode` for audit provenance. +2. **Validate.** The gateway runs the prover (`openshell-prover`) on every + chunk regardless of mode. The prover builds a Z3 model from the merged + policy plus the sandbox's attached-provider credential set, then computes + the delta of findings between the current baseline and the merged policy. +3. 
**Auto-approval gate (proposer-agnostic, opt-in).** Auto-approval fires + when *both* (a) the prover delta is empty (`prover: no new findings`) AND + (b) the `proposal_approval_mode` setting resolves to `"auto"` — gateway + scope wins, sandbox scope is the per-sandbox override, default is + `"manual"`. When both hold, the gateway internally invokes the approve + path with actor identity `system:auto`. The audit event uses + `CONFIG:APPROVED` and carries `auto=true`, `source=<analysis_mode>`, + `prover_delta=empty`, and `resolved_from=<scope>` as unmapped + fields, with message text `"auto-approved: no new prover findings"` — + never `safe`. The opt-in gate preserves OpenShell's default-deny + posture: with no setting at either scope, every proposal lands in + `pending` for human review, even when the prover sees no findings. +4. **Implicit supersede.** On any successful submission, the gateway scans + the sandbox's pending chunks for matches on `(host, port, binary)` and + auto-rejects the older ones with reason `"superseded by chunk X"`. This + gives the agent a refinement path (broad mechanistic L4 → narrow agent + L7) without an explicit `supersedes_chunk_id` field. +5. **Escalation.** Anything else lands in `pending` for human review. + +## What the prover decides + +The prover answers four formal questions about each proposed policy +change. Each "yes" answer becomes its own categorical finding — there is +no severity grade. Any finding (of any category) blocks auto-approval. +The categories are intended to be (mostly) mutually exclusive per +underlying change: the gateway suppresses `capability_expansion` paths +whose `(binary, host, port)` is also in the `credential_reach_expansion` +delta, so a brand-new credentialed reach surfaces as one finding rather +than one reach + N method findings. 
+ +| Category | The prover detects… | +|---|---| +| `link_local_reach` | The proposal grants reach to a host in `169.254.0.0/16`, `fe80::/10`, or a known metadata hostname such as `metadata.google.internal`. Unconditional — cloud-metadata endpoints serve credentials regardless of sandbox state. | +| `l7_bypass_credentialed` | The proposal lets a binary using a non-HTTP wire protocol (`git-remote-https`, `ssh`, `nc`) reach a host where a sandbox credential is in scope. The L7 proxy cannot inspect the wire protocol; the reviewer decides whether to trust the binary with the credential. | +| `credential_reach_expansion` | A binary gained credentialed reach to a (host, port) it could not reach before. New authenticated reach is a stated intent change; the reviewer confirms the binary should authenticate to the host at all. | +| `capability_expansion` | On a (binary, host, port) that already had credentialed reach, the policy adds a new HTTP method. The reviewer sees exactly which method was added (e.g., PUT) and decides if it's part of the agent's task. | + +"Credential in scope" is sandbox-coarse, not binary-fine: a credential is +considered in scope if the sandbox has a provider attached whose +`target_hosts` include the proposed endpoint's host, including runtime-like +first-label wildcard coverage such as `*.github.com` covering +`api.github.com`. v1 does not model credential scopes (read-only vs write); +presence is enough. Proposals intentionally omit `allowed_ips`. If a proposed rule targets a host that resolves to a private IP, the proxy's runtime SSRF classification blocks the connection. The operator must then add an explicit `allowed_ips` entry to permit it — a two-step flow that keeps SSRF protection on by default. -The advisor should propose narrow additions and preserve explicit-deny behavior. -It is a workflow aid, not an automatic permission grant. +The advisor proposes narrow additions and preserves explicit-deny behavior. 
+Auto-approval is gated on prover determinism, not human judgment; an LLM-based +contextual reviewer is a deliberate future addition layered on top of the +deterministic prover gate. ## Security Logging diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 917c8faa1..7b8f5d15f 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -1148,6 +1148,11 @@ enum DoctorCommands { } #[derive(Subcommand, Debug)] +// `Create` carries enough optional fields to be ~3x larger than the next +// variant; boxing it would obscure the clap derive ergonomics for one +// (rare) enum allocation per parse, which isn't worth the readability +// cost. +#[allow(clippy::large_enum_variant)] enum SandboxCommands { /// Create a sandbox. #[command(help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")] @@ -1256,6 +1261,18 @@ enum SandboxCommands { #[arg(long = "label")] labels: Vec, + /// Approval mode for agent-authored policy proposals. + /// + /// `manual` (default): every proposal lands in the draft inbox for + /// human review, regardless of the prover verdict. + /// + /// `auto`: proposals whose prover delta is empty are approved + /// automatically; proposals with findings still require human + /// approval. Auto mode is an explicit opt-in — `OpenShell`'s + /// default-deny posture is preserved unless you choose otherwise. + #[arg(long, value_parser = ["manual", "auto"], default_value = "manual")] + approval_mode: String, + /// Command to run after "--" (defaults to an interactive shell). #[arg(last = true, allow_hyphen_values = true)] command: Vec, @@ -2526,6 +2543,7 @@ async fn main() -> Result<()> { auto_providers, no_auto_providers, labels, + approval_mode, command, } => { // Resolve --tty / --no-tty into an Option override. 
@@ -2594,6 +2612,7 @@ async fn main() -> Result<()> { tty_override, auto_providers_override, &labels_map, + &approval_mode, &tls, )) .await?; @@ -4134,6 +4153,60 @@ mod tests { } } + /// `sandbox create` defaults `--approval-mode` to `"manual"`. Clap always + /// yields an explicit value so `sandbox_create` never sees an ambiguous + /// mode; `"manual"` itself is not written as a setting because the + /// gateway treats an unset (or `""`) value as `"manual"` too. + #[test] + fn sandbox_create_approval_mode_defaults_to_manual() { + let cli = Cli::try_parse_from(["openshell", "sandbox", "create"]) + .expect("sandbox create with no flags should parse"); + match cli.command { + Some(Commands::Sandbox { + command: Some(SandboxCommands::Create { approval_mode, .. }), + .. + }) => { + assert_eq!(approval_mode, "manual"); + } + other => panic!("expected SandboxCommands::Create, got: {other:?}"), + } + } + + /// `--approval-mode auto` parses through. + #[test] + fn sandbox_create_approval_mode_accepts_auto() { + let cli = + Cli::try_parse_from(["openshell", "sandbox", "create", "--approval-mode", "auto"]) + .expect("--approval-mode auto should parse"); + match cli.command { + Some(Commands::Sandbox { + command: Some(SandboxCommands::Create { approval_mode, .. }), + .. + }) => { + assert_eq!(approval_mode, "auto"); + } + other => panic!("expected SandboxCommands::Create, got: {other:?}"), + } + } + + /// `--approval-mode <unknown>` is rejected by clap's value parser, so the + /// CLI can't smuggle through a future-mode value that the gateway + /// doesn't yet know about. 
+ #[test] + fn sandbox_create_approval_mode_rejects_unknown_value() { + let result = Cli::try_parse_from([ + "openshell", + "sandbox", + "create", + "--approval-mode", + "auto_on_low_risk", + ]); + assert!( + result.is_err(), + "--approval-mode auto_on_low_risk should be rejected until added to the value parser" + ); + } + #[test] fn sandbox_create_resource_flags_parse() { let cli = Cli::try_parse_from([ diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index e5d89b93a..1dd890852 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -1693,6 +1693,7 @@ pub async fn sandbox_create( tty_override: Option, auto_providers_override: Option, labels: &HashMap, + approval_mode: &str, tls: &TlsOptions, ) -> Result<()> { if editor.is_some() && !command.is_empty() { @@ -1806,6 +1807,38 @@ pub async fn sandbox_create( let _ = save_last_sandbox(gateway, &sandbox_name); } + // Persist `--approval-mode` as a sandbox-scoped setting now that the + // sandbox exists. `manual` is the implicit default (no setting needed); + // any other value is written so it survives sandbox restarts and can be + // flipped later via `openshell settings set proposal_approval_mode`. + // If the write fails the sandbox still runs in default `manual` — surface + // the recovery command so the user can retry. 
+ if approval_mode != "manual" { + let setting = parse_cli_setting_value(settings::PROPOSAL_APPROVAL_MODE_KEY, approval_mode)?; + match client + .update_config(UpdateConfigRequest { + name: sandbox_name.clone(), + policy: None, + setting_key: settings::PROPOSAL_APPROVAL_MODE_KEY.to_string(), + setting_value: Some(setting), + delete_setting: false, + global: false, + merge_operations: vec![], + expected_resource_version: 0, + }) + .await + { + Ok(_) => {} + Err(status) => { + eprintln!( + "{} failed to set approval mode '{approval_mode}' on sandbox '{sandbox_name}': {}\n retry with: openshell settings set {sandbox_name} proposal_approval_mode {approval_mode}", + "warning:".yellow().bold(), + status.message(), + ); + } + } + } + // Set up display — interactive terminals get a step-based checklist with // spinners; non-interactive (pipes / CI) get timestamped lines. let mut display = if interactive { @@ -5519,7 +5552,23 @@ fn parse_cli_setting_value(key: &str, raw_value: &str) -> Result { })?; let value = match setting.kind { - SettingValueKind::String => setting_value::Value::StringValue(raw_value.to_string()), + SettingValueKind::String => { + // Reject typos client-side so `openshell settings set ... + // proposal_approval_mode autom` errors immediately instead of + // round-tripping through the server. The server enforces the + // same check independently for non-CLI callers. 
+ setting + .validate_string_value(raw_value) + .map_err(|allowed| { + miette::miette!( + "invalid value '{}' for key '{}'; expected one of: {}", + raw_value, + key, + allowed.join(", ") + ) + })?; + setting_value::Value::StringValue(raw_value.to_string()) + } SettingValueKind::Int => { let parsed = raw_value.trim().parse::().map_err(|_| { miette::miette!( @@ -6739,6 +6788,13 @@ pub async fn sandbox_draft_get( chunk.security_notes.yellow() ); } + if !chunk.validation_result.is_empty() { + println!( + " {} {}", + "Validation:".dimmed(), + chunk.validation_result.cyan() + ); + } if let Some(ref rule) = chunk.proposed_rule { println!(" {} {}", "Endpoints:".dimmed(), format_endpoints(rule)); diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index f51d5e3c9..aee91de56 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -794,6 +794,7 @@ async fn sandbox_create_keeps_command_sessions_by_default() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -835,6 +836,7 @@ async fn sandbox_create_sends_cpu_and_memory_limits_only() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -911,6 +913,7 @@ async fn sandbox_create_does_not_infer_command_providers_when_v2_enabled() { Some(true), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -967,6 +970,7 @@ async fn sandbox_create_returns_vm_error_without_waiting_for_timeout() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -1019,6 +1023,7 @@ async fn sandbox_create_keeps_waiting_while_vm_progress_arrives() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -1063,6 +1068,7 @@ async fn sandbox_create_times_out_when_only_logs_arrive() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -1103,6 +1109,7 @@ async 
fn sandbox_create_deletes_command_sessions_with_no_keep() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -1147,6 +1154,7 @@ async fn sandbox_create_deletes_shell_sessions_with_no_keep() { Some(true), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -1191,6 +1199,7 @@ async fn sandbox_create_keeps_sandbox_with_hidden_keep_flag() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -1235,6 +1244,7 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await diff --git a/crates/openshell-core/src/net.rs b/crates/openshell-core/src/net.rs index 0e2654fc3..06e6096ee 100644 --- a/crates/openshell-core/src/net.rs +++ b/crates/openshell-core/src/net.rs @@ -12,6 +12,16 @@ use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; +/// Check if a hostname is a known cloud metadata hostname that resolves to an +/// always-blocked metadata service. +/// +/// This is intentionally a static name check. Do not perform DNS resolution in +/// policy validation or proposal generation paths. +pub fn is_known_metadata_hostname(host: &str) -> bool { + let normalized = host.trim().trim_end_matches('.').to_ascii_lowercase(); + matches!(normalized.as_str(), "metadata.google.internal") +} + /// Check if an IP address is link-local. 
/// /// Covers IPv4 `169.254.0.0/16`, IPv6 `fe80::/10`, and IPv4-mapped IPv6 @@ -213,6 +223,21 @@ fn is_internal_v4(v4: Ipv4Addr) -> bool { mod tests { use super::*; + // -- is_known_metadata_hostname -- + + #[test] + fn test_known_metadata_hostname_accepts_gcp_variants() { + assert!(is_known_metadata_hostname("metadata.google.internal")); + assert!(is_known_metadata_hostname("METADATA.GOOGLE.INTERNAL")); + assert!(is_known_metadata_hostname("metadata.google.internal.")); + } + + #[test] + fn test_known_metadata_hostname_rejects_public_hosts() { + assert!(!is_known_metadata_hostname("api.github.com")); + assert!(!is_known_metadata_hostname("")); + } + // -- is_link_local_ip -- #[test] diff --git a/crates/openshell-core/src/settings.rs b/crates/openshell-core/src/settings.rs index 897317a5a..3ff1f36f8 100644 --- a/crates/openshell-core/src/settings.rs +++ b/crates/openshell-core/src/settings.rs @@ -28,6 +28,32 @@ impl SettingValueKind { pub struct RegisteredSetting { pub key: &'static str, pub kind: SettingValueKind, + /// Optional whitelist of allowed string values. When `Some`, values + /// outside the list are rejected at configure time by every API surface + /// that goes through [`validate_string_value`] (CLI, TUI, gRPC). `None` + /// means the value is free-form and any string is accepted. Only + /// meaningful for [`SettingValueKind::String`] entries. + pub allowed_string_values: Option<&'static [&'static str]>, +} + +impl RegisteredSetting { + /// Validate a string value against [`allowed_string_values`]. Returns + /// `Ok(())` when the setting has no constraint or when the value is in + /// the allowed list. On rejection, returns the allowed slice so callers + /// can format their own diagnostic. + /// + /// [`allowed_string_values`]: Self::allowed_string_values + /// + /// # Errors + /// + /// Returns the allowed-value slice when the setting has an + /// `allowed_string_values` whitelist and `value` is not in it. 
+ pub fn validate_string_value(&self, value: &str) -> Result<(), &'static [&'static str]> { + match self.allowed_string_values { + Some(allowed) if !allowed.contains(&value) => Err(allowed), + _ => Ok(()), + } + } } /// Static registry of currently-supported runtime settings. @@ -59,12 +85,35 @@ pub const PROVIDERS_V2_ENABLED_KEY: &str = "providers_v2_enabled"; /// still applies when this flag is on. pub const AGENT_POLICY_PROPOSALS_ENABLED_KEY: &str = "agent_policy_proposals_enabled"; +/// Approval mode for agent-authored policy proposals. +/// +/// `"manual"` (the default when unset): every proposal lands in the draft +/// inbox for human review, regardless of the prover verdict. `"auto"`: +/// proposals whose prover delta is empty are approved automatically; +/// proposals with findings still require human approval. Any other value +/// (typos, future-reserved modes like `"auto_on_low_risk"`) falls back to +/// manual — auto mode is an explicit, exact opt-in. +/// +/// Resolution precedence (matches the rest of the settings model): gateway +/// scope wins over sandbox scope. A reviewer can pin manual mode for a +/// fleet by setting it globally; per-sandbox overrides only apply when no +/// global is set. +pub const PROPOSAL_APPROVAL_MODE_KEY: &str = "proposal_approval_mode"; + +/// Allowed values for [`PROPOSAL_APPROVAL_MODE_KEY`]. +/// +/// Any other string is rejected at configure time (so operators get immediate +/// feedback on typos like `"autom"`) while the runtime resolver still +/// fail-closes on unknown persisted values for defense in depth. +pub const PROPOSAL_APPROVAL_MODE_VALUES: &[&str] = &["manual", "auto"]; + pub const REGISTERED_SETTINGS: &[RegisteredSetting] = &[ // Gateway-level opt-in for provider profile policy composition. Defaults // to false when unset. 
RegisteredSetting { key: PROVIDERS_V2_ENABLED_KEY, kind: SettingValueKind::Bool, + allowed_string_values: None, }, // When true the sandbox writes OCSF v1.7.0 JSONL records to // `/var/log/openshell-ocsf*.log` (daily rotation, 3 files) in addition @@ -72,12 +121,21 @@ pub const REGISTERED_SETTINGS: &[RegisteredSetting] = &[ RegisteredSetting { key: "ocsf_json_enabled", kind: SettingValueKind::Bool, + allowed_string_values: None, }, // Sandbox-level opt-in for the agent-driven policy proposal surface. // See AGENT_POLICY_PROPOSALS_ENABLED_KEY for details. Defaults to false. RegisteredSetting { key: AGENT_POLICY_PROPOSALS_ENABLED_KEY, kind: SettingValueKind::Bool, + allowed_string_values: None, + }, + // Approval mode for agent-authored proposals. See + // PROPOSAL_APPROVAL_MODE_KEY for details. Defaults to manual. + RegisteredSetting { + key: PROPOSAL_APPROVAL_MODE_KEY, + kind: SettingValueKind::String, + allowed_string_values: Some(PROPOSAL_APPROVAL_MODE_VALUES), }, // Test-only keys live behind the `dev-settings` feature flag so they // don't appear in production builds. 
@@ -85,11 +143,13 @@ pub const REGISTERED_SETTINGS: &[RegisteredSetting] = &[ RegisteredSetting { key: "dummy_int", kind: SettingValueKind::Int, + allowed_string_values: None, }, #[cfg(feature = "dev-settings")] RegisteredSetting { key: "dummy_bool", kind: SettingValueKind::Bool, + allowed_string_values: None, }, ]; @@ -122,8 +182,9 @@ pub fn parse_bool_like(raw: &str) -> Option { #[cfg(test)] mod tests { use super::{ - PROVIDERS_V2_ENABLED_KEY, REGISTERED_SETTINGS, RegisteredSetting, SettingValueKind, - parse_bool_like, registered_keys_csv, setting_for_key, + PROPOSAL_APPROVAL_MODE_KEY, PROPOSAL_APPROVAL_MODE_VALUES, PROVIDERS_V2_ENABLED_KEY, + REGISTERED_SETTINGS, RegisteredSetting, SettingValueKind, parse_bool_like, + registered_keys_csv, setting_for_key, }; #[cfg(feature = "dev-settings")] @@ -153,6 +214,52 @@ mod tests { assert_eq!(setting.kind, SettingValueKind::Bool); } + // ---- RegisteredSetting::validate_string_value ---- + + #[test] + fn validate_string_value_accepts_anything_when_unconstrained() { + let setting = setting_for_key(PROVIDERS_V2_ENABLED_KEY) + .expect("providers_v2_enabled should be registered"); + // Bool-kind entries currently leave `allowed_string_values = None`; + // the helper still returns Ok for arbitrary strings. 
+ assert!(setting.validate_string_value("anything").is_ok()); + assert!(setting.validate_string_value("").is_ok()); + } + + #[test] + fn proposal_approval_mode_accepts_manual_and_auto() { + let setting = setting_for_key(PROPOSAL_APPROVAL_MODE_KEY) + .expect("proposal_approval_mode should be registered"); + assert_eq!(setting.kind, SettingValueKind::String); + assert_eq!( + setting.allowed_string_values, + Some(PROPOSAL_APPROVAL_MODE_VALUES) + ); + assert!(setting.validate_string_value("manual").is_ok()); + assert!(setting.validate_string_value("auto").is_ok()); + } + + #[test] + fn proposal_approval_mode_rejects_typos_and_future_modes() { + let setting = setting_for_key(PROPOSAL_APPROVAL_MODE_KEY) + .expect("proposal_approval_mode should be registered"); + for bad in [ + "autom", + "AUTO", + "Manual", + "", + " auto", + "auto_on_low_risk", + "yes", + ] { + let err = setting + .validate_string_value(bad) + .expect_err(&format!("expected '{bad}' to be rejected")); + // Caller gets the allowed slice back for diagnostics. + assert_eq!(err, PROPOSAL_APPROVAL_MODE_VALUES); + } + } + // ---- parse_bool_like ---- #[test] @@ -271,6 +378,7 @@ mod tests { let a = RegisteredSetting { key: "test", kind: SettingValueKind::Bool, + allowed_string_values: None, }; let b = a; assert_eq!(a, b); diff --git a/crates/openshell-ocsf/src/format/shorthand.rs b/crates/openshell-ocsf/src/format/shorthand.rs index 0e50fc6c5..aeae77f9e 100644 --- a/crates/openshell-ocsf/src/format/shorthand.rs +++ b/crates/openshell-ocsf/src/format/shorthand.rs @@ -300,22 +300,41 @@ impl OcsfEvent { }, ); let what = e.base.message.as_deref().unwrap_or("config"); - let version_ctx = e + // Bracketed suffix carries the structured provenance fields a + // reviewer needs to scan a CONFIG audit line. Auto-approval + // emits `auto`/`source`/`prover_delta`; every config change + // also carries `policy_version` and `policy_hash`. Order is + // stable so logs are greppable. 
+ let suffix = e .base .unmapped .as_ref() - .and_then(|u| { - let ver = u.get("policy_version").and_then(|v| v.as_str()); - let hash = u.get("policy_hash").and_then(|v| v.as_str()); - match (ver, hash) { - (Some(v), Some(h)) => Some(format!(" [version:{v} hash:{h}]")), - (Some(v), None) => Some(format!(" [version:{v}]")), - _ => None, + .map(|u| { + let mut parts: Vec = Vec::new(); + let mut push = |key: &str| { + if let Some(value) = u.get(key).and_then(|v| v.as_str()) { + parts.push(format!("{key}:{value}")); + } + }; + push("auto"); + push("source"); + push("prover_delta"); + push("resolved_from"); + if let Some(ver) = u.get("policy_version").and_then(|v| v.as_str()) { + parts.push(format!("version:{ver}")); + } + if let Some(hash) = u.get("policy_hash").and_then(|v| v.as_str()) { + parts.push(format!("hash:{hash}")); + } + if parts.is_empty() { + String::new() + } else { + format!(" [{}]", parts.join(" ")) } }) .unwrap_or_default(); - format!("CONFIG:{state} {sev} {what}{version_ctx}") + format!("CONFIG:{state} {sev} {what}{suffix}") } Self::Base(e) => { @@ -829,6 +848,37 @@ mod tests { ); } + /// Auto-approval audit events carry `auto`, `source`, `prover_delta`, and + /// `resolved_from` as unmapped fields. Lock the suffix order so operators + /// (and the demo's grep) can rely on it. 
+ #[test] + fn test_config_state_change_shorthand_includes_auto_approve_fields() { + let mut b = base(5019, "Device Config State Change", 5, "Discovery", 1, "Log"); + b.set_message("auto-approved: no new prover findings (source=agent_authored)"); + b.add_unmapped("auto", serde_json::json!("true")); + b.add_unmapped("source", serde_json::json!("agent_authored")); + b.add_unmapped("prover_delta", serde_json::json!("empty")); + b.add_unmapped("resolved_from", serde_json::json!("sandbox")); + b.add_unmapped("policy_version", serde_json::json!("v4")); + b.add_unmapped("policy_hash", serde_json::json!("sha256:cafe")); + + let event = OcsfEvent::DeviceConfigStateChange(DeviceConfigStateChangeEvent { + base: b, + state: Some(StateId::Other), + state_custom_label: Some("APPROVED".to_string()), + security_level: None, + prev_security_level: None, + }); + + let shorthand = event.format_shorthand(); + assert_eq!( + shorthand, + "CONFIG:APPROVED [INFO] auto-approved: no new prover findings (source=agent_authored) \ + [auto:true source:agent_authored prover_delta:empty resolved_from:sandbox \ + version:v4 hash:sha256:cafe]" + ); + } + #[test] fn test_base_event_shorthand() { let mut b = base(0, "Base Event", 0, "Uncategorized", 99, "Other"); diff --git a/crates/openshell-policy/src/merge.rs b/crates/openshell-policy/src/merge.rs index c01445b11..60da5e4f1 100644 --- a/crates/openshell-policy/src/merge.rs +++ b/crates/openshell-policy/src/merge.rs @@ -392,17 +392,36 @@ fn add_rule( incoming_rule.name = rule_name.to_string(); } + // Endpoint-overlap fallback: when a chunk arrives with a new rule_name + // that doesn't already exist, fold it into a same-host/port rule if one + // is present. This is intentional for user-authored policies (incremental + // refinements live under one rule name). + // + // Provider-injected rules (`_provider_*` — see `compose.rs::provider_rule_name`) + // are deliberately EXCLUDED from this fallback. 
Provider profiles supply a + // baseline layer that should stay separate from agent/user contributions; + // merging an agent's narrow proposal into a provider's broad rule would + // (a) expand the provider rule's `access` shorthand into wildcard + // `path: "**"` rules at the prover's input, masking the agent's narrow + // scope behind the existing broad coverage, and (b) silently widen the + // provider rule's binary list. The agent's contribution is kept on its + // own rule key, the prover sees the actual narrow proposal, and the + // reviewer gets honest signal about what's being added. let target_key = if policy.network_policies.contains_key(rule_name) { Some(rule_name.to_string()) } else { let mut keys: Vec<_> = policy.network_policies.keys().cloned().collect(); keys.sort(); - keys.into_iter().find(|key| { - policy - .network_policies - .get(key) - .is_some_and(|existing_rule| rules_share_endpoint(existing_rule, &incoming_rule)) - }) + keys.into_iter() + .filter(|k| !k.starts_with("_provider_")) + .find(|key| { + policy + .network_policies + .get(key) + .is_some_and(|existing_rule| { + rules_share_endpoint(existing_rule, &incoming_rule) + }) + }) }; if let Some(key) = target_key { @@ -619,15 +638,28 @@ fn find_endpoint_mut<'a>( host: &str, port: u32, ) -> Option<&'a mut NetworkEndpoint> { + // `_provider_*` rules are excluded from this lookup for the same reason + // they're excluded from `add_rule`'s endpoint-overlap fallback: callers + // (`AddAllowRules`, `AddDenyRules`) must not mutate provider-injected + // rules in place. If the operation should target a provider rule, the + // caller should reference it by its exact name through the merge ops + // that take a `rule_name`. 
Defense-in-depth: even if a future caller + // accidentally passes a composed policy here, `AddAllowRules` would no + // longer be able to expand a provider rule's `access` shorthand into + // wildcard `path: "**"` rules (which would mask the prover's narrowness + // verdict on agent contributions). let mut keys: Vec<_> = policy.network_policies.keys().cloned().collect(); keys.sort(); - let target_key = keys.into_iter().find(|key| { - policy.network_policies.get(key).is_some_and(|rule| { - rule.endpoints - .iter() - .any(|endpoint| endpoint_matches_host_port(endpoint, host, port)) - }) - })?; + let target_key = keys + .into_iter() + .filter(|k| !k.starts_with("_provider_")) + .find(|key| { + policy.network_policies.get(key).is_some_and(|rule| { + rule.endpoints + .iter() + .any(|endpoint| endpoint_matches_host_port(endpoint, host, port)) + }) + })?; policy .network_policies @@ -1571,4 +1603,159 @@ mod tests { .contains_key("allow_api_example_com_443") ); } + + /// Provider-injected rules (`_provider_*`) are excluded from the + /// endpoint-overlap fallback: an agent chunk for the same `(host, port)` + /// as a provider rule lands as its own key instead of being merged into + /// the provider's rule. This keeps agent contributions honestly narrow + /// (no silent expansion via the provider rule's `access` shorthand) and + /// preserves binary-list separation. + #[test] + fn add_rule_does_not_merge_agent_chunk_into_provider_rule() { + use crate::compose::{ProviderPolicyLayer, compose_effective_policy}; + use openshell_core::proto::SandboxPolicy; + + // Compose a policy where the github provider profile contributes a + // `_provider_*` rule for api.github.com with `access: read-write` + // and gh/git binaries. 
+ let provider_rule = NetworkPolicyRule { + name: "_provider_work_github".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + access: "read-write".to_string(), + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/gh".to_string(), + ..Default::default() + }], + }; + let composed = compose_effective_policy( + &SandboxPolicy::default(), + &[ProviderPolicyLayer { + rule_name: "_provider_work_github".to_string(), + rule: provider_rule, + }], + ); + assert!( + composed + .network_policies + .contains_key("_provider_work_github"), + "precondition: provider rule must be present in baseline" + ); + + // Agent submits a narrow PUT rule targeting the same host/port via + // curl. Without the filter, this would merge into the provider rule. + let agent_rule = NetworkPolicyRule { + name: "github_contents_put".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + rules: vec![rest_rule("PUT", "/repos/owner/repo/contents/file.md")], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + let result = merge_policy( + composed, + &[PolicyMergeOp::AddRule { + rule_name: "github_contents_put".to_string(), + rule: agent_rule, + }], + ) + .expect("merge should succeed"); + + // The agent's chunk lands as its own rule key. 
+ assert!( + result + .policy + .network_policies + .contains_key("github_contents_put"), + "agent chunk must land as a separate rule (not merged into the provider rule); \ + got keys: {:?}", + result.policy.network_policies.keys().collect::>() + ); + + // The provider rule is unchanged: still has only gh as a binary + // (no silent broadening), still has the read-write shorthand + // intact (no preset expansion into wildcard paths). + let provider_rule_after = result + .policy + .network_policies + .get("_provider_work_github") + .expect("provider rule must still be present"); + assert_eq!( + provider_rule_after.binaries.len(), + 1, + "provider rule's binary list must NOT have been merged with the agent's binaries" + ); + assert_eq!(provider_rule_after.binaries[0].path, "/usr/bin/gh"); + assert_eq!( + provider_rule_after.endpoints[0].access, "read-write", + "provider rule's `access` shorthand must remain intact" + ); + assert!( + provider_rule_after.endpoints[0].rules.is_empty(), + "provider rule must NOT have had its access expanded into explicit wildcard rules" + ); + + // The agent's rule retains its narrow scope. + let agent_rule_after = &result.policy.network_policies["github_contents_put"]; + assert_eq!(agent_rule_after.binaries[0].path, "/usr/bin/curl"); + assert_eq!(agent_rule_after.endpoints[0].rules.len(), 1); + } + + /// Non-provider rules still merge by endpoint overlap when the incoming + /// `rule_name` doesn't match an existing key. This preserves the + /// long-standing behavior for user-authored and mechanistic chunks. 
+ #[test] + fn add_rule_still_merges_user_chunk_into_user_rule_by_endpoint_overlap() { + let mut policy = restrictive_default_policy(); + policy.network_policies.insert( + "custom_github".to_string(), + rule_with_endpoint("custom_github", "api.github.com", 443), + ); + + let incoming = NetworkPolicyRule { + name: "ignored_when_merging".to_string(), + endpoints: vec![endpoint("api.github.com", 443)], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + let result = merge_policy( + policy, + &[PolicyMergeOp::AddRule { + rule_name: "different_name".to_string(), + rule: incoming, + }], + ) + .expect("merge should succeed"); + + // No new rule entry was created — the chunk merged into the + // existing user rule via endpoint overlap. + assert!( + !result + .policy + .network_policies + .contains_key("different_name"), + "user-authored rule overlap should still merge (no new key); \ + got keys: {:?}", + result.policy.network_policies.keys().collect::<Vec<_>>() + ); + let merged = &result.policy.network_policies["custom_github"]; + assert!( + merged.binaries.iter().any(|b| b.path == "/usr/bin/curl"), + "user rule should have absorbed the incoming curl binary" + ); + } } diff --git a/crates/openshell-prover/README.md b/crates/openshell-prover/README.md new file mode 100644 index 000000000..f8b45eca6 --- /dev/null +++ b/crates/openshell-prover/README.md @@ -0,0 +1,136 @@ + + + +# openshell-prover + +Formal verifier for OpenShell sandbox policies. Encodes a policy + its +attached credential set + a binary capability registry as a Z3 SMT +model, then runs reachability queries to detect credentialed-reach and +capability changes a reviewer should be aware of. + +Used by the gateway to gate auto-approval of agent-authored policy +proposals: any finding blocks auto-approval, an empty delta lets the +chunk pass through (when the reviewer opts in via the +`proposal_approval_mode` setting at either gateway or sandbox scope).
+ +## What it decides + +The prover answers four formal questions. Each "yes" answer is its own +categorical finding — there is no severity grade. The categories live +in [`finding::category`](src/finding.rs). + +| Category | Question the prover decides | +|---|---| +| `link_local_reach` | Does this policy grant reach to a host in `169.254.0.0/16` or `fe80::/10`? | +| `l7_bypass_credentialed` | Does it let a binary using a non-HTTP wire protocol (per the binary registry's `bypasses_l7` flag) reach a host where a credential is in scope? | +| `credential_reach_expansion` | Does it let a binary reach a (host, port) with a credential in scope, where the binary couldn't reach that endpoint before? | +| `capability_expansion` | On a (binary, host, port) the binary already reaches with credentials, does it add a new HTTP method? | + +The first two are unconditional risks. The latter two are *delta* +properties — the gateway runs the prover on both the baseline policy +and the merged policy and surfaces only the new paths. + +## Evidence shape + +Each finding carries one or more [`FindingPath::Exfil`](src/finding.rs) +entries: + +```rust +pub struct ExfilPath { + pub binary: String, + pub endpoint_host: String, + pub endpoint_port: u16, + pub mechanism: String, // human-readable description + pub policy_name: String, // rule the path traverses + pub category: String, // one of the category constants + pub method: String, // populated for capability_expansion; empty otherwise +} +``` + +The gateway's `finding_delta` keys paths by `(category, binary, +host:port, method)` so that adding a new method on an +already-reached host surfaces as exactly one new path (not the whole +re-emission of the existing method set). + +### Category suppression at the delta layer + +`capability_expansion` paths whose `(binary, host, port)` tuple is also +in the `credential_reach_expansion` delta are suppressed by the +gateway.
A brand-new credentialed reach is described by the +reach-expansion finding alone, not also by N per-method findings. + +## Adding a new category + +1. Add a constant to `src/finding.rs::category`. +2. In `src/queries.rs::check_credential_safety`, add the branch that + detects the new category and emits one `ExfilPath` per evidence + row. Set `path.category` to the new constant. +3. In `src/report.rs::format_path_line`, add a `match` arm rendering + the per-path display string the reviewer sees. +4. (Gateway) If the new category should be suppressed by another, add + the suppression rule to `crates/openshell-server/src/grpc/policy.rs::finding_delta`. +5. Add a unit test in `src/queries.rs` and an integration test in + `crates/openshell-server/src/grpc/policy.rs::tests`. + +The four v1 categories cover the formal properties the OpenShell +auto-approval gate cares about today. Additional categories (e.g., +"destructive method introduced," "new outbound TLS without SNI") would +be additive — they don't displace existing categories. + +## What the prover does *not* decide + +- **Semantic risk of an action.** The prover models *can the binary do + this?*, not *is this destructive?*. `PUT /repos/.../contents/file.md` + and `GET /repos/.../contents/file.md` are both authenticated actions; + the reviewer (or a downstream layer like an LLM contextual reviewer + or an intent file) decides if the action is desired. +- **Cross-sandbox or cross-binary intent.** The model is per-sandbox. + If two sandboxes share a credential through external policy, the + prover reasons about each independently. +- **Runtime behavior.** The prover analyzes the policy as written; it + doesn't observe the proxy's actual decisions. The proxy is the + enforcement layer; the prover is the change-review layer. + +## Inputs + +- **Policy** — a `SandboxPolicy` proto, parsed via + `openshell-policy::parse_sandbox_policy`. 
+- **Credential set** — built from the sandbox's attached providers in + `crates/openshell-server/src/grpc/policy.rs::build_credential_set_for_sandbox`. + v1 captures presence only (host-coarse); no scope modeling. +- **Binary registry** — YAML descriptors at + `crates/openshell-prover/registry/binaries/*.yaml`. Each describes + the binary's protocols, `bypasses_l7` flag, and `can_exfiltrate` + capability. + +## Outputs + +- A list of `Finding` values, one per fired category. Each finding's + `query` field holds the category name. +- The CLI renderer (`report::render_compact` / `render_report`) prints + human-readable output for the `openshell-prover` binary. +- The gateway calls `report::finding_shorthand` to build the + `validation_result` string persisted on each draft chunk. + +## Z3 model layout + +See `src/model.rs`. Briefly: + +- Bool sorts per `(binary, endpoint)` pair encode policy reachability, + filtered by binary capability flags (`can_exfiltrate`, + `bypasses_l7`). +- Bool sorts per `(binary, host)` encode credential-in-scope (one + credential set per sandbox). +- The reachability formula composes these into the SAT query the + `queries::check_credential_safety` loop iterates over. + +## Tests + +- Unit tests in each module (`src/queries.rs`, `src/report.rs`, + `src/policy.rs`) cover individual primitives and category emission. +- Integration tests in `src/lib.rs::tests` exercise the full + parse → build_model → run_all_queries pipeline against testdata + policies in `testdata/`. +- Gateway-level acceptance tests in + `crates/openshell-server/src/grpc/policy.rs::tests` lock in the + end-to-end `validation_result` shape and the auto-approval gate. 
diff --git a/crates/openshell-prover/src/accepted_risks.rs b/crates/openshell-prover/src/accepted_risks.rs index 61aa025be..8c28a4418 100644 --- a/crates/openshell-prover/src/accepted_risks.rs +++ b/crates/openshell-prover/src/accepted_risks.rs @@ -80,23 +80,12 @@ pub fn load_accepted_risks(path: &Path) -> Result> { /// Check if a single finding path matches an accepted risk. fn path_matches_risk(path: &FindingPath, risk: &AcceptedRisk) -> bool { - if !risk.binary.is_empty() { - let path_binary = match path { - FindingPath::Exfil(p) => &p.binary, - FindingPath::WriteBypass(p) => &p.binary, - }; - if path_binary != &risk.binary { - return false; - } + let FindingPath::Exfil(p) = path; + if !risk.binary.is_empty() && p.binary != risk.binary { + return false; } - if !risk.endpoint.is_empty() { - let endpoint_host = match path { - FindingPath::Exfil(p) => &p.endpoint_host, - FindingPath::WriteBypass(p) => &p.endpoint_host, - }; - if endpoint_host != &risk.endpoint { - return false; - } + if !risk.endpoint.is_empty() && p.endpoint_host != risk.endpoint { + return false; } true } diff --git a/crates/openshell-prover/src/credentials.rs b/crates/openshell-prover/src/credentials.rs index dffbc2e8b..586d0fbbf 100644 --- a/crates/openshell-prover/src/credentials.rs +++ b/crates/openshell-prover/src/credentials.rs @@ -135,18 +135,115 @@ pub struct CredentialSet { } impl CredentialSet { - /// Credentials that target a given host. + /// Credentials that target a given host. Matching mirrors runtime host + /// policy semantics for exact names and first-label wildcards, so a + /// proposal for `*.github.com` is treated as credentialed when the + /// attached credential targets `api.github.com`. 
pub fn credentials_for_host(&self, host: &str) -> Vec<&Credential> { self.credentials .iter() - .filter(|c| c.target_hosts.iter().any(|h| h == host)) + .filter(|c| { + c.target_hosts + .iter() + .any(|target| host_patterns_overlap(host, target)) + }) .collect() } - /// API capability registry for a given host. + /// API capability registry for a given host. Exact matches win, then + /// wildcard host overlap is used so credentialed wildcard proposals can be + /// evaluated against concrete API capability registries. pub fn api_for_host(&self, host: &str) -> Option<&ApiCapability> { - self.api_registries.values().find(|api| api.host == host) + let needle = normalize_host(host); + self.api_registries + .values() + .find(|api| normalize_host(&api.host) == needle) + .or_else(|| { + self.api_registries + .values() + .find(|api| host_patterns_overlap(host, &api.host)) + }) + } +} + +fn normalize_host(host: &str) -> String { + host.trim().trim_end_matches('.').to_ascii_lowercase() +} + +fn host_patterns_overlap(left: &str, right: &str) -> bool { + let left = normalize_host(left); + let right = normalize_host(right); + if left.is_empty() || right.is_empty() { + return false; + } + left == right || host_pattern_covers(&left, &right) || host_pattern_covers(&right, &left) +} + +fn host_pattern_covers(pattern: &str, host: &str) -> bool { + let pattern_labels: Vec<&str> = pattern.split('.').collect(); + let host_labels: Vec<&str> = host.split('.').collect(); + let Some(first_pattern_label) = pattern_labels.first().copied() else { + return false; + }; + + if first_pattern_label == "**" { + let suffix = &pattern_labels[1..]; + let host_suffix = host_labels + .len() + .checked_sub(suffix.len()) + .map(|start| &host_labels[start..]); + return !suffix.is_empty() + && host_labels.len() > suffix.len() + && matches!(host_suffix, Some(host_suffix) if host_suffix == suffix); + } + + if !first_pattern_label.contains('*') { + return false; + } + + // Runtime host wildcards only apply in 
the first DNS label. Wildcards in + // later labels are not treated as policy globs here. + pattern_labels.len() == host_labels.len() + && pattern_labels[1..] == host_labels[1..] + && wildcard_label_matches(first_pattern_label, host_labels[0]) +} + +fn wildcard_label_matches(pattern: &str, label: &str) -> bool { + if pattern == "*" { + return !label.is_empty(); + } + if label.is_empty() || !pattern.contains('*') { + return false; + } + + let parts: Vec<&str> = pattern.split('*').collect(); + let mut remaining = label; + + if let Some(prefix) = parts.first().copied().filter(|part| !part.is_empty()) { + let Some(stripped) = remaining.strip_prefix(prefix) else { + return false; + }; + remaining = stripped; + } + + if parts.len() > 2 { + for part in parts[1..parts.len() - 1] + .iter() + .copied() + .filter(|part| !part.is_empty()) + { + let Some(offset) = remaining.find(part) else { + return false; + }; + remaining = &remaining[offset + part.len()..]; + } } + + parts + .last() + .copied() + .filter(|suffix| !suffix.is_empty()) + .is_none_or(|suffix| remaining.ends_with(suffix)) } // --------------------------------------------------------------------------- @@ -267,3 +364,97 @@ pub fn load_credential_set_from_dir( api_registries, }) } + +#[cfg(test)] +mod tests { + use super::*; + + fn github_credential() -> Credential { + Credential { + name: "github-pat".to_string(), + cred_type: "github-pat".to_string(), + scopes: vec!["repo".to_string()], + injected_via: "GITHUB_TOKEN".to_string(), + target_hosts: vec!["api.github.com".to_string()], + } + } + + fn github_api() -> ApiCapability { + ApiCapability { + api: "github".to_string(), + host: "api.github.com".to_string(), + port: 443, + credential_type: "github-pat".to_string(), + scope_capabilities: HashMap::new(), + action_risk: HashMap::new(), + } + } + + #[test] + fn host_patterns_overlap_matches_exact_case_and_trailing_dot() { + assert!(host_patterns_overlap("API.GITHUB.COM.", "api.github.com")); + 
assert!(!host_patterns_overlap( + "api.github.com", + "uploads.github.com" + )); + } + + #[test] + fn host_patterns_overlap_matches_first_label_wildcard_only() { + assert!(host_patterns_overlap("*.github.com", "api.github.com")); + assert!(!host_patterns_overlap("*.github.com", "github.com")); + assert!(!host_patterns_overlap( + "*.github.com", + "deep.api.github.com" + )); + } + + #[test] + fn host_patterns_overlap_matches_intra_label_first_label_wildcard() { + assert!(host_patterns_overlap( + "api-*.github.com", + "api-v3.github.com" + )); + assert!(!host_patterns_overlap( + "api-*.github.com", + "uploads.github.com" + )); + assert!(!host_patterns_overlap( + "api.*.github.com", + "api.v3.github.com" + )); + } + + #[test] + fn host_patterns_overlap_matches_recursive_first_label_wildcard() { + assert!(host_patterns_overlap("**.github.com", "api.github.com")); + assert!(host_patterns_overlap( + "**.github.com", + "deep.api.github.com" + )); + assert!(!host_patterns_overlap("**.github.com", "github.com")); + } + + #[test] + fn wildcard_policy_host_finds_credentialed_concrete_target() { + let set = CredentialSet { + credentials: vec![github_credential()], + api_registries: HashMap::new(), + }; + + let creds = set.credentials_for_host("*.github.com"); + assert_eq!(creds.len(), 1); + assert_eq!(creds[0].name, "github-pat"); + } + + #[test] + fn wildcard_policy_host_finds_concrete_api_registry() { + let set = CredentialSet { + credentials: Vec::new(), + api_registries: HashMap::from([("github".to_string(), github_api())]), + }; + + let api = set.api_for_host("*.github.com").expect("github API"); + assert_eq!(api.host, "api.github.com"); + } +} diff --git a/crates/openshell-prover/src/finding.rs b/crates/openshell-prover/src/finding.rs index ab4d4f47f..4e06d1b4e 100644 --- a/crates/openshell-prover/src/finding.rs +++ b/crates/openshell-prover/src/finding.rs @@ -2,26 +2,41 @@ // SPDX-License-Identifier: Apache-2.0 //! Finding types emitted by verification queries. +//! 
+//! The prover answers four formal questions about a proposed policy and +//! emits one finding category per "yes" answer. Findings are categorical +//! (not severity-graded): the reviewer reads the category name and the +//! structured evidence to decide. The auto-approval gate is binary — +//! delta empty = candidate for auto-approval; any finding = human review. +//! +//! Categories: +//! +//! - `credential_reach_expansion` — a binary gained credentialed reach to +//! a (host, port) it could not reach before. +//! - `capability_expansion` — on a (binary, host, port) that already had +//! credentialed reach, a new HTTP method was added. +//! - `l7_bypass_credentialed` — a binary using a wire protocol the L7 +//! proxy cannot inspect (`git-remote-https`, `ssh`, `nc`) gained reach +//! to a host where a credential is in scope. +//! - `link_local_reach` — any reach to a link-local IP range +//! (`169.254.0.0/16`, `fe80::/10`), unconditional. Cloud metadata +//! endpoints serve credentials regardless of the sandbox's own +//! credential state. -use std::fmt; - -/// Severity level for a finding. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum RiskLevel { - High, - Critical, -} - -impl fmt::Display for RiskLevel { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::High => write!(f, "HIGH"), - Self::Critical => write!(f, "CRITICAL"), - } - } +/// Stable category names. Used as the `query` field on [`Finding`] and +/// in the per-path key used by `finding_delta`. +pub mod category { + pub const CREDENTIAL_REACH_EXPANSION: &str = "credential_reach_expansion"; + pub const CAPABILITY_EXPANSION: &str = "capability_expansion"; + pub const L7_BYPASS_CREDENTIALED: &str = "l7_bypass_credentialed"; + pub const LINK_LOCAL_REACH: &str = "link_local_reach"; } -/// A concrete path through which data can be exfiltrated. +/// A concrete path through which the prover observed a tracked property. 
+/// +/// One `ExfilPath` per (binary, host, port, category) tuple — plus +/// `method` for `capability_expansion` so the gateway's per-path delta +/// surfaces the specific method that was added. #[derive(Debug, Clone)] pub struct ExfilPath { pub binary: String, @@ -29,37 +44,30 @@ pub struct ExfilPath { pub endpoint_port: u16, pub mechanism: String, pub policy_name: String, - /// One of `"l4_only"`, `"l7_allows_write"`, `"l7_bypassed"`. - pub l7_status: String, -} - -/// A path that allows writing despite read-only intent. -#[derive(Debug, Clone)] -pub struct WriteBypassPath { - pub binary: String, - pub endpoint_host: String, - pub endpoint_port: u16, - pub policy_name: String, - pub policy_intent: String, - /// One of `"l4_only"`, `"l7_bypass_protocol"`, `"credential_write_scope"`. - pub bypass_reason: String, - pub credential_actions: Vec, + /// Category name (see `category::*` constants). + pub category: String, + /// HTTP method, populated only for `capability_expansion` paths. + /// Empty string for the other categories. + pub method: String, } /// Concrete evidence attached to a [`Finding`]. #[derive(Debug, Clone)] pub enum FindingPath { Exfil(ExfilPath), - WriteBypass(WriteBypassPath), } /// A single verification finding. +/// +/// `query` is the category name (one of the `category::*` constants). +/// Each finding carries one or more `paths` with the structured evidence +/// the reviewer needs to decide. There is no severity field — the +/// category itself is the signal. #[derive(Debug, Clone)] pub struct Finding { pub query: String, pub title: String, pub description: String, - pub risk: RiskLevel, pub paths: Vec, pub remediation: Vec, pub accepted: bool, diff --git a/crates/openshell-prover/src/lib.rs b/crates/openshell-prover/src/lib.rs index 82922253d..226705204 100644 --- a/crates/openshell-prover/src/lib.rs +++ b/crates/openshell-prover/src/lib.rs @@ -157,9 +157,13 @@ filesystem_policy: assert_eq!(sandbox_count, 1); } - // 6. 
End-to-end: git push bypass findings detected (uses embedded registry). + // 6. End-to-end: testdata policy with a github credential in scope and a + // bypass-L7 binary (git) emits an `l7_bypass_credentialed` finding. + // The prover output is categorical, not severity-graded. #[test] - fn test_git_push_bypass_findings() { + fn test_findings_for_github_policy() { + use finding::category; + let policy_path = testdata_dir().join("policy.yaml"); let creds_path = testdata_dir().join("credentials.yaml"); @@ -170,23 +174,102 @@ filesystem_policy: let z3_model = build_model(pol, cred_set, bin_reg); let findings = run_all_queries(&z3_model); - let query_types: std::collections::HashSet<&str> = + let categories: std::collections::HashSet<&str> = findings.iter().map(|f| f.query.as_str()).collect(); assert!( - query_types.contains("data_exfiltration"), - "expected data_exfiltration finding" - ); - assert!( - query_types.contains("write_bypass"), - "expected write_bypass finding" - ); - assert!( - findings.iter().any(|f| matches!( - f.risk, - finding::RiskLevel::Critical | finding::RiskLevel::High - )), - "expected at least one critical/high finding" + categories.contains(category::L7_BYPASS_CREDENTIALED), + "expected l7_bypass_credentialed finding for bypass-L7 binary with credential in scope; \ + got categories: {categories:?}" ); + // Every emitted category must be one of the four v1 categories. 
+ let allowed: std::collections::HashSet<&str> = [ + category::LINK_LOCAL_REACH, + category::L7_BYPASS_CREDENTIALED, + category::CREDENTIAL_REACH_EXPANSION, + category::CAPABILITY_EXPANSION, + ] + .into_iter() + .collect(); + for c in &categories { + assert!( + allowed.contains(*c), + "unexpected category {c} emitted by the prover" + ); + } + } + + #[test] + fn test_wildcard_endpoint_covering_credential_host_emits_credential_reach() { + use finding::{FindingPath, category}; + + let policy = policy::parse_policy_str( + r#" +version: 1 +network_policies: + github_wildcard: + name: github-wildcard + endpoints: + - host: "*.github.com" + port: 443 + protocol: rest + enforcement: enforce + access: read-write + binaries: + - path: /usr/bin/curl +"#, + ) + .expect("parse policy"); + let cred_set = + credentials::load_credential_set_embedded(&testdata_dir().join("credentials.yaml")) + .expect("load creds"); + let bin_reg = registry::load_embedded_binary_registry().expect("load registry"); + + let z3_model = build_model(policy, cred_set, bin_reg); + let findings = run_all_queries(&z3_model); + + let reach = findings + .iter() + .find(|finding| finding.query == category::CREDENTIAL_REACH_EXPANSION) + .expect("wildcard host covering api.github.com must be credentialed"); + assert!(reach.paths.iter().any(|path| matches!( + path, + FindingPath::Exfil(exfil) + if exfil.endpoint_host == "*.github.com" && exfil.binary == "/usr/bin/curl" + ))); + } + + #[test] + fn test_known_metadata_hostname_emits_link_local_finding() { + use finding::{FindingPath, category}; + + let policy = policy::parse_policy_str( + r" +version: 1 +network_policies: + metadata: + name: metadata + endpoints: + - host: metadata.google.internal + port: 80 + binaries: + - path: /usr/bin/curl +", + ) + .expect("parse policy"); + let bin_reg = registry::load_embedded_binary_registry().expect("load registry"); + + let z3_model = build_model(policy, credentials::CredentialSet::default(), bin_reg); + let findings = 
run_all_queries(&z3_model); + + let link_local = findings + .iter() + .find(|finding| finding.query == category::LINK_LOCAL_REACH) + .expect("known metadata hostname must emit link-local/metadata finding"); + assert!(link_local.paths.iter().any(|path| matches!( + path, + FindingPath::Exfil(exfil) + if exfil.endpoint_host == "metadata.google.internal" + ))); } // 7. Empty policy produces no findings. diff --git a/crates/openshell-prover/src/queries.rs b/crates/openshell-prover/src/queries.rs index 6a0c7f6a6..24e1402b7 100644 --- a/crates/openshell-prover/src/queries.rs +++ b/crates/openshell-prover/src/queries.rs @@ -1,22 +1,83 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -//! Verification queries: `check_data_exfiltration` and `check_write_bypass`. +//! Verification queries. +//! +//! The prover answers four formal questions about a policy and emits one +//! finding category per "yes" answer (see +//! [`crate::finding::category`] for the canonical names). The output is +//! categorical — there is no severity grade. The gateway's +//! `finding_delta` decides which findings are *new* relative to a +//! baseline, and the auto-approval gate triggers when no new findings +//! exist. +//! +//! Categories: +//! +//! 1. **Link-local reach** — any reachable path to a host in +//! `169.254.0.0/16` or `fe80::/10`. Emitted unconditionally: +//! cloud-metadata endpoints serve credentials, so reachability alone +//! is the risk. +//! 2. **L7-bypass + credential** — a binary whose wire protocol the L7 +//! proxy cannot inspect (`git-remote-https`, `ssh`, `nc`) gains reach +//! to a host where a sandbox credential is in scope. +//! 3. **Credential reach expansion** — a binary gains credentialed reach +//! to a host:port it could not reach before. The gateway's delta +//! surfaces only newly-reachable tuples. +//! 4. 
**Capability expansion** — on a (binary, host, port) that already +//! had credentialed reach, the policy adds a new HTTP method. The +//! gateway's delta surfaces only newly-allowed methods. +//! +//! These categories are intended to be (mostly) mutually exclusive per +//! underlying change: at the gateway, `capability_expansion` paths whose +//! `(binary, host, port)` is also in the `credential_reach_expansion` +//! delta are suppressed, so a brand-new credentialed reach surfaces as +//! one `credential_reach_expansion` finding rather than that plus N +//! capability findings. See `crates/openshell-server/src/grpc/policy.rs`. + +use std::collections::HashSet; +use std::net::IpAddr; use z3::SatResult; -use crate::finding::{ExfilPath, Finding, FindingPath, RiskLevel, WriteBypassPath}; +use crate::finding::{ExfilPath, Finding, FindingPath, category}; use crate::model::ReachabilityModel; -use crate::policy::PolicyIntent; -/// Check for data exfiltration paths from readable filesystem to writable -/// egress channels. -pub fn check_data_exfiltration(model: &ReachabilityModel) -> Vec<Finding> { - if model.policy.filesystem_policy.readable_paths().is_empty() { - return Vec::new(); +/// Return true iff the host string parses as an IP in a reserved +/// link-local range (IPv4 `169.254.0.0/16` or IPv6 `fe80::/10`). +/// +/// Hostname-only strings (not parseable as IPs) return false. We don't +/// perform DNS resolution at validation time; the model evaluates the +/// policy as written. +pub(crate) fn is_link_local(host: &str) -> bool { + match host.parse::<IpAddr>() { + Ok(IpAddr::V4(v4)) => v4.is_link_local(), + Ok(IpAddr::V6(v6)) => v6.is_unicast_link_local(), + Err(_) => false, } +} - let mut exfil_paths: Vec<ExfilPath> = Vec::new(); +/// Return true for static cloud metadata hostnames that should be treated like +/// link-local metadata reach without performing DNS resolution.
+pub(crate) fn is_known_metadata_hostname(host: &str) -> bool { + let normalized = host.trim().trim_end_matches('.').to_ascii_lowercase(); + matches!(normalized.as_str(), "metadata.google.internal") +} + +fn is_link_local_or_metadata_host(host: &str) -> bool { + is_link_local(host) || is_known_metadata_hostname(host) +} + +/// Run all four formal queries against the model and emit one finding +/// per category that has at least one path. +/// +/// We deliberately do NOT gate on `filesystem_policy.readable_paths()` +/// being non-empty: the credential itself is the lever for the tracked +/// risks, not anything in `/etc/`. +pub fn check_credential_safety(model: &ReachabilityModel) -> Vec<Finding> { + let mut reach_paths: Vec<ExfilPath> = Vec::new(); + let mut capability_paths: Vec<ExfilPath> = Vec::new(); + let mut bypass_paths: Vec<ExfilPath> = Vec::new(); + let mut link_local_paths: Vec<ExfilPath> = Vec::new(); for bpath in &model.binary_paths { let cap = model.binary_registry.get_or_unknown(bpath); @@ -26,228 +87,261 @@ pub fn check_data_exfiltration(model: &ReachabilityModel) -> Vec<Finding> { for eid in &model.endpoints { let expr = model.can_exfil_via_endpoint(bpath, eid); + if model.check_sat(&expr) != SatResult::Sat { + continue; + } + + let host_is_link_local = is_link_local_or_metadata_host(&eid.host); + let has_credential = !model.credentials.credentials_for_host(&eid.host).is_empty(); - if model.check_sat(&expr) == SatResult::Sat { - // Determine L7 status and mechanism - let ep_is_l7 = is_endpoint_l7_enforced(&model.policy, &eid.host, eid.port); - let bypass = cap.bypasses_l7(); - - let (l7_status, mut mechanism) = if bypass { - ( - "l7_bypassed".to_owned(), - format!( - "{} — uses non-HTTP protocol, bypasses L7 inspection", - cap.description - ), - ) - } else if !ep_is_l7 { - ( - "l4_only".to_owned(), - format!( - "L4-only endpoint — no HTTP inspection, {bpath} can send arbitrary data" - ), - ) - } else { - // L7 is enforced and allows write — policy is - // working as intended. Not a finding.
- continue; - }; - - if !cap.exfil_mechanism.is_empty() { - mechanism = format!("{}. Exfil via: {}", mechanism, cap.exfil_mechanism); - } - - exfil_paths.push(ExfilPath { + // Tier 1: link-local/metadata. Unconditional. Other categories + // are not emitted on these hosts — the metadata signal is the + // story. + if host_is_link_local { + link_local_paths.push(ExfilPath { binary: bpath.clone(), endpoint_host: eid.host.clone(), endpoint_port: eid.port, - mechanism, + mechanism: format!( + "Link-local endpoint — {bpath} can reach the host's metadata range \ + (cloud-credential exfiltration territory regardless of declared scopes)" + ), policy_name: eid.policy_name.clone(), - l7_status, + category: category::LINK_LOCAL_REACH.to_string(), + method: String::new(), }); + continue; } - } - } - - if exfil_paths.is_empty() { - return Vec::new(); - } - - let readable = model.policy.filesystem_policy.readable_paths(); - let has_l4_only = exfil_paths.iter().any(|p| p.l7_status == "l4_only"); - let has_bypass = exfil_paths.iter().any(|p| p.l7_status == "l7_bypassed"); - let risk = if has_l4_only || has_bypass { - RiskLevel::Critical - } else { - RiskLevel::High - }; - let mut remediation = Vec::new(); - if has_l4_only { - remediation.push( - "Add `protocol: rest` with specific L7 rules to L4-only endpoints \ - to enable HTTP inspection and restrict to safe methods/paths." - .to_owned(), - ); - } - if has_bypass { - remediation.push( - "Binaries using non-HTTP protocols (git, ssh, nc) bypass L7 inspection. \ - Remove these binaries from the policy if write access is not intended, \ - or restrict credential scopes to read-only." 
- .to_owned(), - ); - } - remediation - .push("Restrict filesystem read access to only the paths the agent needs.".to_owned()); - - let paths: Vec<FindingPath> = exfil_paths.into_iter().map(FindingPath::Exfil).collect(); - - let n_paths = paths.len(); - vec![Finding { - query: "data_exfiltration".to_owned(), - title: "Data Exfiltration Paths Detected".to_owned(), - description: format!( - "{n_paths} exfiltration path(s) found from {} readable filesystem path(s) to external endpoints.", - readable.len() - ), - risk, - paths, - remediation, - accepted: false, - accepted_reason: String::new(), - }] -} - -/// Check for write capabilities that bypass read-only policy intent. -pub fn check_write_bypass(model: &ReachabilityModel) -> Vec<Finding> { - let mut bypass_paths: Vec<WriteBypassPath> = Vec::new(); + // Un-credentialed reach is not a tracked risk. + if !has_credential { + continue; + } - for (policy_name, rule) in &model.policy.network_policies { - for ep in &rule.endpoints { - // Only check endpoints where the intent is read-only or L4-only - let intent = ep.intent(); - if !matches!(intent, PolicyIntent::ReadOnly) { + // Tier 2: bypass-L7 binary on a credentialed host. Wire + // protocol cannot be inspected; mark and move on.
+ if cap.bypasses_l7() { + bypass_paths.push(ExfilPath { + binary: bpath.clone(), + endpoint_host: eid.host.clone(), + endpoint_port: eid.port, + mechanism: format!( + "{} — uses non-HTTP protocol, bypasses L7 inspection, and a credential \ + is in scope for this host", + cap.description + ), + policy_name: eid.policy_name.clone(), + category: category::L7_BYPASS_CREDENTIALED.to_string(), + method: String::new(), + }); continue; } - for port in ep.effective_ports() { - for b in &rule.binaries { - let cap = model.binary_registry.get_or_unknown(&b.path); - - // Check: binary bypasses L7 and can write - if cap.bypasses_l7() && cap.can_write() { - let cred_actions = collect_credential_actions(model, &ep.host, &cap); - if !cred_actions.is_empty() - || model.credentials.credentials_for_host(&ep.host).is_empty() - { - bypass_paths.push(WriteBypassPath { - binary: b.path.clone(), - endpoint_host: ep.host.clone(), - endpoint_port: port, - policy_name: policy_name.clone(), - policy_intent: intent.to_string(), - bypass_reason: "l7_bypass_protocol".to_owned(), - credential_actions: cred_actions, - }); - } - } - - // Check: L4-only endpoint + binary can construct HTTP + credential has write - if !ep.is_l7_enforced() && cap.can_construct_http { - let cred_actions = collect_credential_actions(model, &ep.host, &cap); - if !cred_actions.is_empty() { - bypass_paths.push(WriteBypassPath { - binary: b.path.clone(), - endpoint_host: ep.host.clone(), - endpoint_port: port, - policy_name: policy_name.clone(), - policy_intent: intent.to_string(), - bypass_reason: "l4_only".to_owned(), - credential_actions: cred_actions, - }); - } - } - } + // Tiers 3 + 4: credentialed L7 reach. We emit both + // credential_reach_expansion and capability_expansion paths + // here; the gateway's delta will keep only the relevant + // category (see `finding_delta` and the suppression rule). 
+ reach_paths.push(ExfilPath { + binary: bpath.clone(), + endpoint_host: eid.host.clone(), + endpoint_port: eid.port, + mechanism: format!( + "Binary {bpath} has credentialed reach to {host}:{port}", + host = eid.host, + port = eid.port, + ), + policy_name: eid.policy_name.clone(), + category: category::CREDENTIAL_REACH_EXPANSION.to_string(), + method: String::new(), + }); + + // One capability_expansion path per allowed method on this + // (binary, host:port) under this specific rule. + let methods = endpoint_allowed_methods_in_rule( + &model.policy, + &eid.policy_name, + &eid.host, + eid.port, + ); + for method in methods { + capability_paths.push(ExfilPath { + binary: bpath.clone(), + endpoint_host: eid.host.clone(), + endpoint_port: eid.port, + mechanism: format!( + "Method {method} allowed for {bpath} on {host}:{port}", + host = eid.host, + port = eid.port, + ), + policy_name: eid.policy_name.clone(), + category: category::CAPABILITY_EXPANSION.to_string(), + method, + }); } } } - if bypass_paths.is_empty() { - return Vec::new(); + let mut findings = Vec::new(); + if !link_local_paths.is_empty() { + findings.push(build_finding( + category::LINK_LOCAL_REACH, + "Link-Local or Metadata Reach", + "Reach to a host in a link-local range or known metadata hostname — cloud-metadata territory.", + link_local_paths, + vec![ + "Endpoint host is in a link-local range or known metadata hostname \ + (cloud-metadata territory). Sandboxes should not reach these \ + endpoints — reaching them can return host credentials the sandbox \ + should not have." + .to_owned(), + ], + )); + } + if !bypass_paths.is_empty() { + findings.push(build_finding( + category::L7_BYPASS_CREDENTIALED, + "L7-Bypass Binary with Credential in Scope", + "A binary using a wire protocol the L7 proxy cannot inspect has reach to \ + a host where a sandbox credential is in scope.", + bypass_paths, + vec![ + "Binaries using non-HTTP protocols (git, ssh, nc) bypass L7 inspection. 
\ + Remove these binaries from the policy if credentialed write access is \ + not intended." + .to_owned(), + ], + )); } + if !reach_paths.is_empty() { + findings.push(build_finding( + category::CREDENTIAL_REACH_EXPANSION, + "Credentialed Reach Expansion", + "A binary gained credentialed reach to a (host, port) it could not reach \ + before.", + reach_paths, + vec![ + "Credentialed reach is a privileged action surface. A human reviewer \ + should confirm the binary should be able to authenticate to this host \ + at all." + .to_owned(), + ], + )); + } + if !capability_paths.is_empty() { + findings.push(build_finding( + category::CAPABILITY_EXPANSION, + "Capability Expansion on Credentialed Host", + "New methods were added on a (binary, host, port) that already had \ + credentialed reach. The agent is changing what the sandbox can do with \ + its credentials.", + capability_paths, + vec![ + "A capability expansion is a stated intent change. The reviewer should \ + confirm the new methods (especially mutating methods like PUT, POST, \ + PATCH, DELETE) are part of the agent's task." + .to_owned(), + ], + )); + } + findings +} - let n = bypass_paths.len(); - let paths: Vec = bypass_paths - .into_iter() - .map(FindingPath::WriteBypass) - .collect(); - - vec![Finding { - query: "write_bypass".to_owned(), - title: "Write Bypass Detected — Read-Only Intent Violated".to_owned(), - description: format!("{n} path(s) allow write operations despite read-only policy intent."), - risk: RiskLevel::High, - paths, - remediation: vec![ - "For L4-only endpoints: add `protocol: rest` with `access: read-only` \ - to enable HTTP method filtering." - .to_owned(), - "For L7-bypassing binaries (git, ssh, nc): remove them from the policy's \ - binary list if write access is not intended." 
- .to_owned(), - "Restrict credential scopes to read-only where possible.".to_owned(), - ], +fn build_finding( + query: &str, + title: &str, + description: &str, + paths: Vec, + remediation: Vec, +) -> Finding { + let n = paths.len(); + Finding { + query: query.to_owned(), + title: title.to_owned(), + // Per-finding description prefixes the count with the category's + // canonical sentence so the audit string is self-describing. + description: format!("{description} ({n} path(s).)"), + paths: paths.into_iter().map(FindingPath::Exfil).collect(), + remediation, accepted: false, accepted_reason: String::new(), - }] + } } -/// Run both verification queries. +/// Run all queries (single entry point for end-to-end callers). pub fn run_all_queries(model: &ReachabilityModel) -> Vec { - let mut findings = Vec::new(); - findings.extend(check_data_exfiltration(model)); - findings.extend(check_write_bypass(model)); - findings + check_credential_safety(model) } // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- -/// Check whether an endpoint in the policy is L7-enforced. -fn is_endpoint_l7_enforced(policy: &crate::policy::PolicyModel, host: &str, port: u16) -> bool { - for rule in policy.network_policies.values() { - for ep in &rule.endpoints { - if ep.host == host && ep.effective_ports().contains(&port) { - return ep.is_l7_enforced(); - } +/// Allowed HTTP methods for the endpoint in `policy.network_policies[policy_name]` +/// matching `(host, port)`. Returns empty when the rule or endpoint is not +/// found (e.g. SAT path threaded through a stale model). 
+fn endpoint_allowed_methods_in_rule( + policy: &crate::policy::PolicyModel, + policy_name: &str, + host: &str, + port: u16, +) -> HashSet { + let Some(rule) = policy.network_policies.get(policy_name) else { + return HashSet::new(); + }; + for ep in &rule.endpoints { + if ep.host.eq_ignore_ascii_case(host) && ep.effective_ports().contains(&port) { + return ep.allowed_methods(); } } - false + HashSet::new() } -/// Collect human-readable credential action descriptions for a host. -fn collect_credential_actions( - model: &ReachabilityModel, - host: &str, - _cap: &crate::registry::BinaryCapability, -) -> Vec { - let creds = model.credentials.credentials_for_host(host); - let api = model.credentials.api_for_host(host); - let mut actions = Vec::new(); - - for cred in &creds { - if let Some(api) = api { - for wa in api.write_actions_for_scopes(&cred.scopes) { - actions.push(format!("{} {} ({})", wa.method, wa.path, wa.action)); - } - } else { - actions.push(format!( - "credential '{}' has scopes: {:?}", - cred.name, cred.scopes - )); - } +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn is_link_local_recognises_ipv4_169_254() { + assert!(is_link_local("169.254.169.254")); + assert!(is_link_local("169.254.0.1")); + assert!(is_link_local("169.254.255.255")); + } + + #[test] + fn is_link_local_recognises_ipv6_fe80() { + assert!(is_link_local("fe80::1")); + assert!(is_link_local("fe80::abcd:ef01")); + } + + #[test] + fn is_link_local_rejects_non_link_local_ips() { + assert!(!is_link_local("8.8.8.8")); + assert!(!is_link_local("10.0.0.1")); + assert!(!is_link_local("192.168.1.1")); + assert!(!is_link_local("::1")); + assert!(!is_link_local("2001:db8::1")); + } + + #[test] + fn is_link_local_rejects_hostnames() { + assert!(!is_link_local("api.github.com")); + assert!(!is_link_local("")); + } + + 
#[test] + fn is_known_metadata_hostname_recognises_gcp_variants() { + assert!(is_known_metadata_hostname("metadata.google.internal")); + assert!(is_known_metadata_hostname("METADATA.GOOGLE.INTERNAL")); + assert!(is_known_metadata_hostname("metadata.google.internal.")); + } + + #[test] + fn is_known_metadata_hostname_rejects_other_hostnames() { + assert!(!is_known_metadata_hostname("api.github.com")); + assert!(!is_known_metadata_hostname("")); } - actions } diff --git a/crates/openshell-prover/src/report.rs b/crates/openshell-prover/src/report.rs index 27207a6ae..f250eb1cd 100644 --- a/crates/openshell-prover/src/report.rs +++ b/crates/openshell-prover/src/report.rs @@ -2,191 +2,122 @@ // SPDX-License-Identifier: Apache-2.0 //! Terminal report rendering (full and compact). +//! +//! The prover output is categorical, not severity-graded. Each finding +//! names *what* the policy change does (e.g., `capability_expansion`); +//! per-path evidence carries the structured detail. There is no HIGH / +//! MEDIUM / CRITICAL grade — the category itself is the signal. 
-use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet}; use std::path::Path; use owo_colors::OwoColorize; -use crate::finding::{Finding, FindingPath, RiskLevel}; +use crate::finding::{Finding, FindingPath, category}; // --------------------------------------------------------------------------- -// Compact titles (short labels for each query type) +// Category labels (display strings keyed off `Finding.query`) // --------------------------------------------------------------------------- -fn compact_title(query: &str) -> &str { +fn category_label(query: &str) -> &str { match query { - "data_exfiltration" => "Data exfiltration possible", - "write_bypass" => "Write bypass \u{2014} read-only intent violated", - _ => "Unknown finding", + category::LINK_LOCAL_REACH => "link-local reach", + category::L7_BYPASS_CREDENTIALED => "L7-bypass binary with credential", + category::CREDENTIAL_REACH_EXPANSION => "credentialed reach expansion", + category::CAPABILITY_EXPANSION => "capability expansion on credentialed host", + _ => "unknown finding", } } // --------------------------------------------------------------------------- -// Compact detail line +// One-line shorthand (used by the gateway's `validation_result`) // --------------------------------------------------------------------------- -fn compact_detail(finding: &Finding) -> String { - match finding.query.as_str() { - "data_exfiltration" => { - let mut by_status: HashMap<&str, HashSet> = HashMap::new(); - for path in &finding.paths { - if let FindingPath::Exfil(p) = path { - by_status - .entry(&p.l7_status) - .or_default() - .insert(format!("{}:{}", p.endpoint_host, p.endpoint_port)); - } - } - let mut parts = Vec::new(); - if let Some(eps) = by_status.get("l4_only") { - let mut sorted: Vec<&String> = eps.iter().collect(); - sorted.sort(); - parts.push(format!( - "L4-only: {}", - sorted - .iter() - .map(|s| s.as_str()) - .collect::>() - .join(", ") - )); - } - if let Some(eps) = 
by_status.get("l7_bypassed") { - let mut sorted: Vec<&String> = eps.iter().collect(); - sorted.sort(); - parts.push(format!( - "wire protocol bypass: {}", - sorted - .iter() - .map(|s| s.as_str()) - .collect::>() - .join(", ") - )); - } - if let Some(eps) = by_status.get("l7_allows_write") { - let mut sorted: Vec<&String> = eps.iter().collect(); - sorted.sort(); - parts.push(format!( - "L7 write: {}", - sorted - .iter() - .map(|s| s.as_str()) - .collect::>() - .join(", ") - )); - } - parts.join("; ") - } - "write_bypass" => { - let mut reasons = HashSet::new(); - let mut endpoints = HashSet::new(); - for path in &finding.paths { - if let FindingPath::WriteBypass(p) = path { - reasons.insert(p.bypass_reason.as_str()); - endpoints.insert(format!("{}:{}", p.endpoint_host, p.endpoint_port)); - } - } - let mut sorted_eps: Vec<&String> = endpoints.iter().collect(); - sorted_eps.sort(); - let ep_list = sorted_eps - .iter() - .map(|s| s.as_str()) - .collect::>() - .join(", "); - if reasons.contains("l4_only") && reasons.contains("l7_bypass_protocol") { - format!("L4-only + wire protocol: {ep_list}") - } else if reasons.contains("l4_only") { - format!("L4-only (no inspection): {ep_list}") - } else if reasons.contains("l7_bypass_protocol") { - format!("wire protocol bypasses L7: {ep_list}") - } else { - String::new() - } - } - _ => String::new(), - } -} - -// --------------------------------------------------------------------------- -// Risk formatting -// --------------------------------------------------------------------------- - -fn risk_label(risk: RiskLevel) -> String { - match risk { - RiskLevel::Critical => "CRITICAL".to_owned(), - RiskLevel::High => "HIGH".to_owned(), +/// Render a finding as one or more single-line strings, suitable for +/// embedding in the gateway `validation_result`, demo output, and logs. +/// +/// Shape: `: ` — one line per path. 
The +/// gateway concatenates these into the chunk's `validation_result` so +/// the reviewer reads what changed without parsing the category enum. +pub fn finding_shorthand(finding: &Finding) -> String { + let mut lines = Vec::new(); + for path in &finding.paths { + let FindingPath::Exfil(p) = path; + lines.push(format_path_line(&finding.query, p)); } + lines.join("\n ") } -fn print_risk_label(risk: RiskLevel) { - match risk { - RiskLevel::Critical => print!("{}", "CRITICAL".bold().red()), - RiskLevel::High => print!("{}", " HIGH".red()), +fn format_path_line(query: &str, p: &crate::finding::ExfilPath) -> String { + let endpoint = format!("{}:{}", p.endpoint_host, p.endpoint_port); + match query { + category::LINK_LOCAL_REACH => { + format!("link_local_reach: {endpoint} via {}", p.binary) + } + category::L7_BYPASS_CREDENTIALED => { + format!("l7_bypass_credentialed: {endpoint} via {}", p.binary) + } + category::CREDENTIAL_REACH_EXPANSION => { + format!("credential_reach_expansion: {endpoint} via {}", p.binary) + } + category::CAPABILITY_EXPANSION => { + format!( + "capability_expansion: {method} on {endpoint} via {bin}", + method = p.method, + bin = p.binary + ) + } + _ => format!("{query}: {endpoint} via {}", p.binary), } } // --------------------------------------------------------------------------- -// Compact output +// Compact output (CLI lint mode) // --------------------------------------------------------------------------- -/// Render compact output (one-line-per-finding for demos and CI). -/// Returns exit code: 0 = pass, 1 = critical/high found. +/// Render compact output (one-line-per-finding-line for demos and CI). +/// Returns exit code: 0 = pass, 1 = any findings present. 
pub fn render_compact(findings: &[Finding], _policy_path: &str, _credentials_path: &str) -> i32 { let active: Vec<&Finding> = findings.iter().filter(|f| !f.accepted).collect(); let accepted: Vec<&Finding> = findings.iter().filter(|f| f.accepted).collect(); for finding in &active { - print!(" "); - print_risk_label(finding.risk); - println!(" {}", compact_title(&finding.query)); - let detail = compact_detail(finding); - if !detail.is_empty() { - println!(" {detail}"); + for path in &finding.paths { + let FindingPath::Exfil(p) = path; + println!(" {} {}", "•".yellow(), format_path_line(&finding.query, p)); + } + if !finding.paths.is_empty() { + println!(); } - println!(); } for finding in &accepted { println!( - " {} {}", + " {} {}", "ACCEPTED".dimmed(), - compact_title(&finding.query).dimmed() + category_label(&finding.query).dimmed() ); } if !accepted.is_empty() { println!(); } - // Verdict - let mut counts: HashMap = HashMap::new(); - for f in &active { - *counts.entry(f.risk).or_default() += 1; - } - let has_critical = counts.contains_key(&RiskLevel::Critical); - let has_high = counts.contains_key(&RiskLevel::High); let accepted_note = if accepted.is_empty() { String::new() } else { format!(", {} accepted", accepted.len()) }; - if has_critical || has_high { - let n = counts.get(&RiskLevel::Critical).unwrap_or(&0) - + counts.get(&RiskLevel::High).unwrap_or(&0); + let path_count: usize = active.iter().map(|f| f.paths.len()).sum(); + if path_count > 0 { println!( - " {} {n} critical/high gaps{accepted_note}", - " FAIL ".white().bold().on_red() + " {} {path_count} finding path(s) require review{accepted_note}", + " REVIEW ".black().bold().on_yellow() ); 1 - } else if !active.is_empty() { - println!( - " {} advisories only{accepted_note}", - " PASS ".black().bold().on_yellow() - ); - 0 } else { println!( - " {} all findings accepted{accepted_note}", + " {} no findings{accepted_note}", " PASS ".white().bold().on_green() ); 0 @@ -198,7 +129,7 @@ pub fn 
render_compact(findings: &[Finding], _policy_path: &str, _credentials_pat // --------------------------------------------------------------------------- /// Render a full terminal report with finding panels. -/// Returns exit code: 0 = pass, 1 = critical/high found. +/// Returns exit code: 0 = pass, 1 = any findings present. pub fn render_report(findings: &[Finding], policy_path: &str, credentials_path: &str) -> i32 { let policy_name = Path::new(policy_path) .file_name() @@ -221,50 +152,36 @@ pub fn render_report(findings: &[Finding], policy_path: &str, credentials_path: let active: Vec<&Finding> = findings.iter().filter(|f| !f.accepted).collect(); let accepted: Vec<&Finding> = findings.iter().filter(|f| f.accepted).collect(); - // Summary - let mut counts: HashMap = HashMap::new(); + // Per-category summary + let mut counts: BTreeMap<&str, usize> = BTreeMap::new(); for f in &active { - *counts.entry(f.risk).or_default() += 1; + *counts.entry(f.query.as_str()).or_default() += f.paths.len(); + } + + if active.is_empty() && accepted.is_empty() { + println!("{}", "No findings. Policy posture is clean.".green().bold()); + return 0; } println!("{}", "Finding Summary".bold().underline()); - for level in [RiskLevel::Critical, RiskLevel::High] { - if let Some(&count) = counts.get(&level) { - match level { - RiskLevel::Critical => { - println!(" {:>10} {count}", "CRITICAL".bold().red()); - } - RiskLevel::High => println!(" {:>10} {count}", "HIGH".red()), - } - } + for (query, count) in &counts { + println!(" {:>40} {count} path(s)", category_label(query).yellow()); } if !accepted.is_empty() { - println!(" {:>10} {}", "ACCEPTED".dimmed(), accepted.len()); + println!(" {:>40} {}", "ACCEPTED".dimmed(), accepted.len()); } println!(); - if active.is_empty() && accepted.is_empty() { - println!("{}", "No findings. 
Policy posture is clean.".green().bold()); - return 0; - } - - // Per-finding details for (i, finding) in active.iter().enumerate() { - let label = risk_label(finding.risk); - let border = match finding.risk { - RiskLevel::Critical => format!("{}", format!("[{label}]").bold().red()), - RiskLevel::High => format!("{}", format!("[{label}]").red()), - }; - - println!("--- Finding #{} {border} ---", i + 1); + println!( + "--- Finding #{} [{}] ---", + i + 1, + category_label(&finding.query) + ); println!(" {}", finding.title.bold()); println!(" {}", finding.description); println!(); - - // Render paths render_paths(&finding.paths); - - // Remediation if !finding.remediation.is_empty() { println!(" {}", "Remediation:".bold()); for r in &finding.remediation { @@ -274,13 +191,12 @@ pub fn render_report(findings: &[Finding], policy_path: &str, credentials_path: } } - // Accepted findings if !accepted.is_empty() { - println!("{}", "--- Accepted Risks ---".dimmed()); + println!("{}", "--- Accepted Findings ---".dimmed()); for finding in &accepted { println!( " {} {}", - risk_label(finding.risk).dimmed(), + category_label(&finding.query).dimmed(), finding.title.dimmed() ); println!( @@ -291,33 +207,20 @@ pub fn render_report(findings: &[Finding], policy_path: &str, credentials_path: } } - // Verdict - let has_critical = counts.contains_key(&RiskLevel::Critical); - let has_high = counts.contains_key(&RiskLevel::High); + let path_count: usize = active.iter().map(|f| f.paths.len()).sum(); let accepted_note = if accepted.is_empty() { String::new() } else { format!(" ({} accepted)", accepted.len()) }; - - if has_critical { - println!( - "{}{accepted_note}", - "FAIL \u{2014} Critical gaps found.".bold().red() - ); - 1 - } else if has_high { + if path_count > 0 { println!( "{}{accepted_note}", - "FAIL \u{2014} High-risk gaps found.".bold().red() + "REVIEW \u{2014} prover findings require human attention." 
+ .bold() + .yellow() ); 1 - } else if !active.is_empty() { - println!( - "{}{accepted_note}", - "PASS \u{2014} Advisories only.".bold().yellow() - ); - 0 } else { println!( "{}{accepted_note}", @@ -331,63 +234,134 @@ fn render_paths(paths: &[FindingPath]) { if paths.is_empty() { return; } - - match &paths[0] { - FindingPath::Exfil(_) => render_exfil_paths(paths), - FindingPath::WriteBypass(_) => render_write_bypass_paths(paths), - } -} - -fn render_exfil_paths(paths: &[FindingPath]) { - println!( - " {:<30} {:<25} {:<15} {}", - "Binary".bold(), - "Endpoint".bold(), - "L7 Status".bold(), - "Mechanism".bold(), - ); + // Group paths by binary for compact display. + let mut by_binary: BTreeMap<&str, Vec<&crate::finding::ExfilPath>> = BTreeMap::new(); for path in paths { - if let FindingPath::Exfil(p) = path { - let l7_display = match p.l7_status.as_str() { - "l4_only" => format!("{}", "L4-only".red()), - "l7_bypassed" => format!("{}", "bypassed".red()), - "l7_allows_write" => format!("{}", "L7 write".yellow()), - _ => p.l7_status.clone(), - }; - let ep = format!("{}:{}", p.endpoint_host, p.endpoint_port); - // Truncate mechanism for display - let mech = if p.mechanism.len() > 50 { - format!("{}...", &p.mechanism[..47]) - } else { - p.mechanism.clone() - }; - println!(" {:<30} {:<25} {:<15} {}", p.binary, ep, l7_display, mech); + let FindingPath::Exfil(p) = path; + by_binary.entry(&p.binary).or_default().push(p); + } + for (binary, ps) in &by_binary { + println!(" Binary: {}", binary.cyan()); + let mut endpoints: BTreeSet = BTreeSet::new(); + let mut methods: BTreeSet = BTreeSet::new(); + for p in ps { + endpoints.insert(format!("{}:{}", p.endpoint_host, p.endpoint_port)); + if !p.method.is_empty() { + methods.insert(p.method.clone()); + } + } + println!( + " Endpoints: {}", + endpoints.iter().cloned().collect::>().join(", ") + ); + if !methods.is_empty() { + println!( + " Methods: {}", + methods.iter().cloned().collect::>().join(", ") + ); } } println!(); } -fn 
render_write_bypass_paths(paths: &[FindingPath]) { - println!( - " {:<30} {:<25} {:<15} {}", - "Binary".bold(), - "Endpoint".bold(), - "Bypass".bold(), - "Intent".bold(), - ); - for path in paths { - if let FindingPath::WriteBypass(p) = path { - let ep = format!("{}:{}", p.endpoint_host, p.endpoint_port); - let bypass_display = match p.bypass_reason.as_str() { - "l4_only" => format!("{}", "L4-only".red()), - "l7_bypass_protocol" => format!("{}", "wire proto".red()), - _ => p.bypass_reason.clone(), - }; - println!( - " {:<30} {:<25} {:<15} {}", - p.binary, ep, bypass_display, p.policy_intent - ); +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::finding::ExfilPath; + + fn exfil_path(category_name: &str, method: &str, host: &str, port: u16) -> ExfilPath { + ExfilPath { + binary: "/usr/bin/curl".to_owned(), + endpoint_host: host.to_owned(), + endpoint_port: port, + mechanism: String::new(), + policy_name: "rule".to_owned(), + category: category_name.to_owned(), + method: method.to_owned(), } } - println!(); + + fn finding_with(category_name: &str, paths: Vec) -> Finding { + Finding { + query: category_name.to_owned(), + title: "test".to_owned(), + description: String::new(), + paths: paths.into_iter().map(FindingPath::Exfil).collect(), + remediation: vec![], + accepted: false, + accepted_reason: String::new(), + } + } + + #[test] + fn shorthand_renders_capability_expansion_with_method() { + let f = finding_with( + category::CAPABILITY_EXPANSION, + vec![exfil_path( + category::CAPABILITY_EXPANSION, + "PUT", + "api.github.com", + 443, + )], + ); + assert_eq!( + finding_shorthand(&f), + "capability_expansion: PUT on api.github.com:443 via /usr/bin/curl" + ); + } + + #[test] + fn shorthand_renders_credential_reach_expansion() { + let f = finding_with( + category::CREDENTIAL_REACH_EXPANSION, + 
vec![exfil_path( + category::CREDENTIAL_REACH_EXPANSION, + "", + "uploads.github.com", + 443, + )], + ); + assert_eq!( + finding_shorthand(&f), + "credential_reach_expansion: uploads.github.com:443 via /usr/bin/curl" + ); + } + + #[test] + fn shorthand_renders_link_local() { + let f = finding_with( + category::LINK_LOCAL_REACH, + vec![exfil_path( + category::LINK_LOCAL_REACH, + "", + "169.254.169.254", + 80, + )], + ); + assert_eq!( + finding_shorthand(&f), + "link_local_reach: 169.254.169.254:80 via /usr/bin/curl" + ); + } + + #[test] + fn shorthand_renders_l7_bypass() { + let f = finding_with( + category::L7_BYPASS_CREDENTIALED, + vec![exfil_path( + category::L7_BYPASS_CREDENTIALED, + "", + "github.com", + 443, + )], + ); + assert_eq!( + finding_shorthand(&f), + "l7_bypass_credentialed: github.com:443 via /usr/bin/curl" + ); + } } diff --git a/crates/openshell-sandbox/src/l7/rest.rs b/crates/openshell-sandbox/src/l7/rest.rs index c513499f4..20d52459c 100644 --- a/crates/openshell-sandbox/src/l7/rest.rs +++ b/crates/openshell-sandbox/src/l7/rest.rs @@ -1317,6 +1317,9 @@ fn deny_response_body( "next_steps".to_string(), crate::policy_local::agent_next_steps(), ); + if let Some(guidance) = crate::policy_local::agent_guidance() { + body.insert("agent_guidance".to_string(), serde_json::json!(guidance)); + } serde_json::Value::Object(body) } @@ -2333,12 +2336,49 @@ mod tests { "/etc/openshell/skills/policy_advisor.md" ); assert_eq!(body["next_steps"][3]["body_type"], "PolicyMergeOperation"); + let guidance = body["agent_guidance"] + .as_str() + .expect("agent_guidance is present when proposals are enabled"); + assert!(guidance.contains("do not stop")); + assert!(guidance.contains("/etc/openshell/skills/policy_advisor.md")); + assert!(guidance.contains("http://policy.local/v1/proposals")); assert!( !body.to_string().contains("secret-token"), "deny body must not leak query params or credential values" ); } + #[test] + fn 
deny_response_body_omits_agent_guidance_when_policy_advisor_is_off() { + let _proposals = crate::test_helpers::ProposalsFlagGuard::set_blocking(false); + let req = L7Request { + action: "GET".to_string(), + target: "/gists".to_string(), + query_params: HashMap::new(), + raw_header: Vec::new(), + body_length: BodyLength::None, + }; + + let body = deny_response_body( + &req, + "github-readonly", + "no matching L7 allow rule", + None, + Some(DenyResponseContext { + host: Some("api.github.com"), + port: Some(443), + binary: Some("/usr/bin/gh"), + }), + ); + + assert_eq!(body["error"], "policy_denied"); + assert_eq!(body["next_steps"], serde_json::json!([])); + assert!( + body.get("agent_guidance").is_none(), + "agent_guidance must only be present when the policy advisor is enabled" + ); + } + #[tokio::test] async fn send_deny_response_writes_structured_json_403() { // Agent-readable next_steps is gated on the proposals feature flag. @@ -2384,6 +2424,7 @@ mod tests { assert_eq!(body["path"], "/user/repos"); assert_eq!(body["rule_missing"]["host"], "api.github.com"); assert_eq!(body["next_steps"][2]["action"], "inspect_recent_denials"); + assert!(body["agent_guidance"].as_str().unwrap().contains("retry")); } #[test] diff --git a/crates/openshell-sandbox/src/mechanistic_mapper.rs b/crates/openshell-sandbox/src/mechanistic_mapper.rs index 521c882a0..50cb0b040 100644 --- a/crates/openshell-sandbox/src/mechanistic_mapper.rs +++ b/crates/openshell-sandbox/src/mechanistic_mapper.rs @@ -12,7 +12,7 @@ //! The LLM-powered `PolicyAdvisor` (issue #205) wraps and enriches these //! mechanistic proposals with context-aware rationale and smarter grouping. 
-use openshell_core::net::is_always_blocked_ip; +use openshell_core::net::{is_always_blocked_ip, is_known_metadata_hostname}; use openshell_core::proto::{ DenialSummary, L7Allow, L7Rule, NetworkBinary, NetworkEndpoint, NetworkPolicyRule, PolicyChunk, }; @@ -106,15 +106,15 @@ pub fn generate_proposals(summaries: &[DenialSummary]) -> Vec { } // Skip proposals for always-blocked destinations (loopback, - // link-local, unspecified). These would be denied at runtime by the - // proxy's is_always_blocked_ip check regardless of policy, producing - // an infinite proposal loop in the TUI. + // link-local, unspecified, and known metadata hostnames). These would + // be denied at runtime regardless of policy, producing an infinite + // proposal loop in the TUI. if is_always_blocked_destination(host) { tracing::info!( host, port, "Skipped proposal for always-blocked destination \ - (SSRF hardening — loopback/link-local/unspecified)" + (SSRF hardening — loopback/link-local/unspecified/metadata)" ); continue; } @@ -416,7 +416,7 @@ fn short_binary_name(path: &str) -> String { /// Check if a destination host is always-blocked. /// /// For literal IP hosts, checks against [`is_always_blocked_ip`]. -/// For hostnames like "localhost", checks well-known loopback names. +/// For hostnames, checks well-known loopback and cloud metadata names. /// For other hostnames, returns false (DNS may resolve to anything). fn is_always_blocked_destination(host: &str) -> bool { // Check literal IP addresses @@ -425,7 +425,7 @@ fn is_always_blocked_destination(host: &str) -> bool { } // Check well-known loopback hostnames let host_lc = host.to_lowercase(); - host_lc == "localhost" || host_lc == "localhost." + host_lc == "localhost" || host_lc == "localhost." 
|| is_known_metadata_hostname(host) } #[cfg(test)] @@ -598,6 +598,12 @@ mod tests { assert!(is_always_blocked_destination("LOCALHOST")); } + #[test] + fn test_always_blocked_destination_known_metadata_hostname() { + assert!(is_always_blocked_destination("metadata.google.internal")); + assert!(is_always_blocked_destination("METADATA.GOOGLE.INTERNAL.")); + } + #[test] fn test_always_blocked_destination_allows_rfc1918() { assert!(!is_always_blocked_destination("10.0.5.20")); @@ -651,6 +657,26 @@ mod tests { ); } + #[test] + fn test_generate_proposals_skips_known_metadata_hostname() { + let summaries = vec![DenialSummary { + host: "metadata.google.internal".to_string(), + port: 80, + binary: "/usr/bin/curl".to_string(), + count: 5, + first_seen_ms: 1000, + last_seen_ms: 2000, + denial_stage: "ssrf".to_string(), + ..Default::default() + }]; + + let proposals = generate_proposals(&summaries); + assert!( + proposals.is_empty(), + "should skip proposals for metadata hostname: {proposals:?}" + ); + } + #[test] fn test_generate_proposals_skips_localhost_hostname() { let summaries = vec![DenialSummary { diff --git a/crates/openshell-sandbox/src/policy_local.rs b/crates/openshell-sandbox/src/policy_local.rs index 657fd760f..aca07596c 100644 --- a/crates/openshell-sandbox/src/policy_local.rs +++ b/crates/openshell-sandbox/src/policy_local.rs @@ -24,6 +24,10 @@ pub const POLICY_LOCAL_HOST: &str = "policy.local"; /// renders the same path. Changing the location is a one-line update here. pub const SKILL_PATH: &str = "/etc/openshell/skills/policy_advisor.md"; +/// Human-readable guidance for agents that are more likely to follow plain +/// instructions than structured next-step JSON alone. +pub const AGENT_GUIDANCE: &str = "OpenShell blocked this request with sandbox policy. 
If the user's task still needs this network action, do not stop here: read /etc/openshell/skills/policy_advisor.md, submit the narrowest policy proposal to http://policy.local/v1/proposals, wait for approval and `policy_reloaded: true`, then retry the original request."; + /// Routes served by the in-sandbox policy advisor API. Held in one place so /// the L7 deny `next_steps` array, the route dispatcher, the skill content, /// and tests all stay in sync — change the wire path here and every caller @@ -242,6 +246,12 @@ pub fn agent_next_steps() -> serde_json::Value { ]) } +/// Build the optional natural-language guidance embedded in L7 deny bodies. +#[must_use] +pub fn agent_guidance() -> Option<&'static str> { + crate::agent_proposals_enabled().then_some(AGENT_GUIDANCE) +} + async fn current_policy_response(ctx: &PolicyLocalContext) -> (u16, serde_json::Value) { let Some(policy) = ctx.current_policy.read().await.clone() else { return ( @@ -1567,6 +1577,22 @@ mod tests { assert!(actions.contains(&"submit_proposal")); } + #[test] + fn agent_guidance_is_absent_when_flag_off() { + let _guard = ProposalsFlagGuard::set_blocking(false); + assert!(agent_guidance().is_none()); + } + + #[test] + fn agent_guidance_points_to_policy_advisor_when_flag_on() { + let _guard = ProposalsFlagGuard::set_blocking(true); + let guidance = agent_guidance().expect("guidance when proposals are enabled"); + assert!(guidance.contains("do not stop")); + assert!(guidance.contains("/etc/openshell/skills/policy_advisor.md")); + assert!(guidance.contains("http://policy.local/v1/proposals")); + assert!(guidance.contains("policy_reloaded: true")); + } + #[tokio::test] async fn route_request_returns_feature_disabled_when_flag_off() { let _guard = ProposalsFlagGuard::set(false).await; diff --git a/crates/openshell-sandbox/src/process.rs b/crates/openshell-sandbox/src/process.rs index c770526be..76786a84d 100644 --- a/crates/openshell-sandbox/src/process.rs +++ b/crates/openshell-sandbox/src/process.rs 
@@ -21,8 +21,6 @@ use std::path::PathBuf; use std::process::Stdio; use tokio::process::{Child, Command}; use tracing::debug; -#[cfg(target_os = "linux")] -use tracing::warn; fn inject_provider_env(cmd: &mut Command, provider_env: &HashMap) { for (key, value) in provider_env { @@ -91,7 +89,7 @@ fn check_runtime_pid_limit_status( if matches!(mode, RuntimePidLimitMode::Require) { Err(miette::miette!(message)) } else { - warn!("{message}"); + tracing::warn!("{message}"); Ok(()) } } @@ -102,7 +100,7 @@ fn check_runtime_pid_limit_status( if matches!(mode, RuntimePidLimitMode::Require) { Err(miette::miette!(message)) } else { - warn!("{message}"); + tracing::warn!("{message}"); Ok(()) } } diff --git a/crates/openshell-sandbox/src/skills.rs b/crates/openshell-sandbox/src/skills.rs index d29d56247..13e1da025 100644 --- a/crates/openshell-sandbox/src/skills.rs +++ b/crates/openshell-sandbox/src/skills.rs @@ -8,11 +8,21 @@ use std::path::{Path, PathBuf}; const SKILLS_RELATIVE_DIR: &str = "etc/openshell/skills"; const POLICY_ADVISOR_FILE: &str = "policy_advisor.md"; +const POLICY_ADVISOR_SKILL_DIR: &str = "policy-advisor"; +const POLICY_ADVISOR_SKILL_FILE: &str = "SKILL.md"; const POLICY_ADVISOR_CONTENT: &str = include_str!("skills/policy_advisor.md"); +const POLICY_ADVISOR_SKILL_CONTENT: &str = include_str!("skills/policy-advisor/SKILL.md"); +const AGENTS_FILE: &str = "AGENTS.md"; +const AGENTS_CONTENT: &str = r"# OpenShell Sandbox Guidance + +When you see `policy_denied`, do not treat it as final if the user task still needs that request. Read `/etc/openshell/skills/policy_advisor.md` and use `http://policy.local` to submit the narrowest policy proposal, then wait for approval before retrying. 
+"; #[derive(Debug, Clone, PartialEq, Eq)] pub struct InstalledSkills { pub policy_advisor: PathBuf, + pub policy_advisor_skill: PathBuf, + pub agents: Option, } pub fn install_static_skills() -> Result { @@ -24,17 +34,43 @@ fn install_static_skills_at(root: &Path) -> Result { std::fs::create_dir_all(&skills_dir).into_diagnostic()?; let policy_advisor = skills_dir.join(POLICY_ADVISOR_FILE); - std::fs::write(&policy_advisor, POLICY_ADVISOR_CONTENT).into_diagnostic()?; + write_readonly(&policy_advisor, POLICY_ADVISOR_CONTENT)?; + + let policy_advisor_skill_dir = skills_dir.join(POLICY_ADVISOR_SKILL_DIR); + std::fs::create_dir_all(&policy_advisor_skill_dir).into_diagnostic()?; + let policy_advisor_skill = policy_advisor_skill_dir.join(POLICY_ADVISOR_SKILL_FILE); + write_readonly(&policy_advisor_skill, POLICY_ADVISOR_SKILL_CONTENT)?; + + let agents = install_optional_agents_pointer(root); + + Ok(InstalledSkills { + policy_advisor, + policy_advisor_skill, + agents, + }) +} + +fn write_readonly(path: &Path, contents: &str) -> Result<()> { + std::fs::write(path, contents).into_diagnostic()?; #[cfg(unix)] { use std::os::unix::fs::PermissionsExt as _; - std::fs::set_permissions(&policy_advisor, std::fs::Permissions::from_mode(0o444)) - .into_diagnostic()?; + std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o444)).into_diagnostic()?; } + Ok(()) +} - Ok(InstalledSkills { policy_advisor }) +fn install_optional_agents_pointer(root: &Path) -> Option { + let agents_path = root.join(AGENTS_FILE); + match std::fs::symlink_metadata(&agents_path) { + Err(err) if err.kind() == std::io::ErrorKind::NotFound => { + write_readonly(&agents_path, AGENTS_CONTENT).ok()?; + Some(agents_path) + } + Ok(_) | Err(_) => None, + } } #[cfg(test)] @@ -55,7 +91,7 @@ mod tests { .join("policy_advisor.md"); assert_eq!(installed.policy_advisor, expected); - let content = std::fs::read_to_string(expected).unwrap(); + let content = std::fs::read_to_string(&expected).unwrap(); 
assert!(content.contains("# OpenShell Policy Advisor")); assert!(content.contains("policy.local")); assert!(content.contains("addRule")); @@ -71,5 +107,56 @@ mod tests { // and re-runs into policy_denied. assert!(content.contains("`policy_reloaded: true`")); assert!(content.contains("`policy_reloaded: false`")); + + let skill_file = dir + .path() + .join("etc") + .join("openshell") + .join("skills") + .join("policy-advisor") + .join("SKILL.md"); + assert_eq!(installed.policy_advisor_skill, skill_file); + let skill_content = std::fs::read_to_string(&skill_file).unwrap(); + assert!(skill_content.contains("policy_denied")); + assert!(skill_content.contains("policy.local")); + assert!(skill_content.contains("/etc/openshell/skills/policy_advisor.md")); + + let agents = installed.agents.expect("AGENTS.md should be installed"); + assert_eq!(agents, dir.path().join("AGENTS.md")); + let agents_content = std::fs::read_to_string(agents).unwrap(); + assert!(agents_content.contains("policy_denied")); + assert!(agents_content.contains("policy.local")); + } + + #[test] + fn install_static_skills_at_does_not_overwrite_existing_agents_file() { + let dir = tempfile::tempdir().unwrap(); + let agents = dir.path().join("AGENTS.md"); + std::fs::write(&agents, "keep me").unwrap(); + + let installed = install_static_skills_at(dir.path()).unwrap(); + + assert_eq!(installed.agents, None); + assert_eq!(std::fs::read_to_string(agents).unwrap(), "keep me"); + } + + #[cfg(unix)] + #[test] + fn install_static_skills_at_treats_broken_agents_symlink_as_existing() { + use std::os::unix::fs::symlink; + + let dir = tempfile::tempdir().unwrap(); + let agents = dir.path().join("AGENTS.md"); + symlink(dir.path().join("missing-target"), &agents).unwrap(); + + let installed = install_static_skills_at(dir.path()).unwrap(); + + assert_eq!(installed.agents, None); + assert!( + std::fs::symlink_metadata(agents) + .unwrap() + .file_type() + .is_symlink() + ); } } diff --git 
a/crates/openshell-sandbox/src/skills/policy-advisor/SKILL.md b/crates/openshell-sandbox/src/skills/policy-advisor/SKILL.md new file mode 100644 index 000000000..f05ba6a0b --- /dev/null +++ b/crates/openshell-sandbox/src/skills/policy-advisor/SKILL.md @@ -0,0 +1,8 @@ +--- +name: openshell-policy-advisor +description: Use when an OpenShell sandbox returns policy_denied, mentions policy.local, or needs a narrow network policy proposal. +--- + +# OpenShell Policy Advisor + +When a request fails with `policy_denied`, do not treat the denial as final if the user task still needs the request. Read `/etc/openshell/skills/policy_advisor.md`, inspect `http://policy.local/v1/policy/current`, submit the narrowest proposal to `http://policy.local/v1/proposals`, wait on `/v1/proposals/{chunk_id}/wait?timeout=300`, and retry only after approval with `policy_reloaded: true`. diff --git a/crates/openshell-sandbox/src/skills/policy_advisor.md b/crates/openshell-sandbox/src/skills/policy_advisor.md index 8ca64f977..4b7074f6d 100644 --- a/crates/openshell-sandbox/src/skills/policy_advisor.md +++ b/crates/openshell-sandbox/src/skills/policy_advisor.md @@ -46,8 +46,14 @@ operations. Each `addRule` carries a complete narrow `NetworkPolicyRule`. `port`, `binary`, `rule_missing`, and `detail` as evidence. 2. Fetch the current policy from `/v1/policy/current`. 3. Fetch recent denials from `/v1/denials` if the response body is incomplete. -4. Prefer L7 REST rules for REST APIs. Use L4 only for non-REST protocols or - when the client tunnels opaque traffic that OpenShell cannot inspect. +4. Prefer L7 REST rules for REST APIs. **Proposals against hosts where no + credential is in scope can auto-approve when auto-approval is enabled** (see Auto-approval below). Any + credentialed reach or capability change goes to human review — that is + the design. L7 is still the agent-speed path because the prover can + precisely describe the change (which method was added on which path); + L4 to a credentialed host loses that precision.
Use L4 only when the + binary's wire protocol is opaque to L7 inspection (`ssh`, `nc`, `git-remote-http`) or the host has no documented REST surface. 5. Draft the narrowest rule: exact host, exact port, exact binary when known, exact method, and the smallest safe path. 6. Submit the proposal, save `accepted_chunk_ids` from the response, and @@ -119,10 +125,88 @@ A complete narrow REST-inspected rule looks like this: } ``` +## Auto-approval + +Auto-approval is opt-in via the `proposal_approval_mode` setting, +managed through the standard settings model. Reviewers set it at the +gateway scope (fleet-wide) with `openshell settings set --global +proposal_approval_mode auto` or at the sandbox scope with `openshell +settings set proposal_approval_mode auto`. The CLI's `openshell +sandbox create --approval-mode auto` is a shorthand that writes the +sandbox-scoped setting at create time. Gateway scope wins when both are +set; the default (no setting) is `"manual"`. + +When auto-approval is enabled and the prover finds nothing new, the +gateway approves the chunk with actor `system:auto` and the +`CONFIG:APPROVED` audit event carries `auto=true`, `source=<analysis_mode>`, +`prover_delta=empty`, and `resolved_from=<gateway|sandbox|default>`. The +agent's `/wait` returns approved in ~1 second. When the prover does +find something — or the setting is `"manual"`/unset — the chunk lands +in `pending` for human review. + +The prover answers four formal questions about each proposed change. +Each "yes" answer is its own categorical finding — there is no +severity grade. Any finding blocks auto-approval. + +- **`link_local_reach`** — the proposal grants reach to a link-local IP + range (`169.254.0.0/16`, `fe80::/10`) or a known metadata hostname + such as `metadata.google.internal`. Cloud metadata endpoints like + `169.254.169.254` live here. **Never** propose access to these — + these endpoints serve credentials regardless of what the sandbox + itself holds.
+- **`l7_bypass_credentialed`** — the proposal lets a binary using a + wire protocol the L7 proxy cannot inspect (`/usr/bin/git`, + `/usr/lib/git-core/git-remote-http`, `/usr/bin/ssh`, `/usr/bin/nc`) + reach a host where a sandbox credential is in scope. Wire protocols + opaque to L7 are unbounded by L7 scoping; the reviewer must decide + whether to trust the binary with the credential. +- **`credential_reach_expansion`** — the proposal grants a binary + credentialed reach to a (host, port) it could not reach before. New + authenticated reach is a stated intent change — the reviewer + confirms whether the binary should be able to authenticate to the + host at all. +- **`capability_expansion`** — the proposal adds a new HTTP method on + a (binary, host, port) that already had credentialed reach. The + reviewer sees exactly which method was added and decides if it's + part of the agent's task. Mutating methods (PUT, POST, PATCH, + DELETE) are typical sources of this finding. + +What auto-approves (under `auto` mode): + +- Proposals where the prover finds zero of the four categories — for + example, L7 rules against hosts with no credential in scope + (public-content fetches from CDNs, schema URLs, public API + discovery). + +If your proposal escalates and you'd like it to auto-approve, look +first at whether the host actually needs a credentialed binary. A +public-content GET often doesn't, and switching to a different host +(or removing the credential dependency) makes the finding go away. +Credentialed mutations are *supposed* to escalate — propose the +narrow rule and wait for review. + +## Refining an earlier auto-suggested rule + +When the sandbox observes a denial it cannot scope to L7 — e.g., a binary +trying to connect to a host the proxy hasn't seen at the application layer +— it auto-drafts a broad L4 proposal so the operator has something concrete +to look at. These mechanistic drafts are visible to you alongside any other +pending proposals. 
+ +If you see a pending mechanistic L4 draft you can do better than, just +submit a refined L7 proposal for the same `(host, port, binary)`. The +gateway will automatically reject the mechanistic draft with reason +"superseded by chunk X" — no extra cleanup or `supersedes_chunk_id` needed. +The new submission wins by structural overlap. + ## Norms - Do not propose wildcard hosts such as `**` or `*.com`. - Do not propose `access: full` to fix a single denied REST request. +- Do not propose access to link-local addresses (`169.254.0.0/16`, + `fe80::/10`) or known metadata hostnames such as + `metadata.google.internal`. Cloud-metadata endpoints there can hand out + the host's credentials. - Do not include query strings, tokens, credentials, or secret values in paths. - Explain uncertainty in `intent_summary` instead of widening the rule. diff --git a/crates/openshell-server/Cargo.toml b/crates/openshell-server/Cargo.toml index ef58ae17b..8bc91a5c6 100644 --- a/crates/openshell-server/Cargo.toml +++ b/crates/openshell-server/Cargo.toml @@ -22,6 +22,7 @@ openshell-driver-kubernetes = { path = "../openshell-driver-kubernetes" } openshell-driver-podman = { path = "../openshell-driver-podman" } openshell-ocsf = { path = "../openshell-ocsf" } openshell-policy = { path = "../openshell-policy" } +openshell-prover = { path = "../openshell-prover" } openshell-providers = { path = "../openshell-providers" } openshell-router = { path = "../openshell-router" } openshell-server-macros = { path = "../openshell-server-macros" } diff --git a/crates/openshell-server/src/grpc/policy.rs b/crates/openshell-server/src/grpc/policy.rs index 2abc670fd..38268fcd0 100644 --- a/crates/openshell-server/src/grpc/policy.rs +++ b/crates/openshell-server/src/grpc/policy.rs @@ -46,11 +46,21 @@ use openshell_ocsf::{ }; use openshell_policy::{ PolicyMergeOp, ProviderPolicyLayer, compose_effective_policy, merge_policy, + serialize_sandbox_policy, +}; +use openshell_prover::{ + credentials::{Credential, 
CredentialSet}, + finding::{Finding, FindingPath}, + model::build_model, + policy::parse_policy_str, + queries::run_all_queries, + registry::load_embedded_binary_registry, + report::finding_shorthand, }; use openshell_providers::{get_default_profile, normalize_provider_type}; use prost::Message; use sha2::{Digest, Sha256}; -use std::collections::{BTreeMap, HashMap}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::net::{IpAddr, Ipv4Addr}; use std::sync::Arc; use tonic::{Request, Response, Status}; @@ -92,6 +102,45 @@ fn emit_gateway_policy_audit_log( detail, version, policy_hash, + &[], + ); + info!( + target: OCSF_TARGET, + sandbox_id = %sandbox_id, + message = %message + ); +} + +/// Emit a `CONFIG:APPROVED` audit event for an auto-approval — same event +/// class as a human approval, with extra unmapped fields carrying the +/// safety reasoning so the audit is reconstructable. `source` records the +/// proposer (`mechanistic` or `agent_authored`) for provenance. +/// `resolved_from` records the scope that supplied the `auto` mode setting +/// (`gateway`, `sandbox`, or `default`) so operators can see why a given +/// approval was auto vs manual. 
+fn emit_gateway_policy_auto_approve_audit_log( + sandbox_id: &str, + sandbox_name: &str, + detail: impl Into, + version: i64, + policy_hash: &str, + source: &str, + resolved_from: &str, +) { + let extra = [ + ("auto", "true".to_string()), + ("source", source.to_string()), + ("prover_delta", "empty".to_string()), + ("resolved_from", resolved_from.to_string()), + ]; + let message = build_gateway_policy_audit_message( + sandbox_id, + sandbox_name, + "approved", + detail, + version, + policy_hash, + &extra, ); info!( target: OCSF_TARGET, @@ -107,6 +156,7 @@ fn build_gateway_policy_audit_message( detail: impl Into, version: i64, policy_hash: &str, + extra_fields: &[(&str, String)], ) -> String { let ctx = SandboxContext { sandbox_id: sandbox_id.to_string(), @@ -128,6 +178,9 @@ fn build_gateway_policy_audit_message( if !policy_hash.is_empty() { builder = builder.unmapped("policy_hash", policy_hash.to_string()); } + for (key, value) in extra_fields { + builder = builder.unmapped(key, value.clone()); + } let event: OcsfEvent = builder.build(); event.format_shorthand() } @@ -305,358 +358,682 @@ fn summarize_draft_chunk_rule(chunk: &DraftChunkRecord) -> Result String { - let mut chars = input.chars(); - let truncated: String = chars.by_ref().take(max_chars).collect(); - if chars.next().is_some() { - format!("{truncated}...") - } else { - truncated +/// Run prover queries against the merged policy and render a short +/// human-readable verdict for the reviewer. The verdict reports only the +/// **delta** — findings the proposal introduces on top of the current policy. +/// Baseline gaps (pre-existing findings) are intentionally not surfaced here; +/// they belong on a posture surface, not on the per-proposal approval moment. +/// +/// The string is the entire output — no taxonomy, no greppable prefixes; the +/// reviewer reads it like an OCSF shorthand line. 
One of: +/// +/// - `prover: no new findings` +/// - `prover: N new finding(s)` followed by one ` : ` +/// line per finding path (categorical shorthand from `openshell-prover`) +/// - `merge failed: ` — proposal won't merge into the current +/// policy +/// - `policy invalid: ` — merged policy fails the cheap +/// structural safety check +/// - `validation unavailable` — gateway-side infrastructure failure (registry +/// load, YAML serialize/parse). Internal error detail is logged via +/// `warn!`, never exposed to the reviewer. +fn validation_result_for_agent_proposal( + current_policy: ProtoSandboxPolicy, + rule_name: &str, + proposed_rule: &NetworkPolicyRule, + credentials: &CredentialSet, +) -> String { + let merge_op = PolicyMergeOp::AddRule { + rule_name: rule_name.to_string(), + rule: proposed_rule.clone(), + }; + let merged = match merge_policy(current_policy.clone(), &[merge_op]) { + Ok(result) => result.policy, + Err(error) => return format!("merge failed: {}", one_line(&error.to_string())), + }; + if let Err(error) = validate_policy_safety(&merged) { + return format!("policy invalid: {}", one_line(&error.to_string())); } -} -#[cfg(test)] -fn is_sandbox_caller(request: &Request) -> bool { - matches!( - request.extensions().get::(), - Some(Principal::Sandbox(_)) - ) -} + let merged_findings = match run_prover_findings(&merged, credentials) { + Ok(findings) => findings, + Err(error) => { + warn!(error = %error, "prover validation unavailable for merged policy"); + return "validation unavailable".to_string(); + } + }; + // If the baseline prover run fails (e.g. the current policy uses a shape + // the prover hasn't caught up to yet), fall back to an empty baseline so + // every merged finding surfaces as new. Safer to over-warn than miss a + // real regression introduced by the proposal. 
+ let base_findings = match run_prover_findings(&current_policy, credentials) { + Ok(findings) => findings, + Err(error) => { + warn!(error = %error, "prover baseline run failed; treating baseline as empty"); + Vec::new() + } + }; -/// Sandbox-class callers may only perform sandbox-scoped policy sync. They -/// must not mutate global config or sandbox settings. -fn validate_sandbox_caller_update(req: &UpdateConfigRequest) -> Result<(), Status> { - if req.global { - return Err(Status::permission_denied( - "sandbox callers cannot mutate global config", - )); - } - if req.delete_setting { - return Err(Status::permission_denied( - "sandbox callers cannot delete settings", - )); - } - if req.name.trim().is_empty() { - return Err(Status::permission_denied( - "sandbox callers may only perform sandbox policy sync", - )); + let new_findings = finding_delta(&base_findings, &merged_findings); + if new_findings.is_empty() { + return "prover: no new findings".to_string(); } - if req.policy.is_none() || !req.setting_key.trim().is_empty() { - return Err(Status::permission_denied( - "sandbox callers may only perform sandbox policy sync", - )); + let count = new_findings.len(); + let mut out = format!( + "prover: {} new finding{}", + count, + if count == 1 { "" } else { "s" } + ); + for finding in &new_findings { + out.push_str("\n "); + out.push_str(&finding_shorthand(finding)); } - Ok(()) + out } -async fn resolve_sandbox_by_name_for_principal( +/// Run the prover end-to-end against a single policy with the given +/// credential set. Returns the raw finding list, or a short error string +/// identifying which infrastructure step failed. +/// +/// The credential set is passed in because it's stable across all chunks in +/// one `SubmitPolicyAnalysis` batch — the caller builds it once and shares.
+fn run_prover_findings( + policy: &ProtoSandboxPolicy, + credentials: &CredentialSet, +) -> Result<Vec<Finding>, String> { + let yaml = + serialize_sandbox_policy(policy).map_err(|e| format!("serialize policy failed: {e}"))?; + let prover_policy = parse_policy_str(&yaml).map_err(|e| format!("parse policy failed: {e}"))?; + let registry = + load_embedded_binary_registry().map_err(|e| format!("load registry failed: {e}"))?; + let model = build_model(prover_policy, credentials.clone(), registry); + Ok(run_all_queries(&model)) +} + +/// Build a `CredentialSet` for the sandbox by walking its attached providers. +/// +/// v1 models "credential is present in scope for these hosts" — no scope +/// modeling. Each attached provider produces one [`Credential`] entry whose +/// `target_hosts` lists the hosts from the provider's profile endpoints. +/// Missing providers or providers whose type has no profile are skipped with +/// a `warn!` — the merged policy already excludes them at compose time, so +/// silently treating them as absent here keeps the credential set consistent +/// with the merged policy the prover validates against. +async fn build_credential_set_for_sandbox( store: &Store, - principal: &Principal, - name: &str, -) -> Result { - let sandbox = store - .get_message_by_name::(name) - .await - .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))?; + provider_names: &[String], +) -> Result<CredentialSet, Status> { + let mut credentials = Vec::new(); - match principal { - Principal::Sandbox(_) => { - let Some(sandbox) = sandbox else { - return Err(Status::permission_denied( - "sandbox not found or not owned by caller", - )); + for name in provider_names { + let Some(provider) = store + .get_message_by_name::(name) + .await + .map_err(|e| Status::internal(format!("failed to fetch provider '{name}': {e}")))?
+ else { + warn!(provider_name = %name, "provider not found while building credential set; skipping"); + continue; + }; + + let provider_type = provider.r#type.trim(); + let profile = if let Some(canonical_type) = normalize_provider_type(provider_type) { + let Some(profile) = get_default_profile(canonical_type) else { + warn!( + provider_name = %name, + provider_type, + "legacy provider type has no profile; skipping credential entry" + ); + continue; }; - crate::auth::guard::ensure_sandbox_scope(principal, sandbox.object_id()).map_err( - |status| { - if status.code() == tonic::Code::PermissionDenied { - Status::permission_denied("sandbox not found or not owned by caller") - } else { - status - } - }, - )?; - Ok(sandbox) + profile.clone() + } else { + let Some(profile) = + super::provider::get_provider_type_profile(store, provider_type).await? + else { + warn!( + provider_name = %name, + provider_type, + "provider type has no profile; skipping credential entry" + ); + continue; + }; + profile + }; + + let target_hosts: Vec = profile + .endpoints + .iter() + .map(|ep| ep.host.to_lowercase()) + .filter(|h| !h.is_empty()) + .collect(); + + if target_hosts.is_empty() { + continue; } - Principal::User(_) => sandbox.ok_or_else(|| Status::not_found("sandbox not found")), - Principal::Anonymous => Err(Status::unauthenticated( - "sandbox-scoped methods require an authenticated caller", - )), + + credentials.push(Credential { + name: name.clone(), + cred_type: provider_type.to_string(), + scopes: Vec::new(), + injected_via: String::new(), + target_hosts, + }); } + + Ok(CredentialSet { + credentials, + api_registries: HashMap::new(), + }) } -// --------------------------------------------------------------------------- -// Config handlers -// --------------------------------------------------------------------------- +/// Stable identity key for a finding path. 
Deliberately excludes +/// `policy_name`: two paths with identical (binary, endpoint, mechanism) are +/// the same security gap whether they live in rule `foo` or rule `bar`. This +/// keeps the delta from spuriously surfacing baseline gaps just because the +/// proposal added a new rule name that produces the same gap shape. +fn finding_path_key(path: &FindingPath) -> String { + let FindingPath::Exfil(p) = path; + // Include the category and (for capability_expansion) the method so + // adding a new method on an already-reached host surfaces as a new + // path; reuse of an existing method does not. + format!( + "exfil|{}|{}:{}|{}|{}", + p.binary, p.endpoint_host, p.endpoint_port, p.category, p.method + ) +} -pub(super) async fn handle_get_sandbox_config( - state: &Arc, - request: Request, -) -> Result, Status> { - let sandbox_id = request.get_ref().sandbox_id.clone(); - crate::auth::guard::enforce_sandbox_scope(&request, &sandbox_id)?; - drop(request); +/// Return the merged-policy findings that aren't already present in the +/// baseline. Comparison is per-(query, path) so that a single finding whose +/// evidence grew (e.g. a new method allowed on an already-reached host) +/// surfaces only the new evidence paths. +/// +/// **Category suppression:** `capability_expansion` paths whose (binary, +/// host, port) tuple appears in the `credential_reach_expansion` delta +/// are suppressed. A brand-new credentialed reach is described by the +/// reach-expansion finding alone; we don't double-report by also +/// flagging every method as a separate `capability_expansion`. 
+fn finding_delta(base: &[Finding], merged: &[Finding]) -> Vec { + use openshell_prover::finding::category; + + let base_keys: HashSet<(String, String)> = base + .iter() + .flat_map(|f| { + let query = f.query.clone(); + f.paths + .iter() + .map(move |p| (query.clone(), finding_path_key(p))) + }) + .collect(); + let mut delta: Vec = Vec::new(); + for finding in merged { + let new_paths: Vec = finding + .paths + .iter() + .filter(|p| !base_keys.contains(&(finding.query.clone(), finding_path_key(p)))) + .cloned() + .collect(); + if new_paths.is_empty() { + continue; + } + delta.push(Finding { + paths: new_paths, + ..finding.clone() + }); + } - let sandbox = state - .store - .get_message::(&sandbox_id) - .await - .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? - .ok_or_else(|| Status::not_found("sandbox not found"))?; - let sandbox_provider_names = sandbox - .spec - .as_ref() - .map(|spec| spec.providers.clone()) - .unwrap_or_default(); + // Suppress capability_expansion paths whose (binary, host, port) + // appears in the credential_reach_expansion delta — a new reach is + // described once, by the reach-expansion category, not also by per- + // method capability findings. + let reach_tuples: HashSet<(String, String, u16)> = delta + .iter() + .filter(|f| f.query == category::CREDENTIAL_REACH_EXPANSION) + .flat_map(|f| { + f.paths.iter().map(|p| { + let FindingPath::Exfil(e) = p; + (e.binary.clone(), e.endpoint_host.clone(), e.endpoint_port) + }) + }) + .collect(); + delta.retain_mut(|f| { + if f.query != category::CAPABILITY_EXPANSION { + return true; + } + f.paths.retain(|p| { + let FindingPath::Exfil(e) = p; + !reach_tuples.contains(&(e.binary.clone(), e.endpoint_host.clone(), e.endpoint_port)) + }); + !f.paths.is_empty() + }); - // Try to get the latest policy from the policy history table. 
- let latest = state + delta +} + +/// Collapse multi-line / multi-message error text to a single line so the +/// `validation_result` stays a clean, scannable string. +fn one_line(s: &str) -> String { + s.split('\n') + .map(str::trim) + .filter(|line| !line.is_empty()) + .collect::>() + .join("; ") +} + +/// Auto-reject any pending chunks for the same sandbox that share the +/// `(host, port, binary)` of the newly-submitted chunk. Mode-agnostic: the +/// rule is "the latest submission for this endpoint wins; older pending +/// proposals are stale." +/// +/// In practice this implements the supersede behavior for the +/// `mechanistic`→`agent_authored` refinement loop: when the agent submits a +/// narrow L7 proposal in response to a denial, any pending mechanistic L4 +/// draft for the same key gets auto-rejected here, without the agent or the +/// proto needing an explicit `supersedes_chunk_id` field. +/// +/// Failures (DB error, scan error) are logged via `warn!` and the function +/// returns silently. The new chunk's persistence has already succeeded; +/// failing this cleanup pass should not abort the submission flow. +async fn supersede_other_pending_chunks_for_endpoint( + state: &Arc, + sandbox_id: &str, + new_chunk_id: &str, + host: &str, + port: i32, + binary: &str, +) { + // Empty host/port/binary should not supersede anything — the matcher would + // accidentally cover unrelated chunks. Defensive skip. 
+ if host.is_empty() || port == 0 || binary.is_empty() { + return; + } + + let pending = match state .store - .get_latest_policy(&sandbox_id) + .list_draft_chunks(sandbox_id, Some("pending")) .await - .map_err(|e| Status::internal(format!("fetch policy history failed: {e}")))?; + { + Ok(records) => records, + Err(err) => { + warn!( + sandbox_id = %sandbox_id, + error = %err, + "supersede scan failed; older pending chunks (if any) remain pending" + ); + return; + } + }; - let mut policy_source = PolicySource::Sandbox; - let (mut policy, mut version, mut policy_hash) = if let Some(record) = latest { - let decoded = ProtoSandboxPolicy::decode(record.policy_payload.as_slice()) - .map_err(|e| Status::internal(format!("decode policy failed: {e}")))?; - debug!( - sandbox_id = %sandbox_id, - version = record.version, - "GetSandboxConfig served from policy history" - ); - ( - Some(decoded), - u32::try_from(record.version).unwrap_or(0), - record.policy_hash, - ) - } else { - // Lazy backfill: no policy history exists yet. 
- let spec = sandbox - .spec - .as_ref() - .ok_or_else(|| Status::internal("sandbox has no spec"))?; + let now_ms = current_time_ms(); + for other in pending { + if other.id == new_chunk_id + || other.host != host + || other.port != port + || other.binary != binary + { + continue; + } - match spec.policy.clone() { - None => { - debug!( + let reason = format!("superseded by chunk {new_chunk_id}"); + match state + .store + .update_draft_chunk_status(&other.id, "rejected", Some(now_ms), Some(&reason)) + .await + { + Ok(_) => { + info!( sandbox_id = %sandbox_id, - "GetSandboxConfig: no policy configured, returning empty response" + superseded_chunk = %other.id, + by_chunk = %new_chunk_id, + host = %host, + port = port, + binary = %binary, + "Auto-rejected pending chunk: superseded by newer submission for same (host, port, binary)" ); - (None, 0, String::new()) } - Some(spec_policy) => { - let hash = deterministic_policy_hash(&spec_policy); - let payload = spec_policy.encode_to_vec(); - let policy_id = uuid::Uuid::new_v4().to_string(); + Err(err) => { + warn!( + chunk_id = %other.id, + error = %err, + "supersede auto-reject failed; chunk remains pending" + ); + } + } + } +} - if let Err(e) = state - .store - .put_policy_revision(&policy_id, &sandbox_id, 1, &payload, &hash) - .await - { - warn!( - sandbox_id = %sandbox_id, - error = %e, - "Failed to backfill policy version 1" - ); - } else if let Err(e) = state - .store - .update_policy_status(&sandbox_id, 1, "loaded", None, None) - .await - { - warn!( - sandbox_id = %sandbox_id, - error = %e, - "Failed to mark backfilled policy as loaded" - ); - } - - info!( - sandbox_id = %sandbox_id, - "GetSandboxConfig served from spec (backfilled version 1)" - ); +/// If the just-submitted mechanistic chunk targets a `(host, port, binary)` +/// already covered by an approved `agent_authored` chunk, auto-reject the +/// mechanistic chunk on arrival. 
The agent has already handled this access +/// decision; the mechanistic draft would only add approval-queue noise. +/// +/// `agent_authored` submissions are NEVER self-rejected — that path remains +/// open for refinement. Only the mechanistic side is asymmetric. +async fn self_reject_mechanistic_if_already_covered( + state: &Arc, + sandbox_id: &str, + new_chunk_id: &str, + host: &str, + port: i32, + binary: &str, +) { + if host.is_empty() || port == 0 || binary.is_empty() { + return; + } - (Some(spec_policy), 1, hash) - } + let approved = match state + .store + .list_draft_chunks(sandbox_id, Some("approved")) + .await + { + Ok(records) => records, + Err(err) => { + warn!( + sandbox_id = %sandbox_id, + error = %err, + "approved-chunk scan for self-reject failed; mechanistic chunk remains pending" + ); + return; } }; - let global_settings = load_global_settings(state.store.as_ref()).await?; - let sandbox_settings = - load_sandbox_settings(state.store.as_ref(), sandbox.object_name()).await?; - let providers_v2_enabled = - bool_setting_enabled(&global_settings, settings::PROVIDERS_V2_ENABLED_KEY)?; - - let mut global_policy_version: u32 = 0; + // If any approved chunk for this sandbox already targets the same + // (host, port, binary), the mechanistic submission is redundant. + let covered_by = approved + .iter() + .find(|c| c.host == host && c.port == port && c.binary == binary); + let Some(covering) = covered_by else { + return; + }; - if let Some(global_policy) = decode_policy_from_global_settings(&global_settings)? 
{ - policy = Some(global_policy.clone()); - policy_hash = deterministic_policy_hash(&global_policy); - policy_source = PolicySource::Global; - if version == 0 { - version = 1; + let reason = format!( + "already covered by approved chunk {} (agent_authored or prior auto-approval)", + covering.id + ); + match state + .store + .update_draft_chunk_status( + new_chunk_id, + "rejected", + Some(current_time_ms()), + Some(&reason), + ) + .await + { + Ok(_) => { + info!( + sandbox_id = %sandbox_id, + chunk_id = %new_chunk_id, + covering_chunk = %covering.id, + host = %host, + port = port, + binary = %binary, + "Auto-rejected incoming mechanistic chunk: endpoint already covered by an approved chunk" + ); } - if let Ok(Some(global_rev)) = state - .store - .get_latest_policy(GLOBAL_POLICY_SANDBOX_ID) - .await - { - global_policy_version = u32::try_from(global_rev.version).unwrap_or(0); + Err(err) => { + warn!( + chunk_id = %new_chunk_id, + error = %err, + "mechanistic self-reject failed; chunk remains pending" + ); } } +} - if providers_v2_enabled - && !matches!(policy_source, PolicySource::Global) - && let Some(source_policy) = policy.as_ref() +/// Internally approve a chunk on the auto-approval path: merge into the +/// active policy, flip status to "approved", notify watchers, and emit a +/// `CONFIG:APPROVED` audit event carrying `auto=true`, `source=`, +/// `prover_delta=empty` so the audit trail records why no human approved +/// this chunk. +/// +/// `source` is the `analysis_mode` of the originating submission +/// (`mechanistic` or `agent_authored`). The audit copy says "auto-approved: +/// no new prover findings" — never "safe" — because the claim is about the +/// prover's reasoning, not the world. +/// Resolve the effective proposal-approval mode for a sandbox. +/// +/// Precedence (matches the rest of the settings model): gateway scope wins +/// over sandbox scope. 
A reviewer can pin manual mode fleet-wide by setting +/// it globally; per-sandbox overrides only apply when no global is set. +/// +/// Returns `(auto_approve_enabled, resolved_from)` where `resolved_from` +/// is `"gateway"`, `"sandbox"`, or `"default"`. Only an exact `"auto"` +/// value enables auto-approval; any other string (including future- +/// reserved modes like `"auto_on_low_risk"`) is conservatively treated as +/// manual. +async fn resolve_proposal_approval_mode( + store: &Store, + sandbox_name: &str, +) -> Result<(bool, &'static str), Status> { + let global = load_global_settings(store).await?; + if let Some(StoredSettingValue::String(value)) = + global.settings.get(settings::PROPOSAL_APPROVAL_MODE_KEY) { - let provider_layers = - profile_provider_policy_layers(state.store.as_ref(), &sandbox_provider_names).await?; - if !provider_layers.is_empty() { - let effective_policy = compose_effective_policy(source_policy, &provider_layers); - policy_hash = deterministic_policy_hash(&effective_policy); - policy = Some(effective_policy); - } + return Ok((value == "auto", "gateway")); } - let settings = merge_effective_settings(&global_settings, &sandbox_settings)?; - let config_revision = compute_config_revision(policy.as_ref(), &settings, policy_source); - let provider_env_revision = - compute_provider_env_revision(state.store.as_ref(), &sandbox_provider_names).await?; + let sandbox = load_sandbox_settings(store, sandbox_name).await?; + if let Some(StoredSettingValue::String(value)) = + sandbox.settings.get(settings::PROPOSAL_APPROVAL_MODE_KEY) + { + return Ok((value == "auto", "sandbox")); + } - Ok(Response::new(GetSandboxConfigResponse { - policy, - version, - policy_hash, - settings, - config_revision, - policy_source: policy_source.into(), - global_policy_version, - provider_env_revision, - })) + Ok((false, "default")) } -pub(super) async fn compute_provider_env_revision( - store: &Store, - provider_names: &[String], -) -> Result { - let mut hasher = 
Sha256::new(); - hasher.update(b"openshell-provider-env-revision-v1"); - - for provider_name in provider_names { - hasher.update(provider_name.as_bytes()); - match store - .get_by_name(Provider::object_type(), provider_name) - .await - .map_err(|e| { - Status::internal(format!("fetch provider '{provider_name}' failed: {e}")) - })? { - Some(record) => { - hasher.update(record.id.as_bytes()); - hasher.update(record.updated_at_ms.to_le_bytes()); +async fn auto_approve_chunk( + state: &Arc, + sandbox_id: &str, + sandbox_name: &str, + chunk_id: &str, + source: &str, + resolved_from: &str, +) -> Result<(), Status> { + // Same gate the human-driven approve paths apply: if a global policy is + // active, sandbox-scoped chunk approvals are meaningless because + // `GetSandboxConfig` prefers the global policy. Auto-approving here + // would persist a sandbox revision that the runtime silently ignores + // and leave a misleading "approved" chunk in the table. Bail before + // touching state; the calling site logs this as `warn!` and leaves the + // chunk pending. + require_no_global_policy(state).await?; - let provider = Provider::decode(record.payload.as_slice()).map_err(|e| { - Status::internal(format!("decode provider '{provider_name}' failed: {e}")) - })?; - hasher.update(provider.r#type.as_bytes()); + let chunk = state + .store + .get_draft_chunk(chunk_id) + .await + .map_err(|e| Status::internal(format!("fetch chunk failed: {e}")))? 
+ .ok_or_else(|| Status::not_found("chunk not found"))?; - let mut credential_keys: Vec<_> = provider.credentials.keys().collect(); - credential_keys.sort(); - for key in credential_keys { - hasher.update(key.as_bytes()); - } - let mut expiry_keys: Vec<_> = provider.credential_expires_at_ms.keys().collect(); - expiry_keys.sort(); - for key in expiry_keys { - hasher.update(key.as_bytes()); - hasher.update(provider.credential_expires_at_ms[key].to_le_bytes()); - } - } - None => { - hasher.update(b"missing"); - } - } + // The chunk may have been superseded or rejected by something else + // between persist and auto-approve. Only approve from a pending state. + if chunk.status != "pending" { + return Ok(()); } - let digest = hasher.finalize(); - Ok(u64::from_le_bytes(digest[..8].try_into().map_err( - |_| Status::internal("provider env revision digest too short"), - )?)) -} + let (version, hash) = merge_chunk_into_policy(state.store.as_ref(), sandbox_id, &chunk).await?; + let chunk_summary = summarize_draft_chunk_rule(&chunk)?; -async fn profile_provider_policy_layers( - store: &Store, - provider_names: &[String], -) -> Result, Status> { - let mut layers = Vec::new(); + let now_ms = current_time_ms(); + state + .store + .update_draft_chunk_status(chunk_id, "approved", Some(now_ms), None) + .await + .map_err(|e| Status::internal(format!("update chunk status failed: {e}")))?; - for name in provider_names { - let provider = store - .get_message_by_name::(name) - .await - .map_err(|e| Status::internal(format!("failed to fetch provider '{name}': {e}")))? 
- .ok_or_else(|| Status::failed_precondition(format!("provider '{name}' not found")))?; + state.sandbox_watch_bus.notify(sandbox_id); - let provider_type = provider.r#type.trim(); - let profile = if let Some(canonical_type) = normalize_provider_type(provider_type) { - let Some(profile) = get_default_profile(canonical_type) else { - warn!( - provider_name = %name, - provider_type, - "legacy provider type has no profile; skipping provider policy layer" - ); - continue; - }; - profile.clone() - } else { - let Some(profile) = - super::provider::get_provider_type_profile(store, provider_type).await? - else { - warn!( - provider_name = %name, - provider_type, - "provider type has no profile; skipping provider policy layer" - ); - continue; - }; - profile - }; + let source_label = if source.is_empty() { + "unspecified" + } else { + source + }; + emit_gateway_policy_auto_approve_audit_log( + sandbox_id, + sandbox_name, + format!( + "auto-approved: no new prover findings (source={source_label}) — chunk {chunk_id}: {chunk_summary}" + ), + version, + &hash, + source_label, + resolved_from, + ); - let rule_name = openshell_policy::provider_rule_name(provider.object_name()); - layers.push(ProviderPolicyLayer { - rule_name: rule_name.clone(), - rule: profile.network_policy_rule(&rule_name), - }); - } + info!( + sandbox_id = %sandbox_id, + chunk_id = %chunk_id, + rule_name = %chunk.rule_name, + version = version, + policy_hash = %hash, + source = %source_label, + resolved_from = %resolved_from, + "Auto-approved chunk: no new prover findings" + ); - Ok(layers) + Ok(()) } -fn bool_setting_enabled(settings: &StoredSettings, key: &str) -> Result { - match settings.settings.get(key) { - None => Ok(false), - Some(StoredSettingValue::Bool(value)) => Ok(*value), - Some(_) => Err(Status::internal(format!( - "setting '{key}' has invalid value type; expected bool" - ))), +// TODO: share effective-policy lookup with `load_sandbox_policy` / +// `GetSandboxConfig`. 
They re-implement very similar global-settings + +// providers_v2 + compose logic; consolidating them is out of scope for the +// agent-authored proposal validation slice. +async fn current_effective_policy_for_sandbox( + state: &ServerState, + sandbox: &Sandbox, + sandbox_id: &str, +) -> Result { + let mut policy = if let Some(record) = state + .store + .get_latest_policy(sandbox_id) + .await + .map_err(|e| Status::internal(format!("fetch latest policy failed: {e}")))? + { + ProtoSandboxPolicy::decode(record.policy_payload.as_slice()) + .map_err(|e| Status::internal(format!("decode current policy failed: {e}")))? + } else { + sandbox + .spec + .as_ref() + .and_then(|spec| spec.policy.clone()) + .unwrap_or_default() + }; + + let global_settings = load_global_settings(state.store.as_ref()).await?; + let policy_source = decode_policy_from_global_settings(&global_settings)?.map_or( + PolicySource::Sandbox, + |global_policy| { + policy = global_policy; + PolicySource::Global + }, + ); + + let providers_v2_enabled = + bool_setting_enabled(&global_settings, settings::PROVIDERS_V2_ENABLED_KEY)?; + if providers_v2_enabled && !matches!(policy_source, PolicySource::Global) { + let provider_names = sandbox + .spec + .as_ref() + .map(|spec| spec.providers.clone()) + .unwrap_or_default(); + let provider_layers = + profile_provider_policy_layers(state.store.as_ref(), &provider_names).await?; + if !provider_layers.is_empty() { + policy = compose_effective_policy(&policy, &provider_layers); + } } + + Ok(policy) } -pub(super) async fn handle_get_gateway_config( - state: &Arc, - _request: Request, -) -> Result, Status> { - let global_settings = load_global_settings(state.store.as_ref()).await?; - let settings = materialize_global_settings(&global_settings)?; - Ok(Response::new(GetGatewayConfigResponse { - settings, - settings_revision: global_settings.revision, - })) +fn truncate_for_log(input: &str, max_chars: usize) -> String { + let mut chars = input.chars(); + let truncated: 
String = chars.by_ref().take(max_chars).collect(); + if chars.next().is_some() { + format!("{truncated}...") + } else { + truncated + } } -pub(super) async fn handle_get_sandbox_provider_environment( +#[cfg(test)] +fn is_sandbox_caller(request: &Request) -> bool { + matches!( + request.extensions().get::(), + Some(Principal::Sandbox(_)) + ) +} + +/// Sandbox-class callers may only perform sandbox-scoped policy sync. They +/// must not mutate global config or sandbox settings. +fn validate_sandbox_caller_update(req: &UpdateConfigRequest) -> Result<(), Status> { + if req.global { + return Err(Status::permission_denied( + "sandbox callers cannot mutate global config", + )); + } + if req.delete_setting { + return Err(Status::permission_denied( + "sandbox callers cannot delete settings", + )); + } + if req.name.trim().is_empty() { + return Err(Status::permission_denied( + "sandbox callers may only perform sandbox policy sync", + )); + } + if req.policy.is_none() || !req.setting_key.trim().is_empty() { + return Err(Status::permission_denied( + "sandbox callers may only perform sandbox policy sync", + )); + } + Ok(()) +} + +async fn resolve_sandbox_by_name_for_principal( + store: &Store, + principal: &Principal, + name: &str, +) -> Result { + let sandbox = store + .get_message_by_name::(name) + .await + .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))?; + + match principal { + Principal::Sandbox(_) => { + let Some(sandbox) = sandbox else { + return Err(Status::permission_denied( + "sandbox not found or not owned by caller", + )); + }; + crate::auth::guard::ensure_sandbox_scope(principal, sandbox.object_id()).map_err( + |status| { + if status.code() == tonic::Code::PermissionDenied { + Status::permission_denied("sandbox not found or not owned by caller") + } else { + status + } + }, + )?; + Ok(sandbox) + } + Principal::User(_) => sandbox.ok_or_else(|| Status::not_found("sandbox not found")), + Principal::Anonymous => Err(Status::unauthenticated( + 
"sandbox-scoped methods require an authenticated caller", + )), + } +} + +// --------------------------------------------------------------------------- +// Config handlers +// --------------------------------------------------------------------------- + +pub(super) async fn handle_get_sandbox_config( state: &Arc, - request: Request, -) -> Result, Status> { + request: Request, +) -> Result, Status> { let sandbox_id = request.get_ref().sandbox_id.clone(); crate::auth::guard::enforce_sandbox_scope(&request, &sandbox_id)?; drop(request); @@ -667,1305 +1044,1352 @@ pub(super) async fn handle_get_sandbox_provider_environment( .await .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? .ok_or_else(|| Status::not_found("sandbox not found"))?; - - let spec = sandbox + let sandbox_provider_names = sandbox .spec - .ok_or_else(|| Status::internal("sandbox has no spec"))?; - - let provider_names = spec.providers; - let provider_env_revision = - compute_provider_env_revision(state.store.as_ref(), &provider_names).await?; - let provider_environment = - super::provider::resolve_provider_environment(state.store.as_ref(), &provider_names) - .await?; - - info!( - sandbox_id = %sandbox_id, - provider_count = provider_names.len(), - env_count = provider_environment.environment.len(), - provider_env_revision, - "GetSandboxProviderEnvironment request completed successfully" - ); - - Ok(Response::new(GetSandboxProviderEnvironmentResponse { - environment: provider_environment.environment, - provider_env_revision, - credential_expires_at_ms: provider_environment.credential_expires_at_ms, - })) -} + .as_ref() + .map(|spec| spec.providers.clone()) + .unwrap_or_default(); -// --------------------------------------------------------------------------- -// Update config handler (policy + settings mutations) -// --------------------------------------------------------------------------- + // Try to get the latest policy from the policy history table. 
+ let latest = state + .store + .get_latest_policy(&sandbox_id) + .await + .map_err(|e| Status::internal(format!("fetch policy history failed: {e}")))?; -pub(super) async fn handle_update_config( - state: &Arc, - request: Request, -) -> Result, Status> { - let principal = request.extensions().get::().cloned(); - let sandbox_caller = matches!(principal, Some(Principal::Sandbox(_))); - let req = request.into_inner(); - if sandbox_caller { - validate_sandbox_caller_update(&req)?; - resolve_sandbox_by_name_for_principal( - state.store.as_ref(), - principal - .as_ref() - .expect("sandbox_caller implies principal"), - &req.name, + let mut policy_source = PolicySource::Sandbox; + let (mut policy, mut version, mut policy_hash) = if let Some(record) = latest { + let decoded = ProtoSandboxPolicy::decode(record.policy_payload.as_slice()) + .map_err(|e| Status::internal(format!("decode policy failed: {e}")))?; + debug!( + sandbox_id = %sandbox_id, + version = record.version, + "GetSandboxConfig served from policy history" + ); + ( + Some(decoded), + u32::try_from(record.version).unwrap_or(0), + record.policy_hash, ) - .await?; - } - let key = req.setting_key.trim(); - let has_policy = req.policy.is_some(); - let has_setting = !key.is_empty(); - let has_merge_ops = !req.merge_operations.is_empty(); - let mut mutation_count = 0_u8; - mutation_count += u8::from(has_policy); - mutation_count += u8::from(has_setting); - mutation_count += u8::from(has_merge_ops); + } else { + // Lazy backfill: no policy history exists yet. 
+ let spec = sandbox + .spec + .as_ref() + .ok_or_else(|| Status::internal("sandbox has no spec"))?; - if mutation_count > 1 { - return Err(Status::invalid_argument( - "policy, setting_key, and merge_operations are mutually exclusive", - )); - } - if mutation_count == 0 { - return Err(Status::invalid_argument( - "one of policy, setting_key, or merge_operations must be provided", - )); - } + match spec.policy.clone() { + None => { + debug!( + sandbox_id = %sandbox_id, + "GetSandboxConfig: no policy configured, returning empty response" + ); + (None, 0, String::new()) + } + Some(spec_policy) => { + let hash = deterministic_policy_hash(&spec_policy); + let payload = spec_policy.encode_to_vec(); + let policy_id = uuid::Uuid::new_v4().to_string(); - if req.global { - let _settings_guard = state.settings_mutex.lock().await; + if let Err(e) = state + .store + .put_policy_revision(&policy_id, &sandbox_id, 1, &payload, &hash) + .await + { + warn!( + sandbox_id = %sandbox_id, + error = %e, + "Failed to backfill policy version 1" + ); + } else if let Err(e) = state + .store + .update_policy_status(&sandbox_id, 1, "loaded", None, None) + .await + { + warn!( + sandbox_id = %sandbox_id, + error = %e, + "Failed to mark backfilled policy as loaded" + ); + } - if has_merge_ops { - return Err(Status::invalid_argument( - "merge_operations are not supported for global policy updates", - )); - } + info!( + sandbox_id = %sandbox_id, + "GetSandboxConfig served from spec (backfilled version 1)" + ); - if has_policy { - if req.delete_setting { - return Err(Status::invalid_argument( - "delete_setting cannot be combined with policy payload", - )); + (Some(spec_policy), 1, hash) } - let mut new_policy = req.policy.ok_or_else(|| { - Status::invalid_argument("policy is required for global policy update") - })?; - openshell_policy::ensure_sandbox_process_identity(&mut new_policy); - validate_policy_safety(&new_policy)?; + } + }; - let payload = new_policy.encode_to_vec(); - let hash = 
deterministic_policy_hash(&new_policy); + let global_settings = load_global_settings(state.store.as_ref()).await?; + let sandbox_settings = + load_sandbox_settings(state.store.as_ref(), sandbox.object_name()).await?; + let providers_v2_enabled = + bool_setting_enabled(&global_settings, settings::PROVIDERS_V2_ENABLED_KEY)?; - let latest = state - .store - .get_latest_policy(GLOBAL_POLICY_SANDBOX_ID) - .await - .map_err(|e| Status::internal(format!("fetch latest global policy failed: {e}")))?; + let mut global_policy_version: u32 = 0; - if let Some(ref current) = latest - && current.policy_hash == hash - && current.status == "loaded" - { - let mut global_settings = load_global_settings(state.store.as_ref()).await?; - let stored_value = StoredSettingValue::Bytes(hex::encode(&payload)); - let changed = upsert_setting_value( - &mut global_settings.settings, - POLICY_SETTING_KEY, - stored_value, - ); - if changed { - global_settings.revision = global_settings.revision.wrapping_add(1); - save_global_settings(state.store.as_ref(), &global_settings).await?; - } - return Ok(Response::new(UpdateConfigResponse { - version: u32::try_from(current.version).unwrap_or(0), - policy_hash: hash, - settings_revision: global_settings.revision, - deleted: false, - })); - } - - let next_version = latest.map_or(1, |r| r.version + 1); - let policy_id = uuid::Uuid::new_v4().to_string(); - - state - .store - .put_policy_revision( - &policy_id, - GLOBAL_POLICY_SANDBOX_ID, - next_version, - &payload, - &hash, - ) - .await - .map_err(|e| { - Status::internal(format!("persist global policy revision failed: {e}")) - })?; - - let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map_or(0, |d| d.as_millis() as i64); - let _ = state - .store - .update_policy_status( - GLOBAL_POLICY_SANDBOX_ID, - next_version, - "loaded", - None, - Some(now_ms), - ) - .await; - let _ = state - .store - .supersede_older_policies(GLOBAL_POLICY_SANDBOX_ID, next_version) - .await; - - let 
mut global_settings = load_global_settings(state.store.as_ref()).await?; - let stored_value = StoredSettingValue::Bytes(hex::encode(&payload)); - let changed = upsert_setting_value( - &mut global_settings.settings, - POLICY_SETTING_KEY, - stored_value, - ); - if changed { - global_settings.revision = global_settings.revision.wrapping_add(1); - save_global_settings(state.store.as_ref(), &global_settings).await?; - } - - return Ok(Response::new(UpdateConfigResponse { - version: u32::try_from(next_version).unwrap_or(0), - policy_hash: hash, - settings_revision: global_settings.revision, - deleted: false, - })); - } - - // Global setting mutation. - if key == POLICY_SETTING_KEY && !req.delete_setting { - return Err(Status::invalid_argument( - "reserved key 'policy' must be set via the policy field", - )); + if let Some(global_policy) = decode_policy_from_global_settings(&global_settings)? { + policy = Some(global_policy.clone()); + policy_hash = deterministic_policy_hash(&global_policy); + policy_source = PolicySource::Global; + if version == 0 { + version = 1; } - if key != POLICY_SETTING_KEY { - validate_registered_setting_key(key)?; + if let Ok(Some(global_rev)) = state + .store + .get_latest_policy(GLOBAL_POLICY_SANDBOX_ID) + .await + { + global_policy_version = u32::try_from(global_rev.version).unwrap_or(0); } + } - let mut global_settings = load_global_settings(state.store.as_ref()).await?; - let changed = if req.delete_setting { - let removed = global_settings.settings.remove(key).is_some(); - if removed - && key == POLICY_SETTING_KEY - && let Ok(Some(latest)) = state - .store - .get_latest_policy(GLOBAL_POLICY_SANDBOX_ID) - .await - { - let _ = state - .store - .supersede_older_policies(GLOBAL_POLICY_SANDBOX_ID, latest.version + 1) - .await; - } - removed - } else { - let setting = req - .setting_value - .as_ref() - .ok_or_else(|| Status::invalid_argument("setting_value is required"))?; - let stored = proto_setting_to_stored(key, setting)?; - 
upsert_setting_value(&mut global_settings.settings, key, stored) - }; - - if changed { - global_settings.revision = global_settings.revision.wrapping_add(1); - save_global_settings(state.store.as_ref(), &global_settings).await?; + if providers_v2_enabled + && !matches!(policy_source, PolicySource::Global) + && let Some(source_policy) = policy.as_ref() + { + let provider_layers = + profile_provider_policy_layers(state.store.as_ref(), &sandbox_provider_names).await?; + if !provider_layers.is_empty() { + let effective_policy = compose_effective_policy(source_policy, &provider_layers); + policy_hash = deterministic_policy_hash(&effective_policy); + policy = Some(effective_policy); } - - return Ok(Response::new(UpdateConfigResponse { - version: 0, - policy_hash: String::new(), - settings_revision: global_settings.revision, - deleted: req.delete_setting && changed, - })); } - if req.name.is_empty() { - return Err(Status::invalid_argument( - "name is required for sandbox-scoped updates", - )); - } + let settings = merge_effective_settings(&global_settings, &sandbox_settings)?; + let config_revision = compute_config_revision(policy.as_ref(), &settings, policy_source); + let provider_env_revision = + compute_provider_env_revision(state.store.as_ref(), &sandbox_provider_names).await?; - // Resolve sandbox by name. - let sandbox = state - .store - .get_message_by_name::(&req.name) - .await - .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? 
- .ok_or_else(|| Status::not_found("sandbox not found"))?; - let sandbox_id = sandbox.object_id().to_string(); + Ok(Response::new(GetSandboxConfigResponse { + policy, + version, + policy_hash, + settings, + config_revision, + policy_source: policy_source.into(), + global_policy_version, + provider_env_revision, + })) +} - if has_setting { - let _settings_guard = state.settings_mutex.lock().await; +pub(super) async fn compute_provider_env_revision( + store: &Store, + provider_names: &[String], +) -> Result { + let mut hasher = Sha256::new(); + hasher.update(b"openshell-provider-env-revision-v1"); - if key == POLICY_SETTING_KEY { - return Err(Status::invalid_argument( - "reserved key 'policy' must be set via policy commands", - )); - } + for provider_name in provider_names { + hasher.update(provider_name.as_bytes()); + match store + .get_by_name(Provider::object_type(), provider_name) + .await + .map_err(|e| { + Status::internal(format!("fetch provider '{provider_name}' failed: {e}")) + })? 
{ + Some(record) => { + hasher.update(record.id.as_bytes()); + hasher.update(record.updated_at_ms.to_le_bytes()); - let global_settings = load_global_settings(state.store.as_ref()).await?; - let globally_managed = global_settings.settings.contains_key(key); + let provider = Provider::decode(record.payload.as_slice()).map_err(|e| { + Status::internal(format!("decode provider '{provider_name}' failed: {e}")) + })?; + hasher.update(provider.r#type.as_bytes()); - if req.delete_setting { - if globally_managed { - return Err(Status::failed_precondition(format!( - "setting '{key}' is managed globally; delete the global setting first" - ))); + let mut credential_keys: Vec<_> = provider.credentials.keys().collect(); + credential_keys.sort(); + for key in credential_keys { + hasher.update(key.as_bytes()); + } + let mut expiry_keys: Vec<_> = provider.credential_expires_at_ms.keys().collect(); + expiry_keys.sort(); + for key in expiry_keys { + hasher.update(key.as_bytes()); + hasher.update(provider.credential_expires_at_ms[key].to_le_bytes()); + } } - - let mut sandbox_settings = - load_sandbox_settings(state.store.as_ref(), sandbox.object_name()).await?; - let removed = sandbox_settings.settings.remove(key).is_some(); - if removed { - sandbox_settings.revision = sandbox_settings.revision.wrapping_add(1); - save_sandbox_settings( - state.store.as_ref(), - sandbox.object_name(), - &sandbox_settings, - ) - .await?; + None => { + hasher.update(b"missing"); } - - return Ok(Response::new(UpdateConfigResponse { - version: 0, - policy_hash: String::new(), - settings_revision: sandbox_settings.revision, - deleted: removed, - })); } + } - if globally_managed { - return Err(Status::failed_precondition(format!( - "setting '{key}' is managed globally; delete the global setting before sandbox update" - ))); - } + let digest = hasher.finalize(); + Ok(u64::from_le_bytes(digest[..8].try_into().map_err( + |_| Status::internal("provider env revision digest too short"), + )?)) +} - let setting = 
req - .setting_value - .as_ref() - .ok_or_else(|| Status::invalid_argument("setting_value is required"))?; - let stored = proto_setting_to_stored(key, setting)?; +async fn profile_provider_policy_layers( + store: &Store, + provider_names: &[String], +) -> Result, Status> { + let mut layers = Vec::new(); - let mut sandbox_settings = - load_sandbox_settings(state.store.as_ref(), sandbox.object_name()).await?; - let changed = upsert_setting_value(&mut sandbox_settings.settings, key, stored); - if changed { - sandbox_settings.revision = sandbox_settings.revision.wrapping_add(1); - save_sandbox_settings( - state.store.as_ref(), - sandbox.object_name(), - &sandbox_settings, - ) - .await?; - } - - return Ok(Response::new(UpdateConfigResponse { - version: 0, - policy_hash: String::new(), - settings_revision: sandbox_settings.revision, - deleted: false, - })); - } - - if has_merge_ops { - let global_settings = load_global_settings(state.store.as_ref()).await?; - if global_settings.settings.contains_key(POLICY_SETTING_KEY) { - return Err(Status::failed_precondition( - "policy is managed globally; delete global policy before sandbox policy update", - )); - } - - let spec = sandbox - .spec - .as_ref() - .ok_or_else(|| Status::internal("sandbox has no spec"))?; - let merge_ops = parse_merge_operations(&req.merge_operations)?; - validate_merge_operations_for_server(&merge_ops)?; - let (version, hash) = apply_merge_operations_with_retry( - state.store.as_ref(), - &sandbox_id, - spec.policy.as_ref(), - &merge_ops, - ) - .await?; - - state.sandbox_watch_bus.notify(&sandbox_id); - emit_gateway_policy_audit_log( - &sandbox_id, - sandbox.object_name(), - "merged", - format!( - "gateway merged {} incremental policy operation(s)", - merge_ops.len() - ), - version, - &hash, - ); - for operation in &merge_ops { - emit_gateway_policy_audit_log( - &sandbox_id, - sandbox.object_name(), - "merged", - format!( - "gateway merged incremental policy op: {}", - 
summarize_cli_policy_merge_op(operation) - ), - version, - &hash, - ); - } - info!( - sandbox_id = %sandbox_id, - version, - policy_hash = %hash, - operation_count = merge_ops.len(), - "UpdateConfig: merged incremental policy operations" - ); - - return Ok(Response::new(UpdateConfigResponse { - version: u32::try_from(version).unwrap_or(0), - policy_hash: hash, - settings_revision: 0, - deleted: false, - })); - } + for name in provider_names { + let provider = store + .get_message_by_name::(name) + .await + .map_err(|e| Status::internal(format!("failed to fetch provider '{name}': {e}")))? + .ok_or_else(|| Status::failed_precondition(format!("provider '{name}' not found")))?; - // Sandbox-scoped policy update. - let mut new_policy = req - .policy - .ok_or_else(|| Status::invalid_argument("policy is required"))?; + let provider_type = provider.r#type.trim(); + let profile = if let Some(canonical_type) = normalize_provider_type(provider_type) { + let Some(profile) = get_default_profile(canonical_type) else { + warn!( + provider_name = %name, + provider_type, + "legacy provider type has no profile; skipping provider policy layer" + ); + continue; + }; + profile.clone() + } else { + let Some(profile) = + super::provider::get_provider_type_profile(store, provider_type).await? 
+ else { + warn!( + provider_name = %name, + provider_type, + "provider type has no profile; skipping provider policy layer" + ); + continue; + }; + profile + }; - let global_settings = load_global_settings(state.store.as_ref()).await?; - if global_settings.settings.contains_key(POLICY_SETTING_KEY) { - return Err(Status::failed_precondition( - "policy is managed globally; delete global policy before sandbox policy update", - )); + let rule_name = openshell_policy::provider_rule_name(provider.object_name()); + layers.push(ProviderPolicyLayer { + rule_name: rule_name.clone(), + rule: profile.network_policy_rule(&rule_name), + }); } - let spec = sandbox - .spec - .as_ref() - .ok_or_else(|| Status::internal("sandbox has no spec"))?; - - openshell_policy::ensure_sandbox_process_identity(&mut new_policy); + Ok(layers) +} - if let Some(baseline_policy) = spec.policy.as_ref() { - validate_static_fields_unchanged(baseline_policy, &new_policy)?; - validate_policy_safety(&new_policy)?; - } else { - // Backfill spec.policy using CAS (first-time policy discovery) - let _sandbox_sync_guard = state.compute.sandbox_sync_guard().await; - let sandbox_id = sandbox.object_id().to_string(); - let new_policy_clone = new_policy.clone(); - state - .store - .update_message_cas::( - &sandbox_id, - req.expected_resource_version, - |sandbox| { - if let Some(ref mut spec) = sandbox.spec - && spec.policy.is_none() - { - spec.policy = Some(new_policy_clone.clone()); - } - }, - ) - .await - .map_err(|e| super::persistence_error_to_status(e, "backfill spec.policy"))?; - info!( - sandbox_id = %sandbox_id, - "UpdateConfig: backfilled spec.policy from sandbox-discovered policy" - ); +fn bool_setting_enabled(settings: &StoredSettings, key: &str) -> Result { + match settings.settings.get(key) { + None => Ok(false), + Some(StoredSettingValue::Bool(value)) => Ok(*value), + Some(_) => Err(Status::internal(format!( + "setting '{key}' has invalid value type; expected bool" + ))), } +} - let latest = state - 
.store - .get_latest_policy(&sandbox_id) - .await - .map_err(|e| Status::internal(format!("fetch latest policy failed: {e}")))?; - - let payload = new_policy.encode_to_vec(); - let hash = deterministic_policy_hash(&new_policy); - - if let Some(ref current) = latest - && current.policy_hash == hash - { - return Ok(Response::new(UpdateConfigResponse { - version: u32::try_from(current.version).unwrap_or(0), - policy_hash: hash, - settings_revision: 0, - deleted: false, - })); - } +pub(super) async fn handle_get_gateway_config( + state: &Arc, + _request: Request, +) -> Result, Status> { + let global_settings = load_global_settings(state.store.as_ref()).await?; + let settings = materialize_global_settings(&global_settings)?; + Ok(Response::new(GetGatewayConfigResponse { + settings, + settings_revision: global_settings.revision, + })) +} - let next_version = latest.map_or(1, |r| r.version + 1); - let policy_id = uuid::Uuid::new_v4().to_string(); +pub(super) async fn handle_get_sandbox_provider_environment( + state: &Arc, + request: Request, +) -> Result, Status> { + let sandbox_id = request.get_ref().sandbox_id.clone(); + crate::auth::guard::enforce_sandbox_scope(&request, &sandbox_id)?; + drop(request); - state + let sandbox = state .store - .put_policy_revision(&policy_id, &sandbox_id, next_version, &payload, &hash) + .get_message::(&sandbox_id) .await - .map_err(|e| Status::internal(format!("persist policy revision failed: {e}")))?; + .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? 
+ .ok_or_else(|| Status::not_found("sandbox not found"))?; - let _ = state - .store - .supersede_older_policies(&sandbox_id, next_version) - .await; + let spec = sandbox + .spec + .ok_or_else(|| Status::internal("sandbox has no spec"))?; - state.sandbox_watch_bus.notify(&sandbox_id); + let provider_names = spec.providers; + let provider_env_revision = + compute_provider_env_revision(state.store.as_ref(), &provider_names).await?; + let provider_environment = + super::provider::resolve_provider_environment(state.store.as_ref(), &provider_names) + .await?; info!( sandbox_id = %sandbox_id, - version = next_version, - policy_hash = %hash, - "UpdateConfig: new policy version persisted" + provider_count = provider_names.len(), + env_count = provider_environment.environment.len(), + provider_env_revision, + "GetSandboxProviderEnvironment request completed successfully" ); - Ok(Response::new(UpdateConfigResponse { - version: u32::try_from(next_version).unwrap_or(0), - policy_hash: hash, - settings_revision: 0, - deleted: false, + Ok(Response::new(GetSandboxProviderEnvironmentResponse { + environment: provider_environment.environment, + provider_env_revision, + credential_expires_at_ms: provider_environment.credential_expires_at_ms, })) } // --------------------------------------------------------------------------- -// Policy status handlers +// Update config handler (policy + settings mutations) // --------------------------------------------------------------------------- -pub(super) async fn handle_get_sandbox_policy_status( +pub(super) async fn handle_update_config( state: &Arc, - request: Request, -) -> Result, Status> { + request: Request, +) -> Result, Status> { + let principal = request.extensions().get::().cloned(); + let sandbox_caller = matches!(principal, Some(Principal::Sandbox(_))); let req = request.into_inner(); - - let (policy_id, active_version) = if req.global { - (GLOBAL_POLICY_SANDBOX_ID.to_string(), 0_u32) - } else { - if req.name.is_empty() { - return 
Err(Status::invalid_argument("name is required")); - } - let sandbox = state - .store - .get_message_by_name::(&req.name) - .await - .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? - .ok_or_else(|| Status::not_found("sandbox not found"))?; - ( - sandbox.object_id().to_string(), - sandbox.current_policy_version(), + if sandbox_caller { + validate_sandbox_caller_update(&req)?; + resolve_sandbox_by_name_for_principal( + state.store.as_ref(), + principal + .as_ref() + .expect("sandbox_caller implies principal"), + &req.name, ) - }; - - let record = if req.version == 0 { - state - .store - .get_latest_policy(&policy_id) - .await - .map_err(|e| Status::internal(format!("fetch policy failed: {e}")))? - } else { - state - .store - .get_policy_by_version(&policy_id, i64::from(req.version)) - .await - .map_err(|e| Status::internal(format!("fetch policy failed: {e}")))? - }; - - let not_found_msg = if req.global { - "no global policy revision found" - } else { - "no policy revision found for this sandbox" - }; - let record = record.ok_or_else(|| Status::not_found(not_found_msg))?; - - Ok(Response::new(GetSandboxPolicyStatusResponse { - revision: Some(policy_record_to_revision(&record, true)), - active_version, - })) -} - -pub(super) async fn handle_list_sandbox_policies( - state: &Arc, - request: Request, -) -> Result, Status> { - let req = request.into_inner(); - - let policy_id = if req.global { - GLOBAL_POLICY_SANDBOX_ID.to_string() - } else { - if req.name.is_empty() { - return Err(Status::invalid_argument("name is required")); - } - let sandbox = state - .store - .get_message_by_name::(&req.name) - .await - .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? 
- .ok_or_else(|| Status::not_found("sandbox not found"))?; - sandbox.object_id().to_string() - }; - - let limit = clamp_limit(req.limit, 50, MAX_PAGE_SIZE); - let records = state - .store - .list_policies(&policy_id, limit, req.offset) - .await - .map_err(|e| Status::internal(format!("list policies failed: {e}")))?; - - let revisions = records - .iter() - .map(|r| policy_record_to_revision(r, false)) - .collect(); - - Ok(Response::new(ListSandboxPoliciesResponse { revisions })) -} - -pub(super) async fn handle_report_policy_status( - state: &Arc, - request: Request, -) -> Result, Status> { - let sandbox_id = request.get_ref().sandbox_id.clone(); - crate::auth::guard::enforce_sandbox_scope(&request, &sandbox_id)?; - let req = request.into_inner(); - if req.sandbox_id.is_empty() { - return Err(Status::invalid_argument("sandbox_id is required")); - } - if req.version == 0 { - return Err(Status::invalid_argument("version is required")); + .await?; } + let key = req.setting_key.trim(); + let has_policy = req.policy.is_some(); + let has_setting = !key.is_empty(); + let has_merge_ops = !req.merge_operations.is_empty(); + let mut mutation_count = 0_u8; + mutation_count += u8::from(has_policy); + mutation_count += u8::from(has_setting); + mutation_count += u8::from(has_merge_ops); - let version = i64::from(req.version); - let status_str = match PolicyStatus::try_from(req.status) { - Ok(PolicyStatus::Loaded) => "loaded", - Ok(PolicyStatus::Failed) => "failed", - _ => return Err(Status::invalid_argument("status must be LOADED or FAILED")), - }; - - let loaded_at_ms = if status_str == "loaded" { - Some(current_time_ms()) - } else { - None - }; - - let load_error = if status_str == "failed" && !req.load_error.is_empty() { - Some(req.load_error.as_str()) - } else { - None - }; - - let updated = state - .store - .update_policy_status( - &req.sandbox_id, - version, - status_str, - load_error, - loaded_at_ms, - ) - .await - .map_err(|e| Status::internal(format!("update policy 
status failed: {e}")))?; - - if !updated { - return Err(Status::not_found("policy revision not found")); + if mutation_count > 1 { + return Err(Status::invalid_argument( + "policy, setting_key, and merge_operations are mutually exclusive", + )); } - - if status_str == "loaded" { - let _ = state - .store - .supersede_older_policies(&req.sandbox_id, version) - .await; - - // Update current_policy_version using CAS - // TODO: Accept expected_version from UpdateConfigRequest for proper client-driven CAS - let _sandbox_sync_guard = state.compute.sandbox_sync_guard().await; - let version_to_set = req.version; - state - .store - .update_message_cas::(&req.sandbox_id, 0, |sandbox| { - sandbox.set_current_policy_version(version_to_set); - }) - .await - .map_err(|e| super::persistence_error_to_status(e, "update current_policy_version"))?; - - state.sandbox_watch_bus.notify(&req.sandbox_id); + if mutation_count == 0 { + return Err(Status::invalid_argument( + "one of policy, setting_key, or merge_operations must be provided", + )); } - info!( - sandbox_id = %req.sandbox_id, - version = req.version, - status = %status_str, - "ReportPolicyStatus: sandbox reported policy load result" - ); - - Ok(Response::new(ReportPolicyStatusResponse {})) -} + if req.global { + let _settings_guard = state.settings_mutex.lock().await; -// --------------------------------------------------------------------------- -// Sandbox logs handlers -// --------------------------------------------------------------------------- + if has_merge_ops { + return Err(Status::invalid_argument( + "merge_operations are not supported for global policy updates", + )); + } -#[allow(clippy::unused_async)] // Must be async to match the trait signature -pub(super) async fn handle_get_sandbox_logs( - state: &Arc, - request: Request, -) -> Result, Status> { - let req = request.into_inner(); - if req.sandbox_id.is_empty() { - return Err(Status::invalid_argument("sandbox_id is required")); - } + if has_policy { + if 
req.delete_setting { + return Err(Status::invalid_argument( + "delete_setting cannot be combined with policy payload", + )); + } + let mut new_policy = req.policy.ok_or_else(|| { + Status::invalid_argument("policy is required for global policy update") + })?; + openshell_policy::ensure_sandbox_process_identity(&mut new_policy); + validate_policy_safety(&new_policy)?; - let lines = if req.lines == 0 { 2000 } else { req.lines }; - let tail = state.tracing_log_bus.tail(&req.sandbox_id, lines as usize); + let payload = new_policy.encode_to_vec(); + let hash = deterministic_policy_hash(&new_policy); - let buffer_total = tail.len() as u32; + let latest = state + .store + .get_latest_policy(GLOBAL_POLICY_SANDBOX_ID) + .await + .map_err(|e| Status::internal(format!("fetch latest global policy failed: {e}")))?; - let logs: Vec = tail - .into_iter() - .filter_map(|evt| { - if let Some(openshell_core::proto::sandbox_stream_event::Payload::Log(log)) = - evt.payload + if let Some(ref current) = latest + && current.policy_hash == hash + && current.status == "loaded" { - if req.since_ms > 0 && log.timestamp_ms < req.since_ms { - return None; - } - if !req.sources.is_empty() && !source_matches(&log.source, &req.sources) { - return None; - } - if !level_matches(&log.level, &req.min_level) { - return None; + let mut global_settings = load_global_settings(state.store.as_ref()).await?; + let stored_value = StoredSettingValue::Bytes(hex::encode(&payload)); + let changed = upsert_setting_value( + &mut global_settings.settings, + POLICY_SETTING_KEY, + stored_value, + ); + if changed { + global_settings.revision = global_settings.revision.wrapping_add(1); + save_global_settings(state.store.as_ref(), &global_settings).await?; } - Some(log) - } else { - None + return Ok(Response::new(UpdateConfigResponse { + version: u32::try_from(current.version).unwrap_or(0), + policy_hash: hash, + settings_revision: global_settings.revision, + deleted: false, + })); } - }) - .collect(); - 
Ok(Response::new(GetSandboxLogsResponse { logs, buffer_total })) -} + let next_version = latest.map_or(1, |r| r.version + 1); + let policy_id = uuid::Uuid::new_v4().to_string(); -pub(super) async fn handle_push_sandbox_logs( - state: &Arc, - request: Request>, -) -> Result, Status> { - let principal = request - .extensions() - .get::() - .cloned() - .ok_or_else(|| Status::unauthenticated("missing principal"))?; - let mut stream = request.into_inner(); - let mut validated_sandbox_id = None; + state + .store + .put_policy_revision( + &policy_id, + GLOBAL_POLICY_SANDBOX_ID, + next_version, + &payload, + &hash, + ) + .await + .map_err(|e| { + Status::internal(format!("persist global policy revision failed: {e}")) + })?; - while let Some(batch) = stream - .message() - .await - .map_err(|e| Status::internal(format!("stream error: {e}")))? - { - if batch.sandbox_id.is_empty() { - continue; - } + let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map_or(0, |d| d.as_millis() as i64); + let _ = state + .store + .update_policy_status( + GLOBAL_POLICY_SANDBOX_ID, + next_version, + "loaded", + None, + Some(now_ms), + ) + .await; + let _ = state + .store + .supersede_older_policies(GLOBAL_POLICY_SANDBOX_ID, next_version) + .await; - ensure_log_stream_sandbox_scope( - state, - &principal, - &batch.sandbox_id, - &mut validated_sandbox_id, - ) - .await?; + let mut global_settings = load_global_settings(state.store.as_ref()).await?; + let stored_value = StoredSettingValue::Bytes(hex::encode(&payload)); + let changed = upsert_setting_value( + &mut global_settings.settings, + POLICY_SETTING_KEY, + stored_value, + ); + if changed { + global_settings.revision = global_settings.revision.wrapping_add(1); + save_global_settings(state.store.as_ref(), &global_settings).await?; + } - for log in batch.logs.into_iter().take(100) { - let mut log = log; - log.source = "sandbox".to_string(); - log.sandbox_id.clone_from(&batch.sandbox_id); - 
state.tracing_log_bus.publish_external(log); + return Ok(Response::new(UpdateConfigResponse { + version: u32::try_from(next_version).unwrap_or(0), + policy_hash: hash, + settings_revision: global_settings.revision, + deleted: false, + })); } - } - Ok(Response::new(PushSandboxLogsResponse {})) -} - -async fn ensure_log_stream_sandbox_scope( - state: &Arc, - principal: &Principal, - sandbox_id: &str, - validated_sandbox_id: &mut Option, -) -> Result<(), Status> { - if let Some(validated) = validated_sandbox_id.as_deref() { - if sandbox_id != validated { - return Err(Status::permission_denied( - "log stream sandbox_id changed after validation", + // Global setting mutation. + if key == POLICY_SETTING_KEY && !req.delete_setting { + return Err(Status::invalid_argument( + "reserved key 'policy' must be set via the policy field", )); } - return Ok(()); + if key != POLICY_SETTING_KEY { + validate_registered_setting_key(key)?; + } + + let mut global_settings = load_global_settings(state.store.as_ref()).await?; + let changed = if req.delete_setting { + let removed = global_settings.settings.remove(key).is_some(); + if removed + && key == POLICY_SETTING_KEY + && let Ok(Some(latest)) = state + .store + .get_latest_policy(GLOBAL_POLICY_SANDBOX_ID) + .await + { + let _ = state + .store + .supersede_older_policies(GLOBAL_POLICY_SANDBOX_ID, latest.version + 1) + .await; + } + removed + } else { + let setting = req + .setting_value + .as_ref() + .ok_or_else(|| Status::invalid_argument("setting_value is required"))?; + let stored = proto_setting_to_stored(key, setting)?; + upsert_setting_value(&mut global_settings.settings, key, stored) + }; + + if changed { + global_settings.revision = global_settings.revision.wrapping_add(1); + save_global_settings(state.store.as_ref(), &global_settings).await?; + } + + return Ok(Response::new(UpdateConfigResponse { + version: 0, + policy_hash: String::new(), + settings_revision: global_settings.revision, + deleted: req.delete_setting && changed, 
+ })); } - crate::auth::guard::ensure_sandbox_scope(principal, sandbox_id)?; - state + if req.name.is_empty() { + return Err(Status::invalid_argument( + "name is required for sandbox-scoped updates", + )); + } + + // Resolve sandbox by name. + let sandbox = state .store - .get_message::(sandbox_id) + .get_message_by_name::(&req.name) .await .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? .ok_or_else(|| Status::not_found("sandbox not found"))?; - *validated_sandbox_id = Some(sandbox_id.to_string()); - Ok(()) -} + let sandbox_id = sandbox.object_id().to_string(); -// --------------------------------------------------------------------------- -// Draft policy recommendation handlers -// --------------------------------------------------------------------------- + if has_setting { + let _settings_guard = state.settings_mutex.lock().await; -pub(super) async fn handle_submit_policy_analysis( - state: &Arc, - request: Request, -) -> Result, Status> { - let principal = request - .extensions() - .get::() - .cloned() - .ok_or_else(|| Status::unauthenticated("missing principal"))?; - let req = request.into_inner(); - if req.name.is_empty() { - return Err(Status::invalid_argument("name is required")); - } + if key == POLICY_SETTING_KEY { + return Err(Status::invalid_argument( + "reserved key 'policy' must be set via policy commands", + )); + } - let sandbox = - resolve_sandbox_by_name_for_principal(state.store.as_ref(), &principal, &req.name).await?; - let sandbox_id = sandbox.object_id().to_string(); + let global_settings = load_global_settings(state.store.as_ref()).await?; + let globally_managed = global_settings.settings.contains_key(key); - let current_version = state - .store - .get_draft_version(&sandbox_id) - .await - .map_err(|e| Status::internal(format!("get draft version failed: {e}")))?; - let draft_version = current_version + 1; + if req.delete_setting { + if globally_managed { + return Err(Status::failed_precondition(format!( + "setting 
'{key}' is managed globally; delete the global setting first" + ))); + } - let mut accepted: u32 = 0; - let mut rejected: u32 = 0; - let mut rejection_reasons: Vec = Vec::new(); - let mut accepted_chunk_ids: Vec = Vec::new(); + let mut sandbox_settings = + load_sandbox_settings(state.store.as_ref(), sandbox.object_name()).await?; + let removed = sandbox_settings.settings.remove(key).is_some(); + if removed { + sandbox_settings.revision = sandbox_settings.revision.wrapping_add(1); + save_sandbox_settings( + state.store.as_ref(), + sandbox.object_name(), + &sandbox_settings, + ) + .await?; + } - for chunk in &req.proposed_chunks { - if chunk.rule_name.is_empty() { - rejected += 1; - rejection_reasons.push("chunk missing rule_name".to_string()); - continue; + return Ok(Response::new(UpdateConfigResponse { + version: 0, + policy_hash: String::new(), + settings_revision: sandbox_settings.revision, + deleted: removed, + })); } - if chunk.proposed_rule.is_none() { - rejected += 1; - rejection_reasons.push(format!("chunk '{}' missing proposed_rule", chunk.rule_name)); - continue; + + if globally_managed { + return Err(Status::failed_precondition(format!( + "setting '{key}' is managed globally; delete the global setting before sandbox update" + ))); } - let now_ms = current_time_ms(); - let proposed_rule_bytes = chunk - .proposed_rule + let setting = req + .setting_value .as_ref() - .map(Message::encode_to_vec) - .unwrap_or_default(); + .ok_or_else(|| Status::invalid_argument("setting_value is required"))?; + let stored = proto_setting_to_stored(key, setting)?; - let rule_ref = chunk.proposed_rule.as_ref(); - let (ep_host, ep_port) = rule_ref - .and_then(|r| r.endpoints.first()) - .map(|ep| (ep.host.to_lowercase(), ep.port as i32)) - .unwrap_or_default(); - let ep_binary = rule_ref - .and_then(|r| r.binaries.first()) - .map(|b| b.path.clone()) - .unwrap_or_default(); + let mut sandbox_settings = + load_sandbox_settings(state.store.as_ref(), sandbox.object_name()).await?; + 
let changed = upsert_setting_value(&mut sandbox_settings.settings, key, stored); + if changed { + sandbox_settings.revision = sandbox_settings.revision.wrapping_add(1); + save_sandbox_settings( + state.store.as_ref(), + sandbox.object_name(), + &sandbox_settings, + ) + .await?; + } - let record = DraftChunkRecord { - // The handler proposes an id; the store may swap it for an - // existing row's id on dedup. Always trust `effective_id` for - // anything user-facing. - id: uuid::Uuid::new_v4().to_string(), - sandbox_id: sandbox_id.clone(), - draft_version, - status: "pending".to_string(), - rule_name: chunk.rule_name.clone(), - proposed_rule: proposed_rule_bytes, - rationale: chunk.rationale.clone(), - security_notes: generate_security_notes( - &ep_host, - u16::try_from(ep_port as u32).unwrap_or(0), - ), - confidence: f64::from(chunk.confidence.clamp(0.0, 1.0)), - created_at_ms: now_ms, - decided_at_ms: None, - host: ep_host, - port: ep_port, - binary: ep_binary, - hit_count: chunk.hit_count.clamp(1, 100), - first_seen_ms: if chunk.first_seen_ms > 0 { - chunk.first_seen_ms - } else { - now_ms - }, - last_seen_ms: if chunk.last_seen_ms > 0 { - chunk.last_seen_ms - } else { - now_ms - }, - validation_result: String::new(), - rejection_reason: String::new(), - }; - // Mechanistic mode dedups N denials targeting the same endpoint - // into one chunk. All other modes (agent-authored proposals, future - // modes) submit each chunk as a distinct row — the redraft loop - // relies on it, and the conservative default for an unknown mode - // is to keep the proposal rather than silently fold it away. 
- let dedup_key = matches!(req.analysis_mode.as_str(), "mechanistic") - .then(|| crate::policy_store::observation_dedup_key(&record)); - let effective_id = state - .store - .put_draft_chunk(&record, dedup_key.as_deref()) - .await - .map_err(|e| Status::internal(format!("persist draft chunk failed: {e}")))?; - accepted += 1; - accepted_chunk_ids.push(effective_id); + return Ok(Response::new(UpdateConfigResponse { + version: 0, + policy_hash: String::new(), + settings_revision: sandbox_settings.revision, + deleted: false, + })); } - state.sandbox_watch_bus.notify(&sandbox_id); + if has_merge_ops { + let global_settings = load_global_settings(state.store.as_ref()).await?; + if global_settings.settings.contains_key(POLICY_SETTING_KEY) { + return Err(Status::failed_precondition( + "policy is managed globally; delete global policy before sandbox policy update", + )); + } - info!( - sandbox_id = %sandbox_id, - accepted = accepted, - rejected = rejected, - draft_version = draft_version, - summaries = req.summaries.len(), - "SubmitPolicyAnalysis: persisted draft chunks" - ); + let spec = sandbox + .spec + .as_ref() + .ok_or_else(|| Status::internal("sandbox has no spec"))?; + let merge_ops = parse_merge_operations(&req.merge_operations)?; + validate_merge_operations_for_server(&merge_ops)?; + let (version, hash) = apply_merge_operations_with_retry( + state.store.as_ref(), + &sandbox_id, + spec.policy.as_ref(), + &merge_ops, + ) + .await?; - Ok(Response::new(SubmitPolicyAnalysisResponse { - accepted_chunks: accepted, - rejected_chunks: rejected, - rejection_reasons, - accepted_chunk_ids, - })) -} + state.sandbox_watch_bus.notify(&sandbox_id); + emit_gateway_policy_audit_log( + &sandbox_id, + sandbox.object_name(), + "merged", + format!( + "gateway merged {} incremental policy operation(s)", + merge_ops.len() + ), + version, + &hash, + ); + for operation in &merge_ops { + emit_gateway_policy_audit_log( + &sandbox_id, + sandbox.object_name(), + "merged", + format!( + "gateway 
merged incremental policy op: {}", + summarize_cli_policy_merge_op(operation) + ), + version, + &hash, + ); + } + info!( + sandbox_id = %sandbox_id, + version, + policy_hash = %hash, + operation_count = merge_ops.len(), + "UpdateConfig: merged incremental policy operations" + ); -pub(super) async fn handle_get_draft_policy( - state: &Arc, - request: Request, -) -> Result, Status> { - let principal = request - .extensions() - .get::() - .cloned() - .ok_or_else(|| Status::unauthenticated("missing principal"))?; - let req = request.into_inner(); - if req.name.is_empty() { - return Err(Status::invalid_argument("name is required")); + return Ok(Response::new(UpdateConfigResponse { + version: u32::try_from(version).unwrap_or(0), + policy_hash: hash, + settings_revision: 0, + deleted: false, + })); } - let sandbox = - resolve_sandbox_by_name_for_principal(state.store.as_ref(), &principal, &req.name).await?; - let sandbox_id = sandbox.object_id().to_string(); + // Sandbox-scoped policy update. + let mut new_policy = req + .policy + .ok_or_else(|| Status::invalid_argument("policy is required"))?; - let status_filter = if req.status_filter.is_empty() { - None + let global_settings = load_global_settings(state.store.as_ref()).await?; + if global_settings.settings.contains_key(POLICY_SETTING_KEY) { + return Err(Status::failed_precondition( + "policy is managed globally; delete global policy before sandbox policy update", + )); + } + + let spec = sandbox + .spec + .as_ref() + .ok_or_else(|| Status::internal("sandbox has no spec"))?; + + openshell_policy::ensure_sandbox_process_identity(&mut new_policy); + + if let Some(baseline_policy) = spec.policy.as_ref() { + validate_static_fields_unchanged(baseline_policy, &new_policy)?; + validate_policy_safety(&new_policy)?; } else { - Some(req.status_filter.as_str()) - }; + // Backfill spec.policy using CAS (first-time policy discovery) + let _sandbox_sync_guard = state.compute.sandbox_sync_guard().await; + let sandbox_id = 
sandbox.object_id().to_string(); + let new_policy_clone = new_policy.clone(); + state + .store + .update_message_cas::( + &sandbox_id, + req.expected_resource_version, + |sandbox| { + if let Some(ref mut spec) = sandbox.spec + && spec.policy.is_none() + { + spec.policy = Some(new_policy_clone.clone()); + } + }, + ) + .await + .map_err(|e| super::persistence_error_to_status(e, "backfill spec.policy"))?; + info!( + sandbox_id = %sandbox_id, + "UpdateConfig: backfilled spec.policy from sandbox-discovered policy" + ); + } - let records = state + let latest = state .store - .list_draft_chunks(&sandbox_id, status_filter) + .get_latest_policy(&sandbox_id) .await - .map_err(|e| Status::internal(format!("list draft chunks failed: {e}")))?; + .map_err(|e| Status::internal(format!("fetch latest policy failed: {e}")))?; - let draft_version = state + let payload = new_policy.encode_to_vec(); + let hash = deterministic_policy_hash(&new_policy); + + if let Some(ref current) = latest + && current.policy_hash == hash + { + return Ok(Response::new(UpdateConfigResponse { + version: u32::try_from(current.version).unwrap_or(0), + policy_hash: hash, + settings_revision: 0, + deleted: false, + })); + } + + let next_version = latest.map_or(1, |r| r.version + 1); + let policy_id = uuid::Uuid::new_v4().to_string(); + + state .store - .get_draft_version(&sandbox_id) + .put_policy_revision(&policy_id, &sandbox_id, next_version, &payload, &hash) .await - .map_err(|e| Status::internal(format!("get draft version failed: {e}")))?; + .map_err(|e| Status::internal(format!("persist policy revision failed: {e}")))?; - let chunks: Vec = records - .into_iter() - .map(|r| draft_chunk_record_to_proto(&r)) - .collect::, _>>()?; + let _ = state + .store + .supersede_older_policies(&sandbox_id, next_version) + .await; - let last_analyzed_at_ms = chunks.iter().map(|c| c.created_at_ms).max().unwrap_or(0); + state.sandbox_watch_bus.notify(&sandbox_id); - debug!( + info!( sandbox_id = %sandbox_id, - chunk_count 
= chunks.len(), - draft_version = draft_version, - "GetDraftPolicy: served draft chunks" + version = next_version, + policy_hash = %hash, + "UpdateConfig: new policy version persisted" ); - Ok(Response::new(GetDraftPolicyResponse { - chunks, - rolling_summary: String::new(), - draft_version: u64::try_from(draft_version).unwrap_or(0), - last_analyzed_at_ms, + Ok(Response::new(UpdateConfigResponse { + version: u32::try_from(next_version).unwrap_or(0), + policy_hash: hash, + settings_revision: 0, + deleted: false, })) } -pub(super) async fn handle_approve_draft_chunk( +// --------------------------------------------------------------------------- +// Policy status handlers +// --------------------------------------------------------------------------- + +pub(super) async fn handle_get_sandbox_policy_status( state: &Arc, - request: Request, -) -> Result, Status> { + request: Request, +) -> Result, Status> { let req = request.into_inner(); - if req.name.is_empty() { - return Err(Status::invalid_argument("name is required")); - } - if req.chunk_id.is_empty() { - return Err(Status::invalid_argument("chunk_id is required")); - } - require_no_global_policy(state).await?; + let (policy_id, active_version) = if req.global { + (GLOBAL_POLICY_SANDBOX_ID.to_string(), 0_u32) + } else { + if req.name.is_empty() { + return Err(Status::invalid_argument("name is required")); + } + let sandbox = state + .store + .get_message_by_name::(&req.name) + .await + .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? + .ok_or_else(|| Status::not_found("sandbox not found"))?; + ( + sandbox.object_id().to_string(), + sandbox.current_policy_version(), + ) + }; - let sandbox = state - .store - .get_message_by_name::(&req.name) - .await - .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? 
- .ok_or_else(|| Status::not_found("sandbox not found"))?; - let sandbox_id = sandbox.object_id().to_string(); + let record = if req.version == 0 { + state + .store + .get_latest_policy(&policy_id) + .await + .map_err(|e| Status::internal(format!("fetch policy failed: {e}")))? + } else { + state + .store + .get_policy_by_version(&policy_id, i64::from(req.version)) + .await + .map_err(|e| Status::internal(format!("fetch policy failed: {e}")))? + }; - let chunk = state - .store - .get_draft_chunk(&req.chunk_id) - .await - .map_err(|e| Status::internal(format!("fetch chunk failed: {e}")))? - .ok_or_else(|| Status::not_found("chunk not found"))?; - ensure_chunk_belongs_to_sandbox(&chunk, &sandbox_id)?; + let not_found_msg = if req.global { + "no global policy revision found" + } else { + "no policy revision found for this sandbox" + }; + let record = record.ok_or_else(|| Status::not_found(not_found_msg))?; - if chunk.status != "pending" && chunk.status != "rejected" { - return Err(Status::failed_precondition(format!( - "chunk status is '{}', expected 'pending' or 'rejected'", - chunk.status - ))); - } + Ok(Response::new(GetSandboxPolicyStatusResponse { + revision: Some(policy_record_to_revision(&record, true)), + active_version, + })) +} - info!( - sandbox_id = %sandbox_id, - chunk_id = %req.chunk_id, - rule_name = %chunk.rule_name, - host = %chunk.host, - port = chunk.port, - hit_count = chunk.hit_count, - prev_status = %chunk.status, - "ApproveDraftChunk: merging rule into active policy" - ); +pub(super) async fn handle_list_sandbox_policies( + state: &Arc, + request: Request, +) -> Result, Status> { + let req = request.into_inner(); - let (version, hash) = - merge_chunk_into_policy(state.store.as_ref(), &sandbox_id, &chunk).await?; - let chunk_summary = summarize_draft_chunk_rule(&chunk)?; + let policy_id = if req.global { + GLOBAL_POLICY_SANDBOX_ID.to_string() + } else { + if req.name.is_empty() { + return Err(Status::invalid_argument("name is required")); + } + 
let sandbox = state + .store + .get_message_by_name::(&req.name) + .await + .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? + .ok_or_else(|| Status::not_found("sandbox not found"))?; + sandbox.object_id().to_string() + }; - let now_ms = current_time_ms(); - state + let limit = clamp_limit(req.limit, 50, MAX_PAGE_SIZE); + let records = state .store - .update_draft_chunk_status(&req.chunk_id, "approved", Some(now_ms), None) + .list_policies(&policy_id, limit, req.offset) .await - .map_err(|e| Status::internal(format!("update chunk status failed: {e}")))?; - - state.sandbox_watch_bus.notify(&sandbox_id); - emit_gateway_policy_audit_log( - &sandbox_id, - sandbox.object_name(), - "approved", - format!( - "gateway approved draft chunk {}: {chunk_summary}", - req.chunk_id - ), - version, - &hash, - ); + .map_err(|e| Status::internal(format!("list policies failed: {e}")))?; - info!( - sandbox_id = %sandbox_id, - chunk_id = %req.chunk_id, - rule_name = %chunk.rule_name, - version = version, - policy_hash = %hash, - "ApproveDraftChunk: rule merged successfully" - ); + let revisions = records + .iter() + .map(|r| policy_record_to_revision(r, false)) + .collect(); - Ok(Response::new(ApproveDraftChunkResponse { - policy_version: u32::try_from(version).unwrap_or(0), - policy_hash: hash, - })) + Ok(Response::new(ListSandboxPoliciesResponse { revisions })) } -pub(super) async fn handle_reject_draft_chunk( +pub(super) async fn handle_report_policy_status( state: &Arc, - request: Request, -) -> Result, Status> { + request: Request, +) -> Result, Status> { + let sandbox_id = request.get_ref().sandbox_id.clone(); + crate::auth::guard::enforce_sandbox_scope(&request, &sandbox_id)?; let req = request.into_inner(); - if req.name.is_empty() { - return Err(Status::invalid_argument("name is required")); + if req.sandbox_id.is_empty() { + return Err(Status::invalid_argument("sandbox_id is required")); } - if req.chunk_id.is_empty() { - return 
Err(Status::invalid_argument("chunk_id is required")); + if req.version == 0 { + return Err(Status::invalid_argument("version is required")); } - let sandbox = state - .store - .get_message_by_name::(&req.name) - .await - .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? - .ok_or_else(|| Status::not_found("sandbox not found"))?; - let sandbox_id = sandbox.object_id().to_string(); + let version = i64::from(req.version); + let status_str = match PolicyStatus::try_from(req.status) { + Ok(PolicyStatus::Loaded) => "loaded", + Ok(PolicyStatus::Failed) => "failed", + _ => return Err(Status::invalid_argument("status must be LOADED or FAILED")), + }; - let chunk = state + let loaded_at_ms = if status_str == "loaded" { + Some(current_time_ms()) + } else { + None + }; + + let load_error = if status_str == "failed" && !req.load_error.is_empty() { + Some(req.load_error.as_str()) + } else { + None + }; + + let updated = state .store - .get_draft_chunk(&req.chunk_id) + .update_policy_status( + &req.sandbox_id, + version, + status_str, + load_error, + loaded_at_ms, + ) .await - .map_err(|e| Status::internal(format!("fetch chunk failed: {e}")))? 
- .ok_or_else(|| Status::not_found("chunk not found"))?; - ensure_chunk_belongs_to_sandbox(&chunk, &sandbox_id)?; + .map_err(|e| Status::internal(format!("update policy status failed: {e}")))?; - if chunk.status != "pending" && chunk.status != "approved" { - return Err(Status::failed_precondition(format!( - "chunk status is '{}', expected 'pending' or 'approved'", - chunk.status - ))); + if !updated { + return Err(Status::not_found("policy revision not found")); } - let was_approved = chunk.status == "approved"; + if status_str == "loaded" { + let _ = state + .store + .supersede_older_policies(&req.sandbox_id, version) + .await; + + // Update current_policy_version using CAS + // TODO: Accept expected_version from UpdateConfigRequest for proper client-driven CAS + let _sandbox_sync_guard = state.compute.sandbox_sync_guard().await; + let version_to_set = req.version; + state + .store + .update_message_cas::(&req.sandbox_id, 0, |sandbox| { + sandbox.set_current_policy_version(version_to_set); + }) + .await + .map_err(|e| super::persistence_error_to_status(e, "update current_policy_version"))?; + + state.sandbox_watch_bus.notify(&req.sandbox_id); + } info!( - sandbox_id = %sandbox_id, - chunk_id = %req.chunk_id, - rule_name = %chunk.rule_name, - host = %chunk.host, - port = chunk.port, - reason = %req.reason, - prev_status = %chunk.status, - "RejectDraftChunk: rejecting chunk" + sandbox_id = %req.sandbox_id, + version = req.version, + status = %status_str, + "ReportPolicyStatus: sandbox reported policy load result" ); - if was_approved { - require_no_global_policy(state).await?; - let (version, hash) = remove_chunk_from_policy(state, &sandbox_id, &chunk).await?; - emit_gateway_policy_audit_log( - &sandbox_id, - sandbox.object_name(), - "removed", - format!( - "gateway removed previously approved draft chunk {}: remove-binary {} {}", - req.chunk_id, chunk.rule_name, chunk.binary - ), - version, - &hash, - ); - } + Ok(Response::new(ReportPolicyStatusResponse {})) +} - 
let now_ms = current_time_ms(); - // Persist the reviewer's free-form `reason` into the chunk's - // `rejection_reason` field so the in-sandbox agent can read it back via - // GetDraftPolicy / policy.local and revise the proposal. - let persisted_reason = if req.reason.is_empty() { - None - } else { - Some(req.reason.as_str()) - }; - state - .store - .update_draft_chunk_status(&req.chunk_id, "rejected", Some(now_ms), persisted_reason) - .await - .map_err(|e| Status::internal(format!("update chunk status failed: {e}")))?; - - state.sandbox_watch_bus.notify(&sandbox_id); - - Ok(Response::new(RejectDraftChunkResponse {})) -} +// --------------------------------------------------------------------------- +// Sandbox logs handlers +// --------------------------------------------------------------------------- -pub(super) async fn handle_approve_all_draft_chunks( +#[allow(clippy::unused_async)] // Must be async to match the trait signature +pub(super) async fn handle_get_sandbox_logs( state: &Arc, - request: Request, -) -> Result, Status> { + request: Request, +) -> Result, Status> { let req = request.into_inner(); - if req.name.is_empty() { - return Err(Status::invalid_argument("name is required")); + if req.sandbox_id.is_empty() { + return Err(Status::invalid_argument("sandbox_id is required")); } - require_no_global_policy(state).await?; - - let sandbox = state - .store - .get_message_by_name::(&req.name) - .await - .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? 
- .ok_or_else(|| Status::not_found("sandbox not found"))?; - let sandbox_id = sandbox.object_id().to_string(); + let lines = if req.lines == 0 { 2000 } else { req.lines }; + let tail = state.tracing_log_bus.tail(&req.sandbox_id, lines as usize); - let pending_chunks = state - .store - .list_draft_chunks(&sandbox_id, Some("pending")) - .await - .map_err(|e| Status::internal(format!("list draft chunks failed: {e}")))?; + let buffer_total = tail.len() as u32; - if pending_chunks.is_empty() { - return Err(Status::failed_precondition("no pending chunks to approve")); - } + let logs: Vec = tail + .into_iter() + .filter_map(|evt| { + if let Some(openshell_core::proto::sandbox_stream_event::Payload::Log(log)) = + evt.payload + { + if req.since_ms > 0 && log.timestamp_ms < req.since_ms { + return None; + } + if !req.sources.is_empty() && !source_matches(&log.source, &req.sources) { + return None; + } + if !level_matches(&log.level, &req.min_level) { + return None; + } + Some(log) + } else { + None + } + }) + .collect(); - info!( - sandbox_id = %sandbox_id, - pending_count = pending_chunks.len(), - include_security_flagged = req.include_security_flagged, - "ApproveAllDraftChunks: starting bulk approval" - ); + Ok(Response::new(GetSandboxLogsResponse { logs, buffer_total })) +} - let mut chunks_approved: u32 = 0; - let mut chunks_skipped: u32 = 0; - let mut last_version: i64 = 0; - let mut last_hash = String::new(); +pub(super) async fn handle_push_sandbox_logs( + state: &Arc, + request: Request>, +) -> Result, Status> { + let principal = request + .extensions() + .get::() + .cloned() + .ok_or_else(|| Status::unauthenticated("missing principal"))?; + let mut stream = request.into_inner(); + let mut validated_sandbox_id = None; - for chunk in &pending_chunks { - if !req.include_security_flagged && !chunk.security_notes.is_empty() { - info!( - sandbox_id = %sandbox_id, - chunk_id = %chunk.id, - rule_name = %chunk.rule_name, - security_notes = %chunk.security_notes, - 
"ApproveAllDraftChunks: skipping security-flagged chunk" - ); - chunks_skipped += 1; + while let Some(batch) = stream + .message() + .await + .map_err(|e| Status::internal(format!("stream error: {e}")))? + { + if batch.sandbox_id.is_empty() { continue; } - info!( - sandbox_id = %sandbox_id, - chunk_id = %chunk.id, - rule_name = %chunk.rule_name, - host = %chunk.host, - port = chunk.port, - "ApproveAllDraftChunks: merging chunk" - ); - - let (version, hash) = - merge_chunk_into_policy(state.store.as_ref(), &sandbox_id, chunk).await?; - last_version = version; - last_hash = hash; - let chunk_summary = summarize_draft_chunk_rule(chunk)?; - - let now_ms = current_time_ms(); - state - .store - .update_draft_chunk_status(&chunk.id, "approved", Some(now_ms), None) - .await - .map_err(|e| Status::internal(format!("update chunk status failed: {e}")))?; + ensure_log_stream_sandbox_scope( + state, + &principal, + &batch.sandbox_id, + &mut validated_sandbox_id, + ) + .await?; - emit_gateway_policy_audit_log( - &sandbox_id, - sandbox.object_name(), - "approved", - format!("gateway approved draft chunk {}: {chunk_summary}", chunk.id), - version, - &last_hash, - ); - chunks_approved += 1; + for log in batch.logs.into_iter().take(100) { + let mut log = log; + log.source = "sandbox".to_string(); + log.sandbox_id.clone_from(&batch.sandbox_id); + state.tracing_log_bus.publish_external(log); + } } - state.sandbox_watch_bus.notify(&sandbox_id); - emit_gateway_policy_audit_log( - &sandbox_id, - sandbox.object_name(), - "merged", - format!( - "gateway bulk-approved {chunks_approved} draft chunk(s) and skipped {chunks_skipped}" - ), - last_version, - &last_hash, - ); - - info!( - sandbox_id = %sandbox_id, - chunks_approved = chunks_approved, - chunks_skipped = chunks_skipped, - version = last_version, - policy_hash = %last_hash, - "ApproveAllDraftChunks: bulk approval complete" - ); - - Ok(Response::new(ApproveAllDraftChunksResponse { - policy_version: 
u32::try_from(last_version).unwrap_or(0), - policy_hash: last_hash, - chunks_approved, - chunks_skipped, - })) + Ok(Response::new(PushSandboxLogsResponse {})) } -pub(super) async fn handle_edit_draft_chunk( +async fn ensure_log_stream_sandbox_scope( state: &Arc, - request: Request, -) -> Result, Status> { - let req = request.into_inner(); - if req.name.is_empty() { - return Err(Status::invalid_argument("name is required")); - } - if req.chunk_id.is_empty() { - return Err(Status::invalid_argument("chunk_id is required")); + principal: &Principal, + sandbox_id: &str, + validated_sandbox_id: &mut Option, +) -> Result<(), Status> { + if let Some(validated) = validated_sandbox_id.as_deref() { + if sandbox_id != validated { + return Err(Status::permission_denied( + "log stream sandbox_id changed after validation", + )); + } + return Ok(()); } - let proposed_rule = req - .proposed_rule - .ok_or_else(|| Status::invalid_argument("proposed_rule is required"))?; - let sandbox = state + crate::auth::guard::ensure_sandbox_scope(principal, sandbox_id)?; + state .store - .get_message_by_name::(&req.name) + .get_message::(sandbox_id) .await .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? .ok_or_else(|| Status::not_found("sandbox not found"))?; - let sandbox_id = sandbox.object_id().to_string(); - - let chunk = state - .store - .get_draft_chunk(&req.chunk_id) - .await - .map_err(|e| Status::internal(format!("fetch chunk failed: {e}")))? 
- .ok_or_else(|| Status::not_found("chunk not found"))?; - ensure_chunk_belongs_to_sandbox(&chunk, &sandbox_id)?; - - if chunk.status != "pending" { - return Err(Status::failed_precondition(format!( - "chunk status is '{}', expected 'pending'", - chunk.status - ))); - } - - let rule_bytes = proposed_rule.encode_to_vec(); - state - .store - .update_draft_chunk_rule(&req.chunk_id, &rule_bytes) - .await - .map_err(|e| Status::internal(format!("update chunk rule failed: {e}")))?; - - info!( - chunk_id = %req.chunk_id, - "EditDraftChunk: proposed rule updated" - ); - - Ok(Response::new(EditDraftChunkResponse {})) + *validated_sandbox_id = Some(sandbox_id.to_string()); + Ok(()) } -pub(super) async fn handle_undo_draft_chunk( +// --------------------------------------------------------------------------- +// Draft policy recommendation handlers +// --------------------------------------------------------------------------- + +pub(super) async fn handle_submit_policy_analysis( state: &Arc, - request: Request, -) -> Result, Status> { + request: Request, +) -> Result, Status> { + let principal = request + .extensions() + .get::() + .cloned() + .ok_or_else(|| Status::unauthenticated("missing principal"))?; let req = request.into_inner(); if req.name.is_empty() { return Err(Status::invalid_argument("name is required")); } - if req.chunk_id.is_empty() { - return Err(Status::invalid_argument("chunk_id is required")); + + let sandbox = + resolve_sandbox_by_name_for_principal(state.store.as_ref(), &principal, &req.name).await?; + let sandbox_id = sandbox.object_id().to_string(); + // `current_policy` is captured ONCE at the top of the batch and frozen + // for every chunk's delta computation, even if an earlier chunk in the + // batch auto-approves and merges. 
This is intentional v1 behavior: + // the cost is that, in multi-chunk batches with overlapping endpoints, + // chunk N+1 does not see chunk N's contribution, which is a degenerate + // case for the common single-chunk submission shape. If real workloads + // surface a problem with batches that interact across chunks, the right + // fix is to recompute baseline after each successful auto-approve. + let current_policy = current_effective_policy_for_sandbox(state, &sandbox, &sandbox_id).await?; + + // Auto-approval is an opt-in behavior, sourced from the settings model + // (sandbox or gateway scope) so it can be flipped on a running sandbox + // and managed fleet-wide. Default (no setting, or any value other than + // exact "auto") preserves OpenShell's default-deny posture: every + // proposal lands in `pending` for a human reviewer. + let (auto_approve_enabled, resolved_from) = + resolve_proposal_approval_mode(state.store.as_ref(), sandbox.object_name()).await?; + + // The credential set is stable across all chunks in this batch, so build + // it once. v1 captures presence only — no scope modeling — so the prover + // can answer "is there a credential in scope for this host?" but not + // "what action class does that credential authorize?" 
+ let provider_names_for_creds: Vec = sandbox + .spec + .as_ref() + .map(|spec| spec.providers.clone()) + .unwrap_or_default(); + let credential_set = + build_credential_set_for_sandbox(state.store.as_ref(), &provider_names_for_creds).await?; + + let current_version = state + .store + .get_draft_version(&sandbox_id) + .await + .map_err(|e| Status::internal(format!("get draft version failed: {e}")))?; + let draft_version = current_version + 1; + + let mut accepted: u32 = 0; + let mut rejected: u32 = 0; + let mut rejection_reasons: Vec = Vec::new(); + let mut accepted_chunk_ids: Vec = Vec::new(); + + for chunk in &req.proposed_chunks { + if chunk.rule_name.is_empty() { + rejected += 1; + rejection_reasons.push("chunk missing rule_name".to_string()); + continue; + } + // `_provider_*` is the reserved namespace for rules synthesized from + // provider profiles during composition. Agent submissions that target + // those keys would merge directly into the provider rule and bypass + // the merge.rs guard that splits agent-authored chunks into their + // own rule so the prover sees their contribution honestly. Reject at + // the entry boundary — the agent never has reason to address a + // provider rule by name. 
+ if chunk.rule_name.starts_with("_provider_") { + rejected += 1; + rejection_reasons.push(format!( + "chunk '{}' uses reserved '_provider_' rule-name prefix", + chunk.rule_name + )); + continue; + } + if chunk.proposed_rule.is_none() { + rejected += 1; + rejection_reasons.push(format!("chunk '{}' missing proposed_rule", chunk.rule_name)); + continue; + } + + let now_ms = current_time_ms(); + let proposed_rule_bytes = chunk + .proposed_rule + .as_ref() + .map(Message::encode_to_vec) + .unwrap_or_default(); + + let rule_ref = chunk.proposed_rule.as_ref(); + let (ep_host, ep_port) = rule_ref + .and_then(|r| r.endpoints.first()) + .map(|ep| (ep.host.to_lowercase(), ep.port as i32)) + .unwrap_or_default(); + let ep_binary = rule_ref + .and_then(|r| r.binaries.first()) + .map(|b| b.path.clone()) + .unwrap_or_default(); + + // The prover runs on every proposal regardless of `analysis_mode`. + // Source provenance (mechanistic vs agent_authored) is preserved in + // OCSF audit fields, but the safety decision is grounded in the + // merged-policy consequence, not the author — proposer-agnostic. + let validation_result = validation_result_for_agent_proposal( + current_policy.clone(), + &chunk.rule_name, + chunk.proposed_rule.as_ref().expect("checked above"), + &credential_set, + ); + + let record = DraftChunkRecord { + // The handler proposes an id; the store may swap it for an + // existing row's id on dedup. Always trust `effective_id` for + // anything user-facing. 
+ id: uuid::Uuid::new_v4().to_string(), + sandbox_id: sandbox_id.clone(), + draft_version, + status: "pending".to_string(), + rule_name: chunk.rule_name.clone(), + proposed_rule: proposed_rule_bytes, + rationale: chunk.rationale.clone(), + security_notes: generate_security_notes( + &ep_host, + u16::try_from(ep_port as u32).unwrap_or(0), + ), + confidence: f64::from(chunk.confidence.clamp(0.0, 1.0)), + created_at_ms: now_ms, + decided_at_ms: None, + host: ep_host, + port: ep_port, + binary: ep_binary, + hit_count: chunk.hit_count.clamp(1, 100), + first_seen_ms: if chunk.first_seen_ms > 0 { + chunk.first_seen_ms + } else { + now_ms + }, + last_seen_ms: if chunk.last_seen_ms > 0 { + chunk.last_seen_ms + } else { + now_ms + }, + validation_result: validation_result.clone(), + rejection_reason: String::new(), + }; + // Mechanistic mode dedups N denials targeting the same endpoint + // into one chunk. All other modes (agent-authored proposals, future + // modes) submit each chunk as a distinct row — the redraft loop + // relies on it, and the conservative default for an unknown mode + // is to keep the proposal rather than silently fold it away. + let dedup_key = matches!(req.analysis_mode.as_str(), "mechanistic") + .then(|| crate::policy_store::observation_dedup_key(&record)); + let effective_id = state + .store + .put_draft_chunk(&record, dedup_key.as_deref()) + .await + .map_err(|e| Status::internal(format!("persist draft chunk failed: {e}")))?; + accepted += 1; + + // Implicit supersede: any other pending chunk for the same + // (host, port, binary) in this sandbox is now stale because this + // newer submission covers the same access decision. Auto-reject the + // older chunks with a clear reason. This is what lets the agent + // refine a mechanistic L4 draft into an L7 narrow proposal without + // any explicit `supersedes_chunk_id` plumbing — the gateway figures + // out the relationship by structural overlap. 
+ supersede_other_pending_chunks_for_endpoint( + state, + &sandbox_id, + &effective_id, + &record.host, + record.port, + &record.binary, + ) + .await; + + // Asymmetric self-reject: if this is a mechanistic proposal that + // arrived AFTER an already-approved agent_authored chunk covered the + // same (host, port, binary), the mechanistic submission is + // redundant — the agent already handled it. Auto-reject so it + // doesn't pile up as approval-queue noise. Agent_authored + // submissions never self-reject; refinement is always allowed. + if req.analysis_mode == "mechanistic" { + self_reject_mechanistic_if_already_covered( + state, + &sandbox_id, + &effective_id, + &record.host, + record.port, + &record.binary, + ) + .await; + } + + // Auto-approval gate (proposer-agnostic, opt-in): only fire when + // BOTH the prover found nothing new in this proposal's delta AND + // the reviewer opted in via the `proposal_approval_mode` setting + // (gateway or sandbox scope). On any failure (merge conflict, + // status update error), the chunk stays pending so a human can + // review — never silently lose a proposal. The `validation_result` + // literal here is the canonical empty-delta verdict; any other + // string means findings or infrastructure error, both of which + // require human attention. 
+ if auto_approve_enabled + && validation_result == "prover: no new findings" + && let Err(err) = auto_approve_chunk( + state, + &sandbox_id, + sandbox.object_name(), + &effective_id, + &req.analysis_mode, + resolved_from, + ) + .await + { + warn!( + chunk_id = %effective_id, + sandbox_id = %sandbox_id, + error = %err, + "auto-approval failed; chunk remains pending for human review" + ); + } + + accepted_chunk_ids.push(effective_id); + } + + state.sandbox_watch_bus.notify(&sandbox_id); + + info!( + sandbox_id = %sandbox_id, + accepted = accepted, + rejected = rejected, + draft_version = draft_version, + summaries = req.summaries.len(), + "SubmitPolicyAnalysis: persisted draft chunks" + ); + + Ok(Response::new(SubmitPolicyAnalysisResponse { + accepted_chunks: accepted, + rejected_chunks: rejected, + rejection_reasons, + accepted_chunk_ids, + })) +} + +pub(super) async fn handle_get_draft_policy( + state: &Arc, + request: Request, +) -> Result, Status> { + let principal = request + .extensions() + .get::() + .cloned() + .ok_or_else(|| Status::unauthenticated("missing principal"))?; + let req = request.into_inner(); + if req.name.is_empty() { + return Err(Status::invalid_argument("name is required")); + } + + let sandbox = + resolve_sandbox_by_name_for_principal(state.store.as_ref(), &principal, &req.name).await?; + let sandbox_id = sandbox.object_id().to_string(); + + let status_filter = if req.status_filter.is_empty() { + None + } else { + Some(req.status_filter.as_str()) + }; + + let records = state + .store + .list_draft_chunks(&sandbox_id, status_filter) + .await + .map_err(|e| Status::internal(format!("list draft chunks failed: {e}")))?; + + let draft_version = state + .store + .get_draft_version(&sandbox_id) + .await + .map_err(|e| Status::internal(format!("get draft version failed: {e}")))?; + + let chunks: Vec = records + .into_iter() + .map(|r| draft_chunk_record_to_proto(&r)) + .collect::, _>>()?; + + let last_analyzed_at_ms = chunks.iter().map(|c| 
c.created_at_ms).max().unwrap_or(0); + + debug!( + sandbox_id = %sandbox_id, + chunk_count = chunks.len(), + draft_version = draft_version, + "GetDraftPolicy: served draft chunks" + ); + + Ok(Response::new(GetDraftPolicyResponse { + chunks, + rolling_summary: String::new(), + draft_version: u64::try_from(draft_version).unwrap_or(0), + last_analyzed_at_ms, + })) +} + +pub(super) async fn handle_approve_draft_chunk( + state: &Arc, + request: Request, +) -> Result, Status> { + let req = request.into_inner(); + if req.name.is_empty() { + return Err(Status::invalid_argument("name is required")); + } + if req.chunk_id.is_empty() { + return Err(Status::invalid_argument("chunk_id is required")); } + require_no_global_policy(state).await?; + let sandbox = state .store .get_message_by_name::(&req.name) @@ -1982,9 +2406,9 @@ pub(super) async fn handle_undo_draft_chunk( .ok_or_else(|| Status::not_found("chunk not found"))?; ensure_chunk_belongs_to_sandbox(&chunk, &sandbox_id)?; - if chunk.status != "approved" { + if chunk.status != "pending" && chunk.status != "rejected" { return Err(Status::failed_precondition(format!( - "chunk status is '{}', expected 'approved'", + "chunk status is '{}', expected 'pending' or 'rejected'", chunk.status ))); } @@ -1995,17 +2419,19 @@ pub(super) async fn handle_undo_draft_chunk( rule_name = %chunk.rule_name, host = %chunk.host, port = chunk.port, - "UndoDraftChunk: removing rule from active policy" + hit_count = chunk.hit_count, + prev_status = %chunk.status, + "ApproveDraftChunk: merging rule into active policy" ); - let (version, hash) = remove_chunk_from_policy(state, &sandbox_id, &chunk).await?; + let (version, hash) = + merge_chunk_into_policy(state.store.as_ref(), &sandbox_id, &chunk).await?; + let chunk_summary = summarize_draft_chunk_rule(&chunk)?; - // Clear any prior rejection_reason on the way back to "pending" so an - // agent reading the chunk via policy.local cannot see a stale guidance - // string left over from a previous 
reject → undo round. + let now_ms = current_time_ms(); state .store - .update_draft_chunk_status(&req.chunk_id, "pending", None, Some("")) + .update_draft_chunk_status(&req.chunk_id, "approved", Some(now_ms), None) .await .map_err(|e| Status::internal(format!("update chunk status failed: {e}")))?; @@ -2013,10 +2439,10 @@ pub(super) async fn handle_undo_draft_chunk( emit_gateway_policy_audit_log( &sandbox_id, sandbox.object_name(), - "removed", + "approved", format!( - "gateway reverted approved draft chunk {}: remove-binary {} {}", - req.chunk_id, chunk.rule_name, chunk.binary + "gateway approved draft chunk {}: {chunk_summary}", + req.chunk_id ), version, &hash, @@ -2028,23 +2454,26 @@ pub(super) async fn handle_undo_draft_chunk( rule_name = %chunk.rule_name, version = version, policy_hash = %hash, - "UndoDraftChunk: rule removed, chunk reverted to pending" + "ApproveDraftChunk: rule merged successfully" ); - Ok(Response::new(UndoDraftChunkResponse { + Ok(Response::new(ApproveDraftChunkResponse { policy_version: u32::try_from(version).unwrap_or(0), policy_hash: hash, })) } -pub(super) async fn handle_clear_draft_chunks( +pub(super) async fn handle_reject_draft_chunk( state: &Arc, - request: Request, -) -> Result, Status> { + request: Request, +) -> Result, Status> { let req = request.into_inner(); if req.name.is_empty() { return Err(Status::invalid_argument("name is required")); } + if req.chunk_id.is_empty() { + return Err(Status::invalid_argument("chunk_id is required")); + } let sandbox = state .store @@ -2054,34 +2483,81 @@ pub(super) async fn handle_clear_draft_chunks( .ok_or_else(|| Status::not_found("sandbox not found"))?; let sandbox_id = sandbox.object_id().to_string(); - let deleted = state + let chunk = state .store - .delete_draft_chunks(&sandbox_id, "pending") + .get_draft_chunk(&req.chunk_id) .await - .map_err(|e| Status::internal(format!("delete draft chunks failed: {e}")))?; + .map_err(|e| Status::internal(format!("fetch chunk failed: {e}")))? 
+ .ok_or_else(|| Status::not_found("chunk not found"))?; + ensure_chunk_belongs_to_sandbox(&chunk, &sandbox_id)?; - state.sandbox_watch_bus.notify(&sandbox_id); + if chunk.status != "pending" && chunk.status != "approved" { + return Err(Status::failed_precondition(format!( + "chunk status is '{}', expected 'pending' or 'approved'", + chunk.status + ))); + } + + let was_approved = chunk.status == "approved"; info!( sandbox_id = %sandbox_id, - chunks_cleared = deleted, - "ClearDraftChunks: pending chunks cleared" + chunk_id = %req.chunk_id, + rule_name = %chunk.rule_name, + host = %chunk.host, + port = chunk.port, + reason = %req.reason, + prev_status = %chunk.status, + "RejectDraftChunk: rejecting chunk" ); - Ok(Response::new(ClearDraftChunksResponse { - chunks_cleared: u32::try_from(deleted).unwrap_or(0), - })) + if was_approved { + require_no_global_policy(state).await?; + let (version, hash) = remove_chunk_from_policy(state, &sandbox_id, &chunk).await?; + emit_gateway_policy_audit_log( + &sandbox_id, + sandbox.object_name(), + "removed", + format!( + "gateway removed previously approved draft chunk {}: remove-binary {} {}", + req.chunk_id, chunk.rule_name, chunk.binary + ), + version, + &hash, + ); + } + + let now_ms = current_time_ms(); + // Persist the reviewer's free-form `reason` into the chunk's + // `rejection_reason` field so the in-sandbox agent can read it back via + // GetDraftPolicy / policy.local and revise the proposal. 
+ let persisted_reason = if req.reason.is_empty() { + None + } else { + Some(req.reason.as_str()) + }; + state + .store + .update_draft_chunk_status(&req.chunk_id, "rejected", Some(now_ms), persisted_reason) + .await + .map_err(|e| Status::internal(format!("update chunk status failed: {e}")))?; + + state.sandbox_watch_bus.notify(&sandbox_id); + + Ok(Response::new(RejectDraftChunkResponse {})) } -pub(super) async fn handle_get_draft_history( +pub(super) async fn handle_approve_all_draft_chunks( state: &Arc, - request: Request, -) -> Result, Status> { + request: Request, +) -> Result, Status> { let req = request.into_inner(); if req.name.is_empty() { return Err(Status::invalid_argument("name is required")); } + require_no_global_policy(state).await?; + let sandbox = state .store .get_message_by_name::(&req.name) @@ -2090,85 +2566,371 @@ pub(super) async fn handle_get_draft_history( .ok_or_else(|| Status::not_found("sandbox not found"))?; let sandbox_id = sandbox.object_id().to_string(); - let all_chunks = state + let pending_chunks = state .store - .list_draft_chunks(&sandbox_id, None) + .list_draft_chunks(&sandbox_id, Some("pending")) .await .map_err(|e| Status::internal(format!("list draft chunks failed: {e}")))?; - let mut entries: Vec = Vec::new(); + if pending_chunks.is_empty() { + return Err(Status::failed_precondition("no pending chunks to approve")); + } - for chunk in &all_chunks { - entries.push(DraftHistoryEntry { - timestamp_ms: chunk.created_at_ms, - event_type: "proposed".to_string(), - description: format!( - "Rule '{}' proposed (confidence: {:.0}%)", - chunk.rule_name, - chunk.confidence * 100.0 - ), - chunk_id: chunk.id.clone(), - }); + info!( + sandbox_id = %sandbox_id, + pending_count = pending_chunks.len(), + include_security_flagged = req.include_security_flagged, + "ApproveAllDraftChunks: starting bulk approval" + ); - if let Some(decided_at) = chunk.decided_at_ms { - entries.push(DraftHistoryEntry { - timestamp_ms: decided_at, - event_type: 
chunk.status.clone(), - description: format!("Rule '{}' {}", chunk.rule_name, chunk.status), - chunk_id: chunk.id.clone(), - }); + let mut chunks_approved: u32 = 0; + let mut chunks_skipped: u32 = 0; + let mut last_version: i64 = 0; + let mut last_hash = String::new(); + + for chunk in &pending_chunks { + if !req.include_security_flagged && !chunk.security_notes.is_empty() { + info!( + sandbox_id = %sandbox_id, + chunk_id = %chunk.id, + rule_name = %chunk.rule_name, + security_notes = %chunk.security_notes, + "ApproveAllDraftChunks: skipping security-flagged chunk" + ); + chunks_skipped += 1; + continue; } + + info!( + sandbox_id = %sandbox_id, + chunk_id = %chunk.id, + rule_name = %chunk.rule_name, + host = %chunk.host, + port = chunk.port, + "ApproveAllDraftChunks: merging chunk" + ); + + let (version, hash) = + merge_chunk_into_policy(state.store.as_ref(), &sandbox_id, chunk).await?; + last_version = version; + last_hash = hash; + let chunk_summary = summarize_draft_chunk_rule(chunk)?; + + let now_ms = current_time_ms(); + state + .store + .update_draft_chunk_status(&chunk.id, "approved", Some(now_ms), None) + .await + .map_err(|e| Status::internal(format!("update chunk status failed: {e}")))?; + + emit_gateway_policy_audit_log( + &sandbox_id, + sandbox.object_name(), + "approved", + format!("gateway approved draft chunk {}: {chunk_summary}", chunk.id), + version, + &last_hash, + ); + chunks_approved += 1; } - entries.sort_by_key(|e| e.timestamp_ms); + state.sandbox_watch_bus.notify(&sandbox_id); + emit_gateway_policy_audit_log( + &sandbox_id, + sandbox.object_name(), + "merged", + format!( + "gateway bulk-approved {chunks_approved} draft chunk(s) and skipped {chunks_skipped}" + ), + last_version, + &last_hash, + ); - debug!( + info!( sandbox_id = %sandbox_id, - entry_count = entries.len(), - "GetDraftHistory: served draft history" + chunks_approved = chunks_approved, + chunks_skipped = chunks_skipped, + version = last_version, + policy_hash = %last_hash, + 
"ApproveAllDraftChunks: bulk approval complete" ); - Ok(Response::new(GetDraftHistoryResponse { entries })) + Ok(Response::new(ApproveAllDraftChunksResponse { + policy_version: u32::try_from(last_version).unwrap_or(0), + policy_hash: last_hash, + chunks_approved, + chunks_skipped, + })) } -// --------------------------------------------------------------------------- -// Policy helper functions -// --------------------------------------------------------------------------- - -/// Compute a deterministic SHA-256 hash of a `SandboxPolicy`. -fn deterministic_policy_hash(policy: &ProtoSandboxPolicy) -> String { - let mut hasher = Sha256::new(); - hasher.update(policy.version.to_le_bytes()); - if let Some(fs) = &policy.filesystem { - hasher.update(fs.encode_to_vec()); - } - if let Some(ll) = &policy.landlock { - hasher.update(ll.encode_to_vec()); +pub(super) async fn handle_edit_draft_chunk( + state: &Arc, + request: Request, +) -> Result, Status> { + let req = request.into_inner(); + if req.name.is_empty() { + return Err(Status::invalid_argument("name is required")); } - if let Some(p) = &policy.process { - hasher.update(p.encode_to_vec()); + if req.chunk_id.is_empty() { + return Err(Status::invalid_argument("chunk_id is required")); } - let mut entries: Vec<_> = policy.network_policies.iter().collect(); - entries.sort_by_key(|(k, _)| k.as_str()); - for (key, value) in entries { - hasher.update(key.as_bytes()); - hasher.update(value.encode_to_vec()); + let proposed_rule = req + .proposed_rule + .ok_or_else(|| Status::invalid_argument("proposed_rule is required"))?; + + let sandbox = state + .store + .get_message_by_name::(&req.name) + .await + .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? + .ok_or_else(|| Status::not_found("sandbox not found"))?; + let sandbox_id = sandbox.object_id().to_string(); + + let chunk = state + .store + .get_draft_chunk(&req.chunk_id) + .await + .map_err(|e| Status::internal(format!("fetch chunk failed: {e}")))? 
+ .ok_or_else(|| Status::not_found("chunk not found"))?; + ensure_chunk_belongs_to_sandbox(&chunk, &sandbox_id)?; + + if chunk.status != "pending" { + return Err(Status::failed_precondition(format!( + "chunk status is '{}', expected 'pending'", + chunk.status + ))); } - hex::encode(hasher.finalize()) + + let rule_bytes = proposed_rule.encode_to_vec(); + state + .store + .update_draft_chunk_rule(&req.chunk_id, &rule_bytes) + .await + .map_err(|e| Status::internal(format!("update chunk rule failed: {e}")))?; + + info!( + chunk_id = %req.chunk_id, + "EditDraftChunk: proposed rule updated" + ); + + Ok(Response::new(EditDraftChunkResponse {})) } -/// Compute a fingerprint for the effective sandbox configuration. -fn compute_config_revision( - policy: Option<&ProtoSandboxPolicy>, - settings: &HashMap, - policy_source: PolicySource, -) -> u64 { - let mut hasher = Sha256::new(); - hasher.update((policy_source as i32).to_le_bytes()); - if let Some(policy) = policy { - hasher.update(deterministic_policy_hash(policy).as_bytes()); - } - let mut entries: Vec<_> = settings.iter().collect(); +pub(super) async fn handle_undo_draft_chunk( + state: &Arc, + request: Request, +) -> Result, Status> { + let req = request.into_inner(); + if req.name.is_empty() { + return Err(Status::invalid_argument("name is required")); + } + if req.chunk_id.is_empty() { + return Err(Status::invalid_argument("chunk_id is required")); + } + + let sandbox = state + .store + .get_message_by_name::(&req.name) + .await + .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? + .ok_or_else(|| Status::not_found("sandbox not found"))?; + let sandbox_id = sandbox.object_id().to_string(); + + let chunk = state + .store + .get_draft_chunk(&req.chunk_id) + .await + .map_err(|e| Status::internal(format!("fetch chunk failed: {e}")))? 
+ .ok_or_else(|| Status::not_found("chunk not found"))?; + ensure_chunk_belongs_to_sandbox(&chunk, &sandbox_id)?; + + if chunk.status != "approved" { + return Err(Status::failed_precondition(format!( + "chunk status is '{}', expected 'approved'", + chunk.status + ))); + } + + info!( + sandbox_id = %sandbox_id, + chunk_id = %req.chunk_id, + rule_name = %chunk.rule_name, + host = %chunk.host, + port = chunk.port, + "UndoDraftChunk: removing rule from active policy" + ); + + let (version, hash) = remove_chunk_from_policy(state, &sandbox_id, &chunk).await?; + + // Clear any prior rejection_reason on the way back to "pending" so an + // agent reading the chunk via policy.local cannot see a stale guidance + // string left over from a previous reject → undo round. + state + .store + .update_draft_chunk_status(&req.chunk_id, "pending", None, Some("")) + .await + .map_err(|e| Status::internal(format!("update chunk status failed: {e}")))?; + + state.sandbox_watch_bus.notify(&sandbox_id); + emit_gateway_policy_audit_log( + &sandbox_id, + sandbox.object_name(), + "removed", + format!( + "gateway reverted approved draft chunk {}: remove-binary {} {}", + req.chunk_id, chunk.rule_name, chunk.binary + ), + version, + &hash, + ); + + info!( + sandbox_id = %sandbox_id, + chunk_id = %req.chunk_id, + rule_name = %chunk.rule_name, + version = version, + policy_hash = %hash, + "UndoDraftChunk: rule removed, chunk reverted to pending" + ); + + Ok(Response::new(UndoDraftChunkResponse { + policy_version: u32::try_from(version).unwrap_or(0), + policy_hash: hash, + })) +} + +pub(super) async fn handle_clear_draft_chunks( + state: &Arc, + request: Request, +) -> Result, Status> { + let req = request.into_inner(); + if req.name.is_empty() { + return Err(Status::invalid_argument("name is required")); + } + + let sandbox = state + .store + .get_message_by_name::(&req.name) + .await + .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? 
+ .ok_or_else(|| Status::not_found("sandbox not found"))?; + let sandbox_id = sandbox.object_id().to_string(); + + let deleted = state + .store + .delete_draft_chunks(&sandbox_id, "pending") + .await + .map_err(|e| Status::internal(format!("delete draft chunks failed: {e}")))?; + + state.sandbox_watch_bus.notify(&sandbox_id); + + info!( + sandbox_id = %sandbox_id, + chunks_cleared = deleted, + "ClearDraftChunks: pending chunks cleared" + ); + + Ok(Response::new(ClearDraftChunksResponse { + chunks_cleared: u32::try_from(deleted).unwrap_or(0), + })) +} + +pub(super) async fn handle_get_draft_history( + state: &Arc, + request: Request, +) -> Result, Status> { + let req = request.into_inner(); + if req.name.is_empty() { + return Err(Status::invalid_argument("name is required")); + } + + let sandbox = state + .store + .get_message_by_name::(&req.name) + .await + .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? + .ok_or_else(|| Status::not_found("sandbox not found"))?; + let sandbox_id = sandbox.object_id().to_string(); + + let all_chunks = state + .store + .list_draft_chunks(&sandbox_id, None) + .await + .map_err(|e| Status::internal(format!("list draft chunks failed: {e}")))?; + + let mut entries: Vec = Vec::new(); + + for chunk in &all_chunks { + entries.push(DraftHistoryEntry { + timestamp_ms: chunk.created_at_ms, + event_type: "proposed".to_string(), + description: format!( + "Rule '{}' proposed (confidence: {:.0}%)", + chunk.rule_name, + chunk.confidence * 100.0 + ), + chunk_id: chunk.id.clone(), + }); + + if let Some(decided_at) = chunk.decided_at_ms { + entries.push(DraftHistoryEntry { + timestamp_ms: decided_at, + event_type: chunk.status.clone(), + description: format!("Rule '{}' {}", chunk.rule_name, chunk.status), + chunk_id: chunk.id.clone(), + }); + } + } + + entries.sort_by_key(|e| e.timestamp_ms); + + debug!( + sandbox_id = %sandbox_id, + entry_count = entries.len(), + "GetDraftHistory: served draft history" + ); + + 
Ok(Response::new(GetDraftHistoryResponse { entries })) +} + +// --------------------------------------------------------------------------- +// Policy helper functions +// --------------------------------------------------------------------------- + +/// Compute a deterministic SHA-256 hash of a `SandboxPolicy`. +fn deterministic_policy_hash(policy: &ProtoSandboxPolicy) -> String { + let mut hasher = Sha256::new(); + hasher.update(policy.version.to_le_bytes()); + if let Some(fs) = &policy.filesystem { + hasher.update(fs.encode_to_vec()); + } + if let Some(ll) = &policy.landlock { + hasher.update(ll.encode_to_vec()); + } + if let Some(p) = &policy.process { + hasher.update(p.encode_to_vec()); + } + let mut entries: Vec<_> = policy.network_policies.iter().collect(); + entries.sort_by_key(|(k, _)| k.as_str()); + for (key, value) in entries { + hasher.update(key.as_bytes()); + hasher.update(value.encode_to_vec()); + } + hex::encode(hasher.finalize()) +} + +/// Compute a fingerprint for the effective sandbox configuration. +fn compute_config_revision( + policy: Option<&ProtoSandboxPolicy>, + settings: &HashMap, + policy_source: PolicySource, +) -> u64 { + let mut hasher = Sha256::new(); + hasher.update((policy_source as i32).to_le_bytes()); + if let Some(policy) = policy { + hasher.update(deterministic_policy_hash(policy).as_bytes()); + } + let mut entries: Vec<_> = settings.iter().collect(); entries.sort_by_key(|(k, _)| k.as_str()); for (key, setting) in entries { hasher.update(key.as_bytes()); @@ -2302,30 +3064,44 @@ fn generate_security_notes(host: &str, port: u16) -> String { /// /// This is defense-in-depth: the proxy blocks these at runtime, so /// merging them into the active policy would be silently un-enforceable. 
+fn validate_host_not_always_blocked(host: &str) -> Result<(), Status> { + use openshell_core::net::{is_always_blocked_ip, is_known_metadata_hostname}; + use std::net::IpAddr; + + let host = host.trim(); + // Check if the host is a literal always-blocked IP. + if let Ok(ip) = host.parse::() + && is_always_blocked_ip(ip) + { + return Err(Status::invalid_argument(format!( + "proposed rule endpoint host '{host}' is an always-blocked address \ + (loopback/link-local/unspecified); the proxy will deny traffic \ + to this destination regardless of policy" + ))); + } + let host_lc = host.to_lowercase(); + if host_lc == "localhost" || host_lc == "localhost." { + return Err(Status::invalid_argument( + "proposed rule endpoint host 'localhost' is always blocked; \ + the proxy will deny traffic to loopback regardless of policy" + .to_string(), + )); + } + if is_known_metadata_hostname(host) { + return Err(Status::invalid_argument(format!( + "proposed rule endpoint host '{host}' is a known cloud metadata hostname; \ + the proxy will deny traffic to this destination regardless of policy" + ))); + } + Ok(()) +} + fn validate_rule_not_always_blocked(rule: &NetworkPolicyRule) -> Result<(), Status> { - use openshell_core::net::{is_always_blocked_ip, is_always_blocked_net}; + use openshell_core::net::is_always_blocked_net; use std::net::IpAddr; for ep in &rule.endpoints { - // Check if the endpoint host is a literal always-blocked IP. - if let Ok(ip) = ep.host.parse::() - && is_always_blocked_ip(ip) - { - return Err(Status::invalid_argument(format!( - "proposed rule endpoint host '{}' is an always-blocked address \ - (loopback/link-local/unspecified); the proxy will deny traffic \ - to this destination regardless of policy", - ep.host - ))); - } - let host_lc = ep.host.to_lowercase(); - if host_lc == "localhost" || host_lc == "localhost." 
{ - return Err(Status::invalid_argument( - "proposed rule endpoint host 'localhost' is always blocked; \ - the proxy will deny traffic to loopback regardless of policy" - .to_string(), - )); - } + validate_host_not_always_blocked(&ep.host)?; // Check allowed_ips entries. for entry in &ep.allowed_ips { @@ -2496,8 +3272,10 @@ fn parse_proto_add_allow_rules( fn validate_merge_operations_for_server(operations: &[PolicyMergeOp]) -> Result<(), Status> { for operation in operations { - if let PolicyMergeOp::AddRule { rule, .. } = operation { - validate_rule_not_always_blocked(rule)?; + match operation { + PolicyMergeOp::AddRule { rule, .. } => validate_rule_not_always_blocked(rule)?, + PolicyMergeOp::AddAllowRules { host, .. } => validate_host_not_always_blocked(host)?, + _ => {} } } Ok(()) @@ -2646,25 +3424,36 @@ async fn remove_chunk_from_policy( // Settings helpers // --------------------------------------------------------------------------- -fn validate_registered_setting_key(key: &str) -> Result { - settings::setting_for_key(key) - .map(|entry| entry.kind) - .ok_or_else(|| { - Status::invalid_argument(format!( - "unknown setting key '{key}'. Allowed keys: {}", - settings::registered_keys_csv() - )) - }) +fn validate_registered_setting_key( + key: &str, +) -> Result<&'static settings::RegisteredSetting, Status> { + settings::setting_for_key(key).ok_or_else(|| { + Status::invalid_argument(format!( + "unknown setting key '{key}'. 
Allowed keys: {}", + settings::registered_keys_csv() + )) + }) } fn proto_setting_to_stored(key: &str, value: &SettingValue) -> Result { - let expected = validate_registered_setting_key(key)?; + let setting = validate_registered_setting_key(key)?; + let expected = setting.kind; let inner = value .value .as_ref() .ok_or_else(|| Status::invalid_argument("setting_value.value is required"))?; let stored = match (expected, inner) { (SettingValueKind::String, setting_value::Value::StringValue(v)) => { + // Enforce per-key string whitelist at configure time so typos + // (e.g. `proposal_approval_mode=autom`) get rejected here instead + // of silently falling back to the default at runtime. + if let Err(allowed) = setting.validate_string_value(v) { + return Err(Status::invalid_argument(format!( + "setting '{key}' expects one of [{}]; got '{}'", + allowed.join(", "), + v + ))); + } StoredSettingValue::String(v.clone()) } (SettingValueKind::Bool, setting_value::Value::BoolValue(v)) => { @@ -3747,679 +4536,2089 @@ mod tests { ); } - #[tokio::test] - async fn sandbox_config_composition_is_jit_and_does_not_persist_provider_layers() { - let state = test_server_state().await; - enable_providers_v2(&state).await; - state - .store - .put_message(&test_provider("work-github", "github")) - .await - .unwrap(); - state - .store - .put_message(&test_sandbox( - "sb-jit", - "jit", - test_policy_with_rule("sandbox_only", "sandbox.example.com"), - vec!["work-github".to_string()], - )) - .await - .unwrap(); + #[tokio::test] + async fn sandbox_config_composition_is_jit_and_does_not_persist_provider_layers() { + let state = test_server_state().await; + enable_providers_v2(&state).await; + state + .store + .put_message(&test_provider("work-github", "github")) + .await + .unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-jit", + "jit", + test_policy_with_rule("sandbox_only", "sandbox.example.com"), + vec!["work-github".to_string()], + )) + .await + .unwrap(); + + let 
effective_policy = get_sandbox_policy(&state, "sb-jit").await; + assert!( + effective_policy + .network_policies + .contains_key("_provider_work_github") + ); + + let persisted = state + .store + .get_latest_policy("sb-jit") + .await + .unwrap() + .expect("sandbox policy should be lazily backfilled"); + let persisted_policy = ProtoSandboxPolicy::decode(persisted.policy_payload.as_slice()) + .expect("persisted sandbox policy should decode"); + assert!( + persisted_policy + .network_policies + .contains_key("sandbox_only") + ); + assert!( + !persisted_policy + .network_policies + .contains_key("_provider_work_github") + ); + } + + #[tokio::test] + async fn sandbox_config_preserves_overlapping_user_and_provider_rules() { + let state = test_server_state().await; + enable_providers_v2(&state).await; + state + .store + .put_message(&test_provider("work-github", "github")) + .await + .unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-overlap", + "overlap", + test_policy_with_rule("_provider_work_github", "api.github.com"), + vec!["work-github".to_string()], + )) + .await + .unwrap(); + + let effective_policy = get_sandbox_policy(&state, "sb-overlap").await; + + assert!( + effective_policy + .network_policies + .contains_key("_provider_work_github") + ); + assert!( + effective_policy + .network_policies + .contains_key("_provider_work_github_2") + ); + assert_eq!( + effective_policy + .network_policies + .get("_provider_work_github") + .unwrap() + .endpoints[0] + .host, + "api.github.com" + ); + } + + #[tokio::test] + async fn provider_environment_resolution_is_unchanged_by_providers_v2_setting() { + use openshell_core::proto::GetSandboxProviderEnvironmentRequest; + + let state = test_server_state().await; + state + .store + .put_message(&test_provider("work-github", "github")) + .await + .unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-provider-env", + "provider-env", + test_policy_with_rule("sandbox_only", "sandbox.example.com"), + 
vec!["work-github".to_string()], + )) + .await + .unwrap(); + + let legacy_env = handle_get_sandbox_provider_environment( + &state, + with_user(Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-provider-env".to_string(), + })), + ) + .await + .unwrap() + .into_inner() + .environment; + + enable_providers_v2(&state).await; + let v2_env = handle_get_sandbox_provider_environment( + &state, + with_user(Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-provider-env".to_string(), + })), + ) + .await + .unwrap() + .into_inner() + .environment; + + assert_eq!(legacy_env, v2_env); + assert_eq!(v2_env.get("GITHUB_TOKEN"), Some(&"ghp-test".to_string())); + } + + #[tokio::test] + async fn provider_env_revision_changes_when_attached_provider_record_changes() { + use openshell_core::proto::GetSandboxProviderEnvironmentRequest; + use std::time::Duration; + + let state = test_server_state().await; + let mut provider = test_provider("work-github", "github"); + state.store.put_message(&provider).await.unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-provider-revision", + "provider-revision", + test_policy_with_rule("sandbox_only", "sandbox.example.com"), + vec!["work-github".to_string()], + )) + .await + .unwrap(); + + let first = handle_get_sandbox_provider_environment( + &state, + with_user(Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-provider-revision".to_string(), + })), + ) + .await + .unwrap() + .into_inner(); + + tokio::time::sleep(Duration::from_millis(2)).await; + provider + .credentials + .insert("GITHUB_TOKEN".to_string(), "rotated".to_string()); + state.store.put_message(&provider).await.unwrap(); + + let second = handle_get_sandbox_provider_environment( + &state, + with_user(Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-provider-revision".to_string(), + })), + ) + .await + .unwrap() + .into_inner(); + + assert_ne!( + first.provider_env_revision, 
second.provider_env_revision, + "provider object updates must trigger sandbox credential refresh" + ); + assert_eq!( + second.environment.get("GITHUB_TOKEN"), + Some(&"rotated".to_string()) + ); + } + + #[tokio::test] + async fn sandbox_config_and_provider_env_follow_attached_provider_lifecycle() { + use crate::grpc::sandbox::{ + handle_attach_sandbox_provider, handle_detach_sandbox_provider, + }; + use openshell_core::proto::{ + AttachSandboxProviderRequest, DetachSandboxProviderRequest, + GetSandboxProviderEnvironmentRequest, + }; + + let state = test_server_state().await; + enable_providers_v2(&state).await; + state + .store + .put_message(&test_provider("work-github", "github")) + .await + .unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-attach-lifecycle", + "attach-lifecycle", + test_policy_with_rule("sandbox_only", "sandbox.example.com"), + Vec::new(), + )) + .await + .unwrap(); + + let baseline_policy = get_sandbox_policy(&state, "sb-attach-lifecycle").await; + assert!( + !baseline_policy + .network_policies + .contains_key("_provider_work_github") + ); + let baseline_env = handle_get_sandbox_provider_environment( + &state, + with_user(Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-attach-lifecycle".to_string(), + })), + ) + .await + .unwrap() + .into_inner(); + + handle_attach_sandbox_provider( + &state, + with_user(Request::new(AttachSandboxProviderRequest { + sandbox_name: "attach-lifecycle".to_string(), + provider_name: "work-github".to_string(), + expected_resource_version: 0, + })), + ) + .await + .unwrap(); + + let attached_policy = get_sandbox_policy(&state, "sb-attach-lifecycle").await; + assert!( + attached_policy + .network_policies + .contains_key("_provider_work_github") + ); + + let attached_env = handle_get_sandbox_provider_environment( + &state, + with_user(Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-attach-lifecycle".to_string(), + })), + ) + .await + .unwrap() + .into_inner(); 
+ assert_ne!( + baseline_env.provider_env_revision, + attached_env.provider_env_revision + ); + assert_eq!( + attached_env.environment.get("GITHUB_TOKEN"), + Some(&"ghp-test".to_string()) + ); + + handle_detach_sandbox_provider( + &state, + Request::new(DetachSandboxProviderRequest { + sandbox_name: "attach-lifecycle".to_string(), + provider_name: "work-github".to_string(), + expected_resource_version: 0, + }), + ) + .await + .unwrap(); + + let detached_policy = get_sandbox_policy(&state, "sb-attach-lifecycle").await; + assert!( + !detached_policy + .network_policies + .contains_key("_provider_work_github") + ); + + let detached_env = handle_get_sandbox_provider_environment( + &state, + with_user(Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-attach-lifecycle".to_string(), + })), + ) + .await + .unwrap() + .into_inner(); + assert_ne!( + attached_env.provider_env_revision, + detached_env.provider_env_revision + ); + assert!(!detached_env.environment.contains_key("GITHUB_TOKEN")); + } + + #[tokio::test] + #[allow(deprecated)] + async fn custom_imported_profile_policy_and_env_follow_attach_detach_lifecycle() { + use crate::grpc::provider::handle_import_provider_profiles; + use crate::grpc::sandbox::{ + handle_attach_sandbox_provider, handle_detach_sandbox_provider, + }; + use openshell_core::proto::{ + AttachSandboxProviderRequest, DetachSandboxProviderRequest, + GetSandboxProviderEnvironmentRequest, ImportProviderProfilesRequest, NetworkBinary, + ProviderProfile, ProviderProfileCategory, ProviderProfileCredential, + ProviderProfileImportItem, + }; + + let state = test_server_state().await; + enable_providers_v2(&state).await; + handle_import_provider_profiles( + &state, + Request::new(ImportProviderProfilesRequest { + profiles: vec![ProviderProfileImportItem { + source: "custom-api.yaml".to_string(), + profile: Some(ProviderProfile { + id: "custom-api".to_string(), + display_name: "Custom API".to_string(), + description: String::new(), + 
category: ProviderProfileCategory::Other as i32, + credentials: vec![ProviderProfileCredential { + name: "api_key".to_string(), + env_vars: vec!["CUSTOM_API_KEY".to_string()], + auth_style: "bearer".to_string(), + header_name: "authorization".to_string(), + required: true, + ..Default::default() + }], + endpoints: vec![NetworkEndpoint { + host: "api.custom.example".to_string(), + port: 443, + protocol: "rest".to_string(), + rules: vec![L7Rule { + allow: Some(openshell_core::proto::L7Allow { + method: "GET".to_string(), + path: "/v1/**".to_string(), + ..Default::default() + }), + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/custom".to_string(), + harness: true, + }], + inference_capable: false, + discovery: None, + }), + }], + }), + ) + .await + .unwrap(); + + let mut provider = test_provider("work-custom", "custom-api"); + provider.credentials = + std::iter::once(("CUSTOM_API_KEY".to_string(), "custom-secret".to_string())).collect(); + state.store.put_message(&provider).await.unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-custom-attach-lifecycle", + "custom-attach-lifecycle", + test_policy_with_rule("sandbox_only", "sandbox.example.com"), + Vec::new(), + )) + .await + .unwrap(); + + let baseline_policy = get_sandbox_policy(&state, "sb-custom-attach-lifecycle").await; + assert!( + !baseline_policy + .network_policies + .contains_key("_provider_work_custom") + ); + let baseline_env = handle_get_sandbox_provider_environment( + &state, + with_user(Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-custom-attach-lifecycle".to_string(), + })), + ) + .await + .unwrap() + .into_inner(); + + handle_attach_sandbox_provider( + &state, + with_user(Request::new(AttachSandboxProviderRequest { + sandbox_name: "custom-attach-lifecycle".to_string(), + provider_name: "work-custom".to_string(), + expected_resource_version: 0, + })), + ) + .await + .unwrap(); + + let attached_policy = get_sandbox_policy(&state, 
"sb-custom-attach-lifecycle").await; + let custom_rule = attached_policy + .network_policies + .get("_provider_work_custom") + .expect("custom provider rule should be composed after attach"); + assert_eq!(custom_rule.endpoints[0].host, "api.custom.example"); + assert_eq!(custom_rule.endpoints[0].protocol, "rest"); + assert_eq!(custom_rule.endpoints[0].rules.len(), 1); + assert_eq!(custom_rule.binaries[0].path, "/usr/bin/custom"); + + let attached_env = handle_get_sandbox_provider_environment( + &state, + with_user(Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-custom-attach-lifecycle".to_string(), + })), + ) + .await + .unwrap() + .into_inner(); + assert_ne!( + baseline_env.provider_env_revision, + attached_env.provider_env_revision + ); + assert_eq!( + attached_env.environment.get("CUSTOM_API_KEY"), + Some(&"custom-secret".to_string()) + ); + + handle_detach_sandbox_provider( + &state, + Request::new(DetachSandboxProviderRequest { + sandbox_name: "custom-attach-lifecycle".to_string(), + provider_name: "work-custom".to_string(), + expected_resource_version: 0, + }), + ) + .await + .unwrap(); + + let detached_policy = get_sandbox_policy(&state, "sb-custom-attach-lifecycle").await; + assert!( + !detached_policy + .network_policies + .contains_key("_provider_work_custom") + ); + let detached_env = handle_get_sandbox_provider_environment( + &state, + with_user(Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-custom-attach-lifecycle".to_string(), + })), + ) + .await + .unwrap() + .into_inner(); + assert_ne!( + attached_env.provider_env_revision, + detached_env.provider_env_revision + ); + assert!(!detached_env.environment.contains_key("CUSTOM_API_KEY")); + } + + #[tokio::test] + async fn global_policy_suppresses_provider_profile_layers_when_v2_enabled() { + use openshell_core::proto::{ + GetSandboxConfigRequest, NetworkEndpoint, NetworkPolicyRule, SandboxPhase, + SandboxPolicy, SandboxSpec, + }; + + let state = 
test_server_state().await; + state + .store + .put_message(&test_provider("work-github", "github")) + .await + .unwrap(); + + let sandbox_policy = SandboxPolicy { + network_policies: std::iter::once(( + "sandbox_only".to_string(), + NetworkPolicyRule { + name: "sandbox_only".to_string(), + endpoints: vec![NetworkEndpoint { + host: "sandbox.example.com".to_string(), + port: 443, + ..Default::default() + }], + ..Default::default() + }, + )) + .collect(), + ..Default::default() + }; + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-global-profile".to_string(), + name: "global-profile-sandbox".to_string(), + created_at_ms: 1_000_000, + labels: HashMap::new(), + resource_version: 0, + }), + spec: Some(SandboxSpec { + policy: Some(sandbox_policy), + providers: vec!["work-github".to_string()], + ..Default::default() + }), + ..Default::default() + }; + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); + + let global_policy = SandboxPolicy { + network_policies: std::iter::once(( + "global_only".to_string(), + NetworkPolicyRule { + name: "global_only".to_string(), + endpoints: vec![NetworkEndpoint { + host: "global.example.com".to_string(), + port: 443, + ..Default::default() + }], + ..Default::default() + }, + )) + .collect(), + ..Default::default() + }; + let global_settings = StoredSettings { + revision: 1, + settings: [ + ( + settings::PROVIDERS_V2_ENABLED_KEY.to_string(), + StoredSettingValue::Bool(true), + ), + ( + POLICY_SETTING_KEY.to_string(), + StoredSettingValue::Bytes(hex::encode(global_policy.encode_to_vec())), + ), + ] + .into_iter() + .collect(), + ..Default::default() + }; + save_global_settings(state.store.as_ref(), &global_settings) + .await + .unwrap(); + + let response = handle_get_sandbox_config( + &state, + with_user(Request::new(GetSandboxConfigRequest { + sandbox_id: "sb-global-profile".to_string(), + })), + ) + .await + .unwrap() + 
.into_inner(); + + let effective_policy = response.policy.expect("global policy should be returned"); + assert_eq!(response.policy_source, PolicySource::Global as i32); + assert!( + effective_policy + .network_policies + .contains_key("global_only") + ); + assert!( + !effective_policy + .network_policies + .contains_key("sandbox_only") + ); + assert!( + !effective_policy + .network_policies + .contains_key("_provider_work_github") + ); + } + + #[tokio::test] + async fn sandbox_policy_backfill_on_update_when_no_baseline() { + use openshell_core::proto::{FilesystemPolicy, LandlockPolicy, SandboxPhase, SandboxSpec}; + + let store = test_store().await; + + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-backfill".to_string(), + name: "backfill-sandbox".to_string(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + spec: Some(SandboxSpec { + policy: None, + ..Default::default() + }), + ..Default::default() + }; + sandbox.set_phase(SandboxPhase::Provisioning as i32); + store.put_message(&sandbox).await.unwrap(); + + let new_policy = ProtoSandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + include_workdir: true, + read_only: vec!["/usr".into()], + read_write: vec!["/tmp".into()], + }), + landlock: Some(LandlockPolicy { + compatibility: "best_effort".into(), + }), + process: Some(openshell_core::proto::ProcessPolicy { + run_as_user: "sandbox".into(), + run_as_group: "sandbox".into(), + }), + ..Default::default() + }; + + let mut sandbox = store + .get_message::("sb-backfill") + .await + .unwrap() + .unwrap(); + if let Some(ref mut spec) = sandbox.spec { + spec.policy = Some(new_policy.clone()); + } + store.put_message(&sandbox).await.unwrap(); + + let loaded = store + .get_message::("sb-backfill") + .await + .unwrap() + .unwrap(); + let policy = loaded.spec.unwrap().policy.unwrap(); + assert_eq!(policy.version, 1); + 
assert!(policy.filesystem.is_some()); + assert_eq!(policy.process.unwrap().run_as_user, "sandbox"); + } + + /// Test helper: pin the proposal approval mode for a sandbox via the + /// settings model, mirroring what `openshell settings set + /// proposal_approval_mode ` would do at runtime. + async fn seed_sandbox_approval_mode(state: &Arc, sandbox_name: &str, mode: &str) { + let mut settings = load_sandbox_settings(state.store.as_ref(), sandbox_name) + .await + .unwrap(); + settings.settings.insert( + settings::PROPOSAL_APPROVAL_MODE_KEY.to_string(), + StoredSettingValue::String(mode.to_string()), + ); + settings.revision = settings.revision.wrapping_add(1); + save_sandbox_settings(state.store.as_ref(), sandbox_name, &settings) + .await + .unwrap(); + } + + /// Test helper: pin the gateway-wide proposal approval mode, mirroring + /// `openshell settings set --global proposal_approval_mode `. + async fn seed_global_approval_mode(state: &Arc, mode: &str) { + let mut settings = load_global_settings(state.store.as_ref()).await.unwrap(); + settings.settings.insert( + settings::PROPOSAL_APPROVAL_MODE_KEY.to_string(), + StoredSettingValue::String(mode.to_string()), + ); + settings.revision = settings.revision.wrapping_add(1); + save_global_settings(state.store.as_ref(), &settings) + .await + .unwrap(); + } + + #[tokio::test] + async fn draft_chunk_handler_lifecycle_round_trip() { + use openshell_core::proto::{ + GetDraftPolicyRequest, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxSpec, + }; + + let state = test_server_state().await; + // Attach a github provider so the proposal below has a credential in + // scope for api.github.com. This causes the prover to emit a HIGH + // finding (L4 + credential in scope), keeping the chunk pending so + // the manual approve/reject lifecycle this test exercises is + // reachable. Without a provider, the proposal would auto-approve and + // the lifecycle assertions would no longer apply. 
+ state + .store + .put_message(&test_provider("github-pat", "github")) + .await + .unwrap(); + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-draft-flow".to_string(), + name: "draft-flow".to_string(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + spec: Some(SandboxSpec { + policy: None, + providers: vec!["github-pat".to_string()], + ..Default::default() + }), + ..Default::default() + }; + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); + let sandbox_name = sandbox.object_name().to_string(); + + let proposed_rule = NetworkPolicyRule { + name: "allow_github".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + let submit = handle_submit_policy_analysis( + &state, + with_user(Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + proposed_chunks: vec![PolicyChunk { + rule_name: "allow_github".to_string(), + proposed_rule: Some(proposed_rule.clone()), + rationale: "observed denied request".to_string(), + confidence: 0.85, + hit_count: 3, + first_seen_ms: 100, + last_seen_ms: 200, + binary: "/usr/bin/curl".to_string(), + ..Default::default() + }], + ..Default::default() + })), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(submit.accepted_chunks, 1); + assert_eq!(submit.rejected_chunks, 0); + assert_eq!(submit.accepted_chunk_ids.len(), 1); + assert!(!submit.accepted_chunk_ids[0].is_empty()); + + let draft_policy = handle_get_draft_policy( + &state, + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name.clone(), + status_filter: String::new(), + })), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(draft_policy.draft_version, 1); + assert_eq!(draft_policy.chunks.len(), 1); + // 
The proposal is L4 to a host with a credential in scope, so the + // prover emits a finding and the chunk stays pending for the + // manual approve path this test exercises. + assert_eq!(draft_policy.chunks[0].status, "pending"); + let chunk_id = draft_policy.chunks[0].id.clone(); + + let approve = handle_approve_draft_chunk( + &state, + Request::new(ApproveDraftChunkRequest { + name: sandbox_name.clone(), + chunk_id: chunk_id.clone(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(approve.policy_version, 1); + assert!(!approve.policy_hash.is_empty()); + + let history_after_approve = handle_get_draft_history( + &state, + Request::new(GetDraftHistoryRequest { + name: sandbox_name.clone(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(history_after_approve.entries.len(), 2); + assert_eq!(history_after_approve.entries[0].event_type, "proposed"); + assert_eq!(history_after_approve.entries[1].event_type, "approved"); + assert_eq!(history_after_approve.entries[1].chunk_id, chunk_id); + + let policies_after_approve = handle_list_sandbox_policies( + &state, + Request::new(ListSandboxPoliciesRequest { + name: sandbox_name.clone(), + limit: 10, + offset: 0, + global: false, + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(policies_after_approve.revisions.len(), 1); + assert_eq!(policies_after_approve.revisions[0].version, 1); + + let undo = handle_undo_draft_chunk( + &state, + Request::new(UndoDraftChunkRequest { + name: sandbox_name.clone(), + chunk_id: chunk_id.clone(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(undo.policy_version, 2); + assert!(!undo.policy_hash.is_empty()); + + let draft_policy_after_undo = handle_get_draft_policy( + &state, + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name.clone(), + status_filter: String::new(), + })), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(draft_policy_after_undo.chunks.len(), 1); + assert_eq!(draft_policy_after_undo.chunks[0].status, 
"pending"); + + let history_after_undo = handle_get_draft_history( + &state, + Request::new(GetDraftHistoryRequest { + name: sandbox_name.clone(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(history_after_undo.entries.len(), 1); + assert_eq!(history_after_undo.entries[0].event_type, "proposed"); + + let policies_after_undo = handle_list_sandbox_policies( + &state, + Request::new(ListSandboxPoliciesRequest { + name: sandbox_name.clone(), + limit: 10, + offset: 0, + global: false, + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(policies_after_undo.revisions.len(), 2); + assert_eq!(policies_after_undo.revisions[0].version, 2); + assert_eq!(policies_after_undo.revisions[1].version, 1); + + let cleared = handle_clear_draft_chunks( + &state, + Request::new(ClearDraftChunksRequest { + name: sandbox_name.clone(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(cleared.chunks_cleared, 1); + + let draft_policy_after_clear = handle_get_draft_policy( + &state, + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name.clone(), + status_filter: String::new(), + })), + ) + .await + .unwrap() + .into_inner(); + assert!(draft_policy_after_clear.chunks.is_empty()); + + let history_after_clear = handle_get_draft_history( + &state, + Request::new(GetDraftHistoryRequest { name: sandbox_name }), + ) + .await + .unwrap() + .into_inner(); + assert!(history_after_clear.entries.is_empty()); + } + + /// A reviewer's free-form rejection reason must round-trip through + /// persistence and surface on the chunk via `GetDraftPolicy`, so the + /// in-sandbox agent can read the guidance and redraft. The MVP-v2 agent + /// feedback loop hangs off this guarantee. 
+ #[tokio::test] + async fn reject_with_reason_persists_into_chunk_for_agent_readback() { + use openshell_core::proto::{NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxSpec}; + + let state = test_server_state().await; + let sandbox_name = "agent-feedback-loop".to_string(); + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-feedback".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + spec: Some(SandboxSpec { + policy: None, + ..Default::default() + }), + ..Default::default() + }; + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); + + let proposed_rule = NetworkPolicyRule { + name: "allow_example".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; - let effective_policy = get_sandbox_policy(&state, "sb-jit").await; - assert!( - effective_policy - .network_policies - .contains_key("_provider_work_github") - ); + let submit = handle_submit_policy_analysis( + &state, + with_user(Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + proposed_chunks: vec![PolicyChunk { + rule_name: "allow_example".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "agent intent".to_string(), + ..Default::default() + }], + ..Default::default() + })), + ) + .await + .unwrap() + .into_inner(); + let chunk_id = submit.accepted_chunk_ids[0].clone(); - let persisted = state - .store - .get_latest_policy("sb-jit") - .await - .unwrap() - .expect("sandbox policy should be lazily backfilled"); - let persisted_policy = ProtoSandboxPolicy::decode(persisted.policy_payload.as_slice()) - .expect("persisted sandbox policy should decode"); - assert!( - persisted_policy - .network_policies 
- .contains_key("sandbox_only") - ); - assert!( - !persisted_policy - .network_policies - .contains_key("_provider_work_github") + let guidance = "scope to docs/ paths only, not all repo contents"; + handle_reject_draft_chunk( + &state, + Request::new(RejectDraftChunkRequest { + name: sandbox_name.clone(), + chunk_id: chunk_id.clone(), + reason: guidance.to_string(), + }), + ) + .await + .unwrap(); + + let draft = handle_get_draft_policy( + &state, + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), + })), + ) + .await + .unwrap() + .into_inner(); + let rejected = draft + .chunks + .iter() + .find(|c| c.id == chunk_id) + .expect("rejected chunk should still be visible"); + assert_eq!(rejected.status, "rejected"); + assert_eq!( + rejected.rejection_reason, guidance, + "reviewer's free-form reason must round-trip into the chunk for agent readback" ); + // The prover now runs on every proposal regardless of analysis_mode. + // For this rule (L4 to api.example.com, no provider attached, no + // credential in scope), v1 calibration emits no finding — so the + // verdict is the clean "no new findings" string, not empty. 
+ assert_eq!(rejected.validation_result, "prover: no new findings"); } #[tokio::test] - async fn sandbox_config_preserves_overlapping_user_and_provider_rules() { + async fn agent_authored_exact_l7_proposal_gets_prover_pass_verdict() { + use openshell_core::proto::{ + FilesystemPolicy, L7Allow, L7Rule, NetworkBinary, NetworkEndpoint, SandboxPhase, + SandboxPolicy, SandboxSpec, + }; + let state = test_server_state().await; - enable_providers_v2(&state).await; - state - .store - .put_message(&test_provider("work-github", "github")) - .await - .unwrap(); - state - .store - .put_message(&test_sandbox( - "sb-overlap", - "overlap", - test_policy_with_rule("_provider_work_github", "api.github.com"), - vec!["work-github".to_string()], - )) - .await - .unwrap(); + let sandbox_name = "agent-l7-verdict".to_string(); + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-agent-l7-verdict".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }; + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); + // Opt this sandbox into auto-approval via the settings model — same + // path the CLI's `--approval-mode auto` exercises — to test the + // empty-delta → approved path. 
+ seed_sandbox_approval_mode(&state, &sandbox_name, "auto").await; - let effective_policy = get_sandbox_policy(&state, "sb-overlap").await; + let proposed_rule = NetworkPolicyRule { + name: "github_contents_write".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + rules: vec![L7Rule { + allow: Some(L7Allow { + method: "PUT".to_string(), + path: "/repos/org/repo/contents/demo/file.md".to_string(), + ..Default::default() + }), + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; - assert!( - effective_policy - .network_policies - .contains_key("_provider_work_github") - ); - assert!( - effective_policy - .network_policies - .contains_key("_provider_work_github_2") + handle_submit_policy_analysis( + &state, + with_user(Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "github_contents_write".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "write one demo file".to_string(), + ..Default::default() + }], + ..Default::default() + })), + ) + .await + .unwrap(); + + let draft = handle_get_draft_policy( + &state, + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), + })), + ) + .await + .unwrap() + .into_inner(); + let verdict = &draft.chunks[0].validation_result; + assert_eq!( + verdict, "prover: no new findings", + "exact L7 PUT against an inspected endpoint should not introduce \ + any new findings over baseline; got: {verdict}" ); + // Auto-approval gate: empty delta + sandbox opted into auto mode → + // status flips to approved without human action. The canonical + // happy path for agent speed. 
assert_eq!( - effective_policy - .network_policies - .get("_provider_work_github") - .unwrap() - .endpoints[0] - .host, - "api.github.com" + draft.chunks[0].status, "approved", + "empty-delta agent-authored proposal under auto mode must auto-approve; \ + got status: {}", + draft.chunks[0].status ); } + /// Implicit supersede: when a refined agent-authored proposal lands for + /// the same `(host, port, binary)` as a pending mechanistic chunk, the + /// older mechanistic chunk is auto-rejected with a "superseded by + /// chunk X" reason. This is the refinement loop without a + /// `supersedes_chunk_id` field — structural overlap is enough. #[tokio::test] - async fn provider_environment_resolution_is_unchanged_by_providers_v2_setting() { - use openshell_core::proto::GetSandboxProviderEnvironmentRequest; + async fn agent_authored_submission_supersedes_pending_mechanistic_for_same_endpoint() { + use openshell_core::proto::{ + FilesystemPolicy, L7Allow, L7Rule, NetworkBinary, NetworkEndpoint, SandboxPhase, + SandboxPolicy, SandboxSpec, + }; let state = test_server_state().await; + // github provider attached so the mechanistic L4 lands a HIGH + // finding and stays pending. 
state .store - .put_message(&test_provider("work-github", "github")) - .await - .unwrap(); - state - .store - .put_message(&test_sandbox( - "sb-provider-env", - "provider-env", - test_policy_with_rule("sandbox_only", "sandbox.example.com"), - vec!["work-github".to_string()], - )) + .put_message(&test_provider("github-pat", "github")) .await .unwrap(); + let sandbox_name = "supersede-flow".to_string(); + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-supersede-flow".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + providers: vec!["github-pat".to_string()], + ..Default::default() + }), + ..Default::default() + }; + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); - let legacy_env = handle_get_sandbox_provider_environment( + // Step 1: mechanistic submits a broad L4 grant; the prover flags it + // HIGH, so it lands in pending. 
+ let mechanistic_rule = NetworkPolicyRule { + name: "allow_api_github_com_443".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + let mechanistic_submit = handle_submit_policy_analysis( &state, - with_user(Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-provider-env".to_string(), + with_user(Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "mechanistic".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "allow_api_github_com_443".to_string(), + proposed_rule: Some(mechanistic_rule), + rationale: "Allow /usr/bin/curl to connect to api.github.com:443.".to_string(), + ..Default::default() + }], + ..Default::default() })), ) .await .unwrap() - .into_inner() - .environment; + .into_inner(); + let mechanistic_chunk_id = mechanistic_submit.accepted_chunk_ids[0].clone(); - enable_providers_v2(&state).await; - let v2_env = handle_get_sandbox_provider_environment( + // Sanity-check: the mechanistic chunk is pending and carries a HIGH + // finding. 
+ let draft = handle_get_draft_policy( &state, - with_user(Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-provider-env".to_string(), + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name.clone(), + status_filter: String::new(), })), ) .await .unwrap() - .into_inner() - .environment; - - assert_eq!(legacy_env, v2_env); - assert_eq!(v2_env.get("GITHUB_TOKEN"), Some(&"ghp-test".to_string())); - } - - #[tokio::test] - async fn provider_env_revision_changes_when_attached_provider_record_changes() { - use openshell_core::proto::GetSandboxProviderEnvironmentRequest; - use std::time::Duration; - - let state = test_server_state().await; - let mut provider = test_provider("work-github", "github"); - state.store.put_message(&provider).await.unwrap(); - state - .store - .put_message(&test_sandbox( - "sb-provider-revision", - "provider-revision", - test_policy_with_rule("sandbox_only", "sandbox.example.com"), - vec!["work-github".to_string()], - )) - .await - .unwrap(); + .into_inner(); + let mech = draft + .chunks + .iter() + .find(|c| c.id == mechanistic_chunk_id) + .expect("mechanistic chunk present"); + assert_eq!(mech.status, "pending"); + // Mechanistic L4 with credential in scope flags as new credentialed + // reach for the binary on the host. + assert!( + mech.validation_result + .contains("credential_reach_expansion"), + "mechanistic L4 with credential in scope should emit \ + credential_reach_expansion; got: {}", + mech.validation_result + ); - let first = handle_get_sandbox_provider_environment( + // Step 2: the agent refines into a narrow L7 proposal for the SAME + // (host, port, binary). The baseline policy has no pre-existing rule + // for curl on api.github.com, so the L7 PUT grants brand-new + // credentialed reach: the prover emits credential_reach_expansion + // (not capability_expansion), as asserted below. The agent + // chunk stays pending for human review. 
The mechanistic chunk gets + // auto-rejected as superseded regardless of the agent chunk's own + // validation verdict — supersede is unconditional on `(host, port, + // binary)` overlap. + let agent_rule = NetworkPolicyRule { + name: "github_contents_put".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + rules: vec![L7Rule { + allow: Some(L7Allow { + method: "PUT".to_string(), + path: "/repos/owner/name/contents/path/file.md".to_string(), + ..Default::default() + }), + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + let agent_submit = handle_submit_policy_analysis( &state, - with_user(Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-provider-revision".to_string(), + with_user(Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "github_contents_put".to_string(), + proposed_rule: Some(agent_rule), + rationale: "refined L7 scope for the demo write".to_string(), + ..Default::default() + }], + ..Default::default() })), ) .await .unwrap() .into_inner(); + let agent_chunk_id = agent_submit.accepted_chunk_ids[0].clone(); - tokio::time::sleep(Duration::from_millis(2)).await; - provider - .credentials - .insert("GITHUB_TOKEN".to_string(), "rotated".to_string()); - state.store.put_message(&provider).await.unwrap(); - - let second = handle_get_sandbox_provider_environment( + let draft_after = handle_get_draft_policy( &state, - with_user(Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-provider-revision".to_string(), + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), })), ) .await .unwrap() .into_inner(); - assert_ne!( - first.provider_env_revision, 
second.provider_env_revision, - "provider object updates must trigger sandbox credential refresh" + let agent = draft_after + .chunks + .iter() + .find(|c| c.id == agent_chunk_id) + .expect("agent chunk present"); + let mech_after = draft_after + .chunks + .iter() + .find(|c| c.id == mechanistic_chunk_id) + .expect("mechanistic chunk should still be visible (with new status)"); + + assert_eq!( + agent.status, "pending", + "agent-authored L7 PUT with credential in scope must land in pending; \ + the baseline policy has no pre-existing rule for curl on api.github.com \ + so the agent's chunk grants brand-new credentialed reach. got: {}", + agent.status + ); + assert!( + agent + .validation_result + .contains("credential_reach_expansion"), + "agent chunk should carry credential_reach_expansion (new credentialed reach \ + on api.github.com); got: {}", + agent.validation_result ); assert_eq!( - second.environment.get("GITHUB_TOKEN"), - Some(&"rotated".to_string()) + mech_after.status, "rejected", + "older mechanistic chunk for same (host, port, binary) should be superseded; \ + got: {}", + mech_after.status + ); + assert!( + mech_after.rejection_reason.contains(&agent_chunk_id), + "rejection reason should cite the superseding chunk id; got: {}", + mech_after.rejection_reason + ); + assert!( + mech_after.rejection_reason.contains("superseded"), + "rejection reason should explain the supersede; got: {}", + mech_after.rejection_reason ); } + /// Auto-approval is **proposer-agnostic**: a mechanistic proposal whose + /// prover delta is empty auto-approves the same way an agent-authored one + /// does. Source provenance is preserved in the audit trail (OCSF event + /// `source=mechanistic`) but does not change the safety decision. 
#[tokio::test] - async fn sandbox_config_and_provider_env_follow_attached_provider_lifecycle() { - use crate::grpc::sandbox::{ - handle_attach_sandbox_provider, handle_detach_sandbox_provider, - }; + async fn mechanistic_proposal_with_empty_delta_also_auto_approves() { use openshell_core::proto::{ - AttachSandboxProviderRequest, DetachSandboxProviderRequest, - GetSandboxProviderEnvironmentRequest, + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, }; let state = test_server_state().await; - enable_providers_v2(&state).await; - state - .store - .put_message(&test_provider("work-github", "github")) - .await - .unwrap(); - state - .store - .put_message(&test_sandbox( - "sb-attach-lifecycle", - "attach-lifecycle", - test_policy_with_rule("sandbox_only", "sandbox.example.com"), - Vec::new(), - )) - .await - .unwrap(); + let sandbox_name = "mechanistic-clean".to_string(); + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-mechanistic-clean".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + // No providers → no credential in scope for the proposed host. + ..Default::default() + }), + ..Default::default() + }; + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); + // Opt into auto mode via the settings model to test the + // proposer-agnostic gate. 
+ seed_sandbox_approval_mode(&state, &sandbox_name, "auto").await; - let baseline_policy = get_sandbox_policy(&state, "sb-attach-lifecycle").await; - assert!( - !baseline_policy - .network_policies - .contains_key("_provider_work_github") - ); - let baseline_env = handle_get_sandbox_provider_environment( - &state, - with_user(Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-attach-lifecycle".to_string(), - })), - ) - .await - .unwrap() - .into_inner(); + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; - handle_attach_sandbox_provider( + handle_submit_policy_analysis( &state, - with_user(Request::new(AttachSandboxProviderRequest { - sandbox_name: "attach-lifecycle".to_string(), - provider_name: "work-github".to_string(), - expected_resource_version: 0, + with_user(Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "mechanistic".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "Allow /usr/bin/curl to connect to example.com:443.".to_string(), + ..Default::default() + }], + ..Default::default() })), ) .await .unwrap(); - let attached_policy = get_sandbox_policy(&state, "sb-attach-lifecycle").await; - assert!( - attached_policy - .network_policies - .contains_key("_provider_work_github") - ); - - let attached_env = handle_get_sandbox_provider_environment( + let draft = handle_get_draft_policy( &state, - with_user(Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-attach-lifecycle".to_string(), + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), })), ) .await .unwrap() .into_inner(); - assert_ne!( - 
baseline_env.provider_env_revision, - attached_env.provider_env_revision - ); + let verdict = &draft.chunks[0].validation_result; + assert_eq!(verdict, "prover: no new findings"); assert_eq!( - attached_env.environment.get("GITHUB_TOKEN"), - Some(&"ghp-test".to_string()) + draft.chunks[0].status, "approved", + "empty-delta mechanistic proposal under auto mode must auto-approve \ + (proposer-agnostic); got status: {}", + draft.chunks[0].status ); + } - handle_detach_sandbox_provider( - &state, - Request::new(DetachSandboxProviderRequest { - sandbox_name: "attach-lifecycle".to_string(), - provider_name: "work-github".to_string(), - expected_resource_version: 0, + /// `protocol: rest, access: full` on a host where the binary had no + /// prior credentialed reach: the prover emits + /// `credential_reach_expansion`. (The per-method `capability_expansion` + /// paths are suppressed by the gateway delta because the reach is + /// new; one finding describes the change, not eight.) + #[tokio::test] + async fn agent_authored_l7_full_with_credential_emits_reach_expansion() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; + + let state = test_server_state().await; + state + .store + .put_message(&test_provider("github-pat", "github")) + .await + .unwrap(); + let sandbox_name = "l7-full-with-cred".to_string(); + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-l7-full-with-cred".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + providers: vec!["github-pat".to_string()], + ..Default::default() }), + ..Default::default() + }; + 
sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); + seed_sandbox_approval_mode(&state, &sandbox_name, "auto").await; + + // L7-annotated (protocol: rest, enforce) but access: full — no + // method/path bound. Credential in scope. + let proposed_rule = NetworkPolicyRule { + name: "github_l7_full".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + access: "full".to_string(), + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( + &state, + with_user(Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "github_l7_full".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "broad L7 dressing".to_string(), + ..Default::default() + }], + ..Default::default() + })), ) .await .unwrap(); - let detached_policy = get_sandbox_policy(&state, "sb-attach-lifecycle").await; - assert!( - !detached_policy - .network_policies - .contains_key("_provider_work_github") - ); - - let detached_env = handle_get_sandbox_provider_environment( + let draft = handle_get_draft_policy( &state, - with_user(Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-attach-lifecycle".to_string(), + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), })), ) .await .unwrap() .into_inner(); - assert_ne!( - attached_env.provider_env_revision, - detached_env.provider_env_revision + let verdict = &draft.chunks[0].validation_result; + assert!( + verdict.contains("credential_reach_expansion"), + "L7 `access: full` on a host the binary did not previously reach must emit \ + credential_reach_expansion; got: {verdict}" + ); + // 
Capability_expansion paths for the same (binary, host:port) are + // suppressed when the reach itself is new — one finding, not many. + assert!( + !verdict.contains("capability_expansion"), + "capability_expansion must be suppressed when reach itself is new; got: {verdict}" + ); + assert_eq!( + draft.chunks[0].status, "pending", + "any prover finding must keep the chunk in pending despite auto mode; got: {}", + draft.chunks[0].status ); - assert!(!detached_env.environment.contains_key("GITHUB_TOKEN")); } + /// Acceptance criterion #7: default approval mode is manual. A sandbox + /// with no `proposal_approval_mode` setting at either scope must NOT + /// auto-approve empty-delta proposals; the chunk lands in `pending` for + /// human review. This is the default-deny safeguard: auto-approval is + /// an explicit opt-in, not a global behavior change shipped under a + /// feature. #[tokio::test] - #[allow(deprecated)] - async fn custom_imported_profile_policy_and_env_follow_attach_detach_lifecycle() { - use crate::grpc::provider::handle_import_provider_profiles; - use crate::grpc::sandbox::{ - handle_attach_sandbox_provider, handle_detach_sandbox_provider, - }; + async fn empty_delta_does_not_auto_approve_when_mode_unset() { use openshell_core::proto::{ - AttachSandboxProviderRequest, DetachSandboxProviderRequest, - GetSandboxProviderEnvironmentRequest, ImportProviderProfilesRequest, NetworkBinary, - ProviderProfile, ProviderProfileCategory, ProviderProfileCredential, - ProviderProfileImportItem, + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, }; let state = test_server_state().await; - enable_providers_v2(&state).await; - handle_import_provider_profiles( - &state, - Request::new(ImportProviderProfilesRequest { - profiles: vec![ProviderProfileImportItem { - source: "custom-api.yaml".to_string(), - profile: Some(ProviderProfile { - id: "custom-api".to_string(), - display_name: "Custom API".to_string(), - description: 
String::new(), - category: ProviderProfileCategory::Other as i32, - credentials: vec![ProviderProfileCredential { - name: "api_key".to_string(), - env_vars: vec!["CUSTOM_API_KEY".to_string()], - auth_style: "bearer".to_string(), - header_name: "authorization".to_string(), - required: true, - ..Default::default() - }], - endpoints: vec![NetworkEndpoint { - host: "api.custom.example".to_string(), - port: 443, - protocol: "rest".to_string(), - rules: vec![L7Rule { - allow: Some(openshell_core::proto::L7Allow { - method: "GET".to_string(), - path: "/v1/**".to_string(), - ..Default::default() - }), - }], - ..Default::default() - }], - binaries: vec![NetworkBinary { - path: "/usr/bin/custom".to_string(), - harness: true, - }], - inference_capable: false, - discovery: None, + let sandbox_name = "default-manual-mode".to_string(); + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-default-manual-mode".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() }), - }], + ..Default::default() + }), + // No approval-mode setting seeded at sandbox or gateway + // scope — the resolver must treat absence as "manual". 
+ ..Default::default() }), + ..Default::default() + }; + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); + + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( + &state, + with_user(Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "un-credentialed L4 — prover sees no finding".to_string(), + ..Default::default() + }], + ..Default::default() + })), ) .await .unwrap(); - let mut provider = test_provider("work-custom", "custom-api"); - provider.credentials = - std::iter::once(("CUSTOM_API_KEY".to_string(), "custom-secret".to_string())).collect(); - state.store.put_message(&provider).await.unwrap(); - state - .store - .put_message(&test_sandbox( - "sb-custom-attach-lifecycle", - "custom-attach-lifecycle", - test_policy_with_rule("sandbox_only", "sandbox.example.com"), - Vec::new(), - )) - .await - .unwrap(); - - let baseline_policy = get_sandbox_policy(&state, "sb-custom-attach-lifecycle").await; - assert!( - !baseline_policy - .network_policies - .contains_key("_provider_work_custom") - ); - let baseline_env = handle_get_sandbox_provider_environment( + let draft = handle_get_draft_policy( &state, - with_user(Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-custom-attach-lifecycle".to_string(), + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), })), ) .await .unwrap() .into_inner(); + let verdict = &draft.chunks[0].validation_result; + assert_eq!( + verdict, "prover: no new findings", + 
"prover should still emit no findings; gate is downstream", + ); + assert_eq!( + draft.chunks[0].status, "pending", + "default (unset) proposal_approval_mode must not auto-approve; \ + chunk should wait for human review. got status: {}", + draft.chunks[0].status + ); + } - handle_attach_sandbox_provider( + /// Unknown `proposal_approval_mode` strings (typos, future-mode values + /// the gateway doesn't yet know about) fall back to manual. This locks + /// in forward-compat: a future CLI that learns about `"auto_on_low_risk"` + /// can never accidentally bypass an older gateway's review gate just by + /// virtue of an unrecognized value defaulting to "auto." + #[tokio::test] + async fn empty_delta_does_not_auto_approve_when_mode_unknown_string() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; + + let state = test_server_state().await; + let sandbox_name = "unknown-mode".to_string(); + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-unknown-mode".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }; + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); + // A future-CLI value the current gateway doesn't recognize. 
+ seed_sandbox_approval_mode(&state, &sandbox_name, "auto_on_low_risk").await; + + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( &state, - with_user(Request::new(AttachSandboxProviderRequest { - sandbox_name: "custom-attach-lifecycle".to_string(), - provider_name: "work-custom".to_string(), - expected_resource_version: 0, + with_user(Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "un-credentialed L4".to_string(), + ..Default::default() + }], + ..Default::default() })), ) .await .unwrap(); - let attached_policy = get_sandbox_policy(&state, "sb-custom-attach-lifecycle").await; - let custom_rule = attached_policy - .network_policies - .get("_provider_work_custom") - .expect("custom provider rule should be composed after attach"); - assert_eq!(custom_rule.endpoints[0].host, "api.custom.example"); - assert_eq!(custom_rule.endpoints[0].protocol, "rest"); - assert_eq!(custom_rule.endpoints[0].rules.len(), 1); - assert_eq!(custom_rule.binaries[0].path, "/usr/bin/custom"); - - let attached_env = handle_get_sandbox_provider_environment( + let draft = handle_get_draft_policy( &state, - with_user(Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-custom-attach-lifecycle".to_string(), + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), })), ) .await .unwrap() .into_inner(); - assert_ne!( - baseline_env.provider_env_revision, - attached_env.provider_env_revision - ); assert_eq!( - attached_env.environment.get("CUSTOM_API_KEY"), - 
Some(&"custom-secret".to_string()) + draft.chunks[0].status, "pending", + "unknown approval-mode strings must fall back to manual; \ + only the literal \"auto\" opts in. got: {}", + draft.chunks[0].status ); + } - handle_detach_sandbox_provider( - &state, - Request::new(DetachSandboxProviderRequest { - sandbox_name: "custom-attach-lifecycle".to_string(), - provider_name: "work-custom".to_string(), - expected_resource_version: 0, + /// Explicit `"manual"` is equivalent to the unset default — chunk lands + /// in pending even with empty delta. + #[tokio::test] + async fn empty_delta_does_not_auto_approve_when_mode_explicit_manual() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; + + let state = test_server_state().await; + let sandbox_name = "explicit-manual-mode".to_string(); + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-explicit-manual-mode".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() }), + ..Default::default() + }; + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); + seed_sandbox_approval_mode(&state, &sandbox_name, "manual").await; + + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( + &state, + with_user(Request::new(SubmitPolicyAnalysisRequest { + name: 
sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "un-credentialed L4 — prover sees no finding".to_string(), + ..Default::default() + }], + ..Default::default() + })), ) .await .unwrap(); - let detached_policy = get_sandbox_policy(&state, "sb-custom-attach-lifecycle").await; - assert!( - !detached_policy - .network_policies - .contains_key("_provider_work_custom") - ); - let detached_env = handle_get_sandbox_provider_environment( + let draft = handle_get_draft_policy( &state, - with_user(Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-custom-attach-lifecycle".to_string(), + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), })), ) .await .unwrap() .into_inner(); - assert_ne!( - attached_env.provider_env_revision, - detached_env.provider_env_revision + assert_eq!( + draft.chunks[0].status, "pending", + "explicit manual mode must equal default mode — no auto-approval; \ + got: {}", + draft.chunks[0].status ); - assert!(!detached_env.environment.contains_key("CUSTOM_API_KEY")); } + /// Gateway-scope `proposal_approval_mode = "auto"` enables auto-approval + /// for any sandbox under that gateway, with no per-sandbox setting + /// required. This is the fleet-wide opt-in path — a reviewer flips the + /// gateway setting once and every sandbox without an explicit override + /// gets prover-gated auto-approval. 
#[tokio::test] - async fn global_policy_suppresses_provider_profile_layers_when_v2_enabled() { + async fn empty_delta_auto_approves_from_gateway_scope_setting() { use openshell_core::proto::{ - GetSandboxConfigRequest, NetworkEndpoint, NetworkPolicyRule, SandboxPhase, - SandboxPolicy, SandboxSpec, + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, }; let state = test_server_state().await; - state - .store - .put_message(&test_provider("work-github", "github")) - .await - .unwrap(); - - let sandbox_policy = SandboxPolicy { - network_policies: std::iter::once(( - "sandbox_only".to_string(), - NetworkPolicyRule { - name: "sandbox_only".to_string(), - endpoints: vec![NetworkEndpoint { - host: "sandbox.example.com".to_string(), - port: 443, - ..Default::default() - }], - ..Default::default() - }, - )) - .collect(), - ..Default::default() - }; + let sandbox_name = "gateway-auto-mode".to_string(); let mut sandbox = Sandbox { metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { - id: "sb-global-profile".to_string(), - name: "global-profile-sandbox".to_string(), + id: "sb-gateway-auto-mode".to_string(), + name: sandbox_name.clone(), created_at_ms: 1_000_000, - labels: HashMap::new(), + labels: std::collections::HashMap::new(), resource_version: 0, }), spec: Some(SandboxSpec { - policy: Some(sandbox_policy), - providers: vec!["work-github".to_string()], + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), ..Default::default() }), ..Default::default() }; sandbox.set_phase(SandboxPhase::Ready as i32); state.store.put_message(&sandbox).await.unwrap(); + // Fleet-wide opt-in — no sandbox-scope setting. 
+ seed_global_approval_mode(&state, "auto").await; - let global_policy = SandboxPolicy { - network_policies: std::iter::once(( - "global_only".to_string(), - NetworkPolicyRule { - name: "global_only".to_string(), - endpoints: vec![NetworkEndpoint { - host: "global.example.com".to_string(), - port: 443, + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( + &state, + with_user(Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "un-credentialed L4 — empty delta".to_string(), + ..Default::default() + }], + ..Default::default() + })), + ) + .await + .unwrap(); + + let draft = handle_get_draft_policy( + &state, + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), + })), + ) + .await + .unwrap() + .into_inner(); + assert_eq!( + draft.chunks[0].status, "approved", + "empty-delta proposal must auto-approve when the gateway-scope \ + setting is \"auto\" and no sandbox-scope override exists. got: {}", + draft.chunks[0].status + ); + } + + /// Gateway scope wins over sandbox scope. A reviewer can pin manual mode + /// fleet-wide; a per-sandbox `"auto"` value is silently ignored. Matches + /// the existing settings precedence convention (global wins, sandbox is + /// the per-sandbox override only when no global is set). 
+ #[tokio::test] + async fn gateway_manual_overrides_sandbox_auto() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; + + let state = test_server_state().await; + let sandbox_name = "gateway-pinned-manual".to_string(); + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-gateway-pinned-manual".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], ..Default::default() - }], + }), ..Default::default() - }, - )) - .collect(), + }), + ..Default::default() + }), ..Default::default() }; - let global_settings = StoredSettings { - revision: 1, - settings: [ - ( - settings::PROVIDERS_V2_ENABLED_KEY.to_string(), - StoredSettingValue::Bool(true), - ), - ( - POLICY_SETTING_KEY.to_string(), - StoredSettingValue::Bytes(hex::encode(global_policy.encode_to_vec())), - ), - ] - .into_iter() - .collect(), - ..Default::default() + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); + // Gateway pins manual; the sandbox-scope override is supplied (test + // helper bypasses the UpdateConfig precondition, simulating the + // before-pin state) to prove the resolver still picks the gateway + // value. 
+ seed_global_approval_mode(&state, "manual").await; + seed_sandbox_approval_mode(&state, &sandbox_name, "auto").await; + + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], }; - save_global_settings(state.store.as_ref(), &global_settings) - .await - .unwrap(); - let response = handle_get_sandbox_config( + handle_submit_policy_analysis( &state, - with_user(Request::new(GetSandboxConfigRequest { - sandbox_id: "sb-global-profile".to_string(), + with_user(Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "un-credentialed L4 — empty delta".to_string(), + ..Default::default() + }], + ..Default::default() + })), + ) + .await + .unwrap(); + + let draft = handle_get_draft_policy( + &state, + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), })), ) .await .unwrap() .into_inner(); - - let effective_policy = response.policy.expect("global policy should be returned"); - assert_eq!(response.policy_source, PolicySource::Global as i32); - assert!( - effective_policy - .network_policies - .contains_key("global_only") - ); - assert!( - !effective_policy - .network_policies - .contains_key("sandbox_only") - ); - assert!( - !effective_policy - .network_policies - .contains_key("_provider_work_github") + assert_eq!( + draft.chunks[0].status, "pending", + "gateway-scope \"manual\" must win over sandbox-scope \"auto\"; \ + got: {}", + draft.chunks[0].status ); } + /// Agent submissions targeting a `_provider_*` rule name are rejected at + /// the submit boundary. 
Provider-synthesized rules are a reserved + /// namespace; an agent that addresses one by name could otherwise + /// circumvent the merge guard that splits agent contributions into their + /// own rule (so the prover sees them honestly). #[tokio::test] - async fn sandbox_policy_backfill_on_update_when_no_baseline() { - use openshell_core::proto::{FilesystemPolicy, LandlockPolicy, SandboxPhase, SandboxSpec}; - - let store = test_store().await; + async fn submit_rejects_reserved_provider_rule_name_prefix() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; + let state = test_server_state().await; + let sandbox_name = "reject-provider-prefix".to_string(); let mut sandbox = Sandbox { metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { - id: "sb-backfill".to_string(), - name: "backfill-sandbox".to_string(), + id: "sb-reject-provider-prefix".to_string(), + name: sandbox_name.clone(), created_at_ms: 1_000_000, labels: std::collections::HashMap::new(), resource_version: 0, }), spec: Some(SandboxSpec { - policy: None, + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), ..Default::default() }), ..Default::default() }; - sandbox.set_phase(SandboxPhase::Provisioning as i32); - store.put_message(&sandbox).await.unwrap(); + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); - let new_policy = ProtoSandboxPolicy { - version: 1, - filesystem: Some(FilesystemPolicy { - include_workdir: true, - read_only: vec!["/usr".into()], - read_write: vec!["/tmp".into()], - }), - landlock: Some(LandlockPolicy { - compatibility: "best_effort".into(), - }), - process: Some(openshell_core::proto::ProcessPolicy { - run_as_user: "sandbox".into(), - run_as_group: "sandbox".into(), - }), - ..Default::default() + let 
proposed_rule = NetworkPolicyRule { + name: "github".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], }; - let mut sandbox = store - .get_message::("sb-backfill") - .await - .unwrap() - .unwrap(); - if let Some(ref mut spec) = sandbox.spec { - spec.policy = Some(new_policy.clone()); - } - store.put_message(&sandbox).await.unwrap(); + let response = handle_submit_policy_analysis( + &state, + with_user(Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "_provider_work_github".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "should be rejected — addresses provider rule by name".to_string(), + ..Default::default() + }], + ..Default::default() + })), + ) + .await + .unwrap() + .into_inner(); - let loaded = store - .get_message::("sb-backfill") - .await - .unwrap() - .unwrap(); - let policy = loaded.spec.unwrap().policy.unwrap(); - assert_eq!(policy.version, 1); - assert!(policy.filesystem.is_some()); - assert_eq!(policy.process.unwrap().run_as_user, "sandbox"); + assert_eq!(response.accepted_chunks, 0, "chunk must be rejected"); + assert_eq!(response.rejected_chunks, 1); + assert!( + response + .rejection_reasons + .iter() + .any(|r| r.contains("_provider_")), + "rejection reason must cite the reserved-prefix rule. got: {:?}", + response.rejection_reasons, + ); } + /// v1 calibration row: **L4 with a credential in scope → HIGH finding.** + /// The sandbox has a github provider attached, so a credential is in + /// scope for api.github.com. A broad L4 proposal therefore lands in + /// pending with a HIGH finding. 
#[tokio::test] - async fn draft_chunk_handler_lifecycle_round_trip() { + async fn agent_authored_l4_proposal_with_credential_records_high_finding() { use openshell_core::proto::{ - GetDraftPolicyRequest, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxSpec, + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, }; let state = test_server_state().await; + // Attach a github provider so a credential is in scope for api.github.com. + state + .store + .put_message(&test_provider("github-pat", "github")) + .await + .unwrap(); + let sandbox_name = "agent-l4-with-cred".to_string(); let mut sandbox = Sandbox { metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { - id: "sb-draft-flow".to_string(), - name: "draft-flow".to_string(), + id: "sb-agent-l4-with-cred".to_string(), + name: sandbox_name.clone(), created_at_ms: 1_000_000, labels: std::collections::HashMap::new(), resource_version: 0, }), spec: Some(SandboxSpec { - policy: None, + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + providers: vec!["github-pat".to_string()], ..Default::default() }), ..Default::default() }; sandbox.set_phase(SandboxPhase::Ready as i32); state.store.put_message(&sandbox).await.unwrap(); - let sandbox_name = sandbox.object_name().to_string(); let proposed_rule = NetworkPolicyRule { - name: "allow_example".to_string(), + name: "github_l4".to_string(), endpoints: vec![NetworkEndpoint { - host: "api.example.com".to_string(), + host: "api.github.com".to_string(), port: 443, ..Default::default() }], @@ -4429,208 +6628,444 @@ mod tests { }], }; - let submit = handle_submit_policy_analysis( + handle_submit_policy_analysis( &state, with_user(Request::new(SubmitPolicyAnalysisRequest { name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), proposed_chunks: vec![PolicyChunk { - rule_name: 
"allow_example".to_string(), - proposed_rule: Some(proposed_rule.clone()), - rationale: "observed denied request".to_string(), - confidence: 0.85, - hit_count: 3, - first_seen_ms: 100, - last_seen_ms: 200, - binary: "/usr/bin/curl".to_string(), + rule_name: "github_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "broad fallback".to_string(), ..Default::default() }], ..Default::default() })), ) .await - .unwrap() - .into_inner(); - assert_eq!(submit.accepted_chunks, 1); - assert_eq!(submit.rejected_chunks, 0); - assert_eq!(submit.accepted_chunk_ids.len(), 1); - assert!(!submit.accepted_chunk_ids[0].is_empty()); + .unwrap(); - let draft_policy = handle_get_draft_policy( + let draft = handle_get_draft_policy( &state, with_user(Request::new(GetDraftPolicyRequest { - name: sandbox_name.clone(), + name: sandbox_name, status_filter: String::new(), })), ) .await .unwrap() .into_inner(); - assert_eq!(draft_policy.draft_version, 1); - assert_eq!(draft_policy.chunks.len(), 1); - assert_eq!(draft_policy.chunks[0].status, "pending"); - let chunk_id = draft_policy.chunks[0].id.clone(); + let verdict = &draft.chunks[0].validation_result; + let first_line = verdict.lines().next().unwrap_or(""); + assert!( + first_line.starts_with("prover: ") && first_line.contains("new finding"), + "expected first line like `prover: N new finding(s)`, got: {verdict}" + ); + assert!( + verdict.contains("credential_reach_expansion"), + "L4 + credential in scope emits credential_reach_expansion (the binary gains \ + credentialed reach to a new host:port); got: {verdict}" + ); + assert!( + verdict.contains("api.github.com:443"), + "expected the finding line to cite the proposed endpoint, got: {verdict}" + ); + } - let approve = handle_approve_draft_chunk( - &state, - Request::new(ApproveDraftChunkRequest { + /// v1 calibration row: **L4 with NO credential in scope → no finding.** + /// Without an attached provider, no credential targets api.github.com, + /// so the prover treats 
the L4 grant as bounded (no privileged action + /// available) and emits nothing. The proposal verdict reads + /// `prover: no new findings`, eligible for auto-approval. + #[tokio::test] + async fn agent_authored_l4_proposal_without_credential_emits_no_finding() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; + + let state = test_server_state().await; + let sandbox_name = "agent-l4-no-cred".to_string(); + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-agent-l4-no-cred".to_string(), name: sandbox_name.clone(), - chunk_id: chunk_id.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, }), - ) - .await - .unwrap() - .into_inner(); - assert_eq!(approve.policy_version, 1); - assert!(!approve.policy_hash.is_empty()); + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + // No providers — credential set will be empty. 
+ ..Default::default() + }), + ..Default::default() + }; + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); - let history_after_approve = handle_get_draft_history( + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( &state, - Request::new(GetDraftHistoryRequest { + with_user(Request::new(SubmitPolicyAnalysisRequest { name: sandbox_name.clone(), - }), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "no privileged access available".to_string(), + ..Default::default() + }], + ..Default::default() + })), ) .await - .unwrap() - .into_inner(); - assert_eq!(history_after_approve.entries.len(), 2); - assert_eq!(history_after_approve.entries[0].event_type, "proposed"); - assert_eq!(history_after_approve.entries[1].event_type, "approved"); - assert_eq!(history_after_approve.entries[1].chunk_id, chunk_id); + .unwrap(); - let policies_after_approve = handle_list_sandbox_policies( + let draft = handle_get_draft_policy( &state, - Request::new(ListSandboxPoliciesRequest { - name: sandbox_name.clone(), - limit: 10, - offset: 0, - global: false, - }), + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), + })), ) .await .unwrap() .into_inner(); - assert_eq!(policies_after_approve.revisions.len(), 1); - assert_eq!(policies_after_approve.revisions[0].version, 1); + let verdict = &draft.chunks[0].validation_result; + assert_eq!( + verdict, "prover: no new findings", + "L4 grant with no credential in scope is bounded in v1; got: {verdict}" + ); + } - let undo = handle_undo_draft_chunk( - &state, - 
Request::new(UndoDraftChunkRequest { + /// v1 calibration row: **link-local host → HIGH finding regardless of + /// credentials.** Even with no provider attached, a proposal targeting + /// `169.254.169.254` (AWS IMDS / cloud metadata) emits a HIGH finding. + /// This is the one categorical safety floor v1 ships. + #[tokio::test] + async fn agent_authored_link_local_proposal_records_high_finding() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; + + let state = test_server_state().await; + let sandbox_name = "agent-link-local".to_string(); + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-agent-link-local".to_string(), name: sandbox_name.clone(), - chunk_id: chunk_id.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, }), - ) - .await - .unwrap() - .into_inner(); - assert_eq!(undo.policy_version, 2); - assert!(!undo.policy_hash.is_empty()); + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + // Deliberately no provider — link-local should still fire. 
+ ..Default::default() + }), + ..Default::default() + }; + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); - let draft_policy_after_undo = handle_get_draft_policy( + let proposed_rule = NetworkPolicyRule { + name: "metadata_endpoint".to_string(), + endpoints: vec![NetworkEndpoint { + host: "169.254.169.254".to_string(), + port: 80, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( &state, - with_user(Request::new(GetDraftPolicyRequest { + with_user(Request::new(SubmitPolicyAnalysisRequest { name: sandbox_name.clone(), - status_filter: String::new(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "metadata_endpoint".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "agent is curious about IMDS".to_string(), + ..Default::default() + }], + ..Default::default() })), ) .await - .unwrap() - .into_inner(); - assert_eq!(draft_policy_after_undo.chunks.len(), 1); - assert_eq!(draft_policy_after_undo.chunks[0].status, "pending"); + .unwrap(); - let history_after_undo = handle_get_draft_history( + let draft = handle_get_draft_policy( &state, - Request::new(GetDraftHistoryRequest { - name: sandbox_name.clone(), - }), + with_user(Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), + })), ) .await .unwrap() .into_inner(); - assert_eq!(history_after_undo.entries.len(), 1); - assert_eq!(history_after_undo.entries[0].event_type, "proposed"); + let verdict = &draft.chunks[0].validation_result; + assert!( + verdict.contains("link_local_reach"), + "link-local proposal must emit link_local_reach regardless of credentials; \ + got: {verdict}" + ); + assert!( + verdict.contains("169.254.169.254"), + "finding line must cite the link-local host; got: {verdict}" + ); + } + + #[tokio::test] + async fn 
agent_authored_validation_uses_providers_v2_effective_policy() { + use openshell_core::proto::{ + FilesystemPolicy, L7Allow, L7DenyRule, L7Rule, NetworkBinary, NetworkEndpoint, + ProviderProfile, ProviderProfileCategory, SandboxPhase, SandboxPolicy, SandboxSpec, + StoredProviderProfile, + }; + + let state = test_server_state().await; + enable_providers_v2(&state).await; + state + .store + .put_message(&test_provider("work-custom", "custom-api")) + .await + .unwrap(); + state + .store + .put_message(&StoredProviderProfile { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "profile-custom-api".to_string(), + name: "custom-api".to_string(), + created_at_ms: 1_000_000, + labels: HashMap::new(), + resource_version: 0, + }), + profile: Some(ProviderProfile { + id: "custom-api".to_string(), + display_name: "Custom API".to_string(), + description: String::new(), + category: ProviderProfileCategory::Other as i32, + credentials: Vec::new(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + deny_rules: vec![L7DenyRule { + method: "DELETE".to_string(), + path: "/repos/*".to_string(), + ..Default::default() + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + inference_capable: false, + discovery: None, + }), + }) + .await + .unwrap(); - let policies_after_undo = handle_list_sandbox_policies( - &state, - Request::new(ListSandboxPoliciesRequest { + let sandbox_name = "agent-provider-effective-policy".to_string(); + let mut sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-agent-provider-effective-policy".to_string(), name: sandbox_name.clone(), - limit: 10, - offset: 0, - global: false, + created_at_ms: 1_000_000, + labels: HashMap::new(), + resource_version: 0, }), - ) - .await - .unwrap() - .into_inner(); - assert_eq!(policies_after_undo.revisions.len(), 
2); - assert_eq!(policies_after_undo.revisions[0].version, 2); - assert_eq!(policies_after_undo.revisions[1].version, 1); + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + providers: vec!["work-custom".to_string()], + ..Default::default() + }), + ..Default::default() + }; + sandbox.set_phase(SandboxPhase::Ready as i32); + state.store.put_message(&sandbox).await.unwrap(); - let cleared = handle_clear_draft_chunks( + let proposed_rule = NetworkPolicyRule { + name: "github_contents_write".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + rules: vec![L7Rule { + allow: Some(L7Allow { + method: "PUT".to_string(), + path: "/repos/org/repo/contents/demo/file.md".to_string(), + ..Default::default() + }), + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( &state, - Request::new(ClearDraftChunksRequest { + with_user(Request::new(SubmitPolicyAnalysisRequest { name: sandbox_name.clone(), - }), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "github_contents_write".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "write one demo file".to_string(), + ..Default::default() + }], + ..Default::default() + })), ) .await - .unwrap() - .into_inner(); - assert_eq!(cleared.chunks_cleared, 1); + .unwrap(); - let draft_policy_after_clear = handle_get_draft_policy( + let draft = handle_get_draft_policy( &state, with_user(Request::new(GetDraftPolicyRequest { - name: sandbox_name.clone(), + name: sandbox_name, status_filter: String::new(), })), ) .await .unwrap() .into_inner(); - 
assert!(draft_policy_after_clear.chunks.is_empty()); - - let history_after_clear = handle_get_draft_history( - &state, - Request::new(GetDraftHistoryRequest { name: sandbox_name }), - ) - .await - .unwrap() - .into_inner(); - assert!(history_after_clear.entries.is_empty()); + let verdict = &draft.chunks[0].validation_result; + let first_line = verdict.lines().next().unwrap_or(""); + assert!( + first_line.starts_with("prover: "), + "validation should run end-to-end against the providers-v2 composed \ + effective policy and produce a prover verdict; got: {verdict}" + ); + assert!( + !verdict.contains("validation unavailable"), + "providers-v2 composition must not break the prover pipeline; \ + got: {verdict}" + ); } - /// A reviewer's free-form rejection reason must round-trip through - /// persistence and surface on the chunk via `GetDraftPolicy`, so the - /// in-sandbox agent can read the guidance and redraft. The MVP-v2 agent - /// feedback loop hangs off this guarantee. + /// End-to-end loop test against the v1 calibration and the auto-approval + /// gate. Mirrors the two-path flow in `examples/agent-driven-policy-management`: + /// + /// 1. Un-credentialed L7 proposal (raw.githubusercontent.com GET) → + /// prover sees no findings → sandbox in `auto` mode → chunk + /// auto-approves without human action. + /// + /// 2. Credentialed L7 proposal (api.github.com PUT) → prover sees + /// `github_token` in scope, emits `credential_reach_expansion` → chunk lands in pending + /// for human review even under `auto` mode. + /// + /// This is the deterministic counterpart of the demo's product UX + /// claim: "narrow safe = free, narrow credentialed = one approval." 
#[tokio::test] - async fn reject_with_reason_persists_into_chunk_for_agent_readback() { - use openshell_core::proto::{NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxSpec}; + async fn full_loop_under_v2_auto_mode_splits_credentialed_and_uncredentialed() { + use openshell_core::proto::{ + FilesystemPolicy, L7Allow, L7Rule, NetworkBinary, NetworkEndpoint, SandboxPhase, + SandboxPolicy, SandboxSpec, + }; let state = test_server_state().await; - let sandbox_name = "agent-feedback-loop".to_string(); + enable_providers_v2(&state).await; + + // Github provider attached: a credential ends up in scope for + // api.github.com (PUT proposal raises a prover finding). raw.githubusercontent.com + // is not declared by any provider, so the bootstrap fetch is + // un-credentialed and auto-approves. + state + .store + .put_message(&test_provider("github-pat", "github")) + .await + .unwrap(); + + let sandbox_name = "full-loop-v2".to_string(); let mut sandbox = Sandbox { metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { - id: "sb-feedback".to_string(), + id: "sb-full-loop-v2".to_string(), name: sandbox_name.clone(), created_at_ms: 1_000_000, labels: std::collections::HashMap::new(), resource_version: 0, }), spec: Some(SandboxSpec { - policy: None, + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + providers: vec!["github-pat".to_string()], ..Default::default() }), ..Default::default() }; sandbox.set_phase(SandboxPhase::Ready as i32); state.store.put_message(&sandbox).await.unwrap(); + seed_sandbox_approval_mode(&state, &sandbox_name, "auto").await; - let proposed_rule = NetworkPolicyRule { - name: "allow_example".to_string(), + // ── Step 1: un-credentialed GET → expected auto-approve ── + let uncredentialed_rule = NetworkPolicyRule { + name: "github_raw_openapi_get".to_string(), endpoints: vec![NetworkEndpoint { - host: 
"api.example.com".to_string(), + host: "raw.githubusercontent.com".to_string(), port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + rules: vec![L7Rule { + allow: Some(L7Allow { + method: "GET".to_string(), + path: "/github/rest-api-description/main/descriptions/api.github.com/api.github.com.json" + .to_string(), + ..Default::default() + }), + }], ..Default::default() }], binaries: vec![NetworkBinary { @@ -4638,15 +7073,15 @@ mod tests { ..Default::default() }], }; - - let submit = handle_submit_policy_analysis( + let step1 = handle_submit_policy_analysis( &state, with_user(Request::new(SubmitPolicyAnalysisRequest { name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), proposed_chunks: vec![PolicyChunk { - rule_name: "allow_example".to_string(), - proposed_rule: Some(proposed_rule), - rationale: "agent intent".to_string(), + rule_name: "github_raw_openapi_get".to_string(), + proposed_rule: Some(uncredentialed_rule), + rationale: "fetch the public github openapi description".to_string(), ..Default::default() }], ..Default::default() @@ -4655,19 +7090,48 @@ mod tests { .await .unwrap() .into_inner(); - let chunk_id = submit.accepted_chunk_ids[0].clone(); + let step1_chunk_id = step1.accepted_chunk_ids[0].clone(); - let guidance = "scope to docs/ paths only, not all repo contents"; - handle_reject_draft_chunk( + // ── Step 2: credentialed PUT → expected MEDIUM, pending ── + let credentialed_rule = NetworkPolicyRule { + name: "github_contents_put".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + rules: vec![L7Rule { + allow: Some(L7Allow { + method: "PUT".to_string(), + path: "/repos/owner/name/contents/path/file.md".to_string(), + ..Default::default() + }), + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + 
let step2 = handle_submit_policy_analysis( &state, - Request::new(RejectDraftChunkRequest { + with_user(Request::new(SubmitPolicyAnalysisRequest { name: sandbox_name.clone(), - chunk_id: chunk_id.clone(), - reason: guidance.to_string(), - }), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "github_contents_put".to_string(), + proposed_rule: Some(credentialed_rule), + rationale: "write the demo file via the GitHub Contents API".to_string(), + ..Default::default() + }], + ..Default::default() + })), ) .await - .unwrap(); + .unwrap() + .into_inner(); + let step2_chunk_id = step2.accepted_chunk_ids[0].clone(); let draft = handle_get_draft_policy( &state, @@ -4679,18 +7143,57 @@ mod tests { .await .unwrap() .into_inner(); - let rejected = draft + + let step1_chunk = draft .chunks .iter() - .find(|c| c.id == chunk_id) - .expect("rejected chunk should still be visible"); - assert_eq!(rejected.status, "rejected"); + .find(|c| c.id == step1_chunk_id) + .expect("step1 chunk present"); + let step2_chunk = draft + .chunks + .iter() + .find(|c| c.id == step2_chunk_id) + .expect("step2 chunk present"); + assert_eq!( - rejected.rejection_reason, guidance, - "reviewer's free-form reason must round-trip into the chunk for agent readback" + step1_chunk.status, "approved", + "un-credentialed L7 proposal under v2 + auto mode must auto-approve; got: {}", + step1_chunk.status + ); + assert_eq!( + step1_chunk.validation_result, "prover: no new findings", + "un-credentialed L7 verdict should be `no new findings`; got: {}", + step1_chunk.validation_result + ); + + assert_eq!( + step2_chunk.status, "pending", + "credentialed L7 PUT under v2 + auto mode must stay pending; got: {}", + step2_chunk.status + ); + // This test's spec policy has no pre-existing rule for curl on + // api.github.com, so the agent's chunk grants brand-new + // credentialed reach: the finding is credential_reach_expansion, + // not capability_expansion. 
(The capability_expansion path is + // suppressed by the delta because the reach is new — one finding + // per change, not two.) The demo's policy.template.yaml has + // github_api_readonly which exercises the capability_expansion + // path; that's covered by the supersede test above. + assert!( + step2_chunk + .validation_result + .contains("credential_reach_expansion"), + "credentialed PUT on a host the binary did not previously reach must carry \ + credential_reach_expansion; got: {}", + step2_chunk.validation_result + ); + assert!( + !step2_chunk + .validation_result + .contains("capability_expansion"), + "capability_expansion must be suppressed when reach itself is new; got: {}", + step2_chunk.validation_result ); - // validation_result is unpopulated until the prover runs (#1097). - assert!(rejected.validation_result.is_empty()); } /// Two agent-authored proposals targeting the same host/port/binary must @@ -5027,6 +7530,14 @@ mod tests { use openshell_core::proto::{NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxSpec}; let state = test_server_state().await; + // Attach a github provider so the L4 proposal below has a credential + // in scope and the prover emits a finding — keeps the chunk + // pending so this cross-sandbox approve check is reachable. 
+ state + .store + .put_message(&test_provider("github-pat", "github")) + .await + .unwrap(); let mut sandbox_a = Sandbox { metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { id: "sb-draft-owner".to_string(), @@ -5037,6 +7548,7 @@ mod tests { }), spec: Some(SandboxSpec { policy: None, + providers: vec!["github-pat".to_string()], ..Default::default() }), ..Default::default() @@ -5061,9 +7573,9 @@ mod tests { state.store.put_message(&sandbox_b).await.unwrap(); let proposed_rule = NetworkPolicyRule { - name: "allow_example".to_string(), + name: "allow_github".to_string(), endpoints: vec![NetworkEndpoint { - host: "api.example.com".to_string(), + host: "api.github.com".to_string(), port: 443, ..Default::default() }], @@ -5173,6 +7685,7 @@ mod tests { "gateway merged incremental policy op: add-allow api.github.com:443 [POST /repos/*/issues]", 7, "sha256:testhash", + &[], ); assert_eq!( @@ -5181,6 +7694,50 @@ mod tests { ); } + /// Auto-approval audit messages carry `auto=true`, `source=`, and + /// `prover_delta=empty` as extra unmapped fields so a reviewer can + /// reconstruct the safety reasoning without needing to grep the chunk + /// table. The message text itself says "auto-approved: no new prover + /// findings" — never "safe" — because the claim is about the prover's + /// reasoning, not the world. 
+ #[test] + fn build_gateway_policy_audit_message_carries_auto_approve_provenance() { + let extra = [ + ("auto", "true".to_string()), + ("source", "agent_authored".to_string()), + ("prover_delta", "empty".to_string()), + ]; + let message = build_gateway_policy_audit_message( + "sb-123", + "demo-sandbox", + "approved", + "auto-approved: no new prover findings (source=agent_authored) — chunk abc: add-rule x", + 12, + "sha256:autohash", + &extra, + ); + assert!( + message.contains("CONFIG:APPROVED"), + "auto-approval reuses CONFIG:APPROVED; got: {message}" + ); + assert!( + message.contains("auto-approved: no new prover findings"), + "audit copy must say `no new prover findings`, not `safe`; got: {message}" + ); + assert!( + message.contains("auto:true"), + "missing auto field: {message}" + ); + assert!( + message.contains("source:agent_authored"), + "missing source field: {message}" + ); + assert!( + message.contains("prover_delta:empty"), + "missing prover_delta field: {message}" + ); + } + #[test] fn summarize_cli_policy_merge_op_formats_rest_allow_rules() { let operation = PolicyMergeOp::AddAllowRules { @@ -5702,6 +8259,52 @@ mod tests { assert!(result.unwrap_err().message().contains("always blocked")); } + #[test] + fn validate_rule_rejects_known_metadata_hostname() { + use openshell_core::proto::{NetworkEndpoint, NetworkPolicyRule}; + + let rule = NetworkPolicyRule { + name: "bad".to_string(), + endpoints: vec![NetworkEndpoint { + host: "METADATA.GOOGLE.INTERNAL.".to_string(), + port: 80, + ..Default::default() + }], + binaries: vec![], + }; + let result = validate_rule_not_always_blocked(&rule); + assert!(result.is_err()); + let status = result.unwrap_err(); + assert_eq!(status.code(), Code::InvalidArgument); + assert!(status.message().contains("cloud metadata hostname")); + } + + #[test] + fn validate_merge_operations_rejects_add_allow_for_known_metadata_hostname() { + let operation = PolicyMergeOp::AddAllowRules { + host: 
"metadata.google.internal".to_string(), + port: 80, + rules: vec![L7Rule { + allow: Some(openshell_core::proto::L7Allow { + method: "GET".to_string(), + path: "/computeMetadata/v1/**".to_string(), + command: String::new(), + query: HashMap::new(), + operation_type: String::new(), + operation_name: String::new(), + fields: Vec::new(), + }), + }], + }; + + let result = validate_merge_operations_for_server(&[operation]); + + assert!(result.is_err()); + let status = result.unwrap_err(); + assert_eq!(status.code(), Code::InvalidArgument); + assert!(status.message().contains("cloud metadata hostname")); + } + #[test] fn validate_rule_accepts_rfc1918_allowed_ips() { use openshell_core::proto::{NetworkEndpoint, NetworkPolicyRule}; @@ -5853,6 +8456,78 @@ mod tests { assert_eq!(stored, StoredSettingValue::Bool(true)); } + #[test] + fn proto_setting_to_stored_accepts_allowed_proposal_approval_mode_values() { + for raw in ["manual", "auto"] { + let value = SettingValue { + value: Some(setting_value::Value::StringValue(raw.to_string())), + }; + let stored = proto_setting_to_stored(settings::PROPOSAL_APPROVAL_MODE_KEY, &value) + .unwrap_or_else(|e| panic!("expected '{raw}' to be accepted, got: {e}")); + assert_eq!(stored, StoredSettingValue::String(raw.to_string())); + } + } + + #[test] + fn proto_setting_to_stored_rejects_invalid_proposal_approval_mode_value() { + // Typos and future-reserved modes must be rejected at configure time + // — without this, the value silently resolves to manual at runtime + // (fail-closed) and the operator never finds out they fat-fingered + // the setting. 
+ for raw in ["autom", "AUTO", "Manual", "auto_on_low_risk", "", " auto"] { + let value = SettingValue { + value: Some(setting_value::Value::StringValue(raw.to_string())), + }; + let res = proto_setting_to_stored(settings::PROPOSAL_APPROVAL_MODE_KEY, &value); + assert!( + res.is_err(), + "expected '{raw}' to be rejected, got: {res:?}" + ); + let err = res.unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + } + } + + #[test] + fn proto_setting_to_stored_rejection_message_lists_allowed_proposal_approval_mode_values() { + let value = SettingValue { + value: Some(setting_value::Value::StringValue("autom".to_string())), + }; + let err = + proto_setting_to_stored(settings::PROPOSAL_APPROVAL_MODE_KEY, &value).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + let msg = err.message(); + assert!(msg.contains("manual"), "missing 'manual' in {msg}"); + assert!(msg.contains("auto"), "missing 'auto' in {msg}"); + assert!(msg.contains("autom"), "missing offending value in {msg}"); + } + + /// Locks in that invalid `proposal_approval_mode` is rejected at the + /// `UpdateConfig` RPC boundary — not just in the `proto_setting_to_stored` + /// helper. Prevents a future refactor from accidentally routing setting + /// writes around the validation chokepoint. 
+ #[tokio::test] + async fn update_config_global_rejects_invalid_proposal_approval_mode() { + let state = test_server_state().await; + let req = with_user(Request::new(UpdateConfigRequest { + global: true, + setting_key: settings::PROPOSAL_APPROVAL_MODE_KEY.to_string(), + setting_value: Some(SettingValue { + value: Some(setting_value::Value::StringValue("autom".to_string())), + }), + ..Default::default() + })); + let err = handle_update_config(&state, req) + .await + .expect_err("invalid proposal_approval_mode must be rejected at UpdateConfig"); + assert_eq!(err.code(), Code::InvalidArgument); + assert!( + err.message().contains("autom") && err.message().contains("manual"), + "expected rejection message to echo the bad value and list allowed values; got: {}", + err.message() + ); + } + #[cfg(feature = "dev-settings")] #[test] fn merge_effective_settings_global_overrides_sandbox_key() { diff --git a/crates/openshell-tui/src/app.rs b/crates/openshell-tui/src/app.rs index ba817bcf8..47fa02c64 100644 --- a/crates/openshell-tui/src/app.rs +++ b/crates/openshell-tui/src/app.rs @@ -1017,7 +1017,15 @@ impl App { return; } } - SettingValueKind::String => {} + SettingValueKind::String => { + if let Some(setting) = settings::setting_for_key(&entry.key) + && let Err(allowed) = setting.validate_string_value(raw) + { + edit.error = + Some(format!("expected one of: {}", allowed.join(", "))); + return; + } + } } } edit.error = None; @@ -1261,7 +1269,15 @@ impl App { return; } } - SettingValueKind::String => {} + SettingValueKind::String => { + if let Some(setting) = settings::setting_for_key(&entry.key) + && let Err(allowed) = setting.validate_string_value(raw) + { + edit.error = + Some(format!("expected one of: {}", allowed.join(", "))); + return; + } + } } } edit.error = None; diff --git a/docs/observability/logging.mdx b/docs/observability/logging.mdx index 81c47248c..dcfe9f19d 100644 --- a/docs/observability/logging.mdx +++ b/docs/observability/logging.mdx @@ -198,6 +198,8 @@ 
An upstream that the proxy cannot reach returns `502 Bad Gateway`: The `error` field is a short machine-readable code (`policy_denied`, `ssrf_denied`, `upstream_unreachable`). The `detail` field is a human-readable explanation suitable for display in an agent transcript. +For L7 REST denials, the body also includes structured policy fields such as `method`, `path`, `rule_missing`, and `next_steps`. When policy advisor is enabled, it also includes `agent_guidance`, a short plain-language instruction telling the agent to read `/etc/openshell/skills/policy_advisor.md`, propose the narrowest rule through `http://policy.local/v1/proposals`, wait for `policy_reloaded: true`, and retry. + ## Filesystem Sandbox Logs Landlock filesystem restrictions emit `CONFIG:` events at startup and whenever the sandbox has to skip a requested path. diff --git a/docs/sandboxes/policy-advisor.mdx b/docs/sandboxes/policy-advisor.mdx index d129e5e5a..050db004f 100644 --- a/docs/sandboxes/policy-advisor.mdx +++ b/docs/sandboxes/policy-advisor.mdx @@ -10,7 +10,7 @@ position: 6 Policy advisor lets a running sandboxed agent ask for a narrow network policy change after OpenShell denies a request. The agent submits a draft through `policy.local`, a developer approves or rejects it from outside the sandbox, and approved network policy hot-reloads into the same sandbox. -Policy advisor does not grant access automatically. The structured rule is the approval contract, and the agent's rationale is supporting context. +Policy advisor preserves OpenShell's default-deny posture. The structured rule is the approval contract, and the agent's rationale is supporting context. By default every proposal lands in the draft inbox for human review. Opt-in [auto mode](#approval-modes) lets the gateway approve provably safe proposals — those whose [prover delta](#what-auto-approval-checks) is empty — without a reviewer in the loop; proposals with any prover finding still require human approval. 
## Enable Policy Advisor @@ -47,24 +47,66 @@ openshell settings delete --global \ Set the value before creating a sandbox when you want the first denied request to include policy advisor guidance. Running sandboxes poll settings and can enable the surface after startup, but startup enablement gives the agent the clearest first-denial path. +## Approval Modes + +Every proposal — mechanistic or agent-authored — is routed through the [policy prover](#what-auto-approval-checks). The `proposal_approval_mode` setting decides what happens when the prover finds nothing to flag. + +| Mode | When unset / `manual` | `auto` | +|---|---|---| +| Empty prover delta | Lands in the draft inbox for human review. | Approved automatically; the sandbox hot-reloads the new rule and the agent retries. | +| Any prover finding | Lands in the draft inbox. | Lands in the draft inbox — auto-approval is gated on an empty delta. | + +`manual` is the default. Auto mode is an explicit opt-in; OpenShell's default-deny posture is preserved unless you choose otherwise. + +Enable auto mode at gateway scope when you want every sandbox on this gateway to auto-approve safe proposals: + +```shell +openshell settings set --global \ + --key proposal_approval_mode \ + --value auto \ + --yes +``` + +Enable it for one sandbox when no global value is set: + +```shell +openshell settings set \ + --key proposal_approval_mode \ + --value auto +``` + +The shorthand at create time writes the sandbox-scoped setting for you: + +```shell +openshell sandbox create --approval-mode auto +``` + +Only `manual` and `auto` are accepted; typos like `autom` are rejected at configure time. Stale or unknown values found in storage are still treated as `manual` at runtime as a defense-in-depth measure. + +**Precedence.** Gateway scope wins over sandbox scope. A reviewer can pin `manual` for a fleet by setting it globally; per-sandbox overrides only apply when no global value is set. 
+ +**Audit trail.** Every auto-approval emits a `CONFIG:APPROVED` event with `auto=true`, `source=<analysis_mode>`, `prover_delta=empty`, and `resolved_from=<scope>` so operators can reconstruct why a given approval ran without human review. + ## How It Works When policy advisor is enabled, the sandbox supervisor turns on three agent-facing surfaces: - It installs `/etc/openshell/skills/policy_advisor.md` inside the sandbox. +- It also installs `/etc/openshell/skills/policy-advisor/SKILL.md` as a short Codex/generic-agent pointer, and writes a root `/AGENTS.md` pointer only when the image does not already provide one. - It serves `http://policy.local` from inside the sandbox. -- It adds `next_steps` to L7 `policy_denied` response bodies so the agent can find the skill and local API. +- It adds `agent_guidance` and `next_steps` to L7 `policy_denied` response bodies so the agent can find the skill and local API. -The loop has six steps: +The loop has seven steps: 1. A sandboxed process attempts a network request that policy denies. -2. For inspected REST traffic, OpenShell returns a structured `403` body with fields such as `layer`, `host`, `port`, `binary`, `method`, `path`, `rule_missing`, and `next_steps`. +2. For inspected REST traffic, OpenShell returns a structured `403` body with fields such as `layer`, `host`, `port`, `binary`, `method`, `path`, `rule_missing`, `agent_guidance`, and `next_steps`. 3. The agent reads the policy advisor skill, inspects the current policy, and optionally reads recent denial log lines. 4. The agent submits one or more `addRule` proposals to `http://policy.local/v1/proposals`. -5. The gateway stores accepted proposals as pending draft chunks for the sandbox. -6. A developer reviews the draft, approves or rejects it, and the agent waits on `/v1/proposals/{chunk_id}/wait` until a decision is available. +5. 
The gateway stores accepted proposals as pending draft chunks for the sandbox and runs the [policy prover](#what-auto-approval-checks) against the proposed delta. +6. Under `auto` mode, proposals with an empty prover delta are approved immediately and skip human review. Every other proposal — all proposals under `manual` mode (the default), and any proposal with a prover finding under `auto` mode — lands in the draft inbox for a developer to approve or reject. +7. The agent waits on `/v1/proposals/{chunk_id}/wait` until a decision is available. Approved proposals hot-reload into the sandbox; rejected proposals return `rejection_reason` and `validation_result` so the agent can revise. -When a proposal is approved, `/wait` reports `policy_reloaded: true` only after the local sandbox policy covers the approved rule. At that point the agent can retry the original denied action once. If a proposal is rejected, `/wait` returns `rejection_reason` and `validation_result` so the agent can revise or stop. +When a proposal is approved, `/wait` reports `policy_reloaded: true` only after the local sandbox policy covers the approved rule. At that point the agent can retry the original denied action once. If a proposal is rejected, `/wait` returns `rejection_reason` and `validation_result` so the agent can revise or stop. `validation_result` carries the categorical prover findings — `link_local_reach`, `l7_bypass_credentialed`, `credential_reach_expansion`, `capability_expansion` — so the agent can narrow the next attempt to the specific concern the prover flagged. ## What Gets Proposed @@ -118,6 +160,21 @@ The current `policy.local` JSON shape covers L4 endpoints and REST method or pat Policy advisor proposals do not add `allowed_ips` automatically. If a hostname resolves to an internal or private address, OpenShell's SSRF protections still block the connection until a developer explicitly adds the required `allowed_ips` entry. 
+## What Auto-Approval Checks + +The policy prover runs against every proposal — mechanistic and agent-authored alike — and asks four formal questions about the proposed change. Each "yes" is one categorical finding. Any finding blocks auto-approval; only an empty delta is eligible. + +| Category | Triggered when | +|---|---| +| `link_local_reach` | The proposal reaches a host in `169.254.0.0/16`, `fe80::/10`, or a known metadata hostname such as `metadata.google.internal` (cloud-metadata territory, which serves credentials regardless of sandbox state). Unconditional. | +| `l7_bypass_credentialed` | A binary using a wire protocol the L7 proxy cannot inspect (`git-remote-https`, `ssh`, `nc`) gains reach to a host where a credential is in scope. | +| `credential_reach_expansion` | A binary gains credentialed reach to a `(host, port)` it could not reach before. | +| `capability_expansion` | On a `(binary, host, port)` that already had credentialed reach, the proposal adds a new HTTP method. The finding cites the specific method. | + +Findings are categorical — there is no severity tier. The reviewer reads the category and the structured evidence to decide. When the prover delta is empty, the proposal is provably safe under the model and auto-approval (if enabled) can fire. + +The full reasoning model lives in [`crates/openshell-prover/README.md`](https://github.com/NVIDIA/OpenShell/blob/main/crates/openshell-prover/README.md). Provider profiles composed in via [Providers v2](/sandboxes/providers-v2) are part of the effective policy the prover reasons over. + ## Review Proposals Review pending chunks from the host: @@ -126,6 +183,8 @@ Review pending chunks from the host: openshell rule get --status pending ``` +Under `auto` mode, only proposals the prover flagged appear here; empty-delta proposals are already approved and visible under `--status approved` with the auto-approval audit fields described in [Approval Modes](#approval-modes). 
Under `manual` mode, every proposal — regardless of prover verdict — shows up as pending. + The output shows the chunk ID, status, rationale, binary, and endpoint summary. For L7 proposals, the endpoint summary includes the protocol, method, and path: ```text @@ -160,7 +219,7 @@ The rejection reason is returned to the agent through `policy.local`. The agent | `GET /v1/proposals/{chunk_id}` | Returns one proposal's current `pending`, `approved`, or `rejected` status. | | `GET /v1/proposals/{chunk_id}/wait?timeout=300` | Holds one HTTP request open until the proposal is approved, rejected, or the timeout expires. | -If policy advisor is disabled, every route returns `404 feature_disabled`, the skill is not installed for new sandboxes, and L7 deny bodies do not advertise `policy.local` routes. +If policy advisor is disabled, every route returns `404 feature_disabled`, the skill is not installed for new sandboxes, and L7 deny bodies do not advertise `policy.local` routes or include `agent_guidance`. ## What to Expect @@ -172,7 +231,7 @@ Policy advisor emits audit events into the sandbox log. Use these lines to trace openshell logs --since 10m ``` -Look for `HTTP:* DENIED`, `CONFIG:PROPOSED`, `CONFIG:APPROVED` or `CONFIG:REJECTED`, `CONFIG:LOADED`, and the final allowed request if the agent retries successfully. +Look for `HTTP:* DENIED`, `CONFIG:PROPOSED`, `CONFIG:APPROVED` or `CONFIG:REJECTED`, `CONFIG:LOADED`, and the final allowed request if the agent retries successfully. Auto-approved chunks emit `CONFIG:APPROVED` with `auto=true`, `source=`, `prover_delta=empty`, and `resolved_from=`. ## Next Steps diff --git a/examples/agent-driven-policy-management/README.md b/examples/agent-driven-policy-management/README.md index 190123cfe..4d604d974 100644 --- a/examples/agent-driven-policy-management/README.md +++ b/examples/agent-driven-policy-management/README.md @@ -12,12 +12,16 @@ Run the full agent-driven policy loop end-to-end: 3. 
The agent reads `/etc/openshell/skills/policy_advisor.md`, drafts the narrowest rule needed, and submits it to `http://policy.local/v1/proposals`. It saves the returned `chunk_id`. -4. The agent calls `GET /v1/proposals/{chunk_id}/wait?timeout=300` — a single +4. The gateway merges the proposed rule with the current sandbox policy, runs + the policy prover, and stores a concise `validation_result` on the pending + chunk. This is deterministic control-plane evidence, not agent prose. +5. The agent calls `GET /v1/proposals/{chunk_id}/wait?timeout=300` — a single HTTP request that the supervisor holds open until the developer decides. This is the load-bearing UX point: the agent burns zero LLM tokens while it waits; it's literally sleeping on a socket. -5. You approve the proposal from the host with one keystroke. -6. The agent's `/wait` returns within ~1 second of the approval. The sandbox +6. You approve the proposal from the host with one keystroke after seeing the + exact rule and the prover verdict in `openshell rule get`. +7. The agent's `/wait` returns within ~1 second of the approval. The sandbox has hot-reloaded the merged policy; the agent retries the original PUT once and exits. 
@@ -78,6 +82,8 @@ reject with `--reason "scope to docs/ paths only"` and the agent reads | `DEMO_KEEP_SANDBOX` | `0` (set `1` to inspect the sandbox after the demo) | | `DEMO_MANUAL_APPROVE` | `0` (set `1` to pause for host-side `rule approve` / `rule reject --reason`) | | `DEMO_APPROVAL_TIMEOUT_SECS` | `240` (auto), `1800` (manual mode) | +| `DEMO_CODEX_MODEL` | `gpt-5.4-mini` (pinned for ChatGPT-account compatibility; override if your account supports a different model) | +| `DEMO_CODEX_REASONING` | `low` (the demo task is mechanical; `medium`/`high` slow it down without changing outcomes) | | `OPENSHELL_BIN` | `target/debug/openshell` if present, else `openshell` on `PATH` | ## What the agent sees @@ -99,12 +105,29 @@ with three parts, each with a different trust level: | `validation_result` (prover output) | gateway-side prover | trust signal — but this surface is in progress (see [RFC 0001](../../rfc/0001-agent-driven-policy-management.md)) | The MVP today shows the structured rule plus the agent's rationale in -`openshell rule get` and the TUI inbox panel. The demo's `openshell rule -approve-all` auto-approves to keep the loop short — in a real session a -developer reviews the structured grant before pressing `a`. Prover-backed -validation badges, computed reachability deltas, and a richer "this is what -the rule actually permits" summary are the next phase. For now, **always -approve based on the structured rule, not the agent's rationale.** +`openshell rule get` and the TUI inbox panel. With prover validation wired +into the gateway, `openshell rule get` also shows a `Validation:` line for +agent-authored chunks. 
The value is the prover's verdict in OCSF-shorthand +style — one short, scannable string per chunk: + +```text +Validation: prover: no new findings +``` + +```text +Validation: prover: 1 new finding + capability_expansion: PUT on api.github.com:443 via /usr/bin/curl +``` + +Other possible verdicts: `validation unavailable` (gateway-side prover infra +issue — surfaces in the gateway log, not as proposal failure), `merge failed: +…` (proposal won't merge into the current policy), and `policy invalid: …` +(merged policy fails the structural safety check). + +Read the structured rule (Endpoints + Binary). Read the Validation line. +Approve if both look right. The demo's `openshell rule approve-all` +auto-approves to keep the loop short; in a real session a developer makes +that judgment per chunk before pressing `a`. ## Going further diff --git a/examples/agent-driven-policy-management/agent-task.md b/examples/agent-driven-policy-management/agent-task.md index e83a653bd..69e1a4e55 100644 --- a/examples/agent-driven-policy-management/agent-task.md +++ b/examples/agent-driven-policy-management/agent-task.md @@ -3,8 +3,11 @@ # Agent Task -You are running inside an OpenShell sandbox. Your job is to write one -markdown file to GitHub via the GitHub Contents API. +You are running inside an OpenShell sandbox. Your job has two steps. Each +step starts with a `curl` call that the L7 proxy will deny; for each +denial, read `/etc/openshell/skills/policy_advisor.md`, draft a narrow +proposal, submit it to `http://policy.local/v1/proposals`, wait on +`GET /v1/proposals/{chunk_id}/wait?timeout=300`, then retry. ## Target @@ -15,50 +18,90 @@ markdown file to GitHub via the GitHub Contents API. ## What to do +### Step 1 — Fetch a public schema (un-credentialed action) + +1. Fetch the well-known GitHub OpenAPI description from + `https://raw.githubusercontent.com/github/rest-api-description/main/descriptions/api.github.com/api.github.com.json` + with `curl`. 
Do not send any authentication header — this resource is + public. Save the response to `/tmp/github-openapi.json`. + + The first attempt will fail with HTTP 403 and a JSON body containing + `"error": "policy_denied"`. Read the body — it tells you which layer + denied you, which method/path was missing, and where to learn more. + +2. Read `/etc/openshell/skills/policy_advisor.md` and follow it. Submit + the narrowest possible proposal to `http://policy.local/v1/proposals` + — exact host (`raw.githubusercontent.com`), exact port (443), exact + method (`GET`), exact path (`/github/rest-api-description/main/descriptions/api.github.com/api.github.com.json`), + binary `/usr/bin/curl`. Do not propose wildcard hosts. Save + `accepted_chunk_ids[0]`. + +3. Block on the gateway's decision by calling + `GET http://policy.local/v1/proposals/{chunk_id}/wait?timeout=300`. + + - `status: "approved"` — retry the fetch once. Because the host has + no credentials in scope (it's a public-content endpoint), the + prover sees no findings and the gateway auto-approves the + proposal without human review. The whole step typically completes + in ~1 second after submission. + - `status: "rejected"` — read `rejection_reason`. If it has text, + address the specific feedback and submit a revised proposal; + otherwise stop and tell the developer you can't proceed. + - `status: "pending"` with `timed_out: true` — re-issue the same + `/wait` request once. Each `/wait` is one long-lived HTTP call. + +4. After the retry, verify the file exists at `/tmp/github-openapi.json` + and is non-empty. + +### Step 2 — Write to GitHub (credentialed action) + 1. PUT the file with `curl` against `https://api.github.com/repos/{{OWNER}}/{{REPO}}/contents/{{FILE_PATH}}`. - Authenticate with the `GITHUB_TOKEN` environment variable. Do not echo the - token. Body content (base64-encoded into the JSON `content` field): + Authenticate with the `GITHUB_TOKEN` environment variable. Do not + echo the token. 
Body content (base64-encoded into the JSON `content` + field): ``` # OpenShell policy advisor demo Run id: {{RUN_ID}} - Written from inside an OpenShell sandbox after a narrowly-scoped policy - proposal was approved by the developer. + Written from inside an OpenShell sandbox after a narrowly-scoped + policy proposal was reviewed by the developer. ``` 2. The first attempt will fail with HTTP 403 and a JSON body containing - `"error": "policy_denied"`. Read the body — it tells you which layer denied - you (`l7`/`rest`), which method/path was missing, and where to learn more. + `"error": "policy_denied"`. Read the body — it tells you which layer + denied you (`l7`/`rest`), which method/path was missing, and where to + learn more. -3. Read `/etc/openshell/skills/policy_advisor.md` and follow it. Submit the - narrowest possible proposal to `http://policy.local/v1/proposals` — exact - host, exact port, exact method, exact path, binary `/usr/bin/curl`. Do not - include query strings. Do not propose wildcard hosts. The 202 response - carries `accepted_chunk_ids`; this demo submits one rule per proposal, so - the list always has exactly one element. Save `accepted_chunk_ids[0]`, - you need it for step 4. +3. Submit the narrowest possible proposal to + `http://policy.local/v1/proposals` — exact host (`api.github.com`), + exact port (443), exact method (`PUT`), exact path + (`/repos/{{OWNER}}/{{REPO}}/contents/{{FILE_PATH}}`), binary + `/usr/bin/curl`. Do not include query strings. Do not propose + wildcard hosts. Save `accepted_chunk_ids[0]`. 4. Block on the developer's decision by calling - `GET http://policy.local/v1/proposals/{chunk_id}/wait?timeout=300`. This is - a single HTTP request that the supervisor holds open until the developer - approves or rejects; do not run a polling loop yourself. + `GET http://policy.local/v1/proposals/{chunk_id}/wait?timeout=300`. 
+ - This time the prover emits a `capability_expansion` finding: PUT + is a new method on a host the binary already had credentialed + reach to (read-only). That's a stated intent change, so the + gateway holds the chunk in `pending` for human review instead of + auto-approving. The `/wait` call still parks on a socket — zero + LLM tokens burn while the human decides. - `status: "approved"` — retry the PUT once. Policy has hot-reloaded. - - `status: "rejected"` — read `rejection_reason`. If it has text, address - the specific feedback and submit a revised proposal (back to step 3); - otherwise stop and tell the developer you can't proceed. - - `status: "pending"` with `timed_out: true` — the supervisor returned - without a decision after the full timeout window elapsed. Immediately - re-issue the same `/wait` request once. Each `/wait` is one long-lived - HTTP call; do not sleep, do not loop with a short timeout, do not - decrease `timeout=300`. + - `status: "rejected"` — read `rejection_reason`. If it has text, + address the specific feedback and submit a revised proposal (back + to step 3); otherwise stop and tell the developer you can't + proceed. + - `status: "pending"` with `timed_out: true` — re-issue the same + `/wait` request once. 5. On a successful PUT (HTTP 200 or 201), print a short summary showing - `content.path` and `content.html_url` from the GitHub response. Do not - print the full response body. + `content.path` and `content.html_url` from the GitHub response. Do + not print the full response body. If anything is unclear, prefer making a narrower proposal and asking for approval again over widening the rule. diff --git a/examples/agent-driven-policy-management/demo.sh b/examples/agent-driven-policy-management/demo.sh index a3e1d1836..1a451da38 100755 --- a/examples/agent-driven-policy-management/demo.sh +++ b/examples/agent-driven-policy-management/demo.sh @@ -5,25 +5,11 @@ # Agent-driven policy management demo. 
# -# Runs the full loop end-to-end: -# -# 1. A Codex agent inside an OpenShell sandbox attempts a PUT that the L7 -# proxy denies with a structured policy_denied 403. -# 2. The agent reads /etc/openshell/skills/policy_advisor.md. -# 3. The agent submits a narrow proposal (exact host, port, method, path) -# to policy.local and saves the returned chunk_id. -# 4. The agent blocks on `GET /v1/proposals/{chunk_id}/wait` — one HTTP -# call that sleeps on a socket. THE AGENT BURNS ZERO LLM TOKENS WHILE -# IT WAITS; this is the load-bearing UX win over polling. -# 5. The developer (this script, simulating the host side) sees the pending -# proposal in `openshell rule get` and approves it. -# 6. The agent's /wait returns approved within ~1 second of the approval, -# retries the original PUT once against the hot-reloaded policy, and -# exits. -# -# The whole loop is feature-flagged behind agent_policy_proposals_enabled and -# requires no GitHub credentials beyond the repo write token already used by -# the existing demo flow. +# Shows the approval loop in one run: +# deny → agent proposes narrow access → gateway validates → approve → retry. +# A public raw.githubusercontent.com GET auto-approves; the GitHub PUT waits +# for review because a GitHub credential is in scope. See README.md for the +# full walkthrough. set -euo pipefail @@ -51,6 +37,8 @@ DEMO_FILE_PATH="${DEMO_FILE_DIR}/${DEMO_RUN_ID}.md" DEMO_SANDBOX_NAME="${DEMO_SANDBOX_NAME:-policy-demo-${DEMO_RUN_ID}}" DEMO_CODEX_PROVIDER_NAME="${DEMO_CODEX_PROVIDER_NAME:-codex-policy-demo-${DEMO_RUN_ID}}" DEMO_GITHUB_PROVIDER_NAME="${DEMO_GITHUB_PROVIDER_NAME:-github-policy-demo-${DEMO_RUN_ID}}" +DEMO_CODEX_MODEL="${DEMO_CODEX_MODEL:-gpt-5.4-mini}" +DEMO_CODEX_LOCAL_BIN="${DEMO_CODEX_LOCAL_BIN:-}" DEMO_MANUAL_APPROVE="${DEMO_MANUAL_APPROVE:-0}" # Manual approvals need more headroom than the auto-approve loop — a human # reads the proposal, thinks, and decides. 
Bump the default to 30 min when @@ -134,19 +122,18 @@ spin_clear() { # — a sed delimiter collision in one of the substitutions blanks the entire # log tail, hiding the very failure context we're trying to surface. redact_log() { - python3 - \ - "${DEMO_GITHUB_TOKEN:-}" \ - "${CODEX_AUTH_ACCESS_TOKEN:-}" \ - "${CODEX_AUTH_REFRESH_TOKEN:-}" \ - "${CODEX_AUTH_ACCOUNT_ID:-}" \ - <<'PY' + python3 -c ' import sys tokens = [t for t in sys.argv[1:] if t] for line in sys.stdin: for t in tokens: line = line.replace(t, "[redacted]") sys.stdout.write(line) -PY +' \ + "${DEMO_GITHUB_TOKEN:-}" \ + "${CODEX_AUTH_ACCESS_TOKEN:-}" \ + "${CODEX_AUTH_REFRESH_TOKEN:-}" \ + "${CODEX_AUTH_ACCOUNT_ID:-}" } fail() { @@ -186,6 +173,20 @@ cleanup() { fi fi + # Restore the providers_v2_enabled setting to what it was before this + # run. The demo opts in to v2 composition so provider profiles + # contribute to the effective policy; restore so the host's broader + # workflow isn't affected. + if [[ -n "${PRIOR_PROVIDERS_V2_FLAG:-}" ]]; then + if [[ "$PRIOR_PROVIDERS_V2_FLAG" == "(unset)" ]]; then + "$OPENSHELL_BIN" settings delete --global --key providers_v2_enabled --yes \ + >/dev/null 2>&1 || true + else + "$OPENSHELL_BIN" settings set --global --key providers_v2_enabled \ + --value "$PRIOR_PROVIDERS_V2_FLAG" --yes >/dev/null 2>&1 || true + fi + fi + if [[ $status -eq 0 ]]; then rm -rf "$TMP_DIR" else @@ -219,7 +220,7 @@ resolve_github_token() { resolve_codex_auth() { [[ -f "${HOME}/.codex/auth.json" ]] || fail "missing local Codex sign-in; run: codex login" - export CODEX_AUTH_ACCESS_TOKEN CODEX_AUTH_REFRESH_TOKEN CODEX_AUTH_ACCOUNT_ID + export CODEX_AUTH_ACCESS_TOKEN CODEX_AUTH_REFRESH_TOKEN CODEX_AUTH_ACCOUNT_ID DEMO_CODEX_MODEL CODEX_AUTH_ACCESS_TOKEN="$(jq -r '.tokens.access_token // empty' "${HOME}/.codex/auth.json")" CODEX_AUTH_REFRESH_TOKEN="$(jq -r '.tokens.refresh_token // empty' "${HOME}/.codex/auth.json")" CODEX_AUTH_ACCOUNT_ID="$(jq -r '.tokens.account_id // empty' 
"${HOME}/.codex/auth.json")" @@ -330,7 +331,13 @@ render_payload() { -e "s|{{FILE_PATH}}|${DEMO_FILE_PATH}|g" \ -e "s|{{RUN_ID}}|${DEMO_RUN_ID}|g" \ "$TASK_TEMPLATE" > "${PAYLOAD_DIR}/agent-task.md" - cp "$SANDBOX_AGENT" "${PAYLOAD_DIR}/sandbox-agent.sh" + sed "s|DEMO_CODEX_MODEL=\"\${DEMO_CODEX_MODEL:-gpt-5.4-mini}\"|DEMO_CODEX_MODEL=\"\${DEMO_CODEX_MODEL:-${DEMO_CODEX_MODEL}}\"|" \ + "$SANDBOX_AGENT" > "${PAYLOAD_DIR}/sandbox-agent.sh" + if [[ -n "$DEMO_CODEX_LOCAL_BIN" ]]; then + [[ -x "$DEMO_CODEX_LOCAL_BIN" ]] || fail "DEMO_CODEX_LOCAL_BIN is not executable: $DEMO_CODEX_LOCAL_BIN" + cp "$DEMO_CODEX_LOCAL_BIN" "${PAYLOAD_DIR}/codex" + chmod +x "${PAYLOAD_DIR}/codex" + fi cp "$POLICY_TEMPLATE" "$POLICY_FILE" } @@ -347,7 +354,7 @@ create_providers() { "$OPENSHELL_BIN" provider create \ --name "$DEMO_GITHUB_PROVIDER_NAME" \ - --type generic \ + --type github \ --credential DEMO_GITHUB_TOKEN >/dev/null info "providers created (codex, github) — credentials injected as env vars only" @@ -357,9 +364,10 @@ start_agent_sandbox() { step "Launching sandbox; agent will hit a policy block and draft a proposal" "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true - info "initial policy: read-only access to api.github.com (no PUT)" - info "agent task: PUT /repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}" - info "live log: ${AGENT_LOG}" + info "policy: raw GitHub schema path denied; GitHub writes denied" + info "approval: auto for no new findings; review for credential risk" + info "target: PUT /repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}" + info "log: ${AGENT_LOG}" # `--upload :/sandbox` preserves the source directory basename # (matches `scp -r`/`cp -r`, see PRs #952 / #1028), so `${PAYLOAD_DIR}` @@ -372,6 +380,7 @@ start_agent_sandbox() { --provider "$DEMO_CODEX_PROVIDER_NAME" \ --provider "$DEMO_GITHUB_PROVIDER_NAME" \ --policy "$POLICY_FILE" \ + --approval-mode auto \ --upload 
"${PAYLOAD_DIR}:/sandbox" \ --no-git-ignore \ --no-auto-providers \ @@ -381,51 +390,97 @@ start_agent_sandbox() { AGENT_PID="$!" } -# Strip the rule_get output down to the lines a developer needs to make an -# informed approve/reject decision: rationale, binary, endpoint. Filters the -# noisy fields (UUID, agent-generated rule_name, hardcoded confidence, -# duplicate Binaries) until `openshell rule get` learns to print L7 -# method/path itself (tracked separately). -# -# `openshell rule get` colorizes labels with ANSI escapes; strip them before -# parsing so the field-name match works in piped contexts. +# Strip `rule get` down to the approval contract: chunk, binary, access, +# and the prover's categorical findings (no severity grade — the prover +# emits category names like `credential_reach_expansion` and +# `capability_expansion`). summarize_pending() { local pending="$1" sed 's/\x1b\[[0-9;]*m//g' "$pending" \ | awk ' - /Rationale:/ { sub(/^[[:space:]]*/, ""); print " " $0; next } - /Binary:/ { sub(/^[[:space:]]*/, ""); print " " $0; next } - /Endpoints:/ { sub(/^[[:space:]]*/, ""); print " " $0; next } + BEGIN { + in_validation = 0 + chunk_count = 0 + validation_printed = 0 + } + /^[[:space:]]*Chunk:/ { + in_validation = 0 + chunk_count++ + validation_printed = 0 + if (chunk_count > 1) print "" + sub(/^[[:space:]]*/, "") + chunk_id = $2 + short_id = substr(chunk_id, 1, 8) + print " Request " chunk_count ": chunk " short_id + next + } + /Binary:/ { + in_validation = 0 + sub(/^[[:space:]]*/, "") + sub(/^Binary:/, "Binary: ") + print " " $0 + next + } + /Endpoints:/ { + in_validation = 0 + sub(/^[[:space:]]*/, "") + if (!validation_printed) { + print " Prover: no verdict shown" + validation_printed = 1 + } + sub(/^Endpoints:/, "Access: ") + print " " $0 + next + } + /Validation:/ { + in_validation = 1 + validation_printed = 1 + sub(/^[[:space:]]*/, "") + sub(/^Validation:[[:space:]]*(prover:[[:space:]]*)?/, "Prover: ") + print " " $0 + next + } + /Rationale:/ { + 
in_validation = 0 + sub(/^[[:space:]]*/, "") + sub(/^Rationale:/, "Reason: ") + print " " $0 + next + } + # Indented continuation lines of the validation block are + # category-named finding rows (e.g., + # `capability_expansion: PUT on api.github.com:443 via /usr/bin/curl`). + in_validation && /^[[:space:]]+(credential_reach_expansion|capability_expansion|l7_bypass_credentialed|link_local_reach):/ { + sub(/^[[:space:]]*/, "") + print " Finding: " $0 + next + } + { in_validation = 0 } ' } +pending_requires_review() { + local pending="$1" + local clean + # Empty-delta chunks can appear in the pending view for a moment before the + # gateway records auto-approval. Keep the demo focused on actual review + # work: findings, merge failures, or policy validation failures. + clean="$(sed 's/\x1b\[[0-9;]*m//g' "$pending")" + if grep -Eq 'Validation: (prover: [1-9][0-9]* new finding|merge failed|policy invalid)|^[[:space:]]+(credential_reach_expansion|capability_expansion|l7_bypass_credentialed|link_local_reach):' <<<"$clean"; then + return 0 + fi + if grep -q 'Validation:' <<<"$clean"; then + return 1 + fi + return 0 +} + narrate_sandbox_workflow() { - info "Inside the sandbox right now:" - info "" - info " • agent: ${DIM}curl -X PUT https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/...${RESET}" - info " • L7 proxy denies the write and returns a structured 403 the" - info " agent can parse and act on:" - cat </dev/null 2>&1; then spin_clear if ! wait "$AGENT_PID"; then @@ -484,27 +542,44 @@ approve_pending_until_agent_exits() { fi AGENT_PID="" if (( approval_count == 0 )); then - fail "agent exited before any pending proposal appeared" + info "agent exited with zero review approvals (all proposals auto-approved)" + else + info "agent exited after ${approval_count} review approval(s)" fi - info "agent exited after ${approval_count} approval(s)" return fi - # Anything pending? 
Approve and keep watching — the agent may - # redraft if a previous proposal didn't yield the access it needed. + # Anything pending needs an explicit host-side decision. Auto mode only + # bypasses this when the gateway validation finds no new risk. if "$OPENSHELL_BIN" rule get "$DEMO_SANDBOX_NAME" --status pending >"$pending" 2>/dev/null \ && grep -q "Chunk:" "$pending" && grep -q "pending" "$pending"; then + if ! pending_requires_review "$pending"; then + spin_wait "waiting for auto-approvals to settle" 2 + continue + fi spin_clear info "" - info "${GREEN}proposal received:${RESET}" + info "${YELLOW}approval requested${RESET}" summarize_pending "$pending" if [[ "$DEMO_MANUAL_APPROVE" == "1" ]]; then approve_manually "$pending" else - step "Approving — the agent's /wait will return within ~1s" - "$OPENSHELL_BIN" rule approve-all "$DEMO_SANDBOX_NAME" \ - | awk '/approved/ { print " " $0 }' + info "" + spin_wait "letting the proposal land before approving" 2 + spin_clear + step "Approving for demo" + local approve_output + if ! approve_output="$("$OPENSHELL_BIN" rule approve-all "$DEMO_SANDBOX_NAME" 2>&1)"; then + if grep -q "no pending chunks to approve" <<<"$approve_output"; then + info " decision already recorded" + else + printf "%s\n" "$approve_output" >&2 + fail "could not approve pending proposal" + fi + else + awk '/approved/ { print " " $0 }' <<<"$approve_output" + fi fi approval_count=$((approval_count + 1)) fi @@ -532,21 +607,13 @@ verify_github_write() { jq -r '" file: \(.path)", " url: \(.html_url)"' "$body" } -# Print the OCSF JSONL trace, filtered to the three events that *are* the -# demo's story: the L7 PUT deny, the policy hot-reload, and the L7 PUT allow. -# The native OCSF shorthand is informative and consistent with the rest of -# OpenShell's logging — keep it as-is rather than re-formatting. +# Print the concise OCSF trace that shows deny, proposal, decision, reload, +# and successful retry. 
show_logs() { - step "Policy decision trace (OCSF)" - # Filter to the events that tell the loop's story end-to-end, ordered by - # the trace's own timestamps: - # HTTP:PUT DENIED — initial proxy enforcement - # CONFIG:PROPOSED — agent submitted a chunk to the gateway - # CONFIG:APPROVED/REJECTED — developer decided; agent's /wait woke up - # CONFIG:LOADED — supervisor hot-reloaded the merged policy - # HTTP:PUT ALLOWED — agent's retry succeeded + step "Decision trace" "$OPENSHELL_BIN" logs "$DEMO_SANDBOX_NAME" --since 10m -n 200 2>&1 \ - | grep -E 'HTTP:PUT.*(DENIED|ALLOWED)|CONFIG:(PROPOSED|APPROVED|REJECTED|LOADED)' \ + | grep -E 'HTTP:PUT.*(DENIED|ALLOWED)|agent_authored proposal|auto-approved: no new prover findings \(source=agent_authored\)|gateway approved draft chunk .*PUT|Policy reloaded successfully' \ + | grep -v 'source=mechanistic' \ | sed 's/^/ /' || true } @@ -557,14 +624,26 @@ enable_agent_proposals() { # delete` rather than a value write. local prior prior="$("$OPENSHELL_BIN" settings get --global --json 2>/dev/null \ - | grep -o '"agent_policy_proposals_enabled"[^,}]*' \ - | grep -o 'true\|false' | head -1)" + | jq -r '.settings.agent_policy_proposals_enabled // empty | tostring | select(. == "true" or . == "false")')" PRIOR_PROPOSALS_FLAG="${prior:-(unset)}" "$OPENSHELL_BIN" settings set --global \ --key agent_policy_proposals_enabled --value true --yes >/dev/null \ || fail "could not enable agent_policy_proposals_enabled globally" } +enable_providers_v2() { + # Providers-v2 composition is behind a global flag. The demo opts in + # so provider profiles (codex, github) contribute to the effective + # policy via composition. Cleanup restores the prior value. + local prior + prior="$("$OPENSHELL_BIN" settings get --global --json 2>/dev/null \ + | jq -r '.settings.providers_v2_enabled // empty | tostring | select(. == "true" or . 
== "false")')" + PRIOR_PROVIDERS_V2_FLAG="${prior:-(unset)}" + "$OPENSHELL_BIN" settings set --global \ + --key providers_v2_enabled --value true --yes >/dev/null \ + || fail "could not enable providers_v2_enabled globally" +} + main() { validate_env @@ -574,6 +653,7 @@ main() { render_payload create_providers enable_agent_proposals + enable_providers_v2 show_run_summary diff --git a/examples/agent-driven-policy-management/policy.template.yaml b/examples/agent-driven-policy-management/policy.template.yaml index e920277b5..0498ecfcc 100644 --- a/examples/agent-driven-policy-management/policy.template.yaml +++ b/examples/agent-driven-policy-management/policy.template.yaml @@ -3,13 +3,21 @@ # Initial sandbox policy for the agent-driven policy demo. # -# The agent inside the sandbox can: -# - reach Codex's model and auth endpoints (codex) -# - clone Codex plugin repos read-only (codex_plugins) -# - read api.github.com via curl (github_api_readonly) +# The demo exercises two flavors of denial-→-propose-→-decision: # -# The agent CANNOT write to GitHub yet. That's the proposal it has to draft -# and ask the developer to approve. +# - Step 1 hits raw.githubusercontent.com (no credential in scope). The +# host is pre-listed at L7 with only a bootstrap path, so the schema GET +# structured-403's. The agent proposes the exact path; the prover +# sees no credential exposure and the gateway auto-approves. +# +# - Step 2 hits api.github.com PUT (github credential in scope). The +# host is pre-allowed for read-only access, so the PUT +# structured-403's. The agent proposes the narrow PUT path; the +# prover sees github_token in scope and emits `capability_expansion`. The +# chunk lands in pending for human review; demo.sh approves on the developer's behalf. +# +# This shows both halves of the loop in one run: free path for safe +# changes, single human approval for credentialed ones.
version: 1 @@ -35,35 +43,41 @@ network_policies: - { host: ab.chatgpt.com, port: 443, protocol: rest, enforcement: enforce, access: full } binaries: - { path: /usr/bin/codex } + - { path: /sandbox/payload/codex } - { path: /usr/bin/node } - { path: "/usr/lib/node_modules/@openai/**" } - codex_plugins: - name: codex-plugins + github_api_readonly: + # api.github.com pre-allowed for read-only access. Writes (PUT/POST/PATCH/DELETE) + # structured-403 at L7 — the agent proposes the specific method/path, + # and the prover gates on credential-in-scope (github provider attached). + name: github-api-readonly endpoints: - - host: github.com + - host: api.github.com port: 443 protocol: rest enforcement: enforce - rules: - - allow: - method: GET - path: "/openai/plugins.git/info/refs*" - - allow: - method: POST - path: "/openai/plugins.git/git-upload-pack" + access: read-only binaries: - - { path: /usr/bin/git } - - { path: /usr/lib/git-core/git-remote-http } - - { path: "/usr/lib/node_modules/@openai/**" } + - { path: /usr/bin/curl } - github_api_readonly: - name: github-api-readonly + github_raw_scoped: + # raw.githubusercontent.com — pre-listed at L7 with one bootstrap + # path so the L7 validator accepts the rule. The agent must propose + # any additional GET paths it actually needs. Each new proposal is + # un-credentialed (no provider declares this host), so the prover + # sees no findings and the gateway auto-approves narrow scoped reads + # when `proposal_approval_mode = auto` (set via `--approval-mode auto` + # at create or via `openshell settings set` at runtime). 
+ name: github-raw-scoped endpoints: - - host: api.github.com + - host: raw.githubusercontent.com port: 443 protocol: rest enforcement: enforce - access: read-only + rules: + - allow: + method: GET + path: /github/rest-api-description/main/README.md binaries: - { path: /usr/bin/curl } diff --git a/examples/agent-driven-policy-management/sandbox-agent.sh b/examples/agent-driven-policy-management/sandbox-agent.sh index 052535c35..45449dd92 100755 --- a/examples/agent-driven-policy-management/sandbox-agent.sh +++ b/examples/agent-driven-policy-management/sandbox-agent.sh @@ -74,9 +74,29 @@ cd "$WORK" # compare runs. DEMO_CODEX_REASONING="${DEMO_CODEX_REASONING:-low}" -exec codex exec \ - --skip-git-repo-check \ - --sandbox danger-full-access \ - --ephemeral \ +# Pin the model to one that ChatGPT-account Codex users can reach and that is +# quick enough for the mechanical proposal loop. Override with DEMO_CODEX_MODEL +# if your account supports something different. +DEMO_CODEX_MODEL="${DEMO_CODEX_MODEL:-gpt-5.4-mini}" +CODEX_BIN="${CODEX_BIN:-codex}" +if [[ -x /sandbox/payload/codex ]]; then + CODEX_BIN="/sandbox/payload/codex" +fi + +CODEX_EXEC_ARGS=( + exec + --skip-git-repo-check + --sandbox danger-full-access + --ephemeral +) +if "$CODEX_BIN" exec --help 2>/dev/null | grep -q -- "--ignore-user-config"; then + CODEX_EXEC_ARGS+=(--ignore-user-config) +fi +if "$CODEX_BIN" exec --help 2>/dev/null | grep -q -- "--ignore-rules"; then + CODEX_EXEC_ARGS+=(--ignore-rules) +fi + +exec "$CODEX_BIN" "${CODEX_EXEC_ARGS[@]}" \ + -c "model=\"${DEMO_CODEX_MODEL}\"" \ -c "model_reasoning_effort=\"${DEMO_CODEX_REASONING}\"" \ "$(cat /sandbox/payload/agent-task.md)" diff --git a/proto/openshell.proto b/proto/openshell.proto index 02dbbe283..93aee8fd5 100644 --- a/proto/openshell.proto +++ b/proto/openshell.proto @@ -321,6 +321,12 @@ message SandboxSpec { // (e.g. "0", "1"). When empty with gpu=true, the driver assigns the // first available GPU. 
string gpu_device = 10; + // Field 11 was `proposal_approval_mode`. The approval mode is now a + // runtime setting (gateway or sandbox scope) read via UpdateConfig / + // GetSandboxConfig, so it can be flipped on a running sandbox and + // managed fleet-wide. + reserved 11; + reserved "proposal_approval_mode"; } // Public sandbox template mapped onto compute-driver template inputs. diff --git a/providers/github.yaml b/providers/github.yaml index 2be9fb2de..4ce5af2d3 100644 --- a/providers/github.yaml +++ b/providers/github.yaml @@ -15,6 +15,9 @@ credentials: discovery: credentials: [api_token] endpoints: + # api.github.com is the REST API surface. Defaults to read-only — + # writes require an explicit policy proposal so the agentic loop + + # prover can audit each capability change. - host: api.github.com port: 443 protocol: rest @@ -26,6 +29,7 @@ endpoints: protocol: graphql access: read-only enforcement: enforce + # github.com is the git transport (clone / fetch by default). - host: github.com port: 443 protocol: rest diff --git a/python/openshell/sandbox.py b/python/openshell/sandbox.py index 85e5a3392..4c98a4155 100644 --- a/python/openshell/sandbox.py +++ b/python/openshell/sandbox.py @@ -12,8 +12,9 @@ import tempfile import threading import time +from collections import namedtuple from dataclasses import dataclass -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast from urllib.parse import urlparse import grpc @@ -26,6 +27,16 @@ openshell_pb2_grpc, ) +_ClientCallDetailsBase = namedtuple( + "_ClientCallDetailsBase", + ("method", "timeout", "metadata", "credentials", "wait_for_ready", "compression"), +) + + +class _ClientCallDetails(_ClientCallDetailsBase, grpc.ClientCallDetails): + pass + + if TYPE_CHECKING: import builtins from collections.abc import Callable, Iterator, Mapping, Sequence @@ -73,21 +84,33 @@ def __init__(self, token_provider: Callable[[], str]) -> None: self._token_provider = token_provider def _attach(self, details: 
grpc.ClientCallDetails) -> grpc.ClientCallDetails: - metadata = list(details.metadata) if details.metadata else [] + original_metadata = getattr(details, "metadata", None) + metadata = list(original_metadata) if original_metadata else [] metadata.append(("authorization", f"Bearer {self._token_provider()}")) - return details._replace(metadata=metadata) + return _ClientCallDetails( + getattr(details, "method", None), + getattr(details, "timeout", None), + metadata, + getattr(details, "credentials", None), + getattr(details, "wait_for_ready", None), + getattr(details, "compression", None), + ) - def intercept_unary_unary(self, continuation, details, request): - return continuation(self._attach(details), request) + def intercept_unary_unary(self, continuation, client_call_details, request): + return continuation(self._attach(client_call_details), request) - def intercept_unary_stream(self, continuation, details, request): - return continuation(self._attach(details), request) + def intercept_unary_stream(self, continuation, client_call_details, request): + return continuation(self._attach(client_call_details), request) - def intercept_stream_unary(self, continuation, details, request_iterator): - return continuation(self._attach(details), request_iterator) + def intercept_stream_unary( + self, continuation, client_call_details, request_iterator + ): + return continuation(self._attach(client_call_details), request_iterator) - def intercept_stream_stream(self, continuation, details, request_iterator): - return continuation(self._attach(details), request_iterator) + def intercept_stream_stream( + self, continuation, client_call_details, request_iterator + ): + return continuation(self._attach(client_call_details), request_iterator) def _normalize_bearer( @@ -96,7 +119,7 @@ def _normalize_bearer( if bearer is None: return None if callable(bearer): - return bearer + return cast("Callable[[], str]", bearer) token = bearer return lambda: token diff --git 
a/python/openshell/sandbox_test.py b/python/openshell/sandbox_test.py index cd26ce85e..175472ca7 100644 --- a/python/openshell/sandbox_test.py +++ b/python/openshell/sandbox_test.py @@ -174,15 +174,13 @@ def __init__( self.wait_for_ready = wait_for_ready def _replace(self, **kwargs: Any) -> _FakeClientCallDetails: - current = { - "method": self.method, - "timeout": self.timeout, - "metadata": self.metadata, - "credentials": self.credentials, - "wait_for_ready": self.wait_for_ready, - } - current.update(kwargs) - return _FakeClientCallDetails(**current) + return _FakeClientCallDetails( + method=kwargs.get("method", self.method), + timeout=kwargs.get("timeout", self.timeout), + metadata=kwargs.get("metadata", self.metadata), + credentials=kwargs.get("credentials", self.credentials), + wait_for_ready=kwargs.get("wait_for_ready", self.wait_for_ready), + ) def test_normalize_bearer_accepts_str_or_callable() -> None: @@ -947,7 +945,7 @@ def handler(request: _httpx.Request) -> _httpx.Response: ) r = _OidcRefresher(tmp_path, "g", write_back=False) - _install_mock_transport(r, _httpx.MockTransport(handler)) # type: ignore[has-type] + _install_mock_transport(r, _httpx.MockTransport(handler)) results: list[str] = [] errors: list[BaseException] = []