MCP v2 PR-1: policy engine + audit log + Config/Audit/Policy panel tabs

Foundation for Claude-drives-the-workspace writes. Nothing wired end-to-end yet (App.tsx dispatcher comes next); this lands the machinery + UI. mcp_policy.rs (new) — three-tier allow/ask/deny policy with deny-first precedence and a compiled-in non-overridable hard-deny list (10 patterns covering rm -rf /, fork bombs, mkfs on device, dd to raw disk, /etc/passwd overwrite, curl|sh, chmod -R 777 /, etc.). Shell-operator-aware glob matcher mirroring Claude Code's Bash(*) syntax. Restrictive default — empty policy means every non-hard-denied call falls to Ask. Persisted to mcp-policy.json in app_config_dir. Includes a PolicyClassifier scaffold (no-op) for a future v2.1 LLM-classifier hook. 1152 lines incl. ~100 unit + fuzz tests covering the matchers and lookalike negatives. mcp.rs — TileService now holds AppHandle + Arc<PendingActions> (oneshot registry keyed by uuid). New async dispatch_action helper runs the policy check, emits "mcp://request" for the frontend to handle, awaits a oneshot reply (30s timeout), then emits "mcp://audit" with the outcome regardless. set_label tool wired through this path as the demo for PR-1b's dispatcher. commands.rs / lib.rs — new Tauri commands mcp_action_reply, mcp_policy_load, mcp_policy_save; PendingActions registered as managed state. McpPanel.tsx — refactored into Config / Audit / Policy tabs. AuditTab listens on mcp://audit, keeps a 200-entry ring with ok/denied/failed chips. PolicyTab edits the allow/ask/deny buckets (stacked vertically — three columns overflowed the panel) and shows the hard-deny rules read-only at the bottom with "Cannot be disabled" badges. Themed scrollbar on mcp-body to match xterm panes. Caveat: set_label calls from Claude will currently time out — the App.tsx side that listens on mcp://request and replies via mcp_action_reply lands in PR-1b. Co-authored by Sonnet (policy engine, backend plumbing, panel UI) and Haiku (hard-deny fuzz test suite); integration + bug fixes here.
2026-05-26 12:05:31 +01:00 · 2026-05-26 12:05:31 +01:00 · 464c576b79
commit 464c576b79
parent b14b450577
11 changed files with 2512 additions and 144 deletions
--- a/src-tauri/src/mcp.rs
+++ b/src-tauri/src/mcp.rs
@ -16,7 +16,7 @@ use std::collections::HashMap;
 use std::net::SocketAddr;
 use std::path::PathBuf;
 use std::sync::Arc;
-use std::time::{Duration, Instant};
+use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};

 use anyhow::{Context, Result};
 use axum::{
@ -40,7 +40,7 @@ use rmcp::{
 };
 use serde::{Deserialize, Serialize};
 use serde_json::json;
-use tauri::{AppHandle, Manager};
+use tauri::{AppHandle, Emitter, Manager};
 use tokio::{net::TcpListener, sync::RwLock, task::JoinHandle};
 use tokio_util::sync::CancellationToken;

@ -169,6 +169,74 @@ pub struct McpState {
    pub mirror: McpMirror,
 }

+// ----------------------------------------------------------------------------
+// Action reply registry.
+// ----------------------------------------------------------------------------
+
+/// Registry of frontend action requests that are still awaiting a reply.
+///
+/// Each entry maps a `requestId` (generated by `TileService::dispatch_action`)
+/// to the oneshot sender whose receiver that call is awaiting. The
+/// `mcp_action_reply` Tauri command will fire the sender once the frontend
+/// resolves (`Ok(value)`) or rejects (`Err(message)`) the action.
+///
+/// Owned as separate managed state (Arc<PendingActions>) so Tauri commands can
+/// grab it via `tauri::State<'_, Arc<PendingActions>>` without needing to lock
+/// the entire McpState or pass TileService around.
+pub struct PendingActions(
+    pub PlMutex<HashMap<String, tokio::sync::oneshot::Sender<Result<serde_json::Value, String>>>>,
+);
+
+impl Default for PendingActions {
+    /// Builds a registry with no in-flight requests.
+    fn default() -> Self {
+        let no_requests = HashMap::new();
+        PendingActions(PlMutex::new(no_requests))
+    }
+}
+
+// ----------------------------------------------------------------------------
+// Audit / request event payload types.
+// ----------------------------------------------------------------------------
+
+/// Payload of the `"mcp://request"` event that asks the frontend to execute
+/// an action. Field names are serialized in camelCase.
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase")]
+struct McpActionRequest {
+    /// Uuid-shaped id; the key under which the reply sender is registered
+    /// in `PendingActions`.
+    request_id: String,
+    /// Name of the tool being invoked.
+    tool: &'static str,
+    /// Tool arguments, forwarded to the frontend as JSON.
+    args: serde_json::Value,
+    /// `true` when the policy decision was `Ask`, `false` for `Allow`.
+    needs_confirm: bool,
+    /// Reason attached to an `Ask` decision; `None` for `Allow`.
+    reason: Option<String>,
+}
+
+/// Outcome recorded in an audit entry. Serialized with a `kind` tag that
+/// names the variant.
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase", tag = "kind")]
+enum McpAuditResult {
+    /// The frontend replied with a success value.
+    Ok,
+    /// The call was rejected before reaching the frontend. `hard` is `true`
+    /// for matches against the compiled-in hard-deny list; for policy denials
+    /// it carries the flag from the policy decision.
+    Denied { reason: String, hard: bool },
+    /// The call was dispatched but did not succeed (timeout, closed reply
+    /// channel, or an error message returned by the frontend).
+    Failed { msg: String },
+}
+
+/// Payload of the `"mcp://audit"` event: one record per dispatched action.
+/// Field names are serialized in camelCase.
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase")]
+struct McpAuditEntry {
+    /// Wall-clock time at which the dispatch started, in milliseconds since
+    /// the Unix epoch (see `now_ms`).
+    ts_ms: u64,
+    /// Name of the tool that was invoked.
+    tool: &'static str,
+    /// Textual argument representation, shortened by `truncate_summary`.
+    args_summary: String,
+    /// How the call ended.
+    result: McpAuditResult,
+    /// Milliseconds between the start of the dispatch and its outcome.
+    duration_ms: u64,
+}
+
+/// Current wall-clock time as whole milliseconds since the Unix epoch.
+///
+/// Yields 0 if the system clock reports a time before the epoch.
+fn now_ms() -> u64 {
+    match SystemTime::now().duration_since(UNIX_EPOCH) {
+        Ok(since_epoch) => since_epoch.as_millis() as u64,
+        Err(_) => 0,
+    }
+}
+
+/// Shorten `s` for display in an audit entry.
+///
+/// Strings of at most 80 bytes are returned unchanged. Longer strings are cut
+/// to at most 80 bytes and suffixed with `"..."`. The cut is moved back to the
+/// nearest UTF-8 character boundary, so a multi-byte character straddling
+/// byte 80 shortens the prefix instead of triggering a slicing panic.
+fn truncate_summary(s: &str) -> String {
+    const MAX_BYTES: usize = 80;
+
+    if s.len() <= MAX_BYTES {
+        return s.to_string();
+    }
+
+    // Index 0 is always a character boundary, so the search cannot fail.
+    let cut = (0..=MAX_BYTES)
+        .rev()
+        .find(|&idx| s.is_char_boundary(idx))
+        .unwrap_or(0);
+    format!("{}...", &s[..cut])
+}
+
 // ----------------------------------------------------------------------------
 // MCP service: tools + resources.
 // ----------------------------------------------------------------------------
@ -177,6 +245,8 @@ pub struct McpState {
 pub struct TileService {
    ptys: Arc<PtyManager>,
    state: Arc<RwLock<McpState>>,
+    /// Registry of frontend requests awaiting a reply; `dispatch_action`
+    /// inserts a oneshot sender here for every request it emits.
+    pending: Arc<PendingActions>,
+    /// Tauri handle used to load the MCP policy and to emit the
+    /// "mcp://request" / "mcp://audit" events.
+    app: AppHandle,
    tool_router: ToolRouter<Self>,
 }

@ -206,19 +276,222 @@ pub struct WaitForIdleArgs {
    pub timeout_ms: Option<u64>,
 }

+// Parameters accepted by the `set_label` tool.
+// NOTE(review): this struct derives `JsonSchema`; the `///` field docs below
+// are presumably surfaced to MCP clients as schema descriptions, so keep
+// them client-facing — confirm against the schemars version in use.
+#[derive(Debug, Deserialize, schemars::JsonSchema)]
+pub struct SetLabelArgs {
+    /// Stable leaf id from the tree (uuid-shaped). Must belong to a pane
+    /// the user has allow-listed for MCP access.
+    pub leaf_id: LeafId,
+    /// New human-readable label. Pass an empty string to clear the label.
+    pub label: String,
+}
+
 const READ_PANE_HARD_CAP_LINES: usize = 3000;
 const WAIT_TIMEOUT_HARD_CAP_MS: u64 = 5 * 60 * 1000;

 #[tool_router]
 impl TileService {
-    pub fn new(ptys: Arc<PtyManager>, state: Arc<RwLock<McpState>>) -> Self {
+    /// Build a service instance from shared handles.
+    ///
+    /// * `ptys` – shared PTY manager.
+    /// * `state` – shared MCP state behind an async `RwLock`.
+    /// * `pending` – registry of frontend requests awaiting a reply.
+    /// * `app` – Tauri handle used for policy loading and event emission.
+    ///
+    /// The tool router is created via `Self::tool_router()`.
+    pub fn new(
+        ptys: Arc<PtyManager>,
+        state: Arc<RwLock<McpState>>,
+        pending: Arc<PendingActions>,
+        app: AppHandle,
+    ) -> Self {
        Self {
            ptys,
            state,
+            pending,
+            app,
            tool_router: Self::tool_router(),
        }
    }

+    /// Run one frontend-executed action through the policy gate and wait for
+    /// the frontend's reply.
+    ///
+    /// Steps, in order:
+    /// 1. Load the user policy via `mcp_policy::load_or_init`.
+    /// 2. Reject anything matching the compiled-in hard-deny list.
+    /// 3. Evaluate the user policy: `Deny` rejects, `Ask` forwards the request
+    ///    with `needs_confirm = true`, `Allow` forwards it without confirmation.
+    /// 4. Register a oneshot under a fresh uuid-shaped request id, emit
+    ///    "mcp://request", and await the reply for up to 30 seconds.
+    ///
+    /// Every return path emits one "mcp://audit" event describing the outcome.
+    ///
+    /// * `tool` – tool name; passed to the policy evaluator and recorded in
+    ///   the request and audit payloads.
+    /// * `args` – JSON payload forwarded to the frontend.
+    /// * `args_repr` – flat textual form of the arguments used for policy
+    ///   matching; a truncated copy goes into the audit entry.
+    ///
+    /// # Errors
+    /// * `invalid_params` when the call is hard-denied or denied by policy.
+    /// * `internal_error` when the policy cannot be loaded, the request event
+    ///   cannot be emitted, the wait times out, the reply channel closes, or
+    ///   the frontend reports a failure.
+    async fn dispatch_action(
+        &self,
+        tool: &'static str,
+        args: serde_json::Value,
+        args_repr: String,
+    ) -> Result<serde_json::Value, McpError> {
+        /// Removes the request's registry entry when dropped, so the sender
+        /// cannot outlive this call — on timeout, on early return, or when
+        /// this future is dropped before it completes.
+        struct PendingGuard<'a> {
+            registry: &'a PendingActions,
+            request_id: &'a str,
+        }
+
+        impl Drop for PendingGuard<'_> {
+            fn drop(&mut self) {
+                self.registry.0.lock().remove(self.request_id);
+            }
+        }
+
+        // Wall clock for the audit timestamp, monotonic clock for the
+        // duration: subtracting two wall-clock readings underflows if the
+        // system clock steps backwards while the call is in flight.
+        let start_ms = now_ms();
+        let started = Instant::now();
+        let args_summary = truncate_summary(&args_repr);
+
+        // Single place that builds and emits the audit entry. Emission stays
+        // best-effort (a lost audit event must not fail the call) but a
+        // failure is logged instead of being dropped silently.
+        let emit_audit = |result: McpAuditResult| {
+            let audit = McpAuditEntry {
+                ts_ms: start_ms,
+                tool,
+                args_summary: args_summary.clone(),
+                result,
+                duration_ms: u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX),
+            };
+            if let Err(e) = self.app.emit("mcp://audit", &audit) {
+                tracing::warn!(tool, error = %e, "dispatch_action: failed to emit mcp://audit");
+            }
+        };
+
+        tracing::debug!(tool, args_repr = %args_repr, "dispatch_action: start");
+
+        // 1. Load user policy.
+        let policy = match crate::mcp_policy::load_or_init(&self.app) {
+            Ok(policy) => policy,
+            Err(e) => {
+                let msg = e.to_string();
+                tracing::debug!(tool, error = %msg, "dispatch_action: policy load failed");
+                emit_audit(McpAuditResult::Failed {
+                    msg: format!("policy load failed: {msg}"),
+                });
+                return Err(McpError::internal_error(msg, None));
+            }
+        };
+
+        // 2. Hard-deny check (for any tool — is_hard_denied checks for shell
+        //    catastrophe patterns; for non-write_pane tools the patterns are
+        //    unlikely to match args_repr but the check is cheap and safe).
+        if let Some(label) = crate::mcp_policy::is_hard_denied(&args_repr) {
+            tracing::debug!(tool, reason = label, hard = true, "dispatch_action: hard-denied");
+            emit_audit(McpAuditResult::Denied {
+                reason: label.to_string(),
+                hard: true,
+            });
+            return Err(McpError::invalid_params(
+                format!("hard-denied: {label}"),
+                None,
+            ));
+        }
+
+        // 3. Evaluate user-policy decision.
+        let decision = crate::mcp_policy::evaluate(&policy, tool, &args_repr);
+
+        tracing::debug!(tool, ?decision, "dispatch_action: policy decision");
+
+        // 4. Deny stops here; Allow/Ask only differ in the confirm flag.
+        let (needs_confirm, ask_reason) = match &decision {
+            crate::mcp_policy::PolicyDecision::Allow => (false, None),
+            crate::mcp_policy::PolicyDecision::Ask { reason } => {
+                (true, Some(reason.clone()))
+            }
+            crate::mcp_policy::PolicyDecision::Deny { reason, hard } => {
+                tracing::debug!(tool, reason = %reason, hard, "dispatch_action: denied by policy");
+                emit_audit(McpAuditResult::Denied {
+                    reason: reason.clone(),
+                    hard: *hard,
+                });
+                return Err(McpError::invalid_params(
+                    format!("denied: {reason}"),
+                    None,
+                ));
+            }
+        };
+
+        // 5. Generate a unique request id, register oneshot, emit mcp://request.
+        // uuid crate is not in Cargo.toml; generate via rand (already a dep).
+        // TODO: if uuid (v4 feature) is added to Cargo.toml, replace with:
+        //   let request_id = uuid::Uuid::new_v4().to_string();
+        let request_id = {
+            use rand::RngCore;
+            let mut bytes = [0u8; 16];
+            rand::rng().fill_bytes(&mut bytes);
+            // Format as a RFC-4122-style UUID v4 string for frontend interop.
+            bytes[6] = (bytes[6] & 0x0f) | 0x40;
+            bytes[8] = (bytes[8] & 0x3f) | 0x80;
+            format!(
+                "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}",
+                bytes[0], bytes[1], bytes[2], bytes[3],
+                bytes[4], bytes[5],
+                bytes[6], bytes[7],
+                bytes[8], bytes[9],
+                bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15],
+            )
+        };
+
+        let (tx, rx) = tokio::sync::oneshot::channel();
+        self.pending.0.lock().insert(request_id.clone(), tx);
+        // From here on the entry is removed however this function exits.
+        // Removing an entry that the reply path already took is a no-op.
+        let _pending_guard = PendingGuard {
+            registry: &*self.pending,
+            request_id: request_id.as_str(),
+        };
+
+        let payload = McpActionRequest {
+            request_id: request_id.clone(),
+            tool,
+            args,
+            needs_confirm,
+            reason: ask_reason,
+        };
+        tracing::debug!(tool, request_id = %request_id, needs_confirm, "dispatch_action: emitting mcp://request");
+        // If the request never leaves the backend nobody can answer it, so
+        // fail now instead of waiting out the full timeout.
+        if let Err(e) = self.app.emit("mcp://request", &payload) {
+            let msg = format!("failed to emit mcp://request: {e}");
+            tracing::warn!(tool, request_id = %request_id, error = %e, "dispatch_action: request emit failed");
+            emit_audit(McpAuditResult::Failed { msg: msg.clone() });
+            return Err(McpError::internal_error(
+                msg,
+                Some(json!({ "requestId": request_id })),
+            ));
+        }
+
+        // 6. Await reply with 30s timeout.
+        let outcome = tokio::time::timeout(Duration::from_secs(30), rx).await;
+
+        // 7. Emit the audit entry for whichever way the wait ended, then
+        //    propagate the result.
+        match outcome {
+            Err(_elapsed) => {
+                tracing::debug!(tool, request_id = %request_id, "dispatch_action: timed out");
+                emit_audit(McpAuditResult::Failed {
+                    msg: "timeout".into(),
+                });
+                Err(McpError::internal_error(
+                    "action timed out waiting for frontend response",
+                    Some(json!({ "requestId": request_id })),
+                ))
+            }
+            Ok(Err(_recv_err)) => {
+                // Sender was dropped (shouldn't happen normally).
+                tracing::debug!(tool, request_id = %request_id, "dispatch_action: channel closed");
+                emit_audit(McpAuditResult::Failed {
+                    msg: "channel closed".into(),
+                });
+                Err(McpError::internal_error(
+                    "action channel closed unexpectedly",
+                    Some(json!({ "requestId": request_id })),
+                ))
+            }
+            Ok(Ok(Ok(value))) => {
+                tracing::debug!(tool, request_id = %request_id, "dispatch_action: reply ok");
+                emit_audit(McpAuditResult::Ok);
+                Ok(value)
+            }
+            Ok(Ok(Err(msg))) => {
+                tracing::debug!(tool, request_id = %request_id, error = %msg, "dispatch_action: reply error");
+                emit_audit(McpAuditResult::Failed { msg: msg.clone() });
+                Err(McpError::internal_error(msg, None))
+            }
+        }
+    }
+
    /// Look up a leaf_id → pane_id under the MCP-allow gate.
    async fn resolve_pane(&self, leaf_id: &str) -> Result<PaneId, McpError> {
        let st = self.state.read().await;
@ -340,6 +613,41 @@ impl TileService {
            }
        }
    }
+
+    #[tool(description = "Set or clear the human-readable label on a pane. \
+        Pass empty string to clear. The leaf must be MCP-allowed.")]
+    async fn set_label(
+        &self,
+        Parameters(args): Parameters<SetLabelArgs>,
+    ) -> Result<CallToolResult, McpError> {
+        // Presence in the mirror doubles as the access check: the frontend
+        // enforces mcpAllow=true before mirroring a leaf, so a leaf that is
+        // found here is one the user has allowed.
+        let leaf_is_mirrored = self
+            .state
+            .read()
+            .await
+            .mirror
+            .leaves
+            .contains_key(&args.leaf_id);
+        if !leaf_is_mirrored {
+            return Err(McpError::invalid_params(
+                "unknown leaf_id (not visible to MCP; user may need to allow it)",
+                Some(json!({ "leaf_id": &args.leaf_id })),
+            ));
+        }
+
+        // JSON payload goes to the frontend; the flat text form is what the
+        // policy engine matches against.
+        let payload = json!({ "leafId": &args.leaf_id, "label": &args.label });
+        let policy_text = format!("leafId={} label={}", &args.leaf_id, &args.label);
+
+        tracing::debug!(leaf_id = %args.leaf_id, label = %args.label, "set_label: dispatching");
+        // The reply value carries nothing the caller needs; only success or
+        // failure matters.
+        self.dispatch_action("set_label", payload, policy_text)
+            .await?;
+
+        Ok(CallToolResult::success(vec![Content::text("ok")]))
+    }
 }

 #[tool_handler]
@ -495,6 +803,7 @@ pub async fn start_server(
    app_handle: AppHandle,
    ptys: Arc<PtyManager>,
    state: Arc<RwLock<McpState>>,
+    pending: Arc<PendingActions>,
 ) -> Result<RunningServer> {
    let cfg = load_or_init_config(&app_handle)?;
    let token = cfg.token.clone();
@ -505,13 +814,24 @@ pub async fn start_server(
    // Fresh service per session; cheap because we share state via Arcs.
    let ptys_f = ptys.clone();
    let state_f = state.clone();
+    let pending_f = pending.clone();
+    // Clone AppHandle before the move closure so we can pass it into each
+    // TileService instance. AppHandle is cheap to clone (it's an Arc inside).
+    let app_handle_for_service = app_handle.clone();
    // Disable rmcp's DNS-rebinding host allowlist. The default only permits
    // localhost / 127.0.0.1 / ::1; legitimate WSL clients connect via the
    // dynamic WSL gateway IP (172.x.x.1) which can't be in any static list.
    // Bearer-token auth on /mcp is the real gatekeeper, and we're not
    // running in a browser context where DNS rebinding is a concern.
    let mcp_service = StreamableHttpService::new(
-        move || Ok(TileService::new(ptys_f.clone(), state_f.clone())),
+        move || {
+            Ok(TileService::new(
+                ptys_f.clone(),
+                state_f.clone(),
+                pending_f.clone(),
+                app_handle_for_service.clone(),
+            ))
+        },
        LocalSessionManager::default().into(),
        StreamableHttpServerConfig::default().disable_allowed_hosts(),
    );
@ -580,3 +900,4 @@ pub fn regenerate_token(app: &AppHandle) -> Result<String> {
    save_config(app, &cfg)?;
    Ok(cfg.token)
 }
+