From 9144ba64b67ad32bd8306f077ba4194ebfb866c3 Mon Sep 17 00:00:00 2001 From: megaproxy Date: Sat, 30 May 2026 01:09:46 +0100 Subject: [PATCH] Fix: closing any window killed all (tokio::spawn panic on close path) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The synchronous on_window_event CloseRequested handler reached WindowsState::schedule_save -> tokio::spawn, which panics ("no reactor running") because that callback runs on the main thread with no ambient Tokio runtime; the unhandled main-thread panic aborted the whole process, taking every window + PTY down. (push_window_workspaces hit the same line safely because it's an async tauri::command.) - window_state.rs: tokio::spawn -> tauri::async_runtime::spawn (global runtime, works from any thread). Verified against tauri 2.11 source. - lib.rs: defensive .build().run() guard — prevent_exit while any window remains so no path can orphan live PTYs; close logging warn!->debug!. Source-verified; pending Windows runtime test. Co-Authored-By: Claude Opus 4.8 (1M context) --- memory.md | 12 +++++++++ src-tauri/src/lib.rs | 50 ++++++++++++++++++++++++++++++++--- src-tauri/src/window_state.rs | 13 ++++++--- 3 files changed, 69 insertions(+), 6 deletions(-) diff --git a/memory.md b/memory.md index ab221f4..8e83a09 100644 --- a/memory.md +++ b/memory.md @@ -108,6 +108,18 @@ Four-agent research pass (terminal-landscape, AI-orchestration, xterm/Tauri ecos ## Session log +### 2026-05-30 — FIX: closing any window killed all windows (Tokio-runtime panic) + +**Symptom:** after dragging a pane out (or spawning) a daughter window, closing *either* the main or a daughter window closed them all, dumping `exit code 101`. + +**Root cause (confirmed via a 3-agent Workflow + reading the installed `tauri-runtime-wry-2.11.2` / `tauri-2.11.2` source):** NOT the exit logic and NOT WebView2. It was a **panic on the main thread**. 
The synchronous `on_window_event` `CloseRequested` handler in `lib.rs` calls `WindowsState::forget()` → `schedule_save()` → `tokio::spawn` (`window_state.rs:95`). That callback runs on the wry event-loop main thread with **no ambient Tokio runtime**, so `tokio::spawn` panics (`there is no reactor running…`); an unhandled main-thread panic aborts the whole process, taking every window + PTY down. `push_window_workspaces` hit the same `schedule_save` line but never crashed because it's an `async #[tauri::command]` that already runs inside Tauri's managed Tokio runtime — the bug only fired on the window-close path. + +**Fix (`src-tauri/src/window_state.rs`):** swap `tokio::spawn` → **`tauri::async_runtime::spawn`**, which schedules onto Tauri's global lazily-init'd Tokio runtime and works from *any* thread (incl. sync callbacks). Verified against `tauri-2.11.2/src/async_runtime.rs`: same `JoinHandle` shape, has `.abort()` (needed for the debounce cancel), and `tokio::time::sleep` still works inside the spawned future. Imports: `JoinHandle`+`spawn` now from `tauri::async_runtime`, `Duration` from `std::time`, `sleep` from `tokio::time`. **Rule learned: never call `tokio::spawn`/`tokio::*` runtime APIs from `on_window_event`, the `RunEvent` `.run()` closure, `Drop` impls, or any sync helper reachable from them — use `tauri::async_runtime::spawn`. Audit found this was the ONLY unsafe instance (`mcp.rs:800` and `mcp.rs:1502` are in async contexts → safe).** + +**Also `src-tauri/src/lib.rs` (defensive, not the primary fix):** switched `.run(generate_context!())` → `.build(…).run(|app, event| …)` and on `RunEvent::ExitRequested` call `api.prevent_exit()` iff `code.is_none() && !webview_windows().is_empty()` — belt-and-suspenders so no future path can tear down the process (and orphan live PTYs) while any window remains; explicit `AppHandle::exit(Some)` is always honored. 
Verified-from-source semantics: wry emits `ExitRequested{code:None}` **only** when the last window is destroyed (window store empty), and `manager.on_window_close` removes the window from `webview_windows()` *before* `ExitRequested` fires, so the count is accurate and there's no zombie risk. Window close/destroy logging demoted `warn!`→`debug!` (run `RUST_LOG=tiletopia=debug` to trace). + +**Status: source-verified, NOT yet runtime-verified — needs a Windows `pnpm tauri dev` build** (cargo toolchain is Windows-only; can't build from WSL). Regression test = step 1: drag a pane out, close the daughter, main must survive with no exit-101. **Known minor follow-up:** a deliberately-closed window's *own* panes leak their PTYs (webview JS doesn't run XtermPane unmount cleanup on OS close), so those WSL shells linger orphaned — lower priority than persistence, not fixed. + ### 2026-05-28/29 — bug fix + feature batch from the backlog (post-0.4.0) Started from a user-reported **stuck/ghost cursor** in panes; fixed by switching xterm from the DOM renderer to `@xterm/addon-canvas` (DOM renderer leaves a stale cursor block under the Claude TUI's rapid hide/show + blink). User verified fixed on Windows. diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 3a88bac..c8cf4f9 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -67,12 +67,34 @@ pub fn run() { .manage(windows_state) .manage(pending_inits) .on_window_event(move |window, event| { + let label = window.label().to_string(); + + // Window-lifecycle tracing for the multi-window close behavior. + // Silent at the default `info` level; run with + // `RUST_LOG=tiletopia=debug` to confirm the event sequence when a + // window closes (which windows the runtime still tracks, whether a + // close triggers an app-exit). Verified against tauri-runtime-wry + // 2.11: closing a non-last window emits NO ExitRequested, so other + // windows survive; only the last window's Destroyed triggers exit. 
+ match event { + tauri::WindowEvent::CloseRequested { .. } + | tauri::WindowEvent::Destroyed => { + let open: Vec<String> = window + .app_handle() + .webview_windows() + .keys() + .cloned() + .collect(); + tracing::debug!("window {event:?} label={label} open_windows={open:?}"); + } + _ => {} + } + // When a non-main window closes, drop its workspaces from the // aggregator AND any unconsumed pending-init payload so neither // resurrect on next launch. Matches Chrome-style "closing a // detached window discards its tabs" intent. if let tauri::WindowEvent::CloseRequested { .. } = event { - let label = window.label().to_string(); if label != MAIN_WINDOW_LABEL { pending_inits_for_event.by_label.lock().remove(&label); windows_state_for_event @@ -109,6 +131,28 @@ pub fn run() { commands::mcp_policy_save, commands::mcp_hard_deny_labels, ]) - .run(tauri::generate_context!()) - .expect("error while running tauri application"); + .build(tauri::generate_context!()) + .expect("error while building tauri application") + .run(|app_handle, event| { + // Keep the process alive as long as ANY window is open. Every + // window (main + drag-out "daughter" windows) shares one process, + // and every PTY is owned by the single PtyManager in it. Tauri/wry + // emits `ExitRequested { code: None }` only when the LAST window is + // destroyed (tauri-runtime-wry 2.11 emits it solely when the window + // store goes empty); an explicit `AppHandle::exit(n)` carries + // `code: Some(n)`. By the time this fires, the closed window has + // already been removed from `webview_windows()`, so the check is + // accurate. We only ever reach the empty-set case here, but guard + // defensively: if any window somehow remains, refuse the exit so a + // stray close can't tear the process down and orphan live PTYs. + // An explicit exit (Some) is always honored. + if let tauri::RunEvent::ExitRequested { code, api, ..
} = event { + let open: Vec<String> = + app_handle.webview_windows().keys().cloned().collect(); + tracing::debug!("RunEvent::ExitRequested code={code:?} open_windows={open:?}"); + if code.is_none() && !open.is_empty() { + api.prevent_exit(); + } + } + }); } diff --git a/src-tauri/src/window_state.rs b/src-tauri/src/window_state.rs index a6ba575..36f6014 100644 --- a/src-tauri/src/window_state.rs +++ b/src-tauri/src/window_state.rs @@ -21,13 +21,20 @@ use std::collections::HashMap; use std::sync::Arc; +use std::time::Duration; use anyhow::{Context, Result}; use parking_lot::Mutex; use serde_json::Value; +// `async_runtime::spawn` schedules onto Tauri's global Tokio runtime and works +// from ANY thread — including the synchronous `on_window_event` callback that +// reaches `schedule_save` via `forget()` on window close. Plain `tokio::spawn` +// panics there ("no reactor running") because that callback has no ambient +// runtime, and a main-thread panic aborts the whole process, taking every +// window + PTY with it. See the close-crash fix. +use tauri::async_runtime::{spawn, JoinHandle}; use tauri::{AppHandle, Manager}; -use tokio::task::JoinHandle; -use tokio::time::{sleep, Duration}; +use tokio::time::sleep; const WORKSPACE_FILE: &str = "workspace.json"; const SAVE_DEBOUNCE: Duration = Duration::from_millis(500); @@ -92,7 +99,7 @@ impl WindowsState { if let Some(prev) = slot.take() { prev.abort(); } - let handle = tokio::spawn(async move { + let handle = spawn(async move { sleep(SAVE_DEBOUNCE).await; if let Err(e) = me.save_now(&app).await { tracing::warn!("debounced workspace save failed: {e:#}");