From 3450adc113acf9a2e9e0877437a76b7e263a4cc7 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Thu, 2 Jul 2026 16:13:00 +0530 Subject: [PATCH] fix(desktop): macOS/Linux pre-CEF stale-lock reap + installer pre-install kill (#4395) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #3605; the Windows-only runtime reap landed in #3793. macOS and Linux had no pre-CEF single-instance guard, so a process holding the CEF SingletonLock past the cache-wait budget could be a healthy running primary, not a wedged one — the reason the earlier macOS SIGKILL escalation was reverted. Part 1: add `cef_stale_reap` (macOS/Linux), a pre-CEF reap of a wedged prior instance gated on a post-update relaunch marker (the staleness signal from #4395). The updater writes the marker before `app.restart()`; the relaunched process consumes it (once, deleted, age-bounded). Only a fresh marker + a live lock holder triggers a reap. Guards: skip self, skip cross-host holders, and SIGTERM → grace → re-validate the same pid still owns the lock → SIGKILL (closes the PID-reuse window, honors kill_pid_force's contract). Never reaps a healthy instance. Wired into run() before cef_preflight::wait_for_cache_release. Part 2: add an NSIS NSIS_HOOK_PREINSTALL that terminates openhuman-core.exe and the main binary tree before any file copy, so a stale prior-version process can't survive an update. Adds the nix `hostname` feature for gethostname. Pure decision logic is unit tested on any host; the cfg-gated runtime/installer paths need verification on real macOS, Linux, and Windows(installer) hosts. --- app/src-tauri/Cargo.toml | 2 +- app/src-tauri/nsis-hooks.nsh | 29 ++ app/src-tauri/src/cef_stale_reap.rs | 511 ++++++++++++++++++++++++++++ app/src-tauri/src/lib.rs | 30 ++ app/src-tauri/tauri.conf.json | 5 + 5 files changed, 576 insertions(+), 1 deletion(-) create mode 100644 app/src-tauri/nsis-hooks.nsh create mode 100644 app/src-tauri/src/cef_stale_reap.rs diff --git a/app/src-tauri/Cargo.toml b/app/src-tauri/Cargo.toml index 4252e660c0..2e68696ab7 100644 --- a/app/src-tauri/Cargo.toml +++ b/app/src-tauri/Cargo.toml @@ -135,7 +135,7 @@ cef = { version = "=146.4.1", default-features = false } openhuman_core = { path = "../..", package = "openhuman", default-features = false } [target.'cfg(unix)'.dependencies] -nix = { version = "0.29", default-features = false, features = ["signal", "user"] } +nix = { version = "0.29", default-features = false, features = ["hostname", "signal", "user"] } [target.'cfg(target_os = "macos")'.dependencies] objc2 = "0.6" diff --git a/app/src-tauri/nsis-hooks.nsh b/app/src-tauri/nsis-hooks.nsh new file mode 100644 index 0000000000..8b60906762 --- /dev/null +++ b/app/src-tauri/nsis-hooks.nsh @@ -0,0 +1,29 @@ +; NSIS installer hooks for OpenHuman (issue #4395, follow-up to #3605). +; +; Part 2 of #4395 — installer pre-install process kill. The default tauri NSIS +; template only stops the *main* binary (OpenHuman.exe) via CheckIfAppIsRunning +; before overwriting it. A leftover / wedged `openhuman-core.exe` from the prior +; version (CLI/MCP/core mode) can still hold file handles under $INSTDIR and +; survive the update, which is exactly the "stale process blocks update" symptom +; #3605 tracks. Terminate those related processes before any file is copied so +; the new binary always lands cleanly. +; +; NSIS_HOOK_PREINSTALL runs before the template copies files, sets registry +; keys, and creates shortcuts (and before the built-in CheckIfAppIsRunning), so +; this is the correct place to guarantee no OpenHuman process is holding the +; install directory open. +; +; taskkill ships with every supported Windows version. /T also terminates child +; processes (CEF helpers); a non-zero exit (nothing matched) is fine and ignored. + +!macro NSIS_HOOK_PREINSTALL + DetailPrint "Stopping any running OpenHuman helper processes before install..." + ; Force-kill the embedded core / CLI / MCP process and its child tree. + nsExec::Exec 'taskkill /F /IM openhuman-core.exe /T' + Pop $0 + ; Belt-and-suspenders: the main binary and its CEF child processes. The + ; template's CheckIfAppIsRunning also covers OpenHuman.exe, but in a silent + ; auto-update relaunch we want the whole tree gone before files are replaced. + nsExec::Exec 'taskkill /F /IM ${MAINBINARYNAME}.exe /T' + Pop $0 +!macroend diff --git a/app/src-tauri/src/cef_stale_reap.rs b/app/src-tauri/src/cef_stale_reap.rs new file mode 100644 index 0000000000..d7fee7784f --- /dev/null +++ b/app/src-tauri/src/cef_stale_reap.rs @@ -0,0 +1,511 @@ +//! macOS/Linux pre-CEF stale CEF-lock-holder reap (issue #4395, follow-up to +//! #3605). +//! +//! ## The gap this closes +//! +//! On Windows a Win32 named mutex early in `run()` forces any *concurrent* +//! peer to `exit(0)` before the pre-CEF reap, so a process still holding the +//! CEF cache lock past that point is provably a wedged prior instance — +//! `process_recovery::reap_stale_openhuman_processes` reaps it safely. +//! +//! macOS and Linux have **no equivalent pre-CEF single-instance guard**: the +//! `#[cfg(windows)]` mutex does not exist there and `tauri_plugin_single_instance` +//! registers later (inside the builder, after CEF init). So a process holding +//! the CEF `SingletonLock` past the [`cef_preflight`] cache-wait budget may be a +//! **healthy running primary** — a live app holds its lock for its whole +//! lifetime — not a wedged one. Reaping it purely because "the lock was held N +//! seconds" could SIGKILL a healthy instance and lose user data. That exact +//! macOS SIGKILL escalation was reverted from PR #3793 (flagged by @oxoxDev + +//! Codex P1), and this module must not reintroduce it. +//! +//! ## The staleness signal (why this is safe) +//! +//! Rather than a time budget, the reap is gated on a **post-update relaunch +//! marker** — the "post-update marker" staleness signal named in #4395. The +//! updater writes the marker ([`write_update_relaunch_marker`]) immediately +//! before `app.restart()`; the freshly relaunched process consumes it here. +//! +//! Only when a *recent* marker is present do we treat a live CEF-lock holder as +//! stale. That is the one situation where a survivor is provably the +//! pre-update instance that should already have exited — never a healthy, +//! separately-launched app (single-instance app; no marker on a normal launch). +//! The marker is consumed (deleted) once per launch and bounded by +//! [`MARKER_MAX_AGE`] so a leaked marker cannot make a much-later launch reap a +//! healthy instance. +//! +//! Additional guards before any kill: +//! - **Self**: never reap our own pid. +//! - **Host**: the `SingletonLock` symlink target is `-`; skip +//! if the holder's host differs from ours (networked/NFS home dir where the +//! pid belongs to another machine and could collide with an unrelated local +//! process). +//! - **Re-validate before SIGKILL**: after SIGTERM + grace, re-read the lock +//! and confirm the *same* pid still owns it and is still alive. This closes +//! the PID-reuse window and honors `kill_pid_force`'s contract. +//! +//! This is complementary to `process_recovery::reap_stale_openhuman_processes` +//! (which deliberately *skips* when a live CEF-lock holder exists) and to the +//! Windows-only reap hardening tracked separately in #3900 — no Windows code is +//! touched here. +//! +//! Validation note: the reap glue is `#[cfg(any(macos, linux))]` and cannot be +//! reproduced on an arbitrary dev host. The decision logic ([`decide_reap`], +//! [`marker_is_fresh`], [`marker_path_for`]) is pure and unit-tested on any +//! host; the filesystem/signal glue is exercised on real macOS/Linux hosts. + +use std::path::{Path, PathBuf}; +use std::time::Duration; + +/// Grace period between SIGTERM and the re-validated SIGKILL. Mirrors the +/// `TERM_GRACE` used by `process_recovery`. +#[cfg(any(target_os = "macos", target_os = "linux"))] +const TERM_GRACE: Duration = Duration::from_millis(500); + +/// A relaunch marker older than this is ignored (and cleaned up). Bounds the +/// blast radius of a marker leaked by a process that died before consuming it, +/// so it can never make an unrelated later launch reap a healthy instance. +const MARKER_MAX_AGE: Duration = Duration::from_secs(300); + +/// File name of the post-update relaunch marker, written as a sibling of the +/// CEF cache directory (outside the locked `cef/` dir). +const MARKER_FILE_NAME: &str = "openhuman-update-relaunch.marker"; + +/// What to do about the current CEF `SingletonLock` holder. +#[derive(Debug, Clone, PartialEq, Eq)] +enum ReapDecision { + /// Leave the holder alone; the normal `cef_preflight` wait handles it. + Skip(&'static str), + /// Holder is provably a stale pre-update instance — reap this pid. + Reap { pid: i32 }, +} + +/// Pure reap decision, separated from the filesystem/signal glue so it is +/// unit-testable on any host. +/// +/// `holder` is the *live* `SingletonLock` holder as `(host, pid)`, or `None` +/// when there is no lock / it is stale / the pid is dead. `local_host` is this +/// machine's hostname (`None` if unresolved — then the host gate is skipped and +/// the marker requirement alone protects us). +fn decide_reap( + holder: Option<(String, i32)>, + self_pid: i32, + local_host: Option<&str>, + marker_fresh: bool, +) -> ReapDecision { + let Some((host, pid)) = holder else { + return ReapDecision::Skip("no live CEF SingletonLock holder"); + }; + if pid == self_pid { + return ReapDecision::Skip("CEF lock held by self"); + } + if let Some(local) = local_host { + if local != host { + return ReapDecision::Skip("CEF lock holder is on a different host"); + } + } + if !marker_fresh { + // The decisive safety gate: without a recent post-update marker a live + // holder may be a healthy running instance. Defer to the preflight wait + // rather than risk killing it. + return ReapDecision::Skip("no recent post-update marker; deferring to preflight wait"); + } + ReapDecision::Reap { pid } +} + +/// Pure freshness predicate: a marker is fresh iff its age is known and within +/// `max_age`. An unknown age (`None`) is treated as not fresh (fail-safe: do +/// not reap on an unreadable timestamp). +fn marker_is_fresh(age: Option, max_age: Duration) -> bool { + matches!(age, Some(a) if a <= max_age) +} + +/// Pure derivation of the marker path from the CEF cache directory. The marker +/// is a sibling of the cache dir (e.g. `.../com.openhuman.app/cef` → +/// `.../com.openhuman.app/openhuman-update-relaunch.marker`) so it lives +/// outside the locked `cef/` directory. +fn marker_path_for(cache_path: &Path) -> PathBuf { + cache_path.with_file_name(MARKER_FILE_NAME) +} + +/// Resolve the CEF cache directory. In production `cef_profile::prepare_process_cache_path` +/// always sets `OPENHUMAN_CEF_CACHE_PATH` before both the updater and this reap +/// run, so env resolution is sufficient and keeps this cross-platform/testable. +#[cfg(any(target_os = "macos", target_os = "linux"))] +fn cef_cache_path() -> Option { + std::env::var_os("OPENHUMAN_CEF_CACHE_PATH").map(PathBuf::from) +} + +/// Write the post-update relaunch marker. Called by the updater immediately +/// before `app.restart()`; the freshly relaunched process consumes it in +/// [`reap_stale_cef_lock_holder`]. Best-effort — a write failure only means the +/// reap falls back to the (safe) preflight wait. +#[cfg(any(target_os = "macos", target_os = "linux"))] +pub(crate) fn write_update_relaunch_marker() { + let Some(cache) = cef_cache_path() else { + log::warn!( + "[cef-stale-reap] OPENHUMAN_CEF_CACHE_PATH unset; skipping update-relaunch marker write" + ); + return; + }; + let marker = marker_path_for(&cache); + let body = format!("pid={}\n", std::process::id()); + match std::fs::write(&marker, body) { + Ok(()) => log::info!( + "[cef-stale-reap] wrote update-relaunch marker at {} (pid={})", + marker.display(), + std::process::id() + ), + Err(e) => log::warn!( + "[cef-stale-reap] failed to write update-relaunch marker at {}: {e}", + marker.display() + ), + } +} + +#[cfg(any(target_os = "macos", target_os = "linux"))] +mod imp { + use std::path::Path; + + use super::{ + cef_cache_path, decide_reap, marker_is_fresh, marker_path_for, ReapDecision, MARKER_MAX_AGE, + TERM_GRACE, + }; + use crate::cef_preflight; + use crate::core_process; + use crate::process_kill::{kill_pid_force, kill_pid_term}; + + /// Pre-CEF reap of a wedged prior instance that still holds the CEF + /// `SingletonLock` after an update relaunch. Safe by construction: only + /// acts when a recent post-update marker is present (see module docs). + /// Best-effort throughout — any failure leaves the holder for the normal + /// `cef_preflight::wait_for_cache_release` path. + pub(crate) fn reap_stale_cef_lock_holder() { + if core_process::reuse_existing_listener_enabled() { + log::info!( + "[cef-stale-reap] OPENHUMAN_CORE_REUSE_EXISTING=1; skipping stale CEF-lock reap" + ); + return; + } + + let Some(cache) = cef_cache_path() else { + log::debug!("[cef-stale-reap] OPENHUMAN_CEF_CACHE_PATH unset; nothing to reap"); + return; + }; + let lock_path = cache.join("SingletonLock"); + let marker_path = marker_path_for(&cache); + + // Consume the marker exactly once per launch (delete regardless of the + // decision) so a normal launch can never inherit a leftover signal. + let marker_fresh = consume_marker(&marker_path); + + let holder = live_lock_holder(&lock_path); + let self_pid = std::process::id() as i32; + let local_host = local_hostname(); + + match decide_reap(holder, self_pid, local_host.as_deref(), marker_fresh) { + ReapDecision::Skip(reason) => { + log::debug!("[cef-stale-reap] not reaping CEF-lock holder: {reason}"); + } + ReapDecision::Reap { pid } => reap_pid(&lock_path, pid), + } + } + + /// SIGTERM the stale pid, wait a grace period, then — only if the *same* + /// pid still owns the lock and is still alive — SIGKILL it. The re-check + /// closes the PID-reuse window before the force-kill. + fn reap_pid(lock_path: &Path, pid: i32) { + let pid_u32 = pid as u32; + log::warn!( + "[cef-stale-reap] recent post-update marker + live CEF-lock holder pid={pid}; \ + treating as wedged pre-update instance, sending SIGTERM" + ); + if let Err(e) = kill_pid_term(pid_u32) { + log::warn!("[cef-stale-reap] SIGTERM pid={pid} failed: {e}"); + } + + std::thread::sleep(TERM_GRACE); + + if holder_still_owns_lock(lock_path, pid) { + log::warn!( + "[cef-stale-reap] pid={pid} still holds the CEF lock after SIGTERM+grace; SIGKILL" + ); + if let Err(e) = kill_pid_force(pid_u32) { + log::warn!("[cef-stale-reap] SIGKILL pid={pid} failed: {e}"); + } + } else { + // Either it exited on SIGTERM, or the lock now points elsewhere. + // Do NOT force-kill — the pid may have been reused. `cef_preflight` + // removes the now-dead lock on its next poll. + log::info!( + "[cef-stale-reap] pid={pid} released the CEF lock after SIGTERM (or ownership changed); no SIGKILL" + ); + } + } + + /// Read+parse the live `SingletonLock` holder as `(host, pid)`. Returns + /// `None` when the lock is absent, unparseable, or the pid is not alive + /// (a dead-pid lock is stale and handled by `cef_preflight`). + fn live_lock_holder(lock_path: &Path) -> Option<(String, i32)> { + let target = std::fs::read_link(lock_path).ok()?; + let (host, pid) = cef_preflight::parse_lock_target(&target.to_string_lossy())?; + cef_preflight::is_pid_alive(pid).then_some((host, pid)) + } + + /// True iff the lock still resolves to `expected_pid` and that pid is alive. + fn holder_still_owns_lock(lock_path: &Path, expected_pid: i32) -> bool { + let Ok(target) = std::fs::read_link(lock_path) else { + return false; + }; + match cef_preflight::parse_lock_target(&target.to_string_lossy()) { + Some((_, pid)) => pid == expected_pid && cef_preflight::is_pid_alive(pid), + None => false, + } + } + + /// This machine's hostname, matching the `-` that Chromium + /// writes into `SingletonLock`. `None` if it cannot be resolved. + fn local_hostname() -> Option { + nix::unistd::gethostname() + .ok()? + .into_string() + .ok() + .filter(|h| !h.is_empty()) + } + + /// Read the marker's freshness and delete it (one-shot). Returns `true` + /// only if the marker existed and is within [`MARKER_MAX_AGE`]. + fn consume_marker(marker_path: &Path) -> bool { + let fresh = match std::fs::metadata(marker_path) { + Ok(meta) => { + let age = meta + .modified() + .ok() + .and_then(|m| std::time::SystemTime::now().duration_since(m).ok()); + let fresh = marker_is_fresh(age, MARKER_MAX_AGE); + log::info!( + "[cef-stale-reap] update-relaunch marker present at {} (fresh={fresh})", + marker_path.display() + ); + fresh + } + Err(_) => false, + }; + // Delete whether fresh or stale so it is never reused. + if let Err(e) = std::fs::remove_file(marker_path) { + if e.kind() != std::io::ErrorKind::NotFound { + log::warn!( + "[cef-stale-reap] failed to remove update-relaunch marker {}: {e}", + marker_path.display() + ); + } + } + fresh + } + + #[cfg(test)] + mod tests { + use super::*; + use std::os::unix::fs::symlink; + use std::path::PathBuf; + + // Serialises tests that mutate the process-global OPENHUMAN_CEF_CACHE_PATH. + static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + + fn fresh_tmp(tag: &str) -> PathBuf { + let tmp = std::env::temp_dir().join(format!( + "oh-cef-stale-reap-{}-{}-{}", + tag, + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0) + )); + let _ = std::fs::remove_dir_all(&tmp); + std::fs::create_dir_all(&tmp).expect("create tmp dir"); + tmp + } + + #[test] + fn live_lock_holder_reads_self_pid() { + let tmp = fresh_tmp("live-holder"); + let me = std::process::id() as i32; + symlink(format!("myhost-{me}"), tmp.join("SingletonLock")).unwrap(); + + let holder = live_lock_holder(&tmp.join("SingletonLock")); + assert_eq!(holder, Some(("myhost".to_string(), me))); + let _ = std::fs::remove_dir_all(&tmp); + } + + #[test] + fn live_lock_holder_none_for_dead_pid() { + let tmp = fresh_tmp("dead-holder"); + // ~i32::MAX-1: far beyond any plausible live pid. + symlink("deadhost-2147483646", tmp.join("SingletonLock")).unwrap(); + assert_eq!(live_lock_holder(&tmp.join("SingletonLock")), None); + let _ = std::fs::remove_dir_all(&tmp); + } + + #[test] + fn holder_still_owns_lock_matches_same_pid_only() { + let tmp = fresh_tmp("revalidate"); + let me = std::process::id() as i32; + let lock = tmp.join("SingletonLock"); + symlink(format!("myhost-{me}"), &lock).unwrap(); + + assert!(holder_still_owns_lock(&lock, me)); + // A different (dead) pid must not re-validate. + assert!(!holder_still_owns_lock(&lock, 2147483646)); + let _ = std::fs::remove_dir_all(&tmp); + } + + #[test] + fn consume_marker_reports_fresh_and_deletes() { + let tmp = fresh_tmp("consume-fresh"); + let cache = tmp.join("cef"); + std::fs::create_dir_all(&cache).unwrap(); + let marker = marker_path_for(&cache); + std::fs::write(&marker, "pid=1\n").unwrap(); + + assert!(consume_marker(&marker), "just-written marker must be fresh"); + assert!( + std::fs::metadata(&marker).is_err(), + "marker must be deleted after consume" + ); + // Second consume: absent → not fresh, no error. + assert!(!consume_marker(&marker)); + let _ = std::fs::remove_dir_all(&tmp); + } + + #[test] + fn write_then_consume_roundtrip_via_env() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + let prior = std::env::var_os("OPENHUMAN_CEF_CACHE_PATH"); + + let tmp = fresh_tmp("roundtrip"); + let cache = tmp.join("cef"); + std::fs::create_dir_all(&cache).unwrap(); + std::env::set_var("OPENHUMAN_CEF_CACHE_PATH", &cache); + + super::super::write_update_relaunch_marker(); + let marker = marker_path_for(&cache); + assert!( + std::fs::metadata(&marker).is_ok(), + "marker must exist after write" + ); + assert!(consume_marker(&marker), "freshly written marker is fresh"); + + match prior { + Some(v) => std::env::set_var("OPENHUMAN_CEF_CACHE_PATH", v), + None => std::env::remove_var("OPENHUMAN_CEF_CACHE_PATH"), + } + let _ = std::fs::remove_dir_all(&tmp); + } + + #[test] + fn reap_orchestrator_no_lock_no_marker_is_safe_noop() { + // With no SingletonLock and no marker, the orchestrator must take + // the Skip path — no panic, no files created, nothing killed. + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + let prior = std::env::var_os("OPENHUMAN_CEF_CACHE_PATH"); + + let tmp = fresh_tmp("noop"); + let cache = tmp.join("cef"); + std::fs::create_dir_all(&cache).unwrap(); + std::env::set_var("OPENHUMAN_CEF_CACHE_PATH", &cache); + + reap_stale_cef_lock_holder(); + + assert!( + std::fs::metadata(marker_path_for(&cache)).is_err(), + "orchestrator must not create a marker" + ); + + match prior { + Some(v) => std::env::set_var("OPENHUMAN_CEF_CACHE_PATH", v), + None => std::env::remove_var("OPENHUMAN_CEF_CACHE_PATH"), + } + let _ = std::fs::remove_dir_all(&tmp); + } + } +} + +#[cfg(any(target_os = "macos", target_os = "linux"))] +pub(crate) use imp::reap_stale_cef_lock_holder; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn skip_when_no_holder() { + assert_eq!( + decide_reap(None, 100, Some("host"), true), + ReapDecision::Skip("no live CEF SingletonLock holder") + ); + } + + #[test] + fn skip_when_holder_is_self() { + assert_eq!( + decide_reap(Some(("host".into(), 100)), 100, Some("host"), true), + ReapDecision::Skip("CEF lock held by self") + ); + } + + #[test] + fn skip_when_holder_on_other_host() { + assert_eq!( + decide_reap(Some(("otherbox".into(), 200)), 100, Some("host"), true), + ReapDecision::Skip("CEF lock holder is on a different host") + ); + } + + #[test] + fn skip_when_no_fresh_marker_even_if_live_holder() { + // The core safety property: a live holder on our host is NOT reaped + // without a recent post-update marker (it may be a healthy instance). + assert_eq!( + decide_reap(Some(("host".into(), 200)), 100, Some("host"), false), + ReapDecision::Skip("no recent post-update marker; deferring to preflight wait") + ); + } + + #[test] + fn reap_only_with_marker_matching_host_and_other_pid() { + assert_eq!( + decide_reap(Some(("host".into(), 200)), 100, Some("host"), true), + ReapDecision::Reap { pid: 200 } + ); + } + + #[test] + fn reap_when_local_host_unresolved_but_marker_fresh() { + // Unresolved local host skips only the host gate; the marker gate still + // governs, so a fresh marker still permits the reap. + assert_eq!( + decide_reap(Some(("host".into(), 200)), 100, None, true), + ReapDecision::Reap { pid: 200 } + ); + } + + #[test] + fn marker_is_fresh_bounds_on_age() { + assert!(marker_is_fresh(Some(Duration::from_secs(0)), MARKER_MAX_AGE)); + assert!(marker_is_fresh(Some(MARKER_MAX_AGE), MARKER_MAX_AGE)); + assert!(!marker_is_fresh( + Some(MARKER_MAX_AGE + Duration::from_secs(1)), + MARKER_MAX_AGE + )); + // Unknown age is fail-safe (not fresh → no reap). + assert!(!marker_is_fresh(None, MARKER_MAX_AGE)); + } + + #[test] + fn marker_path_is_sibling_of_cache_dir() { + assert_eq!( + marker_path_for(Path::new("/x/com.openhuman.app/cef")), + PathBuf::from("/x/com.openhuman.app/openhuman-update-relaunch.marker") + ); + } +} diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index b7aac0440d..4aae5e7114 100644 --- a/app/src-tauri/src/lib.rs +++ b/app/src-tauri/src/lib.rs @@ -20,6 +20,13 @@ mod cef_profile; // logic is unit-tested on any host; the Win32 glue is windows-only. #[cfg(any(target_os = "windows", test))] mod cef_singleton_wait; +// macOS/Linux pre-CEF reap of a wedged prior instance that still holds the CEF +// SingletonLock after an update relaunch (issue #4395, follow-up to #3605). +// Marker-gated so it never reaps a healthy running instance. Compiled under +// `test` too so the pure decision logic is unit-tested on any host; the +// filesystem/signal glue is macOS/Linux-only. +#[cfg(any(target_os = "macos", target_os = "linux", test))] +mod cef_stale_reap; mod claude_code; mod companion_commands; mod core_process; @@ -601,6 +608,11 @@ async fn apply_app_update( log::info!("[app-update] install complete — relaunching"); let _ = app.emit("app-update:status", "restarting"); + // Drop a post-update relaunch marker so the freshly launched process can + // safely reap this instance if it wedges instead of exiting (issue #4395). + #[cfg(any(target_os = "macos", target_os = "linux"))] + cef_stale_reap::write_update_relaunch_marker(); + log::info!("[app-update] starting early teardown before restart"); perform_early_teardown_async(&app).await; @@ -814,6 +826,11 @@ async fn install_app_update( log::info!("[app-update] install complete — relaunching"); let _ = app.emit("app-update:status", "restarting"); + // Drop a post-update relaunch marker so the freshly launched process can + // safely reap this instance if it wedges instead of exiting (issue #4395). + #[cfg(any(target_os = "macos", target_os = "linux"))] + cef_stale_reap::write_update_relaunch_marker(); + log::info!("[app-update] starting early teardown before restart"); perform_early_teardown_async(&app).await; @@ -2711,6 +2728,19 @@ pub fn run() { // after the budget we exit cleanly (code 0). Stale locks (PID dead) are // removed so crashed processes don't block launches. macOS: issue #864. // Linux: OPENHUMAN-TAURI-K1. Sentry: TAURI-RUST-F. + // + // ── macOS/Linux pre-CEF stale-lock reap (issue #4395) ───────────────── + // Before the bounded wait above can only *time out* on a wedged prior + // instance (the #3605 symptom: the updated app silently refuses to start), + // proactively reap that instance — but ONLY when a recent post-update + // relaunch marker proves the CEF-lock holder is the pre-update process, + // never a healthy running one. This is the macOS/Linux analogue of the + // Windows pre-CEF reap, gated on a staleness signal because these targets + // have no early single-instance mutex. See `cef_stale_reap` for the safety + // rationale (and why the reverted #3793 SIGKILL is not reintroduced). + #[cfg(any(target_os = "macos", target_os = "linux"))] + cef_stale_reap::reap_stale_cef_lock_holder(); + #[cfg(any(target_os = "macos", target_os = "linux"))] cef_preflight::wait_for_cache_release(); diff --git a/app/src-tauri/tauri.conf.json b/app/src-tauri/tauri.conf.json index 1eaad69899..e1a62c18e6 100644 --- a/app/src-tauri/tauri.conf.json +++ b/app/src-tauri/tauri.conf.json @@ -69,6 +69,11 @@ "desktopTemplate": "main.desktop" } }, + "windows": { + "nsis": { + "installerHooks": "nsis-hooks.nsh" + } + }, "createUpdaterArtifacts": false, "macOS": { "minimumSystemVersion": "10.15",