diff --git a/CHANGELOG.md b/CHANGELOG.md index 45426e61a..791dbcd7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,7 +64,7 @@ Status: unreleased. - Cron: allow payloads containing "heartbeat" in event filter. (#2219) Thanks @dwfinkelstein. - CLI: avoid loading config for global help/version while registering plugin commands. (#2212) Thanks @dial481. - Agents: include memory.md when bootstrapping memory context. (#2318) Thanks @czekaj. -- Agents: release session locks on process termination. (#2483) Thanks @janeexai. +- Agents: release session locks on process termination and cover more signals. (#2483) Thanks @janeexai. - Telegram: harden polling + retry behavior for transient network errors and Node 22 transport issues. (#2420) Thanks @techboss. - Telegram: wrap reasoning italics per line to avoid raw underscores. (#2181) Thanks @YuriNachos. - Telegram: log fetch/API errors in delivery to avoid unhandled rejections. (#2492) Thanks @altryne. diff --git a/src/agents/session-write-lock.test.ts b/src/agents/session-write-lock.test.ts index 8eafd6bf4..072eca364 100644 --- a/src/agents/session-write-lock.test.ts +++ b/src/agents/session-write-lock.test.ts @@ -3,7 +3,7 @@ import os from "node:os"; import path from "node:path"; import { describe, expect, it } from "vitest"; -import { acquireSessionWriteLock } from "./session-write-lock.js"; +import { __testing, acquireSessionWriteLock } from "./session-write-lock.js"; describe("acquireSessionWriteLock", () => { it("reuses locks across symlinked session paths", async () => { @@ -73,9 +73,38 @@ describe("acquireSessionWriteLock", () => { } }); + it("removes held locks on termination signals", async () => { + const signals = ["SIGINT", "SIGTERM", "SIGQUIT", "SIGABRT"] as const; + for (const signal of signals) { + const root = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-lock-cleanup-")); + try { + const sessionFile = path.join(root, "sessions.json"); + const lockPath = `${sessionFile}.lock`; + await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 }); + const keepAlive = () => {}; + if (signal === "SIGINT") { + process.on(signal, keepAlive); + } + + __testing.handleTerminationSignal(signal); + + await expect(fs.stat(lockPath)).rejects.toThrow(); + if (signal === "SIGINT") { + process.off(signal, keepAlive); + } + } finally { + await fs.rm(root, { recursive: true, force: true }); + } + } + }); + + it("registers cleanup for SIGQUIT and SIGABRT", () => { + expect(__testing.cleanupSignals).toContain("SIGQUIT"); + expect(__testing.cleanupSignals).toContain("SIGABRT"); + }); it("cleans up locks on SIGINT without removing other handlers", async () => { const root = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-lock-")); - const originalKill = process.kill; + const originalKill = process.kill.bind(process); const killCalls: Array = []; let otherHandlerCalled = false; @@ -99,7 +128,7 @@ describe("acquireSessionWriteLock", () => { await expect(fs.access(lockPath)).rejects.toThrow(); expect(otherHandlerCalled).toBe(true); - expect(killCalls).toEqual(["SIGINT"]); + expect(killCalls).toEqual([]); } finally { process.off("SIGINT", otherHandler); process.kill = originalKill; @@ -121,4 +150,13 @@ describe("acquireSessionWriteLock", () => { await fs.rm(root, { recursive: true, force: true }); } }); + it("keeps other signal listeners registered", () => { + const keepAlive = () => {}; + process.on("SIGINT", keepAlive); + + __testing.handleTerminationSignal("SIGINT"); + + expect(process.listeners("SIGINT")).toContain(keepAlive); + process.off("SIGINT", keepAlive); + }); }); diff --git a/src/agents/session-write-lock.ts b/src/agents/session-write-lock.ts index d7499eb2a..832d368a6 100644 --- a/src/agents/session-write-lock.ts +++ b/src/agents/session-write-lock.ts @@ -1,5 +1,5 @@ -import fs from "node:fs/promises"; import fsSync from "node:fs"; +import fs from "node:fs/promises"; import path from "node:path"; type LockFilePayload = { @@ -14,6 +14,9 @@ type HeldLock = { }; const HELD_LOCKS = new Map(); +const CLEANUP_SIGNALS = ["SIGINT", "SIGTERM", "SIGQUIT", "SIGABRT"] as const; +type CleanupSignal = (typeof CLEANUP_SIGNALS)[number]; +const cleanupHandlers = new Map void>(); function isAlive(pid: number): boolean { if (!Number.isFinite(pid) || pid <= 0) return false; @@ -25,6 +28,65 @@ function isAlive(pid: number): boolean { } } +/** + * Synchronously release all held locks. + * Used during process exit when async operations aren't reliable. + */ +function releaseAllLocksSync(): void { + for (const [sessionFile, held] of HELD_LOCKS) { + try { + if (typeof held.handle.fd === "number") { + fsSync.closeSync(held.handle.fd); + } + } catch { + // Ignore errors during cleanup - best effort + } + try { + fsSync.rmSync(held.lockPath, { force: true }); + } catch { + // Ignore errors during cleanup - best effort + } + HELD_LOCKS.delete(sessionFile); + } +} + +let cleanupRegistered = false; + +function handleTerminationSignal(signal: CleanupSignal): void { + releaseAllLocksSync(); + const shouldReraise = process.listenerCount(signal) === 1; + if (shouldReraise) { + const handler = cleanupHandlers.get(signal); + if (handler) process.off(signal, handler); + try { + process.kill(process.pid, signal); + } catch { + // Ignore errors during shutdown + } + } +} + +function registerCleanupHandlers(): void { + if (cleanupRegistered) return; + cleanupRegistered = true; + + // Cleanup on normal exit and process.exit() calls + process.on("exit", () => { + releaseAllLocksSync(); + }); + + // Handle termination signals + for (const signal of CLEANUP_SIGNALS) { + try { + const handler = () => handleTerminationSignal(signal); + cleanupHandlers.set(signal, handler); + process.on(signal, handler); + } catch { + // Ignore unsupported signals on this platform. + } + } +} + async function readLockPayload(lockPath: string): Promise { try { const raw = await fs.readFile(lockPath, "utf8"); @@ -44,6 +106,7 @@ export async function acquireSessionWriteLock(params: { }): Promise<{ release: () => Promise; }> { + registerCleanupHandlers(); const timeoutMs = params.timeoutMs ?? 10_000; const staleMs = params.staleMs ?? 30 * 60 * 1000; const sessionFile = path.resolve(params.sessionFile); @@ -118,52 +181,8 @@ export async function acquireSessionWriteLock(params: { throw new Error(`session file locked (timeout ${timeoutMs}ms): ${owner} ${lockPath}`); } -/** - * Synchronously release all held locks. - * Used during process exit when async operations aren't reliable. - */ -function releaseAllLocksSync(): void { - for (const [sessionFile, held] of HELD_LOCKS) { - try { - fsSync.closeSync(held.handle.fd); - } catch { - // Ignore close errors during cleanup - best effort - } - try { - fsSync.rmSync(held.lockPath, { force: true }); - } catch { - // Ignore errors during cleanup - best effort - } - HELD_LOCKS.delete(sessionFile); - } -} - -let cleanupRegistered = false; - -function registerCleanupHandlers(): void { - if (cleanupRegistered) return; - cleanupRegistered = true; - - // Cleanup on normal exit and process.exit() calls - process.on("exit", () => { - releaseAllLocksSync(); - }); - - // Handle SIGINT (Ctrl+C) and SIGTERM - const handleSignal = (signal: NodeJS.Signals) => { - releaseAllLocksSync(); - // Remove only our handlers and re-raise signal for proper exit code. - process.off("SIGINT", onSigInt); - process.off("SIGTERM", onSigTerm); - process.kill(process.pid, signal); - }; - - const onSigInt = () => handleSignal("SIGINT"); - const onSigTerm = () => handleSignal("SIGTERM"); - - process.on("SIGINT", onSigInt); - process.on("SIGTERM", onSigTerm); -} - -// Register cleanup handlers when module loads -registerCleanupHandlers(); +export const __testing = { + cleanupSignals: [...CLEANUP_SIGNALS], + handleTerminationSignal, + releaseAllLocksSync, +};