fix: harden session lock cleanup (#2483) (thanks @janeexai)

This commit is contained in:
Gustavo Madeira Santana
2026-01-26 21:09:08 -05:00
parent 5796a92231
commit 66a5b324a1
3 changed files with 111 additions and 54 deletions

View File

@@ -64,7 +64,7 @@ Status: unreleased.
- Cron: allow payloads containing "heartbeat" in event filter. (#2219) Thanks @dwfinkelstein.
- CLI: avoid loading config for global help/version while registering plugin commands. (#2212) Thanks @dial481.
- Agents: include memory.md when bootstrapping memory context. (#2318) Thanks @czekaj.
- Agents: release session locks on process termination. (#2483) Thanks @janeexai.
- Agents: release session locks on process termination and cover more signals. (#2483) Thanks @janeexai.
- Telegram: harden polling + retry behavior for transient network errors and Node 22 transport issues. (#2420) Thanks @techboss.
- Telegram: wrap reasoning italics per line to avoid raw underscores. (#2181) Thanks @YuriNachos.
- Telegram: log fetch/API errors in delivery to avoid unhandled rejections. (#2492) Thanks @altryne.

View File

@@ -3,7 +3,7 @@ import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { acquireSessionWriteLock } from "./session-write-lock.js";
import { __testing, acquireSessionWriteLock } from "./session-write-lock.js";
describe("acquireSessionWriteLock", () => {
it("reuses locks across symlinked session paths", async () => {
@@ -73,9 +73,38 @@ describe("acquireSessionWriteLock", () => {
}
});
it("removes held locks on termination signals", async () => {
const signals = ["SIGINT", "SIGTERM", "SIGQUIT", "SIGABRT"] as const;
for (const signal of signals) {
const root = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-lock-cleanup-"));
try {
const sessionFile = path.join(root, "sessions.json");
const lockPath = `${sessionFile}.lock`;
await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 });
const keepAlive = () => {};
if (signal === "SIGINT") {
process.on(signal, keepAlive);
}
__testing.handleTerminationSignal(signal);
await expect(fs.stat(lockPath)).rejects.toThrow();
if (signal === "SIGINT") {
process.off(signal, keepAlive);
}
} finally {
await fs.rm(root, { recursive: true, force: true });
}
}
});
it("registers cleanup for SIGQUIT and SIGABRT", () => {
expect(__testing.cleanupSignals).toContain("SIGQUIT");
expect(__testing.cleanupSignals).toContain("SIGABRT");
});
it("cleans up locks on SIGINT without removing other handlers", async () => {
const root = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-lock-"));
const originalKill = process.kill;
const originalKill = process.kill.bind(process);
const killCalls: Array<NodeJS.Signals | undefined> = [];
let otherHandlerCalled = false;
@@ -99,7 +128,7 @@ describe("acquireSessionWriteLock", () => {
await expect(fs.access(lockPath)).rejects.toThrow();
expect(otherHandlerCalled).toBe(true);
expect(killCalls).toEqual(["SIGINT"]);
expect(killCalls).toEqual([]);
} finally {
process.off("SIGINT", otherHandler);
process.kill = originalKill;
@@ -121,4 +150,13 @@ describe("acquireSessionWriteLock", () => {
await fs.rm(root, { recursive: true, force: true });
}
});
it("keeps other signal listeners registered", () => {
const keepAlive = () => {};
process.on("SIGINT", keepAlive);
__testing.handleTerminationSignal("SIGINT");
expect(process.listeners("SIGINT")).toContain(keepAlive);
process.off("SIGINT", keepAlive);
});
});

View File

@@ -1,5 +1,5 @@
import fs from "node:fs/promises";
import fsSync from "node:fs";
import fs from "node:fs/promises";
import path from "node:path";
type LockFilePayload = {
@@ -14,6 +14,9 @@ type HeldLock = {
};
const HELD_LOCKS = new Map<string, HeldLock>();
const CLEANUP_SIGNALS = ["SIGINT", "SIGTERM", "SIGQUIT", "SIGABRT"] as const;
type CleanupSignal = (typeof CLEANUP_SIGNALS)[number];
const cleanupHandlers = new Map<CleanupSignal, () => void>();
function isAlive(pid: number): boolean {
if (!Number.isFinite(pid) || pid <= 0) return false;
@@ -25,6 +28,65 @@ function isAlive(pid: number): boolean {
}
}
/**
* Synchronously release all held locks.
* Used during process exit when async operations aren't reliable.
*/
function releaseAllLocksSync(): void {
for (const [sessionFile, held] of HELD_LOCKS) {
try {
if (typeof held.handle.fd === "number") {
fsSync.closeSync(held.handle.fd);
}
} catch {
// Ignore errors during cleanup - best effort
}
try {
fsSync.rmSync(held.lockPath, { force: true });
} catch {
// Ignore errors during cleanup - best effort
}
HELD_LOCKS.delete(sessionFile);
}
}
let cleanupRegistered = false;
function handleTerminationSignal(signal: CleanupSignal): void {
releaseAllLocksSync();
const shouldReraise = process.listenerCount(signal) === 1;
if (shouldReraise) {
const handler = cleanupHandlers.get(signal);
if (handler) process.off(signal, handler);
try {
process.kill(process.pid, signal);
} catch {
// Ignore errors during shutdown
}
}
}
function registerCleanupHandlers(): void {
if (cleanupRegistered) return;
cleanupRegistered = true;
// Cleanup on normal exit and process.exit() calls
process.on("exit", () => {
releaseAllLocksSync();
});
// Handle termination signals
for (const signal of CLEANUP_SIGNALS) {
try {
const handler = () => handleTerminationSignal(signal);
cleanupHandlers.set(signal, handler);
process.on(signal, handler);
} catch {
// Ignore unsupported signals on this platform.
}
}
}
async function readLockPayload(lockPath: string): Promise<LockFilePayload | null> {
try {
const raw = await fs.readFile(lockPath, "utf8");
@@ -44,6 +106,7 @@ export async function acquireSessionWriteLock(params: {
}): Promise<{
release: () => Promise<void>;
}> {
registerCleanupHandlers();
const timeoutMs = params.timeoutMs ?? 10_000;
const staleMs = params.staleMs ?? 30 * 60 * 1000;
const sessionFile = path.resolve(params.sessionFile);
@@ -118,52 +181,8 @@ export async function acquireSessionWriteLock(params: {
throw new Error(`session file locked (timeout ${timeoutMs}ms): ${owner} ${lockPath}`);
}
/**
* Synchronously release all held locks.
* Used during process exit when async operations aren't reliable.
*/
function releaseAllLocksSync(): void {
for (const [sessionFile, held] of HELD_LOCKS) {
try {
fsSync.closeSync(held.handle.fd);
} catch {
// Ignore close errors during cleanup - best effort
}
try {
fsSync.rmSync(held.lockPath, { force: true });
} catch {
// Ignore errors during cleanup - best effort
}
HELD_LOCKS.delete(sessionFile);
}
}
let cleanupRegistered = false;
function registerCleanupHandlers(): void {
if (cleanupRegistered) return;
cleanupRegistered = true;
// Cleanup on normal exit and process.exit() calls
process.on("exit", () => {
releaseAllLocksSync();
});
// Handle SIGINT (Ctrl+C) and SIGTERM
const handleSignal = (signal: NodeJS.Signals) => {
releaseAllLocksSync();
// Remove only our handlers and re-raise signal for proper exit code.
process.off("SIGINT", onSigInt);
process.off("SIGTERM", onSigTerm);
process.kill(process.pid, signal);
};
const onSigInt = () => handleSignal("SIGINT");
const onSigTerm = () => handleSignal("SIGTERM");
process.on("SIGINT", onSigInt);
process.on("SIGTERM", onSigTerm);
}
// Register cleanup handlers when module loads
registerCleanupHandlers();
export const __testing = {
cleanupSignals: [...CLEANUP_SIGNALS],
handleTerminationSignal,
releaseAllLocksSync,
};