fix: harden session lock cleanup (#2483) (thanks @janeexai)
This commit is contained in:
@@ -64,7 +64,7 @@ Status: unreleased.
|
||||
- Cron: allow payloads containing "heartbeat" in event filter. (#2219) Thanks @dwfinkelstein.
|
||||
- CLI: avoid loading config for global help/version while registering plugin commands. (#2212) Thanks @dial481.
|
||||
- Agents: include memory.md when bootstrapping memory context. (#2318) Thanks @czekaj.
|
||||
- Agents: release session locks on process termination. (#2483) Thanks @janeexai.
|
||||
- Agents: release session locks on process termination and cover more signals. (#2483) Thanks @janeexai.
|
||||
- Telegram: harden polling + retry behavior for transient network errors and Node 22 transport issues. (#2420) Thanks @techboss.
|
||||
- Telegram: wrap reasoning italics per line to avoid raw underscores. (#2181) Thanks @YuriNachos.
|
||||
- Telegram: log fetch/API errors in delivery to avoid unhandled rejections. (#2492) Thanks @altryne.
|
||||
|
||||
@@ -3,7 +3,7 @@ import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import { acquireSessionWriteLock } from "./session-write-lock.js";
|
||||
import { __testing, acquireSessionWriteLock } from "./session-write-lock.js";
|
||||
|
||||
describe("acquireSessionWriteLock", () => {
|
||||
it("reuses locks across symlinked session paths", async () => {
|
||||
@@ -73,9 +73,38 @@ describe("acquireSessionWriteLock", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("removes held locks on termination signals", async () => {
|
||||
const signals = ["SIGINT", "SIGTERM", "SIGQUIT", "SIGABRT"] as const;
|
||||
for (const signal of signals) {
|
||||
const root = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-lock-cleanup-"));
|
||||
try {
|
||||
const sessionFile = path.join(root, "sessions.json");
|
||||
const lockPath = `${sessionFile}.lock`;
|
||||
await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 });
|
||||
const keepAlive = () => {};
|
||||
if (signal === "SIGINT") {
|
||||
process.on(signal, keepAlive);
|
||||
}
|
||||
|
||||
__testing.handleTerminationSignal(signal);
|
||||
|
||||
await expect(fs.stat(lockPath)).rejects.toThrow();
|
||||
if (signal === "SIGINT") {
|
||||
process.off(signal, keepAlive);
|
||||
}
|
||||
} finally {
|
||||
await fs.rm(root, { recursive: true, force: true });
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("registers cleanup for SIGQUIT and SIGABRT", () => {
|
||||
expect(__testing.cleanupSignals).toContain("SIGQUIT");
|
||||
expect(__testing.cleanupSignals).toContain("SIGABRT");
|
||||
});
|
||||
it("cleans up locks on SIGINT without removing other handlers", async () => {
|
||||
const root = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-lock-"));
|
||||
const originalKill = process.kill;
|
||||
const originalKill = process.kill.bind(process);
|
||||
const killCalls: Array<NodeJS.Signals | undefined> = [];
|
||||
let otherHandlerCalled = false;
|
||||
|
||||
@@ -99,7 +128,7 @@ describe("acquireSessionWriteLock", () => {
|
||||
|
||||
await expect(fs.access(lockPath)).rejects.toThrow();
|
||||
expect(otherHandlerCalled).toBe(true);
|
||||
expect(killCalls).toEqual(["SIGINT"]);
|
||||
expect(killCalls).toEqual([]);
|
||||
} finally {
|
||||
process.off("SIGINT", otherHandler);
|
||||
process.kill = originalKill;
|
||||
@@ -121,4 +150,13 @@ describe("acquireSessionWriteLock", () => {
|
||||
await fs.rm(root, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
it("keeps other signal listeners registered", () => {
|
||||
const keepAlive = () => {};
|
||||
process.on("SIGINT", keepAlive);
|
||||
|
||||
__testing.handleTerminationSignal("SIGINT");
|
||||
|
||||
expect(process.listeners("SIGINT")).toContain(keepAlive);
|
||||
process.off("SIGINT", keepAlive);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import fs from "node:fs/promises";
|
||||
import fsSync from "node:fs";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
type LockFilePayload = {
|
||||
@@ -14,6 +14,9 @@ type HeldLock = {
|
||||
};
|
||||
|
||||
const HELD_LOCKS = new Map<string, HeldLock>();
|
||||
const CLEANUP_SIGNALS = ["SIGINT", "SIGTERM", "SIGQUIT", "SIGABRT"] as const;
|
||||
type CleanupSignal = (typeof CLEANUP_SIGNALS)[number];
|
||||
const cleanupHandlers = new Map<CleanupSignal, () => void>();
|
||||
|
||||
function isAlive(pid: number): boolean {
|
||||
if (!Number.isFinite(pid) || pid <= 0) return false;
|
||||
@@ -25,6 +28,65 @@ function isAlive(pid: number): boolean {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Synchronously release all held locks.
|
||||
* Used during process exit when async operations aren't reliable.
|
||||
*/
|
||||
function releaseAllLocksSync(): void {
|
||||
for (const [sessionFile, held] of HELD_LOCKS) {
|
||||
try {
|
||||
if (typeof held.handle.fd === "number") {
|
||||
fsSync.closeSync(held.handle.fd);
|
||||
}
|
||||
} catch {
|
||||
// Ignore errors during cleanup - best effort
|
||||
}
|
||||
try {
|
||||
fsSync.rmSync(held.lockPath, { force: true });
|
||||
} catch {
|
||||
// Ignore errors during cleanup - best effort
|
||||
}
|
||||
HELD_LOCKS.delete(sessionFile);
|
||||
}
|
||||
}
|
||||
|
||||
let cleanupRegistered = false;
|
||||
|
||||
function handleTerminationSignal(signal: CleanupSignal): void {
|
||||
releaseAllLocksSync();
|
||||
const shouldReraise = process.listenerCount(signal) === 1;
|
||||
if (shouldReraise) {
|
||||
const handler = cleanupHandlers.get(signal);
|
||||
if (handler) process.off(signal, handler);
|
||||
try {
|
||||
process.kill(process.pid, signal);
|
||||
} catch {
|
||||
// Ignore errors during shutdown
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function registerCleanupHandlers(): void {
|
||||
if (cleanupRegistered) return;
|
||||
cleanupRegistered = true;
|
||||
|
||||
// Cleanup on normal exit and process.exit() calls
|
||||
process.on("exit", () => {
|
||||
releaseAllLocksSync();
|
||||
});
|
||||
|
||||
// Handle termination signals
|
||||
for (const signal of CLEANUP_SIGNALS) {
|
||||
try {
|
||||
const handler = () => handleTerminationSignal(signal);
|
||||
cleanupHandlers.set(signal, handler);
|
||||
process.on(signal, handler);
|
||||
} catch {
|
||||
// Ignore unsupported signals on this platform.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async function readLockPayload(lockPath: string): Promise<LockFilePayload | null> {
|
||||
try {
|
||||
const raw = await fs.readFile(lockPath, "utf8");
|
||||
@@ -44,6 +106,7 @@ export async function acquireSessionWriteLock(params: {
|
||||
}): Promise<{
|
||||
release: () => Promise<void>;
|
||||
}> {
|
||||
registerCleanupHandlers();
|
||||
const timeoutMs = params.timeoutMs ?? 10_000;
|
||||
const staleMs = params.staleMs ?? 30 * 60 * 1000;
|
||||
const sessionFile = path.resolve(params.sessionFile);
|
||||
@@ -118,52 +181,8 @@ export async function acquireSessionWriteLock(params: {
|
||||
throw new Error(`session file locked (timeout ${timeoutMs}ms): ${owner} ${lockPath}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Synchronously release all held locks.
|
||||
* Used during process exit when async operations aren't reliable.
|
||||
*/
|
||||
function releaseAllLocksSync(): void {
|
||||
for (const [sessionFile, held] of HELD_LOCKS) {
|
||||
try {
|
||||
fsSync.closeSync(held.handle.fd);
|
||||
} catch {
|
||||
// Ignore close errors during cleanup - best effort
|
||||
}
|
||||
try {
|
||||
fsSync.rmSync(held.lockPath, { force: true });
|
||||
} catch {
|
||||
// Ignore errors during cleanup - best effort
|
||||
}
|
||||
HELD_LOCKS.delete(sessionFile);
|
||||
}
|
||||
}
|
||||
|
||||
let cleanupRegistered = false;
|
||||
|
||||
function registerCleanupHandlers(): void {
|
||||
if (cleanupRegistered) return;
|
||||
cleanupRegistered = true;
|
||||
|
||||
// Cleanup on normal exit and process.exit() calls
|
||||
process.on("exit", () => {
|
||||
releaseAllLocksSync();
|
||||
});
|
||||
|
||||
// Handle SIGINT (Ctrl+C) and SIGTERM
|
||||
const handleSignal = (signal: NodeJS.Signals) => {
|
||||
releaseAllLocksSync();
|
||||
// Remove only our handlers and re-raise signal for proper exit code.
|
||||
process.off("SIGINT", onSigInt);
|
||||
process.off("SIGTERM", onSigTerm);
|
||||
process.kill(process.pid, signal);
|
||||
};
|
||||
|
||||
const onSigInt = () => handleSignal("SIGINT");
|
||||
const onSigTerm = () => handleSignal("SIGTERM");
|
||||
|
||||
process.on("SIGINT", onSigInt);
|
||||
process.on("SIGTERM", onSigTerm);
|
||||
}
|
||||
|
||||
// Register cleanup handlers when module loads
|
||||
registerCleanupHandlers();
|
||||
export const __testing = {
|
||||
cleanupSignals: [...CLEANUP_SIGNALS],
|
||||
handleTerminationSignal,
|
||||
releaseAllLocksSync,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user