fix: improve app restart and gateway logs
This commit is contained in:
34
src/infra/gateway-lock.test.ts
Normal file
34
src/infra/gateway-lock.test.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
import fs from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import { acquireGatewayLock, GatewayLockError } from "./gateway-lock.js";
|
||||
|
||||
/**
 * Build a fresh lock socket path inside the OS temp directory.
 *
 * The file name embeds the current pid plus a random hex suffix so that
 * separate calls (and separate test processes) are unlikely to collide.
 */
const newLockPath = () => {
  const randomSuffix = Math.random().toString(16).slice(2);
  const socketName = `clawdis-gateway-lock-test-${process.pid}-${randomSuffix}.sock`;
  return path.join(os.tmpdir(), socketName);
};
|
||||
|
||||
describe("gateway-lock", () => {
  it("prevents concurrent gateway instances and releases cleanly", async () => {
    const lockPath = newLockPath();

    const release1 = await acquireGatewayLock(lockPath);
    try {
      // While the lock is held, the socket file exists and a second
      // acquisition attempt on the same path must be refused.
      expect(fs.existsSync(lockPath)).toBe(true);

      await expect(acquireGatewayLock(lockPath)).rejects.toBeInstanceOf(
        GatewayLockError,
      );
    } finally {
      // Always release, even when an expectation above fails, so a failing
      // run does not leave the listening socket and its process handlers behind.
      await release1();
    }
    expect(fs.existsSync(lockPath)).toBe(false);

    // After release, lock can be reacquired.
    const release2 = await acquireGatewayLock(lockPath);
    await release2();
    expect(fs.existsSync(lockPath)).toBe(false);
  });
});
|
||||
102
src/infra/gateway-lock.ts
Normal file
102
src/infra/gateway-lock.ts
Normal file
@@ -0,0 +1,102 @@
|
||||
import fs from "node:fs";
|
||||
import net from "node:net";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
|
||||
/** File name of the gateway lock socket. */
const DEFAULT_LOCK_FILE_NAME = "clawdis-gateway.lock";

/** Lock path used when the caller does not supply one: the lock file inside the OS temp directory. */
const DEFAULT_LOCK_PATH = path.join(os.tmpdir(), DEFAULT_LOCK_FILE_NAME);
|
||||
|
||||
/**
 * Error type used for every failure to obtain the gateway lock, so callers can
 * distinguish lock problems from other errors with `instanceof`.
 */
export class GatewayLockError extends Error {
  // Declared as a field (rather than via a custom constructor) so the inherited
  // Error constructor signature stays exactly as it was, while logs and
  // `String(err)` show "GatewayLockError" instead of the generic "Error".
  override name = "GatewayLockError";
}
|
||||
|
||||
/** Async callback returned by `acquireGatewayLock`; calling it gives the lock back. */
type ReleaseFn = () => Promise<void>;
|
||||
|
||||
/**
 * Acquire an exclusive single-instance lock for the gateway using a Unix domain socket.
 *
 * Why a socket? If the process crashes or is SIGKILLed, the socket file remains but
 * the next start will detect ECONNREFUSED when connecting and clean the stale path
 * before retrying. This keeps the lock self-healing without manual pidfile cleanup.
 *
 * While the lock is held, SIGINT/SIGTERM/SIGHUP handlers release it and then call
 * `process.exit(0)`, and an `exit` handler performs a synchronous best-effort cleanup.
 * All of these handlers are detached again as soon as the lock is released, so
 * repeated acquire/release cycles do not accumulate process listeners.
 *
 * NOTE: probing a stale socket and then removing it is not atomic, so two processes
 * that start at the same instant can still race each other on a stale path.
 *
 * @param lockPath - Filesystem path of the lock socket.
 * @returns An idempotent function that closes the lock server and removes the socket file.
 * @throws GatewayLockError when another live instance holds the lock, or when the
 *   lock path cannot be bound, probed or cleaned.
 */
export async function acquireGatewayLock(
  lockPath = DEFAULT_LOCK_PATH,
): Promise<ReleaseFn> {
  // Upper bound on "remove stale socket, listen again" cycles so a path that keeps
  // reporting EADDRINUSE cannot make acquisition loop forever.
  const maxStaleCleanups = 3;

  // One listen attempt. Resolves with the bound server, or with the listen error
  // so the caller can branch on its code without catching and casting.
  const tryListen = (): Promise<net.Server | NodeJS.ErrnoException> =>
    new Promise((resolve) => {
      const candidate = net.createServer((socket) => {
        // The lock socket carries no protocol: peers only connect to probe for
        // liveness. Drop them right away so they can neither raise an unhandled
        // socket error nor keep `server.close()` waiting during release.
        socket.on("error", () => {
          /* ignore */
        });
        socket.destroy();
      });
      candidate.once("error", (listenErr: NodeJS.ErrnoException) =>
        resolve(listenErr),
      );
      candidate.listen(lockPath, () => resolve(candidate));
    });

  // Something is already bound. Connect to see whether its owner is alive:
  // resolves true for a live owner, false when nothing is listening (stale file).
  const isOwnerAlive = (): Promise<boolean> =>
    new Promise((resolve, reject) => {
      const probe = net.connect({ path: lockPath });

      probe.once("connect", () => {
        probe.destroy();
        resolve(true);
      });

      probe.once("error", (connErr: NodeJS.ErrnoException) => {
        if (connErr.code === "ECONNREFUSED" || connErr.code === "ENOENT") {
          resolve(false);
          return;
        }
        reject(
          new GatewayLockError(
            `failed to connect to existing lock (${lockPath}): ${connErr.message}`,
          ),
        );
      });
    });

  // Fast path: try to listen on the lock path. On EADDRINUSE, probe the current
  // owner and, if the socket turns out to be stale, remove it and listen again.
  const bindLockServer = async (): Promise<net.Server> => {
    for (let cleanups = 0; ; cleanups += 1) {
      const outcome = await tryListen();
      if (outcome instanceof net.Server) return outcome;

      if (outcome.code !== "EADDRINUSE") {
        throw new GatewayLockError(`lock listen failed: ${outcome.message}`);
      }

      if (await isOwnerAlive()) {
        throw new GatewayLockError("another gateway instance is already running");
      }

      if (cleanups >= maxStaleCleanups) {
        throw new GatewayLockError(
          `failed to acquire lock at ${lockPath}: still in use after ${maxStaleCleanups} stale lock cleanups`,
        );
      }

      // Nothing is listening -> stale socket file. Remove it before the next attempt.
      try {
        fs.rmSync(lockPath, { force: true });
      } catch (rmErr) {
        throw new GatewayLockError(
          `failed to clean stale lock at ${lockPath}: ${String(rmErr)}`,
        );
      }
    }
  };

  const server = await bindLockServer();

  const cleanupSignals: NodeJS.Signals[] = ["SIGINT", "SIGTERM", "SIGHUP"];
  let released = false;

  // Best-effort removal of the socket file; failures are deliberately ignored
  // because the next start treats a leftover file as a stale lock anyway.
  const removeLockFile = (): void => {
    try {
      fs.rmSync(lockPath, { force: true });
    } catch {
      /* ignore */
    }
  };

  const release = async (): Promise<void> => {
    if (released) return;
    released = true;
    // Detach our process listeners so they neither pile up across acquisitions
    // nor force `process.exit(0)` on a later signal once the lock is gone.
    for (const sig of cleanupSignals) {
      process.removeListener(sig, onSignal);
    }
    process.removeListener("exit", onExit);
    await new Promise<void>((resolve) => server.close(() => resolve()));
    removeLockFile();
  };

  // Signal path: release the lock, then terminate.
  function onSignal(): void {
    void release().then(() => process.exit(0));
  }

  // Exit path: 'exit' listeners may only do synchronous work, so the async
  // release (which waits for the close callback) cannot be used here.
  function onExit(): void {
    if (released) return;
    released = true;
    server.close();
    removeLockFile();
  }

  for (const sig of cleanupSignals) {
    process.once(sig, onSignal);
  }
  process.once("exit", onExit);

  return release;
}
|
||||
Reference in New Issue
Block a user