fix(gateway): handle SIGTERM shutdown cleanly
This commit is contained in:
@@ -24,6 +24,7 @@ pnpm clawdis gateway --force
|
||||
- Logs to stdout; use launchd/systemd to keep it alive and rotate logs.
|
||||
- Pass `--verbose` to mirror debug logging (handshakes, req/res, events) from the log file into stdio when troubleshooting.
|
||||
- `--force` uses `lsof` to find listeners on the chosen port, sends SIGTERM, logs what it killed, then starts the gateway (fails fast if `lsof` is missing).
|
||||
- If you run under a supervisor (launchd/systemd/mac app child-process mode), a stop/restart typically sends **SIGTERM**; older builds may surface this as `pnpm` `ELIFECYCLE` exit code **143** (SIGTERM), which is a normal shutdown, not a crash.
|
||||
- Optional shared secret: pass `--token <value>` or set `CLAWDIS_GATEWAY_TOKEN` to require clients to send `hello.auth.token`.
|
||||
|
||||
## Remote access
|
||||
|
||||
@@ -269,8 +269,53 @@ Examples:
|
||||
if (opts.token) {
|
||||
process.env.CLAWDIS_GATEWAY_TOKEN = String(opts.token);
|
||||
}
|
||||
|
||||
let server: Awaited<ReturnType<typeof startGatewayServer>> | null = null;
|
||||
let shuttingDown = false;
|
||||
let forceExitTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
|
||||
const onSigterm = () => shutdown("SIGTERM");
|
||||
const onSigint = () => shutdown("SIGINT");
|
||||
|
||||
const shutdown = (signal: string) => {
|
||||
// Ensure we don't leak listeners across restarts/tests.
|
||||
process.removeListener("SIGTERM", onSigterm);
|
||||
process.removeListener("SIGINT", onSigint);
|
||||
|
||||
if (shuttingDown) {
|
||||
defaultRuntime.log(
|
||||
info(`gateway: received ${signal} during shutdown; exiting now`),
|
||||
);
|
||||
defaultRuntime.exit(0);
|
||||
}
|
||||
shuttingDown = true;
|
||||
defaultRuntime.log(info(`gateway: received ${signal}; shutting down`));
|
||||
|
||||
// Avoid hanging forever if a provider task ignores abort.
|
||||
forceExitTimer = setTimeout(() => {
|
||||
defaultRuntime.error(
|
||||
"gateway: shutdown timed out; exiting without full cleanup",
|
||||
);
|
||||
defaultRuntime.exit(0);
|
||||
}, 5000);
|
||||
|
||||
void (async () => {
|
||||
try {
|
||||
await server?.close();
|
||||
} catch (err) {
|
||||
defaultRuntime.error(`gateway: shutdown error: ${String(err)}`);
|
||||
} finally {
|
||||
if (forceExitTimer) clearTimeout(forceExitTimer);
|
||||
defaultRuntime.exit(0);
|
||||
}
|
||||
})();
|
||||
};
|
||||
|
||||
process.once("SIGTERM", onSigterm);
|
||||
process.once("SIGINT", onSigint);
|
||||
|
||||
try {
|
||||
await startGatewayServer(port, { webchatPort });
|
||||
server = await startGatewayServer(port, { webchatPort });
|
||||
} catch (err) {
|
||||
if (err instanceof GatewayLockError) {
|
||||
defaultRuntime.error(`Gateway failed to start: ${err.message}`);
|
||||
|
||||
@@ -157,7 +157,10 @@ describe("gateway server", () => {
|
||||
},
|
||||
}),
|
||||
);
|
||||
await onceMessage(ws, (o) => o.type === "res" && o.id === "agent-last-stale");
|
||||
await onceMessage(
|
||||
ws,
|
||||
(o) => o.type === "res" && o.id === "agent-last-stale",
|
||||
);
|
||||
|
||||
const spy = vi.mocked(agentCommand);
|
||||
expect(spy).toHaveBeenCalled();
|
||||
|
||||
@@ -34,9 +34,9 @@ import { monitorWebProvider, webAuthExists } from "../providers/web/index.js";
|
||||
import { defaultRuntime } from "../runtime.js";
|
||||
import { monitorTelegramProvider } from "../telegram/monitor.js";
|
||||
import { sendMessageTelegram } from "../telegram/send.js";
|
||||
import { normalizeE164 } from "../utils.js";
|
||||
import { sendMessageWhatsApp } from "../web/outbound.js";
|
||||
import { ensureWebChatServerFromConfig } from "../webchat/server.js";
|
||||
import { normalizeE164 } from "../utils.js";
|
||||
import { buildMessageWithAttachments } from "./chat-attachments.js";
|
||||
import {
|
||||
ErrorCodes,
|
||||
|
||||
@@ -244,9 +244,16 @@ describe("partial reply gating", () => {
|
||||
replyResolver,
|
||||
);
|
||||
|
||||
const stored = JSON.parse(await fs.readFile(store.storePath, "utf8")) as {
|
||||
main?: { lastChannel?: string; lastTo?: string };
|
||||
};
|
||||
let stored: { main?: { lastChannel?: string; lastTo?: string } } | null =
|
||||
null;
|
||||
for (let attempt = 0; attempt < 50; attempt += 1) {
|
||||
stored = JSON.parse(await fs.readFile(store.storePath, "utf8")) as {
|
||||
main?: { lastChannel?: string; lastTo?: string };
|
||||
};
|
||||
if (stored.main?.lastChannel && stored.main?.lastTo) break;
|
||||
await new Promise((resolve) => setTimeout(resolve, 5));
|
||||
}
|
||||
if (!stored) throw new Error("store not loaded");
|
||||
expect(stored.main?.lastChannel).toBe("whatsapp");
|
||||
expect(stored.main?.lastTo).toBe("+1000");
|
||||
|
||||
|
||||
@@ -752,6 +752,7 @@ export async function monitorWebProvider(
|
||||
// Batch inbound messages per conversation while command queue is busy.
|
||||
type PendingBatch = { messages: WebInboundMsg[]; timer?: NodeJS.Timeout };
|
||||
const pendingBatches = new Map<string, PendingBatch>();
|
||||
const backgroundTasks = new Set<Promise<unknown>>();
|
||||
|
||||
const buildLine = (msg: WebInboundMsg) => {
|
||||
// Build message prefix: explicit config > default based on allowFrom
|
||||
@@ -863,7 +864,7 @@ export async function monitorWebProvider(
|
||||
return normalizeE164(latest.from);
|
||||
})();
|
||||
if (to) {
|
||||
void updateLastRoute({
|
||||
const task = updateLastRoute({
|
||||
storePath,
|
||||
sessionKey: mainKey,
|
||||
channel: "whatsapp",
|
||||
@@ -874,6 +875,10 @@ export async function monitorWebProvider(
|
||||
"failed updating last route",
|
||||
);
|
||||
});
|
||||
backgroundTasks.add(task);
|
||||
task.finally(() => {
|
||||
backgroundTasks.delete(task);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1053,6 +1058,10 @@ export async function monitorWebProvider(
|
||||
if (heartbeat) clearInterval(heartbeat);
|
||||
if (replyHeartbeatTimer) clearInterval(replyHeartbeatTimer);
|
||||
if (watchdogTimer) clearInterval(watchdogTimer);
|
||||
if (backgroundTasks.size > 0) {
|
||||
await Promise.allSettled(backgroundTasks);
|
||||
backgroundTasks.clear();
|
||||
}
|
||||
try {
|
||||
await listener.close();
|
||||
} catch (err) {
|
||||
|
||||
Reference in New Issue
Block a user