fix(gateway): handle SIGTERM shutdown cleanly
This commit is contained in:
@@ -24,6 +24,7 @@ pnpm clawdis gateway --force
|
|||||||
- Logs to stdout; use launchd/systemd to keep it alive and rotate logs.
|
- Logs to stdout; use launchd/systemd to keep it alive and rotate logs.
|
||||||
- Pass `--verbose` to mirror debug logging (handshakes, req/res, events) from the log file into stdio when troubleshooting.
|
- Pass `--verbose` to mirror debug logging (handshakes, req/res, events) from the log file into stdio when troubleshooting.
|
||||||
- `--force` uses `lsof` to find listeners on the chosen port, sends SIGTERM, logs what it killed, then starts the gateway (fails fast if `lsof` is missing).
|
- `--force` uses `lsof` to find listeners on the chosen port, sends SIGTERM, logs what it killed, then starts the gateway (fails fast if `lsof` is missing).
|
||||||
|
- If you run under a supervisor (launchd/systemd/mac app child-process mode), a stop/restart typically sends **SIGTERM**; older builds may surface this as `pnpm` `ELIFECYCLE` exit code **143** (SIGTERM), which is a normal shutdown, not a crash.
|
||||||
- Optional shared secret: pass `--token <value>` or set `CLAWDIS_GATEWAY_TOKEN` to require clients to send `hello.auth.token`.
|
- Optional shared secret: pass `--token <value>` or set `CLAWDIS_GATEWAY_TOKEN` to require clients to send `hello.auth.token`.
|
||||||
|
|
||||||
## Remote access
|
## Remote access
|
||||||
|
|||||||
@@ -269,8 +269,53 @@ Examples:
|
|||||||
if (opts.token) {
|
if (opts.token) {
|
||||||
process.env.CLAWDIS_GATEWAY_TOKEN = String(opts.token);
|
process.env.CLAWDIS_GATEWAY_TOKEN = String(opts.token);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let server: Awaited<ReturnType<typeof startGatewayServer>> | null = null;
|
||||||
|
let shuttingDown = false;
|
||||||
|
let forceExitTimer: ReturnType<typeof setTimeout> | null = null;
|
||||||
|
|
||||||
|
const onSigterm = () => shutdown("SIGTERM");
|
||||||
|
const onSigint = () => shutdown("SIGINT");
|
||||||
|
|
||||||
|
const shutdown = (signal: string) => {
|
||||||
|
// Ensure we don't leak listeners across restarts/tests.
|
||||||
|
process.removeListener("SIGTERM", onSigterm);
|
||||||
|
process.removeListener("SIGINT", onSigint);
|
||||||
|
|
||||||
|
if (shuttingDown) {
|
||||||
|
defaultRuntime.log(
|
||||||
|
info(`gateway: received ${signal} during shutdown; exiting now`),
|
||||||
|
);
|
||||||
|
defaultRuntime.exit(0);
|
||||||
|
}
|
||||||
|
shuttingDown = true;
|
||||||
|
defaultRuntime.log(info(`gateway: received ${signal}; shutting down`));
|
||||||
|
|
||||||
|
// Avoid hanging forever if a provider task ignores abort.
|
||||||
|
forceExitTimer = setTimeout(() => {
|
||||||
|
defaultRuntime.error(
|
||||||
|
"gateway: shutdown timed out; exiting without full cleanup",
|
||||||
|
);
|
||||||
|
defaultRuntime.exit(0);
|
||||||
|
}, 5000);
|
||||||
|
|
||||||
|
void (async () => {
|
||||||
|
try {
|
||||||
|
await server?.close();
|
||||||
|
} catch (err) {
|
||||||
|
defaultRuntime.error(`gateway: shutdown error: ${String(err)}`);
|
||||||
|
} finally {
|
||||||
|
if (forceExitTimer) clearTimeout(forceExitTimer);
|
||||||
|
defaultRuntime.exit(0);
|
||||||
|
}
|
||||||
|
})();
|
||||||
|
};
|
||||||
|
|
||||||
|
process.once("SIGTERM", onSigterm);
|
||||||
|
process.once("SIGINT", onSigint);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await startGatewayServer(port, { webchatPort });
|
server = await startGatewayServer(port, { webchatPort });
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (err instanceof GatewayLockError) {
|
if (err instanceof GatewayLockError) {
|
||||||
defaultRuntime.error(`Gateway failed to start: ${err.message}`);
|
defaultRuntime.error(`Gateway failed to start: ${err.message}`);
|
||||||
|
|||||||
@@ -157,7 +157,10 @@ describe("gateway server", () => {
|
|||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
await onceMessage(ws, (o) => o.type === "res" && o.id === "agent-last-stale");
|
await onceMessage(
|
||||||
|
ws,
|
||||||
|
(o) => o.type === "res" && o.id === "agent-last-stale",
|
||||||
|
);
|
||||||
|
|
||||||
const spy = vi.mocked(agentCommand);
|
const spy = vi.mocked(agentCommand);
|
||||||
expect(spy).toHaveBeenCalled();
|
expect(spy).toHaveBeenCalled();
|
||||||
|
|||||||
@@ -34,9 +34,9 @@ import { monitorWebProvider, webAuthExists } from "../providers/web/index.js";
|
|||||||
import { defaultRuntime } from "../runtime.js";
|
import { defaultRuntime } from "../runtime.js";
|
||||||
import { monitorTelegramProvider } from "../telegram/monitor.js";
|
import { monitorTelegramProvider } from "../telegram/monitor.js";
|
||||||
import { sendMessageTelegram } from "../telegram/send.js";
|
import { sendMessageTelegram } from "../telegram/send.js";
|
||||||
|
import { normalizeE164 } from "../utils.js";
|
||||||
import { sendMessageWhatsApp } from "../web/outbound.js";
|
import { sendMessageWhatsApp } from "../web/outbound.js";
|
||||||
import { ensureWebChatServerFromConfig } from "../webchat/server.js";
|
import { ensureWebChatServerFromConfig } from "../webchat/server.js";
|
||||||
import { normalizeE164 } from "../utils.js";
|
|
||||||
import { buildMessageWithAttachments } from "./chat-attachments.js";
|
import { buildMessageWithAttachments } from "./chat-attachments.js";
|
||||||
import {
|
import {
|
||||||
ErrorCodes,
|
ErrorCodes,
|
||||||
|
|||||||
@@ -244,9 +244,16 @@ describe("partial reply gating", () => {
|
|||||||
replyResolver,
|
replyResolver,
|
||||||
);
|
);
|
||||||
|
|
||||||
const stored = JSON.parse(await fs.readFile(store.storePath, "utf8")) as {
|
let stored: { main?: { lastChannel?: string; lastTo?: string } } | null =
|
||||||
main?: { lastChannel?: string; lastTo?: string };
|
null;
|
||||||
};
|
for (let attempt = 0; attempt < 50; attempt += 1) {
|
||||||
|
stored = JSON.parse(await fs.readFile(store.storePath, "utf8")) as {
|
||||||
|
main?: { lastChannel?: string; lastTo?: string };
|
||||||
|
};
|
||||||
|
if (stored.main?.lastChannel && stored.main?.lastTo) break;
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 5));
|
||||||
|
}
|
||||||
|
if (!stored) throw new Error("store not loaded");
|
||||||
expect(stored.main?.lastChannel).toBe("whatsapp");
|
expect(stored.main?.lastChannel).toBe("whatsapp");
|
||||||
expect(stored.main?.lastTo).toBe("+1000");
|
expect(stored.main?.lastTo).toBe("+1000");
|
||||||
|
|
||||||
|
|||||||
@@ -752,6 +752,7 @@ export async function monitorWebProvider(
|
|||||||
// Batch inbound messages per conversation while command queue is busy.
|
// Batch inbound messages per conversation while command queue is busy.
|
||||||
type PendingBatch = { messages: WebInboundMsg[]; timer?: NodeJS.Timeout };
|
type PendingBatch = { messages: WebInboundMsg[]; timer?: NodeJS.Timeout };
|
||||||
const pendingBatches = new Map<string, PendingBatch>();
|
const pendingBatches = new Map<string, PendingBatch>();
|
||||||
|
const backgroundTasks = new Set<Promise<unknown>>();
|
||||||
|
|
||||||
const buildLine = (msg: WebInboundMsg) => {
|
const buildLine = (msg: WebInboundMsg) => {
|
||||||
// Build message prefix: explicit config > default based on allowFrom
|
// Build message prefix: explicit config > default based on allowFrom
|
||||||
@@ -863,7 +864,7 @@ export async function monitorWebProvider(
|
|||||||
return normalizeE164(latest.from);
|
return normalizeE164(latest.from);
|
||||||
})();
|
})();
|
||||||
if (to) {
|
if (to) {
|
||||||
void updateLastRoute({
|
const task = updateLastRoute({
|
||||||
storePath,
|
storePath,
|
||||||
sessionKey: mainKey,
|
sessionKey: mainKey,
|
||||||
channel: "whatsapp",
|
channel: "whatsapp",
|
||||||
@@ -874,6 +875,10 @@ export async function monitorWebProvider(
|
|||||||
"failed updating last route",
|
"failed updating last route",
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
backgroundTasks.add(task);
|
||||||
|
task.finally(() => {
|
||||||
|
backgroundTasks.delete(task);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1053,6 +1058,10 @@ export async function monitorWebProvider(
|
|||||||
if (heartbeat) clearInterval(heartbeat);
|
if (heartbeat) clearInterval(heartbeat);
|
||||||
if (replyHeartbeatTimer) clearInterval(replyHeartbeatTimer);
|
if (replyHeartbeatTimer) clearInterval(replyHeartbeatTimer);
|
||||||
if (watchdogTimer) clearInterval(watchdogTimer);
|
if (watchdogTimer) clearInterval(watchdogTimer);
|
||||||
|
if (backgroundTasks.size > 0) {
|
||||||
|
await Promise.allSettled(backgroundTasks);
|
||||||
|
backgroundTasks.clear();
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
await listener.close();
|
await listener.close();
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
|||||||
Reference in New Issue
Block a user