diff --git a/docs/gateway.md b/docs/gateway.md index add12c3e2..01424c2dd 100644 --- a/docs/gateway.md +++ b/docs/gateway.md @@ -24,6 +24,7 @@ pnpm clawdis gateway --force - Logs to stdout; use launchd/systemd to keep it alive and rotate logs. - Pass `--verbose` to mirror debug logging (handshakes, req/res, events) from the log file into stdio when troubleshooting. - `--force` uses `lsof` to find listeners on the chosen port, sends SIGTERM, logs what it killed, then starts the gateway (fails fast if `lsof` is missing). +- If you run under a supervisor (launchd/systemd/mac app child-process mode), a stop/restart typically sends **SIGTERM**; older builds may surface this as `pnpm` `ELIFECYCLE` exit code **143** (SIGTERM), which is a normal shutdown, not a crash. - Optional shared secret: pass `--token ` or set `CLAWDIS_GATEWAY_TOKEN` to require clients to send `hello.auth.token`. ## Remote access diff --git a/src/cli/program.ts b/src/cli/program.ts index 2e4e140ab..481303e8c 100644 --- a/src/cli/program.ts +++ b/src/cli/program.ts @@ -269,8 +269,53 @@ Examples: if (opts.token) { process.env.CLAWDIS_GATEWAY_TOKEN = String(opts.token); } + + let server: Awaited> | null = null; + let shuttingDown = false; + let forceExitTimer: ReturnType | null = null; + + const onSigterm = () => shutdown("SIGTERM"); + const onSigint = () => shutdown("SIGINT"); + + const shutdown = (signal: string) => { + // Ensure we don't leak listeners across restarts/tests. + process.removeListener("SIGTERM", onSigterm); + process.removeListener("SIGINT", onSigint); + + if (shuttingDown) { + defaultRuntime.log( + info(`gateway: received ${signal} during shutdown; exiting now`), + ); + defaultRuntime.exit(0); + } + shuttingDown = true; + defaultRuntime.log(info(`gateway: received ${signal}; shutting down`)); + + // Avoid hanging forever if a provider task ignores abort. + forceExitTimer = setTimeout(() => { + defaultRuntime.error( + "gateway: shutdown timed out; exiting without full cleanup", + ); + defaultRuntime.exit(0); + }, 5000); + + void (async () => { + try { + await server?.close(); + } catch (err) { + defaultRuntime.error(`gateway: shutdown error: ${String(err)}`); + } finally { + if (forceExitTimer) clearTimeout(forceExitTimer); + defaultRuntime.exit(0); + } + })(); + }; + + process.once("SIGTERM", onSigterm); + process.once("SIGINT", onSigint); + try { - await startGatewayServer(port, { webchatPort }); + server = await startGatewayServer(port, { webchatPort }); } catch (err) { if (err instanceof GatewayLockError) { defaultRuntime.error(`Gateway failed to start: ${err.message}`); diff --git a/src/gateway/server.test.ts b/src/gateway/server.test.ts index 0dbd41a9f..bbfa07169 100644 --- a/src/gateway/server.test.ts +++ b/src/gateway/server.test.ts @@ -157,7 +157,10 @@ describe("gateway server", () => { }, }), ); - await onceMessage(ws, (o) => o.type === "res" && o.id === "agent-last-stale"); + await onceMessage( + ws, + (o) => o.type === "res" && o.id === "agent-last-stale", + ); const spy = vi.mocked(agentCommand); expect(spy).toHaveBeenCalled(); diff --git a/src/gateway/server.ts b/src/gateway/server.ts index 06d5ec295..b8d4867a3 100644 --- a/src/gateway/server.ts +++ b/src/gateway/server.ts @@ -34,9 +34,9 @@ import { monitorWebProvider, webAuthExists } from "../providers/web/index.js"; import { defaultRuntime } from "../runtime.js"; import { monitorTelegramProvider } from "../telegram/monitor.js"; import { sendMessageTelegram } from "../telegram/send.js"; +import { normalizeE164 } from "../utils.js"; import { sendMessageWhatsApp } from "../web/outbound.js"; import { ensureWebChatServerFromConfig } from "../webchat/server.js"; -import { normalizeE164 } from "../utils.js"; import { buildMessageWithAttachments } from "./chat-attachments.js"; import { ErrorCodes, diff --git a/src/web/auto-reply.test.ts b/src/web/auto-reply.test.ts index 0a503cb99..9b6285dad 100644 --- a/src/web/auto-reply.test.ts +++ b/src/web/auto-reply.test.ts @@ -244,9 +244,16 @@ describe("partial reply gating", () => { replyResolver, ); - const stored = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { - main?: { lastChannel?: string; lastTo?: string }; - }; + let stored: { main?: { lastChannel?: string; lastTo?: string } } | null = + null; + for (let attempt = 0; attempt < 50; attempt += 1) { + stored = JSON.parse(await fs.readFile(store.storePath, "utf8")) as { + main?: { lastChannel?: string; lastTo?: string }; + }; + if (stored.main?.lastChannel && stored.main?.lastTo) break; + await new Promise((resolve) => setTimeout(resolve, 5)); + } + if (!stored) throw new Error("store not loaded"); expect(stored.main?.lastChannel).toBe("whatsapp"); expect(stored.main?.lastTo).toBe("+1000"); diff --git a/src/web/auto-reply.ts b/src/web/auto-reply.ts index e762f4b52..4d34a0db8 100644 --- a/src/web/auto-reply.ts +++ b/src/web/auto-reply.ts @@ -752,6 +752,7 @@ export async function monitorWebProvider( // Batch inbound messages per conversation while command queue is busy. type PendingBatch = { messages: WebInboundMsg[]; timer?: NodeJS.Timeout }; const pendingBatches = new Map(); + const backgroundTasks = new Set>(); const buildLine = (msg: WebInboundMsg) => { // Build message prefix: explicit config > default based on allowFrom @@ -863,7 +864,7 @@ export async function monitorWebProvider( return normalizeE164(latest.from); })(); if (to) { - void updateLastRoute({ + const task = updateLastRoute({ storePath, sessionKey: mainKey, channel: "whatsapp", @@ -874,6 +875,10 @@ export async function monitorWebProvider( "failed updating last route", ); }); + backgroundTasks.add(task); + task.finally(() => { + backgroundTasks.delete(task); + }); } } @@ -1053,6 +1058,10 @@ export async function monitorWebProvider( if (heartbeat) clearInterval(heartbeat); if (replyHeartbeatTimer) clearInterval(replyHeartbeatTimer); if (watchdogTimer) clearInterval(watchdogTimer); + if (backgroundTasks.size > 0) { + await Promise.allSettled(backgroundTasks); + backgroundTasks.clear(); + } try { await listener.close(); } catch (err) {