From 42eb7640f984053809c12c47dd6acb655b4b4faa Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 25 Dec 2025 18:05:37 +0000 Subject: [PATCH] feat: add gateway restart tool --- docs/gateway.md | 1 + src/agents/clawdis-gateway-tool.test.ts | 36 ++++ src/agents/clawdis-tools.ts | 51 ++++++ src/cli/gateway-cli.ts | 232 ++++++++++++------------ src/gateway/server.ts | 19 +- src/macos/gateway-daemon.ts | 62 +++++-- 6 files changed, 266 insertions(+), 135 deletions(-) create mode 100644 src/agents/clawdis-gateway-tool.test.ts diff --git a/docs/gateway.md b/docs/gateway.md index bf9539605..86aa497f4 100644 --- a/docs/gateway.md +++ b/docs/gateway.md @@ -26,6 +26,7 @@ pnpm clawdis gateway --force - Pass `--verbose` to mirror debug logging (handshakes, req/res, events) from the log file into stdio when troubleshooting. - `--force` uses `lsof` to find listeners on the chosen port, sends SIGTERM, logs what it killed, then starts the gateway (fails fast if `lsof` is missing). - If you run under a supervisor (launchd/systemd/mac app child-process mode), a stop/restart typically sends **SIGTERM**; older builds may surface this as `pnpm` `ELIFECYCLE` exit code **143** (SIGTERM), which is a normal shutdown, not a crash. +- **SIGUSR1** triggers an in-process restart (no external supervisor required). This is what the `clawdis_gateway` agent tool uses. - Optional shared secret: pass `--token ` or set `CLAWDIS_GATEWAY_TOKEN` to require clients to send `connect.params.auth.token`. ## Remote access diff --git a/src/agents/clawdis-gateway-tool.test.ts b/src/agents/clawdis-gateway-tool.test.ts new file mode 100644 index 000000000..b61c0f1f9 --- /dev/null +++ b/src/agents/clawdis-gateway-tool.test.ts @@ -0,0 +1,36 @@ +import { describe, expect, it, vi } from "vitest"; + +import { createClawdisTools } from "./clawdis-tools.js"; + +describe("clawdis_gateway tool", () => { + it("schedules SIGUSR1 restart", async () => { + vi.useFakeTimers(); + const kill = vi.spyOn(process, "kill").mockImplementation(() => true); + + try { + const tool = createClawdisTools().find( + (candidate) => candidate.name === "clawdis_gateway", + ); + expect(tool).toBeDefined(); + if (!tool) throw new Error("missing clawdis_gateway tool"); + + const result = await tool.execute("call1", { + action: "restart", + delayMs: 0, + }); + expect(result.details).toMatchObject({ + ok: true, + pid: process.pid, + signal: "SIGUSR1", + delayMs: 0, + }); + + expect(kill).not.toHaveBeenCalled(); + await vi.runAllTimersAsync(); + expect(kill).toHaveBeenCalledWith(process.pid, "SIGUSR1"); + } finally { + kill.mockRestore(); + vi.useRealTimers(); + } + }); +}); diff --git a/src/agents/clawdis-tools.ts b/src/agents/clawdis-tools.ts index 57aa1961b..941bc89d8 100644 --- a/src/agents/clawdis-tools.ts +++ b/src/agents/clawdis-tools.ts @@ -1370,11 +1370,62 @@ function createCronTool(): AnyAgentTool { }; } +const GatewayToolSchema = Type.Union([ + Type.Object({ + action: Type.Literal("restart"), + delayMs: Type.Optional(Type.Number()), + reason: Type.Optional(Type.String()), + }), +]); + +function createGatewayTool(): AnyAgentTool { + return { + label: "Clawdis Gateway", + name: "clawdis_gateway", + description: + "Restart the running gateway process in-place (SIGUSR1) without needing an external supervisor. Use delayMs to avoid interrupting an in-flight reply.", + parameters: GatewayToolSchema, + execute: async (_toolCallId, args) => { + const params = args as Record; + const action = readStringParam(params, "action", { required: true }); + if (action !== "restart") throw new Error(`Unknown action: ${action}`); + + const delayMsRaw = + typeof params.delayMs === "number" && Number.isFinite(params.delayMs) + ? Math.floor(params.delayMs) + : 2000; + const delayMs = Math.min(Math.max(delayMsRaw, 0), 60_000); + const reason = + typeof params.reason === "string" && params.reason.trim() + ? params.reason.trim().slice(0, 200) + : undefined; + + const pid = process.pid; + setTimeout(() => { + try { + process.kill(pid, "SIGUSR1"); + } catch { + /* ignore */ + } + }, delayMs); + + return jsonResult({ + ok: true, + pid, + signal: "SIGUSR1", + delayMs, + reason: reason ?? null, + }); + }, + }; +} + export function createClawdisTools(): AnyAgentTool[] { return [ createBrowserTool(), createCanvasTool(), createNodesTool(), createCronTool(), + createGatewayTool(), ]; } diff --git a/src/cli/gateway-cli.ts b/src/cli/gateway-cli.ts index a052aca14..e48d62cd8 100644 --- a/src/cli/gateway-cli.ts +++ b/src/cli/gateway-cli.ts @@ -25,6 +25,84 @@ type GatewayRpcOpts = { const gatewayLog = createSubsystemLogger("gateway"); +type GatewayRunSignalAction = "stop" | "restart"; + +async function runGatewayLoop(params: { + start: () => Promise>>; + runtime: typeof defaultRuntime; +}) { + let server: Awaited> | null = null; + let shuttingDown = false; + let restartResolver: (() => void) | null = null; + + const cleanupSignals = () => { + process.removeListener("SIGTERM", onSigterm); + process.removeListener("SIGINT", onSigint); + process.removeListener("SIGUSR1", onSigusr1); + }; + + const request = (action: GatewayRunSignalAction, signal: string) => { + if (shuttingDown) { + gatewayLog.info(`received ${signal} during shutdown; ignoring`); + return; + } + shuttingDown = true; + const isRestart = action === "restart"; + gatewayLog.info( + `received ${signal}; ${isRestart ? "restarting" : "shutting down"}`, + ); + + const forceExitTimer = setTimeout(() => { + gatewayLog.error("shutdown timed out; exiting without full cleanup"); + cleanupSignals(); + params.runtime.exit(0); + }, 5000); + + void (async () => { + try { + await server?.close({ + reason: isRestart ? "gateway restarting" : "gateway stopping", + restartExpectedMs: isRestart ? 1500 : null, + }); + } catch (err) { + gatewayLog.error(`shutdown error: ${String(err)}`); + } finally { + clearTimeout(forceExitTimer); + server = null; + if (isRestart) { + shuttingDown = false; + restartResolver?.(); + } else { + cleanupSignals(); + params.runtime.exit(0); + } + } + })(); + }; + + const onSigterm = () => request("stop", "SIGTERM"); + const onSigint = () => request("stop", "SIGINT"); + const onSigusr1 = () => request("restart", "SIGUSR1"); + + process.on("SIGTERM", onSigterm); + process.on("SIGINT", onSigint); + process.on("SIGUSR1", onSigusr1); + + try { + // Keep process alive; SIGUSR1 triggers an in-process restart (no supervisor required). + // SIGTERM/SIGINT still exit after a graceful shutdown. + // eslint-disable-next-line no-constant-condition + while (true) { + server = await params.start(); + await new Promise((resolve) => { + restartResolver = resolve; + }); + } + } finally { + cleanupSignals(); + } +} + const gatewayCallOpts = (cmd: Command) => cmd .option("--url ", "Gateway WebSocket URL", "ws://127.0.0.1:18789") @@ -155,61 +233,27 @@ export function registerGatewayCli(program: Command) { return; } - let server: Awaited> | null = null; - let shuttingDown = false; - let forceExitTimer: ReturnType | null = null; - - const onSigterm = () => shutdown("SIGTERM"); - const onSigint = () => shutdown("SIGINT"); - - const shutdown = (signal: string) => { - process.removeListener("SIGTERM", onSigterm); - process.removeListener("SIGINT", onSigint); - - if (shuttingDown) { - gatewayLog.info(`received ${signal} during shutdown; exiting now`); - defaultRuntime.exit(0); - } - shuttingDown = true; - gatewayLog.info(`received ${signal}; shutting down`); - - forceExitTimer = setTimeout(() => { - gatewayLog.error("shutdown timed out; exiting without full cleanup"); - defaultRuntime.exit(0); - }, 5000); - - void (async () => { - try { - await server?.close(); - } catch (err) { - gatewayLog.error(`shutdown error: ${String(err)}`); - } finally { - if (forceExitTimer) clearTimeout(forceExitTimer); - defaultRuntime.exit(0); - } - })(); - }; - - process.once("SIGTERM", onSigterm); - process.once("SIGINT", onSigint); - try { - server = await startGatewayServer(port, { - bind, - auth: - authMode || opts.password || authModeRaw - ? { - mode: authMode ?? undefined, - password: opts.password ? String(opts.password) : undefined, - } - : undefined, - tailscale: - tailscaleMode || opts.tailscaleResetOnExit - ? { - mode: tailscaleMode ?? undefined, - resetOnExit: Boolean(opts.tailscaleResetOnExit), - } - : undefined, + await runGatewayLoop({ + runtime: defaultRuntime, + start: async () => + await startGatewayServer(port, { + bind, + auth: + authMode || opts.password || authModeRaw + ? { + mode: authMode ?? undefined, + password: opts.password ? String(opts.password) : undefined, + } + : undefined, + tailscale: + tailscaleMode || opts.tailscaleResetOnExit + ? { + mode: tailscaleMode ?? undefined, + resetOnExit: Boolean(opts.tailscaleResetOnExit), + } + : undefined, + }), }); } catch (err) { if (err instanceof GatewayLockError) { @@ -220,8 +264,6 @@ export function registerGatewayCli(program: Command) { defaultRuntime.error(`Gateway failed to start: ${String(err)}`); defaultRuntime.exit(1); } - - await new Promise(() => {}); }); const gateway = program @@ -385,63 +427,27 @@ export function registerGatewayCli(program: Command) { return; } - let server: Awaited> | null = null; - let shuttingDown = false; - let forceExitTimer: ReturnType | null = null; - - const onSigterm = () => shutdown("SIGTERM"); - const onSigint = () => shutdown("SIGINT"); - - const shutdown = (signal: string) => { - // Ensure we don't leak listeners across restarts/tests. - process.removeListener("SIGTERM", onSigterm); - process.removeListener("SIGINT", onSigint); - - if (shuttingDown) { - gatewayLog.info(`received ${signal} during shutdown; exiting now`); - defaultRuntime.exit(0); - } - shuttingDown = true; - gatewayLog.info(`received ${signal}; shutting down`); - - // Avoid hanging forever if a provider task ignores abort. - forceExitTimer = setTimeout(() => { - gatewayLog.error("shutdown timed out; exiting without full cleanup"); - defaultRuntime.exit(0); - }, 5000); - - void (async () => { - try { - await server?.close(); - } catch (err) { - gatewayLog.error(`shutdown error: ${String(err)}`); - } finally { - if (forceExitTimer) clearTimeout(forceExitTimer); - defaultRuntime.exit(0); - } - })(); - }; - - process.once("SIGTERM", onSigterm); - process.once("SIGINT", onSigint); - try { - server = await startGatewayServer(port, { - bind, - auth: - authMode || opts.password || authModeRaw - ? { - mode: authMode ?? undefined, - password: opts.password ? String(opts.password) : undefined, - } - : undefined, - tailscale: - tailscaleMode || opts.tailscaleResetOnExit - ? { - mode: tailscaleMode ?? undefined, - resetOnExit: Boolean(opts.tailscaleResetOnExit), - } - : undefined, + await runGatewayLoop({ + runtime: defaultRuntime, + start: async () => + await startGatewayServer(port, { + bind, + auth: + authMode || opts.password || authModeRaw + ? { + mode: authMode ?? undefined, + password: opts.password ? String(opts.password) : undefined, + } + : undefined, + tailscale: + tailscaleMode || opts.tailscaleResetOnExit + ? { + mode: tailscaleMode ?? undefined, + resetOnExit: Boolean(opts.tailscaleResetOnExit), + } + : undefined, + }), }); } catch (err) { if (err instanceof GatewayLockError) { @@ -452,8 +458,6 @@ export function registerGatewayCli(program: Command) { defaultRuntime.error(`Gateway failed to start: ${String(err)}`); defaultRuntime.exit(1); } - // Keep process alive - await new Promise(() => {}); }); gatewayCallOpts( diff --git a/src/gateway/server.ts b/src/gateway/server.ts index bb7dfa108..de97ad17c 100644 --- a/src/gateway/server.ts +++ b/src/gateway/server.ts @@ -515,7 +515,10 @@ const EVENTS = [ ]; export type GatewayServer = { - close: () => Promise; + close: (opts?: { + reason?: string; + restartExpectedMs?: number | null; + }) => Promise; }; export type GatewayServerOptions = { @@ -5911,7 +5914,15 @@ export async function startGatewayServer( } return { - close: async () => { + close: async (opts) => { + const reasonRaw = + typeof opts?.reason === "string" ? opts.reason.trim() : ""; + const reason = reasonRaw || "gateway stopping"; + const restartExpectedMs = + typeof opts?.restartExpectedMs === "number" && + Number.isFinite(opts.restartExpectedMs) + ? Math.max(0, Math.floor(opts.restartExpectedMs)) + : null; if (bonjourStop) { try { await bonjourStop(); @@ -5947,8 +5958,8 @@ export async function startGatewayServer( await stopTelegramProvider(); cron.stop(); broadcast("shutdown", { - reason: "gateway stopping", - restartExpectedMs: null, + reason, + restartExpectedMs, }); clearInterval(tickInterval); clearInterval(healthInterval); diff --git a/src/macos/gateway-daemon.ts b/src/macos/gateway-daemon.ts index c3dd3473a..1c046b7b3 100644 --- a/src/macos/gateway-daemon.ts +++ b/src/macos/gateway-daemon.ts @@ -95,54 +95,82 @@ async function main() { let server: Awaited> | null = null; let shuttingDown = false; let forceExitTimer: ReturnType | null = null; + let restartResolver: (() => void) | null = null; - const shutdown = (signal: string) => { + const cleanupSignals = () => { process.removeListener("SIGTERM", onSigterm); process.removeListener("SIGINT", onSigint); + process.removeListener("SIGUSR1", onSigusr1); + }; + const request = (action: "stop" | "restart", signal: string) => { if (shuttingDown) { defaultRuntime.log( - `gateway: received ${signal} during shutdown; exiting now`, + `gateway: received ${signal} during shutdown; ignoring`, ); - process.exit(0); + return; } shuttingDown = true; - defaultRuntime.log(`gateway: received ${signal}; shutting down`); + const isRestart = action === "restart"; + defaultRuntime.log( + `gateway: received ${signal}; ${isRestart ? "restarting" : "shutting down"}`, + ); forceExitTimer = setTimeout(() => { defaultRuntime.error( "gateway: shutdown timed out; exiting without full cleanup", ); + cleanupSignals(); process.exit(0); }, 5000); void (async () => { try { - await server?.close(); + await server?.close({ + reason: isRestart ? "gateway restarting" : "gateway stopping", + restartExpectedMs: isRestart ? 1500 : null, + }); } catch (err) { defaultRuntime.error(`gateway: shutdown error: ${String(err)}`); } finally { if (forceExitTimer) clearTimeout(forceExitTimer); - process.exit(0); + server = null; + if (isRestart) { + shuttingDown = false; + restartResolver?.(); + } else { + cleanupSignals(); + process.exit(0); + } } })(); }; - const onSigterm = () => shutdown("SIGTERM"); - const onSigint = () => shutdown("SIGINT"); + const onSigterm = () => request("stop", "SIGTERM"); + const onSigint = () => request("stop", "SIGINT"); + const onSigusr1 = () => request("restart", "SIGUSR1"); - process.once("SIGTERM", onSigterm); - process.once("SIGINT", onSigint); + process.on("SIGTERM", onSigterm); + process.on("SIGINT", onSigint); + process.on("SIGUSR1", onSigusr1); try { - server = await startGatewayServer(port, { bind }); - } catch (err) { - defaultRuntime.error(`Gateway failed to start: ${String(err)}`); - process.exit(1); + // eslint-disable-next-line no-constant-condition + while (true) { + try { + server = await startGatewayServer(port, { bind }); + } catch (err) { + cleanupSignals(); + defaultRuntime.error(`Gateway failed to start: ${String(err)}`); + process.exit(1); + } + await new Promise((resolve) => { + restartResolver = resolve; + }); + } + } finally { + cleanupSignals(); } - - // Keep process alive - await new Promise(() => {}); } void main();