feat: add gateway restart tool

This commit is contained in:
Peter Steinberger
2025-12-25 18:05:37 +00:00
parent aafcd569b1
commit 42eb7640f9
6 changed files with 266 additions and 135 deletions

View File

@@ -0,0 +1,36 @@
import { describe, expect, it, vi } from "vitest";
import { createClawdisTools } from "./clawdis-tools.js";
describe("clawdis_gateway tool", () => {
it("schedules SIGUSR1 restart", async () => {
vi.useFakeTimers();
const kill = vi.spyOn(process, "kill").mockImplementation(() => true);
try {
const tool = createClawdisTools().find(
(candidate) => candidate.name === "clawdis_gateway",
);
expect(tool).toBeDefined();
if (!tool) throw new Error("missing clawdis_gateway tool");
const result = await tool.execute("call1", {
action: "restart",
delayMs: 0,
});
expect(result.details).toMatchObject({
ok: true,
pid: process.pid,
signal: "SIGUSR1",
delayMs: 0,
});
expect(kill).not.toHaveBeenCalled();
await vi.runAllTimersAsync();
expect(kill).toHaveBeenCalledWith(process.pid, "SIGUSR1");
} finally {
kill.mockRestore();
vi.useRealTimers();
}
});
});

View File

@@ -1370,11 +1370,62 @@ function createCronTool(): AnyAgentTool {
};
}
const GatewayToolSchema = Type.Union([
Type.Object({
action: Type.Literal("restart"),
delayMs: Type.Optional(Type.Number()),
reason: Type.Optional(Type.String()),
}),
]);
function createGatewayTool(): AnyAgentTool {
return {
label: "Clawdis Gateway",
name: "clawdis_gateway",
description:
"Restart the running gateway process in-place (SIGUSR1) without needing an external supervisor. Use delayMs to avoid interrupting an in-flight reply.",
parameters: GatewayToolSchema,
execute: async (_toolCallId, args) => {
const params = args as Record<string, unknown>;
const action = readStringParam(params, "action", { required: true });
if (action !== "restart") throw new Error(`Unknown action: ${action}`);
const delayMsRaw =
typeof params.delayMs === "number" && Number.isFinite(params.delayMs)
? Math.floor(params.delayMs)
: 2000;
const delayMs = Math.min(Math.max(delayMsRaw, 0), 60_000);
const reason =
typeof params.reason === "string" && params.reason.trim()
? params.reason.trim().slice(0, 200)
: undefined;
const pid = process.pid;
setTimeout(() => {
try {
process.kill(pid, "SIGUSR1");
} catch {
/* ignore */
}
}, delayMs);
return jsonResult({
ok: true,
pid,
signal: "SIGUSR1",
delayMs,
reason: reason ?? null,
});
},
};
}
export function createClawdisTools(): AnyAgentTool[] {
return [
createBrowserTool(),
createCanvasTool(),
createNodesTool(),
createCronTool(),
createGatewayTool(),
];
}

View File

@@ -25,6 +25,84 @@ type GatewayRpcOpts = {
const gatewayLog = createSubsystemLogger("gateway");
type GatewayRunSignalAction = "stop" | "restart";
async function runGatewayLoop(params: {
start: () => Promise<Awaited<ReturnType<typeof startGatewayServer>>>;
runtime: typeof defaultRuntime;
}) {
let server: Awaited<ReturnType<typeof startGatewayServer>> | null = null;
let shuttingDown = false;
let restartResolver: (() => void) | null = null;
const cleanupSignals = () => {
process.removeListener("SIGTERM", onSigterm);
process.removeListener("SIGINT", onSigint);
process.removeListener("SIGUSR1", onSigusr1);
};
const request = (action: GatewayRunSignalAction, signal: string) => {
if (shuttingDown) {
gatewayLog.info(`received ${signal} during shutdown; ignoring`);
return;
}
shuttingDown = true;
const isRestart = action === "restart";
gatewayLog.info(
`received ${signal}; ${isRestart ? "restarting" : "shutting down"}`,
);
const forceExitTimer = setTimeout(() => {
gatewayLog.error("shutdown timed out; exiting without full cleanup");
cleanupSignals();
params.runtime.exit(0);
}, 5000);
void (async () => {
try {
await server?.close({
reason: isRestart ? "gateway restarting" : "gateway stopping",
restartExpectedMs: isRestart ? 1500 : null,
});
} catch (err) {
gatewayLog.error(`shutdown error: ${String(err)}`);
} finally {
clearTimeout(forceExitTimer);
server = null;
if (isRestart) {
shuttingDown = false;
restartResolver?.();
} else {
cleanupSignals();
params.runtime.exit(0);
}
}
})();
};
const onSigterm = () => request("stop", "SIGTERM");
const onSigint = () => request("stop", "SIGINT");
const onSigusr1 = () => request("restart", "SIGUSR1");
process.on("SIGTERM", onSigterm);
process.on("SIGINT", onSigint);
process.on("SIGUSR1", onSigusr1);
try {
// Keep process alive; SIGUSR1 triggers an in-process restart (no supervisor required).
// SIGTERM/SIGINT still exit after a graceful shutdown.
// eslint-disable-next-line no-constant-condition
while (true) {
server = await params.start();
await new Promise<void>((resolve) => {
restartResolver = resolve;
});
}
} finally {
cleanupSignals();
}
}
const gatewayCallOpts = (cmd: Command) =>
cmd
.option("--url <url>", "Gateway WebSocket URL", "ws://127.0.0.1:18789")
@@ -155,61 +233,27 @@ export function registerGatewayCli(program: Command) {
return;
}
let server: Awaited<ReturnType<typeof startGatewayServer>> | null = null;
let shuttingDown = false;
let forceExitTimer: ReturnType<typeof setTimeout> | null = null;
const onSigterm = () => shutdown("SIGTERM");
const onSigint = () => shutdown("SIGINT");
const shutdown = (signal: string) => {
process.removeListener("SIGTERM", onSigterm);
process.removeListener("SIGINT", onSigint);
if (shuttingDown) {
gatewayLog.info(`received ${signal} during shutdown; exiting now`);
defaultRuntime.exit(0);
}
shuttingDown = true;
gatewayLog.info(`received ${signal}; shutting down`);
forceExitTimer = setTimeout(() => {
gatewayLog.error("shutdown timed out; exiting without full cleanup");
defaultRuntime.exit(0);
}, 5000);
void (async () => {
try {
await server?.close();
} catch (err) {
gatewayLog.error(`shutdown error: ${String(err)}`);
} finally {
if (forceExitTimer) clearTimeout(forceExitTimer);
defaultRuntime.exit(0);
}
})();
};
process.once("SIGTERM", onSigterm);
process.once("SIGINT", onSigint);
try {
server = await startGatewayServer(port, {
bind,
auth:
authMode || opts.password || authModeRaw
? {
mode: authMode ?? undefined,
password: opts.password ? String(opts.password) : undefined,
}
: undefined,
tailscale:
tailscaleMode || opts.tailscaleResetOnExit
? {
mode: tailscaleMode ?? undefined,
resetOnExit: Boolean(opts.tailscaleResetOnExit),
}
: undefined,
await runGatewayLoop({
runtime: defaultRuntime,
start: async () =>
await startGatewayServer(port, {
bind,
auth:
authMode || opts.password || authModeRaw
? {
mode: authMode ?? undefined,
password: opts.password ? String(opts.password) : undefined,
}
: undefined,
tailscale:
tailscaleMode || opts.tailscaleResetOnExit
? {
mode: tailscaleMode ?? undefined,
resetOnExit: Boolean(opts.tailscaleResetOnExit),
}
: undefined,
}),
});
} catch (err) {
if (err instanceof GatewayLockError) {
@@ -220,8 +264,6 @@ export function registerGatewayCli(program: Command) {
defaultRuntime.error(`Gateway failed to start: ${String(err)}`);
defaultRuntime.exit(1);
}
await new Promise<never>(() => {});
});
const gateway = program
@@ -385,63 +427,27 @@ export function registerGatewayCli(program: Command) {
return;
}
let server: Awaited<ReturnType<typeof startGatewayServer>> | null = null;
let shuttingDown = false;
let forceExitTimer: ReturnType<typeof setTimeout> | null = null;
const onSigterm = () => shutdown("SIGTERM");
const onSigint = () => shutdown("SIGINT");
const shutdown = (signal: string) => {
// Ensure we don't leak listeners across restarts/tests.
process.removeListener("SIGTERM", onSigterm);
process.removeListener("SIGINT", onSigint);
if (shuttingDown) {
gatewayLog.info(`received ${signal} during shutdown; exiting now`);
defaultRuntime.exit(0);
}
shuttingDown = true;
gatewayLog.info(`received ${signal}; shutting down`);
// Avoid hanging forever if a provider task ignores abort.
forceExitTimer = setTimeout(() => {
gatewayLog.error("shutdown timed out; exiting without full cleanup");
defaultRuntime.exit(0);
}, 5000);
void (async () => {
try {
await server?.close();
} catch (err) {
gatewayLog.error(`shutdown error: ${String(err)}`);
} finally {
if (forceExitTimer) clearTimeout(forceExitTimer);
defaultRuntime.exit(0);
}
})();
};
process.once("SIGTERM", onSigterm);
process.once("SIGINT", onSigint);
try {
server = await startGatewayServer(port, {
bind,
auth:
authMode || opts.password || authModeRaw
? {
mode: authMode ?? undefined,
password: opts.password ? String(opts.password) : undefined,
}
: undefined,
tailscale:
tailscaleMode || opts.tailscaleResetOnExit
? {
mode: tailscaleMode ?? undefined,
resetOnExit: Boolean(opts.tailscaleResetOnExit),
}
: undefined,
await runGatewayLoop({
runtime: defaultRuntime,
start: async () =>
await startGatewayServer(port, {
bind,
auth:
authMode || opts.password || authModeRaw
? {
mode: authMode ?? undefined,
password: opts.password ? String(opts.password) : undefined,
}
: undefined,
tailscale:
tailscaleMode || opts.tailscaleResetOnExit
? {
mode: tailscaleMode ?? undefined,
resetOnExit: Boolean(opts.tailscaleResetOnExit),
}
: undefined,
}),
});
} catch (err) {
if (err instanceof GatewayLockError) {
@@ -452,8 +458,6 @@ export function registerGatewayCli(program: Command) {
defaultRuntime.error(`Gateway failed to start: ${String(err)}`);
defaultRuntime.exit(1);
}
// Keep process alive
await new Promise<never>(() => {});
});
gatewayCallOpts(

View File

@@ -515,7 +515,10 @@ const EVENTS = [
];
export type GatewayServer = {
close: () => Promise<void>;
close: (opts?: {
reason?: string;
restartExpectedMs?: number | null;
}) => Promise<void>;
};
export type GatewayServerOptions = {
@@ -5911,7 +5914,15 @@ export async function startGatewayServer(
}
return {
close: async () => {
close: async (opts) => {
const reasonRaw =
typeof opts?.reason === "string" ? opts.reason.trim() : "";
const reason = reasonRaw || "gateway stopping";
const restartExpectedMs =
typeof opts?.restartExpectedMs === "number" &&
Number.isFinite(opts.restartExpectedMs)
? Math.max(0, Math.floor(opts.restartExpectedMs))
: null;
if (bonjourStop) {
try {
await bonjourStop();
@@ -5947,8 +5958,8 @@ export async function startGatewayServer(
await stopTelegramProvider();
cron.stop();
broadcast("shutdown", {
reason: "gateway stopping",
restartExpectedMs: null,
reason,
restartExpectedMs,
});
clearInterval(tickInterval);
clearInterval(healthInterval);

View File

@@ -95,54 +95,82 @@ async function main() {
let server: Awaited<ReturnType<typeof startGatewayServer>> | null = null;
let shuttingDown = false;
let forceExitTimer: ReturnType<typeof setTimeout> | null = null;
let restartResolver: (() => void) | null = null;
const shutdown = (signal: string) => {
const cleanupSignals = () => {
process.removeListener("SIGTERM", onSigterm);
process.removeListener("SIGINT", onSigint);
process.removeListener("SIGUSR1", onSigusr1);
};
const request = (action: "stop" | "restart", signal: string) => {
if (shuttingDown) {
defaultRuntime.log(
`gateway: received ${signal} during shutdown; exiting now`,
`gateway: received ${signal} during shutdown; ignoring`,
);
process.exit(0);
return;
}
shuttingDown = true;
defaultRuntime.log(`gateway: received ${signal}; shutting down`);
const isRestart = action === "restart";
defaultRuntime.log(
`gateway: received ${signal}; ${isRestart ? "restarting" : "shutting down"}`,
);
forceExitTimer = setTimeout(() => {
defaultRuntime.error(
"gateway: shutdown timed out; exiting without full cleanup",
);
cleanupSignals();
process.exit(0);
}, 5000);
void (async () => {
try {
await server?.close();
await server?.close({
reason: isRestart ? "gateway restarting" : "gateway stopping",
restartExpectedMs: isRestart ? 1500 : null,
});
} catch (err) {
defaultRuntime.error(`gateway: shutdown error: ${String(err)}`);
} finally {
if (forceExitTimer) clearTimeout(forceExitTimer);
process.exit(0);
server = null;
if (isRestart) {
shuttingDown = false;
restartResolver?.();
} else {
cleanupSignals();
process.exit(0);
}
}
})();
};
const onSigterm = () => shutdown("SIGTERM");
const onSigint = () => shutdown("SIGINT");
const onSigterm = () => request("stop", "SIGTERM");
const onSigint = () => request("stop", "SIGINT");
const onSigusr1 = () => request("restart", "SIGUSR1");
process.once("SIGTERM", onSigterm);
process.once("SIGINT", onSigint);
process.on("SIGTERM", onSigterm);
process.on("SIGINT", onSigint);
process.on("SIGUSR1", onSigusr1);
try {
server = await startGatewayServer(port, { bind });
} catch (err) {
defaultRuntime.error(`Gateway failed to start: ${String(err)}`);
process.exit(1);
// eslint-disable-next-line no-constant-condition
while (true) {
try {
server = await startGatewayServer(port, { bind });
} catch (err) {
cleanupSignals();
defaultRuntime.error(`Gateway failed to start: ${String(err)}`);
process.exit(1);
}
await new Promise<void>((resolve) => {
restartResolver = resolve;
});
}
} finally {
cleanupSignals();
}
// Keep process alive
await new Promise<never>(() => {});
}
void main();