diff --git a/CHANGELOG.md b/CHANGELOG.md index 500d5090f..5cf936b28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ Docs: https://docs.clawd.bot - TUI: highlight model search matches and stabilize search ordering. - CLI: keep banners on routed commands, restore config guarding outside fast-path routing, and tighten fast-path flag parsing while skipping console capture for extra speed. (#1195) — thanks @gumadeiras. - Slack: resolve Bolt import interop for Bun + Node. (#1191) — thanks @CoreyH. +- Gateway: require authorized restarts for SIGUSR1 (restart/apply/update) so config gating can't be bypassed. +- Discord: stop reconnecting the gateway after aborts to prevent duplicate listeners. ## 2026.1.18-4 diff --git a/docs/cli/gateway.md b/docs/cli/gateway.md index c5627c854..e9e461761 100644 --- a/docs/cli/gateway.md +++ b/docs/cli/gateway.md @@ -28,7 +28,7 @@ clawdbot gateway Notes: - By default, the Gateway refuses to start unless `gateway.mode=local` is set in `~/.clawdbot/clawdbot.json`. Use `--allow-unconfigured` for ad-hoc/dev runs. - Binding beyond loopback without auth is blocked (safety guardrail). -- `SIGUSR1` triggers an in-process restart (useful without a supervisor). +- `SIGUSR1` triggers an in-process restart when authorized (enable `commands.restart` or use the gateway tool/config apply/update). - `SIGINT`/`SIGTERM` handlers stop the gateway process, but they don’t restore any custom terminal state. If you wrap the CLI with a TUI or raw-mode input, restore the terminal before exit. ### Options diff --git a/docs/gateway/index.md b/docs/gateway/index.md index 53ee56ea0..fb19a1263 100644 --- a/docs/gateway/index.md +++ b/docs/gateway/index.md @@ -34,7 +34,7 @@ pnpm gateway:watch - Pass `--verbose` to mirror debug logging (handshakes, req/res, events) from the log file into stdio when troubleshooting. 
- `--force` uses `lsof` to find listeners on the chosen port, sends SIGTERM, logs what it killed, then starts the gateway (fails fast if `lsof` is missing). - If you run under a supervisor (launchd/systemd/mac app child-process mode), a stop/restart typically sends **SIGTERM**; older builds may surface this as `pnpm` `ELIFECYCLE` exit code **143** (SIGTERM), which is a normal shutdown, not a crash. -- **SIGUSR1** triggers an in-process restart (no external supervisor required). This is what the `gateway` agent tool uses. +- **SIGUSR1** triggers an in-process restart when authorized (gateway tool/config apply/update, or enable `commands.restart` for manual restarts). - Gateway auth: set `gateway.auth.mode=token` + `gateway.auth.token` (or pass `--token <value>` / `CLAWDBOT_GATEWAY_TOKEN`) to require clients to send `connect.params.auth.token`. - The wizard now generates a token by default, even on loopback. - Port precedence: `--port` > `CLAWDBOT_GATEWAY_PORT` > `gateway.port` > default `18789`. diff --git a/docs/tools/index.md b/docs/tools/index.md index e702f3951..541195213 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -356,7 +356,7 @@ Notes: Restart or apply updates to the running Gateway process (in-place). 
Core actions: -- `restart` (sends `SIGUSR1` to the current process; `clawdbot gateway` restart in-place) +- `restart` (authorizes + sends `SIGUSR1` for in-process restart; `clawdbot gateway` restart in-place) - `config.get` / `config.schema` - `config.apply` (validate + write config + restart + wake) - `update.run` (run update + restart + wake) diff --git a/src/cli/gateway-cli/run-loop.ts b/src/cli/gateway-cli/run-loop.ts index b54e05abd..21e32e91b 100644 --- a/src/cli/gateway-cli/run-loop.ts +++ b/src/cli/gateway-cli/run-loop.ts @@ -1,5 +1,9 @@ import type { startGatewayServer } from "../../gateway/server.js"; -import { createSubsystemLogger } from "../../logging.js"; +import { + consumeGatewaySigusr1RestartAuthorization, + isGatewaySigusr1RestartExternallyAllowed, +} from "../../infra/restart.js"; +import { createSubsystemLogger } from "../../logging/subsystem.js"; import type { defaultRuntime } from "../../runtime.js"; const gatewayLog = createSubsystemLogger("gateway"); @@ -67,6 +71,13 @@ export async function runGatewayLoop(params: { }; const onSigusr1 = () => { gatewayLog.info("signal SIGUSR1 received"); + const authorized = consumeGatewaySigusr1RestartAuthorization(); + if (!authorized && !isGatewaySigusr1RestartExternallyAllowed()) { + gatewayLog.warn( + "SIGUSR1 restart ignored (not authorized; enable commands.restart or use gateway tool).", + ); + return; + } request("restart", "SIGUSR1"); }; diff --git a/src/discord/monitor/provider.ts b/src/discord/monitor/provider.ts index 177c584fd..213c774b0 100644 --- a/src/discord/monitor/provider.ts +++ b/src/discord/monitor/provider.ts @@ -14,7 +14,7 @@ import { import type { ClawdbotConfig, ReplyToMode } from "../../config/config.js"; import { loadConfig } from "../../config/config.js"; import { danger, logVerbose, shouldLogVerbose, warn } from "../../globals.js"; -import { createSubsystemLogger } from "../../logging.js"; +import { createSubsystemLogger } from "../../logging/subsystem.js"; import type { 
RuntimeEnv } from "../../runtime.js"; import { resolveDiscordAccount } from "../accounts.js"; import { attachDiscordGatewayLogging } from "../gateway-logging.js"; @@ -443,6 +443,17 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { emitter: gatewayEmitter, runtime, }); + const abortSignal = opts.abortSignal; + const onAbort = () => { + if (!gateway) return; + gateway.options.reconnect = { maxAttempts: 0 }; + gateway.disconnect(); + }; + if (abortSignal?.aborted) { + onAbort(); + } else { + abortSignal?.addEventListener("abort", onAbort, { once: true }); + } // Timeout to detect zombie connections where HELLO is never received. const HELLO_TIMEOUT_MS = 30000; let helloTimeoutId: ReturnType<typeof setTimeout> | undefined; @@ -472,7 +483,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { disconnect: () => gateway.disconnect(), } : undefined, - abortSignal: opts.abortSignal, + abortSignal, onGatewayError: (err) => { runtime.error?.(danger(`discord gateway error: ${String(err)}`)); }, @@ -487,6 +498,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { stopGatewayLogging(); if (helloTimeoutId) clearTimeout(helloTimeoutId); gatewayEmitter?.removeListener("debug", onGatewayDebug); + abortSignal?.removeEventListener("abort", onAbort); } } diff --git a/src/gateway/server-reload-handlers.ts b/src/gateway/server-reload-handlers.ts index 5aa6fe56f..3d0826952 100644 --- a/src/gateway/server-reload-handlers.ts +++ b/src/gateway/server-reload-handlers.ts @@ -3,6 +3,10 @@ import type { loadConfig } from "../config/config.js"; import { startGmailWatcher, stopGmailWatcher } from "../hooks/gmail-watcher.js"; import { startHeartbeatRunner } from "../infra/heartbeat-runner.js"; import { resetDirectoryCache } from "../infra/outbound/target-resolver.js"; +import { + authorizeGatewaySigusr1Restart, + setGatewaySigusr1RestartPolicy, +} from "../infra/restart.js"; import { setCommandLaneConcurrency } from 
"../process/command-queue.js"; import { isTruthyEnvValue } from "../infra/env.js"; import type { ChannelKind, GatewayReloadPlan } from "./config-reload.js"; @@ -38,6 +42,7 @@ export function createGatewayReloadHandlers(params: { plan: GatewayReloadPlan, nextConfig: ReturnType<typeof loadConfig>, ) => { + setGatewaySigusr1RestartPolicy({ allowExternal: nextConfig.commands?.restart === true }); const state = params.getState(); const nextState = { ...state }; @@ -139,8 +144,9 @@ export function createGatewayReloadHandlers(params: { const requestGatewayRestart = ( plan: GatewayReloadPlan, - _nextConfig: ReturnType<typeof loadConfig>, + nextConfig: ReturnType<typeof loadConfig>, ) => { + setGatewaySigusr1RestartPolicy({ allowExternal: nextConfig.commands?.restart === true }); const reasons = plan.restartReasons.length ? plan.restartReasons.join(", ") : plan.changedPaths.join(", "); @@ -149,6 +155,7 @@ export function createGatewayReloadHandlers(params: { params.logReload.warn("no SIGUSR1 listener found; restart skipped"); return; } + authorizeGatewaySigusr1Restart(); process.emit("SIGUSR1"); }; diff --git a/src/gateway/server.impl.ts b/src/gateway/server.impl.ts index 325e01435..6154966d1 100644 --- a/src/gateway/server.impl.ts +++ b/src/gateway/server.impl.ts @@ -23,8 +23,9 @@ import { setSkillsRemoteBridge, } from "../infra/skills-remote.js"; import { scheduleGatewayUpdateCheck } from "../infra/update-startup.js"; +import { setGatewaySigusr1RestartPolicy } from "../infra/restart.js"; import { autoMigrateLegacyState } from "../infra/state-migrations.js"; -import { createSubsystemLogger, runtimeForLogger } from "../logging.js"; +import { createSubsystemLogger, runtimeForLogger } from "../logging/subsystem.js"; import type { PluginServicesHandle } from "../plugins/services.js"; import type { RuntimeEnv } from "../runtime.js"; import { runOnboardingWizard } from "../wizard/onboarding.js"; @@ -172,6 +173,7 @@ export async function startGatewayServer( } const cfgAtStart = loadConfig(); + setGatewaySigusr1RestartPolicy({ 
allowExternal: cfgAtStart.commands?.restart === true }); initSubagentRegistry(); await autoMigrateLegacyState({ cfg: cfgAtStart, log }); const defaultAgentId = resolveDefaultAgentId(cfgAtStart); diff --git a/src/infra/restart.test.ts b/src/infra/restart.test.ts new file mode 100644 index 000000000..010f71239 --- /dev/null +++ b/src/infra/restart.test.ts @@ -0,0 +1,41 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +import { + __testing, + consumeGatewaySigusr1RestartAuthorization, + isGatewaySigusr1RestartExternallyAllowed, + scheduleGatewaySigusr1Restart, + setGatewaySigusr1RestartPolicy, +} from "./restart.js"; + +describe("restart authorization", () => { + beforeEach(() => { + __testing.resetSigusr1State(); + vi.useFakeTimers(); + vi.spyOn(process, "kill").mockImplementation(() => true); + }); + + afterEach(async () => { + await vi.runOnlyPendingTimersAsync(); + vi.useRealTimers(); + vi.restoreAllMocks(); + __testing.resetSigusr1State(); + }); + + it("consumes a scheduled authorization once", async () => { + expect(consumeGatewaySigusr1RestartAuthorization()).toBe(false); + + scheduleGatewaySigusr1Restart({ delayMs: 0 }); + + expect(consumeGatewaySigusr1RestartAuthorization()).toBe(true); + expect(consumeGatewaySigusr1RestartAuthorization()).toBe(false); + + await vi.runAllTimersAsync(); + }); + + it("tracks external restart policy", () => { + expect(isGatewaySigusr1RestartExternallyAllowed()).toBe(false); + setGatewaySigusr1RestartPolicy({ allowExternal: true }); + expect(isGatewaySigusr1RestartExternallyAllowed()).toBe(true); + }); +}); diff --git a/src/infra/restart.ts b/src/infra/restart.ts index cc257ede2..497e4b44c 100644 --- a/src/infra/restart.ts +++ b/src/infra/restart.ts @@ -12,6 +12,45 @@ export type RestartAttempt = { }; const SPAWN_TIMEOUT_MS = 2000; +const SIGUSR1_AUTH_GRACE_MS = 5000; + +let sigusr1AuthorizedCount = 0; +let sigusr1AuthorizedUntil = 0; +let sigusr1ExternalAllowed = false; + +function 
resetSigusr1AuthorizationIfExpired(now = Date.now()) { + if (sigusr1AuthorizedCount <= 0) return; + if (now <= sigusr1AuthorizedUntil) return; + sigusr1AuthorizedCount = 0; + sigusr1AuthorizedUntil = 0; +} + +export function setGatewaySigusr1RestartPolicy(opts?: { allowExternal?: boolean }) { + sigusr1ExternalAllowed = opts?.allowExternal === true; +} + +export function isGatewaySigusr1RestartExternallyAllowed() { + return sigusr1ExternalAllowed; +} + +export function authorizeGatewaySigusr1Restart(delayMs = 0) { + const delay = Math.max(0, Math.floor(delayMs)); + const expiresAt = Date.now() + delay + SIGUSR1_AUTH_GRACE_MS; + sigusr1AuthorizedCount += 1; + if (expiresAt > sigusr1AuthorizedUntil) { + sigusr1AuthorizedUntil = expiresAt; + } +} + +export function consumeGatewaySigusr1RestartAuthorization(): boolean { + resetSigusr1AuthorizationIfExpired(); + if (sigusr1AuthorizedCount <= 0) return false; + sigusr1AuthorizedCount -= 1; + if (sigusr1AuthorizedCount <= 0) { + sigusr1AuthorizedUntil = 0; + } + return true; +} function formatSpawnDetail(result: { error?: unknown; @@ -134,6 +173,7 @@ export function scheduleGatewaySigusr1Restart(opts?: { typeof opts?.reason === "string" && opts.reason.trim() ? opts.reason.trim().slice(0, 200) : undefined; + authorizeGatewaySigusr1Restart(delayMs); const pid = process.pid; const hasListener = process.listenerCount("SIGUSR1") > 0; setTimeout(() => { @@ -156,3 +196,11 @@ export function scheduleGatewaySigusr1Restart(opts?: { mode: hasListener ? 
"emit" : "signal", }; } + +export const __testing = { + resetSigusr1State() { + sigusr1AuthorizedCount = 0; + sigusr1AuthorizedUntil = 0; + sigusr1ExternalAllowed = false; + }, +}; diff --git a/src/macos/gateway-daemon.ts b/src/macos/gateway-daemon.ts index 44a2c7a39..cd020cf0b 100644 --- a/src/macos/gateway-daemon.ts +++ b/src/macos/gateway-daemon.ts @@ -45,6 +45,10 @@ async function main() { { startGatewayServer }, { setGatewayWsLogStyle }, { setVerbose }, + { + consumeGatewaySigusr1RestartAuthorization, + isGatewaySigusr1RestartExternallyAllowed, + }, { defaultRuntime }, { enableConsoleCapture, setConsoleTimestampPrefix }, ] = await Promise.all([ @@ -52,6 +56,7 @@ async function main() { import("../gateway/server.js"), import("../gateway/ws-logging.js"), import("../globals.js"), + import("../infra/restart.js"), import("../runtime.js"), import("../logging.js"), ]); @@ -156,6 +161,13 @@ async function main() { }; const onSigusr1 = () => { defaultRuntime.log("gateway: signal SIGUSR1 received"); + const authorized = consumeGatewaySigusr1RestartAuthorization(); + if (!authorized && !isGatewaySigusr1RestartExternallyAllowed()) { + defaultRuntime.log( + "gateway: SIGUSR1 restart ignored (not authorized; enable commands.restart or use gateway tool).", + ); + return; + } request("restart", "SIGUSR1"); };