diff --git a/CHANGELOG.md b/CHANGELOG.md index d8c9b9558..81118b45a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ ### Fixes - Onboarding: resolve CLI entrypoint when running via `npx` so gateway daemon install works without a build step. +- Linux: prompt to enable systemd lingering when installing/restarting the gateway user service (prevents logout/idle shutdowns). - TUI: migrate key handling to the updated pi-tui Key matcher API. - macOS: prefer gateway config reads/writes in local mode (fall back to disk if the gateway is unavailable). - macOS: local gateway now connects via tailnet IP when bind mode is `tailnet`/`auto`. @@ -22,6 +23,7 @@ - Status: show runtime (docker/direct) and move shortcuts to `/help`. - Status: show model auth source (api-key/oauth). - Block streaming: avoid splitting Markdown fenced blocks and reopen fences when forced to split. +- Docs: document systemd lingering and logged-in session requirements on macOS/Windows. ### Maintenance - Deps: bump pi-* stack, Slack SDK, discord-api-types, file-type, zod, and Biome. diff --git a/docs/doctor.md b/docs/doctor.md index f6b599a59..e07f15229 100644 --- a/docs/doctor.md +++ b/docs/doctor.md @@ -15,6 +15,7 @@ read_when: - Migrates legacy `~/.clawdis/clawdis.json` when no Clawdbot config exists. - Checks sandbox Docker images when sandboxing is enabled (offers to build or switch to legacy names). - Detects legacy Clawdis services (launchd/systemd/schtasks) and offers to migrate them. +- On Linux, checks if systemd user lingering is enabled and can enable it (required to keep the Gateway alive after logout). ## Legacy config file migration If `~/.clawdis/clawdis.json` exists and `~/.clawdbot/clawdbot.json` does not, doctor will migrate the file and normalize old paths/image names. 
diff --git a/docs/faq.md b/docs/faq.md index cb9726919..6ec58618f 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -466,6 +466,21 @@ cd ~/path/to/clawdbot codex --full-auto "debug why clawdbot gateway won't start" ``` +### Gateway stops after I log out (Linux) + +Linux installs use a systemd **user** service. By default, systemd stops user +services on logout/idle, which kills the Gateway. + +Fix: +```bash +sudo loginctl enable-linger $USER +``` + +**macOS/Windows** + +Gateway daemons run in the user session by default. Keep the user logged in. +Headless/system services are not configured out of the box. + ### Processes keep restarting after I kill them The gateway runs under a supervisor that auto-restarts it. You need to stop the supervisor, not just kill the process. diff --git a/docs/gateway.md b/docs/gateway.md index 5b6fb9cd9..e20badea3 100644 --- a/docs/gateway.md +++ b/docs/gateway.md @@ -155,11 +155,13 @@ See also: `docs/presence.md` for how presence is produced/deduped and why `insta - KeepAlive: true - StandardOut/Err: file paths or `syslog` - On failure, launchd restarts; fatal misconfig should keep exiting so the operator notices. +- LaunchAgents are per-user and require a logged-in session; for headless setups use a custom LaunchDaemon (not shipped). Bundled mac app: - Clawdbot.app can bundle a bun-compiled gateway binary and install a per-user LaunchAgent labeled `com.clawdbot.gateway`. 
-## Supervision (systemd example) +## Supervision (systemd user unit) +Create `~/.config/systemd/user/clawdbot-gateway.service`: ``` [Unit] Description=Clawdbot Gateway @@ -168,16 +170,27 @@ Wants=network-online.target [Service] ExecStart=/usr/local/bin/clawdbot gateway --port 18789 -Restart=on-failure +Restart=always RestartSec=5 -User=clawdbot Environment=CLAWDBOT_GATEWAY_TOKEN= -WorkingDirectory=/home/clawdbot +WorkingDirectory=/home/youruser [Install] -WantedBy=multi-user.target +WantedBy=default.target ``` -Enable with `systemctl enable --now clawdbot-gateway.service`. +Enable lingering (required so the user service survives logout/idle): +``` +sudo loginctl enable-linger youruser +``` +Requires sudo (writes `/var/lib/systemd/linger`). +Then enable the service: +``` +systemctl --user enable --now clawdbot-gateway.service +``` + +## Supervision (Windows scheduled task) +- Onboarding installs a Scheduled Task named `Clawdbot Gateway` (runs on user logon). +- Requires a logged-in user session; for headless setups use a system service or a task configured to run without a logged-in user (not shipped). ## Operational checks - Liveness: open WS and send `req:connect` → expect `res` with `payload.type="hello-ok"` (with snapshot). diff --git a/docs/wizard.md b/docs/wizard.md index 33a109320..d4ec67028 100644 --- a/docs/wizard.md +++ b/docs/wizard.md @@ -72,8 +72,12 @@ It does **not** install or change anything on the remote host. 6) **Daemon install** - macOS: LaunchAgent + - Requires a logged-in user session; for headless, use a custom LaunchDaemon (not shipped). - Linux: systemd user unit + - Wizard enables lingering via `loginctl enable-linger <user>` so the Gateway stays up after logout. + - Requires sudo (writes `/var/lib/systemd/linger`). - Windows: Scheduled Task + - Runs on user logon; headless/system services are not configured by default. 7) **Health check** - Starts the Gateway (if needed) and runs `clawdbot health`. 
diff --git a/src/commands/configure.ts b/src/commands/configure.ts index f35e03645..bccbf190b 100644 --- a/src/commands/configure.ts +++ b/src/commands/configure.ts @@ -53,6 +53,7 @@ import { import { setupProviders } from "./onboard-providers.js"; import { promptRemoteGatewayConfig } from "./onboard-remote.js"; import { setupSkills } from "./onboard-skills.js"; +import { ensureSystemdUserLingerInteractive } from "./systemd-linger.js"; type WizardSection = | "model" @@ -373,6 +374,8 @@ async function maybeInstallDaemon(params: { }) { const service = resolveGatewayService(); const loaded = await service.isLoaded({ env: process.env }); + let shouldCheckLinger = false; + let shouldInstall = true; if (loaded) { const action = guardCancel( await select({ @@ -387,7 +390,8 @@ async function maybeInstallDaemon(params: { ); if (action === "restart") { await service.restart({ stdout: process.stdout }); - return; + shouldCheckLinger = true; + shouldInstall = false; } if (action === "skip") return; if (action === "reinstall") { @@ -395,24 +399,37 @@ async function maybeInstallDaemon(params: { } } - const devMode = - process.argv[1]?.includes(`${path.sep}src${path.sep}`) && - process.argv[1]?.endsWith(".ts"); - const { programArguments, workingDirectory } = - await resolveGatewayProgramArguments({ port: params.port, dev: devMode }); - const environment: Record = { - PATH: process.env.PATH, - CLAWDBOT_GATEWAY_TOKEN: params.gatewayToken, - CLAWDBOT_LAUNCHD_LABEL: - process.platform === "darwin" ? 
GATEWAY_LAUNCH_AGENT_LABEL : undefined, - }; - await service.install({ - env: process.env, - stdout: process.stdout, - programArguments, - workingDirectory, - environment, - }); + if (shouldInstall) { + const devMode = + process.argv[1]?.includes(`${path.sep}src${path.sep}`) && + process.argv[1]?.endsWith(".ts"); + const { programArguments, workingDirectory } = + await resolveGatewayProgramArguments({ port: params.port, dev: devMode }); + const environment: Record = { + PATH: process.env.PATH, + CLAWDBOT_GATEWAY_TOKEN: params.gatewayToken, + CLAWDBOT_LAUNCHD_LABEL: + process.platform === "darwin" ? GATEWAY_LAUNCH_AGENT_LABEL : undefined, + }; + await service.install({ + env: process.env, + stdout: process.stdout, + programArguments, + workingDirectory, + environment, + }); + shouldCheckLinger = true; + } + + if (shouldCheckLinger) { + await ensureSystemdUserLingerInteractive({ + runtime: params.runtime, + prompter: { confirm, note }, + reason: + "Linux installs use a systemd user service. 
Without lingering, systemd stops the user session on logout/idle and kills the Gateway.", + requireConfirm: true, + }); + } } export async function runConfigureWizard( diff --git a/src/commands/doctor.ts b/src/commands/doctor.ts index 9cacd44ce..cad0aacb8 100644 --- a/src/commands/doctor.ts +++ b/src/commands/doctor.ts @@ -31,6 +31,7 @@ import type { RuntimeEnv } from "../runtime.js"; import { defaultRuntime } from "../runtime.js"; import { resolveUserPath, sleep } from "../utils.js"; import { healthCommand } from "./health.js"; +import { ensureSystemdUserLingerInteractive } from "./systemd-linger.js"; import { applyWizardMetadata, DEFAULT_WORKSPACE, @@ -599,6 +600,28 @@ export async function doctorCommand(runtime: RuntimeEnv = defaultRuntime) { await maybeMigrateLegacyGatewayService(cfg, runtime); + if (process.platform === "linux" && resolveMode(cfg) === "local") { + const service = resolveGatewayService(); + let loaded = false; + try { + loaded = await service.isLoaded({ env: process.env }); + } catch { + loaded = false; + } + if (loaded) { + await ensureSystemdUserLingerInteractive({ + runtime, + prompter: { + confirm: (params) => guardCancel(confirm(params), runtime), + note, + }, + reason: + "Gateway runs as a systemd user service. Without lingering, systemd stops the user session on logout/idle and kills the Gateway.", + requireConfirm: true, + }); + } + } + const workspaceDir = resolveUserPath( cfg.agent?.workspace ?? 
DEFAULT_WORKSPACE, ); diff --git a/src/commands/onboard-non-interactive.ts b/src/commands/onboard-non-interactive.ts index ce8c0148c..639472398 100644 --- a/src/commands/onboard-non-interactive.ts +++ b/src/commands/onboard-non-interactive.ts @@ -13,6 +13,7 @@ import { resolveGatewayService } from "../daemon/service.js"; import type { RuntimeEnv } from "../runtime.js"; import { defaultRuntime } from "../runtime.js"; import { resolveUserPath, sleep } from "../utils.js"; +import { ensureSystemdUserLingerNonInteractive } from "./systemd-linger.js"; import { healthCommand } from "./health.js"; import { applyMinimaxConfig, setAnthropicApiKey } from "./onboard-auth.js"; import { @@ -231,6 +232,7 @@ export async function runNonInteractiveOnboarding( workingDirectory, environment, }); + await ensureSystemdUserLingerNonInteractive({ runtime }); } if (!opts.skipHealth) { diff --git a/src/commands/systemd-linger.ts b/src/commands/systemd-linger.ts new file mode 100644 index 000000000..172a0cb80 --- /dev/null +++ b/src/commands/systemd-linger.ts @@ -0,0 +1,109 @@ +import { note } from "@clack/prompts"; + +import { + enableSystemdUserLinger, + readSystemdUserLingerStatus, +} from "../daemon/systemd.js"; +import type { RuntimeEnv } from "../runtime.js"; + +export type LingerPrompter = { + confirm?: (params: { message: string; initialValue?: boolean }) => Promise< + boolean + >; + note: (message: string, title?: string) => Promise | void; +}; + +export async function ensureSystemdUserLingerInteractive(params: { + runtime: RuntimeEnv; + prompter?: LingerPrompter; + env?: NodeJS.ProcessEnv; + title?: string; + reason?: string; + prompt?: boolean; + requireConfirm?: boolean; +}): Promise { + if (process.platform !== "linux") return; + if (params.prompt === false) return; + const env = params.env ?? process.env; + const prompter = params.prompter ?? { note }; + const title = params.title ?? 
"Systemd"; + const status = await readSystemdUserLingerStatus(env); + if (!status) { + await prompter.note( + "Unable to read loginctl linger status. Ensure systemd + loginctl are available.", + title, + ); + return; + } + if (status.linger === "yes") return; + + const reason = + params.reason ?? + "Systemd user services stop when you log out or go idle, which kills the Gateway."; + const actionNote = params.requireConfirm + ? "We can enable lingering now (needs sudo; writes /var/lib/systemd/linger)." + : "Enabling lingering now (needs sudo; writes /var/lib/systemd/linger)."; + await prompter.note( + `${reason}\n${actionNote}`, + title, + ); + + if (params.requireConfirm && prompter.confirm) { + const ok = await prompter.confirm({ + message: `Enable systemd lingering for ${status.user}?`, + initialValue: true, + }); + if (!ok) { + await prompter.note( + "Without lingering, the Gateway will stop when you log out.", + title, + ); + return; + } + } + + const result = await enableSystemdUserLinger({ + env, + user: status.user, + sudoMode: "prompt", + }); + if (result.ok) { + await prompter.note( + `Enabled systemd lingering for ${status.user}.`, + title, + ); + return; + } + + params.runtime.error( + `Failed to enable lingering: ${result.stderr || result.stdout || "unknown error"}`, + ); + await prompter.note( + `Run manually: sudo loginctl enable-linger ${status.user}`, + title, + ); +} + +export async function ensureSystemdUserLingerNonInteractive(params: { + runtime: RuntimeEnv; + env?: NodeJS.ProcessEnv; +}): Promise { + if (process.platform !== "linux") return; + const env = params.env ?? 
process.env; + const status = await readSystemdUserLingerStatus(env); + if (!status || status.linger === "yes") return; + + const result = await enableSystemdUserLinger({ + env, + user: status.user, + sudoMode: "non-interactive", + }); + if (result.ok) { + params.runtime.log(`Enabled systemd lingering for ${status.user}.`); + return; + } + + params.runtime.log( + `Systemd lingering is disabled for ${status.user}. Run: sudo loginctl enable-linger ${status.user}`, + ); +} diff --git a/src/daemon/systemd.test.ts b/src/daemon/systemd.test.ts new file mode 100644 index 000000000..25a8f28a4 --- /dev/null +++ b/src/daemon/systemd.test.ts @@ -0,0 +1,44 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +import { readSystemdUserLingerStatus } from "./systemd.js"; +import { runExec } from "../process/exec.js"; + +vi.mock("../process/exec.js", () => ({ + runExec: vi.fn(), + runCommandWithTimeout: vi.fn(), +})); + +const runExecMock = vi.mocked(runExec); + +describe("readSystemdUserLingerStatus", () => { + beforeEach(() => { + runExecMock.mockReset(); + }); + + it("returns yes when loginctl reports Linger=yes", async () => { + runExecMock.mockResolvedValue({ + stdout: "Linger=yes\n", + stderr: "", + }); + const result = await readSystemdUserLingerStatus({ USER: "tobi" }); + expect(result).toEqual({ user: "tobi", linger: "yes" }); + }); + + it("returns no when loginctl reports Linger=no", async () => { + runExecMock.mockResolvedValue({ + stdout: "Linger=no\n", + stderr: "", + }); + const result = await readSystemdUserLingerStatus({ USER: "tobi" }); + expect(result).toEqual({ user: "tobi", linger: "no" }); + }); + + it("returns null when Linger is missing", async () => { + runExecMock.mockResolvedValue({ + stdout: "UID=1000\n", + stderr: "", + }); + const result = await readSystemdUserLingerStatus({ USER: "tobi" }); + expect(result).toBeNull(); + }); +}); diff --git a/src/daemon/systemd.ts b/src/daemon/systemd.ts index 4a3a289c5..647a133fd 100644 --- 
a/src/daemon/systemd.ts +++ b/src/daemon/systemd.ts @@ -1,5 +1,6 @@ import { execFile } from "node:child_process"; import fs from "node:fs/promises"; +import os from "node:os"; import path from "node:path"; import { promisify } from "node:util"; @@ -7,6 +8,7 @@ import { GATEWAY_SYSTEMD_SERVICE_NAME, LEGACY_GATEWAY_SYSTEMD_SERVICE_NAMES, } from "./constants.js"; +import { runCommandWithTimeout, runExec } from "../process/exec.js"; const execFileAsync = promisify(execFile); @@ -30,6 +32,83 @@ function resolveSystemdUnitPath( return resolveSystemdUnitPathForName(env, GATEWAY_SYSTEMD_SERVICE_NAME); } +function resolveLoginctlUser( + env: Record, +): string | null { + const fromEnv = env.USER?.trim() || env.LOGNAME?.trim(); + if (fromEnv) return fromEnv; + try { + return os.userInfo().username; + } catch { + return null; + } +} + +export type SystemdUserLingerStatus = { + user: string; + linger: "yes" | "no"; +}; + +export async function readSystemdUserLingerStatus( + env: Record, +): Promise { + const user = resolveLoginctlUser(env); + if (!user) return null; + try { + const { stdout } = await runExec( + "loginctl", + ["show-user", user, "-p", "Linger"], + { timeoutMs: 5_000 }, + ); + const line = stdout + .split("\n") + .map((entry) => entry.trim()) + .find((entry) => entry.startsWith("Linger=")); + const value = line?.split("=")[1]?.trim().toLowerCase(); + if (value === "yes" || value === "no") { + return { user, linger: value }; + } + } catch { + // ignore; loginctl may be unavailable + } + return null; +} + +export async function enableSystemdUserLinger(params: { + env: Record; + user?: string; + sudoMode?: "prompt" | "non-interactive"; +}): Promise<{ ok: boolean; stdout: string; stderr: string; code: number }> { + const user = params.user ?? resolveLoginctlUser(params.env); + if (!user) { + return { ok: false, stdout: "", stderr: "Missing user", code: 1 }; + } + const needsSudo = + typeof process.getuid === "function" ? 
process.getuid() !== 0 : true; + const sudoArgs = + needsSudo && params.sudoMode !== undefined + ? ["sudo", ...(params.sudoMode === "non-interactive" ? ["-n"] : [])] + : []; + const argv = [ + ...sudoArgs, + "loginctl", + "enable-linger", + user, + ]; + try { + const result = await runCommandWithTimeout(argv, { timeoutMs: 30_000 }); + return { + ok: result.code === 0, + stdout: result.stdout, + stderr: result.stderr, + code: result.code ?? 1, + }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { ok: false, stdout: "", stderr: message, code: 1 }; + } +} + function systemdEscapeArg(value: string): string { if (!/[\s"\\]/.test(value)) return value; return `"${value.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`; diff --git a/src/wizard/onboarding.ts b/src/wizard/onboarding.ts index c25c86ba7..4cc081142 100644 --- a/src/wizard/onboarding.ts +++ b/src/wizard/onboarding.ts @@ -34,6 +34,7 @@ import { import { setupProviders } from "../commands/onboard-providers.js"; import { promptRemoteGatewayConfig } from "../commands/onboard-remote.js"; import { setupSkills } from "../commands/onboard-skills.js"; +import { ensureSystemdUserLingerInteractive } from "../commands/systemd-linger.js"; import type { AuthChoice, GatewayAuthChoice, @@ -537,6 +538,17 @@ export async function runOnboardingWizard( environment, }); } + + await ensureSystemdUserLingerInteractive({ + runtime, + prompter: { + confirm: prompter.confirm, + note: prompter.note, + }, + reason: + "Linux installs use a systemd user service. Without lingering, systemd stops the user session on logout/idle and kills the Gateway.", + requireConfirm: true, + }); } await sleep(1500);