feat(doctor): audit supervisor config + docs

This commit is contained in:
Peter Steinberger
2026-01-08 21:28:40 +01:00
parent d0c4ce6749
commit 01641b34ea
9 changed files with 310 additions and 4 deletions

View File

@@ -2,6 +2,7 @@
## Unreleased
- Doctor/Daemon: audit supervisor configs, recommend doctor from daemon status, and document user vs system services. (#?) — thanks @steipete
- Daemon: align generated systemd unit with docs for network-online + restart delay. (#479) — thanks @azade-c
- Outbound: default Telegram account selection for config-only tokens; remove heartbeat-specific accountId handling. (follow-up #516) — thanks @YuriNachos
- Cron: allow Telegram delivery targets with topic/thread IDs (e.g. `-100…:topic:123`). (#474) — thanks @mitschabaude-bot

View File

@@ -51,6 +51,7 @@ cat ~/.clawdbot/clawdbot.json
- Sandbox image repair when sandboxing is enabled.
- Legacy service migration and extra gateway detection.
- Gateway runtime checks (service installed but not running; cached launchd label).
- Supervisor config audit (launchd/systemd/schtasks) with optional repair.
- Gateway port collision diagnostics (default `18789`).
- Security warnings for open DM policies.
- systemd linger check on Linux.
@@ -143,17 +144,23 @@ workspace.
Doctor runs a health check and offers to restart the gateway when it looks
unhealthy.
### 11) Gateway runtime + port diagnostics
### 11) Supervisor config audit + repair
Doctor checks the installed supervisor config (launchd/systemd/schtasks) for
missing or outdated defaults (e.g., systemd network-online dependencies and
restart delay). When it finds a mismatch, it recommends an update and can
rewrite the service file/task to the current defaults.
### 12) Gateway runtime + port diagnostics
Doctor inspects the daemon runtime (PID, last exit status) and warns when the
service is installed but not actually running. It also checks for port collisions
on the gateway port (default `18789`) and reports likely causes (gateway already
running, SSH tunnel).
### 12) Config write + wizard metadata
### 13) Config write + wizard metadata
Doctor persists any config changes and stamps wizard metadata to record the
doctor run.
### 13) Workspace tips (backup + memory system)
### 14) Workspace tips (backup + memory system)
Doctor suggests a workspace memory system when missing and prints a backup tip
if the workspace is not already under git.

View File

@@ -189,6 +189,14 @@ Bundled mac app:
- `launchctl` only works if the LaunchAgent is installed; otherwise use `clawdbot daemon install` first.
## Supervision (systemd user unit)
Clawdbot installs a **systemd user service** by default on Linux/WSL2. We
recommend user services for single-user machines (simpler env, per-user config).
Use a **system service** for multi-user or always-on servers (no lingering
required, shared supervision).
`clawdbot daemon install` writes the user unit. `clawdbot doctor` audits the
unit and can update it to match the current recommended defaults.
Create `~/.config/systemd/user/clawdbot-gateway.service`:
```
[Unit]

View File

@@ -54,7 +54,11 @@ clawdbot doctor
```
## System control (systemd user unit)
Full unit example lives in the [Gateway runbook](/gateway). Minimal setup:
Clawdbot installs a systemd **user** service by default. Use a **system**
service for shared or always-on servers. The full unit example and guidance
live in the [Gateway runbook](/gateway).
Minimal setup:
Create `~/.config/systemd/user/clawdbot-gateway.service`:

View File

@@ -33,6 +33,8 @@ import { resolveGatewayLogPaths } from "../daemon/launchd.js";
import { findLegacyGatewayServices } from "../daemon/legacy.js";
import { resolveGatewayProgramArguments } from "../daemon/program-args.js";
import { resolveGatewayService } from "../daemon/service.js";
import type { ServiceConfigAudit } from "../daemon/service-audit.js";
import { auditGatewayServiceConfig } from "../daemon/service-audit.js";
import { callGateway } from "../gateway/call.js";
import { resolveGatewayBindHost } from "../gateway/net.js";
import {
@@ -89,6 +91,7 @@ type DaemonStatus = {
cachedLabel?: boolean;
missingUnit?: boolean;
};
configAudit?: ServiceConfigAudit;
};
config?: {
cli: ConfigSummary;
@@ -343,6 +346,10 @@ async function gatherDaemonStatus(opts: {
service.readCommand(process.env).catch(() => null),
service.readRuntime(process.env).catch(() => undefined),
]);
const configAudit = await auditGatewayServiceConfig({
env: process.env,
command,
});
const serviceEnv = command?.environment ?? undefined;
const mergedDaemonEnv = {
@@ -484,6 +491,7 @@ async function gatherDaemonStatus(opts: {
notLoadedText: service.notLoadedText,
command,
runtime,
configAudit,
},
config: {
cli: cliConfigSummary,
@@ -538,6 +546,16 @@ function printDaemonStatus(status: DaemonStatus, opts: { json: boolean }) {
if (daemonEnvLines.length > 0) {
defaultRuntime.log(`Daemon env: ${daemonEnvLines.join(" ")}`);
}
if (service.configAudit?.issues.length) {
defaultRuntime.error(
"Service config looks out of date or non-standard.",
);
for (const issue of service.configAudit.issues) {
const detail = issue.detail ? ` (${issue.detail})` : "";
defaultRuntime.error(`Service config issue: ${issue.message}${detail}`);
}
defaultRuntime.error('Recommendation: run "clawdbot doctor".');
}
if (status.config) {
const cliCfg = `${status.config.cli.path}${status.config.cli.exists ? "" : " (missing)"}${status.config.cli.valid ? "" : " (invalid)"}`;
defaultRuntime.log(`Config (cli): ${cliCfg}`);

View File

@@ -15,6 +15,7 @@ import {
} from "../daemon/legacy.js";
import { resolveGatewayProgramArguments } from "../daemon/program-args.js";
import { resolveGatewayService } from "../daemon/service.js";
import { auditGatewayServiceConfig } from "../daemon/service-audit.js";
import type { RuntimeEnv } from "../runtime.js";
import {
DEFAULT_GATEWAY_DAEMON_RUNTIME,
@@ -23,6 +24,18 @@ import {
} from "./daemon-runtime.js";
import type { DoctorOptions, DoctorPrompter } from "./doctor-prompter.js";
/**
 * Infers which runtime an installed gateway service launches with.
 *
 * Only the basename of the first program argument is examined, compared
 * case-insensitively against `bun`/`bun.exe` and `node`/`node.exe`. A missing
 * or unrecognized executable yields `DEFAULT_GATEWAY_DAEMON_RUNTIME`.
 *
 * @param programArguments - Command line recorded in the service definition.
 * @returns The detected runtime, or the default when nothing matches.
 */
function detectGatewayRuntime(
  programArguments: string[] | undefined,
): GatewayDaemonRuntime {
  const executable = programArguments?.[0];
  if (!executable) return DEFAULT_GATEWAY_DAEMON_RUNTIME;

  switch (path.basename(executable).toLowerCase()) {
    case "bun":
    case "bun.exe":
      return "bun";
    case "node":
    case "node.exe":
      return "node";
    default:
      return DEFAULT_GATEWAY_DAEMON_RUNTIME;
  }
}
export async function maybeMigrateLegacyGatewayService(
cfg: ClawdbotConfig,
mode: "local" | "remote",
@@ -112,6 +125,83 @@ export async function maybeMigrateLegacyGatewayService(
});
}
/**
 * Audits the installed gateway service definition and, once the user confirms,
 * reinstalls it with freshly resolved program arguments and environment.
 *
 * The function returns without changing anything when:
 * - Nix mode is detected, or the gateway mode is `remote` (a note is printed);
 * - no service command can be read (read failures are treated as "none");
 * - the audit reports no issues;
 * - the confirmation prompt is declined.
 *
 * Install failures are reported through `runtime.error` instead of being thrown.
 *
 * @param cfg - Loaded config; supplies the gateway port and auth token.
 * @param mode - Gateway mode; only `local` triggers the audit.
 * @param runtime - Runtime used to report an install failure.
 * @param prompter - Prompter used to confirm the rewrite.
 */
export async function maybeRepairGatewayServiceConfig(
  cfg: ClawdbotConfig,
  mode: "local" | "remote",
  runtime: RuntimeEnv,
  prompter: DoctorPrompter,
) {
  if (resolveIsNixMode(process.env)) {
    note("Nix mode detected; skip service updates.", "Gateway");
    return;
  }
  if (mode === "remote") {
    note("Gateway mode is remote; skipped local service audit.", "Gateway");
    return;
  }

  const service = resolveGatewayService();
  const installedCommand = await service
    .readCommand(process.env)
    .catch(() => null);
  if (!installedCommand) return;

  const { issues } = await auditGatewayServiceConfig({
    env: process.env,
    command: installedCommand,
  });
  if (issues.length === 0) return;

  // One bullet per issue, with the optional detail in parentheses.
  const bulletList = issues
    .map((issue) => {
      const suffix = issue.detail ? ` (${issue.detail})` : "";
      return `- ${issue.message}${suffix}`;
    })
    .join("\n");
  note(bulletList, "Gateway service config");

  const confirmed = await prompter.confirmSkipInNonInteractive({
    message: "Update gateway service config to the recommended defaults now?",
    initialValue: true,
  });
  if (!confirmed) return;

  // Dev mode: the CLI entry point is a TypeScript file under a `src` directory.
  const entryScript = process.argv[1];
  const devMode =
    entryScript?.includes(`${path.sep}src${path.sep}`) &&
    entryScript?.endsWith(".ts");

  const port = resolveGatewayPort(cfg, process.env);
  // Keep whichever runtime (bun/node) the existing service already uses.
  const runtimeChoice = detectGatewayRuntime(installedCommand.programArguments);
  const resolved = await resolveGatewayProgramArguments({
    port,
    dev: devMode,
    runtime: runtimeChoice,
  });

  const launchdLabel =
    process.platform === "darwin" ? GATEWAY_LAUNCH_AGENT_LABEL : undefined;
  const gatewayToken =
    cfg.gateway?.auth?.token ?? process.env.CLAWDBOT_GATEWAY_TOKEN;
  const environment: Record<string, string | undefined> = {
    PATH: process.env.PATH,
    CLAWDBOT_PROFILE: process.env.CLAWDBOT_PROFILE,
    CLAWDBOT_STATE_DIR: process.env.CLAWDBOT_STATE_DIR,
    CLAWDBOT_CONFIG_PATH: process.env.CLAWDBOT_CONFIG_PATH,
    CLAWDBOT_GATEWAY_PORT: String(port),
    CLAWDBOT_GATEWAY_TOKEN: gatewayToken,
    CLAWDBOT_LAUNCHD_LABEL: launchdLabel,
  };

  try {
    await service.install({
      env: process.env,
      stdout: process.stdout,
      programArguments: resolved.programArguments,
      workingDirectory: resolved.workingDirectory,
      environment,
    });
  } catch (err) {
    runtime.error(`Gateway service update failed: ${String(err)}`);
  }
}
export async function maybeScanExtraGatewayServices(options: DoctorOptions) {
const extraServices = await findExtraGatewayServices(process.env, {
deep: options.deep,

View File

@@ -30,6 +30,7 @@ import {
} from "./doctor-format.js";
import {
maybeMigrateLegacyGatewayService,
maybeRepairGatewayServiceConfig,
maybeScanExtraGatewayServices,
} from "./doctor-gateway-services.js";
import {
@@ -157,6 +158,12 @@ export async function doctorCommand(
prompter,
);
await maybeScanExtraGatewayServices(options);
await maybeRepairGatewayServiceConfig(
cfg,
resolveMode(cfg),
runtime,
prompter,
);
await noteSecurityWarnings(cfg);

165
src/daemon/service-audit.ts Normal file
View File

@@ -0,0 +1,165 @@
import fs from "node:fs/promises";
import { resolveLaunchAgentPlistPath } from "./launchd.js";
import { resolveSystemdUserUnitPath } from "./systemd.js";
/**
 * Command description of an installed gateway service, or `null` when no
 * command is available (callers fall back to `null` when reading it fails).
 */
export type GatewayServiceCommand = {
// Full command line; the audit checks it for a "gateway" argument.
programArguments: string[];
workingDirectory?: string;
environment?: Record<string, string>;
// NOTE(review): presumably the config file the command was read from — confirm against the service readers.
sourcePath?: string;
} | null;
/** A single finding produced by the gateway service config audit. */
export type ServiceConfigIssue = {
// Stable identifier for the finding, e.g. "systemd-restart-sec".
code: string;
// Human-readable description of the finding.
message: string;
// Optional extra context; the systemd/launchd audits store the config file path here.
detail?: string;
};
/** Result of auditing the gateway service configuration. */
export type ServiceConfigAudit = {
// True exactly when `issues` is empty.
ok: boolean;
// Findings in the order they were detected.
issues: ServiceConfigIssue[];
};
/**
 * Reports whether the command line contains an argument that is exactly
 * `"gateway"` (case-sensitive, whole-argument match).
 *
 * @param programArguments - Command line to inspect; may be omitted.
 * @returns `true` when such an argument exists, `false` otherwise.
 */
function hasGatewaySubcommand(programArguments?: string[]): boolean {
  if (!programArguments) return false;
  return programArguments.includes("gateway");
}
/**
 * Extracts the systemd unit directives that the service audit inspects.
 *
 * Physical lines ending in an odd number of backslashes are folded into the
 * following line (the backslash becomes a space), matching systemd's line
 * continuation syntax. Without this, a wrapped `After=`/`Wants=` list would
 * lose its continued entries and record a stray `\` entry.
 *
 * Comment lines (`#` or `;`), blank lines, section headers and assignments
 * with an empty value are ignored. Keys are matched case-sensitively and
 * regardless of the section they appear in.
 *
 * @param content - Raw text of the unit file.
 * @returns The whitespace-separated `After=` and `Wants=` entries accumulated
 *   across all occurrences, and the last `RestartSec=` value if any.
 */
function parseSystemdUnit(content: string): {
  after: Set<string>;
  wants: Set<string>;
  restartSec?: string;
} {
  const after = new Set<string>();
  const wants = new Set<string>();
  let restartSec: string | undefined;

  // Pass 1: drop comment lines and fold backslash continuations.
  const logicalLines: string[] = [];
  let carried: string | undefined;
  for (const rawLine of content.split(/\r?\n/)) {
    const lead = rawLine.trimStart();
    // Comment lines are skipped even in the middle of a continued line.
    if (lead.startsWith("#") || lead.startsWith(";")) continue;

    let trailingBackslashes = 0;
    while (
      trailingBackslashes < rawLine.length &&
      rawLine.charAt(rawLine.length - 1 - trailingBackslashes) === "\\"
    ) {
      trailingBackslashes += 1;
    }

    const joined = carried === undefined ? rawLine : carried + rawLine;
    // An even count is an escaped backslash, not a continuation marker.
    if (trailingBackslashes % 2 === 1) {
      carried = `${joined.slice(0, -1)} `;
      continue;
    }
    carried = undefined;
    logicalLines.push(joined);
  }
  // A dangling continuation on the last line is still parsed.
  if (carried !== undefined) logicalLines.push(carried);

  // Pass 2: pick out the directives of interest.
  for (const logicalLine of logicalLines) {
    const line = logicalLine.trim();
    if (!line) continue;
    if (line.startsWith("[")) continue;
    const idx = line.indexOf("=");
    if (idx <= 0) continue;
    const key = line.slice(0, idx).trim();
    const value = line.slice(idx + 1).trim();
    if (!value) continue;
    if (key === "After") {
      for (const entry of value.split(/\s+/)) {
        if (entry) after.add(entry);
      }
    } else if (key === "Wants") {
      for (const entry of value.split(/\s+/)) {
        if (entry) wants.add(entry);
      }
    } else if (key === "RestartSec") {
      restartSec = value;
    }
  }
  return { after, wants, restartSec };
}
/**
 * Seconds per systemd time-span unit suffix. Keys are case-sensitive because
 * `m` (minutes) and `M` (months) differ only by case.
 */
const SYSTEMD_TIME_UNIT_SECONDS: ReadonlyMap<string, number> = new Map([
  ["usec", 1e-6],
  ["us", 1e-6],
  ["\u00b5s", 1e-6],
  ["\u03bcs", 1e-6],
  ["msec", 1e-3],
  ["ms", 1e-3],
  ["seconds", 1],
  ["second", 1],
  ["sec", 1],
  ["s", 1],
  ["minutes", 60],
  ["minute", 60],
  ["min", 60],
  ["m", 60],
  ["hours", 3600],
  ["hour", 3600],
  ["hr", 3600],
  ["h", 3600],
  ["days", 86400],
  ["day", 86400],
  ["d", 86400],
  ["weeks", 604800],
  ["week", 604800],
  ["w", 604800],
  ["months", 2629800],
  ["month", 2629800],
  ["M", 2629800],
  ["years", 31557600],
  ["year", 31557600],
  ["y", 31557600],
]);

/**
 * Converts a systemd time span such as `5`, `5s`, `5000ms` or `1min 30s` to
 * seconds. Components without a unit count as seconds.
 *
 * @returns The total in seconds, or `undefined` when the text is empty or
 *   contains anything that is not a `<number><unit>` component.
 */
function parseSystemdTimeSpanSeconds(value: string): number | undefined {
  const text = value.trim();
  if (!text) return undefined;

  // Sticky match: each component must start exactly where the last one ended.
  const component = /(\d+(?:\.\d*)?|\.\d+)\s*([A-Za-z\u00b5\u03bc]*)\s*/y;
  let totalSeconds = 0;
  let cursor = 0;
  while (cursor < text.length) {
    component.lastIndex = cursor;
    const match = component.exec(text);
    if (!match) return undefined;
    const amount = Number.parseFloat(match[1] ?? "");
    const unitName = match[2] ?? "";
    const unitSeconds =
      unitName === "" ? 1 : SYSTEMD_TIME_UNIT_SECONDS.get(unitName);
    if (unitSeconds === undefined || !Number.isFinite(amount)) return undefined;
    totalSeconds += amount * unitSeconds;
    cursor = component.lastIndex;
  }
  return totalSeconds;
}

/**
 * Reports whether a `RestartSec=` value equals the recommended 5 seconds.
 *
 * The value is interpreted as a systemd time span, so unit suffixes are
 * honored: `5`, `5s` and `5000ms` match, while `5min` and `5ms` do not. (A
 * plain `parseFloat` would read only the leading digits and accept `5min`.)
 *
 * @param value - Raw directive value; `undefined` when the directive is absent.
 * @returns `true` when the value is within 0.01s of 5 seconds; `false` for
 *   missing, empty or unparseable values.
 */
function isRestartSecPreferred(value: string | undefined): boolean {
  if (!value) return false;
  const seconds = parseSystemdTimeSpanSeconds(value);
  if (seconds === undefined) return false;
  return Math.abs(seconds - 5) < 0.01;
}
/**
 * Checks the gateway's systemd user unit against the recommended defaults and
 * appends a finding to `issues` for each mismatch.
 *
 * Checks, in order: `After=network-online.target`,
 * `Wants=network-online.target`, and a `RestartSec=` of 5 seconds. Every
 * finding carries the unit path as its detail. If the unit file cannot be
 * read, nothing is reported.
 *
 * @param env - Environment used to resolve the unit file path.
 * @param issues - Output list; findings are appended in place.
 */
async function auditSystemdUnit(
  env: Record<string, string | undefined>,
  issues: ServiceConfigIssue[],
) {
  const unitPath = resolveSystemdUserUnitPath(env);

  let unitText: string;
  try {
    unitText = await fs.readFile(unitPath, "utf8");
  } catch {
    // An unreadable or missing unit is not an audit finding here.
    return;
  }

  const { after, wants, restartSec } = parseSystemdUnit(unitText);
  const checks: Array<{ failed: boolean; code: string; message: string }> = [
    {
      failed: !after.has("network-online.target"),
      code: "systemd-after-network-online",
      message: "Missing systemd After=network-online.target",
    },
    {
      failed: !wants.has("network-online.target"),
      code: "systemd-wants-network-online",
      message: "Missing systemd Wants=network-online.target",
    },
    {
      failed: !isRestartSecPreferred(restartSec),
      code: "systemd-restart-sec",
      message: "RestartSec does not match the recommended 5s",
    },
  ];

  for (const { failed, code, message } of checks) {
    if (failed) issues.push({ code, message, detail: unitPath });
  }
}
/**
 * Checks the gateway's LaunchAgent plist for `RunAtLoad` and `KeepAlive` keys
 * that are immediately followed by a `<true/>` element, appending a finding to
 * `issues` for each one that is absent.
 *
 * The check is a case-insensitive text match on the plist contents. Every
 * finding carries the plist path as its detail. If the plist cannot be read,
 * nothing is reported.
 *
 * @param env - Environment used to resolve the plist path.
 * @param issues - Output list; findings are appended in place.
 */
async function auditLaunchdPlist(
  env: Record<string, string | undefined>,
  issues: ServiceConfigIssue[],
) {
  const plistPath = resolveLaunchAgentPlistPath(env);

  let plistXml: string;
  try {
    plistXml = await fs.readFile(plistPath, "utf8");
  } catch {
    // An unreadable or missing plist is not an audit finding here.
    return;
  }

  const requiredKeys: Array<{ pattern: RegExp; code: string; message: string }> =
    [
      {
        pattern: /<key>RunAtLoad<\/key>\s*<true\s*\/>/i,
        code: "launchd-run-at-load",
        message: "LaunchAgent is missing RunAtLoad=true",
      },
      {
        pattern: /<key>KeepAlive<\/key>\s*<true\s*\/>/i,
        code: "launchd-keep-alive",
        message: "LaunchAgent is missing KeepAlive=true",
      },
    ];

  for (const { pattern, code, message } of requiredKeys) {
    if (pattern.test(plistXml)) continue;
    issues.push({ code, message, detail: plistPath });
  }
}
/**
 * Appends a finding when a non-empty service command line lacks the
 * `gateway` subcommand. A missing or empty command line is not reported.
 *
 * @param programArguments - Command line from the installed service, if any.
 * @param issues - Output list; the finding is appended in place.
 */
function auditGatewayCommand(
  programArguments: string[] | undefined,
  issues: ServiceConfigIssue[],
) {
  const argumentCount = programArguments?.length ?? 0;
  if (argumentCount === 0) return;
  if (hasGatewaySubcommand(programArguments)) return;

  issues.push({
    code: "gateway-command-missing",
    message: "Service command does not include the gateway subcommand",
  });
}
/**
 * Audits the installed gateway service configuration.
 *
 * The command line is checked on every platform. The systemd user unit is
 * additionally checked on `linux` and the LaunchAgent plist on `darwin`;
 * other platforms get no supervisor-specific checks.
 *
 * @param params.env - Environment used to resolve supervisor config paths.
 * @param params.command - Installed service command, or `null` when unknown.
 * @param params.platform - Platform override; defaults to `process.platform`.
 * @returns The collected findings, with `ok` set when there are none.
 */
export async function auditGatewayServiceConfig(params: {
  env: Record<string, string | undefined>;
  command: GatewayServiceCommand;
  platform?: NodeJS.Platform;
}): Promise<ServiceConfigAudit> {
  const found: ServiceConfigIssue[] = [];

  auditGatewayCommand(params.command?.programArguments, found);

  switch (params.platform ?? process.platform) {
    case "linux":
      await auditSystemdUnit(params.env, found);
      break;
    case "darwin":
      await auditLaunchdPlist(params.env, found);
      break;
    default:
      break;
  }

  return { ok: found.length === 0, issues: found };
}

View File

@@ -33,6 +33,12 @@ function resolveSystemdUnitPath(
return resolveSystemdUnitPathForName(env, GATEWAY_SYSTEMD_SERVICE_NAME);
}
/**
 * Exported accessor for the gateway's systemd unit file path.
 *
 * Delegates directly to `resolveSystemdUnitPath`, which resolves the path for
 * `GATEWAY_SYSTEMD_SERVICE_NAME`.
 * NOTE(review): the name implies a per-user unit location — confirm against
 * `resolveSystemdUnitPathForName`.
 *
 * @param env - Environment passed through to the path resolver.
 * @returns The resolved unit file path.
 */
export function resolveSystemdUserUnitPath(
env: Record<string, string | undefined>,
): string {
return resolveSystemdUnitPath(env);
}
function resolveLoginctlUser(
env: Record<string, string | undefined>,
): string | null {