feat(doctor): audit supervisor config + docs
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
## Unreleased
|
||||
|
||||
- Doctor/Daemon: audit supervisor configs, recommend doctor from daemon status, and document user vs system services. (#?) — thanks @steipete
|
||||
- Daemon: align generated systemd unit with docs for network-online + restart delay. (#479) — thanks @azade-c
|
||||
- Outbound: default Telegram account selection for config-only tokens; remove heartbeat-specific accountId handling. (follow-up #516) — thanks @YuriNachos
|
||||
- Cron: allow Telegram delivery targets with topic/thread IDs (e.g. `-100…:topic:123`). (#474) — thanks @mitschabaude-bot
|
||||
|
||||
@@ -51,6 +51,7 @@ cat ~/.clawdbot/clawdbot.json
|
||||
- Sandbox image repair when sandboxing is enabled.
|
||||
- Legacy service migration and extra gateway detection.
|
||||
- Gateway runtime checks (service installed but not running; cached launchd label).
|
||||
- Supervisor config audit (launchd/systemd/schtasks) with optional repair.
|
||||
- Gateway port collision diagnostics (default `18789`).
|
||||
- Security warnings for open DM policies.
|
||||
- systemd linger check on Linux.
|
||||
@@ -143,17 +144,23 @@ workspace.
|
||||
Doctor runs a health check and offers to restart the gateway when it looks
|
||||
unhealthy.
|
||||
|
||||
### 11) Gateway runtime + port diagnostics
|
||||
### 11) Supervisor config audit + repair
|
||||
Doctor checks the installed supervisor config (launchd/systemd/schtasks) for
|
||||
missing or outdated defaults (e.g., systemd network-online dependencies and
|
||||
restart delay). When it finds a mismatch, it recommends an update and can
|
||||
rewrite the service file/task to the current defaults.
|
||||
|
||||
### 12) Gateway runtime + port diagnostics
|
||||
Doctor inspects the daemon runtime (PID, last exit status) and warns when the
|
||||
service is installed but not actually running. It also checks for port collisions
|
||||
on the gateway port (default `18789`) and reports likely causes (gateway already
|
||||
running, SSH tunnel).
|
||||
|
||||
### 12) Config write + wizard metadata
|
||||
### 13) Config write + wizard metadata
|
||||
Doctor persists any config changes and stamps wizard metadata to record the
|
||||
doctor run.
|
||||
|
||||
### 13) Workspace tips (backup + memory system)
|
||||
### 14) Workspace tips (backup + memory system)
|
||||
Doctor suggests a workspace memory system when missing and prints a backup tip
|
||||
if the workspace is not already under git.
|
||||
|
||||
|
||||
@@ -189,6 +189,14 @@ Bundled mac app:
|
||||
- `launchctl` only works if the LaunchAgent is installed; otherwise use `clawdbot daemon install` first.
|
||||
|
||||
## Supervision (systemd user unit)
|
||||
Clawdbot installs a **systemd user service** by default on Linux/WSL2. We
|
||||
recommend user services for single-user machines (simpler env, per-user config).
|
||||
Use a **system service** for multi-user or always-on servers (no lingering
|
||||
required, shared supervision).
|
||||
|
||||
`clawdbot daemon install` writes the user unit. `clawdbot doctor` audits the
|
||||
unit and can update it to match the current recommended defaults.
|
||||
|
||||
Create `~/.config/systemd/user/clawdbot-gateway.service`:
|
||||
```
|
||||
[Unit]
|
||||
|
||||
@@ -54,7 +54,11 @@ clawdbot doctor
|
||||
```
|
||||
|
||||
## System control (systemd user unit)
|
||||
Full unit example lives in the [Gateway runbook](/gateway). Minimal setup:
|
||||
Clawdbot installs a systemd **user** service by default. Use a **system**
|
||||
service for shared or always-on servers. The full unit example and guidance
|
||||
live in the [Gateway runbook](/gateway).
|
||||
|
||||
Minimal setup:
|
||||
|
||||
Create `~/.config/systemd/user/clawdbot-gateway.service`:
|
||||
|
||||
|
||||
@@ -33,6 +33,8 @@ import { resolveGatewayLogPaths } from "../daemon/launchd.js";
|
||||
import { findLegacyGatewayServices } from "../daemon/legacy.js";
|
||||
import { resolveGatewayProgramArguments } from "../daemon/program-args.js";
|
||||
import { resolveGatewayService } from "../daemon/service.js";
|
||||
import type { ServiceConfigAudit } from "../daemon/service-audit.js";
|
||||
import { auditGatewayServiceConfig } from "../daemon/service-audit.js";
|
||||
import { callGateway } from "../gateway/call.js";
|
||||
import { resolveGatewayBindHost } from "../gateway/net.js";
|
||||
import {
|
||||
@@ -89,6 +91,7 @@ type DaemonStatus = {
|
||||
cachedLabel?: boolean;
|
||||
missingUnit?: boolean;
|
||||
};
|
||||
configAudit?: ServiceConfigAudit;
|
||||
};
|
||||
config?: {
|
||||
cli: ConfigSummary;
|
||||
@@ -343,6 +346,10 @@ async function gatherDaemonStatus(opts: {
|
||||
service.readCommand(process.env).catch(() => null),
|
||||
service.readRuntime(process.env).catch(() => undefined),
|
||||
]);
|
||||
const configAudit = await auditGatewayServiceConfig({
|
||||
env: process.env,
|
||||
command,
|
||||
});
|
||||
|
||||
const serviceEnv = command?.environment ?? undefined;
|
||||
const mergedDaemonEnv = {
|
||||
@@ -484,6 +491,7 @@ async function gatherDaemonStatus(opts: {
|
||||
notLoadedText: service.notLoadedText,
|
||||
command,
|
||||
runtime,
|
||||
configAudit,
|
||||
},
|
||||
config: {
|
||||
cli: cliConfigSummary,
|
||||
@@ -538,6 +546,16 @@ function printDaemonStatus(status: DaemonStatus, opts: { json: boolean }) {
|
||||
if (daemonEnvLines.length > 0) {
|
||||
defaultRuntime.log(`Daemon env: ${daemonEnvLines.join(" ")}`);
|
||||
}
|
||||
if (service.configAudit?.issues.length) {
|
||||
defaultRuntime.error(
|
||||
"Service config looks out of date or non-standard.",
|
||||
);
|
||||
for (const issue of service.configAudit.issues) {
|
||||
const detail = issue.detail ? ` (${issue.detail})` : "";
|
||||
defaultRuntime.error(`Service config issue: ${issue.message}${detail}`);
|
||||
}
|
||||
defaultRuntime.error('Recommendation: run "clawdbot doctor".');
|
||||
}
|
||||
if (status.config) {
|
||||
const cliCfg = `${status.config.cli.path}${status.config.cli.exists ? "" : " (missing)"}${status.config.cli.valid ? "" : " (invalid)"}`;
|
||||
defaultRuntime.log(`Config (cli): ${cliCfg}`);
|
||||
|
||||
@@ -15,6 +15,7 @@ import {
|
||||
} from "../daemon/legacy.js";
|
||||
import { resolveGatewayProgramArguments } from "../daemon/program-args.js";
|
||||
import { resolveGatewayService } from "../daemon/service.js";
|
||||
import { auditGatewayServiceConfig } from "../daemon/service-audit.js";
|
||||
import type { RuntimeEnv } from "../runtime.js";
|
||||
import {
|
||||
DEFAULT_GATEWAY_DAEMON_RUNTIME,
|
||||
@@ -23,6 +24,18 @@ import {
|
||||
} from "./daemon-runtime.js";
|
||||
import type { DoctorOptions, DoctorPrompter } from "./doctor-prompter.js";
|
||||
|
||||
/**
 * Infers which runtime the installed gateway service was launched with by
 * inspecting the executable named in its first program argument.
 *
 * @param programArguments - Argument vector of the installed service, if any.
 * @returns "bun" or "node" when the executable's basename (compared
 *   case-insensitively, with or without ".exe") matches; otherwise
 *   DEFAULT_GATEWAY_DAEMON_RUNTIME.
 */
function detectGatewayRuntime(
  programArguments: string[] | undefined,
): GatewayDaemonRuntime {
  const executable = programArguments?.[0];
  // No (or empty) executable entry: nothing to inspect, use the default.
  if (!executable) return DEFAULT_GATEWAY_DAEMON_RUNTIME;

  switch (path.basename(executable).toLowerCase()) {
    case "bun":
    case "bun.exe":
      return "bun";
    case "node":
    case "node.exe":
      return "node";
    default:
      return DEFAULT_GATEWAY_DAEMON_RUNTIME;
  }
}
|
||||
|
||||
export async function maybeMigrateLegacyGatewayService(
|
||||
cfg: ClawdbotConfig,
|
||||
mode: "local" | "remote",
|
||||
@@ -112,6 +125,83 @@ export async function maybeMigrateLegacyGatewayService(
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Audits the installed gateway service definition and, when the user
 * confirms, reinstalls it so it matches the current recommended defaults.
 *
 * Emits a note and returns early in Nix mode or when the gateway mode is
 * "remote". Returns without output when no service command can be read or
 * when the audit reports no issues.
 *
 * @param cfg - Loaded config; used to resolve the gateway port and auth token.
 * @param mode - Gateway mode; "remote" skips the local service audit.
 * @param runtime - Receives the error message if reinstalling the service fails.
 * @param prompter - Asks for confirmation before the service is rewritten.
 */
export async function maybeRepairGatewayServiceConfig(
  cfg: ClawdbotConfig,
  mode: "local" | "remote",
  runtime: RuntimeEnv,
  prompter: DoctorPrompter,
) {
  if (resolveIsNixMode(process.env)) {
    note("Nix mode detected; skip service updates.", "Gateway");
    return;
  }
  if (mode === "remote") {
    note("Gateway mode is remote; skipped local service audit.", "Gateway");
    return;
  }

  const service = resolveGatewayService();
  // A failed read is treated the same as "no service installed".
  const command = await service.readCommand(process.env).catch(() => null);
  if (!command) return;

  const { issues } = await auditGatewayServiceConfig({
    env: process.env,
    command,
  });
  if (issues.length === 0) return;

  // One bullet per issue, with the optional detail in parentheses.
  const issueLines: string[] = [];
  for (const { message, detail } of issues) {
    issueLines.push(detail ? `- ${message} (${detail})` : `- ${message}`);
  }
  note(issueLines.join("\n"), "Gateway service config");

  const confirmed = await prompter.confirmSkipInNonInteractive({
    message: "Update gateway service config to the recommended defaults now?",
    initialValue: true,
  });
  if (!confirmed) return;

  // Dev mode: the CLI entry script is a .ts file located under a src/ directory.
  const entryScript = process.argv[1];
  const devMode =
    entryScript?.includes(`${path.sep}src${path.sep}`) &&
    entryScript?.endsWith(".ts");
  const port = resolveGatewayPort(cfg, process.env);
  // Keep whichever runtime (bun/node) the existing service already uses.
  const runtimeChoice = detectGatewayRuntime(command.programArguments);
  const resolved = await resolveGatewayProgramArguments({
    port,
    dev: devMode,
    runtime: runtimeChoice,
  });
  const { programArguments, workingDirectory } = resolved;

  const launchdLabel =
    process.platform === "darwin" ? GATEWAY_LAUNCH_AGENT_LABEL : undefined;
  const gatewayToken =
    cfg.gateway?.auth?.token ?? process.env.CLAWDBOT_GATEWAY_TOKEN;
  const environment: Record<string, string | undefined> = {
    PATH: process.env.PATH,
    CLAWDBOT_PROFILE: process.env.CLAWDBOT_PROFILE,
    CLAWDBOT_STATE_DIR: process.env.CLAWDBOT_STATE_DIR,
    CLAWDBOT_CONFIG_PATH: process.env.CLAWDBOT_CONFIG_PATH,
    CLAWDBOT_GATEWAY_PORT: String(port),
    CLAWDBOT_GATEWAY_TOKEN: gatewayToken,
    CLAWDBOT_LAUNCHD_LABEL: launchdLabel,
  };

  try {
    await service.install({
      env: process.env,
      stdout: process.stdout,
      programArguments,
      workingDirectory,
      environment,
    });
  } catch (error) {
    // Report the failure but let the rest of the doctor run continue.
    runtime.error(`Gateway service update failed: ${String(error)}`);
  }
}
|
||||
|
||||
export async function maybeScanExtraGatewayServices(options: DoctorOptions) {
|
||||
const extraServices = await findExtraGatewayServices(process.env, {
|
||||
deep: options.deep,
|
||||
|
||||
@@ -30,6 +30,7 @@ import {
|
||||
} from "./doctor-format.js";
|
||||
import {
|
||||
maybeMigrateLegacyGatewayService,
|
||||
maybeRepairGatewayServiceConfig,
|
||||
maybeScanExtraGatewayServices,
|
||||
} from "./doctor-gateway-services.js";
|
||||
import {
|
||||
@@ -157,6 +158,12 @@ export async function doctorCommand(
|
||||
prompter,
|
||||
);
|
||||
await maybeScanExtraGatewayServices(options);
|
||||
await maybeRepairGatewayServiceConfig(
|
||||
cfg,
|
||||
resolveMode(cfg),
|
||||
runtime,
|
||||
prompter,
|
||||
);
|
||||
|
||||
await noteSecurityWarnings(cfg);
|
||||
|
||||
|
||||
165
src/daemon/service-audit.ts
Normal file
165
src/daemon/service-audit.ts
Normal file
@@ -0,0 +1,165 @@
|
||||
import fs from "node:fs/promises";
|
||||
import { resolveLaunchAgentPlistPath } from "./launchd.js";
|
||||
import { resolveSystemdUserUnitPath } from "./systemd.js";
|
||||
|
||||
/**
 * Description of the installed gateway service command handed to
 * `auditGatewayServiceConfig`. `null` is allowed and means no command is
 * available; the audit then skips the command check.
 */
export type GatewayServiceCommand = {
|
||||
/** Argument vector of the service; audited for the "gateway" subcommand. */
programArguments: string[];
|
||||
/** NOTE(review): presumably the service's working directory; confirm with the producer. */
workingDirectory?: string;
|
||||
/** NOTE(review): presumably environment variables set on the service; confirm with the producer. */
environment?: Record<string, string>;
|
||||
/** NOTE(review): presumably the file the command was read from; confirm with the producer. */
sourcePath?: string;
|
||||
} | null;
|
||||
|
||||
/** A single finding produced by the gateway service config audit. */
export type ServiceConfigIssue = {
|
||||
/** Stable identifier for the finding, e.g. "systemd-restart-sec". */
code: string;
|
||||
/** Human-readable description of the finding. */
message: string;
|
||||
/** Optional extra context; the audits in this file put the unit/plist path here. */
detail?: string;
|
||||
};
|
||||
|
||||
/** Result of `auditGatewayServiceConfig`. */
export type ServiceConfigAudit = {
|
||||
/** True when `issues` is empty. */
ok: boolean;
|
||||
/** Findings collected by the audit, in the order they were detected. */
issues: ServiceConfigIssue[];
|
||||
};
|
||||
|
||||
/**
 * Reports whether the service's argument vector contains the literal
 * "gateway" subcommand as a standalone argument.
 *
 * @param programArguments - Argument vector of the service, if known.
 * @returns `true` only when one argument is exactly "gateway"; `false` for a
 *   missing or empty argument list.
 */
function hasGatewaySubcommand(programArguments?: string[]): boolean {
  return programArguments?.includes("gateway") ?? false;
}
|
||||
|
||||
/**
 * Extracts the settings the service audit inspects from the text of a systemd
 * unit file: the `After=` and `Wants=` dependency lists and `RestartSec=`.
 *
 * Parsing rules:
 * - Blank lines, comment lines (`#` or `;`) and section headers are ignored.
 * - A line whose last character is a backslash is joined with the following
 *   line, the backslash being replaced by a space; comment lines in between
 *   are skipped.
 * - `After=` / `Wants=` may appear several times; all entries accumulate.
 * - For `RestartSec=` the last non-empty assignment wins.
 *
 * @param content - Raw unit file text (LF or CRLF line endings).
 * @returns The collected dependency sets and the raw `RestartSec` value, which
 *   is `undefined` when the key is absent.
 */
function parseSystemdUnit(content: string): {
  after: Set<string>;
  wants: Set<string>;
  restartSec?: string;
} {
  const after = new Set<string>();
  const wants = new Set<string>();
  let restartSec: string | undefined;

  // Pass 1: fold physical lines into logical lines (handles "\" continuations).
  const logicalLines: string[] = [];
  let pending = "";
  for (const rawLine of content.split(/\r?\n/)) {
    const piece = rawLine.trim();
    // Comments are dropped both standalone and inside a continued line.
    if (piece.startsWith("#") || piece.startsWith(";")) continue;
    if (rawLine.endsWith("\\")) {
      pending += `${rawLine.slice(0, -1).trim()} `;
      continue;
    }
    logicalLines.push(pending + piece);
    pending = "";
  }
  // A dangling continuation at end of file still counts as a line.
  if (pending) logicalLines.push(pending);

  // Pass 2: pick out the key=value assignments we care about.
  for (const logicalLine of logicalLines) {
    const line = logicalLine.trim();
    if (!line || line.startsWith("[")) continue;
    const idx = line.indexOf("=");
    if (idx <= 0) continue;
    const key = line.slice(0, idx).trim();
    const value = line.slice(idx + 1).trim();
    if (!value) continue;

    if (key === "After" || key === "Wants") {
      const target = key === "After" ? after : wants;
      for (const entry of value.split(/\s+/)) {
        if (entry) target.add(entry);
      }
    } else if (key === "RestartSec") {
      restartSec = value;
    }
  }

  return { after, wants, restartSec };
}
|
||||
|
||||
/**
 * Seconds per systemd time-span unit suffix. Suffixes are case-sensitive
 * ("m" is minutes, "M" is months).
 */
const SYSTEMD_TIME_UNIT_SECONDS: ReadonlyMap<string, number> = new Map([
  ["usec", 1e-6],
  ["us", 1e-6],
  ["µs", 1e-6],
  ["μs", 1e-6],
  ["msec", 1e-3],
  ["ms", 1e-3],
  ["seconds", 1],
  ["second", 1],
  ["sec", 1],
  ["s", 1],
  ["minutes", 60],
  ["minute", 60],
  ["min", 60],
  ["m", 60],
  ["hours", 3600],
  ["hour", 3600],
  ["hr", 3600],
  ["h", 3600],
  ["days", 86400],
  ["day", 86400],
  ["d", 86400],
  ["weeks", 604800],
  ["week", 604800],
  ["w", 604800],
  ["months", 2629800],
  ["month", 2629800],
  ["M", 2629800],
  ["years", 31557600],
  ["year", 31557600],
  ["y", 31557600],
]);

/**
 * Checks whether a raw `RestartSec=` value equals the recommended 5 seconds.
 *
 * The value is interpreted as a systemd time span: a bare number is seconds,
 * a number may carry a unit suffix ("5s", "5000ms"), and several components
 * may be combined ("4s 1000ms"). Parsing the unit matters: a plain
 * `parseFloat` would read "5min" as 5 and wrongly accept it.
 *
 * @param value - Raw value from the unit file, or `undefined` when absent.
 * @returns `true` when the span totals 5 s (within 0.01 s); `false` for
 *   missing, empty, unparseable, or different values.
 */
function isRestartSecPreferred(value: string | undefined): boolean {
  if (!value) return false;
  const text = value.trim();
  if (!text) return false;

  // Sticky regex: each match must start exactly where the previous one ended,
  // so any unrecognized character makes the whole value invalid.
  const component = /\s*(\d+(?:\.\d*)?|\.\d+)\s*([A-Za-zµμ]*)/y;
  let totalSeconds = 0;
  let position = 0;
  while (position < text.length) {
    component.lastIndex = position;
    const match = component.exec(text);
    if (!match) return false;
    const [, amount = "", unit = ""] = match;
    // A component without a suffix is in seconds.
    const factor = unit === "" ? 1 : SYSTEMD_TIME_UNIT_SECONDS.get(unit);
    if (factor === undefined) return false;
    totalSeconds += Number.parseFloat(amount) * factor;
    position = component.lastIndex;
  }

  if (!Number.isFinite(totalSeconds)) return false;
  return Math.abs(totalSeconds - 5) < 0.01;
}
|
||||
|
||||
/**
 * Audits the systemd user unit for the recommended network-online ordering
 * and restart delay, appending one issue per deviation to `issues`.
 *
 * If the unit file cannot be read, nothing is reported.
 *
 * @param env - Environment used to resolve the unit file path.
 * @param issues - Accumulator the findings are pushed onto (mutated in place).
 */
async function auditSystemdUnit(
  env: Record<string, string | undefined>,
  issues: ServiceConfigIssue[],
) {
  const unitPath = resolveSystemdUserUnitPath(env);

  let unitText: string;
  try {
    unitText = await fs.readFile(unitPath, "utf8");
  } catch {
    // Unreadable or absent unit: there is nothing to audit.
    return;
  }

  const { after, wants, restartSec } = parseSystemdUnit(unitText);

  // [requirement satisfied?, issue code, issue message] in reporting order.
  const checks: ReadonlyArray<readonly [boolean, string, string]> = [
    [
      after.has("network-online.target"),
      "systemd-after-network-online",
      "Missing systemd After=network-online.target",
    ],
    [
      wants.has("network-online.target"),
      "systemd-wants-network-online",
      "Missing systemd Wants=network-online.target",
    ],
    [
      isRestartSecPreferred(restartSec),
      "systemd-restart-sec",
      "RestartSec does not match the recommended 5s",
    ],
  ];

  for (const [satisfied, code, message] of checks) {
    if (satisfied) continue;
    issues.push({ code, message, detail: unitPath });
  }
}
|
||||
|
||||
/**
 * Audits the LaunchAgent plist for `RunAtLoad` and `KeepAlive` being set to
 * `<true/>`, appending one issue per key that is not to `issues`.
 *
 * The check is a textual match on the plist content; any form other than
 * `<key>…</key>` followed by `<true/>` is reported as missing. If the plist
 * cannot be read, nothing is reported.
 *
 * @param env - Environment used to resolve the plist path.
 * @param issues - Accumulator the findings are pushed onto (mutated in place).
 */
async function auditLaunchdPlist(
  env: Record<string, string | undefined>,
  issues: ServiceConfigIssue[],
) {
  const plistPath = resolveLaunchAgentPlistPath(env);

  let plistText: string;
  try {
    plistText = await fs.readFile(plistPath, "utf8");
  } catch {
    // Unreadable or absent plist: there is nothing to audit.
    return;
  }

  // Required keys in reporting order.
  const requirements = [
    {
      pattern: /<key>RunAtLoad<\/key>\s*<true\s*\/>/i,
      code: "launchd-run-at-load",
      message: "LaunchAgent is missing RunAtLoad=true",
    },
    {
      pattern: /<key>KeepAlive<\/key>\s*<true\s*\/>/i,
      code: "launchd-keep-alive",
      message: "LaunchAgent is missing KeepAlive=true",
    },
  ];

  for (const { pattern, code, message } of requirements) {
    if (pattern.test(plistText)) continue;
    issues.push({ code, message, detail: plistPath });
  }
}
|
||||
|
||||
/**
 * Flags a service command that does not invoke the gateway subcommand.
 *
 * A missing or empty argument list is not reported.
 *
 * @param programArguments - Argument vector of the installed service, if any.
 * @param issues - Accumulator the finding is pushed onto (mutated in place).
 */
function auditGatewayCommand(
  programArguments: string[] | undefined,
  issues: ServiceConfigIssue[],
) {
  const argCount = programArguments?.length ?? 0;
  if (argCount === 0) return;
  if (hasGatewaySubcommand(programArguments)) return;

  issues.push({
    code: "gateway-command-missing",
    message: "Service command does not include the gateway subcommand",
  });
}
|
||||
|
||||
/**
 * Audits the installed gateway service configuration against the recommended
 * defaults.
 *
 * The service command is checked on every platform. In addition, the systemd
 * user unit is inspected on Linux and the LaunchAgent plist on macOS; other
 * platforms get no supervisor-file checks.
 *
 * @param params.env - Environment used to locate the supervisor config file.
 * @param params.command - Installed service command, or `null` if unavailable.
 * @param params.platform - Platform override; defaults to `process.platform`.
 * @returns The collected issues, with `ok` set when there are none.
 */
export async function auditGatewayServiceConfig(params: {
  env: Record<string, string | undefined>;
  command: GatewayServiceCommand;
  platform?: NodeJS.Platform;
}): Promise<ServiceConfigAudit> {
  const findings: ServiceConfigIssue[] = [];
  const targetPlatform = params.platform ?? process.platform;

  auditGatewayCommand(params.command?.programArguments, findings);

  switch (targetPlatform) {
    case "linux":
      await auditSystemdUnit(params.env, findings);
      break;
    case "darwin":
      await auditLaunchdPlist(params.env, findings);
      break;
    default:
      break;
  }

  const ok = findings.length === 0;
  return { ok, issues: findings };
}
|
||||
@@ -33,6 +33,12 @@ function resolveSystemdUnitPath(
|
||||
return resolveSystemdUnitPathForName(env, GATEWAY_SYSTEMD_SERVICE_NAME);
|
||||
}
|
||||
|
||||
/**
 * Public accessor for the gateway's systemd unit path; delegates to the
 * module-private `resolveSystemdUnitPath`.
 *
 * @param env - Environment passed through to the path resolver.
 * @returns The path of the gateway systemd unit file.
 */
export function resolveSystemdUserUnitPath(
  env: Record<string, string | undefined>,
): string {
  const unitPath = resolveSystemdUnitPath(env);
  return unitPath;
}
|
||||
|
||||
function resolveLoginctlUser(
|
||||
env: Record<string, string | undefined>,
|
||||
): string | null {
|
||||
|
||||
Reference in New Issue
Block a user