diff --git a/CHANGELOG.md b/CHANGELOG.md index af457f56e..59c16cef3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ - Agent: avoid duplicating context/skills when SDK rebuilds the system prompt. (#418) - Signal: reconnect SSE monitor with abortable backoff; log stream errors. Thanks @nexty5870 for PR #430. - Gateway: pass resolved provider as messageProvider for agent runs so provider-specific tools are available. Thanks @imfing for PR #389. +- Doctor: add state integrity checks + repair prompts for missing sessions/state dirs, transcript mismatches, and permission issues; document full doctor flow and workspace backup tips. - Discord/Telegram: add per-request retry policy with configurable delays and docs. - Telegram: run long polling via grammY runner with per-chat sequentialization and concurrency tied to `agent.maxConcurrent`. Thanks @mukhtharcm for PR #366. - macOS: prevent gateway launchd startup race where the app could kill a just-started gateway; avoid unnecessary `bootout` and ensure the job is enabled at login. Fixes #306. Thanks @gupsammy for PR #387. diff --git a/docs/concepts/agent-workspace.md b/docs/concepts/agent-workspace.md index 7e85fd7a6..453b57b77 100644 --- a/docs/concepts/agent-workspace.md +++ b/docs/concepts/agent-workspace.md @@ -141,6 +141,19 @@ gh auth login gh repo create clawd-workspace --private --source . --remote origin --push ``` +Option C: GitLab web UI + +1. Create a new **private** repository on GitLab. +2. Do not initialize with a README (avoids merge conflicts). +3. Copy the HTTPS remote URL. +4. Add the remote and push: + +```bash +git branch -M main +git remote add origin <https-url> +git push -u origin main +``` + ### 3) Ongoing updates ```bash diff --git a/docs/gateway/doctor.md b/docs/gateway/doctor.md index 4075b88a3..371811cea 100644 --- a/docs/gateway/doctor.md +++ b/docs/gateway/doctor.md @@ -6,52 +6,10 @@ read_when: --- # Doctor -`clawdbot doctor` is the repair + migration tool for Clawdbot.
It runs a quick health check, audits skills, and can migrate deprecated config entries to the new schema. +`clawdbot doctor` is the repair + migration tool for Clawdbot. It fixes stale +config/state, checks health, and provides actionable repair steps. -## What it does -- Runs a health check and offers to restart the gateway if it looks unhealthy. -- Prints a skills status summary (eligible/missing/blocked). -- Detects deprecated config keys and offers to migrate them. -- Migrates legacy `~/.clawdis/clawdis.json` when no Clawdbot config exists. -- Checks sandbox Docker images when sandboxing is enabled (offers to build or switch to legacy names). -- Detects legacy Clawdis services (launchd/systemd; legacy schtasks for native Windows) and offers to migrate them. -- Detects other gateway-like services and prints cleanup hints (optional deep scan for system services). -- On Linux, checks if systemd user lingering is enabled and can enable it (required to keep the Gateway alive after logout). -- Migrates legacy on-disk state layouts (sessions, agentDir, provider auth dirs) into the current per-agent/per-account structure. - -## Legacy config file migration -If `~/.clawdis/clawdis.json` exists and `~/.clawdbot/clawdbot.json` does not, doctor will migrate the file and normalize old paths/image names. - -## Legacy config migrations -When the config contains deprecated keys, other commands will refuse to run and ask you to run `clawdbot doctor`. -Doctor will: -- Explain which legacy keys were found. -- Show the migration it applied. -- Rewrite `~/.clawdbot/clawdbot.json` with the updated schema. - -The Gateway also auto-runs doctor migrations on startup when it detects a legacy -config format, so stale configs are repaired without manual intervention. 
- -Current migrations: -- `routing.allowFrom` → `whatsapp.allowFrom` -- `agent.model`/`allowedModels`/`modelAliases`/`modelFallbacks`/`imageModelFallbacks` - → `agent.models` + `agent.model.primary/fallbacks` + `agent.imageModel.primary/fallbacks` - -## Legacy state migrations (disk layout) - -Doctor can migrate older on-disk layouts into the current structure: -- Sessions store + transcripts: - - from `~/.clawdbot/sessions/` to `~/.clawdbot/agents/<agentId>/sessions/` -- Agent dir: - - from `~/.clawdbot/agent/` to `~/.clawdbot/agents/<agentId>/agent/` -- WhatsApp auth state (Baileys): - - from legacy `~/.clawdbot/credentials/*.json` (except `oauth.json`) - - to `~/.clawdbot/credentials/whatsapp/<accountId>/...` (default account id: `default`) - -These migrations are best-effort and idempotent; doctor will emit warnings when it leaves any legacy folders behind as backups. -The Gateway/CLI also auto-migrates the legacy sessions + agent dir on startup so history/auth/models land in the per-agent path without a manual doctor run. WhatsApp auth is intentionally only migrated via `clawdbot doctor`. - -## Usage +## Quick start ```bash clawdbot doctor @@ -83,7 +41,112 @@ If you want to review changes before writing, open the config file first: cat ~/.clawdbot/clawdbot.json ``` -## Legacy service migrations -Doctor checks for older Clawdis gateway services (launchd/systemd/schtasks). WSL2 installs use systemd. -If found, it offers to remove them and install the Clawdbot service using the current gateway port. -Remote mode skips the install step, and Nix mode only reports what it finds. +## What it does (summary) +- Health check + restart prompt. +- Skills status summary (eligible/missing/blocked). +- Legacy config migration and normalization. +- Legacy on-disk state migration (sessions/agent dir/WhatsApp auth). +- State integrity and permissions checks (sessions, transcripts, state dir). +- Sandbox image repair when sandboxing is enabled. +- Legacy service migration and extra gateway detection.
+- Security warnings for open DM policies. +- systemd linger check on Linux. +- Writes updated config + wizard metadata. + +## Detailed behavior and rationale + +### 1) Legacy config file migration +If `~/.clawdis/clawdis.json` exists and `~/.clawdbot/clawdbot.json` does not, +doctor migrates the file and normalizes old paths/image names. This prevents +new installs from silently booting with the wrong schema. + +### 2) Legacy config key migrations +When the config contains deprecated keys, other commands refuse to run and ask +you to run `clawdbot doctor`. + +Doctor will: +- Explain which legacy keys were found. +- Show the migration it applied. +- Rewrite `~/.clawdbot/clawdbot.json` with the updated schema. + +The Gateway also auto-runs doctor migrations on startup when it detects a +legacy config format, so stale configs are repaired without manual intervention. + +Current migrations: +- `routing.allowFrom` → `whatsapp.allowFrom` +- `agent.model`/`allowedModels`/`modelAliases`/`modelFallbacks`/`imageModelFallbacks` + → `agent.models` + `agent.model.primary/fallbacks` + `agent.imageModel.primary/fallbacks` + +### 3) Legacy state migrations (disk layout) +Doctor can migrate older on-disk layouts into the current structure: +- Sessions store + transcripts: + - from `~/.clawdbot/sessions/` to `~/.clawdbot/agents/<agentId>/sessions/` +- Agent dir: + - from `~/.clawdbot/agent/` to `~/.clawdbot/agents/<agentId>/agent/` +- WhatsApp auth state (Baileys): + - from legacy `~/.clawdbot/credentials/*.json` (except `oauth.json`) + - to `~/.clawdbot/credentials/whatsapp/<accountId>/...` (default account id: `default`) + +These migrations are best-effort and idempotent; doctor will emit warnings when +it leaves any legacy folders behind as backups. The Gateway/CLI also auto-migrates +the legacy sessions + agent dir on startup so history/auth/models land in the +per-agent path without a manual doctor run. WhatsApp auth is intentionally only +migrated via `clawdbot doctor`.
+ +### 4) State integrity checks (session persistence, routing, and safety) +The state directory is the operational brainstem. If it vanishes, you lose +sessions, credentials, logs, and config (unless you have backups elsewhere). + +Doctor checks: +- **State dir missing**: warns about catastrophic state loss, prompts to recreate + the directory, and reminds you that it cannot recover missing data. +- **State dir permissions**: verifies writability; offers to repair permissions + (and emits a `chown` hint when owner/group mismatch is detected). +- **Session dirs missing**: `sessions/` and the session store directory are + required to persist history and avoid `ENOENT` crashes. +- **Transcript mismatch**: warns when recent session entries have missing + transcript files. +- **Main session “1-line JSONL”**: flags when the main transcript has only one + line (history is not accumulating). +- **Multiple state dirs**: warns when multiple `~/.clawdbot` folders exist across + home directories or when `CLAWDBOT_STATE_DIR` points elsewhere (history can + split between installs). +- **Remote mode reminder**: if `gateway.mode=remote`, doctor reminds you to run + it on the remote host (the state lives there). + +### 5) Sandbox image repair +When sandboxing is enabled, doctor checks Docker images and offers to build or +switch to legacy names if the current image is missing. + +### 6) Gateway service migrations and cleanup hints +Doctor detects legacy Clawdis gateway services (launchd/systemd/schtasks) and +offers to remove them and install the Clawdbot service using the current gateway +port. It can also scan for extra gateway-like services and print cleanup hints +to ensure only one gateway runs per machine. + +### 7) Security warnings +Doctor emits warnings when a provider is open to DMs without an allowlist, or +when a policy is configured in a dangerous way. 
+
+### 8) systemd linger (Linux)
+If running as a systemd user service, doctor ensures lingering is enabled so the
+gateway stays alive after logout.
+
+### 9) Skills status
+Doctor prints a quick summary of eligible/missing/blocked skills for the current
+workspace.
+
+### 10) Gateway health check + restart
+Doctor runs a health check and offers to restart the gateway when it looks
+unhealthy.
+
+### 11) Config write + wizard metadata
+Doctor persists any config changes and stamps wizard metadata to record the
+doctor run.
+
+### 12) Workspace tips (backup + memory system)
+Doctor suggests a workspace memory system when missing and prints a backup tip
+if the workspace is not already under git.
+
+See [/concepts/agent-workspace](/concepts/agent-workspace) for a full guide to
+workspace structure and git backup (recommended private GitHub or GitLab).
diff --git a/src/commands/doctor.test.ts b/src/commands/doctor.test.ts
index 3f0158d00..0144c603b 100644
--- a/src/commands/doctor.test.ts
+++ b/src/commands/doctor.test.ts
@@ -1,6 +1,11 @@
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 
 let originalIsTTY: boolean | undefined;
+let originalStateDir: string | undefined;
+let tempStateDir: string | undefined;
 
 function setStdinTty(value: boolean | undefined) {
   try {
@@ -16,14 +21,34 @@ function setStdinTty(value: boolean | undefined) {
 beforeEach(() => {
   originalIsTTY = process.stdin.isTTY;
   setStdinTty(true);
+  // Use a throwaway state dir so doctor's checks never touch real state.
+  originalStateDir = process.env.CLAWDBOT_STATE_DIR;
+  tempStateDir = fs.mkdtempSync(
+    path.join(os.tmpdir(), "clawdbot-doctor-state-"),
+  );
+  process.env.CLAWDBOT_STATE_DIR = tempStateDir;
+  fs.mkdirSync(path.join(tempStateDir, "agents", "main", "sessions"), {
+    recursive: true,
+  });
+  fs.mkdirSync(path.join(tempStateDir, "credentials"), { recursive: true });
 });
 
 afterEach(() => {
   setStdinTty(originalIsTTY);
+  // Restore CLAWDBOT_STATE_DIR exactly as it was (unset stays unset).
+  if (originalStateDir === undefined) {
+    delete process.env.CLAWDBOT_STATE_DIR;
+  } else {
+    process.env.CLAWDBOT_STATE_DIR = originalStateDir;
+  }
+  if (tempStateDir) {
+    fs.rmSync(tempStateDir, { recursive: true, force: true });
+    tempStateDir = undefined;
+  }
 });
 
 const readConfigFileSnapshot = vi.fn();
 const confirm = vi.fn().mockResolvedValue(true);
 const select = vi.fn().mockResolvedValue("node");
 const note = vi.fn();
 const writeConfigFile = vi.fn().mockResolvedValue(undefined);
@@ -737,4 +762,37 @@ describe("doctor", () => {
     expect(profiles["anthropic:me@example.com"]).toBeTruthy();
     expect(profiles["anthropic:default"]).toBeUndefined();
   });
+
+  it("warns when the state directory is missing", async () => {
+    readConfigFileSnapshot.mockResolvedValue({
+      path: "/tmp/clawdbot.json",
+      exists: true,
+      raw: "{}",
+      parsed: {},
+      valid: true,
+      config: {},
+      issues: [],
+      legacyIssues: [],
+    });
+
+    // mkdtemp + rm yields a unique path that is guaranteed not to exist.
+    const missingDir = fs.mkdtempSync(
+      path.join(os.tmpdir(), "clawdbot-missing-state-"),
+    );
+    fs.rmSync(missingDir, { recursive: true, force: true });
+    process.env.CLAWDBOT_STATE_DIR = missingDir;
+    note.mockClear();
+
+    const { doctorCommand } = await import("./doctor.js");
+    await doctorCommand(
+      { log: vi.fn(), error: vi.fn(), exit: vi.fn() },
+      { nonInteractive: true, workspaceSuggestions: false },
+    );
+
+    const stateNote = note.mock.calls.find(
+      (call) => call[1] === "State integrity",
+    );
+    expect(stateNote).toBeTruthy();
+    expect(String(stateNote?.[0])).toContain("CRITICAL");
+  });
 });
diff --git a/src/commands/doctor.ts b/src/commands/doctor.ts
index 3acc69e18..4b1c4f8fa 100644
--- a/src/commands/doctor.ts
+++ b/src/commands/doctor.ts
@@ -23,7 +23,19 @@ import {
   readConfigFileSnapshot,
   writeConfigFile,
 } from "../config/config.js";
-import { resolveGatewayPort, resolveIsNixMode } from "../config/paths.js";
+import {
+  resolveGatewayPort,
+  resolveIsNixMode,
+  resolveOAuthDir,
+  resolveStateDir,
+} from "../config/paths.js";
+import {
+  loadSessionStore,
+
resolveMainSessionKey, + resolveSessionFilePath, + resolveSessionTranscriptsDirForAgent, + resolveStorePath, +} from "../config/sessions.js"; import { GATEWAY_LAUNCH_AGENT_LABEL } from "../daemon/constants.js"; import { findExtraGatewayServices, @@ -39,6 +51,7 @@ import { readProviderAllowFromStore } from "../pairing/pairing-store.js"; import { runCommandWithTimeout, runExec } from "../process/exec.js"; import type { RuntimeEnv } from "../runtime.js"; import { defaultRuntime } from "../runtime.js"; +import { DEFAULT_AGENT_ID, normalizeAgentId } from "../routing/session-key.js"; import { readTelegramAllowFromStore } from "../telegram/pairing-store.js"; import { resolveTelegramToken } from "../telegram/token.js"; import { normalizeE164, resolveUserPath, sleep } from "../utils.js"; @@ -431,6 +444,305 @@ function createDoctorPrompter(params: { }; } +function existsDir(dir: string): boolean { + try { + return fs.existsSync(dir) && fs.statSync(dir).isDirectory(); + } catch { + return false; + } +} + +function existsFile(filePath: string): boolean { + try { + return fs.existsSync(filePath) && fs.statSync(filePath).isFile(); + } catch { + return false; + } +} + +function canWriteDir(dir: string): boolean { + try { + fs.accessSync(dir, fs.constants.W_OK); + return true; + } catch { + return false; + } +} + +function ensureDir(dir: string): { ok: boolean; error?: string } { + try { + fs.mkdirSync(dir, { recursive: true }); + return { ok: true }; + } catch (err) { + return { ok: false, error: String(err) }; + } +} + +function dirPermissionHint(dir: string): string | null { + const uid = typeof process.getuid === "function" ? process.getuid() : null; + const gid = typeof process.getgid === "function" ? process.getgid() : null; + try { + const stat = fs.statSync(dir); + if (uid !== null && stat.uid !== uid) { + return `Owner mismatch (uid ${stat.uid}). Run: sudo chown -R $USER "${dir}"`; + } + if (gid !== null && stat.gid !== gid) { + return `Group mismatch (gid ${stat.gid}). 
If access fails, run: sudo chown -R $USER "${dir}"`; + } + } catch { + return null; + } + return null; +} + +function addUserRwx(mode: number): number { + const perms = mode & 0o777; + return perms | 0o700; +} + +function countJsonlLines(filePath: string): number { + try { + const raw = fs.readFileSync(filePath, "utf-8"); + if (!raw) return 0; + let count = 0; + for (let i = 0; i < raw.length; i += 1) { + if (raw[i] === "\n") count += 1; + } + if (!raw.endsWith("\n")) count += 1; + return count; + } catch { + return 0; + } +} + +function findOtherStateDirs(stateDir: string): string[] { + const resolvedState = path.resolve(stateDir); + const roots = + process.platform === "darwin" + ? ["/Users"] + : process.platform === "linux" + ? ["/home"] + : []; + const found: string[] = []; + for (const root of roots) { + let entries: fs.Dirent[] = []; + try { + entries = fs.readdirSync(root, { withFileTypes: true }); + } catch { + continue; + } + for (const entry of entries) { + if (!entry.isDirectory()) continue; + if (entry.name.startsWith(".")) continue; + const candidate = path.resolve(root, entry.name, ".clawdbot"); + if (candidate === resolvedState) continue; + if (existsDir(candidate)) found.push(candidate); + } + } + return found; +} + +async function noteStateIntegrity( + cfg: ClawdbotConfig, + prompter: DoctorPrompter, +) { + const warnings: string[] = []; + const changes: string[] = []; + const env = process.env; + const homedir = os.homedir; + const stateDir = resolveStateDir(env, homedir); + const defaultStateDir = path.join(homedir(), ".clawdbot"); + const oauthDir = resolveOAuthDir(env, stateDir); + const agentId = normalizeAgentId( + cfg.routing?.defaultAgentId ?? 
DEFAULT_AGENT_ID, + ); + const sessionsDir = resolveSessionTranscriptsDirForAgent( + agentId, + env, + homedir, + ); + const storePath = resolveStorePath(cfg.session?.store, { agentId }); + const storeDir = path.dirname(storePath); + + let stateDirExists = existsDir(stateDir); + if (!stateDirExists) { + warnings.push( + `- CRITICAL: state directory missing (${stateDir}). Sessions, credentials, logs, and config are stored there.`, + ); + if (cfg.gateway?.mode === "remote") { + warnings.push( + "- Gateway is in remote mode; run doctor on the remote host where the gateway runs.", + ); + } + const create = await prompter.confirmSkipInNonInteractive({ + message: `Create ${stateDir} now?`, + initialValue: false, + }); + if (create) { + const created = ensureDir(stateDir); + if (created.ok) { + changes.push(`- Created ${stateDir}`); + stateDirExists = true; + } else { + warnings.push(`- Failed to create ${stateDir}: ${created.error}`); + } + } + } + + if (stateDirExists && !canWriteDir(stateDir)) { + warnings.push(`- State directory not writable (${stateDir}).`); + const hint = dirPermissionHint(stateDir); + if (hint) warnings.push(` ${hint}`); + const repair = await prompter.confirmSkipInNonInteractive({ + message: `Repair permissions on ${stateDir}?`, + initialValue: true, + }); + if (repair) { + try { + const stat = fs.statSync(stateDir); + const target = addUserRwx(stat.mode); + fs.chmodSync(stateDir, target); + changes.push(`- Repaired permissions on ${stateDir}`); + } catch (err) { + warnings.push(`- Failed to repair ${stateDir}: ${String(err)}`); + } + } + } + + if (stateDirExists) { + const dirCandidates = new Map(); + dirCandidates.set(sessionsDir, "Sessions dir"); + dirCandidates.set(storeDir, "Session store dir"); + dirCandidates.set(oauthDir, "OAuth dir"); + + for (const [dir, label] of dirCandidates) { + if (!existsDir(dir)) { + warnings.push(`- ${label} missing (${dir}).`); + const create = await prompter.confirmSkipInNonInteractive({ + message: `Create 
${label} at ${dir}?`, + initialValue: true, + }); + if (create) { + const created = ensureDir(dir); + if (created.ok) { + changes.push(`- Created ${label}: ${dir}`); + } else { + warnings.push(`- Failed to create ${dir}: ${created.error}`); + } + } + continue; + } + if (!canWriteDir(dir)) { + warnings.push(`- ${label} not writable (${dir}).`); + const hint = dirPermissionHint(dir); + if (hint) warnings.push(` ${hint}`); + const repair = await prompter.confirmSkipInNonInteractive({ + message: `Repair permissions on ${label}?`, + initialValue: true, + }); + if (repair) { + try { + const stat = fs.statSync(dir); + const target = addUserRwx(stat.mode); + fs.chmodSync(dir, target); + changes.push(`- Repaired permissions on ${label}: ${dir}`); + } catch (err) { + warnings.push(`- Failed to repair ${dir}: ${String(err)}`); + } + } + } + } + } + + const extraStateDirs = new Set(); + if (path.resolve(stateDir) !== path.resolve(defaultStateDir)) { + if (existsDir(defaultStateDir)) extraStateDirs.add(defaultStateDir); + } + for (const other of findOtherStateDirs(stateDir)) { + extraStateDirs.add(other); + } + if (extraStateDirs.size > 0) { + warnings.push( + [ + "- Multiple state directories detected. This can split session history.", + ...Array.from(extraStateDirs).map((dir) => ` - ${dir}`), + ` Active state dir: ${stateDir}`, + ].join("\n"), + ); + } + + const store = loadSessionStore(storePath); + const entries = Object.entries(store).filter( + ([, entry]) => entry && typeof entry === "object", + ); + if (entries.length > 0) { + const recent = entries + .slice() + .sort((a, b) => { + const aUpdated = typeof a[1].updatedAt === "number" ? a[1].updatedAt : 0; + const bUpdated = typeof b[1].updatedAt === "number" ? 
b[1].updatedAt : 0; + return bUpdated - aUpdated; + }) + .slice(0, 5); + const missing = recent.filter(([, entry]) => { + const sessionId = entry.sessionId; + if (!sessionId) return false; + const transcriptPath = resolveSessionFilePath(sessionId, entry, { + agentId, + }); + return !existsFile(transcriptPath); + }); + if (missing.length > 0) { + warnings.push( + `- ${missing.length}/${recent.length} recent sessions are missing transcripts. Check for deleted session files or split state dirs.`, + ); + } + + const mainKey = resolveMainSessionKey(cfg); + const mainEntry = store[mainKey]; + if (mainEntry?.sessionId) { + const transcriptPath = resolveSessionFilePath( + mainEntry.sessionId, + mainEntry, + { agentId }, + ); + if (!existsFile(transcriptPath)) { + warnings.push( + `- Main session transcript missing (${transcriptPath}). History will appear to reset.`, + ); + } else { + const lineCount = countJsonlLines(transcriptPath); + if (lineCount <= 1) { + warnings.push( + `- Main session transcript has only ${lineCount} line. 
Session history may not be appending.`, + ); + } + } + } + } + + if (warnings.length > 0) { + note(warnings.join("\n"), "State integrity"); + } + if (changes.length > 0) { + note(changes.join("\n"), "Doctor changes"); + } +} + +function noteWorkspaceBackupTip(workspaceDir: string) { + if (!existsDir(workspaceDir)) return; + const gitMarker = path.join(workspaceDir, ".git"); + if (fs.existsSync(gitMarker)) return; + note( + [ + "- Tip: back up the workspace in a private git repo (GitHub or GitLab).", + "- Keep ~/.clawdbot out of git; it contains credentials and session history.", + "- Details: /concepts/agent-workspace#git-backup-recommended", + ].join("\n"), + "Workspace", + ); +} + async function maybeRepairAnthropicOAuthProfileId( cfg: ClawdbotConfig, prompter: DoctorPrompter, @@ -1006,6 +1318,8 @@ export async function doctorCommand( } } + await noteStateIntegrity(cfg, prompter); + cfg = await maybeRepairSandboxImages(cfg, runtime, prompter); await maybeMigrateLegacyGatewayService(cfg, runtime, prompter); @@ -1124,6 +1438,7 @@ export async function doctorCommand( const workspaceDir = resolveUserPath( cfg.agent?.workspace ?? DEFAULT_WORKSPACE, ); + noteWorkspaceBackupTip(workspaceDir); if (await shouldSuggestMemorySystem(workspaceDir)) { note(MEMORY_SYSTEM_PROMPT, "Workspace"); }