From 2714ed503b21848c708cbe5bceef9a80ec672ada Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 7 Dec 2025 04:33:22 +0000 Subject: [PATCH] CLI: add health probe command --- docs/health.md | 23 ++++++ docs/mac/health.md | 22 ++++++ src/cli/program.ts | 24 ++++++ src/commands/health.test.ts | 62 +++++++++++++++ src/commands/health.ts | 146 ++++++++++++++++++++++++++++++++++++ 5 files changed, 277 insertions(+) create mode 100644 docs/health.md create mode 100644 docs/mac/health.md create mode 100644 src/commands/health.test.ts create mode 100644 src/commands/health.ts diff --git a/docs/health.md b/docs/health.md new file mode 100644 index 000000000..935892580 --- /dev/null +++ b/docs/health.md @@ -0,0 +1,23 @@ +# Health Checks (CLI) + +Short guide to verify the WhatsApp Web / Baileys stack without guessing. + +## Quick checks +- `pnpm clawdis status --json` — confirms creds exist (`web.linked`), shows auth age (`authAgeMs`), heartbeat interval, and where the session store lives. +- `pnpm clawdis heartbeat --verbose --dry-run` — runs the heartbeat path end-to-end (session resolution, message creation) without sending anything. Drop `--dry-run` or add `--message "Ping"` to actually send. +- `pnpm clawdis relay --verbose --heartbeat-now` — spins the full monitor loop, fires a heartbeat immediately, and will reconnect per `web.reconnect` settings. Good for soak testing. +- Logs: tail `/tmp/clawdis/clawdis.log` and filter for `web-heartbeat`, `web-reconnect`, `web-auto-reply`, `web-inbound`. + +## Deep diagnostics +- Creds on disk: `ls -l ~/.clawdis/credentials/creds.json` (mtime should be recent). +- Session store: `ls -l ~/.clawdis/sessions.json` (path can be overridden in config). Count and recent recipients are surfaced via `status`. +- IPC socket (if relay is running): `ls -l ~/.clawdis/clawdis.sock`. +- Relink flow: `pnpm clawdis logout && pnpm clawdis login --provider web --verbose` when status codes 409–515 or `loggedOut` appear in logs. 
+ +## When something fails +- `logged out` or status 409–515 → relink with `clawdis logout` then `clawdis login --provider web`. +- Repeated reconnect exits → tune `web.reconnect` (flags: `--web-retries`, `--web-retry-initial`, `--web-retry-max`) and rerun relay. +- No inbound messages → confirm linked phone is online and sender is allowed; use `pnpm clawdis heartbeat --all --verbose` to test each known recipient. + +## Dedicated "health" command +`clawdis health --json` runs a connect-only probe (no sends) and reports: linked creds, auth age, Baileys connect result/status code, session-store summary, and IPC presence. It exits non-zero when creds are not linked or the connect fails; pass `--timeout` (milliseconds, default 10000) to bound the connect attempt. diff --git a/docs/mac/health.md b/docs/mac/health.md new file mode 100644 index 000000000..c50fcce1d --- /dev/null +++ b/docs/mac/health.md @@ -0,0 +1,22 @@ +# Health Checks on macOS + +How to see whether the WhatsApp Web/Baileys bridge is healthy from the menu bar app. + +## Menu bar (planned) +- Status dot expands beyond “relay running” to reflect Baileys health: + - Green: linked + socket opened recently. + - Orange: connecting/retrying. + - Red: logged out or probe failed. +- Secondary line reads "Web: linked · auth 12m · socket ok" or shows the failure reason. +- "Run Health Check" menu item triggers an on-demand probe. + +## Settings (planned) +- General tab gains a Health card showing: linked E.164, auth age, session-store path/count, last check time, last error/status code, and buttons for Run Health Check / Reveal Logs / Relink. +- Uses a cached snapshot so the UI loads instantly and falls back gracefully when offline. + +## How the probe works (planned) +- App runs `clawdis health --json` via `ShellRunner` every ~60s and on demand. The probe loads creds, attempts a short Baileys connect, and reports status without sending messages. +- Cache the last good snapshot and the last error separately to avoid flicker; show the timestamp of each. 
+
+## Until the UI ships
+- Use the CLI flow in `docs/health.md` (status, heartbeat dry-run, relay heartbeat) and tail `/tmp/clawdis/clawdis.log` for `web-heartbeat` / `web-reconnect`.
diff --git a/src/cli/program.ts b/src/cli/program.ts
index 1e3bfb814..cac2a0a29 100644
--- a/src/cli/program.ts
+++ b/src/cli/program.ts
@@ -3,6 +3,7 @@ import { Command } from "commander";
 import { agentCommand } from "../commands/agent.js";
 import { sendCommand } from "../commands/send.js";
 import { sessionsCommand } from "../commands/sessions.js";
+import { healthCommand } from "../commands/health.js";
 import { statusCommand } from "../commands/status.js";
 import { loadConfig } from "../config/config.js";
 import { danger, info, setVerbose } from "../globals.js";
@@ -18,6 +19,7 @@ import {
 import { defaultRuntime } from "../runtime.js";
 import { VERSION } from "../version.js";
 import {
+  DEFAULT_HEARTBEAT_SECONDS,
   resolveHeartbeatSeconds,
   resolveReconnectPolicy,
 } from "../web/reconnect.js";
@@ -569,6 +571,28 @@ Examples:
       }
     });
 
+  program
+    .command("health")
+    .description("Probe WhatsApp Web health (creds + Baileys connect) and session store")
+    .option("--json", "Output JSON instead of text", false)
+    .option("--timeout <ms>", "Connection timeout in milliseconds", "10000")
+    .option("--verbose", "Verbose logging", false)
+    .action(async (opts) => {
+      setVerbose(Boolean(opts.verbose));
+      const timeout = opts.timeout ? Number.parseInt(String(opts.timeout), 10) : undefined;
+      if (timeout !== undefined && (Number.isNaN(timeout) || timeout <= 0)) {
+        defaultRuntime.error("--timeout must be a positive integer (milliseconds)");
+        defaultRuntime.exit(1);
+        return;
+      }
+      try {
+        await healthCommand({ json: Boolean(opts.json), timeoutMs: timeout }, defaultRuntime);
+      } catch (err) {
+        defaultRuntime.error(String(err));
+        defaultRuntime.exit(1);
+      }
+    });
+
   program
     .command("sessions")
     .description("List stored conversation sessions")
diff --git a/src/commands/health.test.ts b/src/commands/health.test.ts
new file mode 100644
index 000000000..572a2097f
--- /dev/null
+++ b/src/commands/health.test.ts
@@ -0,0 +1,74 @@
+import { describe, expect, it, vi, beforeEach } from "vitest";
+
+import { healthCommand } from "./health.js";
+
+const runtime = {
+  log: vi.fn(),
+  error: vi.fn(),
+  exit: vi.fn(),
+};
+
+vi.mock("../config/config.js", () => ({
+  loadConfig: () => ({ web: {}, inbound: {} }),
+}));
+
+vi.mock("../config/sessions.js", () => ({
+  resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
+  loadSessionStore: vi.fn(() => ({
+    "+1555": { updatedAt: Date.now() - 60_000 },
+  })),
+}));
+
+const waitForWaConnection = vi.fn();
+const webAuthExists = vi.fn();
+
+vi.mock("../web/session.js", () => ({
+  createWaSocket: vi.fn(async () => ({ ws: { close: vi.fn() }, ev: { on: vi.fn() } })),
+  waitForWaConnection: (...args: unknown[]) => waitForWaConnection(...args),
+  webAuthExists: (...args: unknown[]) => webAuthExists(...args),
+  getStatusCode: () => undefined,
+  getWebAuthAgeMs: () => 5000,
+  logWebSelfId: vi.fn(),
+}));
+
+vi.mock("../web/reconnect.js", () => ({
+  resolveHeartbeatSeconds: () => 60,
+}));
+
+describe("healthCommand", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it("outputs JSON when linked and connect succeeds", async () => {
+    webAuthExists.mockResolvedValue(true);
+    waitForWaConnection.mockResolvedValue(undefined);
+
+    await healthCommand({ json: true, timeoutMs: 5000 }, runtime as never);
+
+    expect(runtime.exit).not.toHaveBeenCalled();
+    const logged = runtime.log.mock.calls[0][0] as string;
+    const parsed = JSON.parse(logged);
+    expect(parsed.web.linked).toBe(true);
+    expect(parsed.web.connect.ok).toBe(true);
+    expect(parsed.sessions.count).toBe(1);
+  });
+
+  it("reports a failed connect and exits non-zero", async () => {
+    webAuthExists.mockResolvedValue(true);
+    waitForWaConnection.mockRejectedValue(new Error("boom"));
+
+    await healthCommand({ json: true, timeoutMs: 5000 }, runtime as never);
+
+    const parsed = JSON.parse(runtime.log.mock.calls[0][0] as string);
+    expect(parsed.web.connect.ok).toBe(false);
+    expect(parsed.web.connect.error).toBe("boom");
+    expect(runtime.exit).toHaveBeenCalledWith(1);
+  });
+
+  it("exits non-zero when not linked", async () => {
+    webAuthExists.mockResolvedValue(false);
+    await healthCommand({ json: true }, runtime as never);
+    expect(runtime.exit).toHaveBeenCalledWith(1);
+  });
+});
diff --git a/src/commands/health.ts b/src/commands/health.ts
new file mode 100644
index 000000000..4c0069ca2
--- /dev/null
+++ b/src/commands/health.ts
@@ -0,0 +1,182 @@
+import fs from "node:fs";
+import path from "node:path";
+
+import { loadConfig } from "../config/config.js";
+import {
+  loadSessionStore,
+  resolveStorePath,
+} from "../config/sessions.js";
+import { info } from "../globals.js";
+import type { RuntimeEnv } from "../runtime.js";
+import { resolveHeartbeatSeconds } from "../web/reconnect.js";
+import {
+  createWaSocket,
+  getStatusCode,
+  getWebAuthAgeMs,
+  logWebSelfId,
+  waitForWaConnection,
+  webAuthExists,
+} from "../web/session.js";
+
+/** Outcome of one connect-only probe against WhatsApp Web. */
+type HealthConnect = {
+  ok: boolean;
+  status?: number | null;
+  error?: string | null;
+  elapsedMs: number;
+};
+
+/** Snapshot printed by `clawdis health` (serialized as-is with `--json`). */
+type HealthSummary = {
+  ts: number;
+  durationMs: number;
+  web: {
+    linked: boolean;
+    authAgeMs: number | null;
+    connect?: HealthConnect;
+  };
+  heartbeatSeconds: number;
+  sessions: {
+    path: string;
+    count: number;
+    recent: Array<{ key: string; updatedAt: number | null; age: number | null }>;
+  };
+  ipc: { path: string; exists: boolean };
+};
+
+const DEFAULT_TIMEOUT_MS = 10_000;
+const MIN_TIMEOUT_MS = 1000;
+const RECENT_SESSION_LIMIT = 5;
+
+/** Renders a millisecond duration as whole minutes, e.g. `12m`. */
+function formatMinutes(ms: number): string {
+  return `${Math.round(ms / 60_000)}m`;
+}
+
+/**
+ * Creates a socket via `createWaSocket`, waits for `waitForWaConnection` or
+ * the timeout (whichever settles first), then closes the socket.
+ *
+ * Socket creation errors, connection errors and timeouts are reported as
+ * `{ ok: false, ... }` instead of being thrown, so the caller can still print
+ * a full summary.
+ */
+async function probeWebConnect(timeoutMs: number): Promise<HealthConnect> {
+  const started = Date.now();
+  let sock: Awaited<ReturnType<typeof createWaSocket>> | undefined;
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  try {
+    sock = await createWaSocket(false, false);
+    const timeout = new Promise<never>((_resolve, reject) => {
+      timer = setTimeout(() => reject(new Error("timeout")), timeoutMs);
+    });
+    await Promise.race([waitForWaConnection(sock), timeout]);
+    return { ok: true, status: null, error: null, elapsedMs: Date.now() - started };
+  } catch (err) {
+    return {
+      ok: false,
+      status: getStatusCode(err),
+      error: err instanceof Error ? err.message : String(err),
+      elapsedMs: Date.now() - started,
+    };
+  } finally {
+    // A pending timer would keep the CLI process alive until it fires.
+    if (timer !== undefined) clearTimeout(timer);
+    try {
+      sock?.ws?.close();
+    } catch {
+      // Best-effort cleanup; the probe result is already decided.
+    }
+  }
+}
+
+/**
+ * Collects the health summary (creds, optional connect probe, session store,
+ * IPC socket) and prints it via `runtime.log`, as JSON or as text.
+ *
+ * Calls `runtime.exit(1)` when no web credentials are linked or when the
+ * connect probe fails. The probe is skipped entirely when not linked.
+ *
+ * @param opts.json Print the summary as pretty-printed JSON instead of text.
+ * @param opts.timeoutMs Connect timeout in ms; defaults to 10s, minimum 1s.
+ * @param runtime Sink for output (`log`) and process exit (`exit`).
+ */
+export async function healthCommand(
+  opts: { json?: boolean; timeoutMs?: number },
+  runtime: RuntimeEnv,
+): Promise<void> {
+  // Started first so `durationMs` covers the whole check, not just the probe.
+  const start = Date.now();
+  const cfg = loadConfig();
+  const linked = await webAuthExists();
+  const authAgeMs = getWebAuthAgeMs();
+  const heartbeatSeconds = resolveHeartbeatSeconds(cfg, undefined);
+  const storePath = resolveStorePath(cfg.inbound?.reply?.session?.store);
+  const store = loadSessionStore(storePath);
+  const sessions = Object.entries(store)
+    .filter(([key]) => key !== "global" && key !== "unknown")
+    .map(([key, entry]) => ({ key, updatedAt: entry?.updatedAt ?? 0 }))
+    .sort((a, b) => b.updatedAt - a.updatedAt);
+  const now = Date.now();
+  const recent = sessions.slice(0, RECENT_SESSION_LIMIT).map((s) => ({
+    key: s.key,
+    // 0 is the "never updated" sentinel from the map above.
+    updatedAt: s.updatedAt || null,
+    age: s.updatedAt ? now - s.updatedAt : null,
+  }));
+
+  const ipcPath = path.join(process.env.HOME ?? "", ".clawdis", "clawdis.sock");
+  const ipcExists = fs.existsSync(ipcPath);
+
+  const timeoutMs = Math.max(MIN_TIMEOUT_MS, opts.timeoutMs ?? DEFAULT_TIMEOUT_MS);
+  const connect = linked ? await probeWebConnect(timeoutMs) : undefined;
+
+  const summary: HealthSummary = {
+    ts: Date.now(),
+    durationMs: Date.now() - start,
+    web: { linked, authAgeMs, connect },
+    heartbeatSeconds,
+    sessions: {
+      path: storePath,
+      count: sessions.length,
+      recent,
+    },
+    ipc: { path: ipcPath, exists: ipcExists },
+  };
+
+  const fatal = !linked || connect?.ok === false;
+
+  if (opts.json) {
+    runtime.log(JSON.stringify(summary, null, 2));
+  } else {
+    const authAge = authAgeMs !== null ? formatMinutes(authAgeMs) : "unknown";
+    runtime.log(
+      linked
+        ? `Web: linked (auth age ${authAge})`
+        : "Web: not linked (run clawdis login)",
+    );
+    if (linked) {
+      logWebSelfId(runtime, true);
+    }
+    if (connect) {
+      const base = connect.ok
+        ? info(`Connect: ok (${connect.elapsedMs}ms)`)
+        : `Connect: failed (${connect.status ?? "unknown"})`;
+      runtime.log(base + (connect.error ? ` - ${connect.error}` : ""));
+    }
+    runtime.log(info(`Heartbeat interval: ${heartbeatSeconds}s`));
+    runtime.log(info(`Session store: ${storePath} (${sessions.length} entries)`));
+    if (recent.length > 0) {
+      runtime.log("Recent sessions:");
+      for (const r of recent) {
+        const activity = r.age !== null ? `${formatMinutes(r.age)} ago` : "no activity";
+        runtime.log(`- ${r.key} (${activity})`);
+      }
+    }
+    runtime.log(info(`IPC socket: ${ipcExists ? "present" : "missing"} (${ipcPath})`));
+  }
+
+  if (fatal) {
+    runtime.exit(1);
+  }
+}