CLI: add health probe command

This commit is contained in:
Peter Steinberger
2025-12-07 04:33:22 +00:00
parent 78d96355dd
commit 2714ed503b
5 changed files with 277 additions and 0 deletions

23
docs/health.md Normal file
View File

@@ -0,0 +1,23 @@
# Health Checks (CLI)
Short guide to verify the WhatsApp Web / Baileys stack without guessing.
## Quick checks
- `pnpm clawdis status --json` — confirms creds exist (`web.linked`), shows auth age (`authAgeMs`), heartbeat interval, and where the session store lives.
- `pnpm clawdis heartbeat --verbose --dry-run` — runs the heartbeat path end-to-end (session resolution, message creation) without sending anything. Drop `--dry-run` or add `--message "Ping"` to actually send.
- `pnpm clawdis relay --verbose --heartbeat-now` — spins the full monitor loop, fires a heartbeat immediately, and will reconnect per `web.reconnect` settings. Good for soak testing.
- Logs: tail `/tmp/clawdis/clawdis.log` and filter for `web-heartbeat`, `web-reconnect`, `web-auto-reply`, `web-inbound`.
## Deep diagnostics
- Creds on disk: `ls -l ~/.clawdis/credentials/creds.json` (mtime should be recent).
- Session store: `ls -l ~/.clawdis/sessions.json` (path can be overridden in config). Count and recent recipients are surfaced via `status`.
- IPC socket (if relay is running): `ls -l ~/.clawdis/clawdis.sock`.
- Relink flow: `pnpm clawdis logout && pnpm clawdis login --provider web --verbose` when status codes 409–515 or `loggedOut` appear in logs.
## When something fails
- `logged out` or status 409–515 → relink with `clawdis logout` then `clawdis login --provider web`.
- Repeated reconnect exits → tune `web.reconnect` (flags: `--web-retries`, `--web-retry-initial`, `--web-retry-max`) and rerun relay.
- No inbound messages → confirm linked phone is online and sender is allowed; use `pnpm clawdis heartbeat --all --verbose` to test each known recipient.
## Planned "health" command
A dedicated `clawdis health --json` probe (connect-only, no sends) is planned to report: linked creds, auth age, Baileys connect result/status code, session-store summary, and IPC presence. Until it lands, use the checks above.

22
docs/mac/health.md Normal file
View File

@@ -0,0 +1,22 @@
# Health Checks on macOS
How to see whether the WhatsApp Web/Baileys bridge is healthy from the menu bar app.
## Menu bar (planned)
- Status dot expands beyond “relay running” to reflect Baileys health:
  - Green: linked + socket opened recently.
  - Orange: connecting/retrying.
  - Red: logged out or probe failed.
- Secondary line reads "Web: linked · auth 12m · socket ok" or shows the failure reason.
- "Run Health Check" menu item triggers an on-demand probe.
## Settings (planned)
- General tab gains a Health card showing: linked E.164, auth age, session-store path/count, last check time, last error/status code, and buttons for Run Health Check / Reveal Logs / Relink.
- Uses a cached snapshot so the UI loads instantly and falls back gracefully when offline.
## How the probe works (planned)
- App runs `clawdis health --json` via `ShellRunner` every ~60s and on demand. The probe loads creds, attempts a short Baileys connect, and reports status without sending messages.
- Cache the last good snapshot and the last error separately to avoid flicker; show the timestamp of each.
## Until the UI ships
- Use the CLI flow in `docs/health.md` (status, heartbeat dry-run, relay heartbeat) and tail `/tmp/clawdis/clawdis.log` for `web-heartbeat` / `web-reconnect`.

View File

@@ -3,6 +3,7 @@ import { Command } from "commander";
import { agentCommand } from "../commands/agent.js";
import { sendCommand } from "../commands/send.js";
import { sessionsCommand } from "../commands/sessions.js";
import { healthCommand } from "../commands/health.js";
import { statusCommand } from "../commands/status.js";
import { loadConfig } from "../config/config.js";
import { danger, info, setVerbose } from "../globals.js";
@@ -18,6 +19,7 @@ import {
import { defaultRuntime } from "../runtime.js";
import { VERSION } from "../version.js";
import {
DEFAULT_HEARTBEAT_SECONDS,
resolveHeartbeatSeconds,
resolveReconnectPolicy,
} from "../web/reconnect.js";
@@ -569,6 +571,28 @@ Examples:
}
});
// `health` subcommand: validates --timeout, then delegates to healthCommand.
// Any error thrown by healthCommand is printed and turned into exit code 1.
program
  .command("health")
  .description("Probe WhatsApp Web health (creds + Baileys connect) and session store")
  .option("--json", "Output JSON instead of text", false)
  .option("--timeout <ms>", "Connection timeout in milliseconds", "10000")
  .option("--verbose", "Verbose logging", false)
  .action(async (opts) => {
    setVerbose(Boolean(opts.verbose));

    // An empty/absent value leaves the timeout undefined so healthCommand
    // applies its own default.
    let timeout: number | undefined;
    if (opts.timeout) {
      const raw = String(opts.timeout).trim();
      // Require plain decimal digits. parseInt would silently accept inputs
      // such as "1e4" (-> 1), "12.5" (-> 12) or "5000ms" (-> 5000), which
      // contradicts the "positive integer" contract stated in the error.
      const parsed = /^\d+$/.test(raw) ? Number(raw) : Number.NaN;
      if (!Number.isSafeInteger(parsed) || parsed <= 0) {
        defaultRuntime.error("--timeout must be a positive integer (milliseconds)");
        defaultRuntime.exit(1);
        return;
      }
      timeout = parsed;
    }

    try {
      await healthCommand({ json: Boolean(opts.json), timeoutMs: timeout }, defaultRuntime);
    } catch (err) {
      defaultRuntime.error(String(err));
      defaultRuntime.exit(1);
    }
  });
program
.command("sessions")
.description("List stored conversation sessions")

View File

@@ -0,0 +1,62 @@
import { describe, expect, it, vi, beforeEach } from "vitest";
import { healthCommand } from "./health.js";
// Stand-in for RuntimeEnv: every method is a spy so tests can inspect what
// healthCommand logged and whether it requested a process exit.
const runtime = {
log: vi.fn(),
error: vi.fn(),
exit: vi.fn(),
};
// Minimal config: empty `web` and `inbound` sections.
vi.mock("../config/config.js", () => ({
loadConfig: () => ({ web: {}, inbound: {} }),
}));
// Session store with a fixed path and a single entry ("+1555") last updated
// 60 seconds before the store is loaded.
vi.mock("../config/sessions.js", () => ({
resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
loadSessionStore: vi.fn(() => ({
"+1555": { updatedAt: Date.now() - 60_000 },
})),
}));
// Per-test controllable spies for the two session functions whose outcome
// each test case sets explicitly.
const waitForWaConnection = vi.fn();
const webAuthExists = vi.fn();
// The mocked session module forwards to the spies above through wrapper
// arrows, so the consts are only dereferenced when a wrapper is actually
// called. NOTE(review): this matters because Vitest hoists vi.mock calls
// above other module code — confirm against the Vitest docs before
// simplifying to direct references.
vi.mock("../web/session.js", () => ({
createWaSocket: vi.fn(async () => ({ ws: { close: vi.fn() }, ev: { on: vi.fn() } })),
waitForWaConnection: (...args: unknown[]) => waitForWaConnection(...args),
webAuthExists: (...args: unknown[]) => webAuthExists(...args),
getStatusCode: () => undefined,
getWebAuthAgeMs: () => 5000,
logWebSelfId: vi.fn(),
}));
// Fixed heartbeat interval of 60 seconds.
vi.mock("../web/reconnect.js", () => ({
resolveHeartbeatSeconds: () => 60,
}));
// Behavioural tests for healthCommand, run against the mocked config,
// session-store and web-session modules declared earlier in this file.
describe("healthCommand", () => {
  // Wipe recorded mock calls so each case only sees its own activity.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("outputs JSON when linked and connect succeeds", async () => {
    // Arrange: creds exist and the connection wait resolves.
    webAuthExists.mockResolvedValue(true);
    waitForWaConnection.mockResolvedValue(undefined);

    await healthCommand({ json: true, timeoutMs: 5000 }, runtime as never);

    expect(runtime.exit).not.toHaveBeenCalled();

    // The first thing logged must be the JSON summary.
    const firstLogArg = runtime.log.mock.calls[0][0] as string;
    const summary = JSON.parse(firstLogArg);
    expect(summary.web.linked).toBe(true);
    expect(summary.web.connect.ok).toBe(true);
    expect(summary.sessions.count).toBe(1);
  });

  it("exits non-zero when not linked", async () => {
    // Arrange: no creds on disk.
    webAuthExists.mockResolvedValue(false);

    await healthCommand({ json: true }, runtime as never);

    expect(runtime.exit).toHaveBeenCalledWith(1);
  });
});

146
src/commands/health.ts Normal file
View File

@@ -0,0 +1,146 @@
import fs from "node:fs";
import path from "node:path";
import { loadConfig } from "../config/config.js";
import {
loadSessionStore,
resolveStorePath,
} from "../config/sessions.js";
import { info } from "../globals.js";
import type { RuntimeEnv } from "../runtime.js";
import { resolveHeartbeatSeconds } from "../web/reconnect.js";
import {
createWaSocket,
getStatusCode,
getWebAuthAgeMs,
logWebSelfId,
waitForWaConnection,
webAuthExists,
} from "../web/session.js";
/** Outcome of a single connect-only probe against WhatsApp Web. */
type HealthConnect = {
// True when the connection wait finished before the timeout.
ok: boolean;
// On failure: whatever getStatusCode() extracted from the error; null on success.
status?: number | null;
// On failure: the error message (or stringified error); null on success.
error?: string | null;
// Milliseconds between the start of the probe and its result.
elapsedMs: number;
};
/** Full report produced by the health command (printed as-is in --json mode). */
type HealthSummary = {
// Date.now() at the moment the summary object is built.
ts: number;
// Milliseconds measured from just before the connect probe to summary creation.
durationMs: number;
web: {
// Result of webAuthExists().
linked: boolean;
// Result of getWebAuthAgeMs().
authAgeMs: number | null;
// Only set when `linked` is true; no probe is attempted otherwise.
connect?: HealthConnect;
};
// Result of resolveHeartbeatSeconds() for the loaded config.
heartbeatSeconds: number;
sessions: {
// Resolved session-store path.
path: string;
// Number of store entries, excluding the "global" and "unknown" keys.
count: number;
// Up to five entries, most recently updated first; `age` is in milliseconds.
recent: Array<{ key: string; updatedAt: number | null; age: number | null }>;
};
// Expected IPC socket path and whether a file exists there.
ipc: { path: string; exists: boolean };
};
// Probe timeout used when the caller does not supply `timeoutMs`.
const DEFAULT_TIMEOUT_MS = 10_000;
/**
 * Open a WhatsApp Web socket and wait for it to connect, giving up after
 * `timeoutMs`. Never sends a message.
 *
 * @param timeoutMs Maximum time to wait for the connection, in milliseconds.
 * @returns A HealthConnect describing success, or the failure's status code
 *   and message. Connection failures and timeouts are reported in the result
 *   rather than thrown; only a failure of `createWaSocket` itself propagates.
 */
async function probeWebConnect(timeoutMs: number): Promise<HealthConnect> {
  const started = Date.now();
  const sock = await createWaSocket(false, false);
  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    await Promise.race([
      waitForWaConnection(sock),
      new Promise<never>((_resolve, reject) => {
        timer = setTimeout(() => reject(new Error("timeout")), timeoutMs);
      }),
    ]);
    return { ok: true, status: null, error: null, elapsedMs: Date.now() - started };
  } catch (err) {
    return {
      ok: false,
      status: getStatusCode(err),
      error: err instanceof Error ? err.message : String(err),
      elapsedMs: Date.now() - started,
    };
  } finally {
    // Cancel the pending timeout: left armed, it keeps the event loop (and
    // therefore the CLI process) alive for up to `timeoutMs` after a
    // successful probe.
    clearTimeout(timer);
    try {
      sock.ws?.close();
    } catch {
      // Best-effort cleanup; a close failure must not mask the probe result.
    }
  }
}
/**
 * Collect a health summary (credentials, optional connect probe, session
 * store, IPC socket) and print it through `runtime.log`.
 *
 * @param opts.json When true, print the summary as pretty-printed JSON;
 *   otherwise print human-readable lines.
 * @param opts.timeoutMs Connect-probe timeout in milliseconds. Values below
 *   1000 are raised to 1000; missing or non-finite values fall back to
 *   DEFAULT_TIMEOUT_MS.
 * @param runtime Sink for output and the exit request.
 *
 * Calls `runtime.exit(1)` when the account is not linked or the connect
 * probe failed. The probe is skipped entirely when not linked.
 */
export async function healthCommand(
  opts: { json?: boolean; timeoutMs?: number },
  runtime: RuntimeEnv,
): Promise<void> {
  const cfg = loadConfig();
  const linked = await webAuthExists();
  const authAgeMs = getWebAuthAgeMs();
  const heartbeatSeconds = resolveHeartbeatSeconds(cfg, undefined);

  // Session store: drop the bookkeeping keys and order newest-first.
  const storePath = resolveStorePath(cfg.inbound?.reply?.session?.store);
  const store = loadSessionStore(storePath);
  const sessions = Object.entries(store)
    .filter(([key]) => key !== "global" && key !== "unknown")
    .map(([key, entry]) => ({ key, updatedAt: entry?.updatedAt ?? 0 }))
    .sort((a, b) => b.updatedAt - a.updatedAt);
  // A missing/zero timestamp is reported as null rather than as epoch 0.
  const recent = sessions.slice(0, 5).map((s) => ({
    key: s.key,
    updatedAt: s.updatedAt || null,
    age: s.updatedAt ? Date.now() - s.updatedAt : null,
  }));

  const ipcPath = path.join(process.env.HOME ?? "", ".clawdis", "clawdis.sock");
  const ipcExists = Boolean(ipcPath) && fs.existsSync(ipcPath);

  const start = Date.now();
  // Guard against NaN/Infinity: Math.max would pass them straight through and
  // setTimeout would then fire almost immediately, yielding a bogus "timeout".
  const requestedTimeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const timeoutMs = Number.isFinite(requestedTimeoutMs)
    ? Math.max(1000, requestedTimeoutMs)
    : DEFAULT_TIMEOUT_MS;
  const connect = linked ? await probeWebConnect(timeoutMs) : undefined;

  const summary: HealthSummary = {
    ts: Date.now(),
    durationMs: Date.now() - start,
    web: { linked, authAgeMs, connect },
    heartbeatSeconds,
    sessions: {
      path: storePath,
      count: sessions.length,
      recent,
    },
    ipc: { path: ipcPath, exists: ipcExists },
  };

  // `connect` is only defined when linked, so this covers both failure modes.
  const fatal = !linked || connect?.ok === false;

  if (opts.json) {
    runtime.log(JSON.stringify(summary, null, 2));
  } else {
    // Explicit null check so a legitimate age of 0 ms is not shown as "unknown".
    const authAgeLabel = authAgeMs != null ? `${Math.round(authAgeMs / 60000)}m` : "unknown";
    runtime.log(
      linked
        ? `Web: linked (auth age ${authAgeLabel})`
        : "Web: not linked (run clawdis login)",
    );
    if (linked) {
      logWebSelfId(runtime, true);
    }
    if (connect) {
      const base = connect.ok
        ? info(`Connect: ok (${connect.elapsedMs}ms)`)
        : `Connect: failed (${connect.status ?? "unknown"})`;
      runtime.log(base + (connect.error ? ` - ${connect.error}` : ""));
    }
    runtime.log(info(`Heartbeat interval: ${heartbeatSeconds}s`));
    runtime.log(info(`Session store: ${storePath} (${sessions.length} entries)`));
    if (recent.length > 0) {
      runtime.log("Recent sessions:");
      for (const r of recent) {
        // Reuse the age already computed for the summary so text and JSON agree.
        const when = r.age !== null ? `${Math.round(r.age / 60000)}m ago` : "no activity";
        runtime.log(`- ${r.key} (${when})`);
      }
    }
    runtime.log(info(`IPC socket: ${ipcExists ? "present" : "missing"} (${ipcPath})`));
  }

  if (fatal) {
    runtime.exit(1);
  }
}