CLI: add health probe command
This commit is contained in:
23
docs/health.md
Normal file
23
docs/health.md
Normal file
@@ -0,0 +1,23 @@
|
||||
# Health Checks (CLI)
|
||||
|
||||
Short guide to verify the WhatsApp Web / Baileys stack without guessing.
|
||||
|
||||
## Quick checks
|
||||
- `pnpm clawdis status --json` — confirms creds exist (`web.linked`), shows auth age (`authAgeMs`), heartbeat interval, and where the session store lives.
|
||||
- `pnpm clawdis heartbeat --verbose --dry-run` — runs the heartbeat path end-to-end (session resolution, message creation) without sending anything. Drop `--dry-run` or add `--message "Ping"` to actually send.
|
||||
- `pnpm clawdis relay --verbose --heartbeat-now` — spins the full monitor loop, fires a heartbeat immediately, and will reconnect per `web.reconnect` settings. Good for soak testing.
|
||||
- Logs: tail `/tmp/clawdis/clawdis.log` and filter for `web-heartbeat`, `web-reconnect`, `web-auto-reply`, `web-inbound`.
|
||||
|
||||
## Deep diagnostics
|
||||
- Creds on disk: `ls -l ~/.clawdis/credentials/creds.json` (mtime should be recent).
|
||||
- Session store: `ls -l ~/.clawdis/sessions.json` (path can be overridden in config). Count and recent recipients are surfaced via `status`.
|
||||
- IPC socket (if relay is running): `ls -l ~/.clawdis/clawdis.sock`.
|
||||
- Relink flow: `pnpm clawdis logout && pnpm clawdis login --provider web --verbose` when status codes 409–515 or `loggedOut` appear in logs.
|
||||
|
||||
## When something fails
|
||||
- `logged out` or status 409–515 → relink with `clawdis logout` then `clawdis login --provider web`.
|
||||
- Repeated reconnect exits → tune `web.reconnect` (flags: `--web-retries`, `--web-retry-initial`, `--web-retry-max`) and rerun relay.
|
||||
- No inbound messages → confirm linked phone is online and sender is allowed; use `pnpm clawdis heartbeat --all --verbose` to test each known recipient.
|
||||
|
||||
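## The `health` command

A dedicated `clawdis health --json` probe (connect-only, no sends) reports: linked creds, auth age, Baileys connect result/status code, session-store summary, and IPC presence. Pass `--timeout <ms>` to bound the connect attempt (default 10000) and `--verbose` for extra logging. It exits non-zero when the web session is not linked or the connect attempt fails; use the checks above to dig deeper.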
|
||||
22
docs/mac/health.md
Normal file
22
docs/mac/health.md
Normal file
@@ -0,0 +1,22 @@
|
||||
# Health Checks on macOS
|
||||
|
||||
How to see whether the WhatsApp Web/Baileys bridge is healthy from the menu bar app.
|
||||
|
||||
## Menu bar (planned)
|
||||
- Status dot expands beyond “relay running” to reflect Baileys health:
|
||||
- Green: linked + socket opened recently.
|
||||
- Orange: connecting/retrying.
|
||||
- Red: logged out or probe failed.
|
||||
- Secondary line reads "Web: linked · auth 12m · socket ok" or shows the failure reason.
|
||||
- "Run Health Check" menu item triggers an on-demand probe.
|
||||
|
||||
## Settings (planned)
|
||||
- General tab gains a Health card showing: linked E.164, auth age, session-store path/count, last check time, last error/status code, and buttons for Run Health Check / Reveal Logs / Relink.
|
||||
- Uses a cached snapshot so the UI loads instantly and falls back gracefully when offline.
|
||||
|
||||
## How the probe works (planned)
|
||||
- App runs `clawdis health --json` via `ShellRunner` every ~60s and on demand. The probe loads creds, attempts a short Baileys connect, and reports status without sending messages.
|
||||
- Cache the last good snapshot and the last error separately to avoid flicker; show the timestamp of each.
|
||||
|
||||
## Until the UI ships
|
||||
- Use the CLI flow in `docs/health.md` (status, heartbeat dry-run, relay heartbeat) and tail `/tmp/clawdis/clawdis.log` for `web-heartbeat` / `web-reconnect`.
|
||||
@@ -3,6 +3,7 @@ import { Command } from "commander";
|
||||
import { agentCommand } from "../commands/agent.js";
|
||||
import { sendCommand } from "../commands/send.js";
|
||||
import { sessionsCommand } from "../commands/sessions.js";
|
||||
import { healthCommand } from "../commands/health.js";
|
||||
import { statusCommand } from "../commands/status.js";
|
||||
import { loadConfig } from "../config/config.js";
|
||||
import { danger, info, setVerbose } from "../globals.js";
|
||||
@@ -18,6 +19,7 @@ import {
|
||||
import { defaultRuntime } from "../runtime.js";
|
||||
import { VERSION } from "../version.js";
|
||||
import {
|
||||
DEFAULT_HEARTBEAT_SECONDS,
|
||||
resolveHeartbeatSeconds,
|
||||
resolveReconnectPolicy,
|
||||
} from "../web/reconnect.js";
|
||||
@@ -569,6 +571,28 @@ Examples:
|
||||
}
|
||||
});
|
||||
|
||||
// `health`: connect-only probe of the WhatsApp Web stack (creds, Baileys
// connect, session store). Delegates to healthCommand, which owns the
// exit code for an unhealthy result; this handler only validates flags and
// turns unexpected exceptions into exit code 1.
program
  .command("health")
  .description("Probe WhatsApp Web health (creds + Baileys connect) and session store")
  .option("--json", "Output JSON instead of text", false)
  .option("--timeout <ms>", "Connection timeout in milliseconds", "10000")
  .option("--verbose", "Verbose logging", false)
  .action(async (opts) => {
    setVerbose(Boolean(opts.verbose));

    // Validate the whole string rather than relying on Number.parseInt,
    // which silently accepts trailing garbage ("5s" -> 5, "1.5" -> 1,
    // "10e3" -> 10) and would contradict the error message below.
    const rawTimeout = opts.timeout ? String(opts.timeout).trim() : undefined;
    let timeout: number | undefined;
    if (rawTimeout !== undefined) {
      timeout = /^\d+$/.test(rawTimeout) ? Number(rawTimeout) : Number.NaN;
    }
    if (timeout !== undefined && (!Number.isSafeInteger(timeout) || timeout <= 0)) {
      defaultRuntime.error("--timeout must be a positive integer (milliseconds)");
      defaultRuntime.exit(1);
      return;
    }

    try {
      await healthCommand({ json: Boolean(opts.json), timeoutMs: timeout }, defaultRuntime);
    } catch (err) {
      defaultRuntime.error(String(err));
      defaultRuntime.exit(1);
    }
  });
|
||||
|
||||
program
|
||||
.command("sessions")
|
||||
.description("List stored conversation sessions")
|
||||
|
||||
62
src/commands/health.test.ts
Normal file
62
src/commands/health.test.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
import { describe, expect, it, vi, beforeEach } from "vitest";
|
||||
|
||||
import { healthCommand } from "./health.js";
|
||||
|
||||
// Stand-in for RuntimeEnv: every method is a spy so the tests can inspect
// what healthCommand logged and whether it requested a process exit.
const runtime = { log: vi.fn(), error: vi.fn(), exit: vi.fn() };

// Config with empty `web` / `inbound` sections.
vi.mock("../config/config.js", () => {
  const loadConfig = () => ({ web: {}, inbound: {} });
  return { loadConfig };
});

// Session store holding a single recipient that was updated one minute
// before the store is loaded.
vi.mock("../config/sessions.js", () => {
  const resolveStorePath = vi.fn(() => "/tmp/sessions.json");
  const loadSessionStore = vi.fn(() => ({
    "+1555": { updatedAt: Date.now() - 60_000 },
  }));
  return { resolveStorePath, loadSessionStore };
});

// Spies that individual tests program per scenario. The mock factory below
// only forwards to them lazily, so they are looked up at call time.
const waitForWaConnection = vi.fn();
const webAuthExists = vi.fn();

vi.mock("../web/session.js", () => {
  const fakeSocket = async () => ({ ws: { close: vi.fn() }, ev: { on: vi.fn() } });
  return {
    createWaSocket: vi.fn(fakeSocket),
    waitForWaConnection: (...args: unknown[]) => waitForWaConnection(...args),
    webAuthExists: (...args: unknown[]) => webAuthExists(...args),
    getStatusCode: () => undefined,
    getWebAuthAgeMs: () => 5000,
    logWebSelfId: vi.fn(),
  };
});

vi.mock("../web/reconnect.js", () => {
  return { resolveHeartbeatSeconds: () => 60 };
});
|
||||
|
||||
describe("healthCommand", () => {
  // Reset call history on every spy so assertions never see calls made by
  // an earlier test.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("outputs JSON when linked and connect succeeds", async () => {
    // Creds exist and the connection wait resolves.
    webAuthExists.mockResolvedValue(true);
    waitForWaConnection.mockResolvedValue(undefined);

    await healthCommand({ json: true, timeoutMs: 5000 }, runtime as never);

    expect(runtime.exit).not.toHaveBeenCalled();

    // The first log call carries the serialized summary.
    const firstLogArg = runtime.log.mock.calls[0][0] as string;
    const summary = JSON.parse(firstLogArg);
    expect(summary.web.linked).toBe(true);
    expect(summary.web.connect.ok).toBe(true);
    expect(summary.sessions.count).toBe(1);
  });

  it("exits non-zero when not linked", async () => {
    // Missing creds are fatal for the probe.
    webAuthExists.mockResolvedValue(false);

    await healthCommand({ json: true }, runtime as never);

    expect(runtime.exit).toHaveBeenCalledWith(1);
  });
});
|
||||
146
src/commands/health.ts
Normal file
146
src/commands/health.ts
Normal file
@@ -0,0 +1,146 @@
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
|
||||
import { loadConfig } from "../config/config.js";
|
||||
import {
|
||||
loadSessionStore,
|
||||
resolveStorePath,
|
||||
} from "../config/sessions.js";
|
||||
import { info } from "../globals.js";
|
||||
import type { RuntimeEnv } from "../runtime.js";
|
||||
import { resolveHeartbeatSeconds } from "../web/reconnect.js";
|
||||
import {
|
||||
createWaSocket,
|
||||
getStatusCode,
|
||||
getWebAuthAgeMs,
|
||||
logWebSelfId,
|
||||
waitForWaConnection,
|
||||
webAuthExists,
|
||||
} from "../web/session.js";
|
||||
|
||||
/** Outcome of a single connect attempt made by the health probe. */
type HealthConnect = {
  /** True when the connection wait finished before the timeout. */
  ok: boolean;
  /** Status code extracted from the connect error, when one is available. */
  status?: number | null;
  /** Message of the connect error; null on success. */
  error?: string | null;
  /** Wall-clock time spent on the attempt, in milliseconds. */
  elapsedMs: number;
};

/** One row of the "recent sessions" list in the health summary. */
type HealthSessionEntry = {
  /** Session store key. */
  key: string;
  /** Last update timestamp (epoch ms), or null when none was recorded. */
  updatedAt: number | null;
  /** Milliseconds elapsed since `updatedAt`, or null when it is unknown. */
  age: number | null;
};

/** Full report produced by the `health` command (also its JSON output shape). */
type HealthSummary = {
  /** Epoch ms at which the summary was assembled. */
  ts: number;
  /** Time spent on the connect probe, in milliseconds. */
  durationMs: number;
  web: {
    /** Whether web credentials exist. */
    linked: boolean;
    /** Age of the web auth in milliseconds, or null when unknown. */
    authAgeMs: number | null;
    /** Present only when a connect attempt was made. */
    connect?: HealthConnect;
  };
  /** Resolved heartbeat interval, in seconds. */
  heartbeatSeconds: number;
  sessions: {
    /** Resolved path of the session store. */
    path: string;
    /** Number of session entries counted by the probe. */
    count: number;
    /** Most recently updated sessions, newest first. */
    recent: Array<HealthSessionEntry>;
  };
  /** Location of the IPC socket and whether it was found on disk. */
  ipc: { path: string; exists: boolean };
};

/** Connect timeout applied when the caller does not supply `timeoutMs`. */
const DEFAULT_TIMEOUT_MS = 10_000;
|
||||
|
||||
/**
 * Creates a WhatsApp Web socket, waits for it to connect (bounded by
 * `timeoutMs`), and reports the outcome. The socket is always closed before
 * returning.
 *
 * @param timeoutMs Maximum time to wait for the connection, in milliseconds.
 * @returns `ok: true` when the connection wait resolved in time; otherwise
 *   `ok: false` with the error message ("timeout" when the deadline elapsed)
 *   and whatever status code `getStatusCode` extracts from the error.
 * @throws Whatever `createWaSocket` throws; socket creation happens before
 *   the guarded section, so its failures propagate to the caller.
 */
async function probeWebConnect(timeoutMs: number): Promise<HealthConnect> {
  const started = Date.now();
  // NOTE(review): the meaning of the two `false` flags is defined by
  // createWaSocket and is not visible here; confirm against its signature.
  const sock = await createWaSocket(false, false);
  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    await Promise.race([
      waitForWaConnection(sock),
      new Promise<never>((_resolve, reject) => {
        timer = setTimeout(() => reject(new Error("timeout")), timeoutMs);
      }),
    ]);
    return { ok: true, status: null, error: null, elapsedMs: Date.now() - started };
  } catch (err) {
    return {
      ok: false,
      status: getStatusCode(err),
      error: err instanceof Error ? err.message : String(err),
      elapsedMs: Date.now() - started,
    };
  } finally {
    // Cancel the deadline timer: left pending, it keeps the event loop (and
    // therefore the CLI process) alive for up to `timeoutMs` after a
    // successful connect.
    if (timer !== undefined) {
      clearTimeout(timer);
    }
    try {
      sock.ws?.close();
    } catch {
      // Best-effort cleanup; a close failure must not mask the probe result.
    }
  }
}
|
||||
|
||||
/**
 * Runs the health probe and prints the result.
 *
 * Gathers credential state, the heartbeat interval, a session-store summary
 * and IPC socket presence; when credentials exist it also attempts a connect
 * via `probeWebConnect`. Output goes to `runtime.log` as pretty-printed JSON
 * (`opts.json`) or as human-readable lines.
 *
 * Calls `runtime.exit(1)` when the web session is not linked or the connect
 * attempt failed.
 *
 * @param opts.json Emit the summary as JSON instead of text.
 * @param opts.timeoutMs Connect timeout in milliseconds. Missing or non-finite
 *   values fall back to `DEFAULT_TIMEOUT_MS`; values below 1000 are raised
 *   to 1000.
 * @param runtime Sink for log output and exit requests.
 */
export async function healthCommand(
  opts: { json?: boolean; timeoutMs?: number },
  runtime: RuntimeEnv,
): Promise<void> {
  const cfg = loadConfig();
  const linked = await webAuthExists();
  const authAgeMs = getWebAuthAgeMs();
  const heartbeatSeconds = resolveHeartbeatSeconds(cfg, undefined);

  const storePath = resolveStorePath(cfg.inbound?.reply?.session?.store);
  const store = loadSessionStore(storePath);
  // "global" and "unknown" keys are excluded from the count; entries without
  // a timestamp sort last (treated as 0).
  const sessions = Object.entries(store)
    .filter(([key]) => key !== "global" && key !== "unknown")
    .map(([key, entry]) => ({ key, updatedAt: entry?.updatedAt ?? 0 }))
    .sort((a, b) => b.updatedAt - a.updatedAt);
  const recent = sessions.slice(0, 5).map((s) => ({
    key: s.key,
    updatedAt: s.updatedAt || null,
    age: s.updatedAt ? Date.now() - s.updatedAt : null,
  }));

  const home = process.env.HOME;
  const ipcPath = path.join(home ?? "", ".clawdis", "clawdis.sock");
  // Without HOME the joined path is relative to the current directory, so a
  // hit there would not be the relay's socket; report it as missing instead.
  const ipcExists = Boolean(home) && fs.existsSync(ipcPath);

  // `durationMs` covers only the connect probe: the clock starts here.
  const start = Date.now();
  // Guard against NaN: Math.max(1000, NaN) is NaN, which would make the
  // probe's timer fire almost immediately.
  const requestedTimeout = opts.timeoutMs;
  const timeoutMs = Math.max(
    1000,
    typeof requestedTimeout === "number" && Number.isFinite(requestedTimeout)
      ? requestedTimeout
      : DEFAULT_TIMEOUT_MS,
  );
  const connect = linked ? await probeWebConnect(timeoutMs) : undefined;

  const summary: HealthSummary = {
    ts: Date.now(),
    durationMs: Date.now() - start,
    web: { linked, authAgeMs, connect },
    heartbeatSeconds,
    sessions: {
      path: storePath,
      count: sessions.length,
      recent,
    },
    ipc: { path: ipcPath, exists: ipcExists },
  };

  const fatal = !linked || (connect !== undefined && !connect.ok);

  if (opts.json) {
    runtime.log(JSON.stringify(summary, null, 2));
  } else {
    // Nullish check rather than truthiness: an age of 0 ms is a valid value.
    const authAgeText = authAgeMs != null ? `${Math.round(authAgeMs / 60000)}m` : "unknown";
    runtime.log(
      linked
        ? `Web: linked (auth age ${authAgeText})`
        : "Web: not linked (run clawdis login)",
    );
    if (linked) {
      logWebSelfId(runtime, true);
    }
    if (connect) {
      const base = connect.ok
        ? info(`Connect: ok (${connect.elapsedMs}ms)`)
        : `Connect: failed (${connect.status ?? "unknown"})`;
      runtime.log(base + (connect.error ? ` - ${connect.error}` : ""));
    }
    runtime.log(info(`Heartbeat interval: ${heartbeatSeconds}s`));
    runtime.log(info(`Session store: ${storePath} (${sessions.length} entries)`));
    if (recent.length > 0) {
      runtime.log("Recent sessions:");
      for (const r of recent) {
        // Reuse the age captured for the summary so text and JSON agree.
        const ageText = r.age !== null ? `${Math.round(r.age / 60000)}m ago` : "no activity";
        runtime.log(`- ${r.key} (${ageText})`);
      }
    }
    runtime.log(info(`IPC socket: ${ipcExists ? "present" : "missing"} (${ipcPath})`));
  }

  if (fatal) {
    runtime.exit(1);
  }
}
|
||||
Reference in New Issue
Block a user