CLI: add health probe command
This commit is contained in:
23
docs/health.md
Normal file
23
docs/health.md
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# Health Checks (CLI)
|
||||||
|
|
||||||
|
Short guide to verifying the WhatsApp Web / Baileys stack without guessing.
|
||||||
|
|
||||||
|
## Quick checks
|
||||||
|
- `pnpm clawdis status --json` — confirms creds exist (`web.linked`), shows auth age (`authAgeMs`), heartbeat interval, and where the session store lives.
|
||||||
|
- `pnpm clawdis heartbeat --verbose --dry-run` — runs the heartbeat path end-to-end (session resolution, message creation) without sending anything. Drop `--dry-run` or add `--message "Ping"` to actually send.
|
||||||
|
- `pnpm clawdis relay --verbose --heartbeat-now` — spins the full monitor loop, fires a heartbeat immediately, and will reconnect per `web.reconnect` settings. Good for soak testing.
|
||||||
|
- Logs: tail `/tmp/clawdis/clawdis.log` and filter for `web-heartbeat`, `web-reconnect`, `web-auto-reply`, `web-inbound`.
|
||||||
|
|
||||||
|
## Deep diagnostics
|
||||||
|
- Creds on disk: `ls -l ~/.clawdis/credentials/creds.json` (mtime should be recent).
|
||||||
|
- Session store: `ls -l ~/.clawdis/sessions.json` (path can be overridden in config). Count and recent recipients are surfaced via `status`.
|
||||||
|
- IPC socket (if relay is running): `ls -l ~/.clawdis/clawdis.sock`.
|
||||||
|
- Relink flow: `pnpm clawdis logout && pnpm clawdis login --provider web --verbose` when status codes 409–515 or `loggedOut` appear in logs.
|
||||||
|
|
||||||
|
## When something fails
|
||||||
|
- `logged out` or status 409–515 → relink with `clawdis logout` then `clawdis login --provider web`.
|
||||||
|
- Repeated reconnect exits → tune `web.reconnect` (flags: `--web-retries`, `--web-retry-initial`, `--web-retry-max`) and rerun relay.
|
||||||
|
- No inbound messages → confirm linked phone is online and sender is allowed; use `pnpm clawdis heartbeat --all --verbose` to test each known recipient.
|
||||||
|
|
||||||
|
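## Dedicated "health" command

`pnpm clawdis health --json` runs a connect-only probe (no sends) and reports: linked creds, auth age, Baileys connect result/status code, session-store summary, and IPC socket presence. It exits non-zero when the account is not linked or the connect attempt fails. Use `--timeout <ms>` (default `10000`) to bound the connect attempt, and drop `--json` for a human-readable summary. The quick checks above remain useful for exercising the send and relay paths.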
|
||||||
22
docs/mac/health.md
Normal file
22
docs/mac/health.md
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# Health Checks on macOS
|
||||||
|
|
||||||
|
How to see whether the WhatsApp Web/Baileys bridge is healthy from the menu bar app.
|
||||||
|
|
||||||
|
## Menu bar (planned)
|
||||||
|
- Status dot expands beyond “relay running” to reflect Baileys health:
  - Green: linked + socket opened recently.
  - Orange: connecting/retrying.
  - Red: logged out or probe failed.
|
||||||
|
- Secondary line reads "Web: linked · auth 12m · socket ok" or shows the failure reason.
|
||||||
|
- "Run Health Check" menu item triggers an on-demand probe.
|
||||||
|
|
||||||
|
## Settings (planned)
|
||||||
|
- General tab gains a Health card showing: linked E.164, auth age, session-store path/count, last check time, last error/status code, and buttons for Run Health Check / Reveal Logs / Relink.
|
||||||
|
- Uses a cached snapshot so the UI loads instantly and falls back gracefully when offline.
|
||||||
|
|
||||||
|
## How the probe works (planned)
|
||||||
|
- App runs `clawdis health --json` via `ShellRunner` every ~60s and on demand. The probe loads creds, attempts a short Baileys connect, and reports status without sending messages.
|
||||||
|
- Cache the last good snapshot and the last error separately to avoid flicker; show the timestamp of each.
|
||||||
|
|
||||||
|
## Until the UI ships
|
||||||
|
- Use the CLI flow in `docs/health.md` (status, heartbeat dry-run, relay heartbeat) and tail `/tmp/clawdis/clawdis.log` for `web-heartbeat` / `web-reconnect`.
|
||||||
@@ -3,6 +3,7 @@ import { Command } from "commander";
|
|||||||
import { agentCommand } from "../commands/agent.js";
|
import { agentCommand } from "../commands/agent.js";
|
||||||
import { sendCommand } from "../commands/send.js";
|
import { sendCommand } from "../commands/send.js";
|
||||||
import { sessionsCommand } from "../commands/sessions.js";
|
import { sessionsCommand } from "../commands/sessions.js";
|
||||||
|
import { healthCommand } from "../commands/health.js";
|
||||||
import { statusCommand } from "../commands/status.js";
|
import { statusCommand } from "../commands/status.js";
|
||||||
import { loadConfig } from "../config/config.js";
|
import { loadConfig } from "../config/config.js";
|
||||||
import { danger, info, setVerbose } from "../globals.js";
|
import { danger, info, setVerbose } from "../globals.js";
|
||||||
@@ -18,6 +19,7 @@ import {
|
|||||||
import { defaultRuntime } from "../runtime.js";
|
import { defaultRuntime } from "../runtime.js";
|
||||||
import { VERSION } from "../version.js";
|
import { VERSION } from "../version.js";
|
||||||
import {
|
import {
|
||||||
|
DEFAULT_HEARTBEAT_SECONDS,
|
||||||
resolveHeartbeatSeconds,
|
resolveHeartbeatSeconds,
|
||||||
resolveReconnectPolicy,
|
resolveReconnectPolicy,
|
||||||
} from "../web/reconnect.js";
|
} from "../web/reconnect.js";
|
||||||
@@ -569,6 +571,28 @@ Examples:
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// `clawdis health`: connect-only probe of the WhatsApp Web stack. Delegates to
// `healthCommand` and exits with status 1 if that call throws.
program
  .command("health")
  .description(
    "Probe WhatsApp Web health (creds + Baileys connect) and session store",
  )
  .option("--json", "Output JSON instead of text", false)
  .option("--timeout <ms>", "Connection timeout in milliseconds", "10000")
  .option("--verbose", "Verbose logging", false)
  .action(async (opts) => {
    setVerbose(Boolean(opts.verbose));

    // Validate the raw text rather than trusting parseInt alone: parseInt
    // would silently accept "10abc" as 10 and "1.5" as 1, contradicting the
    // "positive integer" contract stated in the error message below.
    const timeoutText = opts.timeout ? String(opts.timeout).trim() : undefined;
    const timeout =
      timeoutText === undefined
        ? undefined
        : /^\+?\d+$/.test(timeoutText)
          ? Number.parseInt(timeoutText, 10)
          : Number.NaN;
    if (
      timeout !== undefined &&
      (!Number.isSafeInteger(timeout) || timeout <= 0)
    ) {
      defaultRuntime.error(
        "--timeout must be a positive integer (milliseconds)",
      );
      defaultRuntime.exit(1);
      return;
    }

    try {
      await healthCommand(
        { json: Boolean(opts.json), timeoutMs: timeout },
        defaultRuntime,
      );
    } catch (err) {
      defaultRuntime.error(String(err));
      defaultRuntime.exit(1);
    }
  });
|
||||||
|
|
||||||
program
|
program
|
||||||
.command("sessions")
|
.command("sessions")
|
||||||
.description("List stored conversation sessions")
|
.description("List stored conversation sessions")
|
||||||
|
|||||||
62
src/commands/health.test.ts
Normal file
62
src/commands/health.test.ts
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
import { describe, expect, it, vi, beforeEach } from "vitest";
|
||||||
|
|
||||||
|
import { healthCommand } from "./health.js";
|
||||||
|
|
||||||
|
// Minimal RuntimeEnv stand-in; every method is a spy so tests can inspect
// what was logged and whether an exit was requested.
const runtime = {
  log: vi.fn(),
  error: vi.fn(),
  exit: vi.fn(),
};

vi.mock("../config/config.js", () => ({
  loadConfig: () => ({ web: {}, inbound: {} }),
}));

// One real recipient in the store, so `sessions.count` is expected to be 1.
vi.mock("../config/sessions.js", () => ({
  resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
  loadSessionStore: vi.fn(() => ({
    "+1555": { updatedAt: Date.now() - 60_000 },
  })),
}));

// Declared outside the mock factory so each test can script the outcome.
const waitForWaConnection = vi.fn();
const webAuthExists = vi.fn();

vi.mock("../web/session.js", () => ({
  createWaSocket: vi.fn(async () => ({
    ws: { close: vi.fn() },
    ev: { on: vi.fn() },
  })),
  waitForWaConnection: (...args: unknown[]) => waitForWaConnection(...args),
  webAuthExists: (...args: unknown[]) => webAuthExists(...args),
  getStatusCode: () => undefined,
  getWebAuthAgeMs: () => 5000,
  logWebSelfId: vi.fn(),
}));

vi.mock("../web/reconnect.js", () => ({
  resolveHeartbeatSeconds: () => 60,
}));

describe("healthCommand", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("outputs JSON when linked and connect succeeds", async () => {
    webAuthExists.mockResolvedValue(true);
    waitForWaConnection.mockResolvedValue(undefined);

    await healthCommand({ json: true, timeoutMs: 5000 }, runtime as never);

    expect(runtime.exit).not.toHaveBeenCalled();
    const logged = runtime.log.mock.calls[0][0] as string;
    const parsed = JSON.parse(logged);
    expect(parsed.web.linked).toBe(true);
    expect(parsed.web.connect.ok).toBe(true);
    expect(parsed.sessions.count).toBe(1);
  });

  it("exits non-zero when not linked", async () => {
    webAuthExists.mockResolvedValue(false);

    await healthCommand({ json: true }, runtime as never);

    expect(runtime.exit).toHaveBeenCalledWith(1);
    // Without credentials there is nothing to connect with, so the probe
    // must be skipped entirely.
    expect(waitForWaConnection).not.toHaveBeenCalled();
  });

  it("exits non-zero and reports the error when connect fails", async () => {
    webAuthExists.mockResolvedValue(true);
    waitForWaConnection.mockRejectedValue(new Error("boom"));

    await healthCommand({ json: true, timeoutMs: 5000 }, runtime as never);

    expect(runtime.exit).toHaveBeenCalledWith(1);
    const parsed = JSON.parse(runtime.log.mock.calls[0][0] as string);
    expect(parsed.web.linked).toBe(true);
    expect(parsed.web.connect.ok).toBe(false);
    expect(parsed.web.connect.error).toBe("boom");
  });
});
|
||||||
146
src/commands/health.ts
Normal file
146
src/commands/health.ts
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
import fs from "node:fs";
|
||||||
|
import path from "node:path";
|
||||||
|
|
||||||
|
import { loadConfig } from "../config/config.js";
|
||||||
|
import {
|
||||||
|
loadSessionStore,
|
||||||
|
resolveStorePath,
|
||||||
|
} from "../config/sessions.js";
|
||||||
|
import { info } from "../globals.js";
|
||||||
|
import type { RuntimeEnv } from "../runtime.js";
|
||||||
|
import { resolveHeartbeatSeconds } from "../web/reconnect.js";
|
||||||
|
import {
|
||||||
|
createWaSocket,
|
||||||
|
getStatusCode,
|
||||||
|
getWebAuthAgeMs,
|
||||||
|
logWebSelfId,
|
||||||
|
waitForWaConnection,
|
||||||
|
webAuthExists,
|
||||||
|
} from "../web/session.js";
|
||||||
|
|
||||||
|
/** Outcome of one connect-only probe against WhatsApp Web. */
type HealthConnect = {
  /** True when the connection attempt completed without error or timeout. */
  ok: boolean;
  /** Status code extracted from the failure, if any; `null` on success. */
  status?: number | null;
  /** Failure message (e.g. `"timeout"`); `null` on success. */
  error?: string | null;
  /** Wall-clock milliseconds spent on the probe. */
  elapsedMs: number;
};
|
||||||
|
|
||||||
|
/** Snapshot emitted by the health command (serialised as-is for `--json`). */
type HealthSummary = {
  /** Epoch milliseconds at which the summary was assembled. */
  ts: number;
  /** Milliseconds the health check took, as measured by the command. */
  durationMs: number;
  web: {
    /** Whether WhatsApp Web credentials were found. */
    linked: boolean;
    /** Age of the stored credentials in milliseconds, or `null` if unknown. */
    authAgeMs: number | null;
    /** Connect probe result; absent when the probe was not run. */
    connect?: HealthConnect;
  };
  /** Heartbeat interval resolved from config, in seconds. */
  heartbeatSeconds: number;
  sessions: {
    /** Resolved location of the session store. */
    path: string;
    /** Number of session entries counted by the command. */
    count: number;
    /** Most recently updated sessions; `updatedAt`/`age` are `null` when no timestamp is recorded. */
    recent: Array<{
      key: string;
      updatedAt: number | null;
      age: number | null;
    }>;
  };
  /** Relay IPC socket location and whether it currently exists on disk. */
  ipc: { path: string; exists: boolean };
};
|
||||||
|
|
||||||
|
/** Connect-probe timeout in milliseconds, used when the caller supplies no `timeoutMs`. */
const DEFAULT_TIMEOUT_MS = 10_000;
|
||||||
|
|
||||||
|
/**
 * Opens a WhatsApp Web socket, waits for the connection to come up (bounded
 * by `timeoutMs`), and reports the outcome. No messages are sent.
 *
 * This function does not reject: socket-creation errors, connection errors
 * and the timeout are all folded into a `{ ok: false, ... }` result so the
 * caller can always emit a complete health summary.
 *
 * @param timeoutMs - Maximum time to wait for the connection, in milliseconds.
 * @returns The probe outcome, including elapsed wall-clock time.
 */
async function probeWebConnect(timeoutMs: number): Promise<HealthConnect> {
  const started = Date.now();
  let sock: Awaited<ReturnType<typeof createWaSocket>> | undefined;
  let timer: ReturnType<typeof setTimeout> | undefined;

  try {
    // Created inside the try block so a failure here is reported as a failed
    // probe instead of escaping as an exception.
    sock = await createWaSocket(false, false);

    const deadline = new Promise<never>((_resolve, reject) => {
      timer = setTimeout(() => reject(new Error("timeout")), timeoutMs);
    });
    await Promise.race([waitForWaConnection(sock), deadline]);

    return {
      ok: true,
      status: null,
      error: null,
      elapsedMs: Date.now() - started,
    };
  } catch (err) {
    return {
      ok: false,
      status: getStatusCode(err),
      error: err instanceof Error ? err.message : String(err),
      elapsedMs: Date.now() - started,
    };
  } finally {
    // A pending timer would otherwise keep the process alive for up to
    // `timeoutMs` after a fast connect; clearing an unset timer is a no-op.
    clearTimeout(timer);
    try {
      sock?.ws?.close();
    } catch {
      // Best-effort cleanup: a close failure must not mask the probe result.
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Collects a health snapshot (credentials, optional connect probe, heartbeat
 * interval, session store, IPC socket) and prints it as JSON or text.
 *
 * Calls `runtime.exit(1)` after printing when the account is not linked or
 * the connect probe failed. The probe is skipped when not linked.
 *
 * @param opts.json - Print the summary as pretty-printed JSON instead of text.
 * @param opts.timeoutMs - Connect timeout; values below 1000 are raised to
 *   1000, and `DEFAULT_TIMEOUT_MS` is used when omitted.
 * @param runtime - Sink for output and exit requests.
 */
export async function healthCommand(
  opts: { json?: boolean; timeoutMs?: number },
  runtime: RuntimeEnv,
): Promise<void> {
  // Started first so `durationMs` covers the whole check, not just the probe
  // (the probe's own timing is already reported as `connect.elapsedMs`).
  const start = Date.now();

  const cfg = loadConfig();
  const linked = await webAuthExists();
  const authAgeMs = getWebAuthAgeMs();
  const heartbeatSeconds = resolveHeartbeatSeconds(cfg, undefined);

  const storePath = resolveStorePath(cfg.inbound?.reply?.session?.store);
  const store = loadSessionStore(storePath);
  // "global" and "unknown" are excluded from the per-recipient listing.
  // Entries without a timestamp sort last via the 0 fallback.
  const sessions = Object.entries(store)
    .filter(([key]) => key !== "global" && key !== "unknown")
    .map(([key, entry]) => ({ key, updatedAt: entry?.updatedAt ?? 0 }))
    .sort((a, b) => b.updatedAt - a.updatedAt);
  const now = Date.now();
  const recent = sessions.slice(0, 5).map((s) => ({
    key: s.key,
    updatedAt: s.updatedAt || null,
    age: s.updatedAt ? now - s.updatedAt : null,
  }));

  const ipcPath = path.join(
    process.env.HOME ?? "",
    ".clawdis",
    "clawdis.sock",
  );
  const ipcExists = fs.existsSync(ipcPath);

  const timeoutMs = Math.max(1000, opts.timeoutMs ?? DEFAULT_TIMEOUT_MS);
  const connect = linked ? await probeWebConnect(timeoutMs) : undefined;

  const summary: HealthSummary = {
    ts: Date.now(),
    durationMs: Date.now() - start,
    web: { linked, authAgeMs, connect },
    heartbeatSeconds,
    sessions: {
      path: storePath,
      count: sessions.length,
      recent,
    },
    ipc: { path: ipcPath, exists: ipcExists },
  };

  const fatal = !linked || connect?.ok === false;

  if (opts.json) {
    runtime.log(JSON.stringify(summary, null, 2));
  } else {
    // Nullish check, not truthiness: an age of 0 ms is a valid reading.
    const authAge =
      authAgeMs != null ? `${Math.round(authAgeMs / 60000)}m` : "unknown";
    runtime.log(
      linked
        ? `Web: linked (auth age ${authAge})`
        : "Web: not linked (run clawdis login)",
    );
    if (linked) {
      logWebSelfId(runtime, true);
    }
    if (connect) {
      const base = connect.ok
        ? info(`Connect: ok (${connect.elapsedMs}ms)`)
        : `Connect: failed (${connect.status ?? "unknown"})`;
      runtime.log(base + (connect.error ? ` - ${connect.error}` : ""));
    }
    runtime.log(info(`Heartbeat interval: ${heartbeatSeconds}s`));
    runtime.log(
      info(`Session store: ${storePath} (${sessions.length} entries)`),
    );
    if (recent.length > 0) {
      runtime.log("Recent sessions:");
      for (const r of recent) {
        // Reuse the age computed for the summary so text and JSON agree.
        const when =
          r.age != null ? `${Math.round(r.age / 60000)}m ago` : "no activity";
        runtime.log(`- ${r.key} (${when})`);
      }
    }
    runtime.log(
      info(`IPC socket: ${ipcExists ? "present" : "missing"} (${ipcPath})`),
    );
  }

  if (fatal) {
    runtime.exit(1);
  }
}
|
||||||
Reference in New Issue
Block a user