fix: stabilize health probe and gateway handshake

This commit is contained in:
Peter Steinberger
2025-12-10 00:52:43 +00:00
parent f1fd25e95e
commit cf8b00890f
3 changed files with 28 additions and 12 deletions

View File

@@ -14,7 +14,9 @@ import { defaultRuntime } from "../runtime.js";
import { VERSION } from "../version.js";
import { startWebChatServer } from "../webchat/server.js";
import { createDefaultDeps } from "./deps.js";
import { forceFreePort, listPortListeners } from "./ports.js";
import { forceFreePort, listPortListeners, parseLsofOutput } from "./ports.js";
export { forceFreePort, listPortListeners, parseLsofOutput };
export function buildProgram() {
const program = new Command();
@@ -489,7 +491,7 @@ Examples:
.option(
"--probe",
"Also attempt a live Baileys connect (can conflict if gateway is already connected)",
false,
true,
)
.action(async (opts) => {
setVerbose(Boolean(opts.verbose));
@@ -508,7 +510,7 @@ Examples:
{
json: Boolean(opts.json),
timeoutMs: timeout,
probe: Boolean(opts.probe),
probe: opts.probe ?? true,
},
defaultRuntime,
);
@@ -564,6 +566,19 @@ Shows token usage per session when the agent reports it; set inbound.reply.agent
? Number.parseInt(String(opts.port), 10)
: undefined;
const server = await startWebChatServer(port);
if (!server) {
const targetPort = port ?? 18788;
const msg = `webchat failed to start on http://127.0.0.1:${targetPort}/`;
if (opts.json) {
defaultRuntime.error(
JSON.stringify({ error: msg, port: targetPort }),
);
} else {
defaultRuntime.error(danger(msg));
}
defaultRuntime.exit(1);
return;
}
const payload = {
port: server.port,
basePath: "/",

View File

@@ -76,7 +76,7 @@ async function probeWebConnect(timeoutMs: number): Promise<HealthConnect> {
const status = getStatusCode(err);
// Conflict/duplicate sessions are expected when the primary gateway session
// is already connected. Treat these as healthy so health checks don't flap.
if (status === 409 || status === 440 || status === 515) {
if (status === 409 || status === 515) {
return {
ok: true,
status,
@@ -239,8 +239,9 @@ export async function healthCommand(
opts: { json?: boolean; timeoutMs?: number; probe?: boolean },
runtime: RuntimeEnv,
) {
const probe = opts.probe ?? true;
const summary = await getHealthSnapshot(opts.timeoutMs, {
probe: opts.probe,
probe,
});
const fatal =
!summary.web.linked ||

View File

@@ -97,7 +97,7 @@ function buildSnapshot(): Snapshot {
const MAX_PAYLOAD_BYTES = 512 * 1024; // cap incoming frame size
const MAX_BUFFERED_BYTES = 1.5 * 1024 * 1024; // per-connection send buffer limit
const HANDSHAKE_TIMEOUT_MS = 10_000;
const HANDSHAKE_TIMEOUT_MS = 3_000;
const TICK_INTERVAL_MS = 30_000;
const HEALTH_REFRESH_INTERVAL_MS = 60_000;
const DEDUPE_TTL_MS = 5 * 60_000;
@@ -754,9 +754,11 @@ export async function startGatewayServer(port = 18789): Promise<GatewayServer> {
typeof a?.content === "string"
? a.content
: ArrayBuffer.isView(a?.content)
? Buffer.from(a.content as ArrayBufferLike).toString(
"base64",
)
? Buffer.from(
a.content.buffer,
a.content.byteOffset,
a.content.byteLength,
).toString("base64")
: undefined,
})) ?? [];
let messageWithAttachments = p.message;
@@ -1076,9 +1078,7 @@ export async function startGatewayServer(port = 18789): Promise<GatewayServer> {
defaultRuntime.log(`gateway log file: ${getResolvedLoggerSettings().file}`);
// Start loopback WebChat server (unless disabled via config).
void ensureWebChatServerFromConfig({
gatewayUrl: `ws://127.0.0.1:${port}`,
})
void ensureWebChatServerFromConfig()
.then((webchat) => {
if (webchat) {
defaultRuntime.log(