fix(onboarding): wait for gateway before health

This commit is contained in:
Peter Steinberger
2026-01-15 09:00:55 +00:00
parent 609d029e20
commit 510915a801
4 changed files with 96 additions and 9 deletions

View File

@@ -9,7 +9,7 @@ import { ensureControlUiAssetsBuilt } from "../infra/control-ui-assets.js";
import type { RuntimeEnv } from "../runtime.js";
import { defaultRuntime } from "../runtime.js";
import { note } from "../terminal/note.js";
import { resolveUserPath, sleep } from "../utils.js";
import { resolveUserPath } from "../utils.js";
import { createClackPrompter } from "../wizard/clack-prompter.js";
import { WizardCancelledError } from "../wizard/prompts.js";
import { removeChannelConfigWizard } from "./configure.channels.js";
@@ -41,6 +41,7 @@ import {
probeGatewayReachable,
resolveControlUiLinks,
summarizeExistingConfig,
waitForGatewayReachable,
} from "./onboard-helpers.js";
import { promptRemoteGatewayConfig } from "./onboard-remote.js";
import { setupSkills } from "./onboard-skills.js";
@@ -354,7 +355,22 @@ export async function runConfigureWizard(
}
if (selected.includes("health")) {
await sleep(1000);
const localLinks = resolveControlUiLinks({
bind: nextConfig.gateway?.bind ?? "loopback",
port: gatewayPort,
customBindHost: nextConfig.gateway?.customBindHost,
basePath: undefined,
});
const remoteUrl = nextConfig.gateway?.remote?.url?.trim();
const wsUrl = nextConfig.gateway?.mode === "remote" && remoteUrl ? remoteUrl : localLinks.wsUrl;
const token = nextConfig.gateway?.auth?.token ?? process.env.CLAWDBOT_GATEWAY_TOKEN;
const password = nextConfig.gateway?.auth?.password ?? process.env.CLAWDBOT_GATEWAY_PASSWORD;
await waitForGatewayReachable({
url: wsUrl,
token,
password,
deadlineMs: 15_000,
});
try {
await healthCommand({ json: false, timeoutMs: 10_000 }, runtime);
} catch (err) {
@@ -459,7 +475,24 @@ export async function runConfigureWizard(
}
if (choice === "health") {
await sleep(1000);
const localLinks = resolveControlUiLinks({
bind: nextConfig.gateway?.bind ?? "loopback",
port: gatewayPort,
customBindHost: nextConfig.gateway?.customBindHost,
basePath: undefined,
});
const remoteUrl = nextConfig.gateway?.remote?.url?.trim();
const wsUrl =
nextConfig.gateway?.mode === "remote" && remoteUrl ? remoteUrl : localLinks.wsUrl;
const token = nextConfig.gateway?.auth?.token ?? process.env.CLAWDBOT_GATEWAY_TOKEN;
const password =
nextConfig.gateway?.auth?.password ?? process.env.CLAWDBOT_GATEWAY_PASSWORD;
await waitForGatewayReachable({
url: wsUrl,
token,
password,
deadlineMs: 15_000,
});
try {
await healthCommand({ json: false, timeoutMs: 10_000 }, runtime);
} catch (err) {

View File

@@ -17,7 +17,7 @@ import { runCommandWithTimeout } from "../process/exec.js";
import type { RuntimeEnv } from "../runtime.js";
import { stylePromptTitle } from "../terminal/prompt-style.js";
import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../utils/message-channel.js";
import { CONFIG_DIR, resolveUserPath } from "../utils.js";
import { CONFIG_DIR, resolveUserPath, sleep } from "../utils.js";
import { VERSION } from "../version.js";
import type { NodeManagerChoice, OnboardMode, ResetScope } from "./onboard-types.js";
@@ -333,6 +333,38 @@ export async function probeGatewayReachable(params: {
}
}
/**
 * Repeatedly probes the gateway until one probe succeeds or the deadline elapses.
 *
 * Each attempt delegates to `probeGatewayReachable` with the per-probe timeout.
 * Between failed attempts the function pauses for `pollMs`, shortened to the
 * time left so the final pause never runs past the deadline.
 *
 * At least one probe is always made, even when `deadlineMs` is zero or
 * negative, so the result reflects an actual attempt.
 *
 * @param params - Gateway URL, optional credentials, and timing knobs.
 * @returns The first successful probe result, or `{ ok: false, detail }` where
 *   `detail` is taken from the last failed probe.
 */
export async function waitForGatewayReachable(params: {
  url: string;
  token?: string;
  password?: string;
  /** Total time to wait before giving up. */
  deadlineMs?: number;
  /** Per-probe timeout (each probe makes a full gateway health request). */
  probeTimeoutMs?: number;
  /** Delay between probes. */
  pollMs?: number;
}): Promise<{ ok: boolean; detail?: string }> {
  const deadlineMs = params.deadlineMs ?? 15_000;
  const pollMs = params.pollMs ?? 400;
  const probeTimeoutMs = params.probeTimeoutMs ?? 1500;
  const startedAt = Date.now();
  let lastDetail: string | undefined;

  // do/while guarantees one probe regardless of the deadline value.
  do {
    const probe = await probeGatewayReachable({
      url: params.url,
      token: params.token,
      password: params.password,
      timeoutMs: probeTimeoutMs,
    });
    if (probe.ok) return probe;
    lastDetail = probe.detail;

    // Skip the pause entirely once the deadline has passed, and never sleep
    // longer than the time that is left.
    const remainingMs = deadlineMs - (Date.now() - startedAt);
    if (remainingMs <= 0) break;
    await sleep(Math.min(pollMs, remainingMs));
  } while (Date.now() - startedAt < deadlineMs);

  return { ok: false, detail: lastDetail };
}
function summarizeError(err: unknown): string {
let raw = "unknown error";
if (err instanceof Error) {

View File

@@ -1,13 +1,14 @@
import type { ClawdbotConfig } from "../../config/config.js";
import { CONFIG_PATH_CLAWDBOT, resolveGatewayPort, writeConfigFile } from "../../config/config.js";
import type { RuntimeEnv } from "../../runtime.js";
import { sleep } from "../../utils.js";
import { DEFAULT_GATEWAY_DAEMON_RUNTIME } from "../daemon-runtime.js";
import { healthCommand } from "../health.js";
import {
applyWizardMetadata,
DEFAULT_WORKSPACE,
ensureWorkspaceAndSessions,
resolveControlUiLinks,
waitForGatewayReachable,
} from "../onboard-helpers.js";
import type { OnboardOptions } from "../onboard-types.js";
@@ -88,8 +89,17 @@ export async function runNonInteractiveOnboardingLocal(params: {
const daemonRuntimeRaw = opts.daemonRuntime ?? DEFAULT_GATEWAY_DAEMON_RUNTIME;
if (!opts.skipHealth) {
await sleep(1000);
// Health check runs against the gateway; small delay avoids flakiness during install/start.
const links = resolveControlUiLinks({
bind: gatewayResult.bind as "auto" | "lan" | "loopback" | "custom",
port: gatewayResult.port,
customBindHost: nextConfig.gateway?.customBindHost,
basePath: undefined,
});
await waitForGatewayReachable({
url: links.wsUrl,
token: gatewayResult.gatewayToken,
deadlineMs: 15_000,
});
await healthCommand({ json: false, timeoutMs: 10_000 }, runtime);
}

View File

@@ -14,6 +14,7 @@ import {
formatControlUiSshHint,
openUrl,
probeGatewayReachable,
waitForGatewayReachable,
resolveControlUiLinks,
} from "../commands/onboard-helpers.js";
import type { OnboardOptions } from "../commands/onboard-types.js";
@@ -31,7 +32,7 @@ import { isSystemdUserServiceAvailable } from "../daemon/systemd.js";
import { ensureControlUiAssetsBuilt } from "../infra/control-ui-assets.js";
import type { RuntimeEnv } from "../runtime.js";
import { runTui } from "../tui/tui.js";
import { resolveUserPath, sleep } from "../utils.js";
import { resolveUserPath } from "../utils.js";
import type { GatewayWizardSettings, WizardFlow } from "./onboarding.types.js";
import type { WizardPrompter } from "./prompts.js";
@@ -209,7 +210,18 @@ export async function finalizeOnboardingWizard(options: FinalizeOnboardingOption
}
if (!opts.skipHealth) {
await sleep(1500);
const probeLinks = resolveControlUiLinks({
bind: nextConfig.gateway?.bind ?? "loopback",
port: settings.port,
customBindHost: nextConfig.gateway?.customBindHost,
basePath: undefined,
});
// Daemon install/restart can briefly flap the WS; wait a bit so health check doesn't false-fail.
await waitForGatewayReachable({
url: probeLinks.wsUrl,
token: settings.gatewayToken,
deadlineMs: 15_000,
});
try {
await healthCommand({ json: false, timeoutMs: 10_000 }, runtime);
} catch (err) {