From 510915a8017b494e7dc1aef221acb7287d3123ca Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 15 Jan 2026 09:00:55 +0000 Subject: [PATCH] fix(onboarding): wait for gateway before health --- src/commands/configure.wizard.ts | 39 +++++++++++++++++-- src/commands/onboard-helpers.ts | 34 +++++++++++++++- src/commands/onboard-non-interactive/local.ts | 16 ++++++-- src/wizard/onboarding.finalize.ts | 16 +++++++- 4 files changed, 96 insertions(+), 9 deletions(-) diff --git a/src/commands/configure.wizard.ts b/src/commands/configure.wizard.ts index c09a6d8a8..e50c3eff8 100644 --- a/src/commands/configure.wizard.ts +++ b/src/commands/configure.wizard.ts @@ -9,7 +9,7 @@ import { ensureControlUiAssetsBuilt } from "../infra/control-ui-assets.js"; import type { RuntimeEnv } from "../runtime.js"; import { defaultRuntime } from "../runtime.js"; import { note } from "../terminal/note.js"; -import { resolveUserPath, sleep } from "../utils.js"; +import { resolveUserPath } from "../utils.js"; import { createClackPrompter } from "../wizard/clack-prompter.js"; import { WizardCancelledError } from "../wizard/prompts.js"; import { removeChannelConfigWizard } from "./configure.channels.js"; @@ -41,6 +41,7 @@ import { probeGatewayReachable, resolveControlUiLinks, summarizeExistingConfig, + waitForGatewayReachable, } from "./onboard-helpers.js"; import { promptRemoteGatewayConfig } from "./onboard-remote.js"; import { setupSkills } from "./onboard-skills.js"; @@ -354,7 +355,22 @@ export async function runConfigureWizard( } if (selected.includes("health")) { - await sleep(1000); + const localLinks = resolveControlUiLinks({ + bind: nextConfig.gateway?.bind ?? "loopback", + port: gatewayPort, + customBindHost: nextConfig.gateway?.customBindHost, + basePath: undefined, + }); + const remoteUrl = nextConfig.gateway?.remote?.url?.trim(); + const wsUrl = nextConfig.gateway?.mode === "remote" && remoteUrl ? remoteUrl : localLinks.wsUrl; + const token = nextConfig.gateway?.auth?.token ?? process.env.CLAWDBOT_GATEWAY_TOKEN; + const password = nextConfig.gateway?.auth?.password ?? process.env.CLAWDBOT_GATEWAY_PASSWORD; + await waitForGatewayReachable({ + url: wsUrl, + token, + password, + deadlineMs: 15_000, + }); try { await healthCommand({ json: false, timeoutMs: 10_000 }, runtime); } catch (err) { @@ -459,7 +475,24 @@ export async function runConfigureWizard( } if (choice === "health") { - await sleep(1000); + const localLinks = resolveControlUiLinks({ + bind: nextConfig.gateway?.bind ?? "loopback", + port: gatewayPort, + customBindHost: nextConfig.gateway?.customBindHost, + basePath: undefined, + }); + const remoteUrl = nextConfig.gateway?.remote?.url?.trim(); + const wsUrl = + nextConfig.gateway?.mode === "remote" && remoteUrl ? remoteUrl : localLinks.wsUrl; + const token = nextConfig.gateway?.auth?.token ?? process.env.CLAWDBOT_GATEWAY_TOKEN; + const password = + nextConfig.gateway?.auth?.password ?? process.env.CLAWDBOT_GATEWAY_PASSWORD; + await waitForGatewayReachable({ + url: wsUrl, + token, + password, + deadlineMs: 15_000, + }); try { await healthCommand({ json: false, timeoutMs: 10_000 }, runtime); } catch (err) { diff --git a/src/commands/onboard-helpers.ts b/src/commands/onboard-helpers.ts index 12f041557..90eb39c93 100644 --- a/src/commands/onboard-helpers.ts +++ b/src/commands/onboard-helpers.ts @@ -17,7 +17,7 @@ import { runCommandWithTimeout } from "../process/exec.js"; import type { RuntimeEnv } from "../runtime.js"; import { stylePromptTitle } from "../terminal/prompt-style.js"; import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../utils/message-channel.js"; -import { CONFIG_DIR, resolveUserPath } from "../utils.js"; +import { CONFIG_DIR, resolveUserPath, sleep } from "../utils.js"; import { VERSION } from "../version.js"; import type { NodeManagerChoice, OnboardMode, ResetScope } from "./onboard-types.js"; @@ -333,6 +333,38 @@ export async function probeGatewayReachable(params: { } } +export async function waitForGatewayReachable(params: { + url: string; + token?: string; + password?: string; + /** Total time to wait before giving up. */ + deadlineMs?: number; + /** Per-probe timeout (each probe makes a full gateway health request). */ + probeTimeoutMs?: number; + /** Delay between probes. */ + pollMs?: number; +}): Promise<{ ok: boolean; detail?: string }> { + const deadlineMs = params.deadlineMs ?? 15_000; + const pollMs = params.pollMs ?? 400; + const probeTimeoutMs = params.probeTimeoutMs ?? 1500; + const startedAt = Date.now(); + let lastDetail: string | undefined; + + while (Date.now() - startedAt < deadlineMs) { + const probe = await probeGatewayReachable({ + url: params.url, + token: params.token, + password: params.password, + timeoutMs: probeTimeoutMs, + }); + if (probe.ok) return probe; + lastDetail = probe.detail; + await sleep(pollMs); + } + + return { ok: false, detail: lastDetail }; +} + function summarizeError(err: unknown): string { let raw = "unknown error"; if (err instanceof Error) { diff --git a/src/commands/onboard-non-interactive/local.ts b/src/commands/onboard-non-interactive/local.ts index 56b11b3b1..1d42a5faf 100644 --- a/src/commands/onboard-non-interactive/local.ts +++ b/src/commands/onboard-non-interactive/local.ts @@ -1,13 +1,14 @@ import type { ClawdbotConfig } from "../../config/config.js"; import { CONFIG_PATH_CLAWDBOT, resolveGatewayPort, writeConfigFile } from "../../config/config.js"; import type { RuntimeEnv } from "../../runtime.js"; -import { sleep } from "../../utils.js"; import { DEFAULT_GATEWAY_DAEMON_RUNTIME } from "../daemon-runtime.js"; import { healthCommand } from "../health.js"; import { applyWizardMetadata, DEFAULT_WORKSPACE, ensureWorkspaceAndSessions, + resolveControlUiLinks, + waitForGatewayReachable, } from "../onboard-helpers.js"; import type { OnboardOptions } from "../onboard-types.js"; @@ -88,8 +89,17 @@ export async function runNonInteractiveOnboardingLocal(params: { const daemonRuntimeRaw = opts.daemonRuntime ?? DEFAULT_GATEWAY_DAEMON_RUNTIME; if (!opts.skipHealth) { - await sleep(1000); - // Health check runs against the gateway; small delay avoids flakiness during install/start. + const links = resolveControlUiLinks({ + bind: gatewayResult.bind as "auto" | "lan" | "loopback" | "custom", + port: gatewayResult.port, + customBindHost: nextConfig.gateway?.customBindHost, + basePath: undefined, + }); + await waitForGatewayReachable({ + url: links.wsUrl, + token: gatewayResult.gatewayToken, + deadlineMs: 15_000, + }); await healthCommand({ json: false, timeoutMs: 10_000 }, runtime); } diff --git a/src/wizard/onboarding.finalize.ts b/src/wizard/onboarding.finalize.ts index 29b91239d..a2946343b 100644 --- a/src/wizard/onboarding.finalize.ts +++ b/src/wizard/onboarding.finalize.ts @@ -14,6 +14,7 @@ import { formatControlUiSshHint, openUrl, probeGatewayReachable, + waitForGatewayReachable, resolveControlUiLinks, } from "../commands/onboard-helpers.js"; import type { OnboardOptions } from "../commands/onboard-types.js"; @@ -31,7 +32,7 @@ import { isSystemdUserServiceAvailable } from "../daemon/systemd.js"; import { ensureControlUiAssetsBuilt } from "../infra/control-ui-assets.js"; import type { RuntimeEnv } from "../runtime.js"; import { runTui } from "../tui/tui.js"; -import { resolveUserPath, sleep } from "../utils.js"; +import { resolveUserPath } from "../utils.js"; import type { GatewayWizardSettings, WizardFlow } from "./onboarding.types.js"; import type { WizardPrompter } from "./prompts.js"; @@ -209,7 +210,18 @@ export async function finalizeOnboardingWizard(options: FinalizeOnboardingOption } if (!opts.skipHealth) { - await sleep(1500); + const probeLinks = resolveControlUiLinks({ + bind: nextConfig.gateway?.bind ?? "loopback", + port: settings.port, + customBindHost: nextConfig.gateway?.customBindHost, + basePath: undefined, + }); + // Daemon install/restart can briefly flap the WS; wait a bit so health check doesn't false-fail. + await waitForGatewayReachable({ + url: probeLinks.wsUrl, + token: settings.gatewayToken, + deadlineMs: 15_000, + }); try { await healthCommand({ json: false, timeoutMs: 10_000 }, runtime); } catch (err) {