From ec27c813cc99a418415269bb63a83c5f2cc79521 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 18 Jan 2026 07:52:19 +0000 Subject: [PATCH] fix(fallback): handle timeout aborts Co-authored-by: Mykyta Bozhenko <21245729+cheeeee@users.noreply.github.com> --- CHANGELOG.md | 1 + src/agents/failover-error.ts | 24 ++++++++ src/agents/model-fallback.test.ts | 64 ++++++++++++++++++++ src/agents/model-fallback.ts | 16 ++++- src/agents/pi-embedded-runner/run/attempt.ts | 36 ++++++++--- 5 files changed, 129 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0469d9bbc..ff991ce05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ Docs: https://docs.clawd.bot - Exec approvals: enforce allowlist when ask is off; prefer raw command for node approvals/events. - Tools: return a companion-app-required message when node exec is requested with no paired node. - Streaming: emit assistant deltas for OpenAI-compatible SSE chunks. (#1147) — thanks @alauppe. +- Model fallback: treat timeout aborts as failover while preserving user aborts. (#1137) — thanks @cheeeee. ## 2026.1.18-2 diff --git a/src/agents/failover-error.ts b/src/agents/failover-error.ts index 766affcd1..ef88dbc29 100644 --- a/src/agents/failover-error.ts +++ b/src/agents/failover-error.ts @@ -1,5 +1,7 @@ import { classifyFailoverReason, type FailoverReason } from "./pi-embedded-helpers.js"; +const TIMEOUT_HINT_RE = /timeout|timed out|deadline exceeded|context deadline exceeded/i; + export class FailoverError extends Error { readonly reason: FailoverReason; readonly provider?: string; @@ -64,6 +66,11 @@ function getStatusCode(err: unknown): number | undefined { return undefined; } +function getErrorName(err: unknown): string { + if (!err || typeof err !== "object") return ""; + return "name" in err ? String(err.name) : ""; +} + function getErrorCode(err: unknown): string | undefined { if (!err || typeof err !== "object") return undefined; const candidate = (err as { code?: unknown }).code; @@ -86,6 +93,22 @@ function getErrorMessage(err: unknown): string { return ""; } +function hasTimeoutHint(err: unknown): boolean { + if (!err) return false; + if (getErrorName(err) === "TimeoutError") return true; + const message = getErrorMessage(err); + return Boolean(message && TIMEOUT_HINT_RE.test(message)); +} + +export function isTimeoutError(err: unknown): boolean { + if (hasTimeoutHint(err)) return true; + if (!err || typeof err !== "object") return false; + if (getErrorName(err) !== "AbortError") return false; + const cause = "cause" in err ? (err as { cause?: unknown }).cause : undefined; + const reason = "reason" in err ? (err as { reason?: unknown }).reason : undefined; + return hasTimeoutHint(cause) || hasTimeoutHint(reason); +} + export function resolveFailoverReasonFromError(err: unknown): FailoverReason | null { if (isFailoverError(err)) return err.reason; @@ -99,6 +122,7 @@ export function resolveFailoverReasonFromError(err: unknown): FailoverReason | n if (["ETIMEDOUT", "ESOCKETTIMEDOUT", "ECONNRESET", "ECONNABORTED"].includes(code)) { return "timeout"; } + if (isTimeoutError(err)) return "timeout"; const message = getErrorMessage(err); if (!message) return null; diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index 381da47ff..263dfe58a 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -281,6 +281,70 @@ describe("runWithModelFallback", () => { expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5"); }); + it("falls back on timeout abort errors", async () => { + const cfg = makeCfg(); + const timeoutCause = Object.assign(new Error("request timed out"), { name: "TimeoutError" }); + const run = vi + .fn() + .mockRejectedValueOnce( + Object.assign(new Error("aborted"), { name: "AbortError", cause: timeoutCause }), + ) + .mockResolvedValueOnce("ok"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + expect(result.result).toBe("ok"); + expect(run).toHaveBeenCalledTimes(2); + expect(run.mock.calls[1]?.[0]).toBe("anthropic"); + expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5"); + }); + + it("falls back on abort errors with timeout reasons", async () => { + const cfg = makeCfg(); + const run = vi + .fn() + .mockRejectedValueOnce( + Object.assign(new Error("aborted"), { name: "AbortError", reason: "deadline exceeded" }), + ) + .mockResolvedValueOnce("ok"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + expect(result.result).toBe("ok"); + expect(run).toHaveBeenCalledTimes(2); + expect(run.mock.calls[1]?.[0]).toBe("anthropic"); + expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5"); + }); + + it("does not fall back on user aborts", async () => { + const cfg = makeCfg(); + const run = vi + .fn() + .mockRejectedValueOnce(Object.assign(new Error("aborted"), { name: "AbortError" })) + .mockResolvedValueOnce("ok"); + + await expect( + runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }), + ).rejects.toThrow("aborted"); + + expect(run).toHaveBeenCalledTimes(1); + }); + it("appends the configured primary as a last fallback", async () => { const cfg = makeCfg({ agents: { diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 469f45a2c..88522344d 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -1,6 +1,11 @@ import type { ClawdbotConfig } from "../config/config.js"; import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js"; -import { coerceToFailoverError, describeFailoverError, isFailoverError } from "./failover-error.js"; +import { + coerceToFailoverError, + describeFailoverError, + isFailoverError, + isTimeoutError, +} from "./failover-error.js"; import { buildModelAliasIndex, modelKey, @@ -26,6 +31,7 @@ type FallbackAttempt = { function isAbortError(err: unknown): boolean { if (!err || typeof err !== "object") return false; + if (isFailoverError(err)) return false; const name = "name" in err ? String(err.name) : ""; if (name === "AbortError") return true; const message = @@ -33,6 +39,10 @@ function isAbortError(err: unknown): boolean { return message.includes("aborted"); } +function shouldRethrowAbort(err: unknown): boolean { + return isAbortError(err) && !isTimeoutError(err); +} + function buildAllowedModelKeys( cfg: ClawdbotConfig | undefined, defaultProvider: string, @@ -216,7 +226,7 @@ export async function runWithModelFallback(params: { attempts, }; } catch (err) { - if (isAbortError(err)) throw err; + if (shouldRethrowAbort(err)) throw err; const normalized = coerceToFailoverError(err, { provider: candidate.provider, @@ -303,7 +313,7 @@ export async function runWithImageModelFallback(params: { attempts, }; } catch (err) { - if (isAbortError(err)) throw err; + if (shouldRethrowAbort(err)) throw err; lastError = err; attempts.push({ provider: candidate.provider, diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 25ef5155f..407a25246 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -66,6 +66,7 @@ import { splitSdkTools } from "../tool-split.js"; import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../../date-time.js"; import { describeUnknownError, mapThinkingLevel } from "../utils.js"; import { resolveSandboxRuntimeStatus } from "../../sandbox/runtime-status.js"; +import { isTimeoutError } from "../../failover-error.js"; import { getGlobalHookRunner } from "../../../plugins/hook-runner-global.js"; import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js"; @@ -353,25 +354,38 @@ export async function runEmbeddedAttempt( let aborted = Boolean(params.abortSignal?.aborted); let timedOut = false; - const abortRun = (isTimeout = false) => { + const getAbortReason = (signal: AbortSignal): unknown => + "reason" in signal ? (signal as { reason?: unknown }).reason : undefined; + const makeTimeoutAbortReason = (): Error => { + const err = new Error("request timed out"); + err.name = "TimeoutError"; + return err; + }; + const makeAbortError = (signal: AbortSignal): Error => { + const reason = getAbortReason(signal); + const err = reason ? new Error("aborted", { cause: reason }) : new Error("aborted"); + err.name = "AbortError"; + return err; + }; + const abortRun = (isTimeout = false, reason?: unknown) => { aborted = true; if (isTimeout) timedOut = true; - runAbortController.abort(); + if (isTimeout) { + runAbortController.abort(reason ?? makeTimeoutAbortReason()); + } else { + runAbortController.abort(reason); + } void activeSession.abort(); }; const abortable = (promise: Promise): Promise => { const signal = runAbortController.signal; if (signal.aborted) { - const err = new Error("aborted"); - (err as { name?: string }).name = "AbortError"; - return Promise.reject(err); + return Promise.reject(makeAbortError(signal)); } return new Promise((resolve, reject) => { const onAbort = () => { - const err = new Error("aborted"); - (err as { name?: string }).name = "AbortError"; signal.removeEventListener("abort", onAbort); - reject(err); + reject(makeAbortError(signal)); }; signal.addEventListener("abort", onAbort, { once: true }); promise.then( @@ -448,7 +462,11 @@ export async function runEmbeddedAttempt( let messagesSnapshot: AgentMessage[] = []; let sessionIdUsed = activeSession.sessionId; - const onAbort = () => abortRun(); + const onAbort = () => { + const reason = params.abortSignal ? getAbortReason(params.abortSignal) : undefined; + const timeout = reason ? isTimeoutError(reason) : false; + abortRun(timeout, reason); + }; if (params.abortSignal) { if (params.abortSignal.aborted) { onAbort();