fix(fallback): handle timeout aborts

Co-authored-by: Mykyta Bozhenko <21245729+cheeeee@users.noreply.github.com>
This commit is contained in:
Peter Steinberger
2026-01-18 07:52:19 +00:00
parent 3b24fe639a
commit ec27c813cc
5 changed files with 129 additions and 12 deletions

View File

@@ -28,6 +28,7 @@ Docs: https://docs.clawd.bot
- Exec approvals: enforce allowlist when ask is off; prefer raw command for node approvals/events.
- Tools: return a companion-app-required message when node exec is requested with no paired node.
- Streaming: emit assistant deltas for OpenAI-compatible SSE chunks. (#1147) — thanks @alauppe.
- Model fallback: treat timeout aborts as failover while preserving user aborts. (#1137) — thanks @cheeeee.
## 2026.1.18-2

View File

@@ -1,5 +1,7 @@
import { classifyFailoverReason, type FailoverReason } from "./pi-embedded-helpers.js";
// Case-insensitive phrases that mark an error message as timeout-related.
// Tested against the message text by hasTimeoutHint(). Note that the final
// alternative is already covered by the shorter "deadline exceeded" alternative.
const TIMEOUT_HINT_RE = /timeout|timed out|deadline exceeded|context deadline exceeded/i;
export class FailoverError extends Error {
readonly reason: FailoverReason;
readonly provider?: string;
@@ -64,6 +66,11 @@ function getStatusCode(err: unknown): number | undefined {
return undefined;
}
/**
 * Reads the `name` property of an error-like value as a string.
 *
 * @param err - Any thrown or rejected value.
 * @returns The stringified `name` when `err` is an object that has one
 *   (own or inherited); otherwise the empty string.
 */
function getErrorName(err: unknown): string {
  if (typeof err !== "object" || err === null) {
    return "";
  }
  if (!("name" in err)) {
    return "";
  }
  // Stringify rather than assume the property is a string.
  return String((err as { name?: unknown }).name);
}
function getErrorCode(err: unknown): string | undefined {
if (!err || typeof err !== "object") return undefined;
const candidate = (err as { code?: unknown }).code;
@@ -86,6 +93,22 @@ function getErrorMessage(err: unknown): string {
return "";
}
/**
 * Reports whether a single value looks like a timeout on its own.
 *
 * A value qualifies when its error name is exactly "TimeoutError", or when
 * its message text matches TIMEOUT_HINT_RE. Falsy values never qualify.
 *
 * @param err - Any thrown value, cause, or abort reason.
 * @returns `true` when the value carries a timeout hint.
 */
function hasTimeoutHint(err: unknown): boolean {
  if (!err) {
    return false;
  }
  const namedTimeout = getErrorName(err) === "TimeoutError";
  if (namedTimeout) {
    return true;
  }
  // Only consult the message when the name did not settle the question.
  const text = getErrorMessage(err);
  return text ? TIMEOUT_HINT_RE.test(text) : false;
}
/**
 * Reports whether an error represents a timeout, including timeouts that
 * surface as an abort.
 *
 * A value is a timeout when it carries a timeout hint itself (see
 * hasTimeoutHint), or when it is an "AbortError" whose `cause` or `reason`
 * is a timeout. Nested AbortError wrappers are followed, so an abort that
 * wraps another abort that wraps a TimeoutError is still recognised; only
 * AbortError links are traversed, so an unrelated error with a timeout buried
 * in its cause chain is not reclassified. A plain AbortError with no timeout
 * anywhere in its abort chain returns `false`, so user aborts stay aborts.
 *
 * @param err - Any thrown or rejected value.
 * @returns `true` when the value should be treated as a timeout.
 */
export function isTimeoutError(err: unknown): boolean {
  // Guards against cyclic cause/reason references.
  const visited = new Set<object>();
  const pending: unknown[] = [err];
  while (pending.length > 0) {
    const candidate = pending.pop();
    if (hasTimeoutHint(candidate)) return true;
    if (!candidate || typeof candidate !== "object") continue;
    if (visited.has(candidate)) continue;
    visited.add(candidate);
    // Only abort wrappers are unwrapped; anything else ends this branch.
    if (getErrorName(candidate) !== "AbortError") continue;
    const { cause, reason } = candidate as { cause?: unknown; reason?: unknown };
    pending.push(reason, cause);
  }
  return false;
}
export function resolveFailoverReasonFromError(err: unknown): FailoverReason | null {
if (isFailoverError(err)) return err.reason;
@@ -99,6 +122,7 @@ export function resolveFailoverReasonFromError(err: unknown): FailoverReason | n
if (["ETIMEDOUT", "ESOCKETTIMEDOUT", "ECONNRESET", "ECONNABORTED"].includes(code)) {
return "timeout";
}
if (isTimeoutError(err)) return "timeout";
const message = getErrorMessage(err);
if (!message) return null;

View File

@@ -281,6 +281,70 @@ describe("runWithModelFallback", () => {
expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5");
});
it("falls back on timeout abort errors", async () => {
  // Arrange: the first attempt rejects with an AbortError whose cause is a TimeoutError.
  const cfg = makeCfg();
  const timeoutCause = Object.assign(new Error("request timed out"), { name: "TimeoutError" });
  const timeoutAbort = Object.assign(new Error("aborted"), {
    name: "AbortError",
    cause: timeoutCause,
  });
  const run = vi.fn().mockRejectedValueOnce(timeoutAbort).mockResolvedValueOnce("ok");

  // Act
  const outcome = await runWithModelFallback({
    cfg,
    provider: "openai",
    model: "gpt-4.1-mini",
    run,
  });

  // Assert: the second attempt ran against the fallback candidate and succeeded.
  expect(outcome.result).toBe("ok");
  expect(run).toHaveBeenCalledTimes(2);
  const secondCall = run.mock.calls[1];
  expect(secondCall?.[0]).toBe("anthropic");
  expect(secondCall?.[1]).toBe("claude-haiku-3-5");
});
it("falls back on abort errors with timeout reasons", async () => {
  // Arrange: the first attempt rejects with an AbortError carrying a timeout-like string reason.
  const cfg = makeCfg();
  const abortWithReason = Object.assign(new Error("aborted"), {
    name: "AbortError",
    reason: "deadline exceeded",
  });
  const run = vi.fn().mockRejectedValueOnce(abortWithReason).mockResolvedValueOnce("ok");

  // Act
  const outcome = await runWithModelFallback({
    cfg,
    provider: "openai",
    model: "gpt-4.1-mini",
    run,
  });

  // Assert: the second attempt ran against the fallback candidate and succeeded.
  expect(outcome.result).toBe("ok");
  expect(run).toHaveBeenCalledTimes(2);
  const secondCall = run.mock.calls[1];
  expect(secondCall?.[0]).toBe("anthropic");
  expect(secondCall?.[1]).toBe("claude-haiku-3-5");
});
it("does not fall back on user aborts", async () => {
  // Arrange: a bare AbortError with no cause or reason attached.
  const cfg = makeCfg();
  const userAbort = Object.assign(new Error("aborted"), { name: "AbortError" });
  const run = vi.fn().mockRejectedValueOnce(userAbort).mockResolvedValueOnce("ok");

  // Act
  const attempt = runWithModelFallback({
    cfg,
    provider: "openai",
    model: "gpt-4.1-mini",
    run,
  });

  // Assert: the abort is rethrown and no second candidate is tried.
  await expect(attempt).rejects.toThrow("aborted");
  expect(run).toHaveBeenCalledTimes(1);
});
it("appends the configured primary as a last fallback", async () => {
const cfg = makeCfg({
agents: {

View File

@@ -1,6 +1,11 @@
import type { ClawdbotConfig } from "../config/config.js";
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
import { coerceToFailoverError, describeFailoverError, isFailoverError } from "./failover-error.js";
import {
coerceToFailoverError,
describeFailoverError,
isFailoverError,
isTimeoutError,
} from "./failover-error.js";
import {
buildModelAliasIndex,
modelKey,
@@ -26,6 +31,7 @@ type FallbackAttempt = {
function isAbortError(err: unknown): boolean {
if (!err || typeof err !== "object") return false;
if (isFailoverError(err)) return false;
const name = "name" in err ? String(err.name) : "";
if (name === "AbortError") return true;
const message =
@@ -33,6 +39,10 @@ function isAbortError(err: unknown): boolean {
return message.includes("aborted");
}
/**
 * Decides whether a caught error is an abort that must be rethrown as-is
 * instead of being handled by the fallback loop.
 *
 * @param err - The value caught from a candidate run.
 * @returns `true` for abort errors that are not timeouts; `false` for
 *   non-aborts and for aborts that isTimeoutError() classifies as timeouts.
 */
function shouldRethrowAbort(err: unknown): boolean {
  if (!isAbortError(err)) {
    return false;
  }
  return !isTimeoutError(err);
}
function buildAllowedModelKeys(
cfg: ClawdbotConfig | undefined,
defaultProvider: string,
@@ -216,7 +226,7 @@ export async function runWithModelFallback<T>(params: {
attempts,
};
} catch (err) {
if (isAbortError(err)) throw err;
if (shouldRethrowAbort(err)) throw err;
const normalized =
coerceToFailoverError(err, {
provider: candidate.provider,
@@ -303,7 +313,7 @@ export async function runWithImageModelFallback<T>(params: {
attempts,
};
} catch (err) {
if (isAbortError(err)) throw err;
if (shouldRethrowAbort(err)) throw err;
lastError = err;
attempts.push({
provider: candidate.provider,

View File

@@ -66,6 +66,7 @@ import { splitSdkTools } from "../tool-split.js";
import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../../date-time.js";
import { describeUnknownError, mapThinkingLevel } from "../utils.js";
import { resolveSandboxRuntimeStatus } from "../../sandbox/runtime-status.js";
import { isTimeoutError } from "../../failover-error.js";
import { getGlobalHookRunner } from "../../../plugins/hook-runner-global.js";
import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js";
@@ -353,25 +354,38 @@ export async function runEmbeddedAttempt(
let aborted = Boolean(params.abortSignal?.aborted);
let timedOut = false;
const abortRun = (isTimeout = false) => {
/**
 * Reads the `reason` property from a signal, if the property exists.
 *
 * @param signal - The signal to inspect.
 * @returns The signal's `reason`, or `undefined` when the property is absent.
 */
const getAbortReason = (signal: AbortSignal): unknown => {
  if (!("reason" in signal)) {
    return undefined;
  }
  return (signal as { reason?: unknown }).reason;
};
/**
 * Builds a fresh error to use as the abort reason for a timeout.
 *
 * @returns A new Error with message "request timed out" and name "TimeoutError".
 */
const makeTimeoutAbortReason = (): Error =>
  Object.assign(new Error("request timed out"), { name: "TimeoutError" });
/**
 * Builds the "AbortError" used to reject work tied to the given signal.
 *
 * @param signal - The signal whose `reason` (if truthy) is attached as the
 *   error's `cause`.
 * @returns A new Error with message "aborted" and name "AbortError".
 */
const makeAbortError = (signal: AbortSignal): Error => {
  const cause = getAbortReason(signal);
  // A falsy reason is treated as "no reason": the error is built without a cause.
  const abortError = cause ? new Error("aborted", { cause }) : new Error("aborted");
  return Object.assign(abortError, { name: "AbortError" });
};
// Aborts the current run: sets the `aborted` flag (and `timedOut` for
// timeouts), aborts the run-scoped controller, then asks the active session
// to abort.
//   isTimeout - when true, the abort is recorded as a timeout.
//   reason    - optional abort reason forwarded to the controller.
// NOTE(review): this listing contains both an argument-less `abort()` call and
// the reason-carrying branches below. Presumably the argument-less call is the
// pre-change line of the diff; confirm only one of them exists in the real file.
const abortRun = (isTimeout = false, reason?: unknown) => {
aborted = true;
if (isTimeout) timedOut = true;
runAbortController.abort();
if (isTimeout) {
// A timeout always carries a reason: the caller's, or a synthesized TimeoutError.
runAbortController.abort(reason ?? makeTimeoutAbortReason());
} else {
runAbortController.abort(reason);
}
// The value returned by the session abort is deliberately discarded.
void activeSession.abort();
};
const abortable = <T>(promise: Promise<T>): Promise<T> => {
const signal = runAbortController.signal;
if (signal.aborted) {
const err = new Error("aborted");
(err as { name?: string }).name = "AbortError";
return Promise.reject(err);
return Promise.reject(makeAbortError(signal));
}
return new Promise<T>((resolve, reject) => {
const onAbort = () => {
const err = new Error("aborted");
(err as { name?: string }).name = "AbortError";
signal.removeEventListener("abort", onAbort);
reject(err);
reject(makeAbortError(signal));
};
signal.addEventListener("abort", onAbort, { once: true });
promise.then(
@@ -448,7 +462,11 @@ export async function runEmbeddedAttempt(
let messagesSnapshot: AgentMessage[] = [];
let sessionIdUsed = activeSession.sessionId;
const onAbort = () => abortRun();
// Handles an abort of the caller-supplied signal: forwards its reason to
// abortRun() and flags the abort as a timeout when the reason looks like one.
const onAbort = () => {
  const upstream = params.abortSignal;
  const reason = upstream ? getAbortReason(upstream) : undefined;
  // A missing or falsy reason is never treated as a timeout.
  const timeout = Boolean(reason) && isTimeoutError(reason);
  abortRun(timeout, reason);
};
if (params.abortSignal) {
if (params.abortSignal.aborted) {
onAbort();