fix(fallback): handle timeout aborts
Co-authored-by: Mykyta Bozhenko <21245729+cheeeee@users.noreply.github.com>
This commit is contained in:
@@ -28,6 +28,7 @@ Docs: https://docs.clawd.bot
|
||||
- Exec approvals: enforce allowlist when ask is off; prefer raw command for node approvals/events.
|
||||
- Tools: return a companion-app-required message when node exec is requested with no paired node.
|
||||
- Streaming: emit assistant deltas for OpenAI-compatible SSE chunks. (#1147) — thanks @alauppe.
|
||||
- Model fallback: treat timeout aborts as failover while preserving user aborts. (#1137) — thanks @cheeeee.
|
||||
|
||||
## 2026.1.18-2
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import { classifyFailoverReason, type FailoverReason } from "./pi-embedded-helpers.js";
|
||||
|
||||
const TIMEOUT_HINT_RE = /timeout|timed out|deadline exceeded|context deadline exceeded/i;
|
||||
|
||||
export class FailoverError extends Error {
|
||||
readonly reason: FailoverReason;
|
||||
readonly provider?: string;
|
||||
@@ -64,6 +66,11 @@ function getStatusCode(err: unknown): number | undefined {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function getErrorName(err: unknown): string {
|
||||
if (!err || typeof err !== "object") return "";
|
||||
return "name" in err ? String(err.name) : "";
|
||||
}
|
||||
|
||||
function getErrorCode(err: unknown): string | undefined {
|
||||
if (!err || typeof err !== "object") return undefined;
|
||||
const candidate = (err as { code?: unknown }).code;
|
||||
@@ -86,6 +93,22 @@ function getErrorMessage(err: unknown): string {
|
||||
return "";
|
||||
}
|
||||
|
||||
function hasTimeoutHint(err: unknown): boolean {
|
||||
if (!err) return false;
|
||||
if (getErrorName(err) === "TimeoutError") return true;
|
||||
const message = getErrorMessage(err);
|
||||
return Boolean(message && TIMEOUT_HINT_RE.test(message));
|
||||
}
|
||||
|
||||
export function isTimeoutError(err: unknown): boolean {
|
||||
if (hasTimeoutHint(err)) return true;
|
||||
if (!err || typeof err !== "object") return false;
|
||||
if (getErrorName(err) !== "AbortError") return false;
|
||||
const cause = "cause" in err ? (err as { cause?: unknown }).cause : undefined;
|
||||
const reason = "reason" in err ? (err as { reason?: unknown }).reason : undefined;
|
||||
return hasTimeoutHint(cause) || hasTimeoutHint(reason);
|
||||
}
|
||||
|
||||
export function resolveFailoverReasonFromError(err: unknown): FailoverReason | null {
|
||||
if (isFailoverError(err)) return err.reason;
|
||||
|
||||
@@ -99,6 +122,7 @@ export function resolveFailoverReasonFromError(err: unknown): FailoverReason | n
|
||||
if (["ETIMEDOUT", "ESOCKETTIMEDOUT", "ECONNRESET", "ECONNABORTED"].includes(code)) {
|
||||
return "timeout";
|
||||
}
|
||||
if (isTimeoutError(err)) return "timeout";
|
||||
|
||||
const message = getErrorMessage(err);
|
||||
if (!message) return null;
|
||||
|
||||
@@ -281,6 +281,70 @@ describe("runWithModelFallback", () => {
|
||||
expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5");
|
||||
});
|
||||
|
||||
it("falls back on timeout abort errors", async () => {
|
||||
const cfg = makeCfg();
|
||||
const timeoutCause = Object.assign(new Error("request timed out"), { name: "TimeoutError" });
|
||||
const run = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(
|
||||
Object.assign(new Error("aborted"), { name: "AbortError", cause: timeoutCause }),
|
||||
)
|
||||
.mockResolvedValueOnce("ok");
|
||||
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "openai",
|
||||
model: "gpt-4.1-mini",
|
||||
run,
|
||||
});
|
||||
|
||||
expect(result.result).toBe("ok");
|
||||
expect(run).toHaveBeenCalledTimes(2);
|
||||
expect(run.mock.calls[1]?.[0]).toBe("anthropic");
|
||||
expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5");
|
||||
});
|
||||
|
||||
it("falls back on abort errors with timeout reasons", async () => {
|
||||
const cfg = makeCfg();
|
||||
const run = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(
|
||||
Object.assign(new Error("aborted"), { name: "AbortError", reason: "deadline exceeded" }),
|
||||
)
|
||||
.mockResolvedValueOnce("ok");
|
||||
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "openai",
|
||||
model: "gpt-4.1-mini",
|
||||
run,
|
||||
});
|
||||
|
||||
expect(result.result).toBe("ok");
|
||||
expect(run).toHaveBeenCalledTimes(2);
|
||||
expect(run.mock.calls[1]?.[0]).toBe("anthropic");
|
||||
expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5");
|
||||
});
|
||||
|
||||
it("does not fall back on user aborts", async () => {
|
||||
const cfg = makeCfg();
|
||||
const run = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(Object.assign(new Error("aborted"), { name: "AbortError" }))
|
||||
.mockResolvedValueOnce("ok");
|
||||
|
||||
await expect(
|
||||
runWithModelFallback({
|
||||
cfg,
|
||||
provider: "openai",
|
||||
model: "gpt-4.1-mini",
|
||||
run,
|
||||
}),
|
||||
).rejects.toThrow("aborted");
|
||||
|
||||
expect(run).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("appends the configured primary as a last fallback", async () => {
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
import type { ClawdbotConfig } from "../config/config.js";
|
||||
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
|
||||
import { coerceToFailoverError, describeFailoverError, isFailoverError } from "./failover-error.js";
|
||||
import {
|
||||
coerceToFailoverError,
|
||||
describeFailoverError,
|
||||
isFailoverError,
|
||||
isTimeoutError,
|
||||
} from "./failover-error.js";
|
||||
import {
|
||||
buildModelAliasIndex,
|
||||
modelKey,
|
||||
@@ -26,6 +31,7 @@ type FallbackAttempt = {
|
||||
|
||||
function isAbortError(err: unknown): boolean {
|
||||
if (!err || typeof err !== "object") return false;
|
||||
if (isFailoverError(err)) return false;
|
||||
const name = "name" in err ? String(err.name) : "";
|
||||
if (name === "AbortError") return true;
|
||||
const message =
|
||||
@@ -33,6 +39,10 @@ function isAbortError(err: unknown): boolean {
|
||||
return message.includes("aborted");
|
||||
}
|
||||
|
||||
function shouldRethrowAbort(err: unknown): boolean {
|
||||
return isAbortError(err) && !isTimeoutError(err);
|
||||
}
|
||||
|
||||
function buildAllowedModelKeys(
|
||||
cfg: ClawdbotConfig | undefined,
|
||||
defaultProvider: string,
|
||||
@@ -216,7 +226,7 @@ export async function runWithModelFallback<T>(params: {
|
||||
attempts,
|
||||
};
|
||||
} catch (err) {
|
||||
if (isAbortError(err)) throw err;
|
||||
if (shouldRethrowAbort(err)) throw err;
|
||||
const normalized =
|
||||
coerceToFailoverError(err, {
|
||||
provider: candidate.provider,
|
||||
@@ -303,7 +313,7 @@ export async function runWithImageModelFallback<T>(params: {
|
||||
attempts,
|
||||
};
|
||||
} catch (err) {
|
||||
if (isAbortError(err)) throw err;
|
||||
if (shouldRethrowAbort(err)) throw err;
|
||||
lastError = err;
|
||||
attempts.push({
|
||||
provider: candidate.provider,
|
||||
|
||||
@@ -66,6 +66,7 @@ import { splitSdkTools } from "../tool-split.js";
|
||||
import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../../date-time.js";
|
||||
import { describeUnknownError, mapThinkingLevel } from "../utils.js";
|
||||
import { resolveSandboxRuntimeStatus } from "../../sandbox/runtime-status.js";
|
||||
import { isTimeoutError } from "../../failover-error.js";
|
||||
import { getGlobalHookRunner } from "../../../plugins/hook-runner-global.js";
|
||||
|
||||
import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js";
|
||||
@@ -353,25 +354,38 @@ export async function runEmbeddedAttempt(
|
||||
|
||||
let aborted = Boolean(params.abortSignal?.aborted);
|
||||
let timedOut = false;
|
||||
const abortRun = (isTimeout = false) => {
|
||||
const getAbortReason = (signal: AbortSignal): unknown =>
|
||||
"reason" in signal ? (signal as { reason?: unknown }).reason : undefined;
|
||||
const makeTimeoutAbortReason = (): Error => {
|
||||
const err = new Error("request timed out");
|
||||
err.name = "TimeoutError";
|
||||
return err;
|
||||
};
|
||||
const makeAbortError = (signal: AbortSignal): Error => {
|
||||
const reason = getAbortReason(signal);
|
||||
const err = reason ? new Error("aborted", { cause: reason }) : new Error("aborted");
|
||||
err.name = "AbortError";
|
||||
return err;
|
||||
};
|
||||
const abortRun = (isTimeout = false, reason?: unknown) => {
|
||||
aborted = true;
|
||||
if (isTimeout) timedOut = true;
|
||||
runAbortController.abort();
|
||||
if (isTimeout) {
|
||||
runAbortController.abort(reason ?? makeTimeoutAbortReason());
|
||||
} else {
|
||||
runAbortController.abort(reason);
|
||||
}
|
||||
void activeSession.abort();
|
||||
};
|
||||
const abortable = <T>(promise: Promise<T>): Promise<T> => {
|
||||
const signal = runAbortController.signal;
|
||||
if (signal.aborted) {
|
||||
const err = new Error("aborted");
|
||||
(err as { name?: string }).name = "AbortError";
|
||||
return Promise.reject(err);
|
||||
return Promise.reject(makeAbortError(signal));
|
||||
}
|
||||
return new Promise<T>((resolve, reject) => {
|
||||
const onAbort = () => {
|
||||
const err = new Error("aborted");
|
||||
(err as { name?: string }).name = "AbortError";
|
||||
signal.removeEventListener("abort", onAbort);
|
||||
reject(err);
|
||||
reject(makeAbortError(signal));
|
||||
};
|
||||
signal.addEventListener("abort", onAbort, { once: true });
|
||||
promise.then(
|
||||
@@ -448,7 +462,11 @@ export async function runEmbeddedAttempt(
|
||||
|
||||
let messagesSnapshot: AgentMessage[] = [];
|
||||
let sessionIdUsed = activeSession.sessionId;
|
||||
const onAbort = () => abortRun();
|
||||
const onAbort = () => {
|
||||
const reason = params.abortSignal ? getAbortReason(params.abortSignal) : undefined;
|
||||
const timeout = reason ? isTimeoutError(reason) : false;
|
||||
abortRun(timeout, reason);
|
||||
};
|
||||
if (params.abortSignal) {
|
||||
if (params.abortSignal.aborted) {
|
||||
onAbort();
|
||||
|
||||
Reference in New Issue
Block a user