refactor: centralize failover error parsing

This commit is contained in:
Peter Steinberger
2026-01-10 01:25:01 +01:00
parent 82ffcfb181
commit 53ec8e36cb
7 changed files with 98 additions and 58 deletions

View File

@@ -58,6 +58,8 @@ If you have both an OAuth profile and an API key profile for the same provider,
When a profile fails due to auth/rate-limit errors (or a timeout that looks
like rate limiting), Clawdbot marks it in cooldown and moves to the next profile.
Format/invalid-request errors (for example Cloud Code Assist tool call ID
validation failures) are treated as failover-worthy and use the same cooldowns.
Cooldowns use exponential backoff: Cooldowns use exponential backoff:
- 1 minute - 1 minute

View File

@@ -74,6 +74,7 @@ export type AuthProfileCredential =
export type AuthProfileFailureReason = export type AuthProfileFailureReason =
| "auth" | "auth"
| "format"
| "rate_limit" | "rate_limit"
| "billing" | "billing"
| "timeout" | "timeout"

View File

@@ -15,6 +15,14 @@ describe("failover-error", () => {
expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout"); expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout");
}); });
it("infers format errors from error messages", () => {
expect(
resolveFailoverReasonFromError({
message: "invalid request format: messages.1.content.1.tool_use.id",
}),
).toBe("format");
});
it("infers timeout from common node error codes", () => { it("infers timeout from common node error codes", () => {
expect(resolveFailoverReasonFromError({ code: "ETIMEDOUT" })).toBe( expect(resolveFailoverReasonFromError({ code: "ETIMEDOUT" })).toBe(
"timeout", "timeout",
@@ -36,6 +44,15 @@ describe("failover-error", () => {
expect(err?.model).toBe("claude-opus-4-5"); expect(err?.model).toBe("claude-opus-4-5");
}); });
it("coerces format errors with a 400 status", () => {
const err = coerceToFailoverError("invalid request format", {
provider: "google",
model: "cloud-code-assist",
});
expect(err?.reason).toBe("format");
expect(err?.status).toBe(400);
});
it("describes non-Error values consistently", () => { it("describes non-Error values consistently", () => {
const described = describeFailoverError(123); const described = describeFailoverError(123);
expect(described.message).toBe("123"); expect(described.message).toBe("123");

View File

@@ -50,6 +50,8 @@ export function resolveFailoverStatus(
return 401; return 401;
case "timeout": case "timeout":
return 408; return 408;
case "format":
return 400;
default: default:
return undefined; return undefined;
} }

View File

@@ -249,6 +249,7 @@ describe("isFailoverErrorMessage", () => {
"429 rate limit exceeded", "429 rate limit exceeded",
"Your credit balance is too low", "Your credit balance is too low",
"request timed out", "request timed out",
"invalid request format",
]; ];
for (const sample of samples) { for (const sample of samples) {
expect(isFailoverErrorMessage(sample)).toBe(true); expect(isFailoverErrorMessage(sample)).toBe(true);
@@ -263,9 +264,12 @@ describe("classifyFailoverReason", () => {
expect(classifyFailoverReason("resource has been exhausted")).toBe( expect(classifyFailoverReason("resource has been exhausted")).toBe(
"rate_limit", "rate_limit",
); );
expect(classifyFailoverReason("invalid request format")).toBe("format");
expect(classifyFailoverReason("credit balance too low")).toBe("billing"); expect(classifyFailoverReason("credit balance too low")).toBe("billing");
expect(classifyFailoverReason("deadline exceeded")).toBe("timeout"); expect(classifyFailoverReason("deadline exceeded")).toBe("timeout");
expect(classifyFailoverReason("string should match pattern")).toBeNull(); expect(classifyFailoverReason("string should match pattern")).toBe(
"format",
);
expect(classifyFailoverReason("bad request")).toBeNull(); expect(classifyFailoverReason("bad request")).toBeNull();
}); });
}); });

View File

@@ -276,47 +276,84 @@ export function isRateLimitAssistantError(
msg: AssistantMessage | undefined, msg: AssistantMessage | undefined,
): boolean { ): boolean {
if (!msg || msg.stopReason !== "error") return false; if (!msg || msg.stopReason !== "error") return false;
const raw = (msg.errorMessage ?? "").toLowerCase(); return isRateLimitErrorMessage(msg.errorMessage ?? "");
}
/** A single matcher: a regular expression, or a substring to search for. */
type ErrorPattern = RegExp | string;

/**
 * Patterns used to classify provider error messages, grouped by failure
 * category.
 *
 * `matchesErrorPatterns` lower-cases the message before testing, so every
 * entry here must be written in lower case. String entries are matched as
 * substrings; RegExp entries are tested against the lower-cased message.
 */
const ERROR_PATTERNS = {
  rateLimit: [
    // `\b429\b` (rather than a bare `429`) keeps the status code from matching
    // inside longer digit runs such as request ids, in line with the
    // 401/402/403 patterns below.
    /rate[_ ]limit|too many requests|\b429\b/,
    "exceeded your current quota",
    "resource has been exhausted",
    "quota exceeded",
    "resource_exhausted",
  ],
  timeout: [
    "timeout",
    "timed out",
    "deadline exceeded",
    // Already covered by "deadline exceeded"; listed for readability.
    "context deadline exceeded",
  ],
  billing: [
    /\b402\b/,
    "payment required",
    "insufficient credits",
    "credit balance",
    "plans & billing",
  ],
  auth: [
    /invalid[_ ]?api[_ ]?key/,
    "incorrect api key",
    "invalid token",
    "authentication",
    "unauthorized",
    "forbidden",
    "access denied",
    "expired",
    // Already covered by "expired"; listed for readability.
    "token has expired",
    /\b401\b/,
    /\b403\b/,
  ],
  format: [
    "invalid_request_error",
    "string should match pattern",
    "tool_use.id",
    "tool_use_id",
    // Already covered by "tool_use.id"; listed for readability.
    "messages.1.content.1.tool_use.id",
    "invalid request format",
  ],
} as const;
/**
 * Reports whether an error message matches at least one of the given patterns.
 *
 * The message is lower-cased before matching, so patterns must be written in
 * lower case. String patterns match as substrings; RegExp patterns are tested
 * against the lower-cased message.
 *
 * @param raw - Error message to inspect; an empty string never matches.
 * @param patterns - Substrings and/or regular expressions to look for.
 * @returns `true` if any pattern matches, otherwise `false`.
 */
function matchesErrorPatterns(
  raw: string,
  patterns: readonly ErrorPattern[],
): boolean {
  if (!raw) return false;
  const value = raw.toLowerCase();
  return patterns.some((pattern) =>
    pattern instanceof RegExp ? pattern.test(value) : value.includes(pattern),
  );
}
/**
 * Reports whether an error message looks like a rate-limit or quota failure.
 *
 * @param raw - Error message to inspect (matched case-insensitively).
 * @returns `true` if the message matches any `ERROR_PATTERNS.rateLimit` entry.
 */
export function isRateLimitErrorMessage(raw: string): boolean {
  return matchesErrorPatterns(raw, ERROR_PATTERNS.rateLimit);
}
/**
 * Reports whether an error message looks like a timeout.
 *
 * @param raw - Error message to inspect (matched case-insensitively).
 * @returns `true` if the message matches any `ERROR_PATTERNS.timeout` entry.
 */
export function isTimeoutErrorMessage(raw: string): boolean {
  return matchesErrorPatterns(raw, ERROR_PATTERNS.timeout);
}
/**
 * Reports whether an error message looks like a billing or credit failure.
 *
 * A message qualifies if it matches any `ERROR_PATTERNS.billing` entry, or if
 * it mentions "billing" together with one of "upgrade", "credits", "payment"
 * or "plan".
 *
 * @param raw - Error message to inspect (matched case-insensitively).
 * @returns `true` for billing-related messages, `false` otherwise (including
 *   for an empty message).
 */
export function isBillingErrorMessage(raw: string): boolean {
  const value = raw.toLowerCase();
  if (!value) return false;
  if (matchesErrorPatterns(value, ERROR_PATTERNS.billing)) return true;
  // "billing" alone is too generic; require an accompanying keyword.
  return (
    value.includes("billing") &&
    (value.includes("upgrade") ||
      value.includes("credits") ||
      value.includes("payment") ||
      value.includes("plan"))
  );
}
@@ -328,34 +365,11 @@ export function isBillingAssistantError(
} }
/**
 * Reports whether an error message looks like an authentication or
 * authorization failure.
 *
 * @param raw - Error message to inspect (matched case-insensitively).
 * @returns `true` if the message matches any `ERROR_PATTERNS.auth` entry.
 */
export function isAuthErrorMessage(raw: string): boolean {
  return matchesErrorPatterns(raw, ERROR_PATTERNS.auth);
}
/**
 * Reports whether an error message looks like a request-format or
 * invalid-request failure (for example a rejected tool-use id).
 *
 * @param raw - Error message to inspect (matched case-insensitively).
 * @returns `true` if the message matches any `ERROR_PATTERNS.format` entry.
 */
export function isCloudCodeAssistFormatError(raw: string): boolean {
  return matchesErrorPatterns(raw, ERROR_PATTERNS.format);
}
export function isAuthAssistantError( export function isAuthAssistantError(
@@ -367,16 +381,18 @@ export function isAuthAssistantError(
/** Category assigned to a failure when deciding whether to fail over. */
export type FailoverReason =
  | "auth"
  | "format"
  | "rate_limit"
  | "billing"
  | "timeout"
  | "unknown";
/**
 * Maps a raw error message to a failover reason.
 *
 * Checks run in a fixed order and the first match wins: rate limit, format,
 * billing, timeout, then auth.
 *
 * @param raw - Error message to classify.
 * @returns The matching reason, or `null` when no category matches.
 */
export function classifyFailoverReason(raw: string): FailoverReason | null {
  if (isRateLimitErrorMessage(raw)) return "rate_limit";
  if (isCloudCodeAssistFormatError(raw)) return "format";
  if (isBillingErrorMessage(raw)) return "billing";
  if (isTimeoutErrorMessage(raw)) return "timeout";
  // NOTE(review): auth is checked last, presumably because its patterns
  // include very general substrings (e.g. "expired", "authentication") that
  // could otherwise shadow the more specific categories; confirm the order.
  if (isAuthErrorMessage(raw)) return "auth";
  return null;
}

View File

@@ -1533,9 +1533,7 @@ export async function runEmbeddedPiAgent(params: {
: false; : false;
// Treat timeout as potential rate limit (Antigravity hangs on rate limit) // Treat timeout as potential rate limit (Antigravity hangs on rate limit)
const shouldRotate = const shouldRotate = (!aborted && failoverFailure) || timedOut;
(!aborted && (failoverFailure || cloudCodeAssistFormatError)) ||
timedOut;
if (shouldRotate) { if (shouldRotate) {
// Mark current profile for cooldown before rotating // Mark current profile for cooldown before rotating