diff --git a/src/agents/auth-profiles.ts b/src/agents/auth-profiles.ts index 77549296d..099d04d47 100644 --- a/src/agents/auth-profiles.ts +++ b/src/agents/auth-profiles.ts @@ -842,6 +842,57 @@ export function calculateAuthProfileCooldownMs(errorCount: number): number { ); } +type ResolvedAuthCooldownConfig = { + billingBackoffMs: number; + billingMaxMs: number; + failureWindowMs: number; +}; + +function resolveAuthCooldownConfig(params: { + cfg?: ClawdbotConfig; + providerId: string; +}): ResolvedAuthCooldownConfig { + const defaults = { + billingBackoffHours: 5, + billingMaxHours: 24, + failureWindowHours: 24, + } as const; + + const resolveHours = (value: unknown, fallback: number) => + typeof value === "number" && Number.isFinite(value) && value > 0 + ? value + : fallback; + + const cooldowns = params.cfg?.auth?.cooldowns; + const billingOverride = (() => { + const map = cooldowns?.billingBackoffHoursByProvider; + if (!map) return undefined; + for (const [key, value] of Object.entries(map)) { + if (normalizeProviderId(key) === params.providerId) return value; + } + return undefined; + })(); + + const billingBackoffHours = resolveHours( + billingOverride ?? cooldowns?.billingBackoffHours, + defaults.billingBackoffHours, + ); + const billingMaxHours = resolveHours( + cooldowns?.billingMaxHours, + defaults.billingMaxHours, + ); + const failureWindowHours = resolveHours( + cooldowns?.failureWindowHours, + defaults.failureWindowHours, + ); + + return { + billingBackoffMs: billingBackoffHours * 60 * 60 * 1000, + billingMaxMs: billingMaxHours * 60 * 60 * 1000, + failureWindowMs: failureWindowHours * 60 * 60 * 1000, + }; +} + function calculateAuthProfileBillingDisableMsWithConfig(params: { errorCount: number; baseMs: number; @@ -872,6 +923,49 @@ export function resolveProfileUnusableUntilForDisplay( return resolveProfileUnusableUntil(stats); } +function computeNextProfileUsageStats(params: { + existing: ProfileUsageStats; + now: number; + reason: AuthProfileFailureReason; + cfgResolved: ResolvedAuthCooldownConfig; +}): ProfileUsageStats { + const windowMs = params.cfgResolved.failureWindowMs; + const windowExpired = + typeof params.existing.lastFailureAt === "number" && + params.existing.lastFailureAt > 0 && + params.now - params.existing.lastFailureAt > windowMs; + + const baseErrorCount = windowExpired ? 0 : (params.existing.errorCount ?? 0); + const nextErrorCount = baseErrorCount + 1; + const failureCounts = windowExpired + ? {} + : { ...params.existing.failureCounts }; + failureCounts[params.reason] = (failureCounts[params.reason] ?? 0) + 1; + + const updatedStats: ProfileUsageStats = { + ...params.existing, + errorCount: nextErrorCount, + failureCounts, + lastFailureAt: params.now, + }; + + if (params.reason === "billing") { + const billingCount = failureCounts.billing ?? 1; + const backoffMs = calculateAuthProfileBillingDisableMsWithConfig({ + errorCount: billingCount, + baseMs: params.cfgResolved.billingBackoffMs, + maxMs: params.cfgResolved.billingMaxMs, + }); + updatedStats.disabledUntil = params.now + backoffMs; + updatedStats.disabledReason = "billing"; + } else { + const backoffMs = calculateAuthProfileCooldownMs(nextErrorCount); + updatedStats.cooldownUntil = params.now + backoffMs; + } + + return updatedStats; +} + /** * Mark a profile as failed for a specific reason. Billing failures are treated * as "disabled" (longer backoff) vs the regular cooldown window. @@ -884,44 +978,6 @@ export async function markAuthProfileFailure(params: { agentDir?: string; }): Promise { const { store, profileId, reason, agentDir, cfg } = params; - const defaults = { - billingBackoffHours: 5, - billingMaxHours: 24, - failureWindowHours: 24, - } as const; - const resolveHours = (value: unknown, fallback: number) => - typeof value === "number" && Number.isFinite(value) && value > 0 - ? value - : fallback; - const resolveCooldownConfig = (providerId: string) => { - const cooldowns = cfg?.auth?.cooldowns; - const billingOverride = (() => { - const map = cooldowns?.billingBackoffHoursByProvider; - if (!map) return undefined; - for (const [key, value] of Object.entries(map)) { - if (normalizeProviderId(key) === providerId) return value; - } - return undefined; - })(); - const billingBackoffHours = resolveHours( - billingOverride ?? cooldowns?.billingBackoffHours, - defaults.billingBackoffHours, - ); - const billingMaxHours = resolveHours( - cooldowns?.billingMaxHours, - defaults.billingMaxHours, - ); - const failureWindowHours = resolveHours( - cooldowns?.failureWindowHours, - defaults.failureWindowHours, - ); - return { - billingBackoffMs: billingBackoffHours * 60 * 60 * 1000, - billingMaxMs: billingMaxHours * 60 * 60 * 1000, - failureWindowMs: failureWindowHours * 60 * 60 * 1000, - }; - }; - const updated = await updateAuthProfileStoreWithLock({ agentDir, updater: (freshStore) => { @@ -932,41 +988,17 @@ export async function markAuthProfileFailure(params: { const now = Date.now(); const providerKey = normalizeProviderId(profile.provider); - const cfgResolved = resolveCooldownConfig(providerKey); + const cfgResolved = resolveAuthCooldownConfig({ + cfg, + providerId: providerKey, + }); - const windowMs = cfgResolved.failureWindowMs; - const windowExpired = - typeof existing.lastFailureAt === "number" && - existing.lastFailureAt > 0 && - now - existing.lastFailureAt > windowMs; - - const baseErrorCount = windowExpired ? 0 : (existing.errorCount ?? 0); - const nextErrorCount = baseErrorCount + 1; - const failureCounts = windowExpired ? {} : { ...existing.failureCounts }; - failureCounts[reason] = (failureCounts[reason] ?? 0) + 1; - - const updatedStats: ProfileUsageStats = { - ...existing, - errorCount: nextErrorCount, - failureCounts, - lastFailureAt: now, - }; - - if (reason === "billing") { - const billingCount = failureCounts.billing ?? 1; - const backoffMs = calculateAuthProfileBillingDisableMsWithConfig({ - errorCount: billingCount, - baseMs: cfgResolved.billingBackoffMs, - maxMs: cfgResolved.billingMaxMs, - }); - updatedStats.disabledUntil = now + backoffMs; - updatedStats.disabledReason = "billing"; - } else { - const backoffMs = calculateAuthProfileCooldownMs(nextErrorCount); - updatedStats.cooldownUntil = now + backoffMs; - } - - freshStore.usageStats[profileId] = updatedStats; + freshStore.usageStats[profileId] = computeNextProfileUsageStats({ + existing, + now, + reason, + cfgResolved, + }); return true; }, }); @@ -982,38 +1014,17 @@ export async function markAuthProfileFailure(params: { const providerKey = normalizeProviderId( store.profiles[profileId]?.provider ?? "", ); - const cfgResolved = resolveCooldownConfig(providerKey); - const windowMs = cfgResolved.failureWindowMs; - const windowExpired = - typeof existing.lastFailureAt === "number" && - existing.lastFailureAt > 0 && - now - existing.lastFailureAt > windowMs; - const baseErrorCount = windowExpired ? 0 : (existing.errorCount ?? 0); - const nextErrorCount = baseErrorCount + 1; - const failureCounts = windowExpired ? {} : { ...existing.failureCounts }; - failureCounts[reason] = (failureCounts[reason] ?? 0) + 1; + const cfgResolved = resolveAuthCooldownConfig({ + cfg, + providerId: providerKey, + }); - const updatedStats: ProfileUsageStats = { - ...existing, - errorCount: nextErrorCount, - failureCounts, - lastFailureAt: now, - }; - if (reason === "billing") { - const billingCount = failureCounts.billing ?? 1; - const backoffMs = calculateAuthProfileBillingDisableMsWithConfig({ - errorCount: billingCount, - baseMs: cfgResolved.billingBackoffMs, - maxMs: cfgResolved.billingMaxMs, - }); - updatedStats.disabledUntil = now + backoffMs; - updatedStats.disabledReason = "billing"; - } else { - const backoffMs = calculateAuthProfileCooldownMs(nextErrorCount); - updatedStats.cooldownUntil = now + backoffMs; - } - - store.usageStats[profileId] = updatedStats; + store.usageStats[profileId] = computeNextProfileUsageStats({ + existing, + now, + reason, + cfgResolved, + }); saveAuthProfileStore(store, agentDir); } diff --git a/src/agents/failover-error.test.ts b/src/agents/failover-error.test.ts new file mode 100644 index 000000000..a94fb8b3f --- /dev/null +++ b/src/agents/failover-error.test.ts @@ -0,0 +1,44 @@ +import { describe, expect, it } from "vitest"; +import { + coerceToFailoverError, + describeFailoverError, + resolveFailoverReasonFromError, +} from "./failover-error.js"; + +describe("failover-error", () => { + it("infers failover reason from HTTP status", () => { + expect(resolveFailoverReasonFromError({ status: 402 })).toBe("billing"); + expect(resolveFailoverReasonFromError({ statusCode: "429" })).toBe( + "rate_limit", + ); + expect(resolveFailoverReasonFromError({ status: 403 })).toBe("auth"); + expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout"); + }); + + it("infers timeout from common node error codes", () => { + expect(resolveFailoverReasonFromError({ code: "ETIMEDOUT" })).toBe( + "timeout", + ); + expect(resolveFailoverReasonFromError({ code: "ECONNRESET" })).toBe( + "timeout", + ); + }); + + it("coerces failover-worthy errors into FailoverError with metadata", () => { + const err = coerceToFailoverError("credit balance too low", { + provider: "anthropic", + model: "claude-opus-4-5", + }); + expect(err?.name).toBe("FailoverError"); + expect(err?.reason).toBe("billing"); + expect(err?.status).toBe(402); + expect(err?.provider).toBe("anthropic"); + expect(err?.model).toBe("claude-opus-4-5"); + }); + + it("describes non-Error values consistently", () => { + const described = describeFailoverError(123); + expect(described.message).toBe("123"); + expect(described.reason).toBeUndefined(); + }); +}); diff --git a/src/agents/failover-error.ts b/src/agents/failover-error.ts index fcebd1e87..fdadab0a7 100644 --- a/src/agents/failover-error.ts +++ b/src/agents/failover-error.ts @@ -1,4 +1,7 @@ -import type { FailoverReason } from "./pi-embedded-helpers.js"; +import { + classifyFailoverReason, + type FailoverReason, +} from "./pi-embedded-helpers.js"; export class FailoverError extends Error { readonly reason: FailoverReason; @@ -51,3 +54,116 @@ export function resolveFailoverStatus( return undefined; } } + +function getStatusCode(err: unknown): number | undefined { + if (!err || typeof err !== "object") return undefined; + const candidate = + (err as { status?: unknown; statusCode?: unknown }).status ?? + (err as { statusCode?: unknown }).statusCode; + if (typeof candidate === "number") return candidate; + if (typeof candidate === "string" && /^\d+$/.test(candidate)) { + return Number(candidate); + } + return undefined; +} + +function getErrorCode(err: unknown): string | undefined { + if (!err || typeof err !== "object") return undefined; + const candidate = (err as { code?: unknown }).code; + if (typeof candidate !== "string") return undefined; + const trimmed = candidate.trim(); + return trimmed ? trimmed : undefined; +} + +function getErrorMessage(err: unknown): string { + if (err instanceof Error) return err.message; + if (typeof err === "string") return err; + if ( + typeof err === "number" || + typeof err === "boolean" || + typeof err === "bigint" + ) { + return String(err); + } + if (typeof err === "symbol") return err.description ?? ""; + if (err && typeof err === "object") { + const message = (err as { message?: unknown }).message; + if (typeof message === "string") return message; + } + return ""; +} + +export function resolveFailoverReasonFromError( + err: unknown, +): FailoverReason | null { + if (isFailoverError(err)) return err.reason; + + const status = getStatusCode(err); + if (status === 402) return "billing"; + if (status === 429) return "rate_limit"; + if (status === 401 || status === 403) return "auth"; + if (status === 408) return "timeout"; + + const code = (getErrorCode(err) ?? "").toUpperCase(); + if ( + ["ETIMEDOUT", "ESOCKETTIMEDOUT", "ECONNRESET", "ECONNABORTED"].includes( + code, + ) + ) { + return "timeout"; + } + + const message = getErrorMessage(err); + if (!message) return null; + return classifyFailoverReason(message); +} + +export function describeFailoverError(err: unknown): { + message: string; + reason?: FailoverReason; + status?: number; + code?: string; +} { + if (isFailoverError(err)) { + return { + message: err.message, + reason: err.reason, + status: err.status, + code: err.code, + }; + } + const message = getErrorMessage(err) || String(err); + return { + message, + reason: resolveFailoverReasonFromError(err) ?? undefined, + status: getStatusCode(err), + code: getErrorCode(err), + }; +} + +export function coerceToFailoverError( + err: unknown, + context?: { + provider?: string; + model?: string; + profileId?: string; + }, +): FailoverError | null { + if (isFailoverError(err)) return err; + const reason = resolveFailoverReasonFromError(err); + if (!reason) return null; + + const message = getErrorMessage(err) || String(err); + const status = getStatusCode(err) ?? resolveFailoverStatus(reason); + const code = getErrorCode(err); + + return new FailoverError(message, { + reason, + provider: context?.provider, + model: context?.model, + profileId: context?.profileId, + status, + code, + cause: err instanceof Error ? err : undefined, + }); +} diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 607d69094..ed8188842 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -1,6 +1,10 @@ import type { ClawdbotConfig } from "../config/config.js"; import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js"; -import { type FailoverError, isFailoverError } from "./failover-error.js"; +import { + coerceToFailoverError, + describeFailoverError, + isFailoverError, +} from "./failover-error.js"; import { buildModelAliasIndex, modelKey, @@ -9,7 +13,6 @@ import { resolveModelRefFromString, } from "./model-selection.js"; import type { FailoverReason } from "./pi-embedded-helpers.js"; -import { isFailoverErrorMessage } from "./pi-embedded-helpers.js"; type ModelCandidate = { provider: string; @@ -36,81 +39,6 @@ function isAbortError(err: unknown): boolean { return message.includes("aborted"); } -function getStatusCode(err: unknown): number | null { - if (!err || typeof err !== "object") return null; - const candidate = - (err as { status?: unknown; statusCode?: unknown }).status ?? - (err as { statusCode?: unknown }).statusCode; - if (typeof candidate === "number") return candidate; - if (typeof candidate === "string" && /^\d+$/.test(candidate)) { - return Number(candidate); - } - return null; -} - -function getErrorCode(err: unknown): string { - if (!err || typeof err !== "object") return ""; - const candidate = (err as { code?: unknown }).code; - return typeof candidate === "string" ? candidate : ""; -} - -function getErrorMessage(err: unknown): string { - if (err instanceof Error) return err.message; - if (typeof err === "string") return err; - if ( - typeof err === "number" || - typeof err === "boolean" || - typeof err === "bigint" - ) { - return String(err); - } - if (typeof err === "symbol") return err.description ?? ""; - if (err && typeof err === "object") { - const message = (err as { message?: unknown }).message; - if (typeof message === "string") return message; - } - return ""; -} - -function describeFallbackError(err: unknown): { - message: string; - reason?: FailoverReason; - status?: number; - code?: string; -} { - if (isFailoverError(err)) { - const fe = err as FailoverError; - return { - message: fe.message, - reason: fe.reason, - status: fe.status, - code: fe.code, - }; - } - return { - message: getErrorMessage(err) || String(err), - status: getStatusCode(err) ?? undefined, - code: getErrorCode(err) || undefined, - }; -} - -function shouldFallbackForError(err: unknown): boolean { - if (isFailoverError(err)) return true; - const statusCode = getStatusCode(err); - if (statusCode && [401, 402, 403, 408, 429].includes(statusCode)) return true; - const code = getErrorCode(err).toUpperCase(); - if ( - ["ETIMEDOUT", "ESOCKETTIMEDOUT", "ECONNRESET", "ECONNABORTED"].includes( - code, - ) - ) { - return true; - } - const message = getErrorMessage(err); - if (!message) return false; - return isFailoverErrorMessage(message); -} - function buildAllowedModelKeys( cfg: ClawdbotConfig | undefined, defaultProvider: string, @@ -290,10 +218,15 @@ export async function runWithModelFallback(params: { }; } catch (err) { if (isAbortError(err)) throw err; - const shouldFallback = shouldFallbackForError(err); - if (!shouldFallback) throw err; - lastError = err; - const described = describeFallbackError(err); + const normalized = + coerceToFailoverError(err, { + provider: candidate.provider, + model: candidate.model, + }) ?? err; + if (!isFailoverError(normalized)) throw err; + + lastError = normalized; + const described = describeFailoverError(normalized); attempts.push({ provider: candidate.provider, model: candidate.model, @@ -305,7 +238,7 @@ export async function runWithModelFallback(params: { await params.onError?.({ provider: candidate.provider, model: candidate.model, - error: err, + error: normalized, attempt: i + 1, total: candidates.length, });