diff --git a/src/agents/auth-profiles.test.ts b/src/agents/auth-profiles.test.ts index 004fd5bc7..3b2185805 100644 --- a/src/agents/auth-profiles.test.ts +++ b/src/agents/auth-profiles.test.ts @@ -151,6 +151,49 @@ describe("resolveAuthProfileOrder", () => { expect(order).toEqual(["anthropic:work", "anthropic:default"]); }); + it("pushes disabled profiles to the end even with store order", () => { + const now = Date.now(); + const order = resolveAuthProfileOrder({ + store: { + ...store, + order: { anthropic: ["anthropic:default", "anthropic:work"] }, + usageStats: { + "anthropic:default": { + disabledUntil: now + 60_000, + disabledReason: "billing", + }, + "anthropic:work": { lastUsed: 1 }, + }, + }, + provider: "anthropic", + }); + expect(order).toEqual(["anthropic:work", "anthropic:default"]); + }); + + it("pushes disabled profiles to the end even with configured order", () => { + const now = Date.now(); + const order = resolveAuthProfileOrder({ + cfg: { + auth: { + order: { anthropic: ["anthropic:default", "anthropic:work"] }, + profiles: cfg.auth.profiles, + }, + }, + store: { + ...store, + usageStats: { + "anthropic:default": { + disabledUntil: now + 60_000, + disabledReason: "billing", + }, + "anthropic:work": { lastUsed: 1 }, + }, + }, + provider: "anthropic", + }); + expect(order).toEqual(["anthropic:work", "anthropic:default"]); + }); + it("normalizes z.ai aliases in auth.order", () => { const order = resolveAuthProfileOrder({ cfg: { diff --git a/src/agents/auth-profiles.ts b/src/agents/auth-profiles.ts index 037ca0d29..9d62250bb 100644 --- a/src/agents/auth-profiles.ts +++ b/src/agents/auth-profiles.ts @@ -72,11 +72,21 @@ export type AuthProfileCredential = | TokenCredential | OAuthCredential; +export type AuthProfileFailureReason = + | "auth" + | "rate_limit" + | "billing" + | "timeout" + | "unknown"; + /** Per-profile usage statistics for round-robin and cooldown tracking */ export type ProfileUsageStats = { lastUsed?: number; cooldownUntil?: number; + disabledUntil?: number; + disabledReason?: AuthProfileFailureReason; errorCount?: number; + failureCounts?: Partial>; }; export type AuthProfileStore = { @@ -772,8 +782,9 @@ export function isProfileInCooldown( profileId: string, ): boolean { const stats = store.usageStats?.[profileId]; - if (!stats?.cooldownUntil) return false; - return Date.now() < stats.cooldownUntil; + if (!stats) return false; + const unusableUntil = resolveProfileUnusableUntil(stats); + return unusableUntil ? Date.now() < unusableUntil : false; } /** @@ -796,6 +807,9 @@ export async function markAuthProfileUsed(params: { lastUsed: Date.now(), errorCount: 0, cooldownUntil: undefined, + disabledUntil: undefined, + disabledReason: undefined, + failureCounts: undefined, }; return true; }, @@ -812,6 +826,9 @@ export async function markAuthProfileUsed(params: { lastUsed: Date.now(), errorCount: 0, cooldownUntil: undefined, + disabledUntil: undefined, + disabledReason: undefined, + failureCounts: undefined, }; saveAuthProfileStore(store, agentDir); } @@ -824,34 +841,74 @@ export function calculateAuthProfileCooldownMs(errorCount: number): number { ); } +function calculateAuthProfileBillingDisableMs(errorCount: number): number { + const normalized = Math.max(1, errorCount); + const steps = [ + 30 * 60 * 1000, // 30 min + 2 * 60 * 60 * 1000, // 2 hours + 8 * 60 * 60 * 1000, // 8 hours + 24 * 60 * 60 * 1000, // 24 hours + ]; + return steps[Math.min(normalized - 1, steps.length - 1)] as number; +} + +function resolveProfileUnusableUntil(stats: ProfileUsageStats): number | null { + const values = [stats.cooldownUntil, stats.disabledUntil] + .filter((value): value is number => typeof value === "number") + .filter((value) => Number.isFinite(value) && value > 0); + if (values.length === 0) return null; + return Math.max(...values); +} + +export function resolveProfileUnusableUntilForDisplay( + store: AuthProfileStore, + profileId: string, +): number | null { + const stats = store.usageStats?.[profileId]; + if (!stats) return null; + return resolveProfileUnusableUntil(stats); +} + /** - * Mark a profile as failed/rate-limited. Applies exponential backoff cooldown. - * Cooldown times: 1min, 5min, 25min, max 1 hour. - * Uses store lock to avoid overwriting concurrent usage updates. + * Mark a profile as failed for a specific reason. Billing failures are treated + * as "disabled" (longer backoff) vs the regular cooldown window. */ -export async function markAuthProfileCooldown(params: { +export async function markAuthProfileFailure(params: { store: AuthProfileStore; profileId: string; + reason: AuthProfileFailureReason; agentDir?: string; }): Promise { - const { store, profileId, agentDir } = params; + const { store, profileId, reason, agentDir } = params; const updated = await updateAuthProfileStoreWithLock({ agentDir, updater: (freshStore) => { if (!freshStore.profiles[profileId]) return false; - freshStore.usageStats = freshStore.usageStats ?? {}; const existing = freshStore.usageStats[profileId] ?? {}; - const errorCount = (existing.errorCount ?? 0) + 1; - // Exponential backoff: 1min, 5min, 25min, capped at 1h - const backoffMs = calculateAuthProfileCooldownMs(errorCount); + const nextErrorCount = (existing.errorCount ?? 0) + 1; + const failureCounts = { ...existing.failureCounts }; + failureCounts[reason] = (failureCounts[reason] ?? 0) + 1; - freshStore.usageStats[profileId] = { + const now = Date.now(); + const updatedStats: ProfileUsageStats = { ...existing, - errorCount, - cooldownUntil: Date.now() + backoffMs, + errorCount: nextErrorCount, + failureCounts, }; + + if (reason === "billing") { + const billingCount = failureCounts.billing ?? 1; + const backoffMs = calculateAuthProfileBillingDisableMs(billingCount); + updatedStats.disabledUntil = now + backoffMs; + updatedStats.disabledReason = "billing"; + } else { + const backoffMs = calculateAuthProfileCooldownMs(nextErrorCount); + updatedStats.cooldownUntil = now + backoffMs; + } + + freshStore.usageStats[profileId] = updatedStats; return true; }, }); @@ -863,19 +920,48 @@ export async function markAuthProfileCooldown(params: { store.usageStats = store.usageStats ?? {}; const existing = store.usageStats[profileId] ?? {}; - const errorCount = (existing.errorCount ?? 0) + 1; + const nextErrorCount = (existing.errorCount ?? 0) + 1; + const failureCounts = { ...existing.failureCounts }; + failureCounts[reason] = (failureCounts[reason] ?? 0) + 1; - // Exponential backoff: 1min, 5min, 25min, capped at 1h - const backoffMs = calculateAuthProfileCooldownMs(errorCount); - - store.usageStats[profileId] = { + const now = Date.now(); + const updatedStats: ProfileUsageStats = { ...existing, - errorCount, - cooldownUntil: Date.now() + backoffMs, + errorCount: nextErrorCount, + failureCounts, }; + if (reason === "billing") { + const billingCount = failureCounts.billing ?? 1; + const backoffMs = calculateAuthProfileBillingDisableMs(billingCount); + updatedStats.disabledUntil = now + backoffMs; + updatedStats.disabledReason = "billing"; + } else { + const backoffMs = calculateAuthProfileCooldownMs(nextErrorCount); + updatedStats.cooldownUntil = now + backoffMs; + } + + store.usageStats[profileId] = updatedStats; saveAuthProfileStore(store, agentDir); } +/** + * Mark a profile as failed/rate-limited. Applies exponential backoff cooldown. + * Cooldown times: 1min, 5min, 25min, max 1 hour. + * Uses store lock to avoid overwriting concurrent usage updates. + */ +export async function markAuthProfileCooldown(params: { + store: AuthProfileStore; + profileId: string; + agentDir?: string; +}): Promise { + await markAuthProfileFailure({ + store: params.store, + profileId: params.profileId, + reason: "unknown", + agentDir: params.agentDir, + }); +} + /** * Clear cooldown for a profile (e.g., manual reset). * Uses store lock to avoid overwriting concurrent usage updates. @@ -973,7 +1059,8 @@ export function resolveAuthProfileOrder(params: { const inCooldown: Array<{ profileId: string; cooldownUntil: number }> = []; for (const profileId of deduped) { - const cooldownUntil = store.usageStats?.[profileId]?.cooldownUntil; + const cooldownUntil = + resolveProfileUnusableUntil(store.usageStats?.[profileId] ?? {}) ?? 0; if ( typeof cooldownUntil === "number" && Number.isFinite(cooldownUntil) && @@ -1057,7 +1144,8 @@ function orderProfilesByMode( const cooldownSorted = inCooldown .map((profileId) => ({ profileId, - cooldownUntil: store.usageStats?.[profileId]?.cooldownUntil ?? now, + cooldownUntil: + resolveProfileUnusableUntil(store.usageStats?.[profileId] ?? {}) ?? now, })) .sort((a, b) => a.cooldownUntil - b.cooldownUntil) .map((entry) => entry.profileId); diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index 211efcbc0..77da25937 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -56,6 +56,28 @@ describe("runWithModelFallback", () => { expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5"); }); + it("falls back on 402 payment required", async () => { + const cfg = makeCfg(); + const run = vi + .fn() + .mockRejectedValueOnce( + Object.assign(new Error("payment required"), { status: 402 }), + ) + .mockResolvedValueOnce("ok"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + expect(result.result).toBe("ok"); + expect(run).toHaveBeenCalledTimes(2); + expect(run.mock.calls[1]?.[0]).toBe("anthropic"); + expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5"); + }); + it("falls back on billing errors", async () => { const cfg = makeCfg(); const run = vi diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index ac667411b..acd331a26 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -7,11 +7,7 @@ import { resolveConfiguredModelRef, resolveModelRefFromString, } from "./model-selection.js"; -import { - isAuthErrorMessage, - isBillingErrorMessage, - isRateLimitErrorMessage, -} from "./pi-embedded-helpers.js"; +import { isFailoverErrorMessage } from "./pi-embedded-helpers.js"; type ModelCandidate = { provider: string; @@ -71,16 +67,6 @@ function getErrorMessage(err: unknown): string { return ""; } -function isTimeoutErrorMessage(raw: string): boolean { - const value = raw.toLowerCase(); - return ( - value.includes("timeout") || - value.includes("timed out") || - value.includes("deadline exceeded") || - value.includes("context deadline exceeded") - ); -} - function shouldFallbackForError(err: unknown): boolean { const statusCode = getStatusCode(err); if (statusCode && [401, 402, 403, 429].includes(statusCode)) return true; @@ -94,12 +80,7 @@ function shouldFallbackForError(err: unknown): boolean { } const message = getErrorMessage(err); if (!message) return false; - return ( - isAuthErrorMessage(message) || - isRateLimitErrorMessage(message) || - isBillingErrorMessage(message) || - isTimeoutErrorMessage(message) - ); + return isFailoverErrorMessage(message); } function buildAllowedModelKeys( diff --git a/src/agents/pi-embedded-helpers.test.ts b/src/agents/pi-embedded-helpers.test.ts index af70d0b72..5b5802255 100644 --- a/src/agents/pi-embedded-helpers.test.ts +++ b/src/agents/pi-embedded-helpers.test.ts @@ -3,9 +3,11 @@ import type { AssistantMessage } from "@mariozechner/pi-ai"; import { describe, expect, it } from "vitest"; import { buildBootstrapContextFiles, + classifyFailoverReason, formatAssistantErrorText, isBillingErrorMessage, isContextOverflowError, + isFailoverErrorMessage, isMessagingToolDuplicate, normalizeTextForComparison, sanitizeGoogleTurnOrdering, @@ -238,6 +240,30 @@ describe("isBillingErrorMessage", () => { }); }); +describe("isFailoverErrorMessage", () => { + it("matches auth/rate/billing/timeout", () => { + const samples = [ + "invalid api key", + "429 rate limit exceeded", + "Your credit balance is too low", + "request timed out", + ]; + for (const sample of samples) { + expect(isFailoverErrorMessage(sample)).toBe(true); + } + }); +}); + +describe("classifyFailoverReason", () => { + it("returns a stable reason", () => { + expect(classifyFailoverReason("invalid api key")).toBe("auth"); + expect(classifyFailoverReason("429 too many requests")).toBe("rate_limit"); + expect(classifyFailoverReason("credit balance too low")).toBe("billing"); + expect(classifyFailoverReason("deadline exceeded")).toBe("timeout"); + expect(classifyFailoverReason("bad request")).toBeNull(); + }); +}); + describe("formatAssistantErrorText", () => { const makeAssistantError = (errorMessage: string): AssistantMessage => ({ diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts index 58e6c2816..ac945f2ce 100644 --- a/src/agents/pi-embedded-helpers.ts +++ b/src/agents/pi-embedded-helpers.ts @@ -261,6 +261,17 @@ export function isRateLimitErrorMessage(raw: string): boolean { ); } +export function isTimeoutErrorMessage(raw: string): boolean { + const value = raw.toLowerCase(); + if (!value) return false; + return ( + value.includes("timeout") || + value.includes("timed out") || + value.includes("deadline exceeded") || + value.includes("context deadline exceeded") + ); +} + export function isBillingErrorMessage(raw: string): boolean { const value = raw.toLowerCase(); if (!value) return false; @@ -308,6 +319,32 @@ export function isAuthAssistantError( return isAuthErrorMessage(msg.errorMessage ?? ""); } +export type FailoverReason = + | "auth" + | "rate_limit" + | "billing" + | "timeout" + | "unknown"; + +export function classifyFailoverReason(raw: string): FailoverReason | null { + if (isAuthErrorMessage(raw)) return "auth"; + if (isRateLimitErrorMessage(raw)) return "rate_limit"; + if (isBillingErrorMessage(raw)) return "billing"; + if (isTimeoutErrorMessage(raw)) return "timeout"; + return null; +} + +export function isFailoverErrorMessage(raw: string): boolean { + return classifyFailoverReason(raw) !== null; +} + +export function isFailoverAssistantError( + msg: AssistantMessage | undefined, +): boolean { + if (!msg || msg.stopReason !== "error") return false; + return isFailoverErrorMessage(msg.errorMessage ?? ""); +} + function extractSupportedValues(raw: string): string[] { const match = raw.match(/supported values are:\s*([^\n.]+)/i) ?? diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts index ce710d1f1..b6482a4f1 100644 --- a/src/agents/pi-embedded-runner.ts +++ b/src/agents/pi-embedded-runner.ts @@ -37,7 +37,7 @@ import { normalizeMessageProvider } from "../utils/message-provider.js"; import { resolveUserPath } from "../utils.js"; import { resolveClawdbotAgentDir } from "./agent-paths.js"; import { - markAuthProfileCooldown, + markAuthProfileFailure, markAuthProfileGood, markAuthProfileUsed, } from "./auth-profiles.js"; @@ -55,17 +55,17 @@ import { import { ensureClawdbotModelsJson } from "./models-config.js"; import { buildBootstrapContextFiles, + classifyFailoverReason, type EmbeddedContextFile, ensureSessionHeader, formatAssistantErrorText, isAuthAssistantError, - isAuthErrorMessage, - isBillingAssistantError, - isBillingErrorMessage, isContextOverflowError, + isFailoverAssistantError, + isFailoverErrorMessage, isGoogleModelApi, isRateLimitAssistantError, - isRateLimitErrorMessage, + isTimeoutErrorMessage, pickFallbackThinkingLevel, sanitizeGoogleTurnOrdering, sanitizeSessionMessagesImages, @@ -1438,10 +1438,22 @@ export async function runEmbeddedPiAgent(params: { }, }; } + const promptFailoverReason = classifyFailoverReason(errorText); if ( - (isAuthErrorMessage(errorText) || - isRateLimitErrorMessage(errorText) || - isBillingErrorMessage(errorText)) && + promptFailoverReason && + promptFailoverReason !== "timeout" && + lastProfileId + ) { + await markAuthProfileFailure({ + store: authStore, + profileId: lastProfileId, + reason: promptFailoverReason, + agentDir: params.agentDir, + }); + } + if ( + isFailoverErrorMessage(errorText) && + promptFailoverReason !== "timeout" && (await advanceAuthProfile()) ) { continue; @@ -1484,19 +1496,26 @@ export async function runEmbeddedPiAgent(params: { 0; const authFailure = isAuthAssistantError(lastAssistant); const rateLimitFailure = isRateLimitAssistantError(lastAssistant); - const billingFailure = isBillingAssistantError(lastAssistant); + const failoverFailure = isFailoverAssistantError(lastAssistant); + const assistantFailoverReason = classifyFailoverReason( + lastAssistant?.errorMessage ?? "", + ); // Treat timeout as potential rate limit (Antigravity hangs on rate limit) - const shouldRotate = - (!aborted && (authFailure || rateLimitFailure || billingFailure)) || - timedOut; + const shouldRotate = (!aborted && failoverFailure) || timedOut; if (shouldRotate) { // Mark current profile for cooldown before rotating if (lastProfileId) { - await markAuthProfileCooldown({ + const reason = + timedOut || assistantFailoverReason === "timeout" + ? "timeout" + : (assistantFailoverReason ?? "unknown"); + await markAuthProfileFailure({ store: authStore, profileId: lastProfileId, + reason, + agentDir: params.agentDir, }); if (timedOut) { log.warn( @@ -1518,10 +1537,25 @@ export async function runEmbeddedPiAgent(params: { ? "LLM request timed out." : rateLimitFailure ? "LLM request rate limited." - : billingFailure - ? "LLM request payment required." - : "LLM request unauthorized."); - throw new Error(message); + : authFailure + ? "LLM request unauthorized." + : "LLM request failed."); + const err = new Error(message); + (err as { failoverReason?: string }).failoverReason = + assistantFailoverReason ?? undefined; + if (assistantFailoverReason === "billing") { + (err as { status?: number }).status = 402; + } else if (assistantFailoverReason === "rate_limit") { + (err as { status?: number }).status = 429; + } else if (assistantFailoverReason === "auth") { + (err as { status?: number }).status = 401; + } else if ( + assistantFailoverReason === "timeout" || + isTimeoutErrorMessage(message) + ) { + (err as { status?: number }).status = 408; + } + throw err; } }