From 675019cb6f51c51ba64d935094cfa5885651a67a Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 24 Jan 2026 06:14:17 +0000 Subject: [PATCH] fix: trigger fallback on auth profile exhaustion --- CHANGELOG.md | 1 + ...ded-pi-agent.auth-profile-rotation.test.ts | 110 ++++++++++++++++-- src/agents/pi-embedded-runner/run.ts | 58 +++++++-- 3 files changed, 151 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0ff4f6d6..3143ddda9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,7 @@ Docs: https://docs.clawd.bot - CLI: inline auth probe errors in status rows to reduce wrapping. - Telegram: render markdown in media captions. (#1478) - Agents: honor enqueue overrides for embedded runs to avoid queue deadlocks in tests. +- Agents: trigger model fallback when auth profiles are all in cooldown or unavailable. (#1522) - Daemon: use platform PATH delimiters when building minimal service paths. - Tests: skip embedded runner ordering assertion on Windows to avoid CI timeouts. - Linux: include env-configured user bin roots in systemd PATH and align PATH audits. (#1512) Thanks @robbyczgw-cla. diff --git a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.test.ts b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.test.ts index f765ed4a7..0128f41e3 100644 --- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.test.ts +++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.test.ts @@ -63,12 +63,12 @@ const makeAttempt = (overrides: Partial): EmbeddedRunA ...overrides, }); -const makeConfig = (): ClawdbotConfig => +const makeConfig = (opts?: { fallbacks?: string[]; apiKey?: string }): ClawdbotConfig => ({ agents: { defaults: { model: { - fallbacks: [], + fallbacks: opts?.fallbacks ?? 
[], }, }, }, @@ -76,7 +76,7 @@ const makeConfig = (): ClawdbotConfig => providers: { openai: { api: "openai-responses", - apiKey: "sk-test", + apiKey: opts?.apiKey ?? "sk-test", baseUrl: "https://example.com", models: [ { @@ -94,7 +94,13 @@ const makeConfig = (): ClawdbotConfig => }, }) satisfies ClawdbotConfig; -const writeAuthStore = async (agentDir: string, opts?: { includeAnthropic?: boolean }) => { +const writeAuthStore = async ( + agentDir: string, + opts?: { + includeAnthropic?: boolean; + usageStats?: Record<string, { lastUsed?: number; cooldownUntil?: number }>; + }, +) => { const authPath = path.join(agentDir, "auth-profiles.json"); const payload = { version: 1, @@ -105,10 +111,12 @@ const writeAuthStore = async (agentDir: string, opts?: { includeAnthropic?: bool ? { "anthropic:default": { type: "api_key", provider: "anthropic", key: "sk-anth" } } : {}), }, - usageStats: { - "openai:p1": { lastUsed: 1 }, - "openai:p2": { lastUsed: 2 }, - }, + usageStats: + opts?.usageStats ?? + ({ + "openai:p1": { lastUsed: 1 }, + "openai:p2": { lastUsed: 2 }, + } as Record<string, { lastUsed?: number; cooldownUntil?: number }>), }; await fs.writeFile(authPath, JSON.stringify(payload)); }; @@ -384,6 +392,92 @@ describe("runEmbeddedPiAgent auth profile rotation", () => { } }); + it("fails over when all profiles are in cooldown and fallbacks are configured", async () => { + vi.useFakeTimers(); + try { + const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-agent-")); + const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-workspace-")); + const now = Date.now(); + vi.setSystemTime(now); + + try { + await writeAuthStore(agentDir, { + usageStats: { + "openai:p1": { lastUsed: 1, cooldownUntil: now + 60 * 60 * 1000 }, + "openai:p2": { lastUsed: 2, cooldownUntil: now + 60 * 60 * 1000 }, + }, + }); + + await expect( + runEmbeddedPiAgent({ + sessionId: "session:test", + sessionKey: "agent:test:cooldown-failover", + sessionFile: path.join(workspaceDir, "session.jsonl"), + workspaceDir, + agentDir, + config: makeConfig({ fallbacks: ["openai/mock-2"] }), 
prompt: "hello", + provider: "openai", + model: "mock-1", + authProfileIdSource: "auto", + timeoutMs: 5_000, + runId: "run:cooldown-failover", + }), + ).rejects.toMatchObject({ + name: "FailoverError", + reason: "rate_limit", + provider: "openai", + model: "mock-1", + }); + + expect(runEmbeddedAttemptMock).not.toHaveBeenCalled(); + } finally { + await fs.rm(agentDir, { recursive: true, force: true }); + await fs.rm(workspaceDir, { recursive: true, force: true }); + } + } finally { + vi.useRealTimers(); + } + }); + + it("fails over when auth is unavailable and fallbacks are configured", async () => { + const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-agent-")); + const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-workspace-")); + const previousOpenAiKey = process.env.OPENAI_API_KEY; + delete process.env.OPENAI_API_KEY; + try { + const authPath = path.join(agentDir, "auth-profiles.json"); + await fs.writeFile(authPath, JSON.stringify({ version: 1, profiles: {}, usageStats: {} })); + + await expect( + runEmbeddedPiAgent({ + sessionId: "session:test", + sessionKey: "agent:test:auth-unavailable", + sessionFile: path.join(workspaceDir, "session.jsonl"), + workspaceDir, + agentDir, + config: makeConfig({ fallbacks: ["openai/mock-2"], apiKey: "" }), + prompt: "hello", + provider: "openai", + model: "mock-1", + authProfileIdSource: "auto", + timeoutMs: 5_000, + runId: "run:auth-unavailable", + }), + ).rejects.toMatchObject({ name: "FailoverError", reason: "auth" }); + + expect(runEmbeddedAttemptMock).not.toHaveBeenCalled(); + } finally { + if (previousOpenAiKey === undefined) { + delete process.env.OPENAI_API_KEY; + } else { + process.env.OPENAI_API_KEY = previousOpenAiKey; + } + await fs.rm(agentDir, { recursive: true, force: true }); + await fs.rm(workspaceDir, { recursive: true, force: true }); + } + }); + it("skips profiles in cooldown when rotating after failure", async () => { vi.useFakeTimers(); try { diff --git 
a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 2873f4143..201fb4fce 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -38,6 +38,7 @@ import { isRateLimitAssistantError, isTimeoutErrorMessage, pickFallbackThinkingLevel, + type FailoverReason, } from "../pi-embedded-helpers.js"; import { normalizeUsage, type UsageLike } from "../usage.js"; @@ -92,6 +93,8 @@ export async function runEmbeddedPiAgent( const provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER; const modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL; const agentDir = params.agentDir ?? resolveClawdbotAgentDir(); + const fallbackConfigured = + (params.config?.agents?.defaults?.model?.fallbacks?.length ?? 0) > 0; await ensureClawdbotModelsJson(params.config, agentDir); const { model, error, authStorage, modelRegistry } = resolveModel( @@ -165,6 +168,42 @@ export async function runEmbeddedPiAgent( let apiKeyInfo: ApiKeyInfo | null = null; let lastProfileId: string | undefined; + const resolveAuthProfileFailoverReason = (params: { + allInCooldown: boolean; + message: string; + }): FailoverReason => { + if (params.allInCooldown) return "rate_limit"; + const classified = classifyFailoverReason(params.message); + return classified ?? "auth"; + }; + + const throwAuthProfileFailover = (params: { + allInCooldown: boolean; + message?: string; + error?: unknown; + }): never => { + const fallbackMessage = `No available auth profile for ${provider} (all in cooldown or unavailable).`; + const message = + params.message?.trim() || + (params.error ? 
describeUnknownError(params.error).trim() : "") || + fallbackMessage; + const reason = resolveAuthProfileFailoverReason({ + allInCooldown: params.allInCooldown, + message, + }); + if (fallbackConfigured) { + throw new FailoverError(message, { + reason, + provider, + model: modelId, + status: resolveFailoverStatus(reason), + cause: params.error, + }); + } + if (params.error instanceof Error) throw params.error; + throw new Error(message); + }; + const resolveApiKeyForCandidate = async (candidate?: string) => { return getApiKeyForModel({ model, @@ -238,14 +277,17 @@ export async function runEmbeddedPiAgent( break; } if (profileIndex >= profileCandidates.length) { - throw new Error( - `No available auth profile for ${provider} (all in cooldown or unavailable).`, - ); + throwAuthProfileFailover({ allInCooldown: true }); } } catch (err) { - if (profileCandidates[profileIndex] === lockedProfileId) throw err; + if (err instanceof FailoverError) throw err; + if (profileCandidates[profileIndex] === lockedProfileId) { + throwAuthProfileFailover({ allInCooldown: false, error: err }); + } const advanced = await advanceAuthProfile(); - if (!advanced) throw err; + if (!advanced) { + throwAuthProfileFailover({ allInCooldown: false, error: err }); + } } try { @@ -393,9 +435,7 @@ export async function runEmbeddedPiAgent( } // FIX: Throw FailoverError for prompt errors when fallbacks configured // This enables model fallback for quota/rate limit errors during prompt submission - const promptFallbackConfigured = - (params.config?.agents?.defaults?.model?.fallbacks?.length ?? 0) > 0; - if (promptFallbackConfigured && isFailoverErrorMessage(errorText)) { + if (fallbackConfigured && isFailoverErrorMessage(errorText)) { throw new FailoverError(errorText, { reason: promptFailoverReason ?? "unknown", provider, @@ -419,8 +459,6 @@ export async function runEmbeddedPiAgent( continue; } - const fallbackConfigured = - (params.config?.agents?.defaults?.model?.fallbacks?.length ?? 
0) > 0; const authFailure = isAuthAssistantError(lastAssistant); const rateLimitFailure = isRateLimitAssistantError(lastAssistant); const failoverFailure = isFailoverAssistantError(lastAssistant);