From 959ddae6124c93f0f7bb308a4b4e1902981d9069 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Mon, 26 Jan 2026 22:05:31 -0500 Subject: [PATCH] Agents: finish cooldowned provider skip (#2534) * Agents: skip cooldowned providers in fallback * fix: skip cooldowned providers during model failover (#2143) (thanks @YiWang24) --- CHANGELOG.md | 1 + src/agents/model-fallback.test.ts | 123 ++++++++++++++++++ src/agents/model-fallback.ts | 20 +-- .../reply/agent-runner-execution.ts | 1 + src/auto-reply/reply/agent-runner-memory.ts | 1 + src/auto-reply/reply/followup-runner.ts | 1 + src/commands/agent.ts | 1 + src/cron/isolated-agent/run.ts | 3 + 8 files changed, 141 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17bb4477c..0770e0062 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,7 @@ Status: unreleased. - CLI: avoid loading config for global help/version while registering plugin commands. (#2212) Thanks @dial481. - Agents: include memory.md when bootstrapping memory context. (#2318) Thanks @czekaj. - Agents: release session locks on process termination and cover more signals. (#2483) Thanks @janeexai. +- Agents: skip cooldowned providers during model failover. (#2143) Thanks @YiWang24. - Telegram: harden polling + retry behavior for transient network errors and Node 22 transport issues. (#2420) Thanks @techboss. - Telegram: wrap reasoning italics per line to avoid raw underscores. (#2181) Thanks @YuriNachos. - Telegram: centralize API error logging for delivery and bot calls. (#2492) Thanks @altryne. diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index 8662b0101..73008202d 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -1,6 +1,13 @@ +import crypto from "node:crypto"; +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; import { describe, expect, it, vi } from "vitest"; import type { ClawdbotConfig } from "../config/config.js"; +import type { AuthProfileStore } from "./auth-profiles.js"; +import { saveAuthProfileStore } from "./auth-profiles.js"; +import { AUTH_STORE_VERSION } from "./auth-profiles/constants.js"; import { runWithModelFallback } from "./model-fallback.js"; function makeCfg(overrides: Partial = {}): ClawdbotConfig { @@ -117,6 +124,122 @@ describe("runWithModelFallback", () => { expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5"); }); + it("skips providers when all profiles are in cooldown", async () => { + const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-auth-")); + const provider = `cooldown-test-${crypto.randomUUID()}`; + const profileId = `${provider}:default`; + + const store: AuthProfileStore = { + version: AUTH_STORE_VERSION, + profiles: { + [profileId]: { + type: "api_key", + provider, + key: "test-key", + }, + }, + usageStats: { + [profileId]: { + cooldownUntil: Date.now() + 60_000, + }, + }, + }; + + saveAuthProfileStore(store, tempDir); + + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: `${provider}/m1`, + fallbacks: ["fallback/ok-model"], + }, + }, + }, + }); + const run = vi.fn().mockImplementation(async (providerId, modelId) => { + if (providerId === "fallback") return "ok"; + throw new Error(`unexpected provider: ${providerId}/${modelId}`); + }); + + try { + const result = await runWithModelFallback({ + cfg, + provider, + model: "m1", + agentDir: tempDir, + run, + }); + + expect(result.result).toBe("ok"); + expect(run.mock.calls).toEqual([["fallback", "ok-model"]]); + expect(result.attempts[0]?.reason).toBe("rate_limit"); + } finally { + await fs.rm(tempDir, { recursive: true, force: true }); + } + }); + + it("does not skip when any profile is available", async () => { + const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-auth-")); + const provider = `cooldown-mixed-${crypto.randomUUID()}`; + const profileA = `${provider}:a`; + const profileB = `${provider}:b`; + + const store: AuthProfileStore = { + version: AUTH_STORE_VERSION, + profiles: { + [profileA]: { + type: "api_key", + provider, + key: "key-a", + }, + [profileB]: { + type: "api_key", + provider, + key: "key-b", + }, + }, + usageStats: { + [profileA]: { + cooldownUntil: Date.now() + 60_000, + }, + }, + }; + + saveAuthProfileStore(store, tempDir); + + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: `${provider}/m1`, + fallbacks: ["fallback/ok-model"], + }, + }, + }, + }); + const run = vi.fn().mockImplementation(async (providerId) => { + if (providerId === provider) return "ok"; + return "unexpected"; + }); + + try { + const result = await runWithModelFallback({ + cfg, + provider, + model: "m1", + agentDir: tempDir, + run, + }); + + expect(result.result).toBe("ok"); + expect(run.mock.calls).toEqual([[provider, "m1"]]); + expect(result.attempts).toEqual([]); + } finally { + await fs.rm(tempDir, { recursive: true, force: true }); + } + }); + it("does not append configured primary when fallbacksOverride is set", async () => { const cfg = makeCfg({ agents: { diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 60827ea00..074f7df59 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -14,9 +14,11 @@ import { resolveModelRefFromString, } from "./model-selection.js"; import type { FailoverReason } from "./pi-embedded-helpers.js"; -import { isProfileInCooldown } from "./auth-profiles/usage.js"; -import { loadAuthProfileStore } from "./auth-profiles/store.js"; -import { resolveAuthProfileOrder } from "./auth-profiles/order.js"; +import { + ensureAuthProfileStore, + isProfileInCooldown, + resolveAuthProfileOrder, +} from "./auth-profiles.js"; type ModelCandidate = { provider: string; @@ -192,6 +194,7 @@ export async function runWithModelFallback(params: { cfg: ClawdbotConfig | undefined; provider: string; model: string; + agentDir?: string; /** Optional explicit fallbacks list; when provided (even empty), replaces agents.defaults.model.fallbacks. */ fallbacksOverride?: string[]; run: (provider: string, model: string) => Promise; @@ -214,16 +217,14 @@ export async function runWithModelFallback(params: { model: params.model, fallbacksOverride: params.fallbacksOverride, }); - - const authStore = params.cfg ? loadAuthProfileStore() : null; - + const authStore = params.cfg + ? ensureAuthProfileStore(params.agentDir, { allowKeychainPrompt: false }) + : null; const attempts: FallbackAttempt[] = []; let lastError: unknown; for (let i = 0; i < candidates.length; i += 1) { const candidate = candidates[i] as ModelCandidate; - - // Skip candidates that are in cooldown if (authStore) { const profileIds = resolveAuthProfileOrder({ cfg: params.cfg, @@ -238,12 +239,11 @@ export async function runWithModelFallback(params: { provider: candidate.provider, model: candidate.model, error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`, - reason: "auth", // Best effort classification + reason: "rate_limit", }); continue; } } - try { const result = await params.run(candidate.provider, candidate.model); return { diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 3537972e4..7aae24e6a 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -138,6 +138,7 @@ export async function runAgentTurnWithFallback(params: { cfg: params.followupRun.run.config, provider: params.followupRun.run.provider, model: params.followupRun.run.model, + agentDir: params.followupRun.run.agentDir, fallbacksOverride: resolveAgentModelFallbacksOverride( params.followupRun.run.config, resolveAgentIdFromSessionKey(params.followupRun.run.sessionKey), diff --git a/src/auto-reply/reply/agent-runner-memory.ts b/src/auto-reply/reply/agent-runner-memory.ts index a7d590750..2b2e26b0c 100644 --- a/src/auto-reply/reply/agent-runner-memory.ts +++ b/src/auto-reply/reply/agent-runner-memory.ts @@ -92,6 +92,7 @@ export async function runMemoryFlushIfNeeded(params: { cfg: params.followupRun.run.config, provider: params.followupRun.run.provider, model: params.followupRun.run.model, + agentDir: params.followupRun.run.agentDir, fallbacksOverride: resolveAgentModelFallbacksOverride( params.followupRun.run.config, resolveAgentIdFromSessionKey(params.followupRun.run.sessionKey), diff --git a/src/auto-reply/reply/followup-runner.ts b/src/auto-reply/reply/followup-runner.ts index febbc6e6a..7f5bdde21 100644 --- a/src/auto-reply/reply/followup-runner.ts +++ b/src/auto-reply/reply/followup-runner.ts @@ -129,6 +129,7 @@ export function createFollowupRunner(params: { cfg: queued.run.config, provider: queued.run.provider, model: queued.run.model, + agentDir: queued.run.agentDir, fallbacksOverride: resolveAgentModelFallbacksOverride( queued.run.config, resolveAgentIdFromSessionKey(queued.run.sessionKey), diff --git a/src/commands/agent.ts b/src/commands/agent.ts index ef9718833..d3ee7e783 100644 --- a/src/commands/agent.ts +++ b/src/commands/agent.ts @@ -382,6 +382,7 @@ export async function agentCommand( cfg, provider, model, + agentDir, fallbacksOverride: resolveAgentModelFallbacksOverride(cfg, sessionAgentId), run: (providerOverride, modelOverride) => { if (isCliProvider(providerOverride, cfg)) { diff --git a/src/cron/isolated-agent/run.ts b/src/cron/isolated-agent/run.ts index 2840cb50f..a267c7deb 100644 --- a/src/cron/isolated-agent/run.ts +++ b/src/cron/isolated-agent/run.ts @@ -1,5 +1,6 @@ import { resolveAgentConfig, + resolveAgentDir, resolveAgentModelFallbacksOverride, resolveAgentWorkspaceDir, resolveDefaultAgentId, @@ -128,6 +129,7 @@ export async function runCronIsolatedAgentTurn(params: { }); const workspaceDirRaw = resolveAgentWorkspaceDir(params.cfg, agentId); + const agentDir = resolveAgentDir(params.cfg, agentId); const workspace = await ensureAgentWorkspace({ dir: workspaceDirRaw, ensureBootstrapFiles: !agentCfg?.skipBootstrap, @@ -330,6 +332,7 @@ export async function runCronIsolatedAgentTurn(params: { cfg: cfgWithAgentDefaults, provider, model, + agentDir, fallbacksOverride: resolveAgentModelFallbacksOverride(params.cfg, agentId), run: (providerOverride, modelOverride) => { if (isCliProvider(providerOverride, cfgWithAgentDefaults)) {