Agents: finish cooldowned provider skip (#2534)

* Agents: skip cooldowned providers in fallback

* fix: skip cooldowned providers during model failover (#2143) (thanks @YiWang24)
This commit is contained in:
Gustavo Madeira Santana
2026-01-26 22:05:31 -05:00
committed by GitHub
parent ff42a48b54
commit 959ddae612
8 changed files with 141 additions and 10 deletions

View File

@@ -66,6 +66,7 @@ Status: unreleased.
- CLI: avoid loading config for global help/version while registering plugin commands. (#2212) Thanks @dial481.
- Agents: include memory.md when bootstrapping memory context. (#2318) Thanks @czekaj.
- Agents: release session locks on process termination and cover more signals. (#2483) Thanks @janeexai.
- Agents: skip cooldowned providers during model failover. (#2143) Thanks @YiWang24.
- Telegram: harden polling + retry behavior for transient network errors and Node 22 transport issues. (#2420) Thanks @techboss.
- Telegram: wrap reasoning italics per line to avoid raw underscores. (#2181) Thanks @YuriNachos.
- Telegram: centralize API error logging for delivery and bot calls. (#2492) Thanks @altryne.

View File

@@ -1,6 +1,13 @@
import crypto from "node:crypto";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it, vi } from "vitest";
import type { ClawdbotConfig } from "../config/config.js";
import type { AuthProfileStore } from "./auth-profiles.js";
import { saveAuthProfileStore } from "./auth-profiles.js";
import { AUTH_STORE_VERSION } from "./auth-profiles/constants.js";
import { runWithModelFallback } from "./model-fallback.js";
function makeCfg(overrides: Partial<ClawdbotConfig> = {}): ClawdbotConfig {
@@ -117,6 +124,122 @@ describe("runWithModelFallback", () => {
expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5");
});
it("skips providers when all profiles are in cooldown", async () => {
  // Dedicated temp agent dir: the auth store is written here and read back via `agentDir`.
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-auth-"));
  try {
    // Everything after mkdtemp lives inside the try so the directory is removed
    // even when writing the store (or any later setup step) throws.
    // Random suffix keeps the provider id unique per run.
    const provider = `cooldown-test-${crypto.randomUUID()}`;
    const profileId = `${provider}:default`;
    const store: AuthProfileStore = {
      version: AUTH_STORE_VERSION,
      profiles: {
        [profileId]: {
          type: "api_key",
          provider,
          key: "test-key",
        },
      },
      usageStats: {
        // The provider's only profile stays in cooldown for another minute.
        [profileId]: {
          cooldownUntil: Date.now() + 60_000,
        },
      },
    };
    saveAuthProfileStore(store, tempDir);

    const cfg = makeCfg({
      agents: {
        defaults: {
          model: {
            primary: `${provider}/m1`,
            fallbacks: ["fallback/ok-model"],
          },
        },
      },
    });

    // Only the fallback provider may be invoked; any other call fails the test loudly.
    const run = vi.fn().mockImplementation(async (providerId, modelId) => {
      if (providerId === "fallback") return "ok";
      throw new Error(`unexpected provider: ${providerId}/${modelId}`);
    });

    const result = await runWithModelFallback({
      cfg,
      provider,
      model: "m1",
      agentDir: tempDir,
      run,
    });

    expect(result.result).toBe("ok");
    // The cooled-down primary was never passed to `run`.
    expect(run.mock.calls).toEqual([["fallback", "ok-model"]]);
    // The skipped primary is recorded as a rate-limit attempt.
    expect(result.attempts[0]?.reason).toBe("rate_limit");
  } finally {
    await fs.rm(tempDir, { recursive: true, force: true });
  }
});
it("does not skip when any profile is available", async () => {
  // Dedicated temp agent dir: the auth store is written here and read back via `agentDir`.
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-auth-"));
  try {
    // Everything after mkdtemp lives inside the try so the directory is removed
    // even when writing the store (or any later setup step) throws.
    // Random suffix keeps the provider id unique per run.
    const provider = `cooldown-mixed-${crypto.randomUUID()}`;
    const profileA = `${provider}:a`;
    const profileB = `${provider}:b`;
    const store: AuthProfileStore = {
      version: AUTH_STORE_VERSION,
      profiles: {
        [profileA]: {
          type: "api_key",
          provider,
          key: "key-a",
        },
        [profileB]: {
          type: "api_key",
          provider,
          key: "key-b",
        },
      },
      usageStats: {
        // Only profile A is cooling down; profile B has no usage stats entry.
        [profileA]: {
          cooldownUntil: Date.now() + 60_000,
        },
      },
    };
    saveAuthProfileStore(store, tempDir);

    const cfg = makeCfg({
      agents: {
        defaults: {
          model: {
            primary: `${provider}/m1`,
            fallbacks: ["fallback/ok-model"],
          },
        },
      },
    });

    // The primary provider answers "ok"; any other provider yields a sentinel value
    // that the result assertion below would reject.
    const run = vi.fn().mockImplementation(async (providerId) => {
      if (providerId === provider) return "ok";
      return "unexpected";
    });

    const result = await runWithModelFallback({
      cfg,
      provider,
      model: "m1",
      agentDir: tempDir,
      run,
    });

    expect(result.result).toBe("ok");
    // The primary was attempted directly and nothing else was called.
    expect(run.mock.calls).toEqual([[provider, "m1"]]);
    // No candidate was skipped or failed before the successful run.
    expect(result.attempts).toEqual([]);
  } finally {
    await fs.rm(tempDir, { recursive: true, force: true });
  }
});
it("does not append configured primary when fallbacksOverride is set", async () => {
const cfg = makeCfg({
agents: {

View File

@@ -14,9 +14,11 @@ import {
resolveModelRefFromString,
} from "./model-selection.js";
import type { FailoverReason } from "./pi-embedded-helpers.js";
import { isProfileInCooldown } from "./auth-profiles/usage.js";
import { loadAuthProfileStore } from "./auth-profiles/store.js";
import { resolveAuthProfileOrder } from "./auth-profiles/order.js";
import {
ensureAuthProfileStore,
isProfileInCooldown,
resolveAuthProfileOrder,
} from "./auth-profiles.js";
type ModelCandidate = {
provider: string;
@@ -192,6 +194,7 @@ export async function runWithModelFallback<T>(params: {
cfg: ClawdbotConfig | undefined;
provider: string;
model: string;
agentDir?: string;
/** Optional explicit fallbacks list; when provided (even empty), replaces agents.defaults.model.fallbacks. */
fallbacksOverride?: string[];
run: (provider: string, model: string) => Promise<T>;
@@ -214,16 +217,14 @@ export async function runWithModelFallback<T>(params: {
model: params.model,
fallbacksOverride: params.fallbacksOverride,
});
const authStore = params.cfg ? loadAuthProfileStore() : null;
const authStore = params.cfg
? ensureAuthProfileStore(params.agentDir, { allowKeychainPrompt: false })
: null;
const attempts: FallbackAttempt[] = [];
let lastError: unknown;
for (let i = 0; i < candidates.length; i += 1) {
const candidate = candidates[i] as ModelCandidate;
// Skip candidates that are in cooldown
if (authStore) {
const profileIds = resolveAuthProfileOrder({
cfg: params.cfg,
@@ -238,12 +239,11 @@ export async function runWithModelFallback<T>(params: {
provider: candidate.provider,
model: candidate.model,
error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`,
reason: "auth", // Best effort classification
reason: "rate_limit",
});
continue;
}
}
try {
const result = await params.run(candidate.provider, candidate.model);
return {

View File

@@ -138,6 +138,7 @@ export async function runAgentTurnWithFallback(params: {
cfg: params.followupRun.run.config,
provider: params.followupRun.run.provider,
model: params.followupRun.run.model,
agentDir: params.followupRun.run.agentDir,
fallbacksOverride: resolveAgentModelFallbacksOverride(
params.followupRun.run.config,
resolveAgentIdFromSessionKey(params.followupRun.run.sessionKey),

View File

@@ -92,6 +92,7 @@ export async function runMemoryFlushIfNeeded(params: {
cfg: params.followupRun.run.config,
provider: params.followupRun.run.provider,
model: params.followupRun.run.model,
agentDir: params.followupRun.run.agentDir,
fallbacksOverride: resolveAgentModelFallbacksOverride(
params.followupRun.run.config,
resolveAgentIdFromSessionKey(params.followupRun.run.sessionKey),

View File

@@ -129,6 +129,7 @@ export function createFollowupRunner(params: {
cfg: queued.run.config,
provider: queued.run.provider,
model: queued.run.model,
agentDir: queued.run.agentDir,
fallbacksOverride: resolveAgentModelFallbacksOverride(
queued.run.config,
resolveAgentIdFromSessionKey(queued.run.sessionKey),

View File

@@ -382,6 +382,7 @@ export async function agentCommand(
cfg,
provider,
model,
agentDir,
fallbacksOverride: resolveAgentModelFallbacksOverride(cfg, sessionAgentId),
run: (providerOverride, modelOverride) => {
if (isCliProvider(providerOverride, cfg)) {

View File

@@ -1,5 +1,6 @@
import {
resolveAgentConfig,
resolveAgentDir,
resolveAgentModelFallbacksOverride,
resolveAgentWorkspaceDir,
resolveDefaultAgentId,
@@ -128,6 +129,7 @@ export async function runCronIsolatedAgentTurn(params: {
});
const workspaceDirRaw = resolveAgentWorkspaceDir(params.cfg, agentId);
const agentDir = resolveAgentDir(params.cfg, agentId);
const workspace = await ensureAgentWorkspace({
dir: workspaceDirRaw,
ensureBootstrapFiles: !agentCfg?.skipBootstrap,
@@ -330,6 +332,7 @@ export async function runCronIsolatedAgentTurn(params: {
cfg: cfgWithAgentDefaults,
provider,
model,
agentDir,
fallbacksOverride: resolveAgentModelFallbacksOverride(params.cfg, agentId),
run: (providerOverride, modelOverride) => {
if (isCliProvider(providerOverride, cfgWithAgentDefaults)) {