fix: trigger fallback on auth profile exhaustion

This commit is contained in:
Peter Steinberger
2026-01-24 06:14:17 +00:00
parent 795b592286
commit 675019cb6f
3 changed files with 151 additions and 18 deletions

View File

@@ -43,6 +43,7 @@ Docs: https://docs.clawd.bot
- CLI: inline auth probe errors in status rows to reduce wrapping.
- Telegram: render markdown in media captions. (#1478)
- Agents: honor enqueue overrides for embedded runs to avoid queue deadlocks in tests.
- Agents: trigger model fallback when auth profiles are all in cooldown or unavailable. (#1522)
- Daemon: use platform PATH delimiters when building minimal service paths.
- Tests: skip embedded runner ordering assertion on Windows to avoid CI timeouts.
- Linux: include env-configured user bin roots in systemd PATH and align PATH audits. (#1512) Thanks @robbyczgw-cla.

View File

@@ -63,12 +63,12 @@ const makeAttempt = (overrides: Partial<EmbeddedRunAttemptResult>): EmbeddedRunA
...overrides,
});
const makeConfig = (): ClawdbotConfig =>
const makeConfig = (opts?: { fallbacks?: string[]; apiKey?: string }): ClawdbotConfig =>
({
agents: {
defaults: {
model: {
fallbacks: [],
fallbacks: opts?.fallbacks ?? [],
},
},
},
@@ -76,7 +76,7 @@ const makeConfig = (): ClawdbotConfig =>
providers: {
openai: {
api: "openai-responses",
apiKey: "sk-test",
apiKey: opts?.apiKey ?? "sk-test",
baseUrl: "https://example.com",
models: [
{
@@ -94,7 +94,13 @@ const makeConfig = (): ClawdbotConfig =>
},
}) satisfies ClawdbotConfig;
const writeAuthStore = async (agentDir: string, opts?: { includeAnthropic?: boolean }) => {
const writeAuthStore = async (
agentDir: string,
opts?: {
includeAnthropic?: boolean;
usageStats?: Record<string, { lastUsed?: number; cooldownUntil?: number }>;
},
) => {
const authPath = path.join(agentDir, "auth-profiles.json");
const payload = {
version: 1,
@@ -105,10 +111,12 @@ const writeAuthStore = async (agentDir: string, opts?: { includeAnthropic?: bool
? { "anthropic:default": { type: "api_key", provider: "anthropic", key: "sk-anth" } }
: {}),
},
usageStats: {
"openai:p1": { lastUsed: 1 },
"openai:p2": { lastUsed: 2 },
},
usageStats:
opts?.usageStats ??
({
"openai:p1": { lastUsed: 1 },
"openai:p2": { lastUsed: 2 },
} as Record<string, { lastUsed?: number }>),
};
await fs.writeFile(authPath, JSON.stringify(payload));
};
@@ -384,6 +392,92 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
}
});
// Scenario: both openai profiles carry a cooldownUntil one hour past the frozen clock
// and a fallback model is configured. The run is expected to reject with a
// rate_limit FailoverError, and the attempt mock must never be called.
it("fails over when all profiles are in cooldown and fallbacks are configured", async () => {
  vi.useFakeTimers();
  try {
    const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-agent-"));
    const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-workspace-"));
    // Pin the fake clock to the captured instant so the cooldown deadlines stay in the future.
    const now = Date.now();
    vi.setSystemTime(now);
    const cooldownUntil = now + 60 * 60 * 1000;
    try {
      const usageStats = {
        "openai:p1": { lastUsed: 1, cooldownUntil },
        "openai:p2": { lastUsed: 2, cooldownUntil },
      };
      await writeAuthStore(agentDir, { usageStats });

      const config = makeConfig({ fallbacks: ["openai/mock-2"] });
      const run = runEmbeddedPiAgent({
        sessionId: "session:test",
        sessionKey: "agent:test:cooldown-failover",
        sessionFile: path.join(workspaceDir, "session.jsonl"),
        workspaceDir,
        agentDir,
        config,
        prompt: "hello",
        provider: "openai",
        model: "mock-1",
        authProfileIdSource: "auto",
        timeoutMs: 5_000,
        runId: "run:cooldown-failover",
      });

      const expectedFailure = {
        name: "FailoverError",
        reason: "rate_limit",
        provider: "openai",
        model: "mock-1",
      };
      await expect(run).rejects.toMatchObject(expectedFailure);
      expect(runEmbeddedAttemptMock).not.toHaveBeenCalled();
    } finally {
      // Remove the temp directories whether or not the assertions passed.
      await fs.rm(agentDir, { recursive: true, force: true });
      await fs.rm(workspaceDir, { recursive: true, force: true });
    }
  } finally {
    vi.useRealTimers();
  }
});
// Scenario: the auth store has no profiles, the provider config has an empty apiKey,
// and OPENAI_API_KEY is removed from the environment, while a fallback model is
// configured. The run is expected to reject with an auth FailoverError, and the
// attempt mock must never be called.
it("fails over when auth is unavailable and fallbacks are configured", async () => {
  const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-agent-"));
  const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-workspace-"));
  // Remember the ambient key so it can be restored once the test finishes.
  const previousOpenAiKey = process.env.OPENAI_API_KEY;
  delete process.env.OPENAI_API_KEY;
  try {
    const emptyStore = JSON.stringify({ version: 1, profiles: {}, usageStats: {} });
    await fs.writeFile(path.join(agentDir, "auth-profiles.json"), emptyStore);

    const run = runEmbeddedPiAgent({
      sessionId: "session:test",
      sessionKey: "agent:test:auth-unavailable",
      sessionFile: path.join(workspaceDir, "session.jsonl"),
      workspaceDir,
      agentDir,
      config: makeConfig({ fallbacks: ["openai/mock-2"], apiKey: "" }),
      prompt: "hello",
      provider: "openai",
      model: "mock-1",
      authProfileIdSource: "auto",
      timeoutMs: 5_000,
      runId: "run:auth-unavailable",
    });

    await expect(run).rejects.toMatchObject({ name: "FailoverError", reason: "auth" });
    expect(runEmbeddedAttemptMock).not.toHaveBeenCalled();
  } finally {
    // Put the environment back exactly as it was found.
    if (previousOpenAiKey !== undefined) {
      process.env.OPENAI_API_KEY = previousOpenAiKey;
    } else {
      delete process.env.OPENAI_API_KEY;
    }
    await fs.rm(agentDir, { recursive: true, force: true });
    await fs.rm(workspaceDir, { recursive: true, force: true });
  }
});
it("skips profiles in cooldown when rotating after failure", async () => {
vi.useFakeTimers();
try {

View File

@@ -38,6 +38,7 @@ import {
isRateLimitAssistantError,
isTimeoutErrorMessage,
pickFallbackThinkingLevel,
type FailoverReason,
} from "../pi-embedded-helpers.js";
import { normalizeUsage, type UsageLike } from "../usage.js";
@@ -92,6 +93,8 @@ export async function runEmbeddedPiAgent(
const provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
const modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
const agentDir = params.agentDir ?? resolveClawdbotAgentDir();
const fallbackConfigured =
(params.config?.agents?.defaults?.model?.fallbacks?.length ?? 0) > 0;
await ensureClawdbotModelsJson(params.config, agentDir);
const { model, error, authStorage, modelRegistry } = resolveModel(
@@ -165,6 +168,42 @@ export async function runEmbeddedPiAgent(
let apiKeyInfo: ApiKeyInfo | null = null;
let lastProfileId: string | undefined;
/**
 * Picks the failover reason to report when no auth profile could be used.
 *
 * - `allInCooldown` set: always "rate_limit"; the message is not inspected.
 * - otherwise: whatever `classifyFailoverReason` derives from `message`,
 *   defaulting to "auth" when it yields nothing (null/undefined).
 */
const resolveAuthProfileFailoverReason = ({
  allInCooldown,
  message,
}: {
  allInCooldown: boolean;
  message: string;
}): FailoverReason =>
  allInCooldown ? "rate_limit" : (classifyFailoverReason(message) ?? "auth");
/**
 * Raises the error for "no usable auth profile" situations; never returns.
 *
 * Message precedence: the trimmed `message` argument, then the trimmed
 * description of `error` (via `describeUnknownError`), then a generic
 * "No available auth profile" text naming the provider.
 *
 * @throws FailoverError when `fallbackConfigured` is true, carrying the resolved
 *   reason, provider, model id, the status from `resolveFailoverStatus`, and
 *   `error` as the cause.
 * @throws the original `error` when no fallback is configured and it is an
 *   `Error` instance; otherwise a plain `Error` with the resolved message.
 */
const throwAuthProfileFailover = (params: {
  allInCooldown: boolean;
  message?: string;
  error?: unknown;
}): never => {
  const { allInCooldown, error: cause } = params;

  let message = params.message?.trim();
  if (!message && cause) {
    message = describeUnknownError(cause).trim();
  }
  if (!message) {
    message = `No available auth profile for ${provider} (all in cooldown or unavailable).`;
  }

  // Classified before branching so the helper-call order is the same on both paths.
  const reason = resolveAuthProfileFailoverReason({ allInCooldown, message });

  if (!fallbackConfigured) {
    // No fallback configured: surface the underlying error unchanged when there is one.
    throw cause instanceof Error ? cause : new Error(message);
  }

  throw new FailoverError(message, {
    reason,
    provider,
    model: modelId,
    status: resolveFailoverStatus(reason),
    cause,
  });
};
const resolveApiKeyForCandidate = async (candidate?: string) => {
return getApiKeyForModel({
model,
@@ -238,14 +277,17 @@ export async function runEmbeddedPiAgent(
break;
}
if (profileIndex >= profileCandidates.length) {
throw new Error(
`No available auth profile for ${provider} (all in cooldown or unavailable).`,
);
throwAuthProfileFailover({ allInCooldown: true });
}
} catch (err) {
if (profileCandidates[profileIndex] === lockedProfileId) throw err;
if (err instanceof FailoverError) throw err;
if (profileCandidates[profileIndex] === lockedProfileId) {
throwAuthProfileFailover({ allInCooldown: false, error: err });
}
const advanced = await advanceAuthProfile();
if (!advanced) throw err;
if (!advanced) {
throwAuthProfileFailover({ allInCooldown: false, error: err });
}
}
try {
@@ -393,9 +435,7 @@ export async function runEmbeddedPiAgent(
}
// FIX: Throw FailoverError for prompt errors when fallbacks configured
// This enables model fallback for quota/rate limit errors during prompt submission
const promptFallbackConfigured =
(params.config?.agents?.defaults?.model?.fallbacks?.length ?? 0) > 0;
if (promptFallbackConfigured && isFailoverErrorMessage(errorText)) {
if (fallbackConfigured && isFailoverErrorMessage(errorText)) {
throw new FailoverError(errorText, {
reason: promptFailoverReason ?? "unknown",
provider,
@@ -419,8 +459,6 @@ export async function runEmbeddedPiAgent(
continue;
}
const fallbackConfigured =
(params.config?.agents?.defaults?.model?.fallbacks?.length ?? 0) > 0;
const authFailure = isAuthAssistantError(lastAssistant);
const rateLimitFailure = isRateLimitAssistantError(lastAssistant);
const failoverFailure = isFailoverAssistantError(lastAssistant);