fix: trigger fallback on auth profile exhaustion
This commit is contained in:
@@ -43,6 +43,7 @@ Docs: https://docs.clawd.bot
|
||||
- CLI: inline auth probe errors in status rows to reduce wrapping.
|
||||
- Telegram: render markdown in media captions. (#1478)
|
||||
- Agents: honor enqueue overrides for embedded runs to avoid queue deadlocks in tests.
|
||||
- Agents: trigger model fallback when auth profiles are all in cooldown or unavailable. (#1522)
|
||||
- Daemon: use platform PATH delimiters when building minimal service paths.
|
||||
- Tests: skip embedded runner ordering assertion on Windows to avoid CI timeouts.
|
||||
- Linux: include env-configured user bin roots in systemd PATH and align PATH audits. (#1512) Thanks @robbyczgw-cla.
|
||||
|
||||
@@ -63,12 +63,12 @@ const makeAttempt = (overrides: Partial<EmbeddedRunAttemptResult>): EmbeddedRunA
|
||||
...overrides,
|
||||
});
|
||||
|
||||
const makeConfig = (): ClawdbotConfig =>
|
||||
const makeConfig = (opts?: { fallbacks?: string[]; apiKey?: string }): ClawdbotConfig =>
|
||||
({
|
||||
agents: {
|
||||
defaults: {
|
||||
model: {
|
||||
fallbacks: [],
|
||||
fallbacks: opts?.fallbacks ?? [],
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -76,7 +76,7 @@ const makeConfig = (): ClawdbotConfig =>
|
||||
providers: {
|
||||
openai: {
|
||||
api: "openai-responses",
|
||||
apiKey: "sk-test",
|
||||
apiKey: opts?.apiKey ?? "sk-test",
|
||||
baseUrl: "https://example.com",
|
||||
models: [
|
||||
{
|
||||
@@ -94,7 +94,13 @@ const makeConfig = (): ClawdbotConfig =>
|
||||
},
|
||||
}) satisfies ClawdbotConfig;
|
||||
|
||||
const writeAuthStore = async (agentDir: string, opts?: { includeAnthropic?: boolean }) => {
|
||||
const writeAuthStore = async (
|
||||
agentDir: string,
|
||||
opts?: {
|
||||
includeAnthropic?: boolean;
|
||||
usageStats?: Record<string, { lastUsed?: number; cooldownUntil?: number }>;
|
||||
},
|
||||
) => {
|
||||
const authPath = path.join(agentDir, "auth-profiles.json");
|
||||
const payload = {
|
||||
version: 1,
|
||||
@@ -105,10 +111,12 @@ const writeAuthStore = async (agentDir: string, opts?: { includeAnthropic?: bool
|
||||
? { "anthropic:default": { type: "api_key", provider: "anthropic", key: "sk-anth" } }
|
||||
: {}),
|
||||
},
|
||||
usageStats: {
|
||||
"openai:p1": { lastUsed: 1 },
|
||||
"openai:p2": { lastUsed: 2 },
|
||||
},
|
||||
usageStats:
|
||||
opts?.usageStats ??
|
||||
({
|
||||
"openai:p1": { lastUsed: 1 },
|
||||
"openai:p2": { lastUsed: 2 },
|
||||
} as Record<string, { lastUsed?: number }>),
|
||||
};
|
||||
await fs.writeFile(authPath, JSON.stringify(payload));
|
||||
};
|
||||
@@ -384,6 +392,92 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
|
||||
}
|
||||
});
|
||||
|
||||
// Verifies that runEmbeddedPiAgent rejects with a FailoverError (reason
// "rate_limit") when every openai auth profile in the store is in cooldown and
// the config lists at least one fallback model. The attempt mock must never be
// invoked, i.e. the rejection happens before any run attempt is made.
it("fails over when all profiles are in cooldown and fallbacks are configured", async () => {
|
||||
vi.useFakeTimers();
|
||||
try {
|
||||
const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-agent-"));
|
||||
const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-workspace-"));
|
||||
// Pin the fake clock so the cooldownUntil values below stay in the future.
const now = Date.now();
|
||||
vi.setSystemTime(now);
|
||||
|
||||
try {
|
||||
// Both profiles get a cooldown that ends one hour after the pinned time.
await writeAuthStore(agentDir, {
|
||||
usageStats: {
|
||||
"openai:p1": { lastUsed: 1, cooldownUntil: now + 60 * 60 * 1000 },
|
||||
"openai:p2": { lastUsed: 2, cooldownUntil: now + 60 * 60 * 1000 },
|
||||
},
|
||||
});
|
||||
|
||||
await expect(
|
||||
runEmbeddedPiAgent({
|
||||
sessionId: "session:test",
|
||||
sessionKey: "agent:test:cooldown-failover",
|
||||
sessionFile: path.join(workspaceDir, "session.jsonl"),
|
||||
workspaceDir,
|
||||
agentDir,
|
||||
config: makeConfig({ fallbacks: ["openai/mock-2"] }),
|
||||
prompt: "hello",
|
||||
provider: "openai",
|
||||
model: "mock-1",
|
||||
authProfileIdSource: "auto",
|
||||
timeoutMs: 5_000,
|
||||
runId: "run:cooldown-failover",
|
||||
}),
|
||||
).rejects.toMatchObject({
|
||||
name: "FailoverError",
|
||||
reason: "rate_limit",
|
||||
provider: "openai",
|
||||
model: "mock-1",
|
||||
});
|
||||
|
||||
expect(runEmbeddedAttemptMock).not.toHaveBeenCalled();
|
||||
} finally {
|
||||
// Remove the temp directories whether or not the assertions passed.
await fs.rm(agentDir, { recursive: true, force: true });
|
||||
await fs.rm(workspaceDir, { recursive: true, force: true });
|
||||
}
|
||||
} finally {
|
||||
// Restore real timers so the fake clock does not carry over past this test.
vi.useRealTimers();
|
||||
}
|
||||
});
|
||||
|
||||
// Verifies that runEmbeddedPiAgent rejects with a FailoverError (reason "auth")
// when no credentials are supplied: the auth store has no profiles, the
// provider apiKey in the config is empty, and OPENAI_API_KEY is unset. The
// attempt mock must never be invoked.
it("fails over when auth is unavailable and fallbacks are configured", async () => {
|
||||
const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-agent-"));
|
||||
const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-workspace-"));
|
||||
// Remember the current env value so the finally block can put it back.
const previousOpenAiKey = process.env.OPENAI_API_KEY;
|
||||
delete process.env.OPENAI_API_KEY;
|
||||
try {
|
||||
// Write an auth store with no profiles and no usage stats.
const authPath = path.join(agentDir, "auth-profiles.json");
|
||||
await fs.writeFile(authPath, JSON.stringify({ version: 1, profiles: {}, usageStats: {} }));
|
||||
|
||||
await expect(
|
||||
runEmbeddedPiAgent({
|
||||
sessionId: "session:test",
|
||||
sessionKey: "agent:test:auth-unavailable",
|
||||
sessionFile: path.join(workspaceDir, "session.jsonl"),
|
||||
workspaceDir,
|
||||
agentDir,
|
||||
config: makeConfig({ fallbacks: ["openai/mock-2"], apiKey: "" }),
|
||||
prompt: "hello",
|
||||
provider: "openai",
|
||||
model: "mock-1",
|
||||
authProfileIdSource: "auto",
|
||||
timeoutMs: 5_000,
|
||||
runId: "run:auth-unavailable",
|
||||
}),
|
||||
).rejects.toMatchObject({ name: "FailoverError", reason: "auth" });
|
||||
|
||||
expect(runEmbeddedAttemptMock).not.toHaveBeenCalled();
|
||||
} finally {
|
||||
// Restore OPENAI_API_KEY exactly: keep it unset if it was unset before.
if (previousOpenAiKey === undefined) {
|
||||
delete process.env.OPENAI_API_KEY;
|
||||
} else {
|
||||
process.env.OPENAI_API_KEY = previousOpenAiKey;
|
||||
}
|
||||
await fs.rm(agentDir, { recursive: true, force: true });
|
||||
await fs.rm(workspaceDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it("skips profiles in cooldown when rotating after failure", async () => {
|
||||
vi.useFakeTimers();
|
||||
try {
|
||||
|
||||
@@ -38,6 +38,7 @@ import {
|
||||
isRateLimitAssistantError,
|
||||
isTimeoutErrorMessage,
|
||||
pickFallbackThinkingLevel,
|
||||
type FailoverReason,
|
||||
} from "../pi-embedded-helpers.js";
|
||||
import { normalizeUsage, type UsageLike } from "../usage.js";
|
||||
|
||||
@@ -92,6 +93,8 @@ export async function runEmbeddedPiAgent(
|
||||
const provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
|
||||
const modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
|
||||
const agentDir = params.agentDir ?? resolveClawdbotAgentDir();
|
||||
const fallbackConfigured =
|
||||
(params.config?.agents?.defaults?.model?.fallbacks?.length ?? 0) > 0;
|
||||
await ensureClawdbotModelsJson(params.config, agentDir);
|
||||
|
||||
const { model, error, authStorage, modelRegistry } = resolveModel(
|
||||
@@ -165,6 +168,42 @@ export async function runEmbeddedPiAgent(
|
||||
let apiKeyInfo: ApiKeyInfo | null = null;
|
||||
let lastProfileId: string | undefined;
|
||||
|
||||
// Picks the FailoverReason to report when no auth profile can be used.
// - allInCooldown: when true the reason is always "rate_limit".
// - message: otherwise passed to classifyFailoverReason; if that yields
//   null/undefined the reason defaults to "auth".
const resolveAuthProfileFailoverReason = (params: {
|
||||
allInCooldown: boolean;
|
||||
message: string;
|
||||
}): FailoverReason => {
|
||||
if (params.allInCooldown) return "rate_limit";
|
||||
const classified = classifyFailoverReason(params.message);
|
||||
return classified ?? "auth";
|
||||
};
|
||||
|
||||
// Always throws; used when no auth profile is usable for the current provider.
// - allInCooldown: forwarded to resolveAuthProfileFailoverReason.
// - message: optional explicit message; used when non-empty after trimming.
// - error: optional underlying error; supplies the message when `message` is
//   empty and is attached as `cause` on the FailoverError.
// Throws a FailoverError when fallbackConfigured is true. Otherwise rethrows
// `error` if it is an Error instance, else throws a plain Error with the
// resolved message.
const throwAuthProfileFailover = (params: {
|
||||
allInCooldown: boolean;
|
||||
message?: string;
|
||||
error?: unknown;
|
||||
}): never => {
|
||||
const fallbackMessage = `No available auth profile for ${provider} (all in cooldown or unavailable).`;
|
||||
// Message precedence: explicit message, then the described error, then the
// generic fallback. `||` is deliberate so empty strings fall through.
const message =
|
||||
params.message?.trim() ||
|
||||
(params.error ? describeUnknownError(params.error).trim() : "") ||
|
||||
fallbackMessage;
|
||||
const reason = resolveAuthProfileFailoverReason({
|
||||
allInCooldown: params.allInCooldown,
|
||||
message,
|
||||
});
|
||||
if (fallbackConfigured) {
|
||||
throw new FailoverError(message, {
|
||||
reason,
|
||||
provider,
|
||||
model: modelId,
|
||||
status: resolveFailoverStatus(reason),
|
||||
cause: params.error,
|
||||
});
|
||||
}
|
||||
// No fallbacks configured: rethrow the original Error object unchanged when
// there is one.
if (params.error instanceof Error) throw params.error;
|
||||
throw new Error(message);
|
||||
};
|
||||
|
||||
const resolveApiKeyForCandidate = async (candidate?: string) => {
|
||||
return getApiKeyForModel({
|
||||
model,
|
||||
@@ -238,14 +277,17 @@ export async function runEmbeddedPiAgent(
|
||||
break;
|
||||
}
|
||||
if (profileIndex >= profileCandidates.length) {
|
||||
throw new Error(
|
||||
`No available auth profile for ${provider} (all in cooldown or unavailable).`,
|
||||
);
|
||||
throwAuthProfileFailover({ allInCooldown: true });
|
||||
}
|
||||
} catch (err) {
|
||||
if (profileCandidates[profileIndex] === lockedProfileId) throw err;
|
||||
if (err instanceof FailoverError) throw err;
|
||||
if (profileCandidates[profileIndex] === lockedProfileId) {
|
||||
throwAuthProfileFailover({ allInCooldown: false, error: err });
|
||||
}
|
||||
const advanced = await advanceAuthProfile();
|
||||
if (!advanced) throw err;
|
||||
if (!advanced) {
|
||||
throwAuthProfileFailover({ allInCooldown: false, error: err });
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -393,9 +435,7 @@ export async function runEmbeddedPiAgent(
|
||||
}
|
||||
// FIX: Throw FailoverError for prompt errors when fallbacks configured
|
||||
// This enables model fallback for quota/rate limit errors during prompt submission
|
||||
const promptFallbackConfigured =
|
||||
(params.config?.agents?.defaults?.model?.fallbacks?.length ?? 0) > 0;
|
||||
if (promptFallbackConfigured && isFailoverErrorMessage(errorText)) {
|
||||
if (fallbackConfigured && isFailoverErrorMessage(errorText)) {
|
||||
throw new FailoverError(errorText, {
|
||||
reason: promptFailoverReason ?? "unknown",
|
||||
provider,
|
||||
@@ -419,8 +459,6 @@ export async function runEmbeddedPiAgent(
|
||||
continue;
|
||||
}
|
||||
|
||||
const fallbackConfigured =
|
||||
(params.config?.agents?.defaults?.model?.fallbacks?.length ?? 0) > 0;
|
||||
const authFailure = isAuthAssistantError(lastAssistant);
|
||||
const rateLimitFailure = isRateLimitAssistantError(lastAssistant);
|
||||
const failoverFailure = isFailoverAssistantError(lastAssistant);
|
||||
|
||||
Reference in New Issue
Block a user