fix: retry model fallback on rate limits
src/agents/pi-embedded-helpers.test.ts (new file, 32 lines)
@@ -0,0 +1,32 @@
import type { AssistantMessage } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";

import { isRateLimitAssistantError } from "./pi-embedded-helpers.js";

const asAssistant = (overrides: Partial<AssistantMessage>) =>
  ({ role: "assistant", stopReason: "error", ...overrides }) as AssistantMessage;

describe("isRateLimitAssistantError", () => {
  it("detects 429 rate limit payloads", () => {
    const msg = asAssistant({
      errorMessage:
        '429 {"type":"error","error":{"type":"rate_limit_error","message":"This request would exceed your account\'s rate limit. Please try again later."}}',
    });
    expect(isRateLimitAssistantError(msg)).toBe(true);
  });

  it("detects human-readable rate limit messages", () => {
    const msg = asAssistant({
      errorMessage: "Too many requests. Rate limit exceeded.",
    });
    expect(isRateLimitAssistantError(msg)).toBe(true);
  });

  it("returns false for non-error messages", () => {
    const msg = asAssistant({
      stopReason: "end_turn",
      errorMessage: "rate limit",
    });
    expect(isRateLimitAssistantError(msg)).toBe(false);
  });
});

src/agents/pi-embedded-helpers.ts
@@ -109,3 +109,12 @@ export function formatAssistantErrorText(
  // Keep it short for WhatsApp.
  return raw.length > 600 ? `${raw.slice(0, 600)}…` : raw;
}

export function isRateLimitAssistantError(
  msg: AssistantMessage | undefined,
): boolean {
  if (!msg || msg.stopReason !== "error") return false;
  const raw = (msg.errorMessage ?? "").toLowerCase();
  if (!raw) return false;
  return /rate[_ ]limit|too many requests|429/.test(raw);
}
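For reference (not part of the commit), a quick sketch of how the new predicate behaves; it uses only names already imported by the test file above:

import type { AssistantMessage } from "@mariozechner/pi-ai";
import { isRateLimitAssistantError } from "./pi-embedded-helpers.js";

// Matching is case-insensitive and substring-based, so structured provider
// payloads and plain-text errors are both caught without parsing the body.
const rateLimited = {
  role: "assistant",
  stopReason: "error",
  errorMessage: "HTTP 429: Too Many Requests",
} as AssistantMessage;

const otherError = {
  role: "assistant",
  stopReason: "error",
  errorMessage: "model not found",
} as AssistantMessage;

isRateLimitAssistantError(rateLimited); // true
isRateLimitAssistantError(otherError);  // false
isRateLimitAssistantError(undefined);   // false — nothing to inspect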
@@ -32,6 +32,7 @@ import {
  buildBootstrapContextFiles,
  ensureSessionHeader,
  formatAssistantErrorText,
  isRateLimitAssistantError,
  sanitizeSessionMessagesImages,
} from "./pi-embedded-helpers.js";
import {
@@ -551,6 +552,16 @@ export async function runEmbeddedPiAgent(params: {
    | AssistantMessage
    | undefined;

  const fallbackConfigured =
    (params.config?.agent?.modelFallbacks?.length ?? 0) > 0;
  if (fallbackConfigured && isRateLimitAssistantError(lastAssistant)) {
    const message =
      lastAssistant?.errorMessage?.trim() ||
      (lastAssistant ? formatAssistantErrorText(lastAssistant) : "") ||
      "LLM request rate limited.";
    throw new Error(message);
  }

  const usage = lastAssistant?.usage;
  const agentMeta: EmbeddedPiAgentMeta = {
    sessionId: sessionIdUsed,
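The hunk above only rethrows the rate-limit error when fallbacks are configured; the actual retry over fallback models presumably happens in the caller. A minimal caller-side sketch of that loop follows — runWithModelFallbacks, AgentRunner, and the per-attempt model override are assumptions for illustration, not code from this commit:

// Hypothetical caller-side retry loop (assumed shape, not from this diff).
// It expects the runner to throw on rate limits, as the hunk above ensures
// when config.agent.modelFallbacks is non-empty.
type AgentRunner = (model: string) => Promise<unknown>;

export async function runWithModelFallbacks(
  runAttempt: AgentRunner,
  primaryModel: string,
  modelFallbacks: string[], // e.g. taken from config.agent.modelFallbacks
): Promise<unknown> {
  let lastError: unknown;
  for (const model of [primaryModel, ...modelFallbacks]) {
    try {
      return await runAttempt(model);
    } catch (err) {
      // On a rate-limited attempt the error surfaces here; try the next
      // configured model instead of failing the whole run.
      lastError = err;
    }
  }
  throw lastError;
}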