fix: retry model fallback on rate limits
This commit is contained in:
32
src/agents/pi-embedded-helpers.test.ts
Normal file
32
src/agents/pi-embedded-helpers.test.ts
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
import type { AssistantMessage } from "@mariozechner/pi-ai";
|
||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
|
import { isRateLimitAssistantError } from "./pi-embedded-helpers.js";
|
||||||
|
|
||||||
|
/**
 * Test fixture: builds an assistant message that defaults to
 * `role: "assistant"` and `stopReason: "error"`, with any field in
 * `overrides` taking precedence over those defaults.
 */
function asAssistant(overrides: Partial<AssistantMessage>): AssistantMessage {
  return {
    role: "assistant",
    stopReason: "error",
    ...overrides,
  } as AssistantMessage;
}
|
||||||
|
|
||||||
|
// Exercises isRateLimitAssistantError against a raw provider payload, a
// human-readable message, and a message whose stop reason is not "error".
describe("isRateLimitAssistantError", () => {
  it("detects 429 rate limit payloads", () => {
    const providerPayload =
      '429 {"type":"error","error":{"type":"rate_limit_error","message":"This request would exceed your account\'s rate limit. Please try again later."}}';
    const failed = asAssistant({ errorMessage: providerPayload });

    expect(isRateLimitAssistantError(failed)).toBe(true);
  });

  it("detects human-readable rate limit messages", () => {
    const failed = asAssistant({
      errorMessage: "Too many requests. Rate limit exceeded.",
    });

    expect(isRateLimitAssistantError(failed)).toBe(true);
  });

  it("returns false for non-error messages", () => {
    // The text mentions a rate limit, but the stop reason is not "error".
    const completed = asAssistant({
      stopReason: "end_turn",
      errorMessage: "rate limit",
    });

    expect(isRateLimitAssistantError(completed)).toBe(false);
  });
});
|
||||||
@@ -109,3 +109,12 @@ export function formatAssistantErrorText(
|
|||||||
// Keep it short for WhatsApp.
|
// Keep it short for WhatsApp.
|
||||||
return raw.length > 600 ? `${raw.slice(0, 600)}…` : raw;
|
return raw.length > 600 ? `${raw.slice(0, 600)}…` : raw;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Matches the textual signatures of a rate-limit failure:
 *  - "rate limit" with an optional space, underscore or hyphen separator
 *    (covers `rate_limit_error`, `rate-limit`, `RateLimitError`),
 *  - the phrase "too many requests",
 *  - the status code 429 when it is not embedded in a longer digit run, so
 *    identifiers such as `req_0114290` or counts such as `4290` do not match.
 *
 * Deliberately has no `g`/`y` flag so `.test()` carries no state between calls.
 */
const RATE_LIMIT_ERROR_PATTERN =
  /rate[-_ ]?limit|too many requests|(?<!\d)429(?!\d)/i;

/**
 * Reports whether an assistant message represents a rate-limit failure.
 *
 * @param msg - The assistant message to inspect; may be `undefined`.
 * @returns `true` only when `msg.stopReason` is `"error"` and its
 *   `errorMessage` matches a known rate-limit signature; `false` for a missing
 *   message, a non-error stop reason, or an absent/empty error message.
 */
export function isRateLimitAssistantError(
  msg: AssistantMessage | undefined,
): boolean {
  if (!msg || msg.stopReason !== "error") return false;
  const raw = msg.errorMessage ?? "";
  if (!raw) return false;
  return RATE_LIMIT_ERROR_PATTERN.test(raw);
}
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ import {
|
|||||||
buildBootstrapContextFiles,
|
buildBootstrapContextFiles,
|
||||||
ensureSessionHeader,
|
ensureSessionHeader,
|
||||||
formatAssistantErrorText,
|
formatAssistantErrorText,
|
||||||
|
isRateLimitAssistantError,
|
||||||
sanitizeSessionMessagesImages,
|
sanitizeSessionMessagesImages,
|
||||||
} from "./pi-embedded-helpers.js";
|
} from "./pi-embedded-helpers.js";
|
||||||
import {
|
import {
|
||||||
@@ -551,6 +552,16 @@ export async function runEmbeddedPiAgent(params: {
|
|||||||
| AssistantMessage
|
| AssistantMessage
|
||||||
| undefined;
|
| undefined;
|
||||||
|
|
||||||
|
const fallbackConfigured =
|
||||||
|
(params.config?.agent?.modelFallbacks?.length ?? 0) > 0;
|
||||||
|
if (fallbackConfigured && isRateLimitAssistantError(lastAssistant)) {
|
||||||
|
const message =
|
||||||
|
lastAssistant?.errorMessage?.trim() ||
|
||||||
|
(lastAssistant ? formatAssistantErrorText(lastAssistant) : "") ||
|
||||||
|
"LLM request rate limited.";
|
||||||
|
throw new Error(message);
|
||||||
|
}
|
||||||
|
|
||||||
const usage = lastAssistant?.usage;
|
const usage = lastAssistant?.usage;
|
||||||
const agentMeta: EmbeddedPiAgentMeta = {
|
const agentMeta: EmbeddedPiAgentMeta = {
|
||||||
sessionId: sessionIdUsed,
|
sessionId: sessionIdUsed,
|
||||||
|
|||||||
Reference in New Issue
Block a user