fix: retry model fallback on rate limits

CI
2026-01-05 18:04:36 +01:00
committed by Peter Steinberger
parent 7900d33701
commit 5622dfe86b
3 changed files with 52 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
import type { AssistantMessage } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";

import { isRateLimitAssistantError } from "./pi-embedded-helpers.js";

const asAssistant = (overrides: Partial<AssistantMessage>) =>
  ({ role: "assistant", stopReason: "error", ...overrides }) as AssistantMessage;

describe("isRateLimitAssistantError", () => {
  it("detects 429 rate limit payloads", () => {
    const msg = asAssistant({
      errorMessage:
        '429 {"type":"error","error":{"type":"rate_limit_error","message":"This request would exceed your account\'s rate limit. Please try again later."}}',
    });
    expect(isRateLimitAssistantError(msg)).toBe(true);
  });

  it("detects human-readable rate limit messages", () => {
    const msg = asAssistant({
      errorMessage: "Too many requests. Rate limit exceeded.",
    });
    expect(isRateLimitAssistantError(msg)).toBe(true);
  });

  it("returns false for non-error messages", () => {
    const msg = asAssistant({
      stopReason: "end_turn",
      errorMessage: "rate limit",
    });
    expect(isRateLimitAssistantError(msg)).toBe(false);
  });
});

View File

@@ -109,3 +109,12 @@ export function formatAssistantErrorText(
  // Keep it short for WhatsApp.
  return raw.length > 600 ? `${raw.slice(0, 600)}` : raw;
}

export function isRateLimitAssistantError(
  msg: AssistantMessage | undefined,
): boolean {
  if (!msg || msg.stopReason !== "error") return false;
  const raw = (msg.errorMessage ?? "").toLowerCase();
  if (!raw) return false;
  return /rate[_ ]limit|too many requests|429/.test(raw);
}
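
For reference, a minimal standalone sketch of what the new check accepts. The pattern is copied from the helper above; the sample strings are illustrative and not taken from any real provider response.

// Standalone sketch: the same lowercased pattern the helper applies,
// run against a few illustrative error strings (not real provider payloads).
const isRateLimitText = (raw: string): boolean =>
  /rate[_ ]limit|too many requests|429/.test(raw.toLowerCase());

console.log(isRateLimitText("Too many requests. Rate limit exceeded.")); // true
console.log(isRateLimitText('429 {"type":"rate_limit_error"}'));         // true
console.log(isRateLimitText("Invalid API key"));                         // false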

View File

@@ -32,6 +32,7 @@ import {
  buildBootstrapContextFiles,
  ensureSessionHeader,
  formatAssistantErrorText,
  isRateLimitAssistantError,
  sanitizeSessionMessagesImages,
} from "./pi-embedded-helpers.js";
import {
@@ -551,6 +552,16 @@ export async function runEmbeddedPiAgent(params: {
    | AssistantMessage
    | undefined;

  const fallbackConfigured =
    (params.config?.agent?.modelFallbacks?.length ?? 0) > 0;
  if (fallbackConfigured && isRateLimitAssistantError(lastAssistant)) {
    const message =
      lastAssistant?.errorMessage?.trim() ||
      (lastAssistant ? formatAssistantErrorText(lastAssistant) : "") ||
      "LLM request rate limited.";
    throw new Error(message);
  }
  const usage = lastAssistant?.usage;
  const agentMeta: EmbeddedPiAgentMeta = {
    sessionId: sessionIdUsed,
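
For context on why the hunk above rethrows instead of swallowing the error: with agent.modelFallbacks configured, an outer caller is expected to catch the rate-limit error and retry with the next model. That caller is not part of this diff, so the following is only a sketch: the config shape mirrors params.config.agent.modelFallbacks from the hunk above, while looksLikeRateLimit, runWithFallbacks, and the retry loop are illustrative assumptions.

// Hypothetical caller-side sketch: walk the configured fallback models when a
// run fails with a rate-limit error. Only the modelFallbacks field comes from
// the diff; everything else here is illustrative.
type AgentConfig = { agent?: { model?: string; modelFallbacks?: string[] } };

const looksLikeRateLimit = (err: unknown): boolean =>
  /rate[_ ]limit|too many requests|429/.test(String(err).toLowerCase());

async function runWithFallbacks(
  config: AgentConfig,
  run: (model: string | undefined) => Promise<string>,
): Promise<string> {
  // Try the primary model first, then each configured fallback in order.
  const models = [config.agent?.model, ...(config.agent?.modelFallbacks ?? [])];
  let lastError: unknown;
  for (const model of models) {
    try {
      return await run(model);
    } catch (err) {
      lastError = err;
      if (!looksLikeRateLimit(err)) throw err; // only fall back on rate-limit errors
    }
  }
  throw lastError;
}

// Usage sketch (hypothetical wiring; the real call site is not shown in this diff):
// await runWithFallbacks(config, (model) => runAgentOnce({ ...params, model }));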