fix(auth): improve multi-account round-robin rotation and 429 handling
This commit fixes several issues with multi-account OAuth rotation that
were causing slow responses and inefficient account cycling.
## Changes
### 1. Fix usageStats race condition (auth-profiles.ts)
The `markAuthProfileUsed`, `markAuthProfileCooldown`, `markAuthProfileGood`,
and `clearAuthProfileCooldown` functions were using a stale in-memory store
passed as a parameter. Long-running sessions would overwrite usageStats
updates from concurrent sessions when saving.
**Fix:** Re-read the store from disk before each update so that usageStats
written by other sessions are preserved, then apply the update to that
fresh copy and save it.
### 2. Capture AbortError from waitForCompactionRetry (pi-embedded-runner.ts)
When a request timed out, `session.abort()` was called, which caused an
`AbortError` to be thrown. The code structure was:
```javascript
try {
await session.prompt(params.prompt);
} catch (err) {
promptError = err; // Catches AbortError here
}
await waitForCompactionRetry(); // But THIS also throws AbortError!
```
The second `AbortError` from `waitForCompactionRetry()` escaped and
bypassed the rotation/fallback logic entirely.
**Fix:** Wrap `waitForCompactionRetry()` in its own try/catch and record the
caught error as `promptError` (unless `session.prompt()` already set one),
so that the timeout goes through the normal rotation/fallback handling.
Root cause analysis and fix proposed by @erikpr1994 in #313.
Fixes #313
### 3. Fail fast on 429 rate limits (pi-ai patch)
The pi-ai library was retrying 429 errors up to 3 times with exponential
backoff before throwing. This meant a rate-limited account would waste
30+ seconds retrying before our rotation code could try the next account.
**Fix:** Patch google-gemini-cli.js to:
- Throw immediately on the first 429 response (no retries)
- Rethrow errors whose message contains "429" from the network error
  handler instead of catching and retrying them
This allows the caller to rotate to the next account instantly on rate limit.
Note: We submitted this fix upstream (https://github.com/badlogic/pi-mono/pull/504)
but it was closed without merging. Keeping as a local patch for now.
## Testing
With 6 Antigravity accounts configured:
- Accounts rotate properly based on lastUsed (round-robin)
- 429s trigger immediate rotation to next account
- usageStats persist correctly across concurrent sessions
- Cooldown tracking works as expected
## Before/After
**Before:** Multiple 429 retries on same account, 30-90s delays
**After:** Instant rotation on 429, responses in seconds
This commit is contained in:
committed by
Peter Steinberger
parent
2871657ebe
commit
eb5f758f6b
@@ -213,3 +213,31 @@ index 20fb0a22aaa28f7ff7c2f44a8b628fa1d9d7d936..31bae0aface1319487ce62d35f1f3b6e
|
||||
}));
|
||||
}
|
||||
function mapStopReason(status) {
|
||||
diff --git a/dist/providers/google-gemini-cli.js b/dist/providers/google-gemini-cli.js
|
||||
--- a/dist/providers/google-gemini-cli.js
|
||||
+++ b/dist/providers/google-gemini-cli.js
|
||||
@@ -168,7 +168,12 @@ async function* streamCompletion(params, options) {
|
||||
break; // Success, exit retry loop
|
||||
}
|
||||
const errorText = await response.text();
|
||||
- // Check if retryable
|
||||
+ // PATCH: Fail immediately on 429 to let caller rotate accounts
|
||||
+ if (response.status === 429) {
|
||||
+ console.log(`[pi-ai] 429 rate limit - failing fast to rotate account`);
|
||||
+ throw new Error(`Cloud Code Assist API error (${response.status}): ${errorText}`);
|
||||
+ }
|
||||
+ // Check if retryable (non-429 errors)
|
||||
if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
|
||||
// Use server-provided delay or exponential backoff
|
||||
const serverDelay = extractRetryDelay(errorText);
|
||||
@@ -183,6 +188,10 @@ async function* streamCompletion(params, options) {
|
||||
if (error instanceof Error && error.message === "Request was aborted") {
|
||||
throw error;
|
||||
}
|
||||
+ // PATCH: Don't retry 429 errors - let caller rotate accounts
|
||||
+ if (error instanceof Error && error.message.includes("429")) {
|
||||
+ throw error;
|
||||
+ }
|
||||
lastError = error instanceof Error ? error : new Error(String(error));
|
||||
// Network errors are retryable
|
||||
if (attempt < MAX_RETRIES) {
|
||||
|
||||
10
pnpm-lock.yaml
generated
10
pnpm-lock.yaml
generated
@@ -9,7 +9,7 @@ overrides:
|
||||
|
||||
patchedDependencies:
|
||||
'@mariozechner/pi-ai':
|
||||
hash: b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a
|
||||
hash: 31ccee9eb1fc5053d766ccbf3abc8340a5fb3bea1c52b491a385b53468b81819
|
||||
path: patches/@mariozechner__pi-ai.patch
|
||||
playwright-core@1.57.0:
|
||||
hash: 66f1f266424dbe354068aaa5bba87bfb0e1d7d834a938c25dd70d43cdf1c1b02
|
||||
@@ -39,7 +39,7 @@ importers:
|
||||
version: 0.37.2(ws@8.19.0)(zod@4.3.5)
|
||||
'@mariozechner/pi-ai':
|
||||
specifier: ^0.37.2
|
||||
version: 0.37.2(patch_hash=b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a)(ws@8.19.0)(zod@4.3.5)
|
||||
version: 0.37.2(patch_hash=31ccee9eb1fc5053d766ccbf3abc8340a5fb3bea1c52b491a385b53468b81819)(ws@8.19.0)(zod@4.3.5)
|
||||
'@mariozechner/pi-coding-agent':
|
||||
specifier: ^0.37.2
|
||||
version: 0.37.2(ws@8.19.0)(zod@4.3.5)
|
||||
@@ -3570,7 +3570,7 @@ snapshots:
|
||||
|
||||
'@mariozechner/pi-agent-core@0.37.2(ws@8.19.0)(zod@4.3.5)':
|
||||
dependencies:
|
||||
'@mariozechner/pi-ai': 0.37.2(patch_hash=b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a)(ws@8.19.0)(zod@4.3.5)
|
||||
'@mariozechner/pi-ai': 0.37.2(patch_hash=31ccee9eb1fc5053d766ccbf3abc8340a5fb3bea1c52b491a385b53468b81819)(ws@8.19.0)(zod@4.3.5)
|
||||
'@mariozechner/pi-tui': 0.37.2
|
||||
transitivePeerDependencies:
|
||||
- '@modelcontextprotocol/sdk'
|
||||
@@ -3580,7 +3580,7 @@ snapshots:
|
||||
- ws
|
||||
- zod
|
||||
|
||||
'@mariozechner/pi-ai@0.37.2(patch_hash=b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a)(ws@8.19.0)(zod@4.3.5)':
|
||||
'@mariozechner/pi-ai@0.37.2(patch_hash=31ccee9eb1fc5053d766ccbf3abc8340a5fb3bea1c52b491a385b53468b81819)(ws@8.19.0)(zod@4.3.5)':
|
||||
dependencies:
|
||||
'@anthropic-ai/sdk': 0.71.2(zod@4.3.5)
|
||||
'@google/genai': 1.34.0
|
||||
@@ -3604,7 +3604,7 @@ snapshots:
|
||||
dependencies:
|
||||
'@crosscopy/clipboard': 0.2.8
|
||||
'@mariozechner/pi-agent-core': 0.37.2(ws@8.19.0)(zod@4.3.5)
|
||||
'@mariozechner/pi-ai': 0.37.2(patch_hash=b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a)(ws@8.19.0)(zod@4.3.5)
|
||||
'@mariozechner/pi-ai': 0.37.2(patch_hash=31ccee9eb1fc5053d766ccbf3abc8340a5fb3bea1c52b491a385b53468b81819)(ws@8.19.0)(zod@4.3.5)
|
||||
'@mariozechner/pi-tui': 0.37.2
|
||||
chalk: 5.6.2
|
||||
cli-highlight: 2.1.11
|
||||
|
||||
@@ -355,23 +355,26 @@ export function isProfileInCooldown(
|
||||
|
||||
/**
|
||||
* Mark a profile as successfully used. Resets error count and updates lastUsed.
|
||||
* Re-reads the store from disk to avoid overwriting concurrent updates.
|
||||
*/
|
||||
export function markAuthProfileUsed(params: {
|
||||
store: AuthProfileStore;
|
||||
profileId: string;
|
||||
agentDir?: string;
|
||||
}): void {
|
||||
const { store, profileId, agentDir } = params;
|
||||
if (!store.profiles[profileId]) return;
|
||||
const { profileId, agentDir } = params;
|
||||
// Re-read from disk to get fresh usageStats from other sessions
|
||||
const freshStore = ensureAuthProfileStore(agentDir);
|
||||
if (!freshStore.profiles[profileId]) return;
|
||||
|
||||
store.usageStats = store.usageStats ?? {};
|
||||
store.usageStats[profileId] = {
|
||||
...store.usageStats[profileId],
|
||||
freshStore.usageStats = freshStore.usageStats ?? {};
|
||||
freshStore.usageStats[profileId] = {
|
||||
...freshStore.usageStats[profileId],
|
||||
lastUsed: Date.now(),
|
||||
errorCount: 0,
|
||||
cooldownUntil: undefined,
|
||||
};
|
||||
saveAuthProfileStore(store, agentDir);
|
||||
saveAuthProfileStore(freshStore, agentDir);
|
||||
}
|
||||
|
||||
export function calculateAuthProfileCooldownMs(errorCount: number): number {
|
||||
@@ -385,47 +388,53 @@ export function calculateAuthProfileCooldownMs(errorCount: number): number {
|
||||
/**
|
||||
* Mark a profile as failed/rate-limited. Applies exponential backoff cooldown.
|
||||
* Cooldown times: 1min, 5min, 25min, max 1 hour.
|
||||
* Re-reads the store from disk to avoid overwriting concurrent updates.
|
||||
*/
|
||||
export function markAuthProfileCooldown(params: {
|
||||
store: AuthProfileStore;
|
||||
profileId: string;
|
||||
agentDir?: string;
|
||||
}): void {
|
||||
const { store, profileId, agentDir } = params;
|
||||
if (!store.profiles[profileId]) return;
|
||||
const { profileId, agentDir } = params;
|
||||
// Re-read from disk to get fresh usageStats from other sessions
|
||||
const freshStore = ensureAuthProfileStore(agentDir);
|
||||
if (!freshStore.profiles[profileId]) return;
|
||||
|
||||
store.usageStats = store.usageStats ?? {};
|
||||
const existing = store.usageStats[profileId] ?? {};
|
||||
freshStore.usageStats = freshStore.usageStats ?? {};
|
||||
const existing = freshStore.usageStats[profileId] ?? {};
|
||||
const errorCount = (existing.errorCount ?? 0) + 1;
|
||||
|
||||
// Exponential backoff: 1min, 5min, 25min, capped at 1h
|
||||
const backoffMs = calculateAuthProfileCooldownMs(errorCount);
|
||||
|
||||
store.usageStats[profileId] = {
|
||||
freshStore.usageStats[profileId] = {
|
||||
...existing,
|
||||
errorCount,
|
||||
cooldownUntil: Date.now() + backoffMs,
|
||||
};
|
||||
saveAuthProfileStore(store, agentDir);
|
||||
saveAuthProfileStore(freshStore, agentDir);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear cooldown for a profile (e.g., manual reset).
|
||||
* Re-reads the store from disk to avoid overwriting concurrent updates.
|
||||
*/
|
||||
export function clearAuthProfileCooldown(params: {
|
||||
store: AuthProfileStore;
|
||||
profileId: string;
|
||||
agentDir?: string;
|
||||
}): void {
|
||||
const { store, profileId, agentDir } = params;
|
||||
if (!store.usageStats?.[profileId]) return;
|
||||
const { profileId, agentDir } = params;
|
||||
// Re-read from disk to get fresh usageStats from other sessions
|
||||
const freshStore = ensureAuthProfileStore(agentDir);
|
||||
if (!freshStore.usageStats?.[profileId]) return;
|
||||
|
||||
store.usageStats[profileId] = {
|
||||
...store.usageStats[profileId],
|
||||
freshStore.usageStats[profileId] = {
|
||||
...freshStore.usageStats[profileId],
|
||||
errorCount: 0,
|
||||
cooldownUntil: undefined,
|
||||
};
|
||||
saveAuthProfileStore(store, agentDir);
|
||||
saveAuthProfileStore(freshStore, agentDir);
|
||||
}
|
||||
|
||||
export function resolveAuthProfileOrder(params: {
|
||||
@@ -591,11 +600,13 @@ export function markAuthProfileGood(params: {
|
||||
profileId: string;
|
||||
agentDir?: string;
|
||||
}): void {
|
||||
const { store, provider, profileId, agentDir } = params;
|
||||
const profile = store.profiles[profileId];
|
||||
const { provider, profileId, agentDir } = params;
|
||||
// Re-read from disk to avoid overwriting concurrent updates
|
||||
const freshStore = ensureAuthProfileStore(agentDir);
|
||||
const profile = freshStore.profiles[profileId];
|
||||
if (!profile || profile.provider !== provider) return;
|
||||
store.lastGood = { ...store.lastGood, [provider]: profileId };
|
||||
saveAuthProfileStore(store, agentDir);
|
||||
freshStore.lastGood = { ...freshStore.lastGood, [provider]: profileId };
|
||||
saveAuthProfileStore(freshStore, agentDir);
|
||||
}
|
||||
|
||||
export function resolveAuthStorePathForDisplay(): string {
|
||||
|
||||
@@ -909,7 +909,12 @@ export async function runEmbeddedPiAgent(params: {
|
||||
`embedded run prompt end: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - promptStartedAt}`,
|
||||
);
|
||||
}
|
||||
await waitForCompactionRetry();
|
||||
try {
|
||||
await waitForCompactionRetry();
|
||||
} catch (err) {
|
||||
// Capture AbortError from waitForCompactionRetry to enable fallback/rotation
|
||||
if (!promptError) promptError = err;
|
||||
}
|
||||
messagesSnapshot = session.messages.slice();
|
||||
sessionIdUsed = session.sessionId;
|
||||
} finally {
|
||||
|
||||
Reference in New Issue
Block a user