fix(auth): improve multi-account round-robin rotation and 429 handling

This commit fixes several issues with multi-account OAuth rotation that
were causing slow responses and inefficient account cycling.

## Changes

### 1. Fix usageStats race condition (auth-profiles.ts)

The `markAuthProfileUsed`, `markAuthProfileCooldown`, `markAuthProfileGood`,
and `clearAuthProfileCooldown` functions were using a stale in-memory store
passed as a parameter. Long-running sessions would overwrite usageStats
updates from concurrent sessions when saving.

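**Fix:** Re-read the store from disk before each update to pick up changes
written by other sessions (fresh `usageStats`, or `lastGood` in the case of
`markAuthProfileGood`), then apply the update to that fresh copy and save it.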

### 2. Capture AbortError from waitForCompactionRetry (pi-embedded-runner.ts)

When a request timed out, `session.abort()` was called, which throws an
`AbortError`. The code structure was:

```javascript
try {
  await session.prompt(params.prompt);
} catch (err) {
  promptError = err;  // Catches AbortError here
}
await waitForCompactionRetry();  // But THIS also throws AbortError!
```

The second `AbortError` from `waitForCompactionRetry()` escaped and
bypassed the rotation/fallback logic entirely.

**Fix:** Wrap `waitForCompactionRetry()` in its own try/catch and record the
error as `promptError` (unless `session.prompt()` already set one), enabling
proper timeout handling.

Root cause analysis and fix proposed by @erikpr1994 in #313.

Fixes #313

### 3. Fail fast on 429 rate limits (pi-ai patch)

The pi-ai library was retrying 429 errors up to 3 times with exponential
backoff before throwing. This meant a rate-limited account would waste
30+ seconds retrying before our rotation code could try the next account.

**Fix:** Patch google-gemini-cli.js to:
- Throw immediately on first 429 (no retries)
- Not catch and retry 429 errors in the network error handler

This allows the caller to rotate to the next account instantly on rate limit.

Note: We submitted this fix upstream (https://github.com/badlogic/pi-mono/pull/504)
but it was closed without merging. Keeping as a local patch for now.

## Testing

With 6 Antigravity accounts configured:
- Accounts rotate properly based on lastUsed (round-robin)
- 429s trigger immediate rotation to next account
- usageStats persist correctly across concurrent sessions
- Cooldown tracking works as expected

## Before/After

- **Before:** Multiple 429 retries on the same account, 30-90s delays
- **After:** Instant rotation on 429, responses in seconds
This commit is contained in:
Muhammed Mukhthar CM
2026-01-06 22:44:19 +00:00
committed by Peter Steinberger
parent 2871657ebe
commit eb5f758f6b
4 changed files with 71 additions and 27 deletions

View File

@@ -355,23 +355,26 @@ export function isProfileInCooldown(
/**
* Mark a profile as successfully used. Resets error count and updates lastUsed.
* Re-reads the store from disk to avoid overwriting concurrent updates.
*
* NOTE(review): this span is a diff hunk rendered without +/- markers. Lines
* that touch `store` are the pre-change version; the matching `freshStore`
* lines are their replacements.
*
* Post-change behavior: loads the store via `ensureAuthProfileStore(agentDir)`,
* returns without saving when `profileId` is not present in `profiles`, and
* otherwise sets `lastUsed` (now), `errorCount` (0) and `cooldownUntil`
* (undefined) on that profile's usageStats entry, keeps its other fields, and
* saves the fresh store.
*
* @param params.store - Still part of the signature, but the post-change
*   destructuring no longer reads it.
* @param params.profileId - Key into `profiles` and `usageStats`.
* @param params.agentDir - Passed through to `ensureAuthProfileStore` and
*   `saveAuthProfileStore`.
*/
export function markAuthProfileUsed(params: {
store: AuthProfileStore;
profileId: string;
agentDir?: string;
}): void {
const { store, profileId, agentDir } = params;
if (!store.profiles[profileId]) return;
const { profileId, agentDir } = params;
// Re-read from disk to get fresh usageStats from other sessions
// NOTE(review): no lock is visible between this re-read and the save below;
// confirm whether another writer can still interleave in that window.
const freshStore = ensureAuthProfileStore(agentDir);
if (!freshStore.profiles[profileId]) return;
store.usageStats = store.usageStats ?? {};
store.usageStats[profileId] = {
...store.usageStats[profileId],
freshStore.usageStats = freshStore.usageStats ?? {};
freshStore.usageStats[profileId] = {
...freshStore.usageStats[profileId],
lastUsed: Date.now(),
errorCount: 0,
cooldownUntil: undefined,
};
saveAuthProfileStore(store, agentDir);
saveAuthProfileStore(freshStore, agentDir);
}
export function calculateAuthProfileCooldownMs(errorCount: number): number {
@@ -385,47 +388,53 @@ export function calculateAuthProfileCooldownMs(errorCount: number): number {
/**
* Mark a profile as failed/rate-limited. Applies exponential backoff cooldown.
* Cooldown times: 1min, 5min, 25min, max 1 hour.
* Re-reads the store from disk to avoid overwriting concurrent updates.
*
* NOTE(review): this span is a diff hunk rendered without +/- markers. Lines
* that touch `store` are the pre-change version; the matching `freshStore`
* lines are their replacements.
*
* Post-change behavior: loads the store via `ensureAuthProfileStore(agentDir)`,
* returns without saving when `profileId` is not present in `profiles`, and
* otherwise increments the profile's `errorCount`, sets `cooldownUntil` to now
* plus `calculateAuthProfileCooldownMs(errorCount)`, keeps the entry's other
* fields, and saves the fresh store.
*
* @param params.store - Still part of the signature, but the post-change
*   destructuring no longer reads it.
* @param params.profileId - Key into `profiles` and `usageStats`.
* @param params.agentDir - Passed through to `ensureAuthProfileStore` and
*   `saveAuthProfileStore`.
*/
export function markAuthProfileCooldown(params: {
store: AuthProfileStore;
profileId: string;
agentDir?: string;
}): void {
const { store, profileId, agentDir } = params;
if (!store.profiles[profileId]) return;
const { profileId, agentDir } = params;
// Re-read from disk to get fresh usageStats from other sessions
const freshStore = ensureAuthProfileStore(agentDir);
if (!freshStore.profiles[profileId]) return;
store.usageStats = store.usageStats ?? {};
const existing = store.usageStats[profileId] ?? {};
freshStore.usageStats = freshStore.usageStats ?? {};
const existing = freshStore.usageStats[profileId] ?? {};
// A missing errorCount is treated as 0, so the first failure yields 1.
const errorCount = (existing.errorCount ?? 0) + 1;
// Exponential backoff: 1min, 5min, 25min, capped at 1h
const backoffMs = calculateAuthProfileCooldownMs(errorCount);
store.usageStats[profileId] = {
freshStore.usageStats[profileId] = {
...existing,
errorCount,
cooldownUntil: Date.now() + backoffMs,
};
saveAuthProfileStore(store, agentDir);
saveAuthProfileStore(freshStore, agentDir);
}
/**
* Clear cooldown for a profile (e.g., manual reset).
* Re-reads the store from disk to avoid overwriting concurrent updates.
*
* NOTE(review): this span is a diff hunk rendered without +/- markers. Lines
* that touch `store` are the pre-change version; the matching `freshStore`
* lines are their replacements.
*
* Post-change behavior: loads the store via `ensureAuthProfileStore(agentDir)`,
* returns without saving when the profile has no `usageStats` entry, and
* otherwise resets `errorCount` to 0 and `cooldownUntil` to undefined, keeps
* the entry's other fields, and saves the fresh store.
*
* @param params.store - Still part of the signature, but the post-change
*   destructuring no longer reads it.
* @param params.profileId - Key into `usageStats`.
* @param params.agentDir - Passed through to `ensureAuthProfileStore` and
*   `saveAuthProfileStore`.
*/
export function clearAuthProfileCooldown(params: {
store: AuthProfileStore;
profileId: string;
agentDir?: string;
}): void {
const { store, profileId, agentDir } = params;
if (!store.usageStats?.[profileId]) return;
const { profileId, agentDir } = params;
// Re-read from disk to get fresh usageStats from other sessions
const freshStore = ensureAuthProfileStore(agentDir);
// Unlike the mark* functions, the guard here is on usageStats, not profiles.
if (!freshStore.usageStats?.[profileId]) return;
store.usageStats[profileId] = {
...store.usageStats[profileId],
freshStore.usageStats[profileId] = {
...freshStore.usageStats[profileId],
errorCount: 0,
cooldownUntil: undefined,
};
saveAuthProfileStore(store, agentDir);
saveAuthProfileStore(freshStore, agentDir);
}
export function resolveAuthProfileOrder(params: {
@@ -591,11 +600,13 @@ export function markAuthProfileGood(params: {
profileId: string;
agentDir?: string;
}): void {
const { store, provider, profileId, agentDir } = params;
const profile = store.profiles[profileId];
const { provider, profileId, agentDir } = params;
// Re-read from disk to avoid overwriting concurrent updates
const freshStore = ensureAuthProfileStore(agentDir);
const profile = freshStore.profiles[profileId];
if (!profile || profile.provider !== provider) return;
store.lastGood = { ...store.lastGood, [provider]: profileId };
saveAuthProfileStore(store, agentDir);
freshStore.lastGood = { ...freshStore.lastGood, [provider]: profileId };
saveAuthProfileStore(freshStore, agentDir);
}
export function resolveAuthStorePathForDisplay(): string {

View File

@@ -909,7 +909,12 @@ export async function runEmbeddedPiAgent(params: {
`embedded run prompt end: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - promptStartedAt}`,
);
}
await waitForCompactionRetry();
try {
await waitForCompactionRetry();
} catch (err) {
// Capture AbortError from waitForCompactionRetry to enable fallback/rotation
if (!promptError) promptError = err;
}
messagesSnapshot = session.messages.slice();
sessionIdUsed = session.sessionId;
} finally {