fix: harden Cloud Code Assist failover (#544) (thanks @jeffersonwarrior)

2026-01-10 01:12:46 +01:00
parent 5843733978
commit 251ed83680
3 changed files with 40 additions and 3 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -75,6 +75,7 @@
 - Discord: stop provider when gateway reconnects are exhausted and surface errors. (#514) — thanks @joshp123
 - Agents: strip empty assistant text blocks from session history to avoid Claude API 400s. (#210)
 - Agents: scrub unsupported JSON Schema keywords from tool schemas for Cloud Code Assist API compatibility. (#567) — thanks @erikpr1994
 - Agents: sanitize Cloud Code Assist tool call IDs and detect format/quota errors for failover. (#544) — thanks @jeffersonwarrior
 - Agents: simplify session tool schemas for Gemini compatibility. (#599) — thanks @mcinteerj
 - Agents: add `session_status` agent tool for `/status`-equivalent status (incl. usage/cost) + per-session model overrides. — thanks @steipete
 - Auto-reply: preserve block reply ordering with timeout fallback for streaming. (#503) — thanks @joshp123
--- a/src/agents/pi-embedded-helpers.test.ts
+++ b/src/agents/pi-embedded-helpers.test.ts
@@ -6,12 +6,14 @@ import {
  classifyFailoverReason,
  formatAssistantErrorText,
  isBillingErrorMessage,
  isCloudCodeAssistFormatError,
  isContextOverflowError,
  isFailoverErrorMessage,
  isMessagingToolDuplicate,
  normalizeTextForComparison,
  sanitizeGoogleTurnOrdering,
  sanitizeSessionMessagesImages,
  sanitizeToolCallId,
  validateGeminiTurns,
 } from "./pi-embedded-helpers.js";
 import {
@@ -258,12 +260,34 @@ describe("classifyFailoverReason", () => {
  it("returns a stable reason", () => {
    // Error messages paired with the failover reason they must classify to.
    const classified: Array<[string, string]> = [
      ["invalid api key", "auth"],
      ["429 too many requests", "rate_limit"],
      ["resource has been exhausted", "rate_limit"],
      ["credit balance too low", "billing"],
      ["deadline exceeded", "timeout"],
    ];
    for (const [message, reason] of classified) {
      expect(classifyFailoverReason(message)).toBe(reason);
    }

    // Messages for which no failover reason is expected.
    const unclassified = ["string should match pattern", "bad request"];
    for (const message of unclassified) {
      expect(classifyFailoverReason(message)).toBeNull();
    }
  });
 });
 describe("isCloudCodeAssistFormatError", () => {
  // Each message listed here is expected to be reported as a format error.
  it("matches format errors", () => {
    [
      "INVALID_REQUEST_ERROR: string should match pattern",
      "messages.1.content.1.tool_use.id",
      "tool_use.id should match pattern",
      "invalid request format",
    ].forEach((message) => {
      expect(isCloudCodeAssistFormatError(message)).toBe(true);
    });
  });
  // A rate-limit message is expected not to be reported as a format error.
  it("ignores unrelated errors", () => {
    const verdict = isCloudCodeAssistFormatError("rate limit exceeded");
    expect(verdict).toBe(false);
  });
 });
 describe("formatAssistantErrorText", () => {
  const makeAssistantError = (errorMessage: string): AssistantMessage =>
    ({
@@ -277,6 +301,20 @@ describe("formatAssistantErrorText", () => {
  });
 });
 describe("sanitizeToolCallId", () => {
  // An ID made of letters, digits, "_" and "-" is expected back unchanged.
  it("keeps valid tool call IDs", () => {
    const untouched = sanitizeToolCallId("call_abc-123");
    expect(untouched).toBe("call_abc-123");
  });
  // "|" and ":" are each expected to come back as "_".
  it("replaces invalid characters with underscores", () => {
    const cleaned = sanitizeToolCallId("call_abc|item:456");
    expect(cleaned).toBe("call_abc_item_456");
  });
  // The empty string is expected to map to the fixed placeholder ID.
  it("returns default for empty IDs", () => {
    const fallback = sanitizeToolCallId("");
    expect(fallback).toBe("default_tool_id");
  });
 });
 describe("sanitizeGoogleTurnOrdering", () => {
  it("prepends a synthetic user turn when history starts with assistant", () => {
    const input = [
--- a/src/agents/pi-embedded-runner.ts
+++ b/src/agents/pi-embedded-runner.ts
@@ -104,7 +104,6 @@ import {
 // Optional features can be implemented as Pi extensions that run in the same Node process.
 /**
 * Resolve provider-specific extraParams from model config.
 * Auto-enables thinking mode for GLM-4.x models unless explicitly disabled.
@@ -1530,8 +1529,7 @@ export async function runEmbeddedPiAgent(params: {
          // Treat timeout as potential rate limit (Antigravity hangs on rate limit)
          const shouldRotate =
-            (!aborted &&
-              (failoverFailure || cloudCodeAssistFormatError)) ||
+            (!aborted && (failoverFailure || cloudCodeAssistFormatError)) ||
            timedOut;
          if (shouldRotate) {