Format messages so they work with Gemini API (#266)

* fix: Gemini stops working after one message in a session
* fix: small issue in test file
* test: cover google role-merge behavior

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-01-05 22:45:40 -06:00
parent 2737e17c67
commit 8880128ebf
4 changed files with 340 additions and 10 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@
 - Auth: lock auth profile refreshes to avoid multi-instance OAuth logouts; keep credentials on refresh failure.
 - Onboarding: prompt immediately for OpenAI Codex redirect URL on remote/headless logins.
 - Typing indicators: stop typing once the reply dispatcher drains to prevent stuck typing across Discord/Telegram/WhatsApp.
+- Google: merge consecutive same-role messages to satisfy the strict user/model role alternation required by Google provider models. Thanks @Asleep123 for PR #266.
 - WhatsApp/Telegram: add groupPolicy handling for group messages and normalize allowFrom matching (tg/telegram prefixes). Thanks @mneves75.
 - Auto-reply: add configurable ack reactions for inbound messages (default 👀 or `identity.emoji`) with scope controls. Thanks @obviyus for PR #178.
 - Onboarding: resolve CLI entrypoint when running via `npx` so gateway daemon install works without a build step.
--- a/patches/@mariozechner__pi-ai.patch
+++ b/patches/@mariozechner__pi-ai.patch
@@ -1,8 +1,52 @@
 diff --git a/dist/providers/google-shared.js b/dist/providers/google-shared.js
-index 7bc0a9f5d6241f191cd607ecb37b3acac8d58267..76166a34784cbc0718d4b9bd1fa6336a6dd394ec 100644
+index 7bc0a9f5d6241f191cd607ecb37b3acac8d58267..56866774e47444b5d333961c9b20fce582363124 100644
 --- a/dist/providers/google-shared.js
 +++ b/dist/providers/google-shared.js
-@@ -51,9 +51,19 @@ export function convertMessages(model, context) {
+@@ -10,13 +10,27 @@ import { transformMessages } from "./transorm-messages.js";
+ export function convertMessages(model, context) {
+     const contents = [];
+     const transformedMessages = transformMessages(context.messages, model);
+    
+    /**
+     * Helper to add content while merging consecutive messages of the same role.
+     * Gemini/Cloud Code Assist requires strict role alternation (user/model/user/model).
+     * Consecutive messages of the same role cause "function call turn" errors.
+     */
+    function addContent(role, parts) {
+        if (parts.length === 0) return;
+        const lastContent = contents[contents.length - 1];
+        if (lastContent?.role === role) {
+            // Merge into existing message of same role
+            lastContent.parts.push(...parts);
+        } else {
+            contents.push({ role, parts });
+        }
+    }
+    
+     for (const msg of transformedMessages) {
+         if (msg.role === "user") {
+             if (typeof msg.content === "string") {
+-                contents.push({
+-                    role: "user",
+-                    parts: [{ text: sanitizeSurrogates(msg.content) }],
+-                });
+                addContent("user", [{ text: sanitizeSurrogates(msg.content) }]);
+             }
+             else {
+                 const parts = msg.content.map((item) => {
+@@ -35,10 +49,7 @@ export function convertMessages(model, context) {
+                 const filteredParts = !model.input.includes("image") ? parts.filter((p) => p.text !== undefined) : parts;
+                 if (filteredParts.length === 0)
+                     continue;
+-                contents.push({
+-                    role: "user",
+-                    parts: filteredParts,
+-                });
+                addContent("user", filteredParts);
+             }
+         }
+         else if (msg.role === "assistant") {
+@@ -51,9 +62,19 @@ export function convertMessages(model, context) {
                     parts.push({ text: sanitizeSurrogates(block.text) });
                 }
                 else if (block.type === "thinking") {
@@ -25,7 +69,7 @@ index 7bc0a9f5d6241f191cd607ecb37b3acac8d58267..76166a34784cbc0718d4b9bd1fa6336a
                         parts.push({
                             thought: true,
                             text: sanitizeSurrogates(block.thinking),
-@@ -61,6 +71,7 @@ export function convertMessages(model, context) {
+@@ -61,6 +82,7 @@ export function convertMessages(model, context) {
                         });
                     }
                     else {
@@ -33,7 +77,44 @@ index 7bc0a9f5d6241f191cd607ecb37b3acac8d58267..76166a34784cbc0718d4b9bd1fa6336a
                         parts.push({
                             text: `<thinking>\n${sanitizeSurrogates(block.thinking)}\n</thinking>`,
                         });
-@@ -146,6 +157,77 @@ export function convertMessages(model, context) {
+@@ -85,10 +107,7 @@ export function convertMessages(model, context) {
+             }
+             if (parts.length === 0)
+                 continue;
+-            contents.push({
+-                role: "model",
+-                parts,
+-            });
+            addContent("model", parts);
+         }
+         else if (msg.role === "toolResult") {
+             // Extract text and image content
+@@ -125,27 +144,94 @@ export function convertMessages(model, context) {
+             }
+             // Cloud Code Assist API requires all function responses to be in a single user turn.
+             // Check if the last content is already a user turn with function responses and merge.
+            // Use addContent for proper role alternation handling.
+             const lastContent = contents[contents.length - 1];
+             if (lastContent?.role === "user" && lastContent.parts?.some((p) => p.functionResponse)) {
+                 lastContent.parts.push(functionResponsePart);
+             }
+             else {
+-                contents.push({
+-                    role: "user",
+-                    parts: [functionResponsePart],
+-                });
+                addContent("user", [functionResponsePart]);
+             }
+             // For older models, add images in a separate user message
+            // Note: This may create consecutive user messages, but addContent will merge them
+             if (hasImages && !supportsMultimodalFunctionResponse) {
+-                contents.push({
+-                    role: "user",
+-                    parts: [{ text: "Tool result image:" }, ...imageParts],
+-                });
+                addContent("user", [{ text: "Tool result image:" }, ...imageParts]);
+             }
+         }
     }
     return contents;
 }
@@ -111,7 +192,7 @@ index 7bc0a9f5d6241f191cd607ecb37b3acac8d58267..76166a34784cbc0718d4b9bd1fa6336a
 /**
  * Convert tools to Gemini function declarations format.
  */
-@@ -157,7 +239,7 @@ export function convertTools(tools) {
+@@ -157,7 +243,7 @@ export function convertTools(tools) {
             functionDeclarations: tools.map((tool) => ({
                 name: tool.name,
                 description: tool.description,
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -9,7 +9,7 @@ overrides:

 patchedDependencies:
  '@mariozechner/pi-ai':
-    hash: 628fb051b6f4886984a846a5ee7aa0a571c3360d35b8d114e4684e5edcd100c5
+    hash: b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a
    path: patches/@mariozechner__pi-ai.patch
  qrcode-terminal:
    hash: ed82029850dbdf551f5df1de320945af52b8ea8500cc7bd4f39258e7a3d92e12
@@ -33,7 +33,7 @@ importers:
        version: 0.37.2(ws@8.19.0)(zod@4.3.5)
      '@mariozechner/pi-ai':
        specifier: ^0.37.2
-        version: 0.37.2(patch_hash=628fb051b6f4886984a846a5ee7aa0a571c3360d35b8d114e4684e5edcd100c5)(ws@8.19.0)(zod@4.3.5)
+        version: 0.37.2(patch_hash=b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a)(ws@8.19.0)(zod@4.3.5)
      '@mariozechner/pi-coding-agent':
        specifier: ^0.37.2
        version: 0.37.2(ws@8.19.0)(zod@4.3.5)
@@ -3602,7 +3602,7 @@ snapshots:

  '@mariozechner/pi-agent-core@0.37.2(ws@8.19.0)(zod@4.3.5)':
    dependencies:
-      '@mariozechner/pi-ai': 0.37.2(patch_hash=628fb051b6f4886984a846a5ee7aa0a571c3360d35b8d114e4684e5edcd100c5)(ws@8.19.0)(zod@4.3.5)
+      '@mariozechner/pi-ai': 0.37.2(patch_hash=b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a)(ws@8.19.0)(zod@4.3.5)
      '@mariozechner/pi-tui': 0.37.2
    transitivePeerDependencies:
      - '@modelcontextprotocol/sdk'
@@ -3612,7 +3612,7 @@ snapshots:
      - ws
      - zod

-  '@mariozechner/pi-ai@0.37.2(patch_hash=628fb051b6f4886984a846a5ee7aa0a571c3360d35b8d114e4684e5edcd100c5)(ws@8.19.0)(zod@4.3.5)':
+  '@mariozechner/pi-ai@0.37.2(patch_hash=b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a)(ws@8.19.0)(zod@4.3.5)':
    dependencies:
      '@anthropic-ai/sdk': 0.71.2(zod@4.3.5)
      '@google/genai': 1.34.0
@@ -3636,7 +3636,7 @@ snapshots:
    dependencies:
      '@crosscopy/clipboard': 0.2.8
      '@mariozechner/pi-agent-core': 0.37.2(ws@8.19.0)(zod@4.3.5)
-      '@mariozechner/pi-ai': 0.37.2(patch_hash=628fb051b6f4886984a846a5ee7aa0a571c3360d35b8d114e4684e5edcd100c5)(ws@8.19.0)(zod@4.3.5)
+      '@mariozechner/pi-ai': 0.37.2(patch_hash=b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a)(ws@8.19.0)(zod@4.3.5)
      '@mariozechner/pi-tui': 0.37.2
      chalk: 5.6.2
      cli-highlight: 2.1.11
--- a/src/providers/google-shared.test.ts
+++ b/src/providers/google-shared.test.ts
@@ -231,4 +231,252 @@ describe("google-shared convertMessages", () => {
      thoughtSignature: "sig",
    });
  });
+
+  it("merges consecutive user messages to satisfy Gemini role alternation", () => {
+    const model = makeModel("gemini-1.5-pro");
+    const context = {
+      messages: [
+        {
+          role: "user",
+          content: "Hello",
+        },
+        {
+          role: "user",
+          content: "How are you?",
+        },
+      ],
+    } as unknown as Context;
+
+    const contents = convertMessages(model, context);
+    // Should merge into a single user message
+    expect(contents).toHaveLength(1);
+    expect(contents[0].role).toBe("user");
+    expect(contents[0].parts).toHaveLength(2);
+  });
+
+  it("merges consecutive user messages for non-Gemini Google models", () => {
+    const model = makeModel("claude-3-opus");
+    const context = {
+      messages: [
+        {
+          role: "user",
+          content: "First",
+        },
+        {
+          role: "user",
+          content: "Second",
+        },
+      ],
+    } as unknown as Context;
+
+    const contents = convertMessages(model, context);
+    expect(contents).toHaveLength(1);
+    expect(contents[0].role).toBe("user");
+    expect(contents[0].parts).toHaveLength(2);
+  });
+
+  it("merges consecutive model messages to satisfy Gemini role alternation", () => {
+    const model = makeModel("gemini-1.5-pro");
+    const context = {
+      messages: [
+        {
+          role: "user",
+          content: "Hello",
+        },
+        {
+          role: "assistant",
+          content: [{ type: "text", text: "Hi there!" }],
+          api: "google-generative-ai",
+          provider: "google",
+          model: "gemini-1.5-pro",
+          usage: {
+            input: 0,
+            output: 0,
+            cacheRead: 0,
+            cacheWrite: 0,
+            totalTokens: 0,
+            cost: {
+              input: 0,
+              output: 0,
+              cacheRead: 0,
+              cacheWrite: 0,
+              total: 0,
+            },
+          },
+          stopReason: "stop",
+          timestamp: 0,
+        },
+        {
+          role: "assistant",
+          content: [{ type: "text", text: "How can I help?" }],
+          api: "google-generative-ai",
+          provider: "google",
+          model: "gemini-1.5-pro",
+          usage: {
+            input: 0,
+            output: 0,
+            cacheRead: 0,
+            cacheWrite: 0,
+            totalTokens: 0,
+            cost: {
+              input: 0,
+              output: 0,
+              cacheRead: 0,
+              cacheWrite: 0,
+              total: 0,
+            },
+          },
+          stopReason: "stop",
+          timestamp: 0,
+        },
+      ],
+    } as unknown as Context;
+
+    const contents = convertMessages(model, context);
+    // Should have 1 user + 1 merged model message
+    expect(contents).toHaveLength(2);
+    expect(contents[0].role).toBe("user");
+    expect(contents[1].role).toBe("model");
+    expect(contents[1].parts).toHaveLength(2);
+  });
+
+  it("handles user message after tool result without model response in between", () => {
+    const model = makeModel("gemini-1.5-pro");
+    const context = {
+      messages: [
+        {
+          role: "user",
+          content: "Use a tool",
+        },
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "toolCall",
+              id: "call_1",
+              name: "myTool",
+              arguments: { arg: "value" },
+            },
+          ],
+          api: "google-generative-ai",
+          provider: "google",
+          model: "gemini-1.5-pro",
+          usage: {
+            input: 0,
+            output: 0,
+            cacheRead: 0,
+            cacheWrite: 0,
+            totalTokens: 0,
+            cost: {
+              input: 0,
+              output: 0,
+              cacheRead: 0,
+              cacheWrite: 0,
+              total: 0,
+            },
+          },
+          stopReason: "stop",
+          timestamp: 0,
+        },
+        {
+          role: "toolResult",
+          toolCallId: "call_1",
+          toolName: "myTool",
+          content: [{ type: "text", text: "Tool result" }],
+          isError: false,
+          timestamp: 0,
+        },
+        {
+          role: "user",
+          content: "Now do something else",
+        },
+      ],
+    } as unknown as Context;
+
+    const contents = convertMessages(model, context);
+    // Tool result creates a user turn with functionResponse
+    // The next user message should be merged into it or there should be proper alternation
+    // Check that we don't have consecutive user messages
+    for (let i = 1; i < contents.length; i++) {
+      if (contents[i].role === "user" && contents[i - 1].role === "user") {
+        // If consecutive, they should have been merged
+        expect.fail("Consecutive user messages should be merged");
+      }
+    }
+    // The conversation should be valid for Gemini
+    expect(contents.length).toBeGreaterThan(0);
+  });
+
+  it("ensures function call comes after user turn, not after model turn", () => {
+    const model = makeModel("gemini-1.5-pro");
+    const context = {
+      messages: [
+        {
+          role: "user",
+          content: "Hello",
+        },
+        {
+          role: "assistant",
+          content: [{ type: "text", text: "Hi!" }],
+          api: "google-generative-ai",
+          provider: "google",
+          model: "gemini-1.5-pro",
+          usage: {
+            input: 0,
+            output: 0,
+            cacheRead: 0,
+            cacheWrite: 0,
+            totalTokens: 0,
+            cost: {
+              input: 0,
+              output: 0,
+              cacheRead: 0,
+              cacheWrite: 0,
+              total: 0,
+            },
+          },
+          stopReason: "stop",
+          timestamp: 0,
+        },
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "toolCall",
+              id: "call_1",
+              name: "myTool",
+              arguments: {},
+            },
+          ],
+          api: "google-generative-ai",
+          provider: "google",
+          model: "gemini-1.5-pro",
+          usage: {
+            input: 0,
+            output: 0,
+            cacheRead: 0,
+            cacheWrite: 0,
+            totalTokens: 0,
+            cost: {
+              input: 0,
+              output: 0,
+              cacheRead: 0,
+              cacheWrite: 0,
+              total: 0,
+            },
+          },
+          stopReason: "stop",
+          timestamp: 0,
+        },
+      ],
+    } as unknown as Context;
+
+    const contents = convertMessages(model, context);
+    // Consecutive model messages should be merged so function call is in same turn as text
+    expect(contents).toHaveLength(2);
+    expect(contents[0].role).toBe("user");
+    expect(contents[1].role).toBe("model");
+    // The model message should have both text and function call
+    expect(contents[1].parts?.length).toBe(2);
+  });
 });