diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f337c53a..8ddf397a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ - Auth: lock auth profile refreshes to avoid multi-instance OAuth logouts; keep credentials on refresh failure. - Onboarding: prompt immediately for OpenAI Codex redirect URL on remote/headless logins. - Typing indicators: stop typing once the reply dispatcher drains to prevent stuck typing across Discord/Telegram/WhatsApp. +- Google: merge consecutive messages to satisfy strict role alternation for Google provider models. Thanks @Asleep123 for PR #266. - WhatsApp/Telegram: add groupPolicy handling for group messages and normalize allowFrom matching (tg/telegram prefixes). Thanks @mneves75. - Auto-reply: add configurable ack reactions for inbound messages (default 👀 or `identity.emoji`) with scope controls. Thanks @obviyus for PR #178. - Onboarding: resolve CLI entrypoint when running via `npx` so gateway daemon install works without a build step. diff --git a/patches/@mariozechner__pi-ai.patch b/patches/@mariozechner__pi-ai.patch index b4cdf8e51..aa03fc55a 100644 --- a/patches/@mariozechner__pi-ai.patch +++ b/patches/@mariozechner__pi-ai.patch @@ -1,8 +1,52 @@ diff --git a/dist/providers/google-shared.js b/dist/providers/google-shared.js -index 7bc0a9f5d6241f191cd607ecb37b3acac8d58267..76166a34784cbc0718d4b9bd1fa6336a6dd394ec 100644 +index 7bc0a9f5d6241f191cd607ecb37b3acac8d58267..56866774e47444b5d333961c9b20fce582363124 100644 --- a/dist/providers/google-shared.js +++ b/dist/providers/google-shared.js -@@ -51,9 +51,19 @@ export function convertMessages(model, context) { +@@ -10,13 +10,27 @@ import { transformMessages } from "./transorm-messages.js"; + export function convertMessages(model, context) { + const contents = []; + const transformedMessages = transformMessages(context.messages, model); ++ ++ /** ++ * Helper to add content while merging consecutive messages of the same role. ++ * Gemini/Cloud Code Assist requires strict role alternation (user/model/user/model). ++ * Consecutive messages of the same role cause "function call turn" errors. ++ */ ++ function addContent(role, parts) { ++ if (parts.length === 0) return; ++ const lastContent = contents[contents.length - 1]; ++ if (lastContent?.role === role) { ++ // Merge into existing message of same role ++ lastContent.parts.push(...parts); ++ } else { ++ contents.push({ role, parts }); ++ } ++ } ++ + for (const msg of transformedMessages) { + if (msg.role === "user") { + if (typeof msg.content === "string") { +- contents.push({ +- role: "user", +- parts: [{ text: sanitizeSurrogates(msg.content) }], +- }); ++ addContent("user", [{ text: sanitizeSurrogates(msg.content) }]); + } + else { + const parts = msg.content.map((item) => { +@@ -35,10 +49,7 @@ export function convertMessages(model, context) { + const filteredParts = !model.input.includes("image") ? parts.filter((p) => p.text !== undefined) : parts; + if (filteredParts.length === 0) + continue; +- contents.push({ +- role: "user", +- parts: filteredParts, +- }); ++ addContent("user", filteredParts); + } + } + else if (msg.role === "assistant") { +@@ -51,9 +62,19 @@ export function convertMessages(model, context) { parts.push({ text: sanitizeSurrogates(block.text) }); } else if (block.type === "thinking") { @@ -25,7 +69,7 @@ index 7bc0a9f5d6241f191cd607ecb37b3acac8d58267..76166a34784cbc0718d4b9bd1fa6336a parts.push({ thought: true, text: sanitizeSurrogates(block.thinking), -@@ -61,6 +71,7 @@ export function convertMessages(model, context) { +@@ -61,6 +82,7 @@ export function convertMessages(model, context) { }); } else { @@ -33,7 +77,44 @@ index 7bc0a9f5d6241f191cd607ecb37b3acac8d58267..76166a34784cbc0718d4b9bd1fa6336a parts.push({ text: `\n${sanitizeSurrogates(block.thinking)}\n`, }); -@@ -146,6 +157,77 @@ export function convertMessages(model, context) { +@@ -85,10 +107,7 @@ export function convertMessages(model, context) { + } + if (parts.length === 0) + continue; +- contents.push({ +- role: "model", +- parts, +- }); ++ addContent("model", parts); + } + else if (msg.role === "toolResult") { + // Extract text and image content +@@ -125,27 +144,94 @@ export function convertMessages(model, context) { + } + // Cloud Code Assist API requires all function responses to be in a single user turn. + // Check if the last content is already a user turn with function responses and merge. ++ // Use addContent for proper role alternation handling. + const lastContent = contents[contents.length - 1]; + if (lastContent?.role === "user" && lastContent.parts?.some((p) => p.functionResponse)) { + lastContent.parts.push(functionResponsePart); + } + else { +- contents.push({ +- role: "user", +- parts: [functionResponsePart], +- }); ++ addContent("user", [functionResponsePart]); + } + // For older models, add images in a separate user message ++ // Note: This may create consecutive user messages, but addContent will merge them + if (hasImages && !supportsMultimodalFunctionResponse) { +- contents.push({ +- role: "user", +- parts: [{ text: "Tool result image:" }, ...imageParts], +- }); ++ addContent("user", [{ text: "Tool result image:" }, ...imageParts]); + } + } } return contents; } @@ -111,7 +192,7 @@ index 7bc0a9f5d6241f191cd607ecb37b3acac8d58267..76166a34784cbc0718d4b9bd1fa6336a /** * Convert tools to Gemini function declarations format. */ -@@ -157,7 +239,7 @@ export function convertTools(tools) { +@@ -157,7 +243,7 @@ export function convertTools(tools) { functionDeclarations: tools.map((tool) => ({ name: tool.name, description: tool.description, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7506239cb..82dcf8793 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,7 +9,7 @@ overrides: patchedDependencies: '@mariozechner/pi-ai': - hash: 628fb051b6f4886984a846a5ee7aa0a571c3360d35b8d114e4684e5edcd100c5 + hash: b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a path: patches/@mariozechner__pi-ai.patch qrcode-terminal: hash: ed82029850dbdf551f5df1de320945af52b8ea8500cc7bd4f39258e7a3d92e12 @@ -33,7 +33,7 @@ importers: version: 0.37.2(ws@8.19.0)(zod@4.3.5) '@mariozechner/pi-ai': specifier: ^0.37.2 - version: 0.37.2(patch_hash=628fb051b6f4886984a846a5ee7aa0a571c3360d35b8d114e4684e5edcd100c5)(ws@8.19.0)(zod@4.3.5) + version: 0.37.2(patch_hash=b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a)(ws@8.19.0)(zod@4.3.5) '@mariozechner/pi-coding-agent': specifier: ^0.37.2 version: 0.37.2(ws@8.19.0)(zod@4.3.5) @@ -3602,7 +3602,7 @@ snapshots: '@mariozechner/pi-agent-core@0.37.2(ws@8.19.0)(zod@4.3.5)': dependencies: - '@mariozechner/pi-ai': 0.37.2(patch_hash=628fb051b6f4886984a846a5ee7aa0a571c3360d35b8d114e4684e5edcd100c5)(ws@8.19.0)(zod@4.3.5) + '@mariozechner/pi-ai': 0.37.2(patch_hash=b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a)(ws@8.19.0)(zod@4.3.5) '@mariozechner/pi-tui': 0.37.2 transitivePeerDependencies: - '@modelcontextprotocol/sdk' @@ -3612,7 +3612,7 @@ snapshots: - ws - zod - '@mariozechner/pi-ai@0.37.2(patch_hash=628fb051b6f4886984a846a5ee7aa0a571c3360d35b8d114e4684e5edcd100c5)(ws@8.19.0)(zod@4.3.5)': + '@mariozechner/pi-ai@0.37.2(patch_hash=b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a)(ws@8.19.0)(zod@4.3.5)': dependencies: '@anthropic-ai/sdk': 0.71.2(zod@4.3.5) '@google/genai': 1.34.0 @@ -3636,7 +3636,7 @@ snapshots: dependencies: '@crosscopy/clipboard': 0.2.8 '@mariozechner/pi-agent-core': 0.37.2(ws@8.19.0)(zod@4.3.5) - '@mariozechner/pi-ai': 0.37.2(patch_hash=628fb051b6f4886984a846a5ee7aa0a571c3360d35b8d114e4684e5edcd100c5)(ws@8.19.0)(zod@4.3.5) + '@mariozechner/pi-ai': 0.37.2(patch_hash=b49275c3e2023970d8248ababef6df60e093e58a3ba3127c2ba4de1df387d06a)(ws@8.19.0)(zod@4.3.5) '@mariozechner/pi-tui': 0.37.2 chalk: 5.6.2 cli-highlight: 2.1.11 diff --git a/src/providers/google-shared.test.ts b/src/providers/google-shared.test.ts index 9b35bc060..f9bbffbc2 100644 --- a/src/providers/google-shared.test.ts +++ b/src/providers/google-shared.test.ts @@ -231,4 +231,252 @@ describe("google-shared convertMessages", () => { thoughtSignature: "sig", }); }); + + it("merges consecutive user messages to satisfy Gemini role alternation", () => { + const model = makeModel("gemini-1.5-pro"); + const context = { + messages: [ + { + role: "user", + content: "Hello", + }, + { + role: "user", + content: "How are you?", + }, + ], + } as unknown as Context; + + const contents = convertMessages(model, context); + // Should merge into a single user message + expect(contents).toHaveLength(1); + expect(contents[0].role).toBe("user"); + expect(contents[0].parts).toHaveLength(2); + }); + + it("merges consecutive user messages for non-Gemini Google models", () => { + const model = makeModel("claude-3-opus"); + const context = { + messages: [ + { + role: "user", + content: "First", + }, + { + role: "user", + content: "Second", + }, + ], + } as unknown as Context; + + const contents = convertMessages(model, context); + expect(contents).toHaveLength(1); + expect(contents[0].role).toBe("user"); + expect(contents[0].parts).toHaveLength(2); + }); + + it("merges consecutive model messages to satisfy Gemini role alternation", () => { + const model = makeModel("gemini-1.5-pro"); + const context = { + messages: [ + { + role: "user", + content: "Hello", + }, + { + role: "assistant", + content: [{ type: "text", text: "Hi there!" }], + api: "google-generative-ai", + provider: "google", + model: "gemini-1.5-pro", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + total: 0, + }, + }, + stopReason: "stop", + timestamp: 0, + }, + { + role: "assistant", + content: [{ type: "text", text: "How can I help?" }], + api: "google-generative-ai", + provider: "google", + model: "gemini-1.5-pro", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + total: 0, + }, + }, + stopReason: "stop", + timestamp: 0, + }, + ], + } as unknown as Context; + + const contents = convertMessages(model, context); + // Should have 1 user + 1 merged model message + expect(contents).toHaveLength(2); + expect(contents[0].role).toBe("user"); + expect(contents[1].role).toBe("model"); + expect(contents[1].parts).toHaveLength(2); + }); + + it("handles user message after tool result without model response in between", () => { + const model = makeModel("gemini-1.5-pro"); + const context = { + messages: [ + { + role: "user", + content: "Use a tool", + }, + { + role: "assistant", + content: [ + { + type: "toolCall", + id: "call_1", + name: "myTool", + arguments: { arg: "value" }, + }, + ], + api: "google-generative-ai", + provider: "google", + model: "gemini-1.5-pro", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + total: 0, + }, + }, + stopReason: "stop", + timestamp: 0, + }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: "myTool", + content: [{ type: "text", text: "Tool result" }], + isError: false, + timestamp: 0, + }, + { + role: "user", + content: "Now do something else", + }, + ], + } as unknown as Context; + + const contents = convertMessages(model, context); + // Tool result creates a user turn with functionResponse + // The next user message should be merged into it or there should be proper alternation + // Check that we don't have consecutive user messages + for (let i = 1; i < contents.length; i++) { + if (contents[i].role === "user" && contents[i - 1].role === "user") { + // If consecutive, they should have been merged + expect.fail("Consecutive user messages should be merged"); + } + } + // The conversation should be valid for Gemini + expect(contents.length).toBeGreaterThan(0); + }); + + it("ensures function call comes after user turn, not after model turn", () => { + const model = makeModel("gemini-1.5-pro"); + const context = { + messages: [ + { + role: "user", + content: "Hello", + }, + { + role: "assistant", + content: [{ type: "text", text: "Hi!" }], + api: "google-generative-ai", + provider: "google", + model: "gemini-1.5-pro", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + total: 0, + }, + }, + stopReason: "stop", + timestamp: 0, + }, + { + role: "assistant", + content: [ + { + type: "toolCall", + id: "call_1", + name: "myTool", + arguments: {}, + }, + ], + api: "google-generative-ai", + provider: "google", + model: "gemini-1.5-pro", + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + total: 0, + }, + }, + stopReason: "stop", + timestamp: 0, + }, + ], + } as unknown as Context; + + const contents = convertMessages(model, context); + // Consecutive model messages should be merged so function call is in same turn as text + expect(contents).toHaveLength(2); + expect(contents[0].role).toBe("user"); + expect(contents[1].role).toBe("model"); + // The model message should have both text and function call + expect(contents[1].parts?.length).toBe(2); + }); });