From eadb92300052eb1b1df22f9e22bd1bce95f43322 Mon Sep 17 00:00:00 2001 From: VAC Date: Tue, 6 Jan 2026 07:24:51 -0500 Subject: [PATCH 1/2] fix: auto-recover from Gemini session corruption Detect the Gemini API error 'function call turn comes immediately after a user turn or after a function response turn' which indicates corrupted session history. When detected: - Delete the corrupted transcript file - Remove the session entry from the store - Return a user-friendly message asking them to retry This prevents the error loop where every subsequent message fails with the same error until manual intervention. Fixes #296 --- src/auto-reply/reply/agent-runner.ts | 33 ++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 9f994bdd6..62b23e926 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -1,4 +1,5 @@ import crypto from "node:crypto"; +import fs from "node:fs"; import { lookupContextTokens } from "../../agents/context.js"; import { DEFAULT_CONTEXT_TOKENS } from "../../agents/defaults.js"; import { runWithModelFallback } from "../../agents/model-fallback.js"; @@ -8,6 +9,7 @@ import { } from "../../agents/pi-embedded.js"; import { loadSessionStore, + resolveSessionTranscriptPath, type SessionEntry, saveSessionStore, } from "../../config/sessions.js"; @@ -346,6 +348,37 @@ export async function runReplyAgent(params: { const message = err instanceof Error ? err.message : String(err); const isContextOverflow = /context.*overflow|too large|context window/i.test(message); + const isSessionCorruption = + /function call turn comes immediately after|INVALID_ARGUMENT.*function/i.test( + message, + ); + + // Auto-recover from Gemini session corruption by resetting the session + if (isSessionCorruption && sessionKey && sessionStore && storePath) { + const corruptedSessionId = sessionEntry?.sessionId; + defaultRuntime.error( + `Session history corrupted (Gemini function call ordering). Resetting session: ${sessionKey}`, + ); + + // Delete transcript file if it exists + if (corruptedSessionId) { + const transcriptPath = resolveSessionTranscriptPath(corruptedSessionId); + try { + fs.unlinkSync(transcriptPath); + } catch { + // Ignore if file doesn't exist + } + } + + // Remove session entry from store + delete sessionStore[sessionKey]; + await saveSessionStore(storePath, sessionStore); + + return finalizeWithFollowup({ + text: "⚠️ Session history was corrupted. I've reset the conversation - please try again!", + }); + } + defaultRuntime.error(`Embedded agent failed before reply: ${message}`); return finalizeWithFollowup({ text: isContextOverflow From 86b56b23084caeddc5d72c0a8e3d0a22ad167c34 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 6 Jan 2026 23:06:01 +0100 Subject: [PATCH 2/2] fix: harden gemini session reset --- CHANGELOG.md | 1 + .../agent-runner.heartbeat-typing.test.ts | 147 ++++++++++++++++++ src/auto-reply/reply/agent-runner.ts | 33 ++-- 3 files changed, 167 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ce0c6d0a..cb980435d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ - Control UI: avoid Slack config ReferenceError by reading slack config snapshots. Thanks @sreekaransrinath for PR #249. - Telegram: honor routing.groupChat.mentionPatterns for group mention gating. Thanks @regenrek for PR #242. - Auto-reply: block unauthorized `/reset` and infer WhatsApp senders from E.164 inputs. +- Auto-reply: reset corrupted Gemini sessions when function-call ordering breaks. Thanks @VACInc for PR #297. - Auto-reply: track compaction count in session status; verbose mode announces auto-compactions. - Telegram: send GIF media as animations (auto-play) and improve filename sniffing. diff --git a/src/auto-reply/reply/agent-runner.heartbeat-typing.test.ts b/src/auto-reply/reply/agent-runner.heartbeat-typing.test.ts index 2b437a57f..31c686366 100644 --- a/src/auto-reply/reply/agent-runner.heartbeat-typing.test.ts +++ b/src/auto-reply/reply/agent-runner.heartbeat-typing.test.ts @@ -4,6 +4,7 @@ import path from "node:path"; import { describe, expect, it, vi } from "vitest"; import type { SessionEntry } from "../../config/sessions.js"; +import * as sessions from "../../config/sessions.js"; import type { TemplateContext } from "../templating.js"; import type { GetReplyOptions } from "../types.js"; import type { FollowupRun, QueueSettings } from "./queue.js"; @@ -207,4 +208,150 @@ describe("runReplyAgent typing (heartbeat)", () => { expect(payloads[0]?.text).toContain("count 1"); expect(sessionStore.main.compactionCount).toBe(1); }); + + it("resets corrupted Gemini sessions and deletes transcripts", async () => { + const prevStateDir = process.env.CLAWDBOT_STATE_DIR; + const stateDir = await fs.mkdtemp( + path.join(tmpdir(), "clawdbot-session-reset-"), + ); + process.env.CLAWDBOT_STATE_DIR = stateDir; + try { + const sessionId = "session-corrupt"; + const storePath = path.join(stateDir, "sessions", "sessions.json"); + const sessionEntry = { sessionId, updatedAt: Date.now() }; + const sessionStore = { main: sessionEntry }; + + await fs.mkdir(path.dirname(storePath), { recursive: true }); + await fs.writeFile(storePath, JSON.stringify(sessionStore), "utf-8"); + + const transcriptPath = sessions.resolveSessionTranscriptPath(sessionId); + await fs.mkdir(path.dirname(transcriptPath), { recursive: true }); + await fs.writeFile(transcriptPath, "bad", "utf-8"); + + runEmbeddedPiAgentMock.mockImplementationOnce(async () => { + throw new Error( + "function call turn comes immediately after a user turn or after a function response turn", + ); + }); + + const { run } = createMinimalRun({ + sessionEntry, + sessionStore, + sessionKey: "main", + storePath, + }); + const res = await run(); + + expect(res).toMatchObject({ + text: expect.stringContaining("Session history was corrupted"), + }); + expect(sessionStore.main).toBeUndefined(); + await expect(fs.access(transcriptPath)).rejects.toThrow(); + + const persisted = JSON.parse(await fs.readFile(storePath, "utf-8")); + expect(persisted.main).toBeUndefined(); + } finally { + if (prevStateDir) { + process.env.CLAWDBOT_STATE_DIR = prevStateDir; + } else { + delete process.env.CLAWDBOT_STATE_DIR; + } + } + }); + + it("keeps sessions intact on other errors", async () => { + const prevStateDir = process.env.CLAWDBOT_STATE_DIR; + const stateDir = await fs.mkdtemp( + path.join(tmpdir(), "clawdbot-session-noreset-"), + ); + process.env.CLAWDBOT_STATE_DIR = stateDir; + try { + const sessionId = "session-ok"; + const storePath = path.join(stateDir, "sessions", "sessions.json"); + const sessionEntry = { sessionId, updatedAt: Date.now() }; + const sessionStore = { main: sessionEntry }; + + await fs.mkdir(path.dirname(storePath), { recursive: true }); + await fs.writeFile(storePath, JSON.stringify(sessionStore), "utf-8"); + + const transcriptPath = sessions.resolveSessionTranscriptPath(sessionId); + await fs.mkdir(path.dirname(transcriptPath), { recursive: true }); + await fs.writeFile(transcriptPath, "ok", "utf-8"); + + runEmbeddedPiAgentMock.mockImplementationOnce(async () => { + throw new Error("INVALID_ARGUMENT: some other failure"); + }); + + const { run } = createMinimalRun({ + sessionEntry, + sessionStore, + sessionKey: "main", + storePath, + }); + const res = await run(); + + expect(res).toMatchObject({ + text: expect.stringContaining("Agent failed before reply"), + }); + expect(sessionStore.main).toBeDefined(); + await expect(fs.access(transcriptPath)).resolves.toBeUndefined(); + + const persisted = JSON.parse(await fs.readFile(storePath, "utf-8")); + expect(persisted.main).toBeDefined(); + } finally { + if (prevStateDir) { + process.env.CLAWDBOT_STATE_DIR = prevStateDir; + } else { + delete process.env.CLAWDBOT_STATE_DIR; + } + } + }); + + it("still replies even if session reset fails to persist", async () => { + const prevStateDir = process.env.CLAWDBOT_STATE_DIR; + const stateDir = await fs.mkdtemp( + path.join(tmpdir(), "clawdbot-session-reset-fail-"), + ); + process.env.CLAWDBOT_STATE_DIR = stateDir; + const saveSpy = vi + .spyOn(sessions, "saveSessionStore") + .mockRejectedValueOnce(new Error("boom")); + try { + const sessionId = "session-corrupt"; + const storePath = path.join(stateDir, "sessions", "sessions.json"); + const sessionEntry = { sessionId, updatedAt: Date.now() }; + const sessionStore = { main: sessionEntry }; + + const transcriptPath = sessions.resolveSessionTranscriptPath(sessionId); + await fs.mkdir(path.dirname(transcriptPath), { recursive: true }); + await fs.writeFile(transcriptPath, "bad", "utf-8"); + + runEmbeddedPiAgentMock.mockImplementationOnce(async () => { + throw new Error( + "function call turn comes immediately after a user turn or after a function response turn", + ); + }); + + const { run } = createMinimalRun({ + sessionEntry, + sessionStore, + sessionKey: "main", + storePath, + }); + const res = await run(); + + expect(res).toMatchObject({ + text: expect.stringContaining("Session history was corrupted"), + }); + expect(sessionStore.main).toBeUndefined(); + await expect(fs.access(transcriptPath)).rejects.toThrow(); + } finally { + saveSpy.mockRestore(); + if (prevStateDir) { + process.env.CLAWDBOT_STATE_DIR = prevStateDir; + } else { + delete process.env.CLAWDBOT_STATE_DIR; + } + } + }); }); diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 62b23e926..7d2ba3d22 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -349,9 +349,7 @@ export async function runReplyAgent(params: { const isContextOverflow = /context.*overflow|too large|context window/i.test(message); const isSessionCorruption = - /function call turn comes immediately after|INVALID_ARGUMENT.*function/i.test( - message, - ); + /function call turn comes immediately after/i.test(message); // Auto-recover from Gemini session corruption by resetting the session if (isSessionCorruption && sessionKey && sessionStore && storePath) { @@ -360,19 +358,26 @@ export async function runReplyAgent(params: { `Session history corrupted (Gemini function call ordering). Resetting session: ${sessionKey}`, ); - // Delete transcript file if it exists - if (corruptedSessionId) { - const transcriptPath = resolveSessionTranscriptPath(corruptedSessionId); - try { - fs.unlinkSync(transcriptPath); - } catch { - // Ignore if file doesn't exist + try { + // Delete transcript file if it exists + if (corruptedSessionId) { + const transcriptPath = + resolveSessionTranscriptPath(corruptedSessionId); + try { + fs.unlinkSync(transcriptPath); + } catch { + // Ignore if file doesn't exist + } } - } - // Remove session entry from store - delete sessionStore[sessionKey]; - await saveSessionStore(storePath, sessionStore); + // Remove session entry from store + delete sessionStore[sessionKey]; + await saveSessionStore(storePath, sessionStore); + } catch (cleanupErr) { + defaultRuntime.error( + `Failed to reset corrupted session ${sessionKey}: ${String(cleanupErr)}`, + ); + } return finalizeWithFollowup({ text: "⚠️ Session history was corrupted. I've reset the conversation - please try again!",