From c97bf23a4aa31fbc3c7de7b48c89012840d6ff5b Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Sat, 24 Jan 2026 07:58:04 +0000
Subject: [PATCH] fix: gate openai reasoning downgrade on model switches (#1562) (thanks @roshanasingh4)

---
 CHANGELOG.md                                  |  1 +
 docs/reference/transcript-hygiene.md          |  1 +
 ...-helpers.downgradeopenai-reasoning.test.ts | 32 ++++---
 src/agents/pi-embedded-helpers/openai.ts      | 89 +++++++++++++------
 ...ed-runner.sanitize-session-history.test.ts | 88 ++++++++++++++++++
 src/agents/pi-embedded-runner/google.ts       | 69 +++++++++++++-
 6 files changed, 239 insertions(+), 41 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c833d1e1b..1fe01fea4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,7 @@ Docs: https://docs.clawd.bot
 - Gateway: compare Linux process start time to avoid PID recycling lock loops; keep locks unless stale. (#1572) Thanks @steipete.
 - Skills: gate bird Homebrew install to macOS. (#1569) Thanks @bradleypriest.
 - Agents: ignore IDENTITY.md template placeholders when parsing identity to avoid placeholder replies. (#1556)
+- Agents: drop orphaned OpenAI Responses reasoning blocks on model switches. (#1562) Thanks @roshanasingh4.
 - Docker: update gateway command in docker-compose and Hetzner guide. (#1514)
 - Sessions: reject array-backed session stores to prevent silent wipes. (#1469)
 - Voice wake: auto-save wake words on blur/submit across iOS/Android and align limits with macOS.
diff --git a/docs/reference/transcript-hygiene.md b/docs/reference/transcript-hygiene.md
index fa466cef5..b43b8ab70 100644
--- a/docs/reference/transcript-hygiene.md
+++ b/docs/reference/transcript-hygiene.md
@@ -48,6 +48,7 @@ Implementation:
 **OpenAI / OpenAI Codex**
 
 - Image sanitization only.
+- On model switch into OpenAI Responses/Codex, drop orphaned reasoning signatures (standalone reasoning items without a following content block).
 - No tool call id sanitization.
 - No tool result pairing repair.
 - No turn validation or reordering.
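A minimal sketch of the case the doc line above describes, reusing the fixture shape from the tests in this patch (fields beyond `role`, `content`, `type`, and `thinkingSignature` are omitted here; the expected result mirrors the new "drops orphaned reasoning blocks" test below, not a separate API):

```ts
// Illustration only: the assistant turn holds a Responses reasoning item
// (an "rs_..." signature) with no content block after it.
const history = [
  {
    role: "assistant",
    content: [
      {
        type: "thinking",
        thinkingSignature: JSON.stringify({ id: "rs_abc", type: "reasoning" }),
      },
    ],
  },
  { role: "user", content: "next" },
];

// After a switch into openai-responses / openai-codex-responses, sanitization
// drops the orphaned block and keeps the rest of the history:
// downgradeOpenAIReasoningBlocks(history) -> [{ role: "user", content: "next" }]
```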
diff --git a/src/agents/pi-embedded-helpers.downgradeopenai-reasoning.test.ts b/src/agents/pi-embedded-helpers.downgradeopenai-reasoning.test.ts
index 7d25a6727..d83667cd1 100644
--- a/src/agents/pi-embedded-helpers.downgradeopenai-reasoning.test.ts
+++ b/src/agents/pi-embedded-helpers.downgradeopenai-reasoning.test.ts
@@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest";
 import { downgradeOpenAIReasoningBlocks } from "./pi-embedded-helpers.js";
 
 describe("downgradeOpenAIReasoningBlocks", () => {
-  it("downgrades orphaned reasoning signatures to text", () => {
+  it("keeps reasoning signatures when followed by content", () => {
     const input = [
       {
         role: "assistant",
@@ -17,22 +17,16 @@
       },
     ];
 
-    expect(downgradeOpenAIReasoningBlocks(input as any)).toEqual([
-      {
-        role: "assistant",
-        content: [{ type: "text", text: "internal reasoning" }, { type: "text", text: "answer" }],
-      },
-    ]);
+    expect(downgradeOpenAIReasoningBlocks(input as any)).toEqual(input);
   });
 
-  it("drops empty thinking blocks with orphaned signatures", () => {
+  it("drops orphaned reasoning blocks without following content", () => {
     const input = [
       {
         role: "assistant",
         content: [
           {
             type: "thinking",
-            thinking: " ",
             thinkingSignature: JSON.stringify({ id: "rs_abc", type: "reasoning" }),
           },
         ],
@@ -40,7 +34,25 @@
       },
       { role: "user", content: "next" },
     ];
 
-    expect(downgradeOpenAIReasoningBlocks(input as any)).toEqual([{ role: "user", content: "next" }]);
+    expect(downgradeOpenAIReasoningBlocks(input as any)).toEqual([
+      { role: "user", content: "next" },
+    ]);
+  });
+
+  it("drops object-form orphaned signatures", () => {
+    const input = [
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "thinking",
+            thinkingSignature: { id: "rs_obj", type: "reasoning" },
+          },
+        ],
+      },
+    ];
+
+    expect(downgradeOpenAIReasoningBlocks(input as any)).toEqual([]);
+  });
 
   it("keeps non-reasoning thinking signatures", () => {
diff --git a/src/agents/pi-embedded-helpers/openai.ts b/src/agents/pi-embedded-helpers/openai.ts
index 5c92676be..1e0ec057e 100644
--- a/src/agents/pi-embedded-helpers/openai.ts
+++ b/src/agents/pi-embedded-helpers/openai.ts
@@ -6,20 +6,45 @@ type OpenAIThinkingBlock = {
   thinkingSignature?: unknown;
 };
 
-function isOrphanedOpenAIReasoningSignature(signature: string): boolean {
-  const trimmed = signature.trim();
-  if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) return false;
-  try {
-    const parsed = JSON.parse(trimmed) as { id?: unknown; type?: unknown };
-    const id = typeof parsed?.id === "string" ? parsed.id : "";
-    const type = typeof parsed?.type === "string" ? parsed.type : "";
parsed.type : ""; - if (!id.startsWith("rs_")) return false; - if (type === "reasoning") return true; - if (type.startsWith("reasoning.")) return true; - return false; - } catch { - return false; +type OpenAIReasoningSignature = { + id: string; + type: string; +}; + +function parseOpenAIReasoningSignature(value: unknown): OpenAIReasoningSignature | null { + if (!value) return null; + let candidate: { id?: unknown; type?: unknown } | null = null; + if (typeof value === "string") { + const trimmed = value.trim(); + if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) return null; + try { + candidate = JSON.parse(trimmed) as { id?: unknown; type?: unknown }; + } catch { + return null; + } + } else if (typeof value === "object") { + candidate = value as { id?: unknown; type?: unknown }; } + if (!candidate) return null; + const id = typeof candidate.id === "string" ? candidate.id : ""; + const type = typeof candidate.type === "string" ? candidate.type : ""; + if (!id.startsWith("rs_")) return null; + if (type === "reasoning" || type.startsWith("reasoning.")) { + return { id, type }; + } + return null; +} + +function hasFollowingNonThinkingBlock( + content: Extract["content"], + index: number, +): boolean { + for (let i = index + 1; i < content.length; i++) { + const block = content[i]; + if (!block || typeof block !== "object") return true; + if ((block as { type?: unknown }).type !== "thinking") return true; + } + return false; } /** @@ -27,7 +52,7 @@ function isOrphanedOpenAIReasoningSignature(signature: string): boolean { * without the required following item. * * Clawdbot persists provider-specific reasoning metadata in `thinkingSignature`; if that metadata - * is incomplete, we downgrade the block to plain text (or drop it if empty) to keep history usable. + * is incomplete, drop the block to keep history usable. */ export function downgradeOpenAIReasoningBlocks(messages: AgentMessage[]): AgentMessage[] { const out: AgentMessage[] = []; @@ -53,23 +78,29 @@ export function downgradeOpenAIReasoningBlocks(messages: AgentMessage[]): AgentM let changed = false; type AssistantContentBlock = (typeof assistantMsg.content)[number]; - const nextContent = assistantMsg.content.flatMap((block): AssistantContentBlock[] => { - if (!block || typeof block !== "object") return [block as AssistantContentBlock]; - - const record = block as OpenAIThinkingBlock; - if (record.type !== "thinking") return [block as AssistantContentBlock]; - - const signature = typeof record.thinkingSignature === "string" ? record.thinkingSignature : ""; - if (!signature || !isOrphanedOpenAIReasoningSignature(signature)) { - return [block as AssistantContentBlock]; + const nextContent: AssistantContentBlock[] = []; + for (let i = 0; i < assistantMsg.content.length; i++) { + const block = assistantMsg.content[i]; + if (!block || typeof block !== "object") { + nextContent.push(block as AssistantContentBlock); + continue; + } + const record = block as OpenAIThinkingBlock; + if (record.type !== "thinking") { + nextContent.push(block as AssistantContentBlock); + continue; + } + const signature = parseOpenAIReasoningSignature(record.thinkingSignature); + if (!signature) { + nextContent.push(block as AssistantContentBlock); + continue; + } + if (hasFollowingNonThinkingBlock(assistantMsg.content, i)) { + nextContent.push(block as AssistantContentBlock); + continue; } - - const thinking = typeof record.thinking === "string" ? 
record.thinking : ""; - const trimmed = thinking.trim(); changed = true; - if (!trimmed) return []; - return [{ type: "text" as const, text: thinking }]; - }); + } if (!changed) { out.push(msg); diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index 26bc4ef96..b428c3328 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -161,4 +161,92 @@ describe("sanitizeSessionHistory", () => { expect(result).toHaveLength(1); expect(result[0]?.role).toBe("assistant"); }); + + it("does not downgrade openai reasoning when the model has not changed", async () => { + const sessionEntries: Array<{ type: string; customType: string; data: unknown }> = [ + { + type: "custom", + customType: "model-snapshot", + data: { + timestamp: Date.now(), + provider: "openai", + modelApi: "openai-responses", + modelId: "gpt-5.2-codex", + }, + }, + ]; + const sessionManager = { + getEntries: vi.fn(() => sessionEntries), + appendCustomEntry: vi.fn((customType: string, data: unknown) => { + sessionEntries.push({ type: "custom", customType, data }); + }), + } as unknown as SessionManager; + const messages: AgentMessage[] = [ + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "reasoning", + thinkingSignature: JSON.stringify({ id: "rs_test", type: "reasoning" }), + }, + ], + }, + ]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-responses", + provider: "openai", + modelId: "gpt-5.2-codex", + sessionManager, + sessionId: "test-session", + }); + + expect(result).toEqual(messages); + }); + + it("downgrades openai reasoning only when the model changes", async () => { + const sessionEntries: Array<{ type: string; customType: string; data: unknown }> = [ + { + type: "custom", + customType: "model-snapshot", + data: { + timestamp: Date.now(), + provider: "anthropic", + modelApi: "anthropic-messages", + modelId: "claude-3-7", + }, + }, + ]; + const sessionManager = { + getEntries: vi.fn(() => sessionEntries), + appendCustomEntry: vi.fn((customType: string, data: unknown) => { + sessionEntries.push({ type: "custom", customType, data }); + }), + } as unknown as SessionManager; + const messages: AgentMessage[] = [ + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "reasoning", + thinkingSignature: { id: "rs_test", type: "reasoning" }, + }, + ], + }, + ]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-responses", + provider: "openai", + modelId: "gpt-5.2-codex", + sessionManager, + sessionId: "test-session", + }); + + expect(result).toEqual([]); + }); }); diff --git a/src/agents/pi-embedded-runner/google.ts b/src/agents/pi-embedded-runner/google.ts index 9566593b9..7b26d0d04 100644 --- a/src/agents/pi-embedded-runner/google.ts +++ b/src/agents/pi-embedded-runner/google.ts @@ -212,7 +212,50 @@ registerUnhandledRejectionHandler((reason) => { return true; }); -type CustomEntryLike = { type?: unknown; customType?: unknown }; +type CustomEntryLike = { type?: unknown; customType?: unknown; data?: unknown }; + +type ModelSnapshotEntry = { + timestamp: number; + provider?: string; + modelApi?: string | null; + modelId?: string; +}; + +const MODEL_SNAPSHOT_CUSTOM_TYPE = "model-snapshot"; + +function readLastModelSnapshot(sessionManager: SessionManager): ModelSnapshotEntry | null { + try { + const entries = sessionManager.getEntries(); + for (let i = 
+      const entry = entries[i] as CustomEntryLike;
+      if (entry?.type !== "custom" || entry?.customType !== MODEL_SNAPSHOT_CUSTOM_TYPE) continue;
+      const data = entry?.data as ModelSnapshotEntry | undefined;
+      if (data && typeof data === "object") {
+        return data;
+      }
+    }
+  } catch {
+    return null;
+  }
+  return null;
+}
+
+function appendModelSnapshot(sessionManager: SessionManager, data: ModelSnapshotEntry): void {
+  try {
+    sessionManager.appendCustomEntry(MODEL_SNAPSHOT_CUSTOM_TYPE, data);
+  } catch {
+    // ignore persistence failures
+  }
+}
+
+function isSameModelSnapshot(a: ModelSnapshotEntry, b: ModelSnapshotEntry): boolean {
+  const normalize = (value?: string | null) => value ?? "";
+  return (
+    normalize(a.provider) === normalize(b.provider) &&
+    normalize(a.modelApi) === normalize(b.modelApi) &&
+    normalize(a.modelId) === normalize(b.modelId)
+  );
+}
 
 function hasGoogleTurnOrderingMarker(sessionManager: SessionManager): boolean {
   try {
@@ -295,7 +338,29 @@ export async function sanitizeSessionHistory(params: {
   const isOpenAIResponsesApi =
     params.modelApi === "openai-responses" || params.modelApi === "openai-codex-responses";
 
-  const sanitizedOpenAI = isOpenAIResponsesApi ? downgradeOpenAIReasoningBlocks(repairedTools) : repairedTools;
+  const hasSnapshot = Boolean(params.provider || params.modelApi || params.modelId);
+  const priorSnapshot = hasSnapshot ? readLastModelSnapshot(params.sessionManager) : null;
+  const modelChanged = priorSnapshot
+    ? !isSameModelSnapshot(priorSnapshot, {
+        timestamp: 0,
+        provider: params.provider,
+        modelApi: params.modelApi,
+        modelId: params.modelId,
+      })
+    : false;
+  const sanitizedOpenAI =
+    isOpenAIResponsesApi && modelChanged
+      ? downgradeOpenAIReasoningBlocks(repairedTools)
+      : repairedTools;
+
+  if (hasSnapshot && (!priorSnapshot || modelChanged)) {
+    appendModelSnapshot(params.sessionManager, {
+      timestamp: Date.now(),
+      provider: params.provider,
+      modelApi: params.modelApi,
+      modelId: params.modelId,
+    });
+  }
 
   if (!policy.applyGoogleTurnOrdering) {
     return sanitizedOpenAI;