From c97bf23a4aa31fbc3c7de7b48c89012840d6ff5b Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Sat, 24 Jan 2026 07:58:04 +0000
Subject: [PATCH] fix: gate openai reasoning downgrade on model switches (#1562) (thanks @roshanasingh4)

---
 CHANGELOG.md                                  |  1 +
 docs/reference/transcript-hygiene.md          |  1 +
 ...-helpers.downgradeopenai-reasoning.test.ts | 32 ++++---
 src/agents/pi-embedded-helpers/openai.ts      | 89 +++++++++++++------
 ...ed-runner.sanitize-session-history.test.ts | 88 ++++++++++++++++++
 src/agents/pi-embedded-runner/google.ts       | 69 +++++++++++++-
 6 files changed, 239 insertions(+), 41 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c833d1e1b..1fe01fea4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,7 @@ Docs: https://docs.clawd.bot
 - Gateway: compare Linux process start time to avoid PID recycling lock loops; keep locks unless stale. (#1572) Thanks @steipete.
 - Skills: gate bird Homebrew install to macOS. (#1569) Thanks @bradleypriest.
 - Agents: ignore IDENTITY.md template placeholders when parsing identity to avoid placeholder replies. (#1556)
+- Agents: drop orphaned OpenAI Responses reasoning blocks on model switches. (#1562) Thanks @roshanasingh4.
 - Docker: update gateway command in docker-compose and Hetzner guide. (#1514)
 - Sessions: reject array-backed session stores to prevent silent wipes. (#1469)
 - Voice wake: auto-save wake words on blur/submit across iOS/Android and align limits with macOS.
diff --git a/docs/reference/transcript-hygiene.md b/docs/reference/transcript-hygiene.md
index fa466cef5..b43b8ab70 100644
--- a/docs/reference/transcript-hygiene.md
+++ b/docs/reference/transcript-hygiene.md
@@ -48,6 +48,7 @@ Implementation:
 **OpenAI / OpenAI Codex**
 
 - Image sanitization only.
+- On model switch into OpenAI Responses/Codex, drop orphaned reasoning signatures (standalone reasoning items without a following content block).
 - No tool call id sanitization.
 - No tool result pairing repair.
 - No turn validation or reordering.
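A minimal sketch of the case the doc line above describes, reusing the fixture shape from the tests in this patch (fields beyond `role`, `content`, `type`, and `thinkingSignature` are omitted here; the expected result mirrors the new "drops orphaned reasoning blocks" test below, not a separate API):

```ts
// Illustration only: the assistant turn holds a Responses reasoning item
// (an "rs_..." signature) with no content block after it.
const history = [
  {
    role: "assistant",
    content: [
      {
        type: "thinking",
        thinkingSignature: JSON.stringify({ id: "rs_abc", type: "reasoning" }),
      },
    ],
  },
  { role: "user", content: "next" },
];

// After a switch into openai-responses / openai-codex-responses, sanitization
// drops the orphaned block and keeps the rest of the history:
// downgradeOpenAIReasoningBlocks(history) -> [{ role: "user", content: "next" }]
```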
diff --git a/src/agents/pi-embedded-helpers.downgradeopenai-reasoning.test.ts b/src/agents/pi-embedded-helpers.downgradeopenai-reasoning.test.ts
index 7d25a6727..d83667cd1 100644
--- a/src/agents/pi-embedded-helpers.downgradeopenai-reasoning.test.ts
+++ b/src/agents/pi-embedded-helpers.downgradeopenai-reasoning.test.ts
@@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest";
 import { downgradeOpenAIReasoningBlocks } from "./pi-embedded-helpers.js";
 
 describe("downgradeOpenAIReasoningBlocks", () => {
-  it("downgrades orphaned reasoning signatures to text", () => {
+  it("keeps reasoning signatures when followed by content", () => {
     const input = [
       {
         role: "assistant",
@@ -17,22 +17,16 @@
       },
     ];
 
-    expect(downgradeOpenAIReasoningBlocks(input as any)).toEqual([
-      {
-        role: "assistant",
-        content: [{ type: "text", text: "internal reasoning" }, { type: "text", text: "answer" }],
-      },
-    ]);
+    expect(downgradeOpenAIReasoningBlocks(input as any)).toEqual(input);
   });
 
-  it("drops empty thinking blocks with orphaned signatures", () => {
+  it("drops orphaned reasoning blocks without following content", () => {
     const input = [
       {
         role: "assistant",
         content: [
           {
             type: "thinking",
-            thinking: " ",
             thinkingSignature: JSON.stringify({ id: "rs_abc", type: "reasoning" }),
           },
         ],
@@ -40,7 +34,25 @@
       },
       { role: "user", content: "next" },
     ];
 
-    expect(downgradeOpenAIReasoningBlocks(input as any)).toEqual([{ role: "user", content: "next" }]);
+    expect(downgradeOpenAIReasoningBlocks(input as any)).toEqual([
+      { role: "user", content: "next" },
+    ]);
+  });
+
+  it("drops object-form orphaned signatures", () => {
+    const input = [
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "thinking",
+            thinkingSignature: { id: "rs_obj", type: "reasoning" },
+          },
+        ],
+      },
+    ];
+
+    expect(downgradeOpenAIReasoningBlocks(input as any)).toEqual([]);
+  });
 
   it("keeps non-reasoning thinking signatures", () => {
diff --git a/src/agents/pi-embedded-helpers/openai.ts b/src/agents/pi-embedded-helpers/openai.ts
index 5c92676be..1e0ec057e 100644
--- a/src/agents/pi-embedded-helpers/openai.ts
+++ b/src/agents/pi-embedded-helpers/openai.ts
@@ -6,20 +6,45 @@ type OpenAIThinkingBlock = {
   thinkingSignature?: unknown;
 };
 
-function isOrphanedOpenAIReasoningSignature(signature: string): boolean {
-  const trimmed = signature.trim();
-  if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) return false;
-  try {
-    const parsed = JSON.parse(trimmed) as { id?: unknown; type?: unknown };
-    const id = typeof parsed?.id === "string" ? parsed.id : "";
-    const type = typeof parsed?.type === "string" ? parsed.type : "";
parsed.type : ""; - if (!id.startsWith("rs_")) return false; - if (type === "reasoning") return true; - if (type.startsWith("reasoning.")) return true; - return false; - } catch { - return false; +type OpenAIReasoningSignature = { + id: string; + type: string; +}; + +function parseOpenAIReasoningSignature(value: unknown): OpenAIReasoningSignature | null { + if (!value) return null; + let candidate: { id?: unknown; type?: unknown } | null = null; + if (typeof value === "string") { + const trimmed = value.trim(); + if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) return null; + try { + candidate = JSON.parse(trimmed) as { id?: unknown; type?: unknown }; + } catch { + return null; + } + } else if (typeof value === "object") { + candidate = value as { id?: unknown; type?: unknown }; } + if (!candidate) return null; + const id = typeof candidate.id === "string" ? candidate.id : ""; + const type = typeof candidate.type === "string" ? candidate.type : ""; + if (!id.startsWith("rs_")) return null; + if (type === "reasoning" || type.startsWith("reasoning.")) { + return { id, type }; + } + return null; +} + +function hasFollowingNonThinkingBlock( + content: Extract["content"], + index: number, +): boolean { + for (let i = index + 1; i < content.length; i++) { + const block = content[i]; + if (!block || typeof block !== "object") return true; + if ((block as { type?: unknown }).type !== "thinking") return true; + } + return false; } /** @@ -27,7 +52,7 @@ function isOrphanedOpenAIReasoningSignature(signature: string): boolean { * without the required following item. * * Clawdbot persists provider-specific reasoning metadata in `thinkingSignature`; if that metadata - * is incomplete, we downgrade the block to plain text (or drop it if empty) to keep history usable. + * is incomplete, drop the block to keep history usable. */ export function downgradeOpenAIReasoningBlocks(messages: AgentMessage[]): AgentMessage[] { const out: AgentMessage[] = []; @@ -53,23 +78,29 @@ export function downgradeOpenAIReasoningBlocks(messages: AgentMessage[]): AgentM let changed = false; type AssistantContentBlock = (typeof assistantMsg.content)[number]; - const nextContent = assistantMsg.content.flatMap((block): AssistantContentBlock[] => { - if (!block || typeof block !== "object") return [block as AssistantContentBlock]; - - const record = block as OpenAIThinkingBlock; - if (record.type !== "thinking") return [block as AssistantContentBlock]; - - const signature = typeof record.thinkingSignature === "string" ? record.thinkingSignature : ""; - if (!signature || !isOrphanedOpenAIReasoningSignature(signature)) { - return [block as AssistantContentBlock]; + const nextContent: AssistantContentBlock[] = []; + for (let i = 0; i < assistantMsg.content.length; i++) { + const block = assistantMsg.content[i]; + if (!block || typeof block !== "object") { + nextContent.push(block as AssistantContentBlock); + continue; + } + const record = block as OpenAIThinkingBlock; + if (record.type !== "thinking") { + nextContent.push(block as AssistantContentBlock); + continue; + } + const signature = parseOpenAIReasoningSignature(record.thinkingSignature); + if (!signature) { + nextContent.push(block as AssistantContentBlock); + continue; + } + if (hasFollowingNonThinkingBlock(assistantMsg.content, i)) { + nextContent.push(block as AssistantContentBlock); + continue; } - - const thinking = typeof record.thinking === "string" ? 
record.thinking : ""; - const trimmed = thinking.trim(); changed = true; - if (!trimmed) return []; - return [{ type: "text" as const, text: thinking }]; - }); + } if (!changed) { out.push(msg); diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index 26bc4ef96..b428c3328 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -161,4 +161,92 @@ describe("sanitizeSessionHistory", () => { expect(result).toHaveLength(1); expect(result[0]?.role).toBe("assistant"); }); + + it("does not downgrade openai reasoning when the model has not changed", async () => { + const sessionEntries: Array<{ type: string; customType: string; data: unknown }> = [ + { + type: "custom", + customType: "model-snapshot", + data: { + timestamp: Date.now(), + provider: "openai", + modelApi: "openai-responses", + modelId: "gpt-5.2-codex", + }, + }, + ]; + const sessionManager = { + getEntries: vi.fn(() => sessionEntries), + appendCustomEntry: vi.fn((customType: string, data: unknown) => { + sessionEntries.push({ type: "custom", customType, data }); + }), + } as unknown as SessionManager; + const messages: AgentMessage[] = [ + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "reasoning", + thinkingSignature: JSON.stringify({ id: "rs_test", type: "reasoning" }), + }, + ], + }, + ]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-responses", + provider: "openai", + modelId: "gpt-5.2-codex", + sessionManager, + sessionId: "test-session", + }); + + expect(result).toEqual(messages); + }); + + it("downgrades openai reasoning only when the model changes", async () => { + const sessionEntries: Array<{ type: string; customType: string; data: unknown }> = [ + { + type: "custom", + customType: "model-snapshot", + data: { + timestamp: Date.now(), + provider: "anthropic", + modelApi: "anthropic-messages", + modelId: "claude-3-7", + }, + }, + ]; + const sessionManager = { + getEntries: vi.fn(() => sessionEntries), + appendCustomEntry: vi.fn((customType: string, data: unknown) => { + sessionEntries.push({ type: "custom", customType, data }); + }), + } as unknown as SessionManager; + const messages: AgentMessage[] = [ + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "reasoning", + thinkingSignature: { id: "rs_test", type: "reasoning" }, + }, + ], + }, + ]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-responses", + provider: "openai", + modelId: "gpt-5.2-codex", + sessionManager, + sessionId: "test-session", + }); + + expect(result).toEqual([]); + }); }); diff --git a/src/agents/pi-embedded-runner/google.ts b/src/agents/pi-embedded-runner/google.ts index 9566593b9..7b26d0d04 100644 --- a/src/agents/pi-embedded-runner/google.ts +++ b/src/agents/pi-embedded-runner/google.ts @@ -212,7 +212,50 @@ registerUnhandledRejectionHandler((reason) => { return true; }); -type CustomEntryLike = { type?: unknown; customType?: unknown }; +type CustomEntryLike = { type?: unknown; customType?: unknown; data?: unknown }; + +type ModelSnapshotEntry = { + timestamp: number; + provider?: string; + modelApi?: string | null; + modelId?: string; +}; + +const MODEL_SNAPSHOT_CUSTOM_TYPE = "model-snapshot"; + +function readLastModelSnapshot(sessionManager: SessionManager): ModelSnapshotEntry | null { + try { + const entries = sessionManager.getEntries(); + for (let i = 
+      const entry = entries[i] as CustomEntryLike;
+      if (entry?.type !== "custom" || entry?.customType !== MODEL_SNAPSHOT_CUSTOM_TYPE) continue;
+      const data = entry?.data as ModelSnapshotEntry | undefined;
+      if (data && typeof data === "object") {
+        return data;
+      }
+    }
+  } catch {
+    return null;
+  }
+  return null;
+}
+
+function appendModelSnapshot(sessionManager: SessionManager, data: ModelSnapshotEntry): void {
+  try {
+    sessionManager.appendCustomEntry(MODEL_SNAPSHOT_CUSTOM_TYPE, data);
+  } catch {
+    // ignore persistence failures
+  }
+}
+
+function isSameModelSnapshot(a: ModelSnapshotEntry, b: ModelSnapshotEntry): boolean {
+  const normalize = (value?: string | null) => value ?? "";
+  return (
+    normalize(a.provider) === normalize(b.provider) &&
+    normalize(a.modelApi) === normalize(b.modelApi) &&
+    normalize(a.modelId) === normalize(b.modelId)
+  );
+}
 
 function hasGoogleTurnOrderingMarker(sessionManager: SessionManager): boolean {
   try {
@@ -295,7 +338,29 @@ export async function sanitizeSessionHistory(params: {
   const isOpenAIResponsesApi =
     params.modelApi === "openai-responses" || params.modelApi === "openai-codex-responses";
 
-  const sanitizedOpenAI = isOpenAIResponsesApi ? downgradeOpenAIReasoningBlocks(repairedTools) : repairedTools;
+  const hasSnapshot = Boolean(params.provider || params.modelApi || params.modelId);
+  const priorSnapshot = hasSnapshot ? readLastModelSnapshot(params.sessionManager) : null;
+  const modelChanged = priorSnapshot
+    ? !isSameModelSnapshot(priorSnapshot, {
+        timestamp: 0,
+        provider: params.provider,
+        modelApi: params.modelApi,
+        modelId: params.modelId,
+      })
+    : false;
+  const sanitizedOpenAI =
+    isOpenAIResponsesApi && modelChanged
+      ? downgradeOpenAIReasoningBlocks(repairedTools)
+      : repairedTools;
+
+  if (hasSnapshot && (!priorSnapshot || modelChanged)) {
+    appendModelSnapshot(params.sessionManager, {
+      timestamp: Date.now(),
+      provider: params.provider,
+      modelApi: params.modelApi,
+      modelId: params.modelId,
+    });
+  }
 
   if (!policy.applyGoogleTurnOrdering) {
     return sanitizedOpenAI;