fix: normalize <think> reasoning blocks

2026-01-09 08:29:58 +00:00
parent 5b50c97939
commit 17ccf53eb1
6 changed files with 310 additions and 6 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,12 +32,14 @@
 - Status: show provider prefix in /status model display. (#506) — thanks @mcinteerj
 - Status: compact /status with session token usage + estimated cost, add `/cost` per-response usage lines (tokens-only for OAuth).
 - Status: show active auth profile and key snippet in /status.
+- Agent: promote `<think>`/`<thinking>` tag reasoning into structured thinking blocks so `/reasoning` works consistently for OpenAI-compat providers.
 - macOS: package ClawdbotKit resources and Swift 6.2 compatibility dylib to avoid launch/tool crashes. (#473) — thanks @gupsammy
 - WhatsApp: group `/model list` output by provider for scannability. (#456) - thanks @mcinteerj
 - Hooks: allow per-hook model overrides for webhook/Gmail runs (e.g. GPT 5 Mini).
 - Control UI: logs tab opens at the newest entries (bottom).
 - Control UI: add Docs link, remove chat composer divider, and add New session button.
 - Control UI: link sessions list to chat view. (#471) — thanks @HazAT
+- Control UI: show/patch per-session reasoning level and render extracted reasoning in chat.
 - Control UI: queue outgoing chat messages, add Enter-to-send, and show queued items. (#527) — thanks @YuriNachos
 - Control UI: drop explicit `ui:install` step; `ui:build` now auto-installs UI deps (docs + update flow).
 - Telegram: retry long-polling conflicts with backoff to avoid fatal exits.
--- a/src/agents/pi-embedded-subscribe.test.ts
+++ b/src/agents/pi-embedded-subscribe.test.ts
@@ -167,6 +167,117 @@ describe("subscribeEmbeddedPiSession", () => {
    );
  });

+  // Checks the non-streaming path (reasoningMode "on"): a text block that
+  // starts with <think>…</think> is rewritten on the message object itself
+  // into a thinking block plus a text block, and the single block reply shows
+  // the reasoning in italics ahead of the answer.
+  it("promotes <think> tags to thinking blocks at write-time", () => {
+    let handler: ((evt: unknown) => void) | undefined;
+    // Stub session that only captures the subscriber so the test can push
+    // events into it by hand.
+    const session: StubSession = {
+      subscribe: (fn) => {
+        handler = fn;
+        return () => {};
+      },
+    };
+
+    const onBlockReply = vi.fn();
+
+    subscribeEmbeddedPiSession({
+      session: session as unknown as Parameters<
+        typeof subscribeEmbeddedPiSession
+      >[0]["session"],
+      runId: "run",
+      onBlockReply,
+      blockReplyBreak: "message_end",
+      reasoningMode: "on",
+    });
+
+    const assistantMessage = {
+      role: "assistant",
+      content: [
+        {
+          type: "text",
+          text: "<think>\nBecause it helps\n</think>\n\nFinal answer",
+        },
+      ],
+    } as AssistantMessage;
+
+    handler?.({ type: "message_end", message: assistantMessage });
+
+    expect(onBlockReply).toHaveBeenCalledTimes(1);
+    expect(onBlockReply.mock.calls[0][0].text).toBe(
+      "_Reasoning:_\n_Because it helps_\n\nFinal answer",
+    );
+
+    // The same object passed to the handler must now hold structured content.
+    expect(assistantMessage.content).toEqual([
+      { type: "thinking", thinking: "Because it helps" },
+      { type: "text", text: "Final answer" },
+    ]);
+  });
+
+  // Checks the streaming path (reasoningMode "stream"): the <think> span
+  // arrives split across two text deltas, the reasoning is reported through
+  // onReasoningStream, and the final block reply contains only the answer.
+  it("streams <think> reasoning via onReasoningStream without leaking into final text", () => {
+    let handler: ((evt: unknown) => void) | undefined;
+    // Stub session that only captures the subscriber so the test can push
+    // events into it by hand.
+    const session: StubSession = {
+      subscribe: (fn) => {
+        handler = fn;
+        return () => {};
+      },
+    };
+
+    const onReasoningStream = vi.fn();
+    const onBlockReply = vi.fn();
+
+    subscribeEmbeddedPiSession({
+      session: session as unknown as Parameters<
+        typeof subscribeEmbeddedPiSession
+      >[0]["session"],
+      runId: "run",
+      onReasoningStream,
+      onBlockReply,
+      blockReplyBreak: "message_end",
+      reasoningMode: "stream",
+    });
+
+    // First chunk: opening tag plus the start of the reasoning.
+    handler?.({
+      type: "message_update",
+      message: { role: "assistant" },
+      assistantMessageEvent: {
+        type: "text_delta",
+        delta: "<think>\nBecause",
+      },
+    });
+
+    // Second chunk: rest of the reasoning, closing tag and the answer.
+    handler?.({
+      type: "message_update",
+      message: { role: "assistant" },
+      assistantMessageEvent: {
+        type: "text_delta",
+        delta: " it helps\n</think>\n\nFinal answer",
+      },
+    });
+
+    const assistantMessage = {
+      role: "assistant",
+      content: [
+        {
+          type: "text",
+          text: "<think>\nBecause it helps\n</think>\n\nFinal answer",
+        },
+      ],
+    } as AssistantMessage;
+
+    handler?.({ type: "message_end", message: assistantMessage });
+
+    expect(onBlockReply).toHaveBeenCalledTimes(1);
+    expect(onBlockReply.mock.calls[0][0].text).toBe("Final answer");
+
+    // Only the last streamed payload is checked; it must hold the complete
+    // reasoning text.
+    const streamTexts = onReasoningStream.mock.calls
+      .map((call) => call[0]?.text)
+      .filter((value): value is string => typeof value === "string");
+    expect(streamTexts.at(-1)).toBe("Reasoning:\nBecause it helps");
+
+    // The same object passed to the handler must now hold structured content.
+    expect(assistantMessage.content).toEqual([
+      { type: "thinking", thinking: "Because it helps" },
+      { type: "text", text: "Final answer" },
+    ]);
+  });
+
  it("emits block replies on text_end and does not duplicate on message_end", () => {
    let handler: ((evt: unknown) => void) | undefined;
    const session: StubSession = {
--- a/src/agents/pi-embedded-subscribe.ts
+++ b/src/agents/pi-embedded-subscribe.ts
@@ -24,6 +24,7 @@ const THINKING_OPEN_RE = /<\s*think(?:ing)?\s*>/i;
 const THINKING_CLOSE_RE = /<\s*\/\s*think(?:ing)?\s*>/i;
 const THINKING_OPEN_GLOBAL_RE = /<\s*think(?:ing)?\s*>/gi;
 const THINKING_CLOSE_GLOBAL_RE = /<\s*\/\s*think(?:ing)?\s*>/gi;
+const THINKING_TAG_SCAN_RE = /<\s*(\/?)\s*think(?:ing)?\s*>/gi;
 const TOOL_RESULT_MAX_CHARS = 8000;
 const log = createSubsystemLogger("agent/embedded");
 const RAW_STREAM_ENABLED = process.env.CLAWDBOT_RAW_STREAM === "1";
@@ -121,6 +122,102 @@ function stripUnpairedThinkingTags(text: string): string {
  return text;
 }

+// One segment produced by splitting a text block on think tags: either the
+// reasoning found inside a tag pair or the ordinary text found outside of it.
+type ThinkTaggedSplitBlock =
+  | { type: "thinking"; thinking: string }
+  | { type: "text"; text: string };
+
+/**
+ * Splits text that opens with a `<think>`/`<thinking>` tag into ordered
+ * thinking and text segments.
+ *
+ * Thinking segments are trimmed and dropped when empty; text segments are kept
+ * verbatim (including surrounding whitespace) and dropped only when empty.
+ * An opening tag seen while already inside a thinking span, and a closing tag
+ * seen outside of one, are not treated as delimiters and stay in the output.
+ *
+ * @param text - Raw text of a single assistant text block.
+ * @returns The segments in source order, or `null` when the text does not
+ *   start with a think tag (leading whitespace ignored), contains no closing
+ *   tag, ends inside an unclosed thinking span, or yields no non-empty
+ *   thinking segment.
+ */
+function splitThinkingTaggedText(text: string): ThinkTaggedSplitBlock[] | null {
+  // Avoid false positives: only treat it as structured thinking when it begins
+  // with a think tag (common for local/OpenAI-compat providers that emulate
+  // reasoning blocks via tags). The match has to sit at offset 0; a tag that
+  // merely appears later in text starting with some other "<…>" markup does
+  // not qualify.
+  const opening = THINKING_OPEN_RE.exec(text.trimStart());
+  if (opening?.index !== 0) return null;
+  if (!THINKING_CLOSE_RE.test(text)) return null;
+
+  const blocks: ThinkTaggedSplitBlock[] = [];
+  let inThinking = false;
+  let cursor = 0; // offset just past the last consumed closing tag
+  let thinkingStart = 0; // offset just past the current opening tag
+
+  const pushText = (value: string) => {
+    if (!value) return;
+    blocks.push({ type: "text", text: value });
+  };
+  const pushThinking = (value: string) => {
+    const cleaned = value.trim();
+    if (!cleaned) return;
+    blocks.push({ type: "thinking", thinking: cleaned });
+  };
+
+  // matchAll works on a copy of the regex that inherits lastIndex, so reset
+  // the shared global regex to make the scan start at the top of the text.
+  THINKING_TAG_SCAN_RE.lastIndex = 0;
+  for (const match of text.matchAll(THINKING_TAG_SCAN_RE)) {
+    const index = match.index ?? 0;
+    // Capture group 1 is "/" for a closing tag and "" for an opening tag.
+    const isClose = match[1] === "/";
+
+    if (!inThinking && !isClose) {
+      pushText(text.slice(cursor, index));
+      thinkingStart = index + match[0].length;
+      inThinking = true;
+      continue;
+    }
+
+    if (inThinking && isClose) {
+      pushThinking(text.slice(thinkingStart, index));
+      cursor = index + match[0].length;
+      inThinking = false;
+    }
+  }
+
+  // An unterminated span means the text is not cleanly tagged; leave it alone.
+  if (inThinking) return null;
+  pushText(text.slice(cursor));
+
+  const hasThinking = blocks.some((b) => b.type === "thinking");
+  if (!hasThinking) return null;
+  return blocks;
+}
+
+/**
+ * Rewrites tag-based reasoning inside an assistant message into structured
+ * content blocks.
+ *
+ * Each text block that `splitThinkingTaggedText` can split is replaced by the
+ * resulting thinking/text blocks (text parts lose leading whitespace and are
+ * dropped when empty). All other blocks are carried over untouched. The
+ * message is mutated in place, and only when at least one block was split.
+ * Nothing happens when `content` is not an array or already holds a block of
+ * type "thinking".
+ *
+ * @param message - Assistant message whose `content` may be replaced.
+ */
+function promoteThinkingTagsToBlocks(message: AssistantMessage): void {
+  const content = message.content;
+  if (!Array.isArray(content)) return;
+
+  const asRecord = (value: unknown): Record<string, unknown> | null =>
+    value && typeof value === "object"
+      ? (value as Record<string, unknown>)
+      : null;
+
+  // Structured thinking is already present; leave the message as it is.
+  if (content.some((entry) => asRecord(entry)?.type === "thinking")) return;
+
+  const rebuilt: Array<Record<string, unknown>> = [];
+  let promoted = false;
+
+  for (const entry of content) {
+    const record = asRecord(entry);
+    const parts =
+      record?.type === "text" && typeof record.text === "string"
+        ? splitThinkingTaggedText(record.text)
+        : null;
+    if (!parts) {
+      // Not a splittable text block: keep the original entry by reference.
+      rebuilt.push(entry as Record<string, unknown>);
+      continue;
+    }
+
+    promoted = true;
+    for (const part of parts) {
+      if (part.type === "thinking") {
+        rebuilt.push({ type: "thinking", thinking: part.thinking });
+        continue;
+      }
+      const body = part.text.trimStart();
+      if (body) rebuilt.push({ type: "text", text: body });
+    }
+  }
+
+  if (promoted) {
+    (message as unknown as { content: unknown }).content = rebuilt;
+  }
+}
+
 function normalizeSlackTarget(raw: string): string | undefined {
  const trimmed = raw.trim();
  if (!trimmed) return undefined;
@@ -792,6 +889,7 @@ export function subscribeEmbeddedPiSession(params: {
        const msg = (evt as AgentEvent & { message: AgentMessage }).message;
        if (msg?.role === "assistant") {
          const assistantMessage = msg as AssistantMessage;
+          promoteThinkingTagsToBlocks(assistantMessage);
          const rawText = extractAssistantText(assistantMessage);
          appendRawStream({
            ts: Date.now(),
--- a/ui/src/ui/controllers/sessions.ts
+++ b/ui/src/ui/controllers/sessions.ts
@@ -42,12 +42,17 @@ export async function loadSessions(state: SessionsState) {
 export async function patchSession(
  state: SessionsState,
  key: string,
-  patch: { thinkingLevel?: string | null; verboseLevel?: string | null },
+  patch: {
+    thinkingLevel?: string | null;
+    verboseLevel?: string | null;
+    reasoningLevel?: string | null;
+  },
 ) {
  if (!state.client || !state.connected) return;
  const params: Record<string, unknown> = { key };
  if ("thinkingLevel" in patch) params.thinkingLevel = patch.thinkingLevel;
  if ("verboseLevel" in patch) params.verboseLevel = patch.verboseLevel;
+  if ("reasoningLevel" in patch) params.reasoningLevel = patch.reasoningLevel;
  try {
    await state.client.request("sessions.patch", params);
    await loadSessions(state);
@@ -55,4 +60,3 @@ export async function patchSession(
    state.sessionsError = String(err);
  }
 }
-
--- a/ui/src/ui/views/chat.ts
+++ b/ui/src/ui/views/chat.ts
@@ -38,6 +38,11 @@ export function renderChat(props: ChatProps) {
  const canCompose = props.connected;
  const isBusy = props.sending || Boolean(props.stream);
  const sessionOptions = resolveSessionOptions(props.sessionKey, props.sessions);
+  const activeSession = props.sessions?.sessions?.find(
+    (row) => row.key === props.sessionKey,
+  );
+  const reasoningLevel = activeSession?.reasoningLevel ?? "off";
+  const showReasoning = reasoningLevel !== "off";
  const composePlaceholder = props.connected
    ? "Message (↩ to send, Shift+↩ for line breaks)"
    : "Connect to the gateway to start chatting…";
@@ -72,6 +77,7 @@ export function renderChat(props: ChatProps) {
        </div>
        <div class="chat-header__right">
          <div class="muted">Thinking: ${props.thinkingLevel ?? "inherit"}</div>
+          <div class="muted">Reasoning: ${reasoningLevel}</div>
        </div>
      </div>

@@ -107,7 +113,7 @@ export function renderChat(props: ChatProps) {
                { streaming: true }
              );
            }
-            return renderMessage(item.message, props);
+            return renderMessage(item.message, props, { showReasoning });
          }
        )}
      </div>
@@ -326,7 +332,7 @@ function renderReadingIndicator() {
 function renderMessage(
  message: unknown,
  props?: Pick<ChatProps, "isToolOutputExpanded" | "onToolOutputToggle">,
-  opts?: { streaming?: boolean }
+  opts?: { streaming?: boolean; showReasoning?: boolean }
 ) {
  const m = message as Record<string, unknown>;
  const role = typeof m.role === "string" ? m.role : "unknown";
@@ -334,6 +340,10 @@ function renderMessage(
  const hasToolCards = toolCards.length > 0;
  const isToolResult = isToolResultMessage(message);
  const extractedText = extractText(message);
+  const extractedThinking =
+    opts?.showReasoning && role === "assistant"
+      ? extractThinking(message)
+      : null;
  const contentText = typeof m.content === "string" ? m.content : null;
  const fallback = hasToolCards ? null : JSON.stringify(message, null, 2);

@@ -345,10 +355,15 @@ function renderMessage(
        : !isToolResult && fallback
          ? { kind: "json" as const, value: fallback }
          : null;
-  const markdown =
+  const markdownBase =
    display?.kind === "json"
      ? ["```json", display.value, "```"].join("\n")
      : (display?.value ?? null);
+  const markdown = extractedThinking
+    ? [formatReasoningMarkdown(extractedThinking), markdownBase]
+        .filter(Boolean)
+        .join("\n\n")
+    : markdownBase;

  const timestamp =
    typeof m.timestamp === "number" ? new Date(m.timestamp).toLocaleTimeString() : "";
@@ -413,6 +428,60 @@ function extractText(message: unknown): string | null {
  return null;
 }

+/**
+ * Collects the reasoning text carried by a chat message.
+ *
+ * Structured blocks (`{ type: "thinking", thinking: string }` entries of an
+ * array `content`) take precedence. When none yield text, the function falls
+ * back to scanning the raw text for `<think>`/`<thinking>` tag pairs.
+ *
+ * @param message - Message object as received by the chat view.
+ * @returns The trimmed reasoning segments joined with newlines, or `null`
+ *   when the message carries no reasoning.
+ */
+function extractThinking(message: unknown): string | null {
+  const content = (message as Record<string, unknown>).content;
+  const parts: string[] = [];
+  if (Array.isArray(content)) {
+    for (const entry of content) {
+      // Malformed entries (null, primitives) are skipped rather than allowed
+      // to throw while the chat is rendering.
+      if (!entry || typeof entry !== "object") continue;
+      const item = entry as Record<string, unknown>;
+      if (item.type === "thinking" && typeof item.thinking === "string") {
+        const cleaned = item.thinking.trim();
+        if (cleaned) parts.push(cleaned);
+      }
+    }
+  }
+  if (parts.length > 0) return parts.join("\n");
+
+  // Back-compat: older logs may still have <think> tags inside text blocks.
+  const rawText = extractRawText(message);
+  if (!rawText) return null;
+  const matches = [...rawText.matchAll(/<\s*think(?:ing)?\s*>([\s\S]*?)<\s*\/\s*think(?:ing)?\s*>/gi)];
+  const extracted = matches
+    .map((match) => (match[1] ?? "").trim())
+    .filter(Boolean);
+  return extracted.length > 0 ? extracted.join("\n") : null;
+}
+
+/**
+ * Returns the unprocessed text of a chat message.
+ *
+ * Lookup order: a string `content`; otherwise the `text` of every
+ * `{ type: "text" }` entry of an array `content`, joined with newlines;
+ * otherwise a string `text` property on the message itself.
+ *
+ * @param message - Message object as received by the chat view.
+ * @returns The raw text, or `null` when none of the three sources has any.
+ */
+function extractRawText(message: unknown): string | null {
+  const m = message as Record<string, unknown>;
+  const content = m.content;
+  if (typeof content === "string") return content;
+  if (Array.isArray(content)) {
+    const parts: string[] = [];
+    for (const entry of content) {
+      // Malformed entries (null, primitives) are skipped rather than allowed
+      // to throw on property access.
+      if (!entry || typeof entry !== "object") continue;
+      const item = entry as Record<string, unknown>;
+      if (item.type === "text" && typeof item.text === "string") {
+        parts.push(item.text);
+      }
+    }
+    if (parts.length > 0) return parts.join("\n");
+  }
+  if (typeof m.text === "string") return m.text;
+  return null;
+}
+
+/**
+ * Renders reasoning text as an italic Markdown section.
+ *
+ * Every non-blank line is trimmed and wrapped in underscores, and the result
+ * is placed under an italic "Reasoning:" heading line.
+ *
+ * @param text - Reasoning text, possibly spanning several lines.
+ * @returns The Markdown section, or an empty string when `text` has no
+ *   non-blank line.
+ */
+function formatReasoningMarkdown(text: string): string {
+  const italicLines: string[] = [];
+  for (const rawLine of text.trim().split(/\r?\n/)) {
+    const line = rawLine.trim();
+    if (line) italicLines.push(`_${line}_`);
+  }
+  if (italicLines.length === 0) return "";
+  return `_Reasoning:_\n${italicLines.join("\n")}`;
+}
+
 type ToolCard = {
  kind: "call" | "result";
  name: string;
--- a/ui/src/ui/views/sessions.ts
+++ b/ui/src/ui/views/sessions.ts
@@ -23,12 +23,17 @@ export type SessionsProps = {
  onRefresh: () => void;
  onPatch: (
    key: string,
-    patch: { thinkingLevel?: string | null; verboseLevel?: string | null },
+    patch: {
+      thinkingLevel?: string | null;
+      verboseLevel?: string | null;
+      reasoningLevel?: string | null;
+    },
  ) => void;
 };

 const THINK_LEVELS = ["", "off", "minimal", "low", "medium", "high"] as const;
 const VERBOSE_LEVELS = ["", "off", "on"] as const;
+const REASONING_LEVELS = ["", "off", "on", "stream"] as const;

 export function renderSessions(props: SessionsProps) {
  const rows = props.result?.sessions ?? [];
@@ -117,6 +122,7 @@ export function renderSessions(props: SessionsProps) {
          <div>Tokens</div>
          <div>Thinking</div>
          <div>Verbose</div>
+          <div>Reasoning</div>
        </div>
        ${rows.length === 0
          ? html`<div class="muted">No sessions found.</div>`
@@ -130,6 +136,7 @@ function renderRow(row: GatewaySessionRow, basePath: string, onPatch: SessionsPr
  const updated = row.updatedAt ? formatAgo(row.updatedAt) : "n/a";
  const thinking = row.thinkingLevel ?? "";
  const verbose = row.verboseLevel ?? "";
+  const reasoning = row.reasoningLevel ?? "";
  const displayName = row.displayName ?? row.key;
  const canLink = row.kind !== "global";
  const chatUrl = canLink
@@ -170,6 +177,19 @@ function renderRow(row: GatewaySessionRow, basePath: string, onPatch: SessionsPr
          )}
        </select>
      </div>
+      <div>
+        <select
+          .value=${reasoning}
+          @change=${(e: Event) => {
+            const value = (e.target as HTMLSelectElement).value;
+            onPatch(row.key, { reasoningLevel: value || null });
+          }}
+        >
+          ${REASONING_LEVELS.map((level) =>
+            html`<option value=${level}>${level || "inherit"}</option>`,
+          )}
+        </select>
+      </div>
    </div>
  `;
 }