fix: suppress <think> leakage + split reasoning output (#614) (thanks @zknicker)

This commit is contained in:
Peter Steinberger
2026-01-10 00:02:13 +01:00
parent 2d0ca67c21
commit 51ec578cec
6 changed files with 49 additions and 46 deletions

View File

@@ -76,6 +76,7 @@
 - Auto-reply: preserve spacing when stripping inline directives. (#539) — thanks @joshp123
 - Auto-reply: relax reply tag parsing to allow whitespace. (#560) — thanks @mcinteerj
 - Auto-reply: add per-provider block streaming toggles and coalesce streamed blocks to reduce line spam. (#536) — thanks @mcinteerj
+- Auto-reply: suppress `<think>` leakage in block streaming and emit `/reasoning` as a separate `Reasoning:` message. (#614) — thanks @zknicker
 - Auto-reply: default block streaming off for non-Telegram providers unless explicitly enabled, and avoid splitting on forced flushes below max.
 - Auto-reply: raise default coalesce minChars for Signal/Slack/Discord and clarify streaming vs draft streaming in docs.
 - Auto-reply: default block streaming coalesce idle to 1s to reduce tiny chunks. — thanks @steipete

View File

@@ -38,8 +38,8 @@ read_when:
 ## Reasoning visibility (/reasoning)
 - Levels: `on|off|stream`.
-- Directive-only message toggles whether thinking blocks are shown as italic text in replies.
+- Directive-only message toggles whether thinking blocks are shown in replies.
-- When enabled, any model-provided reasoning content is appended as a separate italic block.
+- When enabled, reasoning is sent as a **separate message** prefixed with `Reasoning:`.
 - `stream` (Telegram only): streams reasoning into the Telegram draft bubble while the reply is generating, then sends the final answer without reasoning.
 - Alias: `/reason`.
 - Send `/reasoning` (or `/reasoning:`) with no argument to see the current reasoning level.

View File

@@ -1604,23 +1604,21 @@ export async function runEmbeddedPiAgent(params: {
         }
       }
-      const fallbackText = lastAssistant
-        ? (() => {
-            const base = extractAssistantText(lastAssistant);
-            if (params.reasoningLevel !== "on") return base;
-            const thinking = extractAssistantThinking(lastAssistant);
-            const formatted = thinking
-              ? formatReasoningMarkdown(thinking)
-              : "";
-            if (!formatted) return base;
-            return base ? `${formatted}\n\n${base}` : formatted;
-          })()
-        : "";
+      const reasoningText =
+        lastAssistant && params.reasoningLevel === "on"
+          ? formatReasoningMarkdown(extractAssistantThinking(lastAssistant))
+          : "";
+      if (reasoningText) replyItems.push({ text: reasoningText });
+
+      const fallbackAnswerText = lastAssistant
+        ? extractAssistantText(lastAssistant)
+        : "";
-      for (const text of assistantTexts.length
+      const answerTexts = assistantTexts.length
         ? assistantTexts
-        : fallbackText
-          ? [fallbackText]
-          : []) {
+        : fallbackAnswerText
+          ? [fallbackAnswerText]
+          : [];
+      for (const text of answerTexts) {
         const { text: cleanedText, mediaUrls } = splitMediaFromOutput(text);
         if (!cleanedText && (!mediaUrls || mediaUrls.length === 0))
           continue;

View File

@@ -129,7 +129,7 @@ describe("subscribeEmbeddedPiSession", () => {
     expect(payload.text).toBe("Hello block");
   });
-  it("prepends reasoning before text when enabled", () => {
+  it("emits reasoning as a separate message when enabled", () => {
     let handler: ((evt: unknown) => void) | undefined;
     const session: StubSession = {
       subscribe: (fn) => {
@@ -160,11 +160,11 @@ describe("subscribeEmbeddedPiSession", () => {
     handler?.({ type: "message_end", message: assistantMessage });
-    expect(onBlockReply).toHaveBeenCalledTimes(1);
-    const payload = onBlockReply.mock.calls[0][0];
-    expect(payload.text).toBe(
-      "_Reasoning:_\n_Because it helps_\n\nFinal answer",
-    );
+    expect(onBlockReply).toHaveBeenCalledTimes(2);
+    expect(onBlockReply.mock.calls[0][0].text).toBe(
+      "Reasoning:\nBecause it helps",
+    );
+    expect(onBlockReply.mock.calls[1][0].text).toBe("Final answer");
   });
   it("promotes <think> tags to thinking blocks at write-time", () => {
@@ -200,10 +200,11 @@ describe("subscribeEmbeddedPiSession", () => {
     handler?.({ type: "message_end", message: assistantMessage });
-    expect(onBlockReply).toHaveBeenCalledTimes(1);
+    expect(onBlockReply).toHaveBeenCalledTimes(2);
     expect(onBlockReply.mock.calls[0][0].text).toBe(
-      "_Reasoning:_\n_Because it helps_\n\nFinal answer",
+      "Reasoning:\nBecause it helps",
     );
+    expect(onBlockReply.mock.calls[1][0].text).toBe("Final answer");
     expect(assistantMessage.content).toEqual([
       { type: "thinking", thinking: "Because it helps" },

View File

@@ -936,11 +936,7 @@ export function subscribeEmbeddedPiSession(params: {
       const formattedReasoning = rawThinking
         ? formatReasoningMarkdown(rawThinking)
         : "";
-      const text = includeReasoning
-        ? baseText && formattedReasoning
-          ? `${formattedReasoning}\n\n${baseText}`
-          : formattedReasoning || baseText
-        : baseText;
+      const text = baseText;
       const addedDuringMessage =
         assistantTexts.length > assistantTextBaseline;
@@ -953,13 +949,28 @@ export function subscribeEmbeddedPiSession(params: {
         }
       }
       assistantTextBaseline = assistantTexts.length;
+      const onBlockReply = params.onBlockReply;
+      const shouldEmitReasoning =
+        includeReasoning &&
+        Boolean(formattedReasoning) &&
+        Boolean(onBlockReply) &&
+        formattedReasoning !== lastReasoningSent;
+      const shouldEmitReasoningBeforeAnswer =
+        shouldEmitReasoning &&
+        blockReplyBreak === "message_end" &&
+        !addedDuringMessage;
+      if (shouldEmitReasoningBeforeAnswer && formattedReasoning) {
+        lastReasoningSent = formattedReasoning;
+        void onBlockReply?.({ text: formattedReasoning });
+      }
       if (
         (blockReplyBreak === "message_end" ||
           (blockChunker
             ? blockChunker.hasBuffered()
             : blockBuffer.length > 0)) &&
         text &&
-        params.onBlockReply
+        onBlockReply
       ) {
         if (blockChunker?.hasBuffered()) {
           blockChunker.drain({ force: true, emit: emitBlockChunk });
@@ -975,7 +986,7 @@ export function subscribeEmbeddedPiSession(params: {
           const { text: cleanedText, mediaUrls } =
             splitMediaFromOutput(text);
           if (cleanedText || (mediaUrls && mediaUrls.length > 0)) {
-            void params.onBlockReply({
+            void onBlockReply({
               text: cleanedText,
               mediaUrls: mediaUrls?.length ? mediaUrls : undefined,
             });
@@ -983,16 +994,13 @@ export function subscribeEmbeddedPiSession(params: {
           }
         }
       }
-      const onBlockReply = params.onBlockReply;
-      const shouldEmitReasoningBlock =
-        includeReasoning &&
-        Boolean(formattedReasoning) &&
-        Boolean(onBlockReply) &&
-        formattedReasoning !== lastReasoningSent &&
-        (blockReplyBreak === "text_end" || Boolean(blockChunker));
-      if (shouldEmitReasoningBlock && formattedReasoning && onBlockReply) {
+      if (
+        shouldEmitReasoning &&
+        !shouldEmitReasoningBeforeAnswer &&
+        formattedReasoning
+      ) {
         lastReasoningSent = formattedReasoning;
-        void onBlockReply({ text: formattedReasoning });
+        void onBlockReply?.({ text: formattedReasoning });
       }
       if (streamReasoning && rawThinking) {
         emitReasoningStream(rawThinking);

View File

@@ -37,12 +37,7 @@ export function extractAssistantThinking(msg: AssistantMessage): string {
 export function formatReasoningMarkdown(text: string): string {
   const trimmed = text.trim();
   if (!trimmed) return "";
-  const lines = trimmed.split(/\r?\n/);
-  const wrapped = lines
-    .map((line) => line.trim())
-    .map((line) => (line ? `_${line}_` : ""))
-    .filter((line) => line.length > 0);
-  return wrapped.length > 0 ? [`_Reasoning:_`, ...wrapped].join("\n") : "";
+  return `Reasoning:\n${trimmed}`;
 }
 export function inferToolMetaFromArgs(