fix: preserve fenced markdown in block streaming

This commit is contained in:
Peter Steinberger
2026-01-05 17:53:45 +01:00
parent 234059811c
commit 55e4e76d43
3 changed files with 265 additions and 41 deletions

View File

@@ -20,6 +20,7 @@
- Control UI: show a reading indicator bubble while the assistant is responding. - Control UI: show a reading indicator bubble while the assistant is responding.
- Status: show runtime (docker/direct) and move shortcuts to `/help`. - Status: show runtime (docker/direct) and move shortcuts to `/help`.
- Status: show model auth source (api-key/oauth). - Status: show model auth source (api-key/oauth).
- Block streaming: avoid splitting Markdown fenced blocks and reopen fences when forced to split.
### Maintenance ### Maintenance
- Deps: bump pi-* stack, Slack SDK, discord-api-types, file-type, zod, and Biome. - Deps: bump pi-* stack, Slack SDK, discord-api-types, file-type, zod, and Biome.

View File

@@ -633,6 +633,109 @@ describe("subscribeEmbeddedPiSession", () => {
]); ]);
}); });
it("avoids splitting inside fenced code blocks", () => {
let handler: ((evt: unknown) => void) | undefined;
const session: StubSession = {
subscribe: (fn) => {
handler = fn;
return () => {};
},
};
const onBlockReply = vi.fn();
subscribeEmbeddedPiSession({
session: session as unknown as Parameters<
typeof subscribeEmbeddedPiSession
>[0]["session"],
runId: "run",
onBlockReply,
blockReplyBreak: "message_end",
blockReplyChunking: {
minChars: 5,
maxChars: 50,
breakPreference: "paragraph",
},
});
const text = "Intro\n\n```bash\nline1\nline2\n```\n\nOutro";
handler?.({
type: "message_update",
message: { role: "assistant" },
assistantMessageEvent: {
type: "text_delta",
delta: text,
},
});
const assistantMessage = {
role: "assistant",
content: [{ type: "text", text }],
} as AssistantMessage;
handler?.({ type: "message_end", message: assistantMessage });
expect(onBlockReply).toHaveBeenCalledTimes(3);
expect(onBlockReply.mock.calls[0][0].text).toBe("Intro");
expect(onBlockReply.mock.calls[1][0].text).toBe(
"```bash\nline1\nline2\n```",
);
expect(onBlockReply.mock.calls[2][0].text).toBe("Outro");
});
it("reopens fenced blocks when splitting inside them", () => {
let handler: ((evt: unknown) => void) | undefined;
const session: StubSession = {
subscribe: (fn) => {
handler = fn;
return () => {};
},
};
const onBlockReply = vi.fn();
subscribeEmbeddedPiSession({
session: session as unknown as Parameters<
typeof subscribeEmbeddedPiSession
>[0]["session"],
runId: "run",
onBlockReply,
blockReplyBreak: "message_end",
blockReplyChunking: {
minChars: 10,
maxChars: 30,
breakPreference: "paragraph",
},
});
const text = `\`\`\`txt\n${"a".repeat(80)}\n\`\`\``;
handler?.({
type: "message_update",
message: { role: "assistant" },
assistantMessageEvent: {
type: "text_delta",
delta: text,
},
});
const assistantMessage = {
role: "assistant",
content: [{ type: "text", text }],
} as AssistantMessage;
handler?.({ type: "message_end", message: assistantMessage });
expect(onBlockReply.mock.calls.length).toBeGreaterThan(1);
for (const call of onBlockReply.mock.calls) {
const chunk = call[0].text as string;
expect(chunk.startsWith("```txt")).toBe(true);
const fenceCount = chunk.match(/```/g)?.length ?? 0;
expect(fenceCount).toBeGreaterThanOrEqual(2);
}
});
it("waits for auto-compaction retry and clears buffered text", async () => { it("waits for auto-compaction retry and clears buffered text", async () => {
const listeners: SessionEventHandler[] = []; const listeners: SessionEventHandler[] = [];
const session = { const session = {

View File

@@ -195,56 +195,159 @@ export function subscribeEmbeddedPiSession(params: {
} }
}; };
const findSentenceBreak = (window: string, minChars: number): number => { type FenceSpan = {
if (!window) return -1; start: number;
const matches = window.matchAll(/[.!?](?=\s|$)/g); end: number;
let idx = -1; openLine: string;
for (const match of matches) { marker: string;
const at = match.index ?? -1; indent: string;
if (at < minChars) continue;
idx = at + 1;
}
return idx;
}; };
const findWhitespaceBreak = (window: string, minChars: number): number => { type FenceSplit = {
for (let i = window.length - 1; i >= minChars; i--) { closeFenceLine: string;
if (/\s/.test(window[i])) return i; reopenFenceLine: string;
}
return -1;
}; };
const pickBreakIndex = (buffer: string): number => { type BreakResult = {
if (!blockChunking) return -1; index: number;
fenceSplit?: FenceSplit;
};
const parseFenceSpans = (buffer: string): FenceSpan[] => {
const spans: FenceSpan[] = [];
let open:
| {
start: number;
markerChar: string;
markerLen: number;
openLine: string;
marker: string;
indent: string;
}
| undefined;
let offset = 0;
while (offset <= buffer.length) {
const nextNewline = buffer.indexOf("\n", offset);
const lineEnd = nextNewline === -1 ? buffer.length : nextNewline;
const line = buffer.slice(offset, lineEnd);
const match = line.match(/^( {0,3})(`{3,}|~{3,})(.*)$/);
if (match) {
const indent = match[1];
const marker = match[2];
const markerChar = marker[0];
const markerLen = marker.length;
if (!open) {
open = {
start: offset,
markerChar,
markerLen,
openLine: line,
marker,
indent,
};
} else if (
open.markerChar === markerChar &&
markerLen >= open.markerLen
) {
const end = nextNewline === -1 ? buffer.length : nextNewline + 1;
spans.push({
start: open.start,
end,
openLine: open.openLine,
marker: open.marker,
indent: open.indent,
});
open = undefined;
}
}
if (nextNewline === -1) break;
offset = nextNewline + 1;
}
if (open) {
spans.push({
start: open.start,
end: buffer.length,
openLine: open.openLine,
marker: open.marker,
indent: open.indent,
});
}
return spans;
};
const findFenceSpanAt = (
spans: FenceSpan[],
index: number,
): FenceSpan | undefined =>
spans.find((span) => index > span.start && index < span.end);
const isSafeBreak = (spans: FenceSpan[], index: number): boolean =>
!findFenceSpanAt(spans, index);
const pickBreakIndex = (buffer: string): BreakResult => {
if (!blockChunking) return { index: -1 };
const minChars = Math.max(1, Math.floor(blockChunking.minChars)); const minChars = Math.max(1, Math.floor(blockChunking.minChars));
const maxChars = Math.max(minChars, Math.floor(blockChunking.maxChars)); const maxChars = Math.max(minChars, Math.floor(blockChunking.maxChars));
if (buffer.length < minChars) return -1; if (buffer.length < minChars) return { index: -1 };
const window = buffer.slice(0, Math.min(maxChars, buffer.length)); const window = buffer.slice(0, Math.min(maxChars, buffer.length));
const fenceSpans = parseFenceSpans(buffer);
const preference = blockChunking.breakPreference ?? "paragraph"; const preference = blockChunking.breakPreference ?? "paragraph";
const paragraphIdx = window.lastIndexOf("\n\n"); if (preference === "paragraph") {
if (preference === "paragraph" && paragraphIdx >= minChars) { let paragraphIdx = window.lastIndexOf("\n\n");
return paragraphIdx; while (paragraphIdx >= minChars) {
if (isSafeBreak(fenceSpans, paragraphIdx)) {
return { index: paragraphIdx };
}
paragraphIdx = window.lastIndexOf("\n\n", paragraphIdx - 1);
}
} }
const newlineIdx = window.lastIndexOf("\n"); if (preference === "paragraph" || preference === "newline") {
if ( let newlineIdx = window.lastIndexOf("\n");
(preference === "paragraph" || preference === "newline") && while (newlineIdx >= minChars) {
newlineIdx >= minChars if (isSafeBreak(fenceSpans, newlineIdx)) {
) { return { index: newlineIdx };
return newlineIdx; }
newlineIdx = window.lastIndexOf("\n", newlineIdx - 1);
}
} }
if (preference !== "newline") { if (preference !== "newline") {
const sentenceIdx = findSentenceBreak(window, minChars); const matches = window.matchAll(/[.!?](?=\s|$)/g);
if (sentenceIdx >= minChars) return sentenceIdx; let sentenceIdx = -1;
for (const match of matches) {
const at = match.index ?? -1;
if (at < minChars) continue;
const candidate = at + 1;
if (isSafeBreak(fenceSpans, candidate)) {
sentenceIdx = candidate;
}
}
if (sentenceIdx >= minChars) return { index: sentenceIdx };
} }
const whitespaceIdx = findWhitespaceBreak(window, minChars); for (let i = window.length - 1; i >= minChars; i--) {
if (whitespaceIdx >= minChars) return whitespaceIdx; if (/\s/.test(window[i]) && isSafeBreak(fenceSpans, i)) {
return { index: i };
}
}
if (buffer.length >= maxChars) return maxChars; if (buffer.length >= maxChars) {
return -1; if (isSafeBreak(fenceSpans, maxChars)) return { index: maxChars };
const fence = findFenceSpanAt(fenceSpans, maxChars);
if (fence) {
return {
index: maxChars,
fenceSplit: {
closeFenceLine: `${fence.indent}${fence.marker}`,
reopenFenceLine: fence.openLine,
},
};
}
return { index: maxChars };
}
return { index: -1 };
}; };
const emitBlockChunk = (text: string) => { const emitBlockChunk = (text: string) => {
@@ -279,25 +382,42 @@ export function subscribeEmbeddedPiSession(params: {
blockBuffer.length >= minChars || blockBuffer.length >= minChars ||
(force && blockBuffer.length > 0) (force && blockBuffer.length > 0)
) { ) {
const breakIdx = pickBreakIndex(blockBuffer); const breakResult = pickBreakIndex(blockBuffer);
if (breakIdx <= 0) { if (breakResult.index <= 0) {
if (force) { if (force) {
emitBlockChunk(blockBuffer); emitBlockChunk(blockBuffer);
blockBuffer = ""; blockBuffer = "";
} }
return; return;
} }
const rawChunk = blockBuffer.slice(0, breakIdx); const breakIdx = breakResult.index;
let rawChunk = blockBuffer.slice(0, breakIdx);
if (rawChunk.trim().length === 0) { if (rawChunk.trim().length === 0) {
blockBuffer = blockBuffer.slice(breakIdx).trimStart(); blockBuffer = blockBuffer.slice(breakIdx).trimStart();
continue; continue;
} }
let nextBuffer = blockBuffer.slice(breakIdx);
const fenceSplit = breakResult.fenceSplit;
if (fenceSplit) {
const closeFence = rawChunk.endsWith("\n")
? `${fenceSplit.closeFenceLine}\n`
: `\n${fenceSplit.closeFenceLine}\n`;
rawChunk = `${rawChunk}${closeFence}`;
const reopenFence = fenceSplit.reopenFenceLine.endsWith("\n")
? fenceSplit.reopenFenceLine
: `${fenceSplit.reopenFenceLine}\n`;
nextBuffer = `${reopenFence}${nextBuffer}`;
}
emitBlockChunk(rawChunk); emitBlockChunk(rawChunk);
const nextStart = if (fenceSplit) {
breakIdx < blockBuffer.length && /\s/.test(blockBuffer[breakIdx]) blockBuffer = nextBuffer;
? breakIdx + 1 } else {
: breakIdx; const nextStart =
blockBuffer = blockBuffer.slice(nextStart).trimStart(); breakIdx < blockBuffer.length && /\s/.test(blockBuffer[breakIdx])
? breakIdx + 1
: breakIdx;
blockBuffer = blockBuffer.slice(nextStart).trimStart();
}
if (blockBuffer.length < minChars && !force) return; if (blockBuffer.length < minChars && !force) return;
if (blockBuffer.length < maxChars && !force) return; if (blockBuffer.length < maxChars && !force) return;
} }