refactor(agent): extract block chunker + tool adapter

This commit is contained in:
Peter Steinberger
2026-01-05 18:05:40 +00:00
parent 7c89ce93b5
commit 86ad703f53
4 changed files with 361 additions and 315 deletions

View File

@@ -0,0 +1,302 @@
/**
 * Size limits and boundary preference that EmbeddedBlockChunker uses when
 * slicing buffered text into chunks.
 */
export type BlockReplyChunking = {
// Lower bound: a non-forced drain emits nothing until this many characters are
// buffered, and break positions below it are ignored. The chunker floors the
// value and clamps it to at least 1.
minChars: number;
// Upper bound: size of the window searched for a break and the position of the
// hard split when no boundary is found. The chunker floors the value and
// clamps it to at least minChars.
maxChars: number;
// Boundary tried first when choosing a break; the chunker falls back to
// "paragraph" when this is omitted.
breakPreference?: "paragraph" | "newline" | "sentence";
};
/** Location and opening details of one fenced code block found in a buffer. */
type FenceSpan = {
// Offset of the first character of the opening fence line.
start: number;
// Exclusive end: the offset just past the closing fence line's newline, or the
// buffer length when the closing line has no newline or the fence is unclosed.
end: number;
// Full text of the opening fence line (no trailing newline), including any
// text that follows the marker.
openLine: string;
// The run of backticks or tildes that opened the fence.
marker: string;
// The zero to three leading spaces that preceded the marker.
indent: string;
};
/** Lines needed to cut a chunk in the middle of a fenced code block. */
type FenceSplit = {
// Fence line appended to the emitted chunk so the block is closed there.
closeFenceLine: string;
// Fence line prepended to the remaining buffer so the block is reopened.
reopenFenceLine: string;
};
/** Outcome of a break search over the buffer. */
type BreakResult = {
// Offset at which to cut the buffer; values <= 0 mean no usable break.
index: number;
// Present only when the cut lands inside a fenced code block that must be
// closed in the chunk and reopened in the remainder.
fenceSplit?: FenceSplit;
};
/**
 * Buffers streamed text and slices it into chunks bounded by the configured
 * `minChars` / `maxChars`, preferring paragraph, newline, or sentence
 * boundaries and avoiding cuts inside fenced code blocks.
 *
 * When a cut inside a fence cannot be avoided, the emitted chunk is closed
 * with a fence line and the remaining buffer is reopened with the original
 * opening line.
 */
export class EmbeddedBlockChunker {
  #buffer = "";
  readonly #chunking: BlockReplyChunking;

  /**
   * @param chunking Size limits and break preference. The limits are read on
   *   every drain: floored, with `minChars` clamped to >= 1 and `maxChars`
   *   clamped to >= `minChars`.
   */
  constructor(chunking: BlockReplyChunking) {
    this.#chunking = chunking;
  }

  /** Appends `text` to the buffer; empty input is ignored. */
  append(text: string) {
    if (!text) return;
    this.#buffer += text;
  }

  /** Discards everything currently buffered. */
  reset() {
    this.#buffer = "";
  }

  /** The text that has been appended but not yet emitted. */
  get bufferedText() {
    return this.#buffer;
  }

  /** Returns true when at least one character is buffered. */
  hasBuffered(): boolean {
    return this.#buffer.length > 0;
  }

  /**
   * Emits chunks from the front of the buffer.
   *
   * Without `force`, nothing happens until `minChars` characters are buffered;
   * after a chunk is emitted the loop only continues while at least `maxChars`
   * characters remain. With `force`, the loop runs until the buffer is empty,
   * and whatever cannot be split is emitted as a single final chunk.
   *
   * @param params.force Flush everything, ignoring the `minChars` threshold.
   * @param params.emit Receives each chunk, in order.
   */
  drain(params: { force: boolean; emit: (chunk: string) => void }) {
    const { force, emit } = params;
    const minChars = Math.max(1, Math.floor(this.#chunking.minChars));
    const maxChars = Math.max(minChars, Math.floor(this.#chunking.maxChars));
    if (this.#buffer.length < minChars && !force) return;
    while (
      this.#buffer.length >= minChars ||
      (force && this.#buffer.length > 0)
    ) {
      // A forced flush of a buffer that already fits in one chunk uses the
      // forward (soft) search with a minimum of 1; otherwise search backward
      // inside the maxChars window.
      const breakResult =
        force && this.#buffer.length <= maxChars
          ? this.#pickSoftBreakIndex(this.#buffer, 1)
          : this.#pickBreakIndex(this.#buffer);
      if (breakResult.index <= 0) {
        // No usable break: a forced drain emits the remainder whole, a
        // non-forced drain waits for more text.
        if (force) {
          emit(this.#buffer);
          this.#buffer = "";
        }
        return;
      }
      const breakIdx = breakResult.index;
      let rawChunk = this.#buffer.slice(0, breakIdx);
      if (rawChunk.trim().length === 0) {
        // Whitespace-only chunk: drop it together with the whitespace that
        // follows, then look for the next break.
        this.#buffer = stripLeadingNewlines(
          this.#buffer.slice(breakIdx),
        ).trimStart();
        continue;
      }
      let nextBuffer = this.#buffer.slice(breakIdx);
      const fenceSplit = breakResult.fenceSplit;
      if (fenceSplit) {
        // Close the fence at the end of this chunk...
        const closeFence = rawChunk.endsWith("\n")
          ? `${fenceSplit.closeFenceLine}\n`
          : `\n${fenceSplit.closeFenceLine}\n`;
        rawChunk = `${rawChunk}${closeFence}`;
        // ...and reopen it at the start of what remains.
        const reopenFence = fenceSplit.reopenFenceLine.endsWith("\n")
          ? fenceSplit.reopenFenceLine
          : `${fenceSplit.reopenFenceLine}\n`;
        nextBuffer = `${reopenFence}${nextBuffer}`;
      }
      emit(rawChunk);
      if (fenceSplit) {
        this.#buffer = nextBuffer;
      } else {
        // Skip the single whitespace character the break landed on, then any
        // newlines that would otherwise lead the next chunk.
        const nextStart =
          breakIdx < this.#buffer.length && /\s/.test(this.#buffer[breakIdx])
            ? breakIdx + 1
            : breakIdx;
        this.#buffer = stripLeadingNewlines(this.#buffer.slice(nextStart));
      }
      // A non-forced drain keeps going only while a full window remains.
      // Because maxChars >= minChars this also covers "below minChars".
      if (this.#buffer.length < maxChars && !force) return;
    }
  }

  /**
   * Forward search over the whole buffer for a boundary at or after the
   * minimum length that is not inside a fenced code block.
   *
   * Tries, depending on the break preference: the first paragraph break
   * ("\n\n"), then the first newline, then the position after the last
   * sentence terminator.
   *
   * @param buffer Text to search.
   * @param minCharsOverride Minimum break offset to use instead of the
   *   configured `minChars`.
   * @returns The break offset, or `{ index: -1 }` when none qualifies.
   */
  #pickSoftBreakIndex(buffer: string, minCharsOverride?: number): BreakResult {
    const minChars = Math.max(
      1,
      Math.floor(minCharsOverride ?? this.#chunking.minChars),
    );
    if (buffer.length < minChars) return { index: -1 };
    const fenceSpans = parseFenceSpans(buffer);
    const preference = this.#chunking.breakPreference ?? "paragraph";
    if (preference === "paragraph") {
      let paragraphIdx = buffer.indexOf("\n\n");
      while (paragraphIdx !== -1) {
        if (paragraphIdx >= minChars && isSafeBreak(fenceSpans, paragraphIdx)) {
          return { index: paragraphIdx };
        }
        paragraphIdx = buffer.indexOf("\n\n", paragraphIdx + 2);
      }
    }
    if (preference === "paragraph" || preference === "newline") {
      let newlineIdx = buffer.indexOf("\n");
      while (newlineIdx !== -1) {
        if (newlineIdx >= minChars && isSafeBreak(fenceSpans, newlineIdx)) {
          return { index: newlineIdx };
        }
        newlineIdx = buffer.indexOf("\n", newlineIdx + 1);
      }
    }
    if (preference !== "newline") {
      // Sentence end: ".", "!" or "?" followed by whitespace or end of text.
      // The last qualifying match wins.
      const matches = buffer.matchAll(/[.!?](?=\s|$)/g);
      let sentenceIdx = -1;
      for (const match of matches) {
        const at = match.index ?? -1;
        if (at < minChars) continue;
        const candidate = at + 1;
        if (isSafeBreak(fenceSpans, candidate)) {
          sentenceIdx = candidate;
        }
      }
      if (sentenceIdx >= minChars) return { index: sentenceIdx };
    }
    return { index: -1 };
  }

  /**
   * Backward search inside the first `maxChars` characters for the latest
   * boundary at or after `minChars` that is not inside a fenced code block.
   *
   * Tries, depending on the break preference: paragraph break, newline,
   * sentence end, then any whitespace. When nothing qualifies and the buffer
   * has reached `maxChars`, falls back to a hard split at `maxChars`.
   *
   * @param buffer Text to search.
   * @returns The break offset (with `fenceSplit` when the hard split lands
   *   inside a fence), or `{ index: -1 }` when the buffer should keep growing.
   */
  #pickBreakIndex(buffer: string): BreakResult {
    const minChars = Math.max(1, Math.floor(this.#chunking.minChars));
    const maxChars = Math.max(minChars, Math.floor(this.#chunking.maxChars));
    if (buffer.length < minChars) return { index: -1 };
    const window = buffer.slice(0, Math.min(maxChars, buffer.length));
    // Fences are parsed over the whole buffer so a block that closes beyond
    // the window is still recognised.
    const fenceSpans = parseFenceSpans(buffer);
    const preference = this.#chunking.breakPreference ?? "paragraph";
    if (preference === "paragraph") {
      let paragraphIdx = window.lastIndexOf("\n\n");
      while (paragraphIdx >= minChars) {
        if (isSafeBreak(fenceSpans, paragraphIdx)) {
          return { index: paragraphIdx };
        }
        paragraphIdx = window.lastIndexOf("\n\n", paragraphIdx - 1);
      }
    }
    if (preference === "paragraph" || preference === "newline") {
      let newlineIdx = window.lastIndexOf("\n");
      while (newlineIdx >= minChars) {
        if (isSafeBreak(fenceSpans, newlineIdx)) {
          return { index: newlineIdx };
        }
        newlineIdx = window.lastIndexOf("\n", newlineIdx - 1);
      }
    }
    if (preference !== "newline") {
      const matches = window.matchAll(/[.!?](?=\s|$)/g);
      let sentenceIdx = -1;
      for (const match of matches) {
        const at = match.index ?? -1;
        if (at < minChars) continue;
        const candidate = at + 1;
        if (isSafeBreak(fenceSpans, candidate)) {
          sentenceIdx = candidate;
        }
      }
      if (sentenceIdx >= minChars) return { index: sentenceIdx };
    }
    for (let i = window.length - 1; i >= minChars; i--) {
      if (/\s/.test(window[i]) && isSafeBreak(fenceSpans, i)) {
        return { index: i };
      }
    }
    if (buffer.length >= maxChars) {
      if (isSafeBreak(fenceSpans, maxChars)) return { index: maxChars };
      const fence = findFenceSpanAt(fenceSpans, maxChars);
      // Splitting a fence prepends `openLine` plus a newline to the remaining
      // buffer. That only shrinks the buffer when the reopened line is shorter
      // than the maxChars just consumed; otherwise drain() would never
      // terminate, so fall through to a plain hard split instead.
      if (fence && fence.openLine.length + 1 < maxChars) {
        return {
          index: maxChars,
          fenceSplit: {
            closeFenceLine: `${fence.indent}${fence.marker}`,
            reopenFenceLine: fence.openLine,
          },
        };
      }
      return { index: maxChars };
    }
    return { index: -1 };
  }
}
/**
 * Removes every "\n" at the start of `value`. Other leading whitespace
 * (spaces, tabs, "\r") is left in place.
 */
function stripLeadingNewlines(value: string): string {
  return value.replace(/^\n+/, "");
}
/**
 * Scans `buffer` line by line and returns the fenced code blocks it contains.
 *
 * A fence line is zero to three spaces followed by three or more backticks or
 * tildes. A fence is closed by a later fence line that uses the same marker
 * character, is at least as long as the opening marker, and has nothing but
 * whitespace after the marker. A line such as "```js" inside an open block is
 * content, not a closing fence. A fence still open at the end of the buffer
 * yields a span that runs to the buffer's end.
 *
 * @param buffer Text to scan.
 * @returns Spans in document order; `end` is exclusive.
 */
function parseFenceSpans(buffer: string): FenceSpan[] {
  const spans: FenceSpan[] = [];
  let open:
    | {
        start: number;
        markerChar: string;
        markerLen: number;
        openLine: string;
        marker: string;
        indent: string;
      }
    | undefined;
  let offset = 0;
  while (offset <= buffer.length) {
    const nextNewline = buffer.indexOf("\n", offset);
    const lineEnd = nextNewline === -1 ? buffer.length : nextNewline;
    const line = buffer.slice(offset, lineEnd);
    const match = line.match(/^( {0,3})(`{3,}|~{3,})(.*)$/);
    if (match) {
      const indent = match[1];
      const marker = match[2];
      const trailing = match[3];
      const markerChar = marker[0];
      const markerLen = marker.length;
      if (!open) {
        open = {
          start: offset,
          markerChar,
          markerLen,
          openLine: line,
          marker,
          indent,
        };
      } else if (
        open.markerChar === markerChar &&
        markerLen >= open.markerLen &&
        // A closing fence carries no info string; anything else after the
        // marker means this line is still part of the block's content.
        trailing.trim() === ""
      ) {
        // The span includes the closing line's newline when it has one.
        const end = nextNewline === -1 ? buffer.length : nextNewline + 1;
        spans.push({
          start: open.start,
          end,
          openLine: open.openLine,
          marker: open.marker,
          indent: open.indent,
        });
        open = undefined;
      }
    }
    if (nextNewline === -1) break;
    offset = nextNewline + 1;
  }
  if (open) {
    // Unclosed fence: treat everything up to the end of the buffer as fenced.
    spans.push({
      start: open.start,
      end: buffer.length,
      openLine: open.openLine,
      marker: open.marker,
      indent: open.indent,
    });
  }
  return spans;
}
/**
 * Returns the first span that strictly contains `index`
 * (`start < index < end`), or `undefined` when no span does.
 */
function findFenceSpanAt(
  spans: FenceSpan[],
  index: number,
): FenceSpan | undefined {
  for (const candidate of spans) {
    const afterStart = candidate.start < index;
    const beforeEnd = candidate.end > index;
    if (afterStart && beforeEnd) return candidate;
  }
  return undefined;
}
/**
 * Returns true when cutting at `index` would not land strictly inside any of
 * the given fence spans.
 */
function isSafeBreak(spans: FenceSpan[], index: number): boolean {
  const enclosing = findFenceSpanAt(spans, index);
  return enclosing === undefined;
}

View File

@@ -1,12 +1,7 @@
import fs from "node:fs/promises";
import os from "node:os";
import type {
AgentMessage,
AgentToolResult,
AgentToolUpdateCallback,
ThinkingLevel,
} from "@mariozechner/pi-agent-core";
import type { AgentMessage, ThinkingLevel } from "@mariozechner/pi-agent-core";
import type { Api, AssistantMessage, Model } from "@mariozechner/pi-ai";
import {
buildSystemPrompt,
@@ -16,7 +11,6 @@ import {
SessionManager,
SettingsManager,
type Skill,
type ToolDefinition,
} from "@mariozechner/pi-coding-agent";
import type { ThinkLevel, VerboseLevel } from "../auto-reply/thinking.js";
import { formatToolAggregate } from "../auto-reply/tool-meta.js";
@@ -45,6 +39,7 @@ import {
subscribeEmbeddedPiSession,
} from "./pi-embedded-subscribe.js";
import { extractAssistantText } from "./pi-embedded-utils.js";
import { toToolDefinitions } from "./pi-tool-definition-adapter.js";
import { createClawdbotCodingTools } from "./pi-tools.js";
import { resolveSandboxContext } from "./sandbox.js";
import {
@@ -58,35 +53,6 @@ import {
import { buildAgentSystemPromptAppend } from "./system-prompt.js";
import { loadWorkspaceBootstrapFiles } from "./workspace.js";
/**
 * Converts loosely-typed tool objects into ToolDefinition records.
 *
 * `name` falls back to "tool" when it is not a string, `label` falls back to
 * the resolved name, and `description` falls back to an empty string.
 */
function toToolDefinitions(tools: { execute: unknown }[]): ToolDefinition[] {
return tools.map((tool) => {
// View the tool through the shape this adapter relies on; the metadata fields
// are validated with typeof below before they are used.
const record = tool as {
name?: unknown;
label?: unknown;
description?: unknown;
parameters?: unknown;
execute: (
toolCallId: string,
params: unknown,
signal?: AbortSignal,
onUpdate?: AgentToolUpdateCallback<unknown>,
) => Promise<AgentToolResult<unknown>>;
};
const name = typeof record.name === "string" ? record.name : "tool";
return {
name,
label: typeof record.label === "string" ? record.label : name,
description:
typeof record.description === "string" ? record.description : "",
// biome-ignore lint/suspicious/noExplicitAny: TypeBox schema from pi-agent-core uses a different module instance.
parameters: record.parameters as any,
// The definition receives (toolCallId, params, onUpdate, ctx, signal) while the
// tool expects (toolCallId, params, signal, onUpdate); ctx is not forwarded.
execute: async (toolCallId, params, onUpdate, _ctx, signal) => {
return await record.execute(toolCallId, params, signal, onUpdate);
},
} satisfies ToolDefinition;
});
}
export type EmbeddedPiAgentMeta = {
sessionId: string;
provider: string;

View File

@@ -6,6 +6,8 @@ import { formatToolAggregate } from "../auto-reply/tool-meta.js";
import { emitAgentEvent } from "../infra/agent-events.js";
import { createSubsystemLogger } from "../logging.js";
import { splitMediaFromOutput } from "../media/parse.js";
import type { BlockReplyChunking } from "./pi-embedded-block-chunker.js";
import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";
import {
extractAssistantText,
inferToolMetaFromArgs,
@@ -17,11 +19,7 @@ const THINKING_CLOSE_RE = /<\s*\/\s*think(?:ing)?\s*>/i;
const TOOL_RESULT_MAX_CHARS = 8000;
const log = createSubsystemLogger("agent/embedded");
export type BlockReplyChunking = {
minChars: number;
maxChars: number;
breakPreference?: "paragraph" | "newline" | "sentence";
};
export type { BlockReplyChunking } from "./pi-embedded-block-chunker.js";
function truncateToolText(text: string): string {
if (text.length <= TOOL_RESULT_MAX_CHARS) return text;
@@ -176,6 +174,9 @@ export function subscribeEmbeddedPiSession(params: {
};
const blockChunking = params.blockReplyChunking;
const blockChunker = blockChunking
? new EmbeddedBlockChunker(blockChunking)
: null;
const shouldEmitToolResult = () =>
typeof params.shouldEmitToolResult === "function"
? params.shouldEmitToolResult()
@@ -195,217 +196,6 @@ export function subscribeEmbeddedPiSession(params: {
}
};
type FenceSpan = {
start: number;
end: number;
openLine: string;
marker: string;
indent: string;
};
type FenceSplit = {
closeFenceLine: string;
reopenFenceLine: string;
};
type BreakResult = {
index: number;
fenceSplit?: FenceSplit;
};
const parseFenceSpans = (buffer: string): FenceSpan[] => {
const spans: FenceSpan[] = [];
let open:
| {
start: number;
markerChar: string;
markerLen: number;
openLine: string;
marker: string;
indent: string;
}
| undefined;
let offset = 0;
while (offset <= buffer.length) {
const nextNewline = buffer.indexOf("\n", offset);
const lineEnd = nextNewline === -1 ? buffer.length : nextNewline;
const line = buffer.slice(offset, lineEnd);
const match = line.match(/^( {0,3})(`{3,}|~{3,})(.*)$/);
if (match) {
const indent = match[1];
const marker = match[2];
const markerChar = marker[0];
const markerLen = marker.length;
if (!open) {
open = {
start: offset,
markerChar,
markerLen,
openLine: line,
marker,
indent,
};
} else if (
open.markerChar === markerChar &&
markerLen >= open.markerLen
) {
const end = nextNewline === -1 ? buffer.length : nextNewline + 1;
spans.push({
start: open.start,
end,
openLine: open.openLine,
marker: open.marker,
indent: open.indent,
});
open = undefined;
}
}
if (nextNewline === -1) break;
offset = nextNewline + 1;
}
if (open) {
spans.push({
start: open.start,
end: buffer.length,
openLine: open.openLine,
marker: open.marker,
indent: open.indent,
});
}
return spans;
};
const findFenceSpanAt = (
spans: FenceSpan[],
index: number,
): FenceSpan | undefined =>
spans.find((span) => index > span.start && index < span.end);
const isSafeBreak = (spans: FenceSpan[], index: number): boolean =>
!findFenceSpanAt(spans, index);
const stripLeadingNewlines = (value: string): string => {
let i = 0;
while (i < value.length && value[i] === "\n") i++;
return i > 0 ? value.slice(i) : value;
};
const pickSoftBreakIndex = (
buffer: string,
minCharsOverride?: number,
): BreakResult => {
if (!blockChunking) return { index: -1 };
const minChars = Math.max(
1,
Math.floor(minCharsOverride ?? blockChunking.minChars),
);
if (buffer.length < minChars) return { index: -1 };
const fenceSpans = parseFenceSpans(buffer);
const preference = blockChunking.breakPreference ?? "paragraph";
if (preference === "paragraph") {
let paragraphIdx = buffer.indexOf("\n\n");
while (paragraphIdx !== -1) {
if (paragraphIdx >= minChars && isSafeBreak(fenceSpans, paragraphIdx)) {
return { index: paragraphIdx };
}
paragraphIdx = buffer.indexOf("\n\n", paragraphIdx + 2);
}
}
if (preference === "paragraph" || preference === "newline") {
let newlineIdx = buffer.indexOf("\n");
while (newlineIdx !== -1) {
if (newlineIdx >= minChars && isSafeBreak(fenceSpans, newlineIdx)) {
return { index: newlineIdx };
}
newlineIdx = buffer.indexOf("\n", newlineIdx + 1);
}
}
if (preference !== "newline") {
const matches = buffer.matchAll(/[.!?](?=\s|$)/g);
let sentenceIdx = -1;
for (const match of matches) {
const at = match.index ?? -1;
if (at < minChars) continue;
const candidate = at + 1;
if (isSafeBreak(fenceSpans, candidate)) {
sentenceIdx = candidate;
}
}
if (sentenceIdx >= minChars) return { index: sentenceIdx };
}
return { index: -1 };
};
const pickBreakIndex = (buffer: string): BreakResult => {
if (!blockChunking) return { index: -1 };
const minChars = Math.max(1, Math.floor(blockChunking.minChars));
const maxChars = Math.max(minChars, Math.floor(blockChunking.maxChars));
if (buffer.length < minChars) return { index: -1 };
const window = buffer.slice(0, Math.min(maxChars, buffer.length));
const fenceSpans = parseFenceSpans(buffer);
const preference = blockChunking.breakPreference ?? "paragraph";
if (preference === "paragraph") {
let paragraphIdx = window.lastIndexOf("\n\n");
while (paragraphIdx >= minChars) {
if (isSafeBreak(fenceSpans, paragraphIdx)) {
return { index: paragraphIdx };
}
paragraphIdx = window.lastIndexOf("\n\n", paragraphIdx - 1);
}
}
if (preference === "paragraph" || preference === "newline") {
let newlineIdx = window.lastIndexOf("\n");
while (newlineIdx >= minChars) {
if (isSafeBreak(fenceSpans, newlineIdx)) {
return { index: newlineIdx };
}
newlineIdx = window.lastIndexOf("\n", newlineIdx - 1);
}
}
if (preference !== "newline") {
const matches = window.matchAll(/[.!?](?=\s|$)/g);
let sentenceIdx = -1;
for (const match of matches) {
const at = match.index ?? -1;
if (at < minChars) continue;
const candidate = at + 1;
if (isSafeBreak(fenceSpans, candidate)) {
sentenceIdx = candidate;
}
}
if (sentenceIdx >= minChars) return { index: sentenceIdx };
}
for (let i = window.length - 1; i >= minChars; i--) {
if (/\s/.test(window[i]) && isSafeBreak(fenceSpans, i)) {
return { index: i };
}
}
if (buffer.length >= maxChars) {
if (isSafeBreak(fenceSpans, maxChars)) return { index: maxChars };
const fence = findFenceSpanAt(fenceSpans, maxChars);
if (fence) {
return {
index: maxChars,
fenceSplit: {
closeFenceLine: `${fence.indent}${fence.marker}`,
reopenFenceLine: fence.openLine,
},
};
}
return { index: maxChars };
}
return { index: -1 };
};
const emitBlockChunk = (text: string) => {
// Strip any <thinking> tags that may have leaked into the output (e.g., from Gemini mimicking history)
const strippedText = stripThinkingSegments(stripUnpairedThinkingTags(text));
@@ -423,61 +213,6 @@ export function subscribeEmbeddedPiSession(params: {
});
};
const drainBlockBuffer = (force: boolean) => {
if (!blockChunking) return;
const minChars = Math.max(1, Math.floor(blockChunking.minChars));
const maxChars = Math.max(minChars, Math.floor(blockChunking.maxChars));
if (blockBuffer.length < minChars && !force) return;
while (
blockBuffer.length >= minChars ||
(force && blockBuffer.length > 0)
) {
const breakResult =
force && blockBuffer.length <= maxChars
? pickSoftBreakIndex(blockBuffer, 1)
: pickBreakIndex(blockBuffer);
if (breakResult.index <= 0) {
if (force) {
emitBlockChunk(blockBuffer);
blockBuffer = "";
}
return;
}
const breakIdx = breakResult.index;
let rawChunk = blockBuffer.slice(0, breakIdx);
if (rawChunk.trim().length === 0) {
blockBuffer = stripLeadingNewlines(
blockBuffer.slice(breakIdx),
).trimStart();
continue;
}
let nextBuffer = blockBuffer.slice(breakIdx);
const fenceSplit = breakResult.fenceSplit;
if (fenceSplit) {
const closeFence = rawChunk.endsWith("\n")
? `${fenceSplit.closeFenceLine}\n`
: `\n${fenceSplit.closeFenceLine}\n`;
rawChunk = `${rawChunk}${closeFence}`;
const reopenFence = fenceSplit.reopenFenceLine.endsWith("\n")
? fenceSplit.reopenFenceLine
: `${fenceSplit.reopenFenceLine}\n`;
nextBuffer = `${reopenFence}${nextBuffer}`;
}
emitBlockChunk(rawChunk);
if (fenceSplit) {
blockBuffer = nextBuffer;
} else {
const nextStart =
breakIdx < blockBuffer.length && /\s/.test(blockBuffer[breakIdx])
? breakIdx + 1
: breakIdx;
blockBuffer = stripLeadingNewlines(blockBuffer.slice(nextStart));
}
if (blockBuffer.length < minChars && !force) return;
if (blockBuffer.length < maxChars && !force) return;
}
};
const resetForCompactionRetry = () => {
assistantTexts.length = 0;
toolMetas.length = 0;
@@ -485,6 +220,7 @@ export function subscribeEmbeddedPiSession(params: {
toolSummaryById.clear();
deltaBuffer = "";
blockBuffer = "";
blockChunker?.reset();
lastStreamedAssistant = undefined;
lastBlockReplyText = undefined;
assistantTextBaseline = 0;
@@ -500,6 +236,7 @@ export function subscribeEmbeddedPiSession(params: {
// otherwise re-trigger block replies.
deltaBuffer = "";
blockBuffer = "";
blockChunker?.reset();
lastStreamedAssistant = undefined;
lastBlockReplyText = undefined;
assistantTextBaseline = assistantTexts.length;
@@ -662,7 +399,11 @@ export function subscribeEmbeddedPiSession(params: {
}
if (chunk) {
deltaBuffer += chunk;
blockBuffer += chunk;
if (blockChunker) {
blockChunker.append(chunk);
} else {
blockBuffer += chunk;
}
}
const cleaned = params.enforceFinalTag
@@ -703,16 +444,17 @@ export function subscribeEmbeddedPiSession(params: {
blockChunking &&
blockReplyBreak === "text_end"
) {
drainBlockBuffer(false);
blockChunker?.drain({ force: false, emit: emitBlockChunk });
}
if (evtType === "text_end" && blockReplyBreak === "text_end") {
if (blockChunking && blockBuffer.length > 0) {
drainBlockBuffer(true);
if (blockChunker?.hasBuffered()) {
blockChunker.drain({ force: true, emit: emitBlockChunk });
blockChunker.reset();
} else if (blockBuffer.length > 0) {
emitBlockChunk(blockBuffer);
blockBuffer = "";
}
blockBuffer = "";
}
}
}
@@ -745,12 +487,16 @@ export function subscribeEmbeddedPiSession(params: {
assistantTextBaseline = assistantTexts.length;
if (
(blockReplyBreak === "message_end" || blockBuffer.length > 0) &&
(blockReplyBreak === "message_end" ||
(blockChunker
? blockChunker.hasBuffered()
: blockBuffer.length > 0)) &&
text &&
params.onBlockReply
) {
if (blockChunking && blockBuffer.length > 0) {
drainBlockBuffer(true);
if (blockChunker?.hasBuffered()) {
blockChunker.drain({ force: true, emit: emitBlockChunk });
blockChunker.reset();
} else if (text !== lastBlockReplyText) {
lastBlockReplyText = text;
const { text: cleanedText, mediaUrls } =
@@ -765,6 +511,7 @@ export function subscribeEmbeddedPiSession(params: {
}
deltaBuffer = "";
blockBuffer = "";
blockChunker?.reset();
lastStreamedAssistant = undefined;
}
}

View File

@@ -0,0 +1,31 @@
import type {
AgentTool,
AgentToolResult,
AgentToolUpdateCallback,
} from "@mariozechner/pi-agent-core";
import type { ToolDefinition } from "@mariozechner/pi-coding-agent";
// Tool type accepted by toToolDefinitions: the first type argument is left as
// `any` and the second is `unknown`.
// biome-ignore lint/suspicious/noExplicitAny: TypeBox schema type from pi-agent-core uses a different module instance.
type AnyAgentTool = AgentTool<any, unknown>;
/**
 * Adapts agent tools to ToolDefinition records.
 *
 * An empty `name` becomes "tool", a missing `label` becomes the resolved name,
 * and a missing `description` becomes an empty string. The generated `execute`
 * reorders its arguments: it receives
 * (toolCallId, params, onUpdate, ctx, signal) and calls the tool with
 * (toolCallId, params, signal, onUpdate); ctx is not forwarded.
 */
export function toToolDefinitions(tools: AnyAgentTool[]): ToolDefinition[] {
  const adaptTool = (tool: AnyAgentTool) => {
    const resolvedName = tool.name ? tool.name : "tool";
    const definition = {
      name: resolvedName,
      label: tool.label ?? resolvedName,
      description: tool.description ?? "",
      // biome-ignore lint/suspicious/noExplicitAny: TypeBox schema from pi-agent-core uses a different module instance.
      parameters: tool.parameters as any,
      execute: async (
        toolCallId,
        params,
        onUpdate: AgentToolUpdateCallback<unknown> | undefined,
        _ctx,
        signal,
      ): Promise<AgentToolResult<unknown>> => {
        const pending = tool.execute(toolCallId, params, signal, onUpdate);
        return pending;
      },
    } satisfies ToolDefinition;
    return definition;
  };
  return tools.map((tool) => adaptTool(tool));
}