fix(agents): suppress partial replies with reasoning
This commit is contained in:
@@ -402,6 +402,68 @@ describe("subscribeEmbeddedPiSession", () => {
|
|||||||
expect(subscription.assistantTexts).toEqual(["Final answer"]);
|
expect(subscription.assistantTexts).toEqual(["Final answer"]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("suppresses partial replies when reasoning is enabled and block replies are disabled", () => {
|
||||||
|
let handler: ((evt: unknown) => void) | undefined;
|
||||||
|
const session: StubSession = {
|
||||||
|
subscribe: (fn) => {
|
||||||
|
handler = fn;
|
||||||
|
return () => {};
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const onPartialReply = vi.fn();
|
||||||
|
|
||||||
|
const subscription = subscribeEmbeddedPiSession({
|
||||||
|
session: session as unknown as Parameters<
|
||||||
|
typeof subscribeEmbeddedPiSession
|
||||||
|
>[0]["session"],
|
||||||
|
runId: "run",
|
||||||
|
reasoningMode: "on",
|
||||||
|
onPartialReply,
|
||||||
|
});
|
||||||
|
|
||||||
|
handler?.({ type: "message_start", message: { role: "assistant" } });
|
||||||
|
handler?.({
|
||||||
|
type: "message_update",
|
||||||
|
message: { role: "assistant" },
|
||||||
|
assistantMessageEvent: {
|
||||||
|
type: "text_delta",
|
||||||
|
delta: "Draft ",
|
||||||
|
},
|
||||||
|
});
|
||||||
|
handler?.({
|
||||||
|
type: "message_update",
|
||||||
|
message: { role: "assistant" },
|
||||||
|
assistantMessageEvent: {
|
||||||
|
type: "text_delta",
|
||||||
|
delta: "reply",
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(onPartialReply).not.toHaveBeenCalled();
|
||||||
|
|
||||||
|
const assistantMessage = {
|
||||||
|
role: "assistant",
|
||||||
|
content: [
|
||||||
|
{ type: "thinking", thinking: "Because it helps" },
|
||||||
|
{ type: "text", text: "Final answer" },
|
||||||
|
],
|
||||||
|
} as AssistantMessage;
|
||||||
|
|
||||||
|
handler?.({ type: "message_end", message: assistantMessage });
|
||||||
|
handler?.({
|
||||||
|
type: "message_update",
|
||||||
|
message: { role: "assistant" },
|
||||||
|
assistantMessageEvent: {
|
||||||
|
type: "text_end",
|
||||||
|
content: "Draft reply",
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(onPartialReply).not.toHaveBeenCalled();
|
||||||
|
expect(subscription.assistantTexts).toEqual(["Final answer"]);
|
||||||
|
});
|
||||||
|
|
||||||
it("emits block replies on text_end and does not duplicate on message_end", () => {
|
it("emits block replies on text_end and does not duplicate on message_end", () => {
|
||||||
let handler: ((evt: unknown) => void) | undefined;
|
let handler: ((evt: unknown) => void) | undefined;
|
||||||
const session: StubSession = {
|
const session: StubSession = {
|
||||||
|
|||||||
@@ -223,6 +223,7 @@ export function subscribeEmbeddedPiSession(params: {
|
|||||||
const blockReplyBreak = params.blockReplyBreak ?? "text_end";
|
const blockReplyBreak = params.blockReplyBreak ?? "text_end";
|
||||||
const reasoningMode = params.reasoningMode ?? "off";
|
const reasoningMode = params.reasoningMode ?? "off";
|
||||||
const includeReasoning = reasoningMode === "on";
|
const includeReasoning = reasoningMode === "on";
|
||||||
|
const shouldEmitPartialReplies = !(includeReasoning && !params.onBlockReply);
|
||||||
const streamReasoning =
|
const streamReasoning =
|
||||||
reasoningMode === "stream" &&
|
reasoningMode === "stream" &&
|
||||||
typeof params.onReasoningStream === "function";
|
typeof params.onReasoningStream === "function";
|
||||||
@@ -241,6 +242,50 @@ export function subscribeEmbeddedPiSession(params: {
|
|||||||
let compactionRetryPromise: Promise<void> | null = null;
|
let compactionRetryPromise: Promise<void> | null = null;
|
||||||
let lastReasoningSent: string | undefined;
|
let lastReasoningSent: string | undefined;
|
||||||
|
|
||||||
|
const resetAssistantMessageState = (nextAssistantTextBaseline: number) => {
|
||||||
|
deltaBuffer = "";
|
||||||
|
blockBuffer = "";
|
||||||
|
blockChunker?.reset();
|
||||||
|
blockThinkingActive = false;
|
||||||
|
lastStreamedAssistant = undefined;
|
||||||
|
lastBlockReplyText = undefined;
|
||||||
|
lastStreamedReasoning = undefined;
|
||||||
|
lastReasoningSent = undefined;
|
||||||
|
suppressBlockChunks = false;
|
||||||
|
assistantTextBaseline = nextAssistantTextBaseline;
|
||||||
|
};
|
||||||
|
|
||||||
|
const finalizeAssistantTexts = (args: {
|
||||||
|
text: string;
|
||||||
|
addedDuringMessage: boolean;
|
||||||
|
chunkerHasBuffered: boolean;
|
||||||
|
}) => {
|
||||||
|
const { text, addedDuringMessage, chunkerHasBuffered } = args;
|
||||||
|
|
||||||
|
// If we're not streaming block replies, ensure the final payload includes
|
||||||
|
// the final text even when interim streaming was enabled.
|
||||||
|
if (includeReasoning && text && !params.onBlockReply) {
|
||||||
|
if (assistantTexts.length > assistantTextBaseline) {
|
||||||
|
assistantTexts.splice(
|
||||||
|
assistantTextBaseline,
|
||||||
|
assistantTexts.length - assistantTextBaseline,
|
||||||
|
text,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
const last = assistantTexts.at(-1);
|
||||||
|
if (!last || last !== text) assistantTexts.push(text);
|
||||||
|
}
|
||||||
|
suppressBlockChunks = true;
|
||||||
|
} else if (!addedDuringMessage && !chunkerHasBuffered && text) {
|
||||||
|
// Non-streaming models (no text_delta): ensure assistantTexts gets the final
|
||||||
|
// text when the chunker has nothing buffered to drain.
|
||||||
|
const last = assistantTexts.at(-1);
|
||||||
|
if (!last || last !== text) assistantTexts.push(text);
|
||||||
|
}
|
||||||
|
|
||||||
|
assistantTextBaseline = assistantTexts.length;
|
||||||
|
};
|
||||||
|
|
||||||
// ── Messaging tool duplicate detection ──────────────────────────────────────
|
// ── Messaging tool duplicate detection ──────────────────────────────────────
|
||||||
// Track texts sent via messaging tools to suppress duplicate block replies.
|
// Track texts sent via messaging tools to suppress duplicate block replies.
|
||||||
// Only committed (successful) texts are checked - pending texts are tracked
|
// Only committed (successful) texts are checked - pending texts are tracked
|
||||||
@@ -426,15 +471,7 @@ export function subscribeEmbeddedPiSession(params: {
|
|||||||
messagingToolSentTargets.length = 0;
|
messagingToolSentTargets.length = 0;
|
||||||
pendingMessagingTexts.clear();
|
pendingMessagingTexts.clear();
|
||||||
pendingMessagingTargets.clear();
|
pendingMessagingTargets.clear();
|
||||||
deltaBuffer = "";
|
resetAssistantMessageState(0);
|
||||||
blockBuffer = "";
|
|
||||||
blockChunker?.reset();
|
|
||||||
blockThinkingActive = false;
|
|
||||||
lastStreamedAssistant = undefined;
|
|
||||||
lastStreamedReasoning = undefined;
|
|
||||||
lastBlockReplyText = undefined;
|
|
||||||
suppressBlockChunks = false;
|
|
||||||
assistantTextBaseline = 0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const unsubscribe = params.session.subscribe(
|
const unsubscribe = params.session.subscribe(
|
||||||
@@ -447,16 +484,7 @@ export function subscribeEmbeddedPiSession(params: {
|
|||||||
// Start-of-message is a safer reset point than message_end: some providers
|
// Start-of-message is a safer reset point than message_end: some providers
|
||||||
// may deliver late text_end updates after message_end, which would
|
// may deliver late text_end updates after message_end, which would
|
||||||
// otherwise re-trigger block replies.
|
// otherwise re-trigger block replies.
|
||||||
deltaBuffer = "";
|
resetAssistantMessageState(assistantTexts.length);
|
||||||
blockBuffer = "";
|
|
||||||
blockChunker?.reset();
|
|
||||||
blockThinkingActive = false;
|
|
||||||
lastStreamedAssistant = undefined;
|
|
||||||
lastBlockReplyText = undefined;
|
|
||||||
lastStreamedReasoning = undefined;
|
|
||||||
lastReasoningSent = undefined;
|
|
||||||
suppressBlockChunks = false;
|
|
||||||
assistantTextBaseline = assistantTexts.length;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -717,7 +745,7 @@ export function subscribeEmbeddedPiSession(params: {
|
|||||||
mediaUrls: mediaUrls?.length ? mediaUrls : undefined,
|
mediaUrls: mediaUrls?.length ? mediaUrls : undefined,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
if (params.onPartialReply) {
|
if (params.onPartialReply && shouldEmitPartialReplies) {
|
||||||
void params.onPartialReply({
|
void params.onPartialReply({
|
||||||
text: cleanedText,
|
text: cleanedText,
|
||||||
mediaUrls: mediaUrls?.length ? mediaUrls : undefined,
|
mediaUrls: mediaUrls?.length ? mediaUrls : undefined,
|
||||||
@@ -780,27 +808,11 @@ export function subscribeEmbeddedPiSession(params: {
|
|||||||
const addedDuringMessage =
|
const addedDuringMessage =
|
||||||
assistantTexts.length > assistantTextBaseline;
|
assistantTexts.length > assistantTextBaseline;
|
||||||
const chunkerHasBuffered = blockChunker?.hasBuffered() ?? false;
|
const chunkerHasBuffered = blockChunker?.hasBuffered() ?? false;
|
||||||
// If we're not streaming block replies, ensure the final payload
|
finalizeAssistantTexts({
|
||||||
// includes the final text even when deltas already populated assistantTexts.
|
text,
|
||||||
if (includeReasoning && text && !params.onBlockReply) {
|
addedDuringMessage,
|
||||||
if (assistantTexts.length > assistantTextBaseline) {
|
chunkerHasBuffered,
|
||||||
assistantTexts.splice(
|
});
|
||||||
assistantTextBaseline,
|
|
||||||
assistantTexts.length - assistantTextBaseline,
|
|
||||||
text,
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
const last = assistantTexts.at(-1);
|
|
||||||
if (!last || last !== text) assistantTexts.push(text);
|
|
||||||
}
|
|
||||||
suppressBlockChunks = true;
|
|
||||||
} else if (!addedDuringMessage && !chunkerHasBuffered && text) {
|
|
||||||
// Non-streaming models (no text_delta): ensure assistantTexts gets the
|
|
||||||
// final text when the chunker has nothing buffered to drain.
|
|
||||||
const last = assistantTexts.at(-1);
|
|
||||||
if (!last || last !== text) assistantTexts.push(text);
|
|
||||||
}
|
|
||||||
assistantTextBaseline = assistantTexts.length;
|
|
||||||
|
|
||||||
const onBlockReply = params.onBlockReply;
|
const onBlockReply = params.onBlockReply;
|
||||||
const shouldEmitReasoning = Boolean(
|
const shouldEmitReasoning = Boolean(
|
||||||
|
|||||||
Reference in New Issue
Block a user