fix(agents): harden tool transcript repair
This commit is contained in:
@@ -50,6 +50,8 @@
|
||||
- Telegram: serialize media-group processing to avoid missed albums under load.
|
||||
- Signal: handle `dataMessage.reaction` events (signal-cli SSE) to avoid broken attachment errors. (#637) — thanks @neist.
|
||||
- Docs: showcase entries for ParentPay, R2 Upload, iOS TestFlight, and Oura Health. (#650) — thanks @henrino3.
|
||||
- Agents: repair session transcripts by dropping duplicate tool results across the whole history (unblocks Anthropic-compatible APIs after retries).
|
||||
- Tests/Live: reset the gateway session between model runs to avoid cross-provider transcript incompatibilities (notably OpenAI Responses reasoning replay rules).
|
||||
## 2026.1.9
|
||||
|
||||
### Highlights
|
||||
|
||||
@@ -642,6 +642,40 @@ describe("sanitizeToolUseResultPairing", () => {
|
||||
const out = sanitizeToolUseResultPairing(input);
|
||||
expect(out.filter((m) => m.role === "toolResult")).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("drops duplicate tool results for the same id across the transcript", () => {
|
||||
const input = [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [
|
||||
{ type: "toolCall", id: "call_1", name: "read", arguments: {} },
|
||||
],
|
||||
},
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "call_1",
|
||||
toolName: "read",
|
||||
content: [{ type: "text", text: "first" }],
|
||||
isError: false,
|
||||
},
|
||||
{ role: "assistant", content: [{ type: "text", text: "ok" }] },
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "call_1",
|
||||
toolName: "read",
|
||||
content: [{ type: "text", text: "second (duplicate)" }],
|
||||
isError: false,
|
||||
},
|
||||
] satisfies AgentMessage[];
|
||||
|
||||
const out = sanitizeToolUseResultPairing(input);
|
||||
const results = out.filter((m) => m.role === "toolResult") as Array<{
|
||||
toolCallId?: string;
|
||||
content?: unknown;
|
||||
}>;
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0]?.toolCallId).toBe("call_1");
|
||||
});
|
||||
});
|
||||
|
||||
describe("normalizeTextForComparison", () => {
|
||||
|
||||
@@ -286,8 +286,18 @@ export function sanitizeToolUseResultPairing(
|
||||
// displaced (e.g. after user turns) or duplicated. Repair by:
|
||||
// - moving matching toolResult messages directly after their assistant toolCall turn
|
||||
// - inserting synthetic error toolResults for missing ids
|
||||
// - dropping duplicate toolResults for the same id within the span
|
||||
// - dropping duplicate toolResults for the same id (anywhere in the transcript)
|
||||
const out: AgentMessage[] = [];
|
||||
const seenToolResultIds = new Set<string>();
|
||||
|
||||
const pushToolResult = (
|
||||
msg: Extract<AgentMessage, { role: "toolResult" }>,
|
||||
) => {
|
||||
const id = extractToolResultId(msg);
|
||||
if (id && seenToolResultIds.has(id)) return;
|
||||
if (id) seenToolResultIds.add(id);
|
||||
out.push(msg);
|
||||
};
|
||||
|
||||
for (let i = 0; i < messages.length; i += 1) {
|
||||
const msg = messages[i] as AgentMessage;
|
||||
@@ -298,7 +308,11 @@ export function sanitizeToolUseResultPairing(
|
||||
|
||||
const role = (msg as { role?: unknown }).role;
|
||||
if (role !== "assistant") {
|
||||
out.push(msg);
|
||||
if (role === "toolResult") {
|
||||
pushToolResult(msg as Extract<AgentMessage, { role: "toolResult" }>);
|
||||
} else {
|
||||
out.push(msg);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -335,6 +349,9 @@ export function sanitizeToolUseResultPairing(
|
||||
>;
|
||||
const id = extractToolResultId(toolResult);
|
||||
if (id && toolCallIds.has(id)) {
|
||||
if (seenToolResultIds.has(id)) {
|
||||
continue;
|
||||
}
|
||||
if (!spanResultsById.has(id)) {
|
||||
spanResultsById.set(id, toolResult);
|
||||
}
|
||||
@@ -349,13 +366,24 @@ export function sanitizeToolUseResultPairing(
|
||||
|
||||
for (const call of toolCalls) {
|
||||
const existing = spanResultsById.get(call.id);
|
||||
out.push(
|
||||
pushToolResult(
|
||||
existing ??
|
||||
makeMissingToolResult({ toolCallId: call.id, toolName: call.name }),
|
||||
);
|
||||
}
|
||||
|
||||
out.push(...remainder);
|
||||
for (const rem of remainder) {
|
||||
if (!rem || typeof rem !== "object") {
|
||||
out.push(rem);
|
||||
continue;
|
||||
}
|
||||
const remRole = (rem as { role?: unknown }).role;
|
||||
if (remRole === "toolResult") {
|
||||
pushToolResult(rem as Extract<AgentMessage, { role: "toolResult" }>);
|
||||
continue;
|
||||
}
|
||||
out.push(rem);
|
||||
}
|
||||
i = j - 1;
|
||||
}
|
||||
|
||||
|
||||
@@ -338,6 +338,11 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
||||
key: sessionKey,
|
||||
model: modelKey,
|
||||
});
|
||||
// Reset between models: avoids cross-provider transcript incompatibilities
|
||||
// (notably OpenAI Responses requiring reasoning replay for function_call items).
|
||||
await client.request<Record<string, unknown>>("sessions.reset", {
|
||||
key: sessionKey,
|
||||
});
|
||||
|
||||
// “Meaningful” direct prompt (no tools).
|
||||
const runId = randomUUID();
|
||||
|
||||
Reference in New Issue
Block a user