fix(agents): harden tool transcript repair

This commit is contained in:
Peter Steinberger
2026-01-10 21:45:10 +00:00
parent 805a29252e
commit 98377c7c6b
4 changed files with 73 additions and 4 deletions

View File

@@ -50,6 +50,8 @@
- Telegram: serialize media-group processing to avoid missed albums under load.
- Signal: handle `dataMessage.reaction` events (signal-cli SSE) to avoid broken attachment errors. (#637) — thanks @neist.
- Docs: showcase entries for ParentPay, R2 Upload, iOS TestFlight, and Oura Health. (#650) — thanks @henrino3.
- Agents: repair session transcripts by dropping duplicate tool results across the whole history (unblocks Anthropic-compatible APIs after retries).
- Tests/Live: reset the gateway session between model runs to avoid cross-provider transcript incompatibilities (notably OpenAI Responses reasoning replay rules).
## 2026.1.9
### Highlights

View File

@@ -642,6 +642,40 @@ describe("sanitizeToolUseResultPairing", () => {
const out = sanitizeToolUseResultPairing(input);
expect(out.filter((m) => m.role === "toolResult")).toHaveLength(1);
});
it("drops duplicate tool results for the same id across the transcript", () => {
  // Transcript in which the result for call_1 shows up twice: once right after
  // the tool call, and once more after a later assistant text turn.
  const input = [
    {
      role: "assistant",
      content: [
        { type: "toolCall", id: "call_1", name: "read", arguments: {} },
      ],
    },
    {
      role: "toolResult",
      toolCallId: "call_1",
      toolName: "read",
      content: [{ type: "text", text: "first" }],
      isError: false,
    },
    { role: "assistant", content: [{ type: "text", text: "ok" }] },
    {
      role: "toolResult",
      toolCallId: "call_1",
      toolName: "read",
      content: [{ type: "text", text: "second (duplicate)" }],
      isError: false,
    },
  ] satisfies AgentMessage[];

  const out = sanitizeToolUseResultPairing(input);
  const results = out.filter((m) => m.role === "toolResult") as Array<{
    toolCallId?: string;
    content?: unknown;
  }>;

  // Exactly one result for call_1 may remain after the repair.
  expect(results).toHaveLength(1);
  expect(results[0]?.toolCallId).toBe("call_1");
  // Pin WHICH copy survives: the first one seen, not the later duplicate.
  // Without this check the test would also pass if the wrong copy were kept.
  expect(results[0]?.content).toEqual([{ type: "text", text: "first" }]);
});
});
describe("normalizeTextForComparison", () => {

View File

@@ -286,8 +286,18 @@ export function sanitizeToolUseResultPairing(
// displaced (e.g. after user turns) or duplicated. Repair by:
// - moving matching toolResult messages directly after their assistant toolCall turn
// - inserting synthetic error toolResults for missing ids
// - dropping duplicate toolResults for the same id within the span
// - dropping duplicate toolResults for the same id (anywhere in the transcript)
const out: AgentMessage[] = [];
const seenToolResultIds = new Set<string>();
// Appends a toolResult to `out`, skipping it when a result carrying the same
// id has already been appended. Results for which no id can be extracted are
// always appended, since there is nothing to de-duplicate them against.
const pushToolResult = (
  msg: Extract<AgentMessage, { role: "toolResult" }>,
) => {
  const resultId = extractToolResultId(msg);
  if (resultId) {
    // First occurrence wins; any later result with this id is a duplicate.
    if (seenToolResultIds.has(resultId)) return;
    seenToolResultIds.add(resultId);
  }
  out.push(msg);
};
for (let i = 0; i < messages.length; i += 1) {
const msg = messages[i] as AgentMessage;
@@ -298,7 +308,11 @@ export function sanitizeToolUseResultPairing(
const role = (msg as { role?: unknown }).role;
if (role !== "assistant") {
out.push(msg);
if (role === "toolResult") {
pushToolResult(msg as Extract<AgentMessage, { role: "toolResult" }>);
} else {
out.push(msg);
}
continue;
}
@@ -335,6 +349,9 @@ export function sanitizeToolUseResultPairing(
>;
const id = extractToolResultId(toolResult);
if (id && toolCallIds.has(id)) {
if (seenToolResultIds.has(id)) {
continue;
}
if (!spanResultsById.has(id)) {
spanResultsById.set(id, toolResult);
}
@@ -349,13 +366,24 @@ export function sanitizeToolUseResultPairing(
for (const call of toolCalls) {
const existing = spanResultsById.get(call.id);
out.push(
pushToolResult(
existing ??
makeMissingToolResult({ toolCallId: call.id, toolName: call.name }),
);
}
out.push(...remainder);
for (const rem of remainder) {
if (!rem || typeof rem !== "object") {
out.push(rem);
continue;
}
const remRole = (rem as { role?: unknown }).role;
if (remRole === "toolResult") {
pushToolResult(rem as Extract<AgentMessage, { role: "toolResult" }>);
continue;
}
out.push(rem);
}
i = j - 1;
}

View File

@@ -338,6 +338,11 @@ describeLive("gateway live (dev agent, profile keys)", () => {
key: sessionKey,
model: modelKey,
});
// Reset between models: avoids cross-provider transcript incompatibilities
// (notably OpenAI Responses requiring reasoning replay for function_call items).
await client.request<Record<string, unknown>>("sessions.reset", {
key: sessionKey,
});
// “Meaningful” direct prompt (no tools).
const runId = randomUUID();