fix(agents): harden tool transcript repair
This commit is contained in:
@@ -50,6 +50,8 @@
|
|||||||
- Telegram: serialize media-group processing to avoid missed albums under load.
|
- Telegram: serialize media-group processing to avoid missed albums under load.
|
||||||
- Signal: handle `dataMessage.reaction` events (signal-cli SSE) to avoid broken attachment errors. (#637) — thanks @neist.
|
- Signal: handle `dataMessage.reaction` events (signal-cli SSE) to avoid broken attachment errors. (#637) — thanks @neist.
|
||||||
- Docs: showcase entries for ParentPay, R2 Upload, iOS TestFlight, and Oura Health. (#650) — thanks @henrino3.
|
- Docs: showcase entries for ParentPay, R2 Upload, iOS TestFlight, and Oura Health. (#650) — thanks @henrino3.
|
||||||
|
- Agents: repair session transcripts by dropping duplicate tool results across the whole history (unblocks Anthropic-compatible APIs after retries).
|
||||||
|
- Tests/Live: reset the gateway session between model runs to avoid cross-provider transcript incompatibilities (notably OpenAI Responses reasoning replay rules).
|
||||||
## 2026.1.9
|
## 2026.1.9
|
||||||
|
|
||||||
### Highlights
|
### Highlights
|
||||||
|
|||||||
@@ -642,6 +642,40 @@ describe("sanitizeToolUseResultPairing", () => {
|
|||||||
const out = sanitizeToolUseResultPairing(input);
|
const out = sanitizeToolUseResultPairing(input);
|
||||||
expect(out.filter((m) => m.role === "toolResult")).toHaveLength(1);
|
expect(out.filter((m) => m.role === "toolResult")).toHaveLength(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("drops duplicate tool results for the same id across the transcript", () => {
|
||||||
|
const input = [
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: [
|
||||||
|
{ type: "toolCall", id: "call_1", name: "read", arguments: {} },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId: "call_1",
|
||||||
|
toolName: "read",
|
||||||
|
content: [{ type: "text", text: "first" }],
|
||||||
|
isError: false,
|
||||||
|
},
|
||||||
|
{ role: "assistant", content: [{ type: "text", text: "ok" }] },
|
||||||
|
{
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId: "call_1",
|
||||||
|
toolName: "read",
|
||||||
|
content: [{ type: "text", text: "second (duplicate)" }],
|
||||||
|
isError: false,
|
||||||
|
},
|
||||||
|
] satisfies AgentMessage[];
|
||||||
|
|
||||||
|
const out = sanitizeToolUseResultPairing(input);
|
||||||
|
const results = out.filter((m) => m.role === "toolResult") as Array<{
|
||||||
|
toolCallId?: string;
|
||||||
|
content?: unknown;
|
||||||
|
}>;
|
||||||
|
expect(results).toHaveLength(1);
|
||||||
|
expect(results[0]?.toolCallId).toBe("call_1");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("normalizeTextForComparison", () => {
|
describe("normalizeTextForComparison", () => {
|
||||||
|
|||||||
@@ -286,8 +286,18 @@ export function sanitizeToolUseResultPairing(
|
|||||||
// displaced (e.g. after user turns) or duplicated. Repair by:
|
// displaced (e.g. after user turns) or duplicated. Repair by:
|
||||||
// - moving matching toolResult messages directly after their assistant toolCall turn
|
// - moving matching toolResult messages directly after their assistant toolCall turn
|
||||||
// - inserting synthetic error toolResults for missing ids
|
// - inserting synthetic error toolResults for missing ids
|
||||||
// - dropping duplicate toolResults for the same id within the span
|
// - dropping duplicate toolResults for the same id (anywhere in the transcript)
|
||||||
const out: AgentMessage[] = [];
|
const out: AgentMessage[] = [];
|
||||||
|
const seenToolResultIds = new Set<string>();
|
||||||
|
|
||||||
|
// Appends a toolResult message to `out`, skipping it when a result with the
// same extracted id has already been pushed through this helper. Ids are
// tracked in `seenToolResultIds`; a message whose extracted id is falsy is
// always appended and never tracked.
const pushToolResult = (
|
||||||
|
msg: Extract<AgentMessage, { role: "toolResult" }>,
|
||||||
|
) => {
|
||||||
|
const id = extractToolResultId(msg);
|
||||||
|
if (id && seenToolResultIds.has(id)) return; // duplicate id: drop this result
|
||||||
|
if (id) seenToolResultIds.add(id); // remember the id so later duplicates are dropped
|
||||||
|
out.push(msg);
|
||||||
|
};
|
||||||
|
|
||||||
for (let i = 0; i < messages.length; i += 1) {
|
for (let i = 0; i < messages.length; i += 1) {
|
||||||
const msg = messages[i] as AgentMessage;
|
const msg = messages[i] as AgentMessage;
|
||||||
@@ -298,7 +308,11 @@ export function sanitizeToolUseResultPairing(
|
|||||||
|
|
||||||
const role = (msg as { role?: unknown }).role;
|
const role = (msg as { role?: unknown }).role;
|
||||||
if (role !== "assistant") {
|
if (role !== "assistant") {
|
||||||
|
if (role === "toolResult") {
|
||||||
|
pushToolResult(msg as Extract<AgentMessage, { role: "toolResult" }>);
|
||||||
|
} else {
|
||||||
out.push(msg);
|
out.push(msg);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -335,6 +349,9 @@ export function sanitizeToolUseResultPairing(
|
|||||||
>;
|
>;
|
||||||
const id = extractToolResultId(toolResult);
|
const id = extractToolResultId(toolResult);
|
||||||
if (id && toolCallIds.has(id)) {
|
if (id && toolCallIds.has(id)) {
|
||||||
|
if (seenToolResultIds.has(id)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (!spanResultsById.has(id)) {
|
if (!spanResultsById.has(id)) {
|
||||||
spanResultsById.set(id, toolResult);
|
spanResultsById.set(id, toolResult);
|
||||||
}
|
}
|
||||||
@@ -349,13 +366,24 @@ export function sanitizeToolUseResultPairing(
|
|||||||
|
|
||||||
for (const call of toolCalls) {
|
for (const call of toolCalls) {
|
||||||
const existing = spanResultsById.get(call.id);
|
const existing = spanResultsById.get(call.id);
|
||||||
out.push(
|
pushToolResult(
|
||||||
existing ??
|
existing ??
|
||||||
makeMissingToolResult({ toolCallId: call.id, toolName: call.name }),
|
makeMissingToolResult({ toolCallId: call.id, toolName: call.name }),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
out.push(...remainder);
|
for (const rem of remainder) {
|
||||||
|
if (!rem || typeof rem !== "object") {
|
||||||
|
out.push(rem);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const remRole = (rem as { role?: unknown }).role;
|
||||||
|
if (remRole === "toolResult") {
|
||||||
|
pushToolResult(rem as Extract<AgentMessage, { role: "toolResult" }>);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
out.push(rem);
|
||||||
|
}
|
||||||
i = j - 1;
|
i = j - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -338,6 +338,11 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
|||||||
key: sessionKey,
|
key: sessionKey,
|
||||||
model: modelKey,
|
model: modelKey,
|
||||||
});
|
});
|
||||||
|
// Reset between models: avoids cross-provider transcript incompatibilities
|
||||||
|
// (notably OpenAI Responses requiring reasoning replay for function_call items).
|
||||||
|
await client.request<Record<string, unknown>>("sessions.reset", {
|
||||||
|
key: sessionKey,
|
||||||
|
});
|
||||||
|
|
||||||
// “Meaningful” direct prompt (no tools).
|
// “Meaningful” direct prompt (no tools).
|
||||||
const runId = randomUUID();
|
const runId = randomUUID();
|
||||||
|
|||||||
Reference in New Issue
Block a user