diff --git a/CHANGELOG.md b/CHANGELOG.md
index d66e0d3d6..ae15780a1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,8 @@ Docs: https://docs.clawd.bot
 
 ### Fixes
 - Plugins: surface plugin load/register/config errors in gateway logs with plugin/source context.
 - Agents: propagate accountId into embedded runs so sub-agent announce routing honors the originating account. (#1058)
+- Compaction: include tool failure summaries in safeguard compaction to prevent retry loops. (#1084)
 
 ## 2026.1.18-5
 
diff --git a/src/agents/pi-extensions/compaction-safeguard.test.ts b/src/agents/pi-extensions/compaction-safeguard.test.ts
new file mode 100644
index 000000000..d542da4fc
--- /dev/null
+++ b/src/agents/pi-extensions/compaction-safeguard.test.ts
@@ -0,0 +1,98 @@
+import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import { describe, expect, it } from "vitest";
+
+import { __testing } from "./compaction-safeguard.js";
+
+const { collectToolFailures, formatToolFailuresSection } = __testing;
+
+describe("compaction-safeguard tool failures", () => {
+  it("formats tool failures with meta and summary", () => {
+    const messages: AgentMessage[] = [
+      {
+        role: "toolResult",
+        toolCallId: "call-1",
+        toolName: "exec",
+        isError: true,
+        details: { status: "failed", exitCode: 1 },
+        content: [{ type: "text", text: "ENOENT: missing file" }],
+        timestamp: Date.now(),
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call-2",
+        toolName: "read",
+        isError: false,
+        content: [{ type: "text", text: "ok" }],
+        timestamp: Date.now(),
+      },
+    ];
+
+    const failures = collectToolFailures(messages);
+    expect(failures).toHaveLength(1);
+
+    const section = formatToolFailuresSection(failures);
+    expect(section).toContain("## Tool Failures");
+    expect(section).toContain("exec (status=failed exitCode=1): ENOENT: missing file");
+  });
+
+  it("dedupes by toolCallId and handles empty output", () => {
+    const messages: AgentMessage[] = [
+      {
+        role: "toolResult",
+        toolCallId: "call-1",
+        toolName: "exec",
+        isError: true,
+        details: { exitCode: 2 },
+        content: [],
+        timestamp: Date.now(),
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call-1",
+        toolName: "exec",
+        isError: true,
+        content: [{ type: "text", text: "ignored" }],
+        timestamp: Date.now(),
+      },
+    ];
+
+    const failures = collectToolFailures(messages);
+    expect(failures).toHaveLength(1);
+
+    const section = formatToolFailuresSection(failures);
+    expect(section).toContain("exec (exitCode=2): failed");
+  });
+
+  it("caps the number of failures and adds overflow line", () => {
+    const messages: AgentMessage[] = Array.from({ length: 9 }, (_, idx) => ({
+      role: "toolResult",
+      toolCallId: `call-${idx}`,
+      toolName: "exec",
+      isError: true,
+      content: [{ type: "text", text: `error ${idx}` }],
+      timestamp: Date.now(),
+    }));
+
+    const failures = collectToolFailures(messages);
+    const section = formatToolFailuresSection(failures);
+    expect(section).toContain("## Tool Failures");
+    expect(section).toContain("...and 1 more");
+  });
+
+  it("omits section when there are no tool failures", () => {
+    const messages: AgentMessage[] = [
+      {
+        role: "toolResult",
+        toolCallId: "ok",
+        toolName: "exec",
+        isError: false,
+        content: [{ type: "text", text: "ok" }],
+        timestamp: Date.now(),
+      },
+    ];
+
+    const failures = collectToolFailures(messages);
+    const section = formatToolFailuresSection(failures);
+    expect(section).toBe("");
+  });
+});
diff --git a/src/agents/pi-extensions/compaction-safeguard.ts b/src/agents/pi-extensions/compaction-safeguard.ts
index af6a6697c..b944aacd6 100644
--- a/src/agents/pi-extensions/compaction-safeguard.ts
+++ b/src/agents/pi-extensions/compaction-safeguard.ts
@@ -10,6 +10,120 @@ const FALLBACK_SUMMARY =
 const TURN_PREFIX_INSTRUCTIONS =
   "This summary covers the prefix of a split turn. Focus on the original request," +
   " early progress, and any details needed to understand the retained suffix.";
+/** Maximum number of tool failures listed individually in the summary section. */
+const MAX_TOOL_FAILURES = 8;
+/** Maximum length of one failure summary; longer text is cut and ends with "...". */
+const MAX_TOOL_FAILURE_CHARS = 240;
+
+/** A failed tool result reduced to the fields rendered in the compaction summary. */
+type ToolFailure = {
+  toolCallId: string;
+  toolName: string;
+  summary: string;
+  meta?: string;
+};
+
+/** Collapses every whitespace run into a single space and trims both ends. */
+function normalizeFailureText(text: string): string {
+  return text.replace(/\s+/g, " ").trim();
+}
+
+/** Returns text unchanged when it fits in maxChars, otherwise a prefix followed by "...". */
+function truncateFailureText(text: string, maxChars: number): string {
+  if (text.length <= maxChars) return text;
+  return `${text.slice(0, Math.max(0, maxChars - 3))}...`;
+}
+
+/** Formats status/exitCode from a details object; undefined when neither is usable. */
+function formatToolFailureMeta(details: unknown): string | undefined {
+  if (!details || typeof details !== "object") return undefined;
+  const record = details as Record<string, unknown>;
+  const status = typeof record.status === "string" ? record.status : undefined;
+  const exitCode =
+    typeof record.exitCode === "number" && Number.isFinite(record.exitCode)
+      ? record.exitCode
+      : undefined;
+  const parts: string[] = [];
+  if (status) parts.push(`status=${status}`);
+  if (exitCode !== undefined) parts.push(`exitCode=${exitCode}`);
+  return parts.length > 0 ? parts.join(" ") : undefined;
+}
+
+/** Joins the `text` of every `{ type: "text" }` block with newlines; non-arrays yield "". */
+function extractToolResultText(content: unknown): string {
+  if (!Array.isArray(content)) return "";
+  const parts: string[] = [];
+  for (const block of content) {
+    if (!block || typeof block !== "object") continue;
+    const rec = block as { type?: unknown; text?: unknown };
+    if (rec.type === "text" && typeof rec.text === "string") {
+      parts.push(rec.text);
+    }
+  }
+  return parts.join("\n");
+}
+
+/**
+ * Collects one entry per failed tool call from `toolResult` messages.
+ *
+ * Only messages with `isError === true` and a non-empty string `toolCallId` are kept;
+ * later results that repeat an already-seen `toolCallId` are ignored.
+ */
+function collectToolFailures(messages: AgentMessage[]): ToolFailure[] {
+  const failures: ToolFailure[] = [];
+  const seen = new Set<string>();
+
+  for (const message of messages) {
+    if (!message || typeof message !== "object") continue;
+    const role = (message as { role?: unknown }).role;
+    if (role !== "toolResult") continue;
+    const toolResult = message as {
+      toolCallId?: unknown;
+      toolName?: unknown;
+      content?: unknown;
+      details?: unknown;
+      isError?: unknown;
+    };
+    if (toolResult.isError !== true) continue;
+    const toolCallId =
+      typeof toolResult.toolCallId === "string" ? toolResult.toolCallId : "";
+    if (!toolCallId || seen.has(toolCallId)) continue;
+    seen.add(toolCallId);
+
+    const toolName =
+      typeof toolResult.toolName === "string" && toolResult.toolName.trim()
+        ? toolResult.toolName
+        : "tool";
+    const rawText = extractToolResultText(toolResult.content);
+    const meta = formatToolFailureMeta(toolResult.details);
+    const normalized = normalizeFailureText(rawText);
+    const summary = truncateFailureText(
+      normalized || (meta ? "failed" : "failed (no output)"),
+      MAX_TOOL_FAILURE_CHARS,
+    );
+    failures.push({ toolCallId, toolName, summary, meta });
+  }
+
+  return failures;
+}
+
+/**
+ * Renders failures as a "## Tool Failures" markdown section prefixed with a blank line.
+ *
+ * At most MAX_TOOL_FAILURES entries are listed; the rest are counted in a final
+ * "...and N more" line. Returns "" when there are no failures.
+ */
+function formatToolFailuresSection(failures: ToolFailure[]): string {
+  if (failures.length === 0) return "";
+  const lines = failures.slice(0, MAX_TOOL_FAILURES).map((failure) => {
+    const meta = failure.meta ? ` (${failure.meta})` : "";
+    return `- ${failure.toolName}${meta}: ${failure.summary}`;
+  });
+  if (failures.length > MAX_TOOL_FAILURES) {
+    lines.push(`- ...and ${failures.length - MAX_TOOL_FAILURES} more`);
+  }
+  return `\n\n## Tool Failures\n${lines.join("\n")}`;
+}
 
 function computeFileLists(fileOps: FileOperations): {
   readFiles: string[];
@@ -103,7 +217,12 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
     const { preparation, customInstructions, signal } = event;
     const { readFiles, modifiedFiles } = computeFileLists(preparation.fileOps);
     const fileOpsSummary = formatFileOperations(readFiles, modifiedFiles);
-    const fallbackSummary = `${FALLBACK_SUMMARY}${fileOpsSummary}`;
+    const toolFailures = collectToolFailures([
+      ...preparation.messagesToSummarize,
+      ...preparation.turnPrefixMessages,
+    ]);
+    const toolFailureSection = formatToolFailuresSection(toolFailures);
+    const fallbackSummary = `${FALLBACK_SUMMARY}${toolFailureSection}${fileOpsSummary}`;
 
     const model = ctx.model;
     if (!model) {
@@ -162,6 +281,7 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
         summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
       }
 
+      summary += toolFailureSection;
       summary += fileOpsSummary;
 
       return {
@@ -189,3 +309,8 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
     }
   });
 }
+
+export const __testing = {
+  collectToolFailures,
+  formatToolFailuresSection,
+} as const;
diff --git a/src/config/config.compaction-settings.test.ts b/src/config/config.compaction-settings.test.ts
index bf2f9f618..e85ea3aeb 100644
--- a/src/config/config.compaction-settings.test.ts
+++ b/src/config/config.compaction-settings.test.ts
@@ -45,4 +45,35 @@ describe("config compaction settings", () => {
       expect(cfg.agents?.defaults?.compaction?.memoryFlush?.systemPrompt).toBe("Flush memory now.");
     });
   });
+
+  it("defaults compaction mode to safeguard", async () => {
+    await withTempHome(async (home) => {
+      const configDir = path.join(home, ".clawdbot");
+      await fs.mkdir(configDir, { recursive: true });
+      await fs.writeFile(
+        path.join(configDir, "clawdbot.json"),
+        JSON.stringify(
+          {
+            agents: {
+              defaults: {
+                compaction: {
+                  reserveTokensFloor: 9000,
+                },
+              },
+            },
+          },
+          null,
+          2,
+        ),
+        "utf-8",
+      );
+
+      vi.resetModules();
+      const { loadConfig } = await import("./config.js");
+      const cfg = loadConfig();
+
+      expect(cfg.agents?.defaults?.compaction?.mode).toBe("safeguard");
+      expect(cfg.agents?.defaults?.compaction?.reserveTokensFloor).toBe(9000);
+    });
+  });
 });
diff --git a/src/config/defaults.ts b/src/config/defaults.ts
index 0c567b52c..e9028f0fe 100644
--- a/src/config/defaults.ts
+++ b/src/config/defaults.ts
@@ -139,6 +139,32 @@ export function applyContextPruningDefaults(cfg: ClawdbotConfig): ClawdbotConfig
   };
 }
 
+/**
+ * Defaults `agents.defaults.compaction.mode` to "safeguard" when no mode is set.
+ *
+ * Configs without an `agents.defaults` section are returned unchanged.
+ */
+export function applyCompactionDefaults(cfg: ClawdbotConfig): ClawdbotConfig {
+  const defaults = cfg.agents?.defaults;
+  if (!defaults) return cfg;
+  const compaction = defaults.compaction;
+  if (compaction?.mode) return cfg;
+
+  return {
+    ...cfg,
+    agents: {
+      ...cfg.agents,
+      defaults: {
+        ...defaults,
+        compaction: {
+          ...compaction,
+          mode: "safeguard",
+        },
+      },
+    },
+  };
+}
+
 export function resetSessionDefaultsWarningForTests() {
   defaultWarnState = { warned: false };
 }
diff --git a/src/config/io.ts b/src/config/io.ts
index 29e3d1d3a..0d0a3e20f 100644
--- a/src/config/io.ts
+++ b/src/config/io.ts
@@ -13,6 +13,7 @@ import {
 } from "../infra/shell-env.js";
 import { DuplicateAgentDirError, findDuplicateAgentDirs } from "./agent-dirs.js";
 import {
+  applyCompactionDefaults,
   applyContextPruningDefaults,
   applyLoggingDefaults,
   applyMessageDefaults,
@@ -334,9 +335,11 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
         }
       }
       const cfg = applyModelDefaults(
-        applyContextPruningDefaults(
-          applySessionDefaults(
-            applyLoggingDefaults(applyMessageDefaults(validated.data as ClawdbotConfig)),
+        applyCompactionDefaults(
+          applyContextPruningDefaults(
+            applySessionDefaults(
+              applyLoggingDefaults(applyMessageDefaults(validated.data as ClawdbotConfig)),
+            ),
           ),
         ),
       );
@@ -380,7 +383,9 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
         const hash = hashConfigRaw(null);
         const config = applyTalkApiKey(
           applyModelDefaults(
-            applyContextPruningDefaults(applySessionDefaults(applyMessageDefaults({}))),
+            applyCompactionDefaults(
+              applyContextPruningDefaults(applySessionDefaults(applyMessageDefaults({}))),
+            ),
           ),
         );
         const legacyIssues: LegacyConfigIssue[] = [];