fix: add safeguard compaction tool summaries

This commit is contained in:
Peter Steinberger
2026-01-19 01:44:08 +00:00
parent af1004ebbd
commit dd1b08b3e8
6 changed files with 271 additions and 5 deletions

View File

@@ -9,7 +9,12 @@ Docs: https://docs.clawd.bot
### Fixes
- Plugins: surface plugin load/register/config errors in gateway logs with plugin/source context.
- Agents: propagate accountId into embedded runs so sub-agent announce routing honors the originating account. (#1058)
- Compaction: include tool failure summaries in safeguard compaction to prevent retry loops. (#1084)
## 2026.1.18-5

View File

@@ -0,0 +1,98 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { describe, expect, it } from "vitest";
import { __testing } from "./compaction-safeguard.js";
// The helpers under test are internal to the module; they are reached through its `__testing` export.
const { collectToolFailures, formatToolFailuresSection } = __testing;
// Unit tests for collecting failed tool results and rendering them as a "## Tool Failures" section.
describe("compaction-safeguard tool failures", () => {
// One failed and one successful tool result: only the failed one is collected, and its
// bullet carries the status/exitCode meta followed by the error text.
it("formats tool failures with meta and summary", () => {
const messages: AgentMessage[] = [
{
role: "toolResult",
toolCallId: "call-1",
toolName: "exec",
isError: true,
details: { status: "failed", exitCode: 1 },
content: [{ type: "text", text: "ENOENT: missing file" }],
timestamp: Date.now(),
},
{
role: "toolResult",
toolCallId: "call-2",
toolName: "read",
isError: false,
content: [{ type: "text", text: "ok" }],
timestamp: Date.now(),
},
];
const failures = collectToolFailures(messages);
expect(failures).toHaveLength(1);
const section = formatToolFailuresSection(failures);
expect(section).toContain("## Tool Failures");
expect(section).toContain("exec (status=failed exitCode=1): ENOENT: missing file");
});
// Two failed results share a toolCallId: the first one wins, and because it has no text
// content but does have meta, its summary falls back to the bare word "failed".
it("dedupes by toolCallId and handles empty output", () => {
const messages: AgentMessage[] = [
{
role: "toolResult",
toolCallId: "call-1",
toolName: "exec",
isError: true,
details: { exitCode: 2 },
content: [],
timestamp: Date.now(),
},
{
role: "toolResult",
toolCallId: "call-1",
toolName: "exec",
isError: true,
content: [{ type: "text", text: "ignored" }],
timestamp: Date.now(),
},
];
const failures = collectToolFailures(messages);
expect(failures).toHaveLength(1);
const section = formatToolFailuresSection(failures);
expect(section).toContain("exec (exitCode=2): failed");
});
// Nine distinct failures: the rendered section is expected to end with an overflow
// line reporting one failure that was not listed.
it("caps the number of failures and adds overflow line", () => {
const messages: AgentMessage[] = Array.from({ length: 9 }, (_, idx) => ({
role: "toolResult",
toolCallId: `call-${idx}`,
toolName: "exec",
isError: true,
content: [{ type: "text", text: `error ${idx}` }],
timestamp: Date.now(),
}));
const failures = collectToolFailures(messages);
const section = formatToolFailuresSection(failures);
expect(section).toContain("## Tool Failures");
expect(section).toContain("...and 1 more");
});
// Only successful results: nothing is collected and the section is the empty string.
it("omits section when there are no tool failures", () => {
const messages: AgentMessage[] = [
{
role: "toolResult",
toolCallId: "ok",
toolName: "exec",
isError: false,
content: [{ type: "text", text: "ok" }],
timestamp: Date.now(),
},
];
const failures = collectToolFailures(messages);
const section = formatToolFailuresSection(failures);
expect(section).toBe("");
});
});

View File

@@ -10,6 +10,101 @@ const FALLBACK_SUMMARY =
// Instruction text for summarizing the prefix of a split turn.
// NOTE(review): the place where this is passed to the summarizer is outside this view.
const TURN_PREFIX_INSTRUCTIONS =
"This summary covers the prefix of a split turn. Focus on the original request," +
" early progress, and any details needed to understand the retained suffix.";
// Maximum number of failure bullets listed in the "## Tool Failures" section;
// any remainder is reported as a single "...and N more" line.
const MAX_TOOL_FAILURES = 8;
// Maximum string length of a single failure summary before it is truncated.
const MAX_TOOL_FAILURE_CHARS = 240;
/** One failed tool call, reduced to what is needed to render a summary bullet. */
type ToolFailure = {
/** Identifier of the tool call; used to keep only one failure per call. */
toolCallId: string;
/** Name of the tool that failed ("tool" when the result carried no usable name). */
toolName: string;
/** Whitespace-normalized, length-capped error text, or a "failed" placeholder when there was none. */
summary: string;
/** Optional "status=… exitCode=…" fragment derived from the result's details. */
meta?: string;
};
/**
 * Flattens tool output onto a single line.
 *
 * @param text Raw tool output, possibly multi-line.
 * @returns The text with every run of whitespace replaced by one space and no
 *   leading or trailing whitespace.
 */
function normalizeFailureText(text: string): string {
  // Splitting on whitespace runs yields empty strings at the edges when the
  // text starts or ends with whitespace; dropping them is the trim.
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}
/**
 * Shortens `text` so that the result is never longer than `maxChars`.
 *
 * When the text is cut, it ends with "..." provided the budget has room for
 * the ellipsis (`maxChars >= 3`); with a smaller budget the text is hard-cut
 * instead, so the limit is still honored.
 *
 * @param text Text to shorten.
 * @param maxChars Maximum allowed length of the result, in UTF-16 code units.
 * @returns `text` unchanged when it already fits, otherwise a shortened copy.
 */
function truncateFailureText(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;
  const ellipsis = "...";
  // Negative, fractional, or NaN budgets collapse to a non-negative integer.
  const budget = Number.isFinite(maxChars) ? Math.max(0, Math.floor(maxChars)) : 0;
  // Only append the ellipsis when it fits inside the budget.
  const suffix = budget >= ellipsis.length ? ellipsis : "";
  let keep = budget - suffix.length;
  if (keep > 0) {
    // Do not end on the first half of a surrogate pair; that would leave an
    // unpaired high surrogate in the summary.
    const lastUnit = text.charCodeAt(keep - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) keep -= 1;
  }
  return `${text.slice(0, keep)}${suffix}`;
}
/**
 * Builds the short "status=… exitCode=…" fragment shown next to a tool name.
 *
 * @param details The tool result's `details` value; anything that is not a
 *   non-null object is ignored.
 * @returns The space-separated fragment, or `undefined` when `details` has
 *   neither a non-empty string `status` nor a finite numeric `exitCode`.
 */
function formatToolFailureMeta(details: unknown): string | undefined {
  if (typeof details !== "object" || details === null) return undefined;
  const { status, exitCode } = details as { status?: unknown; exitCode?: unknown };
  const segments: string[] = [];
  if (typeof status === "string" && status) {
    segments.push(`status=${status}`);
  }
  // An exit code of 0 is still reported; only non-numbers and NaN/Infinity are dropped.
  if (typeof exitCode === "number" && Number.isFinite(exitCode)) {
    segments.push(`exitCode=${exitCode}`);
  }
  if (segments.length === 0) return undefined;
  return segments.join(" ");
}
/**
 * Concatenates the text blocks of a tool result's content.
 *
 * @param content The tool result's `content` value; expected to be an array of
 *   blocks, anything else yields an empty string.
 * @returns The `text` of every `{ type: "text", text: string }` block, in
 *   order, joined with newlines. Other blocks are skipped.
 */
function extractToolResultText(content: unknown): string {
  if (!Array.isArray(content)) return "";
  const texts = content.flatMap((entry: unknown): string[] => {
    if (typeof entry !== "object" || entry === null) return [];
    const { type, text } = entry as { type?: unknown; text?: unknown };
    return type === "text" && typeof text === "string" ? [text] : [];
  });
  return texts.join("\n");
}
/**
 * Gathers one `ToolFailure` per failed tool call found in `messages`.
 *
 * A message counts only when its `role` is "toolResult", `isError` is exactly
 * `true`, and it has a non-empty string `toolCallId`. For repeated call ids
 * the first occurrence wins.
 *
 * @param messages Conversation messages to scan, in order.
 * @returns The failures in the order they were first seen.
 */
function collectToolFailures(messages: AgentMessage[]): ToolFailure[] {
  const seenCallIds = new Set<string>();
  const collected: ToolFailure[] = [];

  for (const entry of messages) {
    if (!entry || typeof entry !== "object") continue;
    const candidate = entry as {
      role?: unknown;
      toolCallId?: unknown;
      toolName?: unknown;
      content?: unknown;
      details?: unknown;
      isError?: unknown;
    };
    // Only tool results explicitly flagged as errors are of interest.
    if (candidate.role !== "toolResult" || candidate.isError !== true) continue;

    const { toolCallId: callId, toolName: rawName } = candidate;
    // Results without a usable id cannot be de-duplicated and are skipped.
    if (typeof callId !== "string" || callId === "" || seenCallIds.has(callId)) continue;
    seenCallIds.add(callId);

    const toolName = typeof rawName === "string" && rawName.trim() !== "" ? rawName : "tool";
    const outputText = normalizeFailureText(extractToolResultText(candidate.content));
    const meta = formatToolFailureMeta(candidate.details);

    // With no output text, fall back to a placeholder; the longer wording is
    // used when there is no meta either.
    let summarySource = outputText;
    if (!summarySource) {
      summarySource = meta ? "failed" : "failed (no output)";
    }

    collected.push({
      toolCallId: callId,
      toolName,
      summary: truncateFailureText(summarySource, MAX_TOOL_FAILURE_CHARS),
      meta,
    });
  }

  return collected;
}
/**
 * Renders collected failures as a markdown section to append to a summary.
 *
 * @param failures Failures to list, in display order.
 * @returns An empty string when there are no failures; otherwise a string that
 *   starts with two newlines and a "## Tool Failures" heading, followed by one
 *   bullet per failure (at most `MAX_TOOL_FAILURES`) and, when more exist, a
 *   final "...and N more" bullet.
 */
function formatToolFailuresSection(failures: ToolFailure[]): string {
  const total = failures.length;
  if (total === 0) return "";

  const bullets: string[] = [];
  for (const { toolName, meta, summary } of failures.slice(0, MAX_TOOL_FAILURES)) {
    const metaSuffix = meta ? ` (${meta})` : "";
    bullets.push(`- ${toolName}${metaSuffix}: ${summary}`);
  }

  const hidden = total - MAX_TOOL_FAILURES;
  if (hidden > 0) {
    bullets.push(`- ...and ${hidden} more`);
  }

  return `\n\n## Tool Failures\n${bullets.join("\n")}`;
}
function computeFileLists(fileOps: FileOperations): {
readFiles: string[];
@@ -103,7 +198,12 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
const { preparation, customInstructions, signal } = event;
const { readFiles, modifiedFiles } = computeFileLists(preparation.fileOps);
const fileOpsSummary = formatFileOperations(readFiles, modifiedFiles);
const fallbackSummary = `${FALLBACK_SUMMARY}${fileOpsSummary}`;
const toolFailures = collectToolFailures([
...preparation.messagesToSummarize,
...preparation.turnPrefixMessages,
]);
const toolFailureSection = formatToolFailuresSection(toolFailures);
const fallbackSummary = `${FALLBACK_SUMMARY}${toolFailureSection}${fileOpsSummary}`;
const model = ctx.model;
if (!model) {
@@ -162,6 +262,7 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
}
summary += toolFailureSection;
summary += fileOpsSummary;
return {
@@ -189,3 +290,8 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
}
});
}
// Exposes otherwise module-private helpers so unit tests can call them directly.
export const __testing = {
collectToolFailures,
formatToolFailuresSection,
} as const;

View File

@@ -45,4 +45,35 @@ describe("config compaction settings", () => {
expect(cfg.agents?.defaults?.compaction?.memoryFlush?.systemPrompt).toBe("Flush memory now.");
});
});
// A config that sets a compaction option but no mode should load with
// mode "safeguard" while keeping the option that was set.
it("defaults compaction mode to safeguard", async () => {
  await withTempHome(async (home) => {
    const configDir = path.join(home, ".clawdbot");
    const configPath = path.join(configDir, "clawdbot.json");
    const userConfig = {
      agents: {
        defaults: {
          compaction: {
            reserveTokensFloor: 9000,
          },
        },
      },
    };
    await fs.mkdir(configDir, { recursive: true });
    await fs.writeFile(configPath, JSON.stringify(userConfig, null, 2), "utf-8");

    // Re-import the config module after resetting the module registry.
    vi.resetModules();
    const { loadConfig } = await import("./config.js");
    const compaction = loadConfig().agents?.defaults?.compaction;

    expect(compaction?.mode).toBe("safeguard");
    expect(compaction?.reserveTokensFloor).toBe(9000);
  });
});
});

View File

@@ -139,6 +139,27 @@ export function applyContextPruningDefaults(cfg: ClawdbotConfig): ClawdbotConfig
};
}
/**
 * Fills in the default compaction mode for agent defaults.
 *
 * @param cfg Configuration to complete; it is never mutated.
 * @returns `cfg` itself when it has no `agents.defaults` or when
 *   `agents.defaults.compaction.mode` is already set; otherwise a copy whose
 *   `agents.defaults.compaction.mode` is "safeguard", with every other
 *   setting carried over.
 */
export function applyCompactionDefaults(cfg: ClawdbotConfig): ClawdbotConfig {
  const agentDefaults = cfg.agents?.defaults;
  // Leave the config untouched when there is nothing to extend or the mode
  // was chosen explicitly.
  if (!agentDefaults || agentDefaults.compaction?.mode) return cfg;

  return {
    ...cfg,
    agents: {
      ...cfg.agents,
      defaults: {
        ...agentDefaults,
        compaction: { ...agentDefaults.compaction, mode: "safeguard" },
      },
    },
  };
}
export function resetSessionDefaultsWarningForTests() {
defaultWarnState = { warned: false };
}

View File

@@ -13,6 +13,7 @@ import {
} from "../infra/shell-env.js";
import { DuplicateAgentDirError, findDuplicateAgentDirs } from "./agent-dirs.js";
import {
applyCompactionDefaults,
applyContextPruningDefaults,
applyLoggingDefaults,
applyMessageDefaults,
@@ -334,9 +335,11 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
}
}
const cfg = applyModelDefaults(
applyContextPruningDefaults(
applySessionDefaults(
applyLoggingDefaults(applyMessageDefaults(validated.data as ClawdbotConfig)),
applyCompactionDefaults(
applyContextPruningDefaults(
applySessionDefaults(
applyLoggingDefaults(applyMessageDefaults(validated.data as ClawdbotConfig)),
),
),
),
);
@@ -380,7 +383,9 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
const hash = hashConfigRaw(null);
const config = applyTalkApiKey(
applyModelDefaults(
applyContextPruningDefaults(applySessionDefaults(applyMessageDefaults({}))),
applyCompactionDefaults(
applyContextPruningDefaults(applySessionDefaults(applyMessageDefaults({}))),
),
),
);
const legacyIssues: LegacyConfigIssue[] = [];