fix: add safeguard compaction tool summaries
This commit is contained in:
@@ -9,7 +9,12 @@ Docs: https://docs.clawd.bot
|
|||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
- Plugins: surface plugin load/register/config errors in gateway logs with plugin/source context.
|
- Plugins: surface plugin load/register/config errors in gateway logs with plugin/source context.
|
||||||
|
- Agents: propagate accountId into embedded runs so sub-agent announce routing honors the originating account. (#1058)
- Compaction: include tool failure summaries in safeguard compaction to prevent retry loops. (#1084)
|
||||||
|
|
||||||
## 2026.1.18-5
|
## 2026.1.18-5
|
||||||
|
|
||||||
|
|||||||
98
src/agents/pi-extensions/compaction-safeguard.test.ts
Normal file
98
src/agents/pi-extensions/compaction-safeguard.test.ts
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
|
import { __testing } from "./compaction-safeguard.js";
|
||||||
|
|
||||||
|
const { collectToolFailures, formatToolFailuresSection } = __testing;
|
||||||
|
|
||||||
|
describe("compaction-safeguard tool failures", () => {
  // Builds a toolResult message; each test supplies only the fields it cares about.
  const toolResult = (fields: Record<string, unknown>): AgentMessage =>
    ({ role: "toolResult", timestamp: Date.now(), ...fields }) as AgentMessage;

  // Wraps a string in the single-text-block content shape used by tool results.
  const textContent = (text: string) => [{ type: "text", text }];

  it("formats tool failures with meta and summary", () => {
    const failed = toolResult({
      toolCallId: "call-1",
      toolName: "exec",
      isError: true,
      details: { status: "failed", exitCode: 1 },
      content: textContent("ENOENT: missing file"),
    });
    const succeeded = toolResult({
      toolCallId: "call-2",
      toolName: "read",
      isError: false,
      content: textContent("ok"),
    });

    // Only the errored result should be collected.
    const collected = collectToolFailures([failed, succeeded]);
    expect(collected).toHaveLength(1);

    const rendered = formatToolFailuresSection(collected);
    expect(rendered).toContain("## Tool Failures");
    expect(rendered).toContain("exec (status=failed exitCode=1): ENOENT: missing file");
  });

  it("dedupes by toolCallId and handles empty output", () => {
    const firstReport = toolResult({
      toolCallId: "call-1",
      toolName: "exec",
      isError: true,
      details: { exitCode: 2 },
      content: [],
    });
    // Same toolCallId as above: must be ignored in favour of the first report.
    const duplicateReport = toolResult({
      toolCallId: "call-1",
      toolName: "exec",
      isError: true,
      content: textContent("ignored"),
    });

    const collected = collectToolFailures([firstReport, duplicateReport]);
    expect(collected).toHaveLength(1);

    // No text output but meta is present, so the summary falls back to "failed".
    const rendered = formatToolFailuresSection(collected);
    expect(rendered).toContain("exec (exitCode=2): failed");
  });

  it("caps the number of failures and adds overflow line", () => {
    // Nine distinct failures: one more than the section lists individually.
    const manyFailures: AgentMessage[] = Array.from({ length: 9 }, (_, idx) =>
      toolResult({
        toolCallId: `call-${idx}`,
        toolName: "exec",
        isError: true,
        content: textContent(`error ${idx}`),
      }),
    );

    const rendered = formatToolFailuresSection(collectToolFailures(manyFailures));
    expect(rendered).toContain("## Tool Failures");
    expect(rendered).toContain("...and 1 more");
  });

  it("omits section when there are no tool failures", () => {
    const onlySuccess = toolResult({
      toolCallId: "ok",
      toolName: "exec",
      isError: false,
      content: textContent("ok"),
    });

    const rendered = formatToolFailuresSection(collectToolFailures([onlySuccess]));
    expect(rendered).toBe("");
  });
});
|
||||||
@@ -10,6 +10,101 @@ const FALLBACK_SUMMARY =
|
|||||||
const TURN_PREFIX_INSTRUCTIONS =
|
const TURN_PREFIX_INSTRUCTIONS =
|
||||||
"This summary covers the prefix of a split turn. Focus on the original request," +
|
"This summary covers the prefix of a split turn. Focus on the original request," +
|
||||||
" early progress, and any details needed to understand the retained suffix.";
|
" early progress, and any details needed to understand the retained suffix.";
|
||||||
|
// Upper bound on the failures listed individually by formatToolFailuresSection;
// anything beyond it is reported as a single "...and N more" line.
const MAX_TOOL_FAILURES = 8;
|
||||||
|
// Maximum length of one failure summary; collectToolFailures passes it to
// truncateFailureText, which cuts longer text and appends "...".
const MAX_TOOL_FAILURE_CHARS = 240;
|
||||||
|
|
||||||
|
/** One failed tool call, condensed for the "Tool Failures" section of a compaction summary. */
type ToolFailure = {
|
||||||
|
// Identifier of the originating tool call; collectToolFailures dedupes on it.
toolCallId: string;
|
||||||
|
// Tool name; collectToolFailures substitutes "tool" when the name is missing or blank.
toolName: string;
|
||||||
|
// Whitespace-collapsed, length-capped output text, or a "failed" placeholder when there is none.
summary: string;
|
||||||
|
// Optional "status=... exitCode=..." fragment produced by formatToolFailureMeta.
meta?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Flattens raw tool output onto a single line.
 *
 * Leading and trailing whitespace is removed and every interior run of
 * whitespace (spaces, tabs, line breaks) becomes exactly one space.
 *
 * @param text Raw text taken from a tool result.
 * @returns The single-line form; an empty string when `text` is blank.
 */
function normalizeFailureText(text: string): string {
  const words = text.trim().split(/\s+/);
  return words.join(" ");
}
|
||||||
|
|
||||||
|
/**
 * Caps `text` at `maxChars` UTF-16 code units, marking any cut with "...".
 *
 * Text that already fits is returned unchanged. Otherwise the result is a
 * prefix of `text` followed by "...", and is never longer than `maxChars`.
 *
 * @param text Text to cap.
 * @param maxChars Maximum length of the returned string, ellipsis included.
 * @returns `text` itself, or a shortened copy ending in "...".
 */
function truncateFailureText(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;

  const ellipsis = "...";
  // No room for any text next to the ellipsis: emit as much of the ellipsis as
  // fits, so the result still respects maxChars.
  if (maxChars <= ellipsis.length) {
    return ellipsis.slice(0, Math.max(0, maxChars));
  }

  let end = maxChars - ellipsis.length;
  // Do not cut through a surrogate pair: if the last kept code unit is a high
  // surrogate, its low half would be sliced away, leaving invalid UTF-16.
  const lastKept = text.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;

  return `${text.slice(0, end)}${ellipsis}`;
}
|
||||||
|
|
||||||
|
/**
 * Renders the `status` / `exitCode` fields of a tool result's details as a
 * short "status=... exitCode=..." fragment.
 *
 * @param details Arbitrary details value attached to a tool result.
 * @returns The fragment, or `undefined` when `details` is not an object or
 *   carries neither a non-empty string `status` nor a finite numeric `exitCode`.
 */
function formatToolFailureMeta(details: unknown): string | undefined {
  if (typeof details !== "object" || details === null) return undefined;

  const { status, exitCode } = details as Record<string, unknown>;
  const fragments: string[] = [];

  // Empty status strings are skipped, like any non-string value.
  if (typeof status === "string" && status) {
    fragments.push(`status=${status}`);
  }
  // NaN and +/-Infinity are not meaningful exit codes, so they are skipped.
  if (typeof exitCode === "number" && Number.isFinite(exitCode)) {
    fragments.push(`exitCode=${exitCode}`);
  }

  if (fragments.length === 0) return undefined;
  return fragments.join(" ");
}
|
||||||
|
|
||||||
|
/**
 * Concatenates the text blocks of a tool result's content.
 *
 * Only array entries shaped like `{ type: "text", text: string }` contribute;
 * everything else (other block types, nulls, primitives) is ignored.
 *
 * @param content The `content` value of a tool result message.
 * @returns The text of all text blocks joined with "\n", or "" when `content`
 *   is not an array or contains no text blocks.
 */
function extractToolResultText(content: unknown): string {
  if (!Array.isArray(content)) return "";

  const isTextBlock = (entry: unknown): entry is { type: "text"; text: string } => {
    if (typeof entry !== "object" || entry === null) return false;
    const candidate = entry as { type?: unknown; text?: unknown };
    return candidate.type === "text" && typeof candidate.text === "string";
  };

  return (content as unknown[])
    .filter(isTextBlock)
    .map((entry) => entry.text)
    .join("\n");
}
|
||||||
|
|
||||||
|
/**
 * Extracts one ToolFailure per failed tool call from a list of agent messages.
 *
 * A message is considered only when its `role` is "toolResult" and `isError`
 * is exactly `true`. Results without a non-empty string `toolCallId` are
 * skipped, and only the first result for each `toolCallId` is kept.
 *
 * @param messages Messages to scan, in order.
 * @returns Failures in the order their first error result appears.
 */
function collectToolFailures(messages: AgentMessage[]): ToolFailure[] {
  const handledIds = new Set<string>();

  return messages.flatMap((entry): ToolFailure[] => {
    if (!entry || typeof entry !== "object") return [];

    const candidate = entry as {
      role?: unknown;
      toolCallId?: unknown;
      toolName?: unknown;
      content?: unknown;
      details?: unknown;
      isError?: unknown;
    };
    if (candidate.role !== "toolResult" || candidate.isError !== true) return [];

    // Without a usable id the result cannot be deduped, so it is not reported.
    const toolCallId = typeof candidate.toolCallId === "string" ? candidate.toolCallId : "";
    if (toolCallId === "" || handledIds.has(toolCallId)) return [];
    handledIds.add(toolCallId);

    // Blank or non-string names fall back to the generic label "tool".
    let toolName = "tool";
    if (typeof candidate.toolName === "string" && candidate.toolName.trim()) {
      toolName = candidate.toolName;
    }

    const outputText = normalizeFailureText(extractToolResultText(candidate.content));
    const meta = formatToolFailureMeta(candidate.details);

    // With no output text, use a placeholder; the shorter one when meta already
    // gives some detail about the failure.
    let summarySource = outputText;
    if (!summarySource) {
      summarySource = meta ? "failed" : "failed (no output)";
    }
    const summary = truncateFailureText(summarySource, MAX_TOOL_FAILURE_CHARS);

    return [{ toolCallId, toolName, summary, meta }];
  });
}
|
||||||
|
|
||||||
|
/**
 * Renders collected tool failures as a markdown "## Tool Failures" section.
 *
 * At most MAX_TOOL_FAILURES entries are listed, one bullet each in the form
 * `- <toolName> (<meta>): <summary>` (the parenthesised part is omitted when
 * there is no meta). Any remaining failures are summarised by a final
 * "...and N more" bullet.
 *
 * @param failures Failures to render, in display order.
 * @returns The section prefixed by two newlines, or "" when `failures` is empty.
 */
function formatToolFailuresSection(failures: ToolFailure[]): string {
  if (failures.length === 0) return "";

  const bullets: string[] = [];
  for (const { toolName, meta, summary } of failures.slice(0, MAX_TOOL_FAILURES)) {
    const metaSuffix = meta ? ` (${meta})` : "";
    bullets.push(`- ${toolName}${metaSuffix}: ${summary}`);
  }

  const overflow = failures.length - MAX_TOOL_FAILURES;
  if (overflow > 0) {
    bullets.push(`- ...and ${overflow} more`);
  }

  return `\n\n## Tool Failures\n${bullets.join("\n")}`;
}
|
||||||
|
|
||||||
function computeFileLists(fileOps: FileOperations): {
|
function computeFileLists(fileOps: FileOperations): {
|
||||||
readFiles: string[];
|
readFiles: string[];
|
||||||
@@ -103,7 +198,12 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
|||||||
const { preparation, customInstructions, signal } = event;
|
const { preparation, customInstructions, signal } = event;
|
||||||
const { readFiles, modifiedFiles } = computeFileLists(preparation.fileOps);
|
const { readFiles, modifiedFiles } = computeFileLists(preparation.fileOps);
|
||||||
const fileOpsSummary = formatFileOperations(readFiles, modifiedFiles);
|
const fileOpsSummary = formatFileOperations(readFiles, modifiedFiles);
|
||||||
const fallbackSummary = `${FALLBACK_SUMMARY}${fileOpsSummary}`;
|
const toolFailures = collectToolFailures([
|
||||||
|
...preparation.messagesToSummarize,
|
||||||
|
...preparation.turnPrefixMessages,
|
||||||
|
]);
|
||||||
|
const toolFailureSection = formatToolFailuresSection(toolFailures);
|
||||||
|
const fallbackSummary = `${FALLBACK_SUMMARY}${toolFailureSection}${fileOpsSummary}`;
|
||||||
|
|
||||||
const model = ctx.model;
|
const model = ctx.model;
|
||||||
if (!model) {
|
if (!model) {
|
||||||
@@ -162,6 +262,7 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
|||||||
summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
|
summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
summary += toolFailureSection;
|
||||||
summary += fileOpsSummary;
|
summary += fileOpsSummary;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -189,3 +290,8 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test-only handle: compaction-safeguard.test.ts destructures these otherwise
// module-private helpers from it to exercise them directly.
export const __testing = {
|
||||||
|
collectToolFailures,
|
||||||
|
formatToolFailuresSection,
|
||||||
|
} as const;
|
||||||
|
|||||||
@@ -45,4 +45,35 @@ describe("config compaction settings", () => {
|
|||||||
expect(cfg.agents?.defaults?.compaction?.memoryFlush?.systemPrompt).toBe("Flush memory now.");
|
expect(cfg.agents?.defaults?.compaction?.memoryFlush?.systemPrompt).toBe("Flush memory now.");
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("defaults compaction mode to safeguard", async () => {
  await withTempHome(async (home) => {
    // User config that sets a compaction option but leaves `mode` unspecified.
    const userConfig = {
      agents: {
        defaults: {
          compaction: {
            reserveTokensFloor: 9000,
          },
        },
      },
    };
    const configDir = path.join(home, ".clawdbot");
    const configFile = path.join(configDir, "clawdbot.json");

    await fs.mkdir(configDir, { recursive: true });
    await fs.writeFile(configFile, JSON.stringify(userConfig, null, 2), "utf-8");

    // Re-import the config module after resetting the module registry.
    vi.resetModules();
    const { loadConfig } = await import("./config.js");
    const compaction = loadConfig().agents?.defaults?.compaction;

    // The mode is filled in while the explicitly configured value is kept.
    expect(compaction?.mode).toBe("safeguard");
    expect(compaction?.reserveTokensFloor).toBe(9000);
  });
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -139,6 +139,27 @@ export function applyContextPruningDefaults(cfg: ClawdbotConfig): ClawdbotConfig
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fills in `agents.defaults.compaction.mode` with "safeguard" when it is unset.
 *
 * The input is returned as-is when `agents.defaults` is absent or when a
 * compaction mode is already configured. Otherwise a copy is returned in which
 * only the mode is added; every other setting is carried over unchanged.
 *
 * @param cfg Configuration to apply the default to; it is not mutated.
 * @returns `cfg` itself, or a copy with the compaction mode set.
 */
export function applyCompactionDefaults(cfg: ClawdbotConfig): ClawdbotConfig {
  const agentDefaults = cfg.agents?.defaults;
  const existing = agentDefaults?.compaction;

  // Nothing to do without an agents.defaults section or with an explicit mode.
  if (!agentDefaults || existing?.mode) return cfg;

  const compaction = { ...existing, mode: "safeguard" as const };
  const defaults = { ...agentDefaults, compaction };
  return { ...cfg, agents: { ...cfg.agents, defaults } };
}
|
||||||
|
|
||||||
export function resetSessionDefaultsWarningForTests() {
|
export function resetSessionDefaultsWarningForTests() {
|
||||||
defaultWarnState = { warned: false };
|
defaultWarnState = { warned: false };
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import {
|
|||||||
} from "../infra/shell-env.js";
|
} from "../infra/shell-env.js";
|
||||||
import { DuplicateAgentDirError, findDuplicateAgentDirs } from "./agent-dirs.js";
|
import { DuplicateAgentDirError, findDuplicateAgentDirs } from "./agent-dirs.js";
|
||||||
import {
|
import {
|
||||||
|
applyCompactionDefaults,
|
||||||
applyContextPruningDefaults,
|
applyContextPruningDefaults,
|
||||||
applyLoggingDefaults,
|
applyLoggingDefaults,
|
||||||
applyMessageDefaults,
|
applyMessageDefaults,
|
||||||
@@ -334,9 +335,11 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
const cfg = applyModelDefaults(
|
const cfg = applyModelDefaults(
|
||||||
applyContextPruningDefaults(
|
applyCompactionDefaults(
|
||||||
applySessionDefaults(
|
applyContextPruningDefaults(
|
||||||
applyLoggingDefaults(applyMessageDefaults(validated.data as ClawdbotConfig)),
|
applySessionDefaults(
|
||||||
|
applyLoggingDefaults(applyMessageDefaults(validated.data as ClawdbotConfig)),
|
||||||
|
),
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
@@ -380,7 +383,9 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
|
|||||||
const hash = hashConfigRaw(null);
|
const hash = hashConfigRaw(null);
|
||||||
const config = applyTalkApiKey(
|
const config = applyTalkApiKey(
|
||||||
applyModelDefaults(
|
applyModelDefaults(
|
||||||
applyContextPruningDefaults(applySessionDefaults(applyMessageDefaults({}))),
|
applyCompactionDefaults(
|
||||||
|
applyContextPruningDefaults(applySessionDefaults(applyMessageDefaults({}))),
|
||||||
|
),
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
const legacyIssues: LegacyConfigIssue[] = [];
|
const legacyIssues: LegacyConfigIssue[] = [];
|
||||||
|
|||||||
Reference in New Issue
Block a user