fix: add safeguard compaction tool summaries

2026-01-19 01:44:08 +00:00
parent af1004ebbd
commit dd1b08b3e8
6 changed files with 267 additions and 5 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,8 @@ Docs: https://docs.clawd.bot

 ### Fixes
 - Plugins: surface plugin load/register/config errors in gateway logs with plugin/source context.
 - Agents: propagate accountId into embedded runs so sub-agent announce routing honors the originating account. (#1058)
+- Compaction: include tool failure summaries in safeguard compaction to prevent retry loops. (#1084)

 ## 2026.1.18-5

--- a/src/agents/pi-extensions/compaction-safeguard.test.ts
+++ b/src/agents/pi-extensions/compaction-safeguard.test.ts
@@ -0,0 +1,98 @@
+import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import { describe, expect, it } from "vitest";
+
+import { __testing } from "./compaction-safeguard.js";
+
+const { collectToolFailures, formatToolFailuresSection } = __testing;
+
+describe("compaction-safeguard tool failures", () => {
+  it("formats tool failures with meta and summary", () => {
+    const messages: AgentMessage[] = [
+      {
+        role: "toolResult",
+        toolCallId: "call-1",
+        toolName: "exec",
+        isError: true,
+        details: { status: "failed", exitCode: 1 },
+        content: [{ type: "text", text: "ENOENT: missing file" }],
+        timestamp: Date.now(),
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call-2",
+        toolName: "read",
+        isError: false,
+        content: [{ type: "text", text: "ok" }],
+        timestamp: Date.now(),
+      },
+    ];
+
+    const failures = collectToolFailures(messages);
+    expect(failures).toHaveLength(1);
+
+    const section = formatToolFailuresSection(failures);
+    expect(section).toContain("## Tool Failures");
+    expect(section).toContain("exec (status=failed exitCode=1): ENOENT: missing file");
+  });
+
+  it("dedupes by toolCallId and handles empty output", () => {
+    const messages: AgentMessage[] = [
+      {
+        role: "toolResult",
+        toolCallId: "call-1",
+        toolName: "exec",
+        isError: true,
+        details: { exitCode: 2 },
+        content: [],
+        timestamp: Date.now(),
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call-1",
+        toolName: "exec",
+        isError: true,
+        content: [{ type: "text", text: "ignored" }],
+        timestamp: Date.now(),
+      },
+    ];
+
+    const failures = collectToolFailures(messages);
+    expect(failures).toHaveLength(1);
+
+    const section = formatToolFailuresSection(failures);
+    expect(section).toContain("exec (exitCode=2): failed");
+  });
+
+  it("caps the number of failures and adds overflow line", () => {
+    const messages: AgentMessage[] = Array.from({ length: 9 }, (_, idx) => ({
+      role: "toolResult",
+      toolCallId: `call-${idx}`,
+      toolName: "exec",
+      isError: true,
+      content: [{ type: "text", text: `error ${idx}` }],
+      timestamp: Date.now(),
+    }));
+
+    const failures = collectToolFailures(messages);
+    const section = formatToolFailuresSection(failures);
+    expect(section).toContain("## Tool Failures");
+    expect(section).toContain("...and 1 more");
+  });
+
+  it("omits section when there are no tool failures", () => {
+    const messages: AgentMessage[] = [
+      {
+        role: "toolResult",
+        toolCallId: "ok",
+        toolName: "exec",
+        isError: false,
+        content: [{ type: "text", text: "ok" }],
+        timestamp: Date.now(),
+      },
+    ];
+
+    const failures = collectToolFailures(messages);
+    const section = formatToolFailuresSection(failures);
+    expect(section).toBe("");
+  });
+});
--- a/src/agents/pi-extensions/compaction-safeguard.ts
+++ b/src/agents/pi-extensions/compaction-safeguard.ts
@@ -10,6 +10,101 @@ const FALLBACK_SUMMARY =
 const TURN_PREFIX_INSTRUCTIONS =
  "This summary covers the prefix of a split turn. Focus on the original request," +
  " early progress, and any details needed to understand the retained suffix.";
+const MAX_TOOL_FAILURES = 8;
+const MAX_TOOL_FAILURE_CHARS = 240;
+
+type ToolFailure = {
+  toolCallId: string;
+  toolName: string;
+  summary: string;
+  meta?: string;
+};
+
+function normalizeFailureText(text: string): string {
+  return text.replace(/\s+/g, " ").trim();
+}
+
+function truncateFailureText(text: string, maxChars: number): string {
+  if (text.length <= maxChars) return text;
+  return `${text.slice(0, Math.max(0, maxChars - 3))}...`;
+}
+
+function formatToolFailureMeta(details: unknown): string | undefined {
+  if (!details || typeof details !== "object") return undefined;
+  const record = details as Record<string, unknown>;
+  const status = typeof record.status === "string" ? record.status : undefined;
+  const exitCode =
+    typeof record.exitCode === "number" && Number.isFinite(record.exitCode)
+      ? record.exitCode
+      : undefined;
+  const parts: string[] = [];
+  if (status) parts.push(`status=${status}`);
+  if (exitCode !== undefined) parts.push(`exitCode=${exitCode}`);
+  return parts.length > 0 ? parts.join(" ") : undefined;
+}
+
+function extractToolResultText(content: unknown): string {
+  if (!Array.isArray(content)) return "";
+  const parts: string[] = [];
+  for (const block of content) {
+    if (!block || typeof block !== "object") continue;
+    const rec = block as { type?: unknown; text?: unknown };
+    if (rec.type === "text" && typeof rec.text === "string") {
+      parts.push(rec.text);
+    }
+  }
+  return parts.join("\n");
+}
+
+function collectToolFailures(messages: AgentMessage[]): ToolFailure[] {
+  const failures: ToolFailure[] = [];
+  const seen = new Set<string>();
+
+  for (const message of messages) {
+    if (!message || typeof message !== "object") continue;
+    const role = (message as { role?: unknown }).role;
+    if (role !== "toolResult") continue;
+    const toolResult = message as {
+      toolCallId?: unknown;
+      toolName?: unknown;
+      content?: unknown;
+      details?: unknown;
+      isError?: unknown;
+    };
+    if (toolResult.isError !== true) continue;
+    const toolCallId =
+      typeof toolResult.toolCallId === "string" ? toolResult.toolCallId : "";
+    if (!toolCallId || seen.has(toolCallId)) continue;
+    seen.add(toolCallId);
+
+    const toolName =
+      typeof toolResult.toolName === "string" && toolResult.toolName.trim()
+        ? toolResult.toolName
+        : "tool";
+    const rawText = extractToolResultText(toolResult.content);
+    const meta = formatToolFailureMeta(toolResult.details);
+    const normalized = normalizeFailureText(rawText);
+    const summary = truncateFailureText(
+      normalized || (meta ? "failed" : "failed (no output)"),
+      MAX_TOOL_FAILURE_CHARS,
+    );
+    failures.push({ toolCallId, toolName, summary, meta });
+  }
+
+  return failures;
+}
+
+function formatToolFailuresSection(failures: ToolFailure[]): string {
+  if (failures.length === 0) return "";
+  const lines = failures.slice(0, MAX_TOOL_FAILURES).map((failure) => {
+    const meta = failure.meta ? ` (${failure.meta})` : "";
+    return `- ${failure.toolName}${meta}: ${failure.summary}`;
+  });
+  if (failures.length > MAX_TOOL_FAILURES) {
+    lines.push(`- ...and ${failures.length - MAX_TOOL_FAILURES} more`);
+  }
+  return `\n\n## Tool Failures\n${lines.join("\n")}`;
+}

 function computeFileLists(fileOps: FileOperations): {
  readFiles: string[];
@@ -103,7 +198,12 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
    const { preparation, customInstructions, signal } = event;
    const { readFiles, modifiedFiles } = computeFileLists(preparation.fileOps);
    const fileOpsSummary = formatFileOperations(readFiles, modifiedFiles);
-    const fallbackSummary = `${FALLBACK_SUMMARY}${fileOpsSummary}`;
+    const toolFailures = collectToolFailures([
+      ...preparation.messagesToSummarize,
+      ...preparation.turnPrefixMessages,
+    ]);
+    const toolFailureSection = formatToolFailuresSection(toolFailures);
+    const fallbackSummary = `${FALLBACK_SUMMARY}${toolFailureSection}${fileOpsSummary}`;

    const model = ctx.model;
    if (!model) {
@@ -162,6 +262,7 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
        summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
      }

+      summary += toolFailureSection;
      summary += fileOpsSummary;

      return {
@@ -189,3 +290,8 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
    }
  });
 }
+
+export const __testing = {
+  collectToolFailures,
+  formatToolFailuresSection,
+} as const;
--- a/src/config/config.compaction-settings.test.ts
+++ b/src/config/config.compaction-settings.test.ts
@@ -45,4 +45,35 @@ describe("config compaction settings", () => {
      expect(cfg.agents?.defaults?.compaction?.memoryFlush?.systemPrompt).toBe("Flush memory now.");
    });
  });
+
+  it("defaults compaction mode to safeguard", async () => {
+    await withTempHome(async (home) => {
+      const configDir = path.join(home, ".clawdbot");
+      await fs.mkdir(configDir, { recursive: true });
+      await fs.writeFile(
+        path.join(configDir, "clawdbot.json"),
+        JSON.stringify(
+          {
+            agents: {
+              defaults: {
+                compaction: {
+                  reserveTokensFloor: 9000,
+                },
+              },
+            },
+          },
+          null,
+          2,
+        ),
+        "utf-8",
+      );
+
+      vi.resetModules();
+      const { loadConfig } = await import("./config.js");
+      const cfg = loadConfig();
+
+      expect(cfg.agents?.defaults?.compaction?.mode).toBe("safeguard");
+      expect(cfg.agents?.defaults?.compaction?.reserveTokensFloor).toBe(9000);
+    });
+  });
 });
--- a/src/config/defaults.ts
+++ b/src/config/defaults.ts
@@ -139,6 +139,27 @@ export function applyContextPruningDefaults(cfg: ClawdbotConfig): ClawdbotConfig
  };
 }

+export function applyCompactionDefaults(cfg: ClawdbotConfig): ClawdbotConfig {
+  const defaults = cfg.agents?.defaults;
+  if (!defaults) return cfg;
+  const compaction = defaults?.compaction;
+  if (compaction?.mode) return cfg;
+
+  return {
+    ...cfg,
+    agents: {
+      ...cfg.agents,
+      defaults: {
+        ...defaults,
+        compaction: {
+          ...compaction,
+          mode: "safeguard",
+        },
+      },
+    },
+  };
+}
+
 export function resetSessionDefaultsWarningForTests() {
  defaultWarnState = { warned: false };
 }
--- a/src/config/io.ts
+++ b/src/config/io.ts
@@ -13,6 +13,7 @@ import {
 } from "../infra/shell-env.js";
 import { DuplicateAgentDirError, findDuplicateAgentDirs } from "./agent-dirs.js";
 import {
+  applyCompactionDefaults,
  applyContextPruningDefaults,
  applyLoggingDefaults,
  applyMessageDefaults,
@@ -334,9 +335,11 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
        }
      }
      const cfg = applyModelDefaults(
-        applyContextPruningDefaults(
-          applySessionDefaults(
-            applyLoggingDefaults(applyMessageDefaults(validated.data as ClawdbotConfig)),
+        applyCompactionDefaults(
+          applyContextPruningDefaults(
+            applySessionDefaults(
+              applyLoggingDefaults(applyMessageDefaults(validated.data as ClawdbotConfig)),
+            ),
          ),
        ),
      );
@@ -380,7 +383,9 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
      const hash = hashConfigRaw(null);
      const config = applyTalkApiKey(
        applyModelDefaults(
-          applyContextPruningDefaults(applySessionDefaults(applyMessageDefaults({}))),
+          applyCompactionDefaults(
+            applyContextPruningDefaults(applySessionDefaults(applyMessageDefaults({}))),
+          ),
        ),
      );
      const legacyIssues: LegacyConfigIssue[] = [];