fix: add safeguard compaction tool summaries

2026-01-19 01:44:08 +00:00
parent af1004ebbd
commit dd1b08b3e8
6 changed files with 271 additions and 5 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,12 @@ Docs: https://docs.clawd.bot
 ### Fixes
 - Plugins: surface plugin load/register/config errors in gateway logs with plugin/source context.
 - Agents: propagate accountId into embedded runs so sub-agent announce routing honors the originating account. (#1058)
 - Compaction: include tool failure summaries in safeguard compaction to prevent retry loops. (#1084)
 ## 2026.1.18-5
--- a/src/agents/pi-extensions/compaction-safeguard.test.ts
+++ b/src/agents/pi-extensions/compaction-safeguard.test.ts
@@ -0,0 +1,98 @@
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 import { describe, expect, it } from "vitest";
 import { __testing } from "./compaction-safeguard.js";
 // The helpers under test are module-private; they are reached through the `__testing` export.
 const { collectToolFailures, formatToolFailuresSection } = __testing;
 // Covers collection of errored tool results and rendering of the "## Tool Failures" section.
 describe("compaction-safeguard tool failures", () => {
  // One errored result with details plus one successful result: only the errored one is reported,
  // rendered as "<tool> (<meta>): <text>".
  it("formats tool failures with meta and summary", () => {
    const messages: AgentMessage[] = [
      {
        role: "toolResult",
        toolCallId: "call-1",
        toolName: "exec",
        isError: true,
        details: { status: "failed", exitCode: 1 },
        content: [{ type: "text", text: "ENOENT: missing file" }],
        timestamp: Date.now(),
      },
      {
        role: "toolResult",
        toolCallId: "call-2",
        toolName: "read",
        isError: false,
        content: [{ type: "text", text: "ok" }],
        timestamp: Date.now(),
      },
    ];
    const failures = collectToolFailures(messages);
    expect(failures).toHaveLength(1);
    const section = formatToolFailuresSection(failures);
    expect(section).toContain("## Tool Failures");
    expect(section).toContain("exec (status=failed exitCode=1): ENOENT: missing file");
  });
  // Two errored results share a toolCallId: the first one wins, and because it has no text
  // content but does have meta, its summary falls back to "failed".
  it("dedupes by toolCallId and handles empty output", () => {
    const messages: AgentMessage[] = [
      {
        role: "toolResult",
        toolCallId: "call-1",
        toolName: "exec",
        isError: true,
        details: { exitCode: 2 },
        content: [],
        timestamp: Date.now(),
      },
      {
        role: "toolResult",
        toolCallId: "call-1",
        toolName: "exec",
        isError: true,
        content: [{ type: "text", text: "ignored" }],
        timestamp: Date.now(),
      },
    ];
    const failures = collectToolFailures(messages);
    expect(failures).toHaveLength(1);
    const section = formatToolFailuresSection(failures);
    expect(section).toContain("exec (exitCode=2): failed");
  });
  // Nine distinct failures are fed in; the expected overflow line reports exactly one
  // failure beyond the listed ones.
  it("caps the number of failures and adds overflow line", () => {
    const messages: AgentMessage[] = Array.from({ length: 9 }, (_, idx) => ({
      role: "toolResult",
      toolCallId: `call-${idx}`,
      toolName: "exec",
      isError: true,
      content: [{ type: "text", text: `error ${idx}` }],
      timestamp: Date.now(),
    }));
    const failures = collectToolFailures(messages);
    const section = formatToolFailuresSection(failures);
    expect(section).toContain("## Tool Failures");
    expect(section).toContain("...and 1 more");
  });
  // With no errored results the formatter returns an empty string rather than an empty heading.
  it("omits section when there are no tool failures", () => {
    const messages: AgentMessage[] = [
      {
        role: "toolResult",
        toolCallId: "ok",
        toolName: "exec",
        isError: false,
        content: [{ type: "text", text: "ok" }],
        timestamp: Date.now(),
      },
    ];
    const failures = collectToolFailures(messages);
    const section = formatToolFailuresSection(failures);
    expect(section).toBe("");
  });
 });
--- a/src/agents/pi-extensions/compaction-safeguard.ts
+++ b/src/agents/pi-extensions/compaction-safeguard.ts
@@ -10,6 +10,101 @@ const FALLBACK_SUMMARY =
 const TURN_PREFIX_INSTRUCTIONS =
  "This summary covers the prefix of a split turn. Focus on the original request," +
  " early progress, and any details needed to understand the retained suffix.";
 /** Maximum number of failures listed in the "## Tool Failures" section; any extras collapse into one "...and N more" line. */
 const MAX_TOOL_FAILURES = 8;
 /** Maximum length (in UTF-16 code units, i.e. `string.length`) of one failure summary before it is truncated. */
 const MAX_TOOL_FAILURE_CHARS = 240;
 /** Condensed record of a single errored tool result, as rendered in the "## Tool Failures" section. */
 type ToolFailure = {
  /** Id of the tool call that failed; collectToolFailures keeps only the first failure per id. */
  toolCallId: string;
  /** Name of the tool, or the placeholder "tool" when the result carried no usable name. */
  toolName: string;
  /** Whitespace-normalized, length-capped error text, or a "failed…" placeholder when there was no text. */
  summary: string;
  /** Optional "status=… exitCode=…" string derived from the result's `details`. */
  meta?: string;
 };
 /**
  * Flattens tool output onto a single line.
  *
  * Every run of whitespace (spaces, tabs, newlines) becomes one space, and
  * leading/trailing whitespace is dropped.
  *
  * @param text Raw text taken from a tool result.
  * @returns The single-line form; empty when the input is empty or all whitespace.
  */
 function normalizeFailureText(text: string): string {
  // Splitting on whitespace runs leaves empty tokens at the edges; discard them before re-joining.
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
 }
 /**
  * Shortens `text` to at most `maxChars` UTF-16 code units, marking the cut with "...".
  *
  * Text that already fits is returned unchanged. The cut never lands in the
  * middle of a surrogate pair, so the result cannot end in a lone high
  * surrogate (which would render as a replacement character or break
  * downstream encoders).
  *
  * @param text Text to shorten.
  * @param maxChars Upper bound on the length of the returned string.
  * @returns `text` itself when it fits, otherwise a prefix followed by "...".
  */
 function truncateFailureText(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;
  const ellipsis = "...";
  // With no room for any prefix, return as much of the ellipsis as the budget allows
  // so the result still honors maxChars.
  if (maxChars <= ellipsis.length) return ellipsis.slice(0, Math.max(0, maxChars));
  let end = maxChars - ellipsis.length;
  // If the last kept code unit is a high surrogate, its low half would be cut off: drop it too.
  const lastUnit = text.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  return `${text.slice(0, end)}${ellipsis}`;
 }
 /**
  * Builds the short "status=… exitCode=…" annotation for a failed tool result.
  *
  * Only a non-empty string `status` and a finite numeric `exitCode` are used;
  * anything else in `details` is ignored.
  *
  * @param details The tool result's `details` payload (any shape).
  * @returns The space-separated annotation, or `undefined` when neither field is usable.
  */
 function formatToolFailureMeta(details: unknown): string | undefined {
  if (details === null || typeof details !== "object") return undefined;
  const { status, exitCode } = details as Record<string, unknown>;
  const pieces: string[] = [];
  if (typeof status === "string" && status !== "") {
    pieces.push(`status=${status}`);
  }
  // Number.isFinite filters out NaN and ±Infinity; 0 is a valid exit code and is kept.
  if (typeof exitCode === "number" && Number.isFinite(exitCode)) {
    pieces.push(`exitCode=${exitCode}`);
  }
  return pieces.length === 0 ? undefined : pieces.join(" ");
 }
 /**
  * Pulls the plain text out of a tool result's content blocks.
  *
  * Only blocks shaped like `{ type: "text", text: string }` contribute; every
  * other entry (other block types, nulls, primitives) is skipped.
  *
  * @param content The tool result's `content` value (any shape).
  * @returns The text blocks joined with newlines, or "" when `content` is not an array or has no text.
  */
 function extractToolResultText(content: unknown): string {
  if (!Array.isArray(content)) return "";
  const isTextBlock = (item: unknown): item is { type: "text"; text: string } => {
    if (item === null || typeof item !== "object") return false;
    const { type, text } = item as { type?: unknown; text?: unknown };
    return type === "text" && typeof text === "string";
  };
  return content
    .filter(isTextBlock)
    .map((textBlock) => textBlock.text)
    .join("\n");
 }
 /**
  * Gathers one ToolFailure per failed tool call found in `messages`.
  *
  * A message counts as a failure when its role is "toolResult" and `isError`
  * is exactly `true`. Results without a non-empty string `toolCallId` are
  * skipped, and only the first failure seen for a given id is kept.
  *
  * @param messages Messages to scan, in order.
  * @returns The failures in first-seen order (not capped here).
  */
 function collectToolFailures(messages: AgentMessage[]): ToolFailure[] {
  const seenCallIds = new Set<string>();
  const collected: ToolFailure[] = [];
  for (const entry of messages) {
    if (!entry || typeof entry !== "object") continue;
    const candidate = entry as {
      role?: unknown;
      toolCallId?: unknown;
      toolName?: unknown;
      content?: unknown;
      details?: unknown;
      isError?: unknown;
    };
    if (candidate.role !== "toolResult" || candidate.isError !== true) continue;

    const { toolCallId: callId, toolName: rawName } = candidate;
    if (typeof callId !== "string" || callId === "" || seenCallIds.has(callId)) continue;
    seenCallIds.add(callId);

    // A blank or non-string name falls back to the generic label "tool".
    const toolName = typeof rawName === "string" && rawName.trim() ? rawName : "tool";
    const text = normalizeFailureText(extractToolResultText(candidate.content));
    const meta = formatToolFailureMeta(candidate.details);

    // With no text, the placeholder says "no output" only when there is no meta to show either.
    let summarySource = text;
    if (!summarySource) {
      summarySource = meta ? "failed" : "failed (no output)";
    }
    collected.push({
      toolCallId: callId,
      toolName,
      summary: truncateFailureText(summarySource, MAX_TOOL_FAILURE_CHARS),
      meta,
    });
  }
  return collected;
 }
 /**
  * Renders collected failures as a markdown section to append to a summary.
  *
  * At most MAX_TOOL_FAILURES entries are listed as "- <tool> (<meta>): <summary>"
  * bullets; any remainder is reported in a final "- ...and N more" bullet.
  *
  * @param failures Failures to render, in display order.
  * @returns "" when there are no failures; otherwise the section text, starting with a blank-line separator.
  */
 function formatToolFailuresSection(failures: ToolFailure[]): string {
  const total = failures.length;
  if (total === 0) return "";
  const bullets: string[] = [];
  for (const { toolName, meta, summary } of failures.slice(0, MAX_TOOL_FAILURES)) {
    const metaSuffix = meta ? ` (${meta})` : "";
    bullets.push(`- ${toolName}${metaSuffix}: ${summary}`);
  }
  const hiddenCount = total - MAX_TOOL_FAILURES;
  if (hiddenCount > 0) {
    bullets.push(`- ...and ${hiddenCount} more`);
  }
  return `\n\n## Tool Failures\n${bullets.join("\n")}`;
 }
 function computeFileLists(fileOps: FileOperations): {
  readFiles: string[];
@@ -103,7 +198,12 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
    const { preparation, customInstructions, signal } = event;
    const { readFiles, modifiedFiles } = computeFileLists(preparation.fileOps);
    const fileOpsSummary = formatFileOperations(readFiles, modifiedFiles);
-    const fallbackSummary = `${FALLBACK_SUMMARY}${fileOpsSummary}`;
+    const toolFailures = collectToolFailures([
      ...preparation.messagesToSummarize,
      ...preparation.turnPrefixMessages,
    ]);
    const toolFailureSection = formatToolFailuresSection(toolFailures);
    const fallbackSummary = `${FALLBACK_SUMMARY}${toolFailureSection}${fileOpsSummary}`;
    const model = ctx.model;
    if (!model) {
@@ -162,6 +262,7 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
        summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
      }
      summary += toolFailureSection;
      summary += fileOpsSummary;
      return {
@@ -189,3 +290,8 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
    }
  });
 }
 /**
  * Test-only handle on otherwise module-private helpers.
  * Exposes the failure collection and formatting functions so unit tests can call them directly.
  */
 export const __testing = {
  collectToolFailures,
  formatToolFailuresSection,
 } as const;
--- a/src/config/config.compaction-settings.test.ts
+++ b/src/config/config.compaction-settings.test.ts
@@ -45,4 +45,35 @@ describe("config compaction settings", () => {
      expect(cfg.agents?.defaults?.compaction?.memoryFlush?.systemPrompt).toBe("Flush memory now.");
    });
  });
  // A config that sets a compaction option but no `mode` should load with mode "safeguard"
  // while keeping the explicitly configured value.
  it("defaults compaction mode to safeguard", async () => {
    await withTempHome(async (home) => {
      const configDir = path.join(home, ".clawdbot");
      await fs.mkdir(configDir, { recursive: true });
      // Write a config file whose compaction block deliberately omits `mode`.
      await fs.writeFile(
        path.join(configDir, "clawdbot.json"),
        JSON.stringify(
          {
            agents: {
              defaults: {
                compaction: {
                  reserveTokensFloor: 9000,
                },
              },
            },
          },
          null,
          2,
        ),
        "utf-8",
      );
      // Re-import config.js after resetting the module registry.
      // NOTE(review): presumably so the loader resolves paths against the temp home — confirm against withTempHome.
      vi.resetModules();
      const { loadConfig } = await import("./config.js");
      const cfg = loadConfig();
      expect(cfg.agents?.defaults?.compaction?.mode).toBe("safeguard");
      expect(cfg.agents?.defaults?.compaction?.reserveTokensFloor).toBe(9000);
    });
  });
 });
--- a/src/config/defaults.ts
+++ b/src/config/defaults.ts
@@ -139,6 +139,27 @@ export function applyContextPruningDefaults(cfg: ClawdbotConfig): ClawdbotConfig
  };
 }
 /**
  * Fills in the default compaction mode ("safeguard") on a loaded config.
  *
  * The config is returned untouched when it has no `agents.defaults` block or
  * when a compaction mode is already set. Otherwise a shallow-copied config is
  * returned with `agents.defaults.compaction.mode` set; the input is not mutated
  * and all other compaction settings are carried over.
  *
  * @param cfg Config to apply the default to.
  * @returns `cfg` itself when nothing changes, otherwise a new config object.
  */
 export function applyCompactionDefaults(cfg: ClawdbotConfig): ClawdbotConfig {
  const agentDefaults = cfg.agents?.defaults;
  // Nothing to do without a defaults block, or when the user already picked a mode.
  if (!agentDefaults || agentDefaults.compaction?.mode) return cfg;
  return {
    ...cfg,
    agents: {
      ...cfg.agents,
      defaults: {
        ...agentDefaults,
        compaction: { ...agentDefaults.compaction, mode: "safeguard" },
      },
    },
  };
 }
 /**
  * Test helper: replaces the module-level `defaultWarnState` with a fresh `{ warned: false }`.
  * NOTE(review): presumably lets each test observe the session-defaults warning again — confirm where `defaultWarnState` is read.
  */
 export function resetSessionDefaultsWarningForTests() {
  defaultWarnState = { warned: false };
 }
--- a/src/config/io.ts
+++ b/src/config/io.ts
@@ -13,6 +13,7 @@ import {
 } from "../infra/shell-env.js";
 import { DuplicateAgentDirError, findDuplicateAgentDirs } from "./agent-dirs.js";
 import {
  applyCompactionDefaults,
  applyContextPruningDefaults,
  applyLoggingDefaults,
  applyMessageDefaults,
@@ -334,9 +335,11 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
        }
      }
      const cfg = applyModelDefaults(
-        applyContextPruningDefaults(
+        applyCompactionDefaults(
-          applySessionDefaults(
+          applyContextPruningDefaults(
-            applyLoggingDefaults(applyMessageDefaults(validated.data as ClawdbotConfig)),
+            applySessionDefaults(
              applyLoggingDefaults(applyMessageDefaults(validated.data as ClawdbotConfig)),
            ),
          ),
        ),
      );
@@ -380,7 +383,9 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
      const hash = hashConfigRaw(null);
      const config = applyTalkApiKey(
        applyModelDefaults(
-          applyContextPruningDefaults(applySessionDefaults(applyMessageDefaults({}))),
+          applyCompactionDefaults(
            applyContextPruningDefaults(applySessionDefaults(applyMessageDefaults({}))),
          ),
        ),
      );
      const legacyIssues: LegacyConfigIssue[] = [];