Agents: safeguard compaction summarization

Shadow
2026-01-10 18:34:40 -06:00
committed by Peter Steinberger
parent 42ff634a9d
commit a96d299971
8 changed files with 248 additions and 0 deletions

40
docs/compaction.md Normal file
View File

@@ -0,0 +1,40 @@
---
summary: "Compaction modes and configuration"
read_when:
- You want to configure compaction summarization behavior
- You are tuning compaction settings in clawdbot.json
---
# Compaction
Compaction summarizes older session history so the conversation stays within the model's context window. The summary is stored in the session JSONL history and combined with the most recent messages.
## Modes
`agents.defaults.compaction.mode` controls how summaries are generated.
- `default` (default): uses the built-in compaction summarizer.
- `safeguard`: uses a chunked summarization pass to avoid context overflow for very long histories; the chunk sizing is sketched below. If chunked summarization fails, Clawdbot falls back to a minimal summary plus file-operation metadata.
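As a point of reference, the safeguard extension added in this commit caps each summarization chunk at 40% of the model's context window (`MAX_CHUNK_RATIO = 0.4`), so a single summarization call should not itself overflow the context. A minimal sketch of that sizing rule (the helper below is illustrative, not the extension's exact code):
```ts
// Sketch of the chunk-sizing rule used by safeguard mode: cap each
// summarization chunk at 40% of the model's context window, clamped to >= 1.
const MAX_CHUNK_RATIO = 0.4;

function maxChunkTokens(contextWindowTokens: number): number {
  return Math.max(1, Math.floor(contextWindowTokens * MAX_CHUNK_RATIO));
}

// Example: a 200k-token context window yields 80k-token chunks.
console.log(maxChunkTokens(200_000)); // 80000
```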
## Configuration
```json5
{
  agents: {
    defaults: {
      compaction: {
        mode: "safeguard",
        reserveTokensFloor: 20000,
        memoryFlush: {
          enabled: true,
          softThresholdTokens: 4000
        }
      }
    }
  }
}
```
## Related docs
- [Context window + compaction behavior](/concepts/compaction)
- [Gateway configuration reference](/gateway/configuration)

View File

@@ -15,6 +15,9 @@ Compaction **summarizes older conversation** into a compact summary entry and ke
Compaction **persists** in the sessions JSONL history.
## Configuration
See [Compaction config & modes](/compaction) for the `agents.defaults.compaction` settings.
## Auto-compaction (default on)
When a session nears or exceeds the model's context window, Clawdbot triggers auto-compaction and may retry the original request using the compacted context.
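The exact trigger internals aren't spelled out here; as a rough mental model, it behaves like a token-budget check against the context window minus reserved headroom. A minimal sketch under that assumption (names and logic are hypothetical, not Clawdbot's implementation):
```ts
// Hypothetical sketch of an auto-compaction trigger: compact once the session
// history would intrude into the reserved headroom of the context window.
function shouldAutoCompact(
  usedTokens: number,
  contextWindowTokens: number,
  reserveTokens: number,
): boolean {
  return usedTokens >= contextWindowTokens - reserveTokens;
}

shouldAutoCompact(190_000, 200_000, 20_000); // => true, time to compact
```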

View File

@@ -1392,6 +1392,8 @@ See [/concepts/session-pruning](/concepts/session-pruning) for behavior details.
#### `agents.defaults.compaction` (reserve headroom + memory flush)
`agents.defaults.compaction.mode` selects the compaction summarization strategy. Defaults to `default`; set `safeguard` to enable chunked summarization for very long histories. See [/compaction](/compaction).
`agents.defaults.compaction.reserveTokensFloor` enforces a minimum `reserveTokens`
value for Pi compaction (default: `20000`). Set it to `0` to disable the floor.
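The floor behaves like a simple lower bound on the configured `reserveTokens`. A minimal sketch under that reading (the helper is illustrative, not the gateway's actual code):
```ts
// Illustrative clamp: enforce a minimum reserveTokens unless the floor is 0.
function applyReserveFloor(configuredReserveTokens: number, floor: number): number {
  return floor === 0
    ? configuredReserveTokens
    : Math.max(configuredReserveTokens, floor);
}

applyReserveFloor(8_000, 20_000); // => 20000 (floor enforced)
applyReserveFloor(8_000, 0);      // => 8000 (floor disabled)
```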
@@ -1413,6 +1415,7 @@ Example (tuned):
  agents: {
    defaults: {
      compaction: {
        mode: "safeguard",
        reserveTokensFloor: 24000,
        memoryFlush: {
          enabled: true,

View File

@@ -321,6 +321,12 @@ function buildContextPruningExtension(params: {
  };
}

function resolveCompactionMode(cfg?: ClawdbotConfig): "default" | "safeguard" {
  return cfg?.agents?.defaults?.compaction?.mode === "safeguard"
    ? "safeguard"
    : "default";
}

function buildEmbeddedExtensionPaths(params: {
  cfg: ClawdbotConfig | undefined;
  sessionManager: SessionManager;
@@ -329,6 +335,9 @@ function buildEmbeddedExtensionPaths(params: {
  model: Model<Api> | undefined;
}): string[] {
  const paths = [resolvePiExtensionPath("transcript-sanitize")];
  if (resolveCompactionMode(params.cfg) === "safeguard") {
    paths.push(resolvePiExtensionPath("compaction-safeguard"));
  }
  const pruning = buildContextPruningExtension(params);
  if (pruning.additionalExtensionPaths) {
    paths.push(...pruning.additionalExtensionPaths);
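For clarity, the resolution rule above can be exercised in isolation. This is a self-contained sketch with a simplified config shape (`SketchConfig` stands in for the real `ClawdbotConfig`): any value other than an explicit `"safeguard"`, including a missing config, resolves to `"default"`.
```ts
// Self-contained sketch of resolveCompactionMode's rule, using a simplified
// config type instead of the real ClawdbotConfig.
type SketchConfig = {
  agents?: { defaults?: { compaction?: { mode?: string } } };
};

function resolveMode(cfg?: SketchConfig): "default" | "safeguard" {
  return cfg?.agents?.defaults?.compaction?.mode === "safeguard"
    ? "safeguard"
    : "default";
}

console.log(resolveMode()); // "default"
console.log(
  resolveMode({ agents: { defaults: { compaction: { mode: "safeguard" } } } }),
); // "safeguard"
```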

View File

@@ -0,0 +1,186 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type {
  ExtensionAPI,
  ExtensionContext,
} from "@mariozechner/pi-coding-agent";
import {
  computeFileLists,
  formatFileOperations,
  generateSummary,
  estimateTokens,
} from "@mariozechner/pi-coding-agent";
import { DEFAULT_CONTEXT_TOKENS } from "../defaults.js";

const MAX_CHUNK_RATIO = 0.4;
const FALLBACK_SUMMARY =
  "Summary unavailable due to context limits. Older messages were truncated.";
const TURN_PREFIX_INSTRUCTIONS =
  "This summary covers the prefix of a split turn. Focus on the original request," +
  " early progress, and any details needed to understand the retained suffix.";

// Greedily pack messages into chunks that fit within maxTokens.
function chunkMessages(messages: AgentMessage[], maxTokens: number): AgentMessage[][] {
  if (messages.length === 0) return [];
  const chunks: AgentMessage[][] = [];
  let currentChunk: AgentMessage[] = [];
  let currentTokens = 0;
  for (const message of messages) {
    const messageTokens = estimateTokens(message);
    if (
      currentChunk.length > 0 &&
      currentTokens + messageTokens > maxTokens
    ) {
      chunks.push(currentChunk);
      currentChunk = [];
      currentTokens = 0;
    }
    currentChunk.push(message);
    currentTokens += messageTokens;
    if (messageTokens > maxTokens) {
      // Close the chunk right after an oversized message so chunks stay bounded.
      chunks.push(currentChunk);
      currentChunk = [];
      currentTokens = 0;
    }
  }
  if (currentChunk.length > 0) {
    chunks.push(currentChunk);
  }
  return chunks;
}

// Summarize chunk by chunk, feeding each intermediate summary into the next
// generateSummary call so the final summary covers the whole history.
async function summarizeChunks(params: {
  messages: AgentMessage[];
  model: NonNullable<ExtensionContext["model"]>;
  apiKey: string;
  signal: AbortSignal;
  reserveTokens: number;
  maxChunkTokens: number;
  customInstructions?: string;
  previousSummary?: string;
}): Promise<string> {
  if (params.messages.length === 0) {
    return params.previousSummary ?? "No prior history.";
  }
  const chunks = chunkMessages(params.messages, params.maxChunkTokens);
  let summary = params.previousSummary;
  for (const chunk of chunks) {
    summary = await generateSummary(
      chunk,
      params.model,
      params.reserveTokens,
      params.apiKey,
      params.signal,
      params.customInstructions,
      summary,
    );
  }
  return summary ?? "No prior history.";
}

export default function compactionSafeguardExtension(api: ExtensionAPI): void {
  api.on("session_before_compact", async (event, ctx) => {
    const { preparation, customInstructions, signal } = event;
    const { readFiles, modifiedFiles } = computeFileLists(preparation.fileOps);
    const fileOpsSummary = formatFileOperations(readFiles, modifiedFiles);
    const fallbackSummary = `${FALLBACK_SUMMARY}${fileOpsSummary}`;
    const model = ctx.model;
    if (!model) {
      // Without a model we cannot summarize; fall back to the minimal summary.
      return {
        compaction: {
          summary: fallbackSummary,
          firstKeptEntryId: preparation.firstKeptEntryId,
          tokensBefore: preparation.tokensBefore,
          details: { readFiles, modifiedFiles },
        },
      };
    }
    const apiKey = await ctx.modelRegistry.getApiKey(model);
    if (!apiKey) {
      return {
        compaction: {
          summary: fallbackSummary,
          firstKeptEntryId: preparation.firstKeptEntryId,
          tokensBefore: preparation.tokensBefore,
          details: { readFiles, modifiedFiles },
        },
      };
    }
    try {
      const contextWindowTokens = Math.max(
        1,
        Math.floor(model.contextWindow ?? DEFAULT_CONTEXT_TOKENS),
      );
      const maxChunkTokens = Math.max(
        1,
        Math.floor(contextWindowTokens * MAX_CHUNK_RATIO),
      );
      const reserveTokens = Math.max(
        1,
        Math.floor(preparation.settings.reserveTokens),
      );
      const historySummary = await summarizeChunks({
        messages: preparation.messagesToSummarize,
        model,
        apiKey,
        signal,
        reserveTokens,
        maxChunkTokens,
        customInstructions,
        previousSummary: preparation.previousSummary,
      });
      let summary = historySummary;
      if (preparation.isSplitTurn && preparation.turnPrefixMessages.length > 0) {
        const prefixSummary = await summarizeChunks({
          messages: preparation.turnPrefixMessages,
          model,
          apiKey,
          signal,
          reserveTokens,
          maxChunkTokens,
          customInstructions: TURN_PREFIX_INSTRUCTIONS,
        });
        summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
      }
      summary += fileOpsSummary;
      return {
        compaction: {
          summary,
          firstKeptEntryId: preparation.firstKeptEntryId,
          tokensBefore: preparation.tokensBefore,
          details: { readFiles, modifiedFiles },
        },
      };
    } catch (error) {
      console.warn(
        `Compaction summarization failed; truncating history: ${
          error instanceof Error ? error.message : String(error)
        }`,
      );
      return {
        compaction: {
          summary: fallbackSummary,
          firstKeptEntryId: preparation.firstKeptEntryId,
          tokensBefore: preparation.tokensBefore,
          details: { readFiles, modifiedFiles },
        },
      };
    }
  });
}
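To make the chunking behavior above easier to reason about, here is a simplified, self-contained illustration: plain strings and a toy 1-token-per-4-characters estimator stand in for `AgentMessage` and `estimateTokens`. It follows the same greedy strategy, closing a chunk before it would exceed the budget and closing again right after an oversized item.
```ts
// Toy stand-in for estimateTokens: roughly one token per four characters.
function toyEstimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}

// Same greedy chunking strategy as chunkMessages, over plain strings.
function chunkTexts(texts: string[], maxTokens: number): string[][] {
  const chunks: string[][] = [];
  let current: string[] = [];
  let currentTokens = 0;
  for (const text of texts) {
    const tokens = toyEstimateTokens(text);
    // Close the current chunk before it would exceed the budget.
    if (current.length > 0 && currentTokens + tokens > maxTokens) {
      chunks.push(current);
      current = [];
      currentTokens = 0;
    }
    current.push(text);
    currentTokens += tokens;
    // Close the chunk right after an oversized item so chunks stay bounded.
    if (tokens > maxTokens) {
      chunks.push(current);
      current = [];
      currentTokens = 0;
    }
  }
  if (current.length > 0) chunks.push(current);
  return chunks;
}

console.log(chunkTexts(["aaaa", "bbbbbbbb", "c".repeat(80)], 4));
// => [["aaaa", "bbbbbbbb"], ["ccc...c"]] (the 20-token item gets a chunk of its own)
```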

View File

@@ -522,6 +522,7 @@ describe("config compaction settings", () => {
        agents: {
          defaults: {
            compaction: {
              mode: "safeguard",
              reserveTokensFloor: 12_345,
              memoryFlush: {
                enabled: false,
@@ -544,6 +545,7 @@ describe("config compaction settings", () => {
    const cfg = loadConfig();
    expect(cfg.agents?.defaults?.compaction?.reserveTokensFloor).toBe(12_345);
    expect(cfg.agents?.defaults?.compaction?.mode).toBe("safeguard");
    expect(cfg.agents?.defaults?.compaction?.memoryFlush?.enabled).toBe(
      false,
    );

View File

@@ -1716,7 +1716,11 @@ export type AgentDefaultsConfig = {
};
};
export type AgentCompactionMode = "default" | "safeguard";
export type AgentCompactionConfig = {
  /** Compaction summarization mode. */
  mode?: AgentCompactionMode;
  /** Minimum reserve tokens enforced for Pi compaction (0 disables the floor). */
  reserveTokensFloor?: number;
  /** Pre-compaction memory flush (agentic turn). Default: enabled. */
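For reference, a value of the new config type might look like the following; the import path is illustrative, and only the fields visible in this hunk are set.
```ts
import type { AgentCompactionConfig } from "./config.types.js"; // path illustrative

const compaction: AgentCompactionConfig = {
  mode: "safeguard",
  reserveTokensFloor: 20_000, // 0 would disable the floor
};
```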

View File

@@ -1210,6 +1210,7 @@ const AgentDefaultsSchema = z
      .optional(),
    compaction: z
      .object({
        mode: z.union([z.literal("default"), z.literal("safeguard")]).optional(),
        reserveTokensFloor: z.number().int().nonnegative().optional(),
        memoryFlush: z
          .object({
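As a quick way to exercise the new fields, the fragment above can be mirrored by a standalone schema; this is a sketch for experimentation covering only the two fully visible fields, not the actual nested `AgentDefaultsSchema`.
```ts
import { z } from "zod";

// Standalone approximation of the compaction shape shown above.
const CompactionSketchSchema = z.object({
  mode: z.union([z.literal("default"), z.literal("safeguard")]).optional(),
  reserveTokensFloor: z.number().int().nonnegative().optional(),
});

CompactionSketchSchema.parse({ mode: "safeguard", reserveTokensFloor: 20000 }); // ok
CompactionSketchSchema.safeParse({ mode: "aggressive" }).success; // => false
CompactionSketchSchema.safeParse({ reserveTokensFloor: -1 }).success; // => false
```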