diff --git a/CHANGELOG.md b/CHANGELOG.md index 06abafdd7..4727b6f0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Changes - Cron: accept ISO timestamps for one-shot schedules (UTC) and allow optional delete-after-run; wired into CLI + macOS editor. - Gateway: add Tailscale binary discovery, custom bind mode, and probe auth retry for password changes. (#740 — thanks @jeffersonwarrior) +- Agents: add compaction mode config with optional safeguard summarization for long histories. (#700 — thanks @thewilloftheshadow) ### Fixes - Gateway: honor `CLAWDBOT_LAUNCHD_LABEL` / `CLAWDBOT_SYSTEMD_UNIT` overrides when checking or restarting the daemon. diff --git a/docs/compaction.md b/docs/compaction.md new file mode 100644 index 000000000..c09082de6 --- /dev/null +++ b/docs/compaction.md @@ -0,0 +1,40 @@ +--- +summary: "Compaction modes and configuration" +read_when: + - You want to configure compaction summarization behavior + - You are tuning compaction settings in clawdbot.json +--- +# Compaction + +Compaction summarizes older session history so the conversation stays within the model context window. The summary is stored in the session JSONL history and combined with the most recent messages. + +## Modes + +`agents.defaults.compaction.mode` controls how summaries are generated. + +- `default` (default): use the built-in compaction summarizer. +- `safeguard`: use a chunked summarization pass to avoid context overflow for very long histories. If chunked summarization fails, Clawdbot falls back to a minimal summary plus file-operation metadata. 
+ +## Configuration + +```json5 +{ + agents: { + defaults: { + compaction: { + mode: "safeguard", + reserveTokensFloor: 20000, + memoryFlush: { + enabled: true, + softThresholdTokens: 4000 + } + } + } + } +} +``` + +## Related docs + +- [Context window + compaction behavior](/concepts/compaction) +- [Gateway configuration reference](/gateway/configuration) diff --git a/docs/concepts/compaction.md b/docs/concepts/compaction.md index b4edc6591..3743bfc7b 100644 --- a/docs/concepts/compaction.md +++ b/docs/concepts/compaction.md @@ -15,6 +15,9 @@ Compaction **summarizes older conversation** into a compact summary entry and ke Compaction **persists** in the session’s JSONL history. +## Configuration +See [Compaction config & modes](/compaction) for the `agents.defaults.compaction` settings. + ## Auto-compaction (default on) When a session nears or exceeds the model’s context window, Clawdbot triggers auto-compaction and may retry the original request using the compacted context. diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 25fe5db92..9101c3fc1 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -1392,6 +1392,8 @@ See [/concepts/session-pruning](/concepts/session-pruning) for behavior details. #### `agents.defaults.compaction` (reserve headroom + memory flush) +`agents.defaults.compaction.mode` selects the compaction summarization strategy. Defaults to `default`; set it to `safeguard` to enable chunked summarization for very long histories. See [/compaction](/compaction). + `agents.defaults.compaction.reserveTokensFloor` enforces a minimum `reserveTokens` value for Pi compaction (default: `20000`). Set it to `0` to disable the floor. 
@@ -1413,6 +1415,7 @@ Example (tuned): agents: { defaults: { compaction: { + mode: "safeguard", reserveTokensFloor: 24000, memoryFlush: { enabled: true, diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts index fc1407ae0..942a4ab06 100644 --- a/src/agents/pi-embedded-runner.ts +++ b/src/agents/pi-embedded-runner.ts @@ -321,6 +321,12 @@ function buildContextPruningExtension(params: { }; } +function resolveCompactionMode(cfg?: ClawdbotConfig): "default" | "safeguard" { + return cfg?.agents?.defaults?.compaction?.mode === "safeguard" + ? "safeguard" + : "default"; +} + function buildEmbeddedExtensionPaths(params: { cfg: ClawdbotConfig | undefined; sessionManager: SessionManager; @@ -329,6 +335,9 @@ function buildEmbeddedExtensionPaths(params: { model: Model | undefined; }): string[] { const paths = [resolvePiExtensionPath("transcript-sanitize")]; + if (resolveCompactionMode(params.cfg) === "safeguard") { + paths.push(resolvePiExtensionPath("compaction-safeguard")); + } const pruning = buildContextPruningExtension(params); if (pruning.additionalExtensionPaths) { paths.push(...pruning.additionalExtensionPaths); diff --git a/src/agents/pi-extensions/compaction-safeguard.ts b/src/agents/pi-extensions/compaction-safeguard.ts new file mode 100644 index 000000000..4b374c258 --- /dev/null +++ b/src/agents/pi-extensions/compaction-safeguard.ts @@ -0,0 +1,212 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import type { + ExtensionAPI, + ExtensionContext, + FileOperations, +} from "@mariozechner/pi-coding-agent"; +import { estimateTokens, generateSummary } from "@mariozechner/pi-coding-agent"; + +import { DEFAULT_CONTEXT_TOKENS } from "../defaults.js"; + +const MAX_CHUNK_RATIO = 0.4; +const FALLBACK_SUMMARY = + "Summary unavailable due to context limits. Older messages were truncated."; +const TURN_PREFIX_INSTRUCTIONS = + "This summary covers the prefix of a split turn. 
Focus on the original request," + " early progress, and any details needed to understand the retained suffix."; + +function computeFileLists(fileOps: FileOperations): { + readFiles: string[]; + modifiedFiles: string[]; +} { + const modified = new Set([...fileOps.edited, ...fileOps.written]); + const readFiles = [...fileOps.read].filter((f) => !modified.has(f)).sort(); + const modifiedFiles = [...modified].sort(); + return { readFiles, modifiedFiles }; +} + +function formatFileOperations( + readFiles: string[], + modifiedFiles: string[], +): string { + const sections: string[] = []; + if (readFiles.length > 0) { + sections.push(`\n${readFiles.join("\n")}\n`); + } + if (modifiedFiles.length > 0) { + sections.push( + `\n${modifiedFiles.join("\n")}\n`, + ); + } + if (sections.length === 0) return ""; + return `\n\n${sections.join("\n\n")}`; +} + +function chunkMessages( + messages: AgentMessage[], + maxTokens: number, +): AgentMessage[][] { + if (messages.length === 0) return []; + + const chunks: AgentMessage[][] = []; + let currentChunk: AgentMessage[] = []; + let currentTokens = 0; + + for (const message of messages) { + const messageTokens = estimateTokens(message); + if (currentChunk.length > 0 && currentTokens + messageTokens > maxTokens) { + chunks.push(currentChunk); + currentChunk = []; + currentTokens = 0; + } + + currentChunk.push(message); + currentTokens += messageTokens; + + if (messageTokens > maxTokens) { + // Close the chunk right after an oversized message so later messages start a new chunk; the message itself is not split. 
+ chunks.push(currentChunk); + currentChunk = []; + currentTokens = 0; + } + } + + if (currentChunk.length > 0) { + chunks.push(currentChunk); + } + + return chunks; +} + +async function summarizeChunks(params: { + messages: AgentMessage[]; + model: NonNullable; + apiKey: string; + signal: AbortSignal; + reserveTokens: number; + maxChunkTokens: number; + customInstructions?: string; + previousSummary?: string; +}): Promise { + if (params.messages.length === 0) { + return params.previousSummary ?? "No prior history."; + } + + const chunks = chunkMessages(params.messages, params.maxChunkTokens); + let summary = params.previousSummary; + + for (const chunk of chunks) { + summary = await generateSummary( + chunk, + params.model, + params.reserveTokens, + params.apiKey, + params.signal, + params.customInstructions, + summary, + ); + } + + return summary ?? "No prior history."; +} + +export default function compactionSafeguardExtension(api: ExtensionAPI): void { + api.on("session_before_compact", async (event, ctx) => { + const { preparation, customInstructions, signal } = event; + const { readFiles, modifiedFiles } = computeFileLists(preparation.fileOps); + const fileOpsSummary = formatFileOperations(readFiles, modifiedFiles); + const fallbackSummary = `${FALLBACK_SUMMARY}${fileOpsSummary}`; + + const model = ctx.model; + if (!model) { + return { + compaction: { + summary: fallbackSummary, + firstKeptEntryId: preparation.firstKeptEntryId, + tokensBefore: preparation.tokensBefore, + details: { readFiles, modifiedFiles }, + }, + }; + } + + const apiKey = await ctx.modelRegistry.getApiKey(model); + if (!apiKey) { + return { + compaction: { + summary: fallbackSummary, + firstKeptEntryId: preparation.firstKeptEntryId, + tokensBefore: preparation.tokensBefore, + details: { readFiles, modifiedFiles }, + }, + }; + } + + try { + const contextWindowTokens = Math.max( + 1, + Math.floor(model.contextWindow ?? 
DEFAULT_CONTEXT_TOKENS), + ); + const maxChunkTokens = Math.max( + 1, + Math.floor(contextWindowTokens * MAX_CHUNK_RATIO), + ); + const reserveTokens = Math.max( + 1, + Math.floor(preparation.settings.reserveTokens), + ); + + const historySummary = await summarizeChunks({ + messages: preparation.messagesToSummarize, + model, + apiKey, + signal, + reserveTokens, + maxChunkTokens, + customInstructions, + previousSummary: preparation.previousSummary, + }); + + let summary = historySummary; + if ( + preparation.isSplitTurn && + preparation.turnPrefixMessages.length > 0 + ) { + const prefixSummary = await summarizeChunks({ + messages: preparation.turnPrefixMessages, + model, + apiKey, + signal, + reserveTokens, + maxChunkTokens, + customInstructions: TURN_PREFIX_INSTRUCTIONS, + }); + summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`; + } + + summary += fileOpsSummary; + + return { + compaction: { + summary, + firstKeptEntryId: preparation.firstKeptEntryId, + tokensBefore: preparation.tokensBefore, + details: { readFiles, modifiedFiles }, + }, + }; + } catch (error) { + console.warn( + `Compaction summarization failed; truncating history: ${ + error instanceof Error ? 
error.message : String(error) + }`, + ); + return { + compaction: { + summary: fallbackSummary, + firstKeptEntryId: preparation.firstKeptEntryId, + tokensBefore: preparation.tokensBefore, + details: { readFiles, modifiedFiles }, + }, + }; + } + }); +} diff --git a/src/config/config.test.ts b/src/config/config.test.ts index 35932cfa8..a42e08cf4 100644 --- a/src/config/config.test.ts +++ b/src/config/config.test.ts @@ -522,6 +522,7 @@ describe("config compaction settings", () => { agents: { defaults: { compaction: { + mode: "safeguard", reserveTokensFloor: 12_345, memoryFlush: { enabled: false, @@ -544,6 +545,7 @@ describe("config compaction settings", () => { const cfg = loadConfig(); expect(cfg.agents?.defaults?.compaction?.reserveTokensFloor).toBe(12_345); + expect(cfg.agents?.defaults?.compaction?.mode).toBe("safeguard"); expect(cfg.agents?.defaults?.compaction?.memoryFlush?.enabled).toBe( false, ); diff --git a/src/config/types.ts b/src/config/types.ts index 9869b0ab4..133f4aad1 100644 --- a/src/config/types.ts +++ b/src/config/types.ts @@ -1716,7 +1716,11 @@ export type AgentDefaultsConfig = { }; }; +export type AgentCompactionMode = "default" | "safeguard"; + export type AgentCompactionConfig = { + /** Compaction summarization mode. */ + mode?: AgentCompactionMode; /** Minimum reserve tokens enforced for Pi compaction (0 disables the floor). */ reserveTokensFloor?: number; /** Pre-compaction memory flush (agentic turn). Default: enabled. */ diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index d6a020411..cacd7fcfb 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -1210,6 +1210,9 @@ const AgentDefaultsSchema = z .optional(), compaction: z .object({ + mode: z + .union([z.literal("default"), z.literal("safeguard")]) + .optional(), reserveTokensFloor: z.number().int().nonnegative().optional(), memoryFlush: z .object({