From 9f59ff325b66125e348b4d1a038b42ebd4e48216 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 21 Jan 2026 19:44:20 +0000 Subject: [PATCH] feat: add cache-ttl pruning mode --- CHANGELOG.md | 1 + docs/concepts/session-pruning.md | 34 +++++------- docs/gateway/configuration.md | 50 ++++++++---------- docs/providers/anthropic.md | 6 +-- src/agents/pi-embedded-runner/cache-ttl.ts | 52 +++++++++++++++++++ src/agents/pi-embedded-runner/extensions.ts | 5 +- src/agents/pi-embedded-runner/extra-params.ts | 4 +- .../pi-embedded-runner/run/attempt.test.ts | 6 ++- src/agents/pi-embedded-runner/run/attempt.ts | 12 +++++ .../pi-extensions/context-pruning.test.ts | 22 +++++--- .../context-pruning/extension.ts | 11 ++++ .../pi-extensions/context-pruning/pruner.ts | 28 ---------- .../pi-extensions/context-pruning/runtime.ts | 1 + .../pi-extensions/context-pruning/settings.ts | 23 ++++++-- src/config/config.pruning-defaults.test.ts | 4 +- src/config/defaults.ts | 19 +------ src/config/types.agent-defaults.ts | 4 +- src/config/zod-schema.agent-defaults.ts | 5 +- 18 files changed, 164 insertions(+), 123 deletions(-) create mode 100644 src/agents/pi-embedded-runner/cache-ttl.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index a55377fff..5c5b81a8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Docs: https://docs.clawd.bot ## 2026.1.21 ### Changes +- Caching: make tool-result pruning TTL-aware so cache reuse stays stable and token usage drops. - CLI: default exec approvals to the local host, add gateway/node targeting flags, and show target details in allowlist output. - CLI: exec approvals mutations render tables instead of raw JSON. - Exec approvals: support wildcard agent allowlists (`*`) across all agents. diff --git a/docs/concepts/session-pruning.md b/docs/concepts/session-pruning.md index 4e76f9bb4..5e91b9fb8 100644 --- a/docs/concepts/session-pruning.md +++ b/docs/concepts/session-pruning.md @@ -9,8 +9,10 @@ read_when: Session pruning trims **old tool results** from the in-memory context right before each LLM call. It does **not** rewrite the on-disk session history (`*.jsonl`). ## When it runs -- Before each LLM request (context hook). +- When `mode: "cache-ttl"` is enabled and the last Anthropic call for the session is older than `ttl`. - Only affects the messages sent to the model for that request. + - Only active for Anthropic API calls (and OpenRouter Anthropic models). + - For best results, match `ttl` to your model `cacheControlTtl`. ## What can be pruned - Only `toolResult` messages. @@ -26,14 +28,10 @@ Pruning uses an estimated context window (chars ≈ tokens × 4). The window siz 3) `agents.defaults.contextTokens`. 4) Default `200000` tokens. -## Modes -### adaptive -- If estimated context ratio ≥ `softTrimRatio`: soft-trim oversized tool results. -- If still ≥ `hardClearRatio` **and** prunable tool text ≥ `minPrunableToolChars`: hard-clear oldest eligible tool results. - -### aggressive -- Always hard-clears eligible tool results before the cutoff. -- Ignores `hardClear.enabled` (always clears when eligible). +## Mode +### cache-ttl +- Pruning only runs if the last Anthropic call is older than `ttl` (default `5m`). +- When it runs: same soft-trim + hard-clear behavior as before. ## Soft vs hard pruning - **Soft-trim**: only for oversized tool results. @@ -52,6 +50,7 @@ Pruning uses an estimated context window (chars ≈ tokens × 4). The window siz - Compaction is separate: compaction summarizes and persists, pruning is transient per request. See [/concepts/compaction](/concepts/compaction). ## Defaults (when enabled) +- `ttl`: `"5m"` - `keepLastAssistants`: `3` - `softTrimRatio`: `0.3` - `hardClearRatio`: `0.5` @@ -60,16 +59,7 @@ Pruning uses an estimated context window (chars ≈ tokens × 4). The window siz - `hardClear`: `{ enabled: true, placeholder: "[Old tool result content cleared]" }` ## Examples -Default (adaptive): -```json5 -{ - agent: { - contextPruning: { mode: "adaptive" } - } -} -``` - -To disable: +Default (off): ```json5 { agent: { @@ -78,11 +68,11 @@ To disable: } ``` -Aggressive: +Enable TTL-aware pruning: ```json5 { agent: { - contextPruning: { mode: "aggressive" } + contextPruning: { mode: "cache-ttl", ttl: "5m" } } } ``` @@ -92,7 +82,7 @@ Restrict pruning to specific tools: { agent: { contextPruning: { - mode: "adaptive", + mode: "cache-ttl", tools: { allow: ["exec", "read"], deny: ["*image*"] } } } diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 9eddda3f8..ddce68e79 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -1414,7 +1414,7 @@ Each `agents.defaults.models` entry can include: - `alias` (optional model shortcut, e.g. `/opus`). - `params` (optional provider-specific API params passed through to the model request). -`params` is also applied to streaming runs (embedded agent + compaction). Supported keys today: `temperature`, `maxTokens`, `cacheControlTtl` (`"5m"` or `"1h"`, Anthropic API + OpenRouter Anthropic models only; ignored for Anthropic OAuth/Claude Code tokens). These merge with call-time options; caller-supplied values win. `temperature` is an advanced knob—leave unset unless you know the model’s defaults and need a change. Anthropic API defaults to `"1h"` unless you override (`cacheControlTtl: "5m"`). Clawdbot includes the `extended-cache-ttl-2025-04-11` beta flag for Anthropic API; keep it if you override provider headers. +`params` is also applied to streaming runs (embedded agent + compaction). Supported keys today: `temperature`, `maxTokens`, `cacheControlTtl` (`"5m"`, Anthropic API + OpenRouter Anthropic models only; ignored for Anthropic OAuth/Claude Code tokens). These merge with call-time options; caller-supplied values win. `temperature` is an advanced knob—leave unset unless you know the model’s defaults and need a change. Clawdbot includes the `extended-cache-ttl-2025-04-11` beta flag for Anthropic API; keep it if you override provider headers. Example: @@ -1569,7 +1569,7 @@ Example: } ``` -#### `agents.defaults.contextPruning` (tool-result pruning) +#### `agents.defaults.contextPruning` (TTL-aware tool-result pruning) `agents.defaults.contextPruning` prunes **old tool results** from the in-memory context right before a request is sent to the LLM. It does **not** modify the session history on disk (`*.jsonl` remains complete). @@ -1580,11 +1580,9 @@ High level: - Never touches user/assistant messages. - Protects the last `keepLastAssistants` assistant messages (no tool results after that point are pruned). - Protects the bootstrap prefix (nothing before the first user message is pruned). -- Modes: - - `adaptive`: soft-trims oversized tool results (keep head/tail) when the estimated context ratio crosses `softTrimRatio`. - Then hard-clears the oldest eligible tool results when the estimated context ratio crosses `hardClearRatio` **and** - there’s enough prunable tool-result bulk (`minPrunableToolChars`). - - `aggressive`: always replaces eligible tool results before the cutoff with the `hardClear.placeholder` (no ratio checks). +- Mode: + - `cache-ttl`: pruning only runs when the last Anthropic call for the session is **older** than `ttl`. + When it runs, it uses the same soft-trim + hard-clear behavior as before. Soft vs hard pruning (what changes in the context sent to the LLM): - **Soft-trim**: only for *oversized* tool results. Keeps the beginning + end and inserts `...` in the middle. @@ -1598,44 +1596,40 @@ Notes / current limitations: - Tool results containing **image blocks are skipped** (never trimmed/cleared) right now. - The estimated “context ratio” is based on **characters** (approximate), not exact tokens. - If the session doesn’t contain at least `keepLastAssistants` assistant messages yet, pruning is skipped. -- In `aggressive` mode, `hardClear.enabled` is ignored (eligible tool results are always replaced with `hardClear.placeholder`). +- `cache-ttl` only activates for Anthropic API calls (and OpenRouter Anthropic models). +- For best results, match `contextPruning.ttl` to the model `cacheControlTtl` you set in `agents.defaults.models.*.params`. -Default (adaptive): -```json5 -{ - agents: { defaults: { contextPruning: { mode: "adaptive" } } } -} -``` - -To disable: +Default (off): ```json5 { agents: { defaults: { contextPruning: { mode: "off" } } } } ``` -Defaults (when `mode` is `"adaptive"` or `"aggressive"`): -- `keepLastAssistants`: `3` -- `softTrimRatio`: `0.3` (adaptive only) -- `hardClearRatio`: `0.5` (adaptive only) -- `minPrunableToolChars`: `50000` (adaptive only) -- `softTrim`: `{ maxChars: 4000, headChars: 1500, tailChars: 1500 }` (adaptive only) -- `hardClear`: `{ enabled: true, placeholder: "[Old tool result content cleared]" }` - -Example (aggressive, minimal): +Enable TTL-aware pruning: ```json5 { - agents: { defaults: { contextPruning: { mode: "aggressive" } } } + agents: { defaults: { contextPruning: { mode: "cache-ttl" } } } } ``` -Example (adaptive tuned): +Defaults (when `mode` is `"cache-ttl"`): +- `ttl`: `"5m"` +- `keepLastAssistants`: `3` +- `softTrimRatio`: `0.3` +- `hardClearRatio`: `0.5` +- `minPrunableToolChars`: `50000` +- `softTrim`: `{ maxChars: 4000, headChars: 1500, tailChars: 1500 }` +- `hardClear`: `{ enabled: true, placeholder: "[Old tool result content cleared]" }` + +Example (cache-ttl tuned): ```json5 { agents: { defaults: { contextPruning: { - mode: "adaptive", + mode: "cache-ttl", + ttl: "5m", keepLastAssistants: 3, softTrimRatio: 0.3, hardClearRatio: 0.5, diff --git a/docs/providers/anthropic.md b/docs/providers/anthropic.md index 9c72b990a..80cbe5d8b 100644 --- a/docs/providers/anthropic.md +++ b/docs/providers/anthropic.md @@ -36,10 +36,10 @@ clawdbot onboard --anthropic-api-key "$ANTHROPIC_API_KEY" ## Prompt caching (Anthropic API) -Clawdbot enables **1-hour prompt caching by default** for Anthropic API keys. +Clawdbot does **not** override Anthropic’s default cache TTL unless you set it. This is **API-only**; Claude Code CLI OAuth ignores TTL settings. -To override the TTL per model, set `cacheControlTtl` in the model `params`: +To set the TTL per model, use `cacheControlTtl` in the model `params`: ```json5 { @@ -47,7 +47,7 @@ To override the TTL per model, set `cacheControlTtl` in the model `params`: defaults: { models: { "anthropic/claude-opus-4-5": { - params: { cacheControlTtl: "5m" } // or "1h" + params: { cacheControlTtl: "5m" } } } } diff --git a/src/agents/pi-embedded-runner/cache-ttl.ts b/src/agents/pi-embedded-runner/cache-ttl.ts new file mode 100644 index 000000000..a280653f1 --- /dev/null +++ b/src/agents/pi-embedded-runner/cache-ttl.ts @@ -0,0 +1,52 @@ +type CustomEntryLike = { type?: unknown; customType?: unknown; data?: unknown }; + +export const CACHE_TTL_CUSTOM_TYPE = "clawdbot.cache-ttl"; + +export type CacheTtlEntryData = { + timestamp: number; + provider?: string; + modelId?: string; +}; + +export function isCacheTtlEligibleProvider(provider: string, modelId: string): boolean { + const normalizedProvider = provider.toLowerCase(); + const normalizedModelId = modelId.toLowerCase(); + if (normalizedProvider === "anthropic") return true; + if (normalizedProvider === "openrouter" && normalizedModelId.startsWith("anthropic/")) + return true; + return false; +} + +export function readLastCacheTtlTimestamp(sessionManager: unknown): number | null { + const sm = sessionManager as { getEntries?: () => CustomEntryLike[] }; + if (!sm?.getEntries) return null; + try { + const entries = sm.getEntries(); + let last: number | null = null; + for (let i = entries.length - 1; i >= 0; i--) { + const entry = entries[i]; + if (entry?.type !== "custom" || entry?.customType !== CACHE_TTL_CUSTOM_TYPE) continue; + const data = entry?.data as Partial | undefined; + const ts = typeof data?.timestamp === "number" ? data.timestamp : null; + if (ts && Number.isFinite(ts)) { + last = ts; + break; + } + } + return last; + } catch { + return null; + } +} + +export function appendCacheTtlTimestamp(sessionManager: unknown, data: CacheTtlEntryData): void { + const sm = sessionManager as { + appendCustomEntry?: (customType: string, data: unknown) => void; + }; + if (!sm?.appendCustomEntry) return; + try { + sm.appendCustomEntry(CACHE_TTL_CUSTOM_TYPE, data); + } catch { + // ignore persistence failures + } +} diff --git a/src/agents/pi-embedded-runner/extensions.ts b/src/agents/pi-embedded-runner/extensions.ts index a7fd27ebb..48d9d22e6 100644 --- a/src/agents/pi-embedded-runner/extensions.ts +++ b/src/agents/pi-embedded-runner/extensions.ts @@ -11,6 +11,7 @@ import { setContextPruningRuntime } from "../pi-extensions/context-pruning/runti import { computeEffectiveSettings } from "../pi-extensions/context-pruning/settings.js"; import { makeToolPrunablePredicate } from "../pi-extensions/context-pruning/tools.js"; import { ensurePiCompactionReserveTokens } from "../pi-settings.js"; +import { isCacheTtlEligibleProvider, readLastCacheTtlTimestamp } from "./cache-ttl.js"; function resolvePiExtensionPath(id: string): string { const self = fileURLToPath(import.meta.url); @@ -43,7 +44,8 @@ function buildContextPruningExtension(params: { model: Model | undefined; }): { additionalExtensionPaths?: string[] } { const raw = params.cfg?.agents?.defaults?.contextPruning; - if (raw?.mode !== "adaptive" && raw?.mode !== "aggressive") return {}; + if (raw?.mode !== "cache-ttl") return {}; + if (!isCacheTtlEligibleProvider(params.provider, params.modelId)) return {}; const settings = computeEffectiveSettings(raw); if (!settings) return {}; @@ -52,6 +54,7 @@ function buildContextPruningExtension(params: { settings, contextWindowTokens: resolveContextWindowTokens(params), isToolPrunable: makeToolPrunablePredicate(settings.tools), + lastCacheTouchAt: readLastCacheTtlTimestamp(params.sessionManager), }); return { diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts index 11f2ab83a..f6a4490a4 100644 --- a/src/agents/pi-embedded-runner/extra-params.ts +++ b/src/agents/pi-embedded-runner/extra-params.ts @@ -21,7 +21,7 @@ export function resolveExtraParams(params: { return modelConfig?.params ? { ...modelConfig.params } : undefined; } -type CacheControlTtl = "5m" | "1h"; +type CacheControlTtl = "5m"; function resolveCacheControlTtl( extraParams: Record | undefined, @@ -29,7 +29,7 @@ function resolveCacheControlTtl( modelId: string, ): CacheControlTtl | undefined { const raw = extraParams?.cacheControlTtl; - if (raw !== "5m" && raw !== "1h") return undefined; + if (raw !== "5m") return undefined; if (provider === "anthropic") return raw; if (provider === "openrouter" && modelId.startsWith("anthropic/")) return raw; return undefined; diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index af1d97828..93d5b5651 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -36,8 +36,10 @@ describe("injectHistoryImagesIntoMessages", () => { const didMutate = injectHistoryImagesIntoMessages(messages, new Map([[0, [image]]])); expect(didMutate).toBe(false); - const content = messages[0]?.content as unknown[] | undefined; - expect(content).toBeDefined(); + const content = messages[0]?.content; + if (!Array.isArray(content)) { + throw new Error("expected array content"); + } expect(content).toHaveLength(2); }); diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 6a1bd3978..e2e3a39dd 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -49,6 +49,7 @@ import { resolveDefaultModelForAgent } from "../../model-selection.js"; import { isAbortError } from "../abort.js"; import { buildEmbeddedExtensionPaths } from "../extensions.js"; import { applyExtraParamsToAgent } from "../extra-params.js"; +import { appendCacheTtlTimestamp, isCacheTtlEligibleProvider } from "../cache-ttl.js"; import { logToolSchemasForGoogle, sanitizeSessionHistory, @@ -685,6 +686,17 @@ export async function runEmbeddedAttempt( note: `images: prompt=${imageResult.images.length} history=${imageResult.historyImagesByIndex.size}`, }); + const shouldTrackCacheTtl = + params.config?.agents?.defaults?.contextPruning?.mode === "cache-ttl" && + isCacheTtlEligibleProvider(params.provider, params.modelId); + if (shouldTrackCacheTtl) { + appendCacheTtlTimestamp(sessionManager, { + timestamp: Date.now(), + provider: params.provider, + modelId: params.modelId, + }); + } + // Only pass images option if there are actually images to pass // This avoids potential issues with models that don't expect the images parameter if (imageResult.images.length > 0) { diff --git a/src/agents/pi-extensions/context-pruning.test.ts b/src/agents/pi-extensions/context-pruning.test.ts index 7dc8e6c59..b316ef87f 100644 --- a/src/agents/pi-extensions/context-pruning.test.ts +++ b/src/agents/pi-extensions/context-pruning.test.ts @@ -135,12 +135,15 @@ describe("context-pruning", () => { }); it("never prunes tool results before the first user message", () => { - const settings = computeEffectiveSettings({ - mode: "aggressive", + const settings = { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS, keepLastAssistants: 0, - hardClear: { placeholder: "[cleared]" }, - }); - if (!settings) throw new Error("expected settings"); + softTrimRatio: 0.0, + hardClearRatio: 0.0, + minPrunableToolChars: 0, + hardClear: { enabled: true, placeholder: "[cleared]" }, + softTrim: { maxChars: 10, headChars: 3, tailChars: 3 }, + }; const messages: AgentMessage[] = [ makeAssistant("bootstrap tool calls"), @@ -170,7 +173,7 @@ describe("context-pruning", () => { expect(toolText(findToolResult(next, "t1"))).toBe("[cleared]"); }); - it("mode aggressive clears eligible tool results before cutoff", () => { + it("hard-clear removes eligible tool results before cutoff", () => { const messages: AgentMessage[] = [ makeUser("u1"), makeAssistant("a1"), @@ -195,9 +198,11 @@ describe("context-pruning", () => { const settings = { ...DEFAULT_CONTEXT_PRUNING_SETTINGS, - mode: "aggressive", keepLastAssistants: 1, - hardClear: { enabled: false, placeholder: "[cleared]" }, + softTrimRatio: 10.0, + hardClearRatio: 0.0, + minPrunableToolChars: 0, + hardClear: { enabled: true, placeholder: "[cleared]" }, }; const ctx = { @@ -258,6 +263,7 @@ describe("context-pruning", () => { }, contextWindowTokens: 1000, isToolPrunable: () => true, + lastCacheTouchAt: Date.now() - DEFAULT_CONTEXT_PRUNING_SETTINGS.ttlMs - 1000, }); const messages: AgentMessage[] = [ diff --git a/src/agents/pi-extensions/context-pruning/extension.ts b/src/agents/pi-extensions/context-pruning/extension.ts index 94da729e4..7e48141c4 100644 --- a/src/agents/pi-extensions/context-pruning/extension.ts +++ b/src/agents/pi-extensions/context-pruning/extension.ts @@ -9,6 +9,17 @@ export default function contextPruningExtension(api: ExtensionAPI): void { const runtime = getContextPruningRuntime(ctx.sessionManager); if (!runtime) return undefined; + if (runtime.settings.mode === "cache-ttl") { + const ttlMs = runtime.settings.ttlMs; + const lastTouch = runtime.lastCacheTouchAt ?? null; + if (!lastTouch || ttlMs <= 0) { + return undefined; + } + if (ttlMs > 0 && Date.now() - lastTouch < ttlMs) { + return undefined; + } + } + const next = pruneContextMessages({ messages: event.messages as AgentMessage[], settings: runtime.settings, diff --git a/src/agents/pi-extensions/context-pruning/pruner.ts b/src/agents/pi-extensions/context-pruning/pruner.ts index 61d5154d4..c13e5c37a 100644 --- a/src/agents/pi-extensions/context-pruning/pruner.ts +++ b/src/agents/pi-extensions/context-pruning/pruner.ts @@ -211,34 +211,6 @@ export function pruneContextMessages(params: { const isToolPrunable = params.isToolPrunable ?? makeToolPrunablePredicate(settings.tools); - if (settings.mode === "aggressive") { - let next: AgentMessage[] | null = null; - - for (let i = pruneStartIndex; i < cutoffIndex; i++) { - const msg = messages[i]; - if (!msg || msg.role !== "toolResult") continue; - if (!isToolPrunable(msg.toolName)) continue; - if (hasImageBlocks(msg.content)) { - continue; - } - - const alreadyCleared = - msg.content.length === 1 && - msg.content[0]?.type === "text" && - msg.content[0].text === settings.hardClear.placeholder; - if (alreadyCleared) continue; - - const cleared: ToolResultMessage = { - ...msg, - content: [asText(settings.hardClear.placeholder)], - }; - if (!next) next = messages.slice(); - next[i] = cleared as unknown as AgentMessage; - } - - return next ?? messages; - } - const totalCharsBefore = estimateContextChars(messages); let totalChars = totalCharsBefore; let ratio = totalChars / charWindow; diff --git a/src/agents/pi-extensions/context-pruning/runtime.ts b/src/agents/pi-extensions/context-pruning/runtime.ts index b497e6383..fecb4ce3e 100644 --- a/src/agents/pi-extensions/context-pruning/runtime.ts +++ b/src/agents/pi-extensions/context-pruning/runtime.ts @@ -4,6 +4,7 @@ export type ContextPruningRuntimeValue = { settings: EffectiveContextPruningSettings; contextWindowTokens?: number | null; isToolPrunable: (toolName: string) => boolean; + lastCacheTouchAt?: number | null; }; // Session-scoped runtime registry keyed by object identity. diff --git a/src/agents/pi-extensions/context-pruning/settings.ts b/src/agents/pi-extensions/context-pruning/settings.ts index 69f9474d1..8d1497083 100644 --- a/src/agents/pi-extensions/context-pruning/settings.ts +++ b/src/agents/pi-extensions/context-pruning/settings.ts @@ -1,12 +1,15 @@ +import { parseDurationMs } from "../../../cli/parse-duration.js"; + export type ContextPruningToolMatch = { allow?: string[]; deny?: string[]; }; - -export type ContextPruningMode = "off" | "adaptive" | "aggressive"; +export type ContextPruningMode = "off" | "cache-ttl"; export type ContextPruningConfig = { mode?: ContextPruningMode; + /** TTL to consider cache expired (duration string, default unit: minutes). */ + ttl?: string; keepLastAssistants?: number; softTrimRatio?: number; hardClearRatio?: number; @@ -25,6 +28,7 @@ export type ContextPruningConfig = { export type EffectiveContextPruningSettings = { mode: Exclude; + ttlMs: number; keepLastAssistants: number; softTrimRatio: number; hardClearRatio: number; @@ -42,7 +46,8 @@ export type EffectiveContextPruningSettings = { }; export const DEFAULT_CONTEXT_PRUNING_SETTINGS: EffectiveContextPruningSettings = { - mode: "adaptive", + mode: "cache-ttl", + ttlMs: 5 * 60 * 1000, keepLastAssistants: 3, softTrimRatio: 0.3, hardClearRatio: 0.5, @@ -62,11 +67,19 @@ export const DEFAULT_CONTEXT_PRUNING_SETTINGS: EffectiveContextPruningSettings = export function computeEffectiveSettings(raw: unknown): EffectiveContextPruningSettings | null { if (!raw || typeof raw !== "object") return null; const cfg = raw as ContextPruningConfig; - if (cfg.mode !== "adaptive" && cfg.mode !== "aggressive") return null; + if (cfg.mode !== "cache-ttl") return null; const s: EffectiveContextPruningSettings = structuredClone(DEFAULT_CONTEXT_PRUNING_SETTINGS); s.mode = cfg.mode; + if (typeof cfg.ttl === "string") { + try { + s.ttlMs = parseDurationMs(cfg.ttl, { defaultUnit: "m" }); + } catch { + // keep default ttl + } + } + if (typeof cfg.keepLastAssistants === "number" && Number.isFinite(cfg.keepLastAssistants)) { s.keepLastAssistants = Math.max(0, Math.floor(cfg.keepLastAssistants)); } @@ -94,7 +107,7 @@ export function computeEffectiveSettings(raw: unknown): EffectiveContextPruningS } } if (cfg.hardClear) { - if (s.mode === "adaptive" && typeof cfg.hardClear.enabled === "boolean") { + if (typeof cfg.hardClear.enabled === "boolean") { s.hardClear.enabled = cfg.hardClear.enabled; } if (typeof cfg.hardClear.placeholder === "string" && cfg.hardClear.placeholder.trim()) { diff --git a/src/config/config.pruning-defaults.test.ts b/src/config/config.pruning-defaults.test.ts index 195c96cc5..8bc4d7e20 100644 --- a/src/config/config.pruning-defaults.test.ts +++ b/src/config/config.pruning-defaults.test.ts @@ -4,7 +4,7 @@ import { describe, expect, it, vi } from "vitest"; import { withTempHome } from "./test-helpers.js"; describe("config pruning defaults", () => { - it("defaults contextPruning mode to adaptive", async () => { + it("does not enable contextPruning by default", async () => { await withTempHome(async (home) => { const configDir = path.join(home, ".clawdbot"); await fs.mkdir(configDir, { recursive: true }); @@ -18,7 +18,7 @@ describe("config pruning defaults", () => { const { loadConfig } = await import("./config.js"); const cfg = loadConfig(); - expect(cfg.agents?.defaults?.contextPruning?.mode).toBe("adaptive"); + expect(cfg.agents?.defaults?.contextPruning?.mode).toBeUndefined(); }); }); diff --git a/src/config/defaults.ts b/src/config/defaults.ts index 1d0208fc2..9976a64ef 100644 --- a/src/config/defaults.ts +++ b/src/config/defaults.ts @@ -157,24 +157,7 @@ export function applyLoggingDefaults(cfg: ClawdbotConfig): ClawdbotConfig { } export function applyContextPruningDefaults(cfg: ClawdbotConfig): ClawdbotConfig { - const defaults = cfg.agents?.defaults; - if (!defaults) return cfg; - const contextPruning = defaults?.contextPruning; - if (contextPruning?.mode) return cfg; - - return { - ...cfg, - agents: { - ...cfg.agents, - defaults: { - ...defaults, - contextPruning: { - ...contextPruning, - mode: "adaptive", - }, - }, - }, - }; + return cfg; } export function applyCompactionDefaults(cfg: ClawdbotConfig): ClawdbotConfig { diff --git a/src/config/types.agent-defaults.ts b/src/config/types.agent-defaults.ts index 85eff97f2..0fcdcf49b 100644 --- a/src/config/types.agent-defaults.ts +++ b/src/config/types.agent-defaults.ts @@ -23,7 +23,9 @@ export type AgentModelListConfig = { }; export type AgentContextPruningConfig = { - mode?: "off" | "adaptive" | "aggressive"; + mode?: "off" | "cache-ttl"; + /** TTL to consider cache expired (duration string, default unit: minutes). */ + ttl?: string; keepLastAssistants?: number; softTrimRatio?: number; hardClearRatio?: number; diff --git a/src/config/zod-schema.agent-defaults.ts b/src/config/zod-schema.agent-defaults.ts index aafa9a6f4..c6c0ab3b2 100644 --- a/src/config/zod-schema.agent-defaults.ts +++ b/src/config/zod-schema.agent-defaults.ts @@ -54,9 +54,8 @@ export const AgentDefaultsSchema = z memorySearch: MemorySearchSchema, contextPruning: z .object({ - mode: z - .union([z.literal("off"), z.literal("adaptive"), z.literal("aggressive")]) - .optional(), + mode: z.union([z.literal("off"), z.literal("cache-ttl")]).optional(), + ttl: z.string().optional(), keepLastAssistants: z.number().int().nonnegative().optional(), softTrimRatio: z.number().min(0).max(1).optional(), hardClearRatio: z.number().min(0).max(1).optional(),