From 9f59ff325b66125e348b4d1a038b42ebd4e48216 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Wed, 21 Jan 2026 19:44:20 +0000
Subject: [PATCH] feat: add cache-ttl pruning mode

---
 CHANGELOG.md                                  |  1 +
 docs/concepts/session-pruning.md              | 34 +++++-------
 docs/gateway/configuration.md                 | 50 ++++++++----------
 docs/providers/anthropic.md                   |  6 +--
 src/agents/pi-embedded-runner/cache-ttl.ts    | 52 +++++++++++++++++++
 src/agents/pi-embedded-runner/extensions.ts   |  5 +-
 src/agents/pi-embedded-runner/extra-params.ts |  4 +-
 .../pi-embedded-runner/run/attempt.test.ts    |  6 ++-
 src/agents/pi-embedded-runner/run/attempt.ts  | 12 +++++
 .../pi-extensions/context-pruning.test.ts     | 22 +++++---
 .../context-pruning/extension.ts              | 11 ++++
 .../pi-extensions/context-pruning/pruner.ts   | 28 ----------
 .../pi-extensions/context-pruning/runtime.ts  |  1 +
 .../pi-extensions/context-pruning/settings.ts | 23 ++++++--
 src/config/config.pruning-defaults.test.ts    |  4 +-
 src/config/defaults.ts                        | 19 +------
 src/config/types.agent-defaults.ts            |  4 +-
 src/config/zod-schema.agent-defaults.ts       |  5 +-
 18 files changed, 164 insertions(+), 123 deletions(-)
 create mode 100644 src/agents/pi-embedded-runner/cache-ttl.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a55377fff..5c5b81a8d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ Docs: https://docs.clawd.bot
 ## 2026.1.21
 
 ### Changes
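+- Caching: make tool-result pruning TTL-aware (`contextPruning.mode: "cache-ttl"`) so cache reuse stays stable and token usage drops. Pruning is now off by default, and the `adaptive`/`aggressive` modes are removed.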
 - CLI: default exec approvals to the local host, add gateway/node targeting flags, and show target details in allowlist output.
 - CLI: exec approvals mutations render tables instead of raw JSON.
 - Exec approvals: support wildcard agent allowlists (`*`) across all agents.
diff --git a/docs/concepts/session-pruning.md b/docs/concepts/session-pruning.md
index 4e76f9bb4..5e91b9fb8 100644
--- a/docs/concepts/session-pruning.md
+++ b/docs/concepts/session-pruning.md
@@ -9,8 +9,10 @@ read_when:
 Session pruning trims **old tool results** from the in-memory context right before each LLM call. It does **not** rewrite the on-disk session history (`*.jsonl`).
 
 ## When it runs
-- Before each LLM request (context hook).
+- When `mode: "cache-ttl"` is enabled and the last Anthropic call for the session is older than `ttl`.
 - Only affects the messages sent to the model for that request.
+- Only active for Anthropic API calls (and OpenRouter Anthropic models).
+- For best results, match `ttl` to your model `cacheControlTtl`.
 
 ## What can be pruned
 - Only `toolResult` messages.
@@ -26,14 +28,10 @@ Pruning uses an estimated context window (chars ≈ tokens × 4). The window siz
 3) `agents.defaults.contextTokens`.
 4) Default `200000` tokens.
 
-## Modes
-### adaptive
-- If estimated context ratio ≥ `softTrimRatio`: soft-trim oversized tool results.
-- If still ≥ `hardClearRatio` **and** prunable tool text ≥ `minPrunableToolChars`: hard-clear oldest eligible tool results.
-
-### aggressive
-- Always hard-clears eligible tool results before the cutoff.
-- Ignores `hardClear.enabled` (always clears when eligible).
+## Mode
+### cache-ttl
+- Pruning only runs if the last Anthropic call is older than `ttl` (default `5m`).
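+- When it runs: soft-trims oversized tool results once the estimated context ratio is ≥ `softTrimRatio`, then hard-clears the oldest eligible tool results if the ratio is still ≥ `hardClearRatio` **and** prunable tool text is ≥ `minPrunableToolChars`.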
 
 ## Soft vs hard pruning
 - **Soft-trim**: only for oversized tool results.
@@ -52,6 +50,7 @@ Pruning uses an estimated context window (chars ≈ tokens × 4). The window siz
 - Compaction is separate: compaction summarizes and persists, pruning is transient per request. See [/concepts/compaction](/concepts/compaction).
 
 ## Defaults (when enabled)
+- `ttl`: `"5m"`
 - `keepLastAssistants`: `3`
 - `softTrimRatio`: `0.3`
 - `hardClearRatio`: `0.5`
@@ -60,16 +59,7 @@ Pruning uses an estimated context window (chars ≈ tokens × 4). The window siz
 - `hardClear`: `{ enabled: true, placeholder: "[Old tool result content cleared]" }`
 
 ## Examples
-Default (adaptive):
-```json5
-{
-  agent: {
-    contextPruning: { mode: "adaptive" }
-  }
-}
-```
-
-To disable:
+Default (off):
 ```json5
 {
   agent: {
@@ -78,11 +68,11 @@ To disable:
 }
 ```
 
-Aggressive:
+Enable TTL-aware pruning:
 ```json5
 {
   agent: {
-    contextPruning: { mode: "aggressive" }
+    contextPruning: { mode: "cache-ttl", ttl: "5m" }
   }
 }
 ```
@@ -92,7 +82,7 @@ Restrict pruning to specific tools:
 {
   agent: {
     contextPruning: {
-      mode: "adaptive",
+      mode: "cache-ttl",
       tools: { allow: ["exec", "read"], deny: ["*image*"] }
     }
   }
diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md
index 9eddda3f8..ddce68e79 100644
--- a/docs/gateway/configuration.md
+++ b/docs/gateway/configuration.md
@@ -1414,7 +1414,7 @@ Each `agents.defaults.models` entry can include:
 - `alias` (optional model shortcut, e.g. `/opus`).
 - `params` (optional provider-specific API params passed through to the model request).
 
-`params` is also applied to streaming runs (embedded agent + compaction). Supported keys today: `temperature`, `maxTokens`, `cacheControlTtl` (`"5m"` or `"1h"`, Anthropic API + OpenRouter Anthropic models only; ignored for Anthropic OAuth/Claude Code tokens). These merge with call-time options; caller-supplied values win. `temperature` is an advanced knob—leave unset unless you know the model’s defaults and need a change. Anthropic API defaults to `"1h"` unless you override (`cacheControlTtl: "5m"`). Clawdbot includes the `extended-cache-ttl-2025-04-11` beta flag for Anthropic API; keep it if you override provider headers.
+`params` is also applied to streaming runs (embedded agent + compaction). Supported keys today: `temperature`, `maxTokens`, `cacheControlTtl` (`"5m"`, Anthropic API + OpenRouter Anthropic models only; ignored for Anthropic OAuth/Claude Code tokens). These merge with call-time options; caller-supplied values win. `temperature` is an advanced knob—leave unset unless you know the model’s defaults and need a change. Clawdbot includes the `extended-cache-ttl-2025-04-11` beta flag for Anthropic API; keep it if you override provider headers.
 
 Example:
 
@@ -1569,7 +1569,7 @@ Example:
 }
 ```
 
-#### `agents.defaults.contextPruning` (tool-result pruning)
+#### `agents.defaults.contextPruning` (TTL-aware tool-result pruning)
 
 `agents.defaults.contextPruning` prunes **old tool results** from the in-memory context right before a request is sent to the LLM.
 It does **not** modify the session history on disk (`*.jsonl` remains complete).
@@ -1580,11 +1580,9 @@ High level:
 - Never touches user/assistant messages.
 - Protects the last `keepLastAssistants` assistant messages (no tool results after that point are pruned).
 - Protects the bootstrap prefix (nothing before the first user message is pruned).
-- Modes:
-  - `adaptive`: soft-trims oversized tool results (keep head/tail) when the estimated context ratio crosses `softTrimRatio`.
-    Then hard-clears the oldest eligible tool results when the estimated context ratio crosses `hardClearRatio` **and**
-    there’s enough prunable tool-result bulk (`minPrunableToolChars`).
-  - `aggressive`: always replaces eligible tool results before the cutoff with the `hardClear.placeholder` (no ratio checks).
+- Mode:
+  - `cache-ttl`: pruning only runs when the last Anthropic call for the session is **older** than `ttl`.
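+    When it runs, it soft-trims oversized tool results (keep head/tail) once the estimated context ratio crosses `softTrimRatio`, then hard-clears the oldest eligible tool results when the ratio still crosses `hardClearRatio` **and** there’s enough prunable tool-result bulk (`minPrunableToolChars`).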
 
 Soft vs hard pruning (what changes in the context sent to the LLM):
 - **Soft-trim**: only for *oversized* tool results. Keeps the beginning + end and inserts `...` in the middle.
@@ -1598,44 +1596,40 @@ Notes / current limitations:
 - Tool results containing **image blocks are skipped** (never trimmed/cleared) right now.
 - The estimated “context ratio” is based on **characters** (approximate), not exact tokens.
 - If the session doesn’t contain at least `keepLastAssistants` assistant messages yet, pruning is skipped.
-- In `aggressive` mode, `hardClear.enabled` is ignored (eligible tool results are always replaced with `hardClear.placeholder`).
+- `cache-ttl` only activates for Anthropic API calls (and OpenRouter Anthropic models).
+- For best results, match `contextPruning.ttl` to the model `cacheControlTtl` you set in `agents.defaults.models.*.params`.
 
-Default (adaptive):
-```json5
-{
-  agents: { defaults: { contextPruning: { mode: "adaptive" } } }
-}
-```
-
-To disable:
+Default (off):
 ```json5
 {
   agents: { defaults: { contextPruning: { mode: "off" } } }
 }
 ```
 
-Defaults (when `mode` is `"adaptive"` or `"aggressive"`):
-- `keepLastAssistants`: `3`
-- `softTrimRatio`: `0.3` (adaptive only)
-- `hardClearRatio`: `0.5` (adaptive only)
-- `minPrunableToolChars`: `50000` (adaptive only)
-- `softTrim`: `{ maxChars: 4000, headChars: 1500, tailChars: 1500 }` (adaptive only)
-- `hardClear`: `{ enabled: true, placeholder: "[Old tool result content cleared]" }`
-
-Example (aggressive, minimal):
+Enable TTL-aware pruning:
 ```json5
 {
-  agents: { defaults: { contextPruning: { mode: "aggressive" } } }
+  agents: { defaults: { contextPruning: { mode: "cache-ttl" } } }
 }
 ```
 
-Example (adaptive tuned):
+Defaults (when `mode` is `"cache-ttl"`):
+- `ttl`: `"5m"`
+- `keepLastAssistants`: `3`
+- `softTrimRatio`: `0.3`
+- `hardClearRatio`: `0.5`
+- `minPrunableToolChars`: `50000`
+- `softTrim`: `{ maxChars: 4000, headChars: 1500, tailChars: 1500 }`
+- `hardClear`: `{ enabled: true, placeholder: "[Old tool result content cleared]" }`
+
+Example (cache-ttl tuned):
 ```json5
 {
   agents: {
     defaults: {
       contextPruning: {
-        mode: "adaptive",
+        mode: "cache-ttl",
+        ttl: "5m",
         keepLastAssistants: 3,
         softTrimRatio: 0.3,
         hardClearRatio: 0.5,
diff --git a/docs/providers/anthropic.md b/docs/providers/anthropic.md
index 9c72b990a..80cbe5d8b 100644
--- a/docs/providers/anthropic.md
+++ b/docs/providers/anthropic.md
@@ -36,10 +36,10 @@ clawdbot onboard --anthropic-api-key "$ANTHROPIC_API_KEY"
 
 ## Prompt caching (Anthropic API)
 
-Clawdbot enables **1-hour prompt caching by default** for Anthropic API keys.
+Clawdbot does **not** override Anthropic’s default cache TTL unless you set it.
 This is **API-only**; Claude Code CLI OAuth ignores TTL settings.
 
-To override the TTL per model, set `cacheControlTtl` in the model `params`:
+To set the TTL per model, use `cacheControlTtl` in the model `params`:
 
 ```json5
 {
@@ -47,7 +47,7 @@ To override the TTL per model, set `cacheControlTtl` in the model `params`:
     defaults: {
       models: {
         "anthropic/claude-opus-4-5": {
-          params: { cacheControlTtl: "5m" } // or "1h"
+          params: { cacheControlTtl: "5m" }
         }
       }
     }
diff --git a/src/agents/pi-embedded-runner/cache-ttl.ts b/src/agents/pi-embedded-runner/cache-ttl.ts
new file mode 100644
index 000000000..a280653f1
--- /dev/null
+++ b/src/agents/pi-embedded-runner/cache-ttl.ts
@@ -0,0 +1,52 @@
+type CustomEntryLike = { type?: unknown; customType?: unknown; data?: unknown };
+
+export const CACHE_TTL_CUSTOM_TYPE = "clawdbot.cache-ttl";
+
+export type CacheTtlEntryData = {
+  timestamp: number;
+  provider?: string;
+  modelId?: string;
+};
+
+export function isCacheTtlEligibleProvider(provider: string, modelId: string): boolean {
+  const normalizedProvider = provider.toLowerCase();
+  const normalizedModelId = modelId.toLowerCase();
+  if (normalizedProvider === "anthropic") return true;
+  if (normalizedProvider === "openrouter" && normalizedModelId.startsWith("anthropic/"))
+    return true;
+  return false;
+}
+
+export function readLastCacheTtlTimestamp(sessionManager: unknown): number | null {
+  const sm = sessionManager as { getEntries?: () => CustomEntryLike[] };
+  if (!sm?.getEntries) return null;
+  try {
+    const entries = sm.getEntries();
+    let last: number | null = null;
+    for (let i = entries.length - 1; i >= 0; i--) {
+      const entry = entries[i];
+      if (entry?.type !== "custom" || entry?.customType !== CACHE_TTL_CUSTOM_TYPE) continue;
+      const data = entry?.data as Partial<CacheTtlEntryData> | undefined;
+      const ts = typeof data?.timestamp === "number" ? data.timestamp : null;
+      if (ts && Number.isFinite(ts)) {
+        last = ts;
+        break;
+      }
+    }
+    return last;
+  } catch {
+    return null;
+  }
+}
+
+export function appendCacheTtlTimestamp(sessionManager: unknown, data: CacheTtlEntryData): void {
+  const sm = sessionManager as {
+    appendCustomEntry?: (customType: string, data: unknown) => void;
+  };
+  if (!sm?.appendCustomEntry) return;
+  try {
+    sm.appendCustomEntry(CACHE_TTL_CUSTOM_TYPE, data);
+  } catch {
+    // ignore persistence failures
+  }
+}
diff --git a/src/agents/pi-embedded-runner/extensions.ts b/src/agents/pi-embedded-runner/extensions.ts
index a7fd27ebb..48d9d22e6 100644
--- a/src/agents/pi-embedded-runner/extensions.ts
+++ b/src/agents/pi-embedded-runner/extensions.ts
@@ -11,6 +11,7 @@ import { setContextPruningRuntime } from "../pi-extensions/context-pruning/runti
 import { computeEffectiveSettings } from "../pi-extensions/context-pruning/settings.js";
 import { makeToolPrunablePredicate } from "../pi-extensions/context-pruning/tools.js";
 import { ensurePiCompactionReserveTokens } from "../pi-settings.js";
+import { isCacheTtlEligibleProvider, readLastCacheTtlTimestamp } from "./cache-ttl.js";
 
 function resolvePiExtensionPath(id: string): string {
   const self = fileURLToPath(import.meta.url);
@@ -43,7 +44,8 @@ function buildContextPruningExtension(params: {
   model: Model<Api> | undefined;
 }): { additionalExtensionPaths?: string[] } {
   const raw = params.cfg?.agents?.defaults?.contextPruning;
-  if (raw?.mode !== "adaptive" && raw?.mode !== "aggressive") return {};
+  if (raw?.mode !== "cache-ttl") return {};
+  if (!isCacheTtlEligibleProvider(params.provider, params.modelId)) return {};
 
   const settings = computeEffectiveSettings(raw);
   if (!settings) return {};
@@ -52,6 +54,7 @@ function buildContextPruningExtension(params: {
     settings,
     contextWindowTokens: resolveContextWindowTokens(params),
     isToolPrunable: makeToolPrunablePredicate(settings.tools),
+    lastCacheTouchAt: readLastCacheTtlTimestamp(params.sessionManager),
   });
 
   return {
diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts
index 11f2ab83a..f6a4490a4 100644
--- a/src/agents/pi-embedded-runner/extra-params.ts
+++ b/src/agents/pi-embedded-runner/extra-params.ts
@@ -21,7 +21,7 @@ export function resolveExtraParams(params: {
   return modelConfig?.params ? { ...modelConfig.params } : undefined;
 }
 
-type CacheControlTtl = "5m" | "1h";
+type CacheControlTtl = "5m";
 
 function resolveCacheControlTtl(
   extraParams: Record<string, unknown> | undefined,
@@ -29,7 +29,7 @@ function resolveCacheControlTtl(
   modelId: string,
 ): CacheControlTtl | undefined {
   const raw = extraParams?.cacheControlTtl;
-  if (raw !== "5m" && raw !== "1h") return undefined;
+  if (raw !== "5m") return undefined;
   if (provider === "anthropic") return raw;
   if (provider === "openrouter" && modelId.startsWith("anthropic/")) return raw;
   return undefined;
diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts
index af1d97828..93d5b5651 100644
--- a/src/agents/pi-embedded-runner/run/attempt.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.test.ts
@@ -36,8 +36,10 @@ describe("injectHistoryImagesIntoMessages", () => {
     const didMutate = injectHistoryImagesIntoMessages(messages, new Map([[0, [image]]]));
 
     expect(didMutate).toBe(false);
-    const content = messages[0]?.content as unknown[] | undefined;
-    expect(content).toBeDefined();
+    const content = messages[0]?.content;
+    if (!Array.isArray(content)) {
+      throw new Error("expected array content");
+    }
     expect(content).toHaveLength(2);
   });
 
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts
index 6a1bd3978..e2e3a39dd 100644
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -49,6 +49,7 @@ import { resolveDefaultModelForAgent } from "../../model-selection.js";
 import { isAbortError } from "../abort.js";
 import { buildEmbeddedExtensionPaths } from "../extensions.js";
 import { applyExtraParamsToAgent } from "../extra-params.js";
+import { appendCacheTtlTimestamp, isCacheTtlEligibleProvider } from "../cache-ttl.js";
 import {
   logToolSchemasForGoogle,
   sanitizeSessionHistory,
@@ -685,6 +686,17 @@ export async function runEmbeddedAttempt(
             note: `images: prompt=${imageResult.images.length} history=${imageResult.historyImagesByIndex.size}`,
           });
 
+          const shouldTrackCacheTtl =
+            params.config?.agents?.defaults?.contextPruning?.mode === "cache-ttl" &&
+            isCacheTtlEligibleProvider(params.provider, params.modelId);
+          if (shouldTrackCacheTtl) {
+            appendCacheTtlTimestamp(sessionManager, {
+              timestamp: Date.now(),
+              provider: params.provider,
+              modelId: params.modelId,
+            });
+          }
+
           // Only pass images option if there are actually images to pass
           // This avoids potential issues with models that don't expect the images parameter
           if (imageResult.images.length > 0) {
diff --git a/src/agents/pi-extensions/context-pruning.test.ts b/src/agents/pi-extensions/context-pruning.test.ts
index 7dc8e6c59..b316ef87f 100644
--- a/src/agents/pi-extensions/context-pruning.test.ts
+++ b/src/agents/pi-extensions/context-pruning.test.ts
@@ -135,12 +135,15 @@ describe("context-pruning", () => {
   });
 
   it("never prunes tool results before the first user message", () => {
-    const settings = computeEffectiveSettings({
-      mode: "aggressive",
+    const settings = {
+      ...DEFAULT_CONTEXT_PRUNING_SETTINGS,
       keepLastAssistants: 0,
-      hardClear: { placeholder: "[cleared]" },
-    });
-    if (!settings) throw new Error("expected settings");
+      softTrimRatio: 0.0,
+      hardClearRatio: 0.0,
+      minPrunableToolChars: 0,
+      hardClear: { enabled: true, placeholder: "[cleared]" },
+      softTrim: { maxChars: 10, headChars: 3, tailChars: 3 },
+    };
 
     const messages: AgentMessage[] = [
       makeAssistant("bootstrap tool calls"),
@@ -170,7 +173,7 @@ describe("context-pruning", () => {
     expect(toolText(findToolResult(next, "t1"))).toBe("[cleared]");
   });
 
-  it("mode aggressive clears eligible tool results before cutoff", () => {
+  it("hard-clear removes eligible tool results before cutoff", () => {
     const messages: AgentMessage[] = [
       makeUser("u1"),
       makeAssistant("a1"),
@@ -195,9 +198,11 @@ describe("context-pruning", () => {
 
     const settings = {
       ...DEFAULT_CONTEXT_PRUNING_SETTINGS,
-      mode: "aggressive",
       keepLastAssistants: 1,
-      hardClear: { enabled: false, placeholder: "[cleared]" },
+      softTrimRatio: 10.0,
+      hardClearRatio: 0.0,
+      minPrunableToolChars: 0,
+      hardClear: { enabled: true, placeholder: "[cleared]" },
     };
 
     const ctx = {
@@ -258,6 +263,7 @@ describe("context-pruning", () => {
       },
       contextWindowTokens: 1000,
       isToolPrunable: () => true,
+      lastCacheTouchAt: Date.now() - DEFAULT_CONTEXT_PRUNING_SETTINGS.ttlMs - 1000,
     });
 
     const messages: AgentMessage[] = [
diff --git a/src/agents/pi-extensions/context-pruning/extension.ts b/src/agents/pi-extensions/context-pruning/extension.ts
index 94da729e4..7e48141c4 100644
--- a/src/agents/pi-extensions/context-pruning/extension.ts
+++ b/src/agents/pi-extensions/context-pruning/extension.ts
@@ -9,6 +9,17 @@ export default function contextPruningExtension(api: ExtensionAPI): void {
     const runtime = getContextPruningRuntime(ctx.sessionManager);
     if (!runtime) return undefined;
 
+    if (runtime.settings.mode === "cache-ttl") {
+      const ttlMs = runtime.settings.ttlMs;
+      const lastTouch = runtime.lastCacheTouchAt ?? null;
+      if (!lastTouch || ttlMs <= 0) {
+        return undefined;
+      }
+      if (ttlMs > 0 && Date.now() - lastTouch < ttlMs) {
+        return undefined;
+      }
+    }
+
     const next = pruneContextMessages({
       messages: event.messages as AgentMessage[],
       settings: runtime.settings,
diff --git a/src/agents/pi-extensions/context-pruning/pruner.ts b/src/agents/pi-extensions/context-pruning/pruner.ts
index 61d5154d4..c13e5c37a 100644
--- a/src/agents/pi-extensions/context-pruning/pruner.ts
+++ b/src/agents/pi-extensions/context-pruning/pruner.ts
@@ -211,34 +211,6 @@ export function pruneContextMessages(params: {
 
   const isToolPrunable = params.isToolPrunable ?? makeToolPrunablePredicate(settings.tools);
 
-  if (settings.mode === "aggressive") {
-    let next: AgentMessage[] | null = null;
-
-    for (let i = pruneStartIndex; i < cutoffIndex; i++) {
-      const msg = messages[i];
-      if (!msg || msg.role !== "toolResult") continue;
-      if (!isToolPrunable(msg.toolName)) continue;
-      if (hasImageBlocks(msg.content)) {
-        continue;
-      }
-
-      const alreadyCleared =
-        msg.content.length === 1 &&
-        msg.content[0]?.type === "text" &&
-        msg.content[0].text === settings.hardClear.placeholder;
-      if (alreadyCleared) continue;
-
-      const cleared: ToolResultMessage = {
-        ...msg,
-        content: [asText(settings.hardClear.placeholder)],
-      };
-      if (!next) next = messages.slice();
-      next[i] = cleared as unknown as AgentMessage;
-    }
-
-    return next ?? messages;
-  }
-
   const totalCharsBefore = estimateContextChars(messages);
   let totalChars = totalCharsBefore;
   let ratio = totalChars / charWindow;
diff --git a/src/agents/pi-extensions/context-pruning/runtime.ts b/src/agents/pi-extensions/context-pruning/runtime.ts
index b497e6383..fecb4ce3e 100644
--- a/src/agents/pi-extensions/context-pruning/runtime.ts
+++ b/src/agents/pi-extensions/context-pruning/runtime.ts
@@ -4,6 +4,7 @@ export type ContextPruningRuntimeValue = {
   settings: EffectiveContextPruningSettings;
   contextWindowTokens?: number | null;
   isToolPrunable: (toolName: string) => boolean;
+  lastCacheTouchAt?: number | null;
 };
 
 // Session-scoped runtime registry keyed by object identity.
diff --git a/src/agents/pi-extensions/context-pruning/settings.ts b/src/agents/pi-extensions/context-pruning/settings.ts
index 69f9474d1..8d1497083 100644
--- a/src/agents/pi-extensions/context-pruning/settings.ts
+++ b/src/agents/pi-extensions/context-pruning/settings.ts
@@ -1,12 +1,15 @@
+import { parseDurationMs } from "../../../cli/parse-duration.js";
+
 export type ContextPruningToolMatch = {
   allow?: string[];
   deny?: string[];
 };
-
-export type ContextPruningMode = "off" | "adaptive" | "aggressive";
+export type ContextPruningMode = "off" | "cache-ttl";
 
 export type ContextPruningConfig = {
   mode?: ContextPruningMode;
+  /** TTL to consider cache expired (duration string, default unit: minutes). */
+  ttl?: string;
   keepLastAssistants?: number;
   softTrimRatio?: number;
   hardClearRatio?: number;
@@ -25,6 +28,7 @@ export type ContextPruningConfig = {
 
 export type EffectiveContextPruningSettings = {
   mode: Exclude<ContextPruningMode, "off">;
+  ttlMs: number;
   keepLastAssistants: number;
   softTrimRatio: number;
   hardClearRatio: number;
@@ -42,7 +46,8 @@ export type EffectiveContextPruningSettings = {
 };
 
 export const DEFAULT_CONTEXT_PRUNING_SETTINGS: EffectiveContextPruningSettings = {
-  mode: "adaptive",
+  mode: "cache-ttl",
+  ttlMs: 5 * 60 * 1000,
   keepLastAssistants: 3,
   softTrimRatio: 0.3,
   hardClearRatio: 0.5,
@@ -62,11 +67,19 @@ export const DEFAULT_CONTEXT_PRUNING_SETTINGS: EffectiveContextPruningSettings =
 export function computeEffectiveSettings(raw: unknown): EffectiveContextPruningSettings | null {
   if (!raw || typeof raw !== "object") return null;
   const cfg = raw as ContextPruningConfig;
-  if (cfg.mode !== "adaptive" && cfg.mode !== "aggressive") return null;
+  if (cfg.mode !== "cache-ttl") return null;
 
   const s: EffectiveContextPruningSettings = structuredClone(DEFAULT_CONTEXT_PRUNING_SETTINGS);
   s.mode = cfg.mode;
 
+  if (typeof cfg.ttl === "string") {
+    try {
+      s.ttlMs = parseDurationMs(cfg.ttl, { defaultUnit: "m" });
+    } catch {
+      // keep default ttl
+    }
+  }
+
   if (typeof cfg.keepLastAssistants === "number" && Number.isFinite(cfg.keepLastAssistants)) {
     s.keepLastAssistants = Math.max(0, Math.floor(cfg.keepLastAssistants));
   }
@@ -94,7 +107,7 @@ export function computeEffectiveSettings(raw: unknown): EffectiveContextPruningS
     }
   }
   if (cfg.hardClear) {
-    if (s.mode === "adaptive" && typeof cfg.hardClear.enabled === "boolean") {
+    if (typeof cfg.hardClear.enabled === "boolean") {
       s.hardClear.enabled = cfg.hardClear.enabled;
     }
     if (typeof cfg.hardClear.placeholder === "string" && cfg.hardClear.placeholder.trim()) {
diff --git a/src/config/config.pruning-defaults.test.ts b/src/config/config.pruning-defaults.test.ts
index 195c96cc5..8bc4d7e20 100644
--- a/src/config/config.pruning-defaults.test.ts
+++ b/src/config/config.pruning-defaults.test.ts
@@ -4,7 +4,7 @@ import { describe, expect, it, vi } from "vitest";
 import { withTempHome } from "./test-helpers.js";
 
 describe("config pruning defaults", () => {
-  it("defaults contextPruning mode to adaptive", async () => {
+  it("does not enable contextPruning by default", async () => {
     await withTempHome(async (home) => {
       const configDir = path.join(home, ".clawdbot");
       await fs.mkdir(configDir, { recursive: true });
@@ -18,7 +18,7 @@ describe("config pruning defaults", () => {
       const { loadConfig } = await import("./config.js");
       const cfg = loadConfig();
 
-      expect(cfg.agents?.defaults?.contextPruning?.mode).toBe("adaptive");
+      expect(cfg.agents?.defaults?.contextPruning?.mode).toBeUndefined();
     });
   });
 
diff --git a/src/config/defaults.ts b/src/config/defaults.ts
index 1d0208fc2..9976a64ef 100644
--- a/src/config/defaults.ts
+++ b/src/config/defaults.ts
@@ -157,24 +157,7 @@ export function applyLoggingDefaults(cfg: ClawdbotConfig): ClawdbotConfig {
 }
 
 export function applyContextPruningDefaults(cfg: ClawdbotConfig): ClawdbotConfig {
-  const defaults = cfg.agents?.defaults;
-  if (!defaults) return cfg;
-  const contextPruning = defaults?.contextPruning;
-  if (contextPruning?.mode) return cfg;
-
-  return {
-    ...cfg,
-    agents: {
-      ...cfg.agents,
-      defaults: {
-        ...defaults,
-        contextPruning: {
-          ...contextPruning,
-          mode: "adaptive",
-        },
-      },
-    },
-  };
+  return cfg;
 }
 
 export function applyCompactionDefaults(cfg: ClawdbotConfig): ClawdbotConfig {
diff --git a/src/config/types.agent-defaults.ts b/src/config/types.agent-defaults.ts
index 85eff97f2..0fcdcf49b 100644
--- a/src/config/types.agent-defaults.ts
+++ b/src/config/types.agent-defaults.ts
@@ -23,7 +23,9 @@ export type AgentModelListConfig = {
 };
 
 export type AgentContextPruningConfig = {
-  mode?: "off" | "adaptive" | "aggressive";
+  mode?: "off" | "cache-ttl";
+  /** TTL to consider cache expired (duration string, default unit: minutes). */
+  ttl?: string;
   keepLastAssistants?: number;
   softTrimRatio?: number;
   hardClearRatio?: number;
diff --git a/src/config/zod-schema.agent-defaults.ts b/src/config/zod-schema.agent-defaults.ts
index aafa9a6f4..c6c0ab3b2 100644
--- a/src/config/zod-schema.agent-defaults.ts
+++ b/src/config/zod-schema.agent-defaults.ts
@@ -54,9 +54,8 @@ export const AgentDefaultsSchema = z
     memorySearch: MemorySearchSchema,
     contextPruning: z
       .object({
-        mode: z
-          .union([z.literal("off"), z.literal("adaptive"), z.literal("aggressive")])
-          .optional(),
+        mode: z.union([z.literal("off"), z.literal("cache-ttl")]).optional(),
+        ttl: z.string().optional(),
         keepLastAssistants: z.number().int().nonnegative().optional(),
         softTrimRatio: z.number().min(0).max(1).optional(),
         hardClearRatio: z.number().min(0).max(1).optional(),