feat: add cache-ttl pruning mode
This commit is contained in:
@@ -5,6 +5,7 @@ Docs: https://docs.clawd.bot
|
||||
## 2026.1.21
|
||||
|
||||
### Changes
|
||||
- Caching: make tool-result pruning TTL-aware so cache reuse stays stable and token usage drops.
|
||||
- CLI: default exec approvals to the local host, add gateway/node targeting flags, and show target details in allowlist output.
|
||||
- CLI: exec approvals mutations render tables instead of raw JSON.
|
||||
- Exec approvals: support wildcard agent allowlists (`*`) across all agents.
|
||||
|
||||
@@ -9,8 +9,10 @@ read_when:
|
||||
Session pruning trims **old tool results** from the in-memory context right before each LLM call. It does **not** rewrite the on-disk session history (`*.jsonl`).
|
||||
|
||||
## When it runs
|
||||
- Before each LLM request (context hook).
|
||||
- When `mode: "cache-ttl"` is enabled and the last Anthropic call for the session is older than `ttl`.
|
||||
- Only affects the messages sent to the model for that request.
|
||||
- Only active for Anthropic API calls (and OpenRouter Anthropic models).
|
||||
- For best results, match `ttl` to your model `cacheControlTtl`.
|
||||
|
||||
## What can be pruned
|
||||
- Only `toolResult` messages.
|
||||
@@ -26,14 +28,10 @@ Pruning uses an estimated context window (chars ≈ tokens × 4). The window siz
|
||||
3) `agents.defaults.contextTokens`.
|
||||
4) Default `200000` tokens.
|
||||
|
||||
## Modes
|
||||
### adaptive
|
||||
- If estimated context ratio ≥ `softTrimRatio`: soft-trim oversized tool results.
|
||||
- If still ≥ `hardClearRatio` **and** prunable tool text ≥ `minPrunableToolChars`: hard-clear oldest eligible tool results.
|
||||
|
||||
### aggressive
|
||||
- Always hard-clears eligible tool results before the cutoff.
|
||||
- Ignores `hardClear.enabled` (always clears when eligible).
|
||||
## Mode
|
||||
### cache-ttl
|
||||
- Pruning only runs if the last Anthropic call is older than `ttl` (default `5m`).
|
||||
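- When it runs: soft-trims oversized tool results once the estimated context ratio reaches `softTrimRatio`, then hard-clears the oldest eligible tool results if the ratio still reaches `hardClearRatio` **and** prunable tool text is at least `minPrunableToolChars`.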
|
||||
|
||||
## Soft vs hard pruning
|
||||
- **Soft-trim**: only for oversized tool results.
|
||||
@@ -52,6 +50,7 @@ Pruning uses an estimated context window (chars ≈ tokens × 4). The window siz
|
||||
- Compaction is separate: compaction summarizes and persists, pruning is transient per request. See [/concepts/compaction](/concepts/compaction).
|
||||
|
||||
## Defaults (when enabled)
|
||||
- `ttl`: `"5m"`
|
||||
- `keepLastAssistants`: `3`
|
||||
- `softTrimRatio`: `0.3`
|
||||
- `hardClearRatio`: `0.5`
|
||||
@@ -60,16 +59,7 @@ Pruning uses an estimated context window (chars ≈ tokens × 4). The window siz
|
||||
- `hardClear`: `{ enabled: true, placeholder: "[Old tool result content cleared]" }`
|
||||
|
||||
## Examples
|
||||
Default (adaptive):
|
||||
```json5
|
||||
{
|
||||
agent: {
|
||||
contextPruning: { mode: "adaptive" }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
To disable:
|
||||
Default (off):
|
||||
```json5
|
||||
{
|
||||
agent: {
|
||||
@@ -78,11 +68,11 @@ To disable:
|
||||
}
|
||||
```
|
||||
|
||||
Aggressive:
|
||||
Enable TTL-aware pruning:
|
||||
```json5
|
||||
{
|
||||
agent: {
|
||||
contextPruning: { mode: "aggressive" }
|
||||
contextPruning: { mode: "cache-ttl", ttl: "5m" }
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -92,7 +82,7 @@ Restrict pruning to specific tools:
|
||||
{
|
||||
agent: {
|
||||
contextPruning: {
|
||||
mode: "adaptive",
|
||||
mode: "cache-ttl",
|
||||
tools: { allow: ["exec", "read"], deny: ["*image*"] }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1414,7 +1414,7 @@ Each `agents.defaults.models` entry can include:
|
||||
- `alias` (optional model shortcut, e.g. `/opus`).
|
||||
- `params` (optional provider-specific API params passed through to the model request).
|
||||
|
||||
`params` is also applied to streaming runs (embedded agent + compaction). Supported keys today: `temperature`, `maxTokens`, `cacheControlTtl` (`"5m"` or `"1h"`, Anthropic API + OpenRouter Anthropic models only; ignored for Anthropic OAuth/Claude Code tokens). These merge with call-time options; caller-supplied values win. `temperature` is an advanced knob—leave unset unless you know the model’s defaults and need a change. Anthropic API defaults to `"1h"` unless you override (`cacheControlTtl: "5m"`). Clawdbot includes the `extended-cache-ttl-2025-04-11` beta flag for Anthropic API; keep it if you override provider headers.
|
||||
`params` is also applied to streaming runs (embedded agent + compaction). Supported keys today: `temperature`, `maxTokens`, `cacheControlTtl` (`"5m"`, Anthropic API + OpenRouter Anthropic models only; ignored for Anthropic OAuth/Claude Code tokens). These merge with call-time options; caller-supplied values win. `temperature` is an advanced knob—leave unset unless you know the model’s defaults and need a change. Clawdbot includes the `extended-cache-ttl-2025-04-11` beta flag for Anthropic API; keep it if you override provider headers.
|
||||
|
||||
Example:
|
||||
|
||||
@@ -1569,7 +1569,7 @@ Example:
|
||||
}
|
||||
```
|
||||
|
||||
#### `agents.defaults.contextPruning` (tool-result pruning)
|
||||
#### `agents.defaults.contextPruning` (TTL-aware tool-result pruning)
|
||||
|
||||
`agents.defaults.contextPruning` prunes **old tool results** from the in-memory context right before a request is sent to the LLM.
|
||||
It does **not** modify the session history on disk (`*.jsonl` remains complete).
|
||||
@@ -1580,11 +1580,9 @@ High level:
|
||||
- Never touches user/assistant messages.
|
||||
- Protects the last `keepLastAssistants` assistant messages (no tool results after that point are pruned).
|
||||
- Protects the bootstrap prefix (nothing before the first user message is pruned).
|
||||
- Modes:
|
||||
- `adaptive`: soft-trims oversized tool results (keep head/tail) when the estimated context ratio crosses `softTrimRatio`.
|
||||
Then hard-clears the oldest eligible tool results when the estimated context ratio crosses `hardClearRatio` **and**
|
||||
there’s enough prunable tool-result bulk (`minPrunableToolChars`).
|
||||
- `aggressive`: always replaces eligible tool results before the cutoff with the `hardClear.placeholder` (no ratio checks).
|
||||
- Mode:
|
||||
- `cache-ttl`: pruning only runs when the last Anthropic call for the session is **older** than `ttl`.
|
||||
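When it runs, it soft-trims oversized tool results (keeping head/tail) once the estimated context ratio crosses `softTrimRatio`, then hard-clears the oldest eligible tool results once the ratio crosses `hardClearRatio` **and** there is enough prunable tool-result bulk (`minPrunableToolChars`).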
|
||||
|
||||
Soft vs hard pruning (what changes in the context sent to the LLM):
|
||||
- **Soft-trim**: only for *oversized* tool results. Keeps the beginning + end and inserts `...` in the middle.
|
||||
@@ -1598,44 +1596,40 @@ Notes / current limitations:
|
||||
- Tool results containing **image blocks are skipped** (never trimmed/cleared) right now.
|
||||
- The estimated “context ratio” is based on **characters** (approximate), not exact tokens.
|
||||
- If the session doesn’t contain at least `keepLastAssistants` assistant messages yet, pruning is skipped.
|
||||
- In `aggressive` mode, `hardClear.enabled` is ignored (eligible tool results are always replaced with `hardClear.placeholder`).
|
||||
- `cache-ttl` only activates for Anthropic API calls (and OpenRouter Anthropic models).
|
||||
- For best results, match `contextPruning.ttl` to the model `cacheControlTtl` you set in `agents.defaults.models.*.params`.
|
||||
|
||||
Default (adaptive):
|
||||
```json5
|
||||
{
|
||||
agents: { defaults: { contextPruning: { mode: "adaptive" } } }
|
||||
}
|
||||
```
|
||||
|
||||
To disable:
|
||||
Default (off):
|
||||
```json5
|
||||
{
|
||||
agents: { defaults: { contextPruning: { mode: "off" } } }
|
||||
}
|
||||
```
|
||||
|
||||
Defaults (when `mode` is `"adaptive"` or `"aggressive"`):
|
||||
- `keepLastAssistants`: `3`
|
||||
- `softTrimRatio`: `0.3` (adaptive only)
|
||||
- `hardClearRatio`: `0.5` (adaptive only)
|
||||
- `minPrunableToolChars`: `50000` (adaptive only)
|
||||
- `softTrim`: `{ maxChars: 4000, headChars: 1500, tailChars: 1500 }` (adaptive only)
|
||||
- `hardClear`: `{ enabled: true, placeholder: "[Old tool result content cleared]" }`
|
||||
|
||||
Example (aggressive, minimal):
|
||||
Enable TTL-aware pruning:
|
||||
```json5
|
||||
{
|
||||
agents: { defaults: { contextPruning: { mode: "aggressive" } } }
|
||||
agents: { defaults: { contextPruning: { mode: "cache-ttl" } } }
|
||||
}
|
||||
```
|
||||
|
||||
Example (adaptive tuned):
|
||||
Defaults (when `mode` is `"cache-ttl"`):
|
||||
- `ttl`: `"5m"`
|
||||
- `keepLastAssistants`: `3`
|
||||
- `softTrimRatio`: `0.3`
|
||||
- `hardClearRatio`: `0.5`
|
||||
- `minPrunableToolChars`: `50000`
|
||||
- `softTrim`: `{ maxChars: 4000, headChars: 1500, tailChars: 1500 }`
|
||||
- `hardClear`: `{ enabled: true, placeholder: "[Old tool result content cleared]" }`
|
||||
|
||||
Example (cache-ttl tuned):
|
||||
```json5
|
||||
{
|
||||
agents: {
|
||||
defaults: {
|
||||
contextPruning: {
|
||||
mode: "adaptive",
|
||||
mode: "cache-ttl",
|
||||
ttl: "5m",
|
||||
keepLastAssistants: 3,
|
||||
softTrimRatio: 0.3,
|
||||
hardClearRatio: 0.5,
|
||||
|
||||
@@ -36,10 +36,10 @@ clawdbot onboard --anthropic-api-key "$ANTHROPIC_API_KEY"
|
||||
|
||||
## Prompt caching (Anthropic API)
|
||||
|
||||
Clawdbot enables **1-hour prompt caching by default** for Anthropic API keys.
|
||||
Clawdbot does **not** override Anthropic’s default cache TTL unless you set it.
|
||||
This is **API-only**; Claude Code CLI OAuth ignores TTL settings.
|
||||
|
||||
To override the TTL per model, set `cacheControlTtl` in the model `params`:
|
||||
To set the TTL per model, use `cacheControlTtl` in the model `params`:
|
||||
|
||||
```json5
|
||||
{
|
||||
@@ -47,7 +47,7 @@ To override the TTL per model, set `cacheControlTtl` in the model `params`:
|
||||
defaults: {
|
||||
models: {
|
||||
"anthropic/claude-opus-4-5": {
|
||||
params: { cacheControlTtl: "5m" } // or "1h"
|
||||
params: { cacheControlTtl: "5m" }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
52
src/agents/pi-embedded-runner/cache-ttl.ts
Normal file
52
src/agents/pi-embedded-runner/cache-ttl.ts
Normal file
@@ -0,0 +1,52 @@
|
||||
/**
 * Loosely typed view of a session entry. Every field is `unknown` and is
 * checked at the point of use when entries are scanned for cache-TTL markers.
 */
type CustomEntryLike = { type?: unknown; customType?: unknown; data?: unknown };
|
||||
|
||||
/**
 * `customType` tag for cache-TTL markers: passed to `appendCustomEntry` when a
 * marker is written and matched against `entry.customType` when reading back.
 */
export const CACHE_TTL_CUSTOM_TYPE = "clawdbot.cache-ttl";
|
||||
|
||||
/** Payload stored in a cache-TTL marker entry. */
export type CacheTtlEntryData = {
|
||||
// Time of the recorded call; the embedded runner writes `Date.now()` here.
timestamp: number;
|
||||
// Provider of the recorded call (optional; not consulted when reading the marker back).
provider?: string;
|
||||
// Model id of the recorded call (optional; not consulted when reading the marker back).
modelId?: string;
|
||||
};
|
||||
|
||||
/**
 * Reports whether cache-TTL tracking applies to a provider/model pair.
 *
 * Matching is case-insensitive. A pair qualifies when the provider is
 * `anthropic`, or when the provider is `openrouter` and the model id starts
 * with `anthropic/`.
 *
 * @param provider - Provider identifier for the call.
 * @param modelId - Model identifier for the call.
 * @returns `true` when the pair qualifies, otherwise `false`.
 */
export function isCacheTtlEligibleProvider(provider: string, modelId: string): boolean {
  // Normalise both inputs up front so the comparison below is purely structural.
  const providerKey = provider.toLowerCase();
  const modelKey = modelId.toLowerCase();
  return (
    providerKey === "anthropic" ||
    (providerKey === "openrouter" && modelKey.startsWith("anthropic/"))
  );
}
|
||||
|
||||
/**
 * Returns the timestamp stored in the most recent valid cache-TTL marker of a
 * session, or `null` when there is none.
 *
 * `sessionManager` is duck-typed: it only needs a `getEntries()` method that
 * returns the session entries. Entries are scanned from newest to oldest and
 * the first entry that is a `custom` entry tagged `CACHE_TTL_CUSTOM_TYPE` with
 * a usable `data.timestamp` wins.
 *
 * @param sessionManager - Object that may expose `getEntries()`.
 * @returns The marker timestamp, or `null` if the manager has no
 *   `getEntries`, no valid marker exists, or reading the entries throws.
 */
export function readLastCacheTtlTimestamp(sessionManager: unknown): number | null {
  const sm = sessionManager as { getEntries?: () => CustomEntryLike[] };
  if (!sm?.getEntries) return null;
  try {
    const entries = sm.getEntries();
    for (let i = entries.length - 1; i >= 0; i--) {
      const entry = entries[i];
      if (entry?.type !== "custom" || entry?.customType !== CACHE_TTL_CUSTOM_TYPE) continue;
      const ts = (entry?.data as Partial<CacheTtlEntryData> | null | undefined)?.timestamp;
      // Explicit numeric validation instead of a truthiness test: the value must
      // be a finite, strictly positive number. Zero, negative, NaN and infinite
      // values are treated as corrupt and the scan continues with older entries.
      if (typeof ts === "number" && Number.isFinite(ts) && ts > 0) return ts;
    }
    return null;
  } catch {
    // Reading the marker is best-effort; a failing session manager means "unknown".
    return null;
  }
}
|
||||
|
||||
/**
 * Best-effort write of a cache-TTL marker to a session.
 *
 * `sessionManager` is duck-typed: when it exposes `appendCustomEntry`, the
 * marker is appended under `CACHE_TTL_CUSTOM_TYPE`; otherwise nothing happens.
 * Errors thrown by `appendCustomEntry` are swallowed.
 *
 * @param sessionManager - Object that may expose `appendCustomEntry()`.
 * @param data - Marker payload to store.
 */
export function appendCacheTtlTimestamp(sessionManager: unknown, data: CacheTtlEntryData): void {
  type EntryWriter = { appendCustomEntry?: (customType: string, data: unknown) => void };
  const writer = sessionManager as EntryWriter;
  if (writer?.appendCustomEntry) {
    try {
      writer.appendCustomEntry(CACHE_TTL_CUSTOM_TYPE, data);
    } catch {
      // ignore persistence failures
    }
  }
}
|
||||
@@ -11,6 +11,7 @@ import { setContextPruningRuntime } from "../pi-extensions/context-pruning/runti
|
||||
import { computeEffectiveSettings } from "../pi-extensions/context-pruning/settings.js";
|
||||
import { makeToolPrunablePredicate } from "../pi-extensions/context-pruning/tools.js";
|
||||
import { ensurePiCompactionReserveTokens } from "../pi-settings.js";
|
||||
import { isCacheTtlEligibleProvider, readLastCacheTtlTimestamp } from "./cache-ttl.js";
|
||||
|
||||
function resolvePiExtensionPath(id: string): string {
|
||||
const self = fileURLToPath(import.meta.url);
|
||||
@@ -43,7 +44,8 @@ function buildContextPruningExtension(params: {
|
||||
model: Model<Api> | undefined;
|
||||
}): { additionalExtensionPaths?: string[] } {
|
||||
const raw = params.cfg?.agents?.defaults?.contextPruning;
|
||||
if (raw?.mode !== "adaptive" && raw?.mode !== "aggressive") return {};
|
||||
if (raw?.mode !== "cache-ttl") return {};
|
||||
if (!isCacheTtlEligibleProvider(params.provider, params.modelId)) return {};
|
||||
|
||||
const settings = computeEffectiveSettings(raw);
|
||||
if (!settings) return {};
|
||||
@@ -52,6 +54,7 @@ function buildContextPruningExtension(params: {
|
||||
settings,
|
||||
contextWindowTokens: resolveContextWindowTokens(params),
|
||||
isToolPrunable: makeToolPrunablePredicate(settings.tools),
|
||||
lastCacheTouchAt: readLastCacheTtlTimestamp(params.sessionManager),
|
||||
});
|
||||
|
||||
return {
|
||||
|
||||
@@ -21,7 +21,7 @@ export function resolveExtraParams(params: {
|
||||
return modelConfig?.params ? { ...modelConfig.params } : undefined;
|
||||
}
|
||||
|
||||
type CacheControlTtl = "5m" | "1h";
|
||||
type CacheControlTtl = "5m";
|
||||
|
||||
function resolveCacheControlTtl(
|
||||
extraParams: Record<string, unknown> | undefined,
|
||||
@@ -29,7 +29,7 @@ function resolveCacheControlTtl(
|
||||
modelId: string,
|
||||
): CacheControlTtl | undefined {
|
||||
const raw = extraParams?.cacheControlTtl;
|
||||
if (raw !== "5m" && raw !== "1h") return undefined;
|
||||
if (raw !== "5m") return undefined;
|
||||
if (provider === "anthropic") return raw;
|
||||
if (provider === "openrouter" && modelId.startsWith("anthropic/")) return raw;
|
||||
return undefined;
|
||||
|
||||
@@ -36,8 +36,10 @@ describe("injectHistoryImagesIntoMessages", () => {
|
||||
const didMutate = injectHistoryImagesIntoMessages(messages, new Map([[0, [image]]]));
|
||||
|
||||
expect(didMutate).toBe(false);
|
||||
const content = messages[0]?.content as unknown[] | undefined;
|
||||
expect(content).toBeDefined();
|
||||
const content = messages[0]?.content;
|
||||
if (!Array.isArray(content)) {
|
||||
throw new Error("expected array content");
|
||||
}
|
||||
expect(content).toHaveLength(2);
|
||||
});
|
||||
|
||||
|
||||
@@ -49,6 +49,7 @@ import { resolveDefaultModelForAgent } from "../../model-selection.js";
|
||||
import { isAbortError } from "../abort.js";
|
||||
import { buildEmbeddedExtensionPaths } from "../extensions.js";
|
||||
import { applyExtraParamsToAgent } from "../extra-params.js";
|
||||
import { appendCacheTtlTimestamp, isCacheTtlEligibleProvider } from "../cache-ttl.js";
|
||||
import {
|
||||
logToolSchemasForGoogle,
|
||||
sanitizeSessionHistory,
|
||||
@@ -685,6 +686,17 @@ export async function runEmbeddedAttempt(
|
||||
note: `images: prompt=${imageResult.images.length} history=${imageResult.historyImagesByIndex.size}`,
|
||||
});
|
||||
|
||||
const shouldTrackCacheTtl =
|
||||
params.config?.agents?.defaults?.contextPruning?.mode === "cache-ttl" &&
|
||||
isCacheTtlEligibleProvider(params.provider, params.modelId);
|
||||
if (shouldTrackCacheTtl) {
|
||||
appendCacheTtlTimestamp(sessionManager, {
|
||||
timestamp: Date.now(),
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
});
|
||||
}
|
||||
|
||||
// Only pass images option if there are actually images to pass
|
||||
// This avoids potential issues with models that don't expect the images parameter
|
||||
if (imageResult.images.length > 0) {
|
||||
|
||||
@@ -135,12 +135,15 @@ describe("context-pruning", () => {
|
||||
});
|
||||
|
||||
it("never prunes tool results before the first user message", () => {
|
||||
const settings = computeEffectiveSettings({
|
||||
mode: "aggressive",
|
||||
const settings = {
|
||||
...DEFAULT_CONTEXT_PRUNING_SETTINGS,
|
||||
keepLastAssistants: 0,
|
||||
hardClear: { placeholder: "[cleared]" },
|
||||
});
|
||||
if (!settings) throw new Error("expected settings");
|
||||
softTrimRatio: 0.0,
|
||||
hardClearRatio: 0.0,
|
||||
minPrunableToolChars: 0,
|
||||
hardClear: { enabled: true, placeholder: "[cleared]" },
|
||||
softTrim: { maxChars: 10, headChars: 3, tailChars: 3 },
|
||||
};
|
||||
|
||||
const messages: AgentMessage[] = [
|
||||
makeAssistant("bootstrap tool calls"),
|
||||
@@ -170,7 +173,7 @@ describe("context-pruning", () => {
|
||||
expect(toolText(findToolResult(next, "t1"))).toBe("[cleared]");
|
||||
});
|
||||
|
||||
it("mode aggressive clears eligible tool results before cutoff", () => {
|
||||
it("hard-clear removes eligible tool results before cutoff", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
makeUser("u1"),
|
||||
makeAssistant("a1"),
|
||||
@@ -195,9 +198,11 @@ describe("context-pruning", () => {
|
||||
|
||||
const settings = {
|
||||
...DEFAULT_CONTEXT_PRUNING_SETTINGS,
|
||||
mode: "aggressive",
|
||||
keepLastAssistants: 1,
|
||||
hardClear: { enabled: false, placeholder: "[cleared]" },
|
||||
softTrimRatio: 10.0,
|
||||
hardClearRatio: 0.0,
|
||||
minPrunableToolChars: 0,
|
||||
hardClear: { enabled: true, placeholder: "[cleared]" },
|
||||
};
|
||||
|
||||
const ctx = {
|
||||
@@ -258,6 +263,7 @@ describe("context-pruning", () => {
|
||||
},
|
||||
contextWindowTokens: 1000,
|
||||
isToolPrunable: () => true,
|
||||
lastCacheTouchAt: Date.now() - DEFAULT_CONTEXT_PRUNING_SETTINGS.ttlMs - 1000,
|
||||
});
|
||||
|
||||
const messages: AgentMessage[] = [
|
||||
|
||||
@@ -9,6 +9,17 @@ export default function contextPruningExtension(api: ExtensionAPI): void {
|
||||
const runtime = getContextPruningRuntime(ctx.sessionManager);
|
||||
if (!runtime) return undefined;
|
||||
|
||||
if (runtime.settings.mode === "cache-ttl") {
|
||||
const ttlMs = runtime.settings.ttlMs;
|
||||
const lastTouch = runtime.lastCacheTouchAt ?? null;
|
||||
if (!lastTouch || ttlMs <= 0) {
|
||||
return undefined;
|
||||
}
|
||||
if (ttlMs > 0 && Date.now() - lastTouch < ttlMs) {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
const next = pruneContextMessages({
|
||||
messages: event.messages as AgentMessage[],
|
||||
settings: runtime.settings,
|
||||
|
||||
@@ -211,34 +211,6 @@ export function pruneContextMessages(params: {
|
||||
|
||||
const isToolPrunable = params.isToolPrunable ?? makeToolPrunablePredicate(settings.tools);
|
||||
|
||||
if (settings.mode === "aggressive") {
|
||||
let next: AgentMessage[] | null = null;
|
||||
|
||||
for (let i = pruneStartIndex; i < cutoffIndex; i++) {
|
||||
const msg = messages[i];
|
||||
if (!msg || msg.role !== "toolResult") continue;
|
||||
if (!isToolPrunable(msg.toolName)) continue;
|
||||
if (hasImageBlocks(msg.content)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const alreadyCleared =
|
||||
msg.content.length === 1 &&
|
||||
msg.content[0]?.type === "text" &&
|
||||
msg.content[0].text === settings.hardClear.placeholder;
|
||||
if (alreadyCleared) continue;
|
||||
|
||||
const cleared: ToolResultMessage = {
|
||||
...msg,
|
||||
content: [asText(settings.hardClear.placeholder)],
|
||||
};
|
||||
if (!next) next = messages.slice();
|
||||
next[i] = cleared as unknown as AgentMessage;
|
||||
}
|
||||
|
||||
return next ?? messages;
|
||||
}
|
||||
|
||||
const totalCharsBefore = estimateContextChars(messages);
|
||||
let totalChars = totalCharsBefore;
|
||||
let ratio = totalChars / charWindow;
|
||||
|
||||
@@ -4,6 +4,7 @@ export type ContextPruningRuntimeValue = {
|
||||
settings: EffectiveContextPruningSettings;
|
||||
contextWindowTokens?: number | null;
|
||||
isToolPrunable: (toolName: string) => boolean;
|
||||
lastCacheTouchAt?: number | null;
|
||||
};
|
||||
|
||||
// Session-scoped runtime registry keyed by object identity.
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
import { parseDurationMs } from "../../../cli/parse-duration.js";
|
||||
|
||||
export type ContextPruningToolMatch = {
|
||||
allow?: string[];
|
||||
deny?: string[];
|
||||
};
|
||||
|
||||
export type ContextPruningMode = "off" | "adaptive" | "aggressive";
|
||||
export type ContextPruningMode = "off" | "cache-ttl";
|
||||
|
||||
export type ContextPruningConfig = {
|
||||
mode?: ContextPruningMode;
|
||||
/** TTL to consider cache expired (duration string, default unit: minutes). */
|
||||
ttl?: string;
|
||||
keepLastAssistants?: number;
|
||||
softTrimRatio?: number;
|
||||
hardClearRatio?: number;
|
||||
@@ -25,6 +28,7 @@ export type ContextPruningConfig = {
|
||||
|
||||
export type EffectiveContextPruningSettings = {
|
||||
mode: Exclude<ContextPruningMode, "off">;
|
||||
ttlMs: number;
|
||||
keepLastAssistants: number;
|
||||
softTrimRatio: number;
|
||||
hardClearRatio: number;
|
||||
@@ -42,7 +46,8 @@ export type EffectiveContextPruningSettings = {
|
||||
};
|
||||
|
||||
export const DEFAULT_CONTEXT_PRUNING_SETTINGS: EffectiveContextPruningSettings = {
|
||||
mode: "adaptive",
|
||||
mode: "cache-ttl",
|
||||
ttlMs: 5 * 60 * 1000,
|
||||
keepLastAssistants: 3,
|
||||
softTrimRatio: 0.3,
|
||||
hardClearRatio: 0.5,
|
||||
@@ -62,11 +67,19 @@ export const DEFAULT_CONTEXT_PRUNING_SETTINGS: EffectiveContextPruningSettings =
|
||||
export function computeEffectiveSettings(raw: unknown): EffectiveContextPruningSettings | null {
|
||||
if (!raw || typeof raw !== "object") return null;
|
||||
const cfg = raw as ContextPruningConfig;
|
||||
if (cfg.mode !== "adaptive" && cfg.mode !== "aggressive") return null;
|
||||
if (cfg.mode !== "cache-ttl") return null;
|
||||
|
||||
const s: EffectiveContextPruningSettings = structuredClone(DEFAULT_CONTEXT_PRUNING_SETTINGS);
|
||||
s.mode = cfg.mode;
|
||||
|
||||
if (typeof cfg.ttl === "string") {
|
||||
try {
|
||||
s.ttlMs = parseDurationMs(cfg.ttl, { defaultUnit: "m" });
|
||||
} catch {
|
||||
// keep default ttl
|
||||
}
|
||||
}
|
||||
|
||||
if (typeof cfg.keepLastAssistants === "number" && Number.isFinite(cfg.keepLastAssistants)) {
|
||||
s.keepLastAssistants = Math.max(0, Math.floor(cfg.keepLastAssistants));
|
||||
}
|
||||
@@ -94,7 +107,7 @@ export function computeEffectiveSettings(raw: unknown): EffectiveContextPruningS
|
||||
}
|
||||
}
|
||||
if (cfg.hardClear) {
|
||||
if (s.mode === "adaptive" && typeof cfg.hardClear.enabled === "boolean") {
|
||||
if (typeof cfg.hardClear.enabled === "boolean") {
|
||||
s.hardClear.enabled = cfg.hardClear.enabled;
|
||||
}
|
||||
if (typeof cfg.hardClear.placeholder === "string" && cfg.hardClear.placeholder.trim()) {
|
||||
|
||||
@@ -4,7 +4,7 @@ import { describe, expect, it, vi } from "vitest";
|
||||
import { withTempHome } from "./test-helpers.js";
|
||||
|
||||
describe("config pruning defaults", () => {
|
||||
it("defaults contextPruning mode to adaptive", async () => {
|
||||
it("does not enable contextPruning by default", async () => {
|
||||
await withTempHome(async (home) => {
|
||||
const configDir = path.join(home, ".clawdbot");
|
||||
await fs.mkdir(configDir, { recursive: true });
|
||||
@@ -18,7 +18,7 @@ describe("config pruning defaults", () => {
|
||||
const { loadConfig } = await import("./config.js");
|
||||
const cfg = loadConfig();
|
||||
|
||||
expect(cfg.agents?.defaults?.contextPruning?.mode).toBe("adaptive");
|
||||
expect(cfg.agents?.defaults?.contextPruning?.mode).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -157,24 +157,7 @@ export function applyLoggingDefaults(cfg: ClawdbotConfig): ClawdbotConfig {
|
||||
}
|
||||
|
||||
export function applyContextPruningDefaults(cfg: ClawdbotConfig): ClawdbotConfig {
|
||||
const defaults = cfg.agents?.defaults;
|
||||
if (!defaults) return cfg;
|
||||
const contextPruning = defaults?.contextPruning;
|
||||
if (contextPruning?.mode) return cfg;
|
||||
|
||||
return {
|
||||
...cfg,
|
||||
agents: {
|
||||
...cfg.agents,
|
||||
defaults: {
|
||||
...defaults,
|
||||
contextPruning: {
|
||||
...contextPruning,
|
||||
mode: "adaptive",
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
return cfg;
|
||||
}
|
||||
|
||||
export function applyCompactionDefaults(cfg: ClawdbotConfig): ClawdbotConfig {
|
||||
|
||||
@@ -23,7 +23,9 @@ export type AgentModelListConfig = {
|
||||
};
|
||||
|
||||
export type AgentContextPruningConfig = {
|
||||
mode?: "off" | "adaptive" | "aggressive";
|
||||
mode?: "off" | "cache-ttl";
|
||||
/** TTL to consider cache expired (duration string, default unit: minutes). */
|
||||
ttl?: string;
|
||||
keepLastAssistants?: number;
|
||||
softTrimRatio?: number;
|
||||
hardClearRatio?: number;
|
||||
|
||||
@@ -54,9 +54,8 @@ export const AgentDefaultsSchema = z
|
||||
memorySearch: MemorySearchSchema,
|
||||
contextPruning: z
|
||||
.object({
|
||||
mode: z
|
||||
.union([z.literal("off"), z.literal("adaptive"), z.literal("aggressive")])
|
||||
.optional(),
|
||||
mode: z.union([z.literal("off"), z.literal("cache-ttl")]).optional(),
|
||||
ttl: z.string().optional(),
|
||||
keepLastAssistants: z.number().int().nonnegative().optional(),
|
||||
softTrimRatio: z.number().min(0).max(1).optional(),
|
||||
hardClearRatio: z.number().min(0).max(1).optional(),
|
||||
|
||||
Reference in New Issue
Block a user