feat: add cache-ttl pruning mode

This commit is contained in:
Peter Steinberger
2026-01-21 19:44:20 +00:00
parent c415ccaed5
commit 9f59ff325b
18 changed files with 164 additions and 123 deletions

View File

@@ -0,0 +1,52 @@
/**
 * Loose structural view of a session entry. Every field is `unknown` because
 * the session manager is treated as an untyped object and each field is
 * checked at runtime before use.
 */
type CustomEntryLike = { type?: unknown; customType?: unknown; data?: unknown };
/** `customType` tag identifying a custom session entry that records a cache-TTL timestamp. */
export const CACHE_TTL_CUSTOM_TYPE = "clawdbot.cache-ttl";
/** Payload carried by a cache-TTL custom entry. */
export type CacheTtlEntryData = {
/** Numeric timestamp of the cache touch (presumably epoch milliseconds — confirm against callers). */
timestamp: number;
/** Provider name recorded alongside the timestamp, when available. */
provider?: string;
/** Model id recorded alongside the timestamp, when available. */
modelId?: string;
};
/**
 * Decides whether cache-TTL tracking applies to a provider/model pair.
 *
 * Matching is case-insensitive. A pair is eligible when the provider is
 * `anthropic`, or when the provider is `openrouter` and the model id starts
 * with the `anthropic/` prefix.
 *
 * @param provider - Provider name.
 * @param modelId - Model identifier as configured for that provider.
 * @returns `true` when the pair is eligible, otherwise `false`.
 */
export function isCacheTtlEligibleProvider(provider: string, modelId: string): boolean {
  // Both inputs are lower-cased up front, before any branching.
  const providerKey = provider.toLowerCase();
  const modelKey = modelId.toLowerCase();
  switch (providerKey) {
    case "anthropic":
      return true;
    case "openrouter":
      return modelKey.startsWith("anthropic/");
    default:
      return false;
  }
}
/**
 * Finds the most recent usable cache-TTL timestamp stored in a session.
 *
 * Entries are scanned from the end of the list towards the start. An entry
 * counts only if its `type` is `"custom"`, its `customType` equals
 * {@link CACHE_TTL_CUSTOM_TYPE}, and its `data.timestamp` is a finite,
 * non-zero number. The first such entry found wins.
 *
 * @param sessionManager - Any value; it is only used if it exposes a truthy
 *   `getEntries` member.
 * @returns The timestamp of the newest qualifying entry, or `null` when the
 *   manager has no `getEntries`, no entry qualifies, or reading entries throws.
 */
export function readLastCacheTtlTimestamp(sessionManager: unknown): number | null {
  const manager = sessionManager as { getEntries?: () => CustomEntryLike[] };
  if (!manager?.getEntries) return null;
  try {
    const all = manager.getEntries();
    let idx = all.length;
    // Walk backwards so the newest entry is examined first.
    while (idx-- > 0) {
      const candidate = all[idx];
      const isCacheTtlEntry =
        candidate?.type === "custom" && candidate?.customType === CACHE_TTL_CUSTOM_TYPE;
      if (!isCacheTtlEntry) continue;
      const payload = candidate?.data as Partial<CacheTtlEntryData> | undefined;
      const stamp = payload?.timestamp;
      if (typeof stamp !== "number") continue;
      // Zero and non-finite values are treated as unusable; keep looking further back.
      if (stamp !== 0 && Number.isFinite(stamp)) return stamp;
    }
    return null;
  } catch {
    // Any failure while reading entries is reported as "no timestamp".
    return null;
  }
}
/**
 * Records a cache-TTL timestamp entry on the session, on a best-effort basis.
 *
 * Calls `appendCustomEntry(CACHE_TTL_CUSTOM_TYPE, data)` on the given manager
 * when that member is present. Does nothing when it is missing, and any
 * exception thrown by the call is swallowed.
 *
 * @param sessionManager - Any value; it is only used if it exposes a truthy
 *   `appendCustomEntry` member.
 * @param data - Payload to store under the cache-TTL custom type.
 */
export function appendCacheTtlTimestamp(sessionManager: unknown, data: CacheTtlEntryData): void {
  type EntryAppender = {
    appendCustomEntry?: (customType: string, data: unknown) => void;
  };
  const manager = sessionManager as EntryAppender;
  if (manager?.appendCustomEntry) {
    try {
      manager.appendCustomEntry(CACHE_TTL_CUSTOM_TYPE, data);
    } catch {
      // Deliberately swallowed: a failed write must not interrupt the caller.
    }
  }
}

View File

@@ -11,6 +11,7 @@ import { setContextPruningRuntime } from "../pi-extensions/context-pruning/runti
import { computeEffectiveSettings } from "../pi-extensions/context-pruning/settings.js";
import { makeToolPrunablePredicate } from "../pi-extensions/context-pruning/tools.js";
import { ensurePiCompactionReserveTokens } from "../pi-settings.js";
import { isCacheTtlEligibleProvider, readLastCacheTtlTimestamp } from "./cache-ttl.js";
function resolvePiExtensionPath(id: string): string {
const self = fileURLToPath(import.meta.url);
@@ -43,7 +44,8 @@ function buildContextPruningExtension(params: {
model: Model<Api> | undefined;
}): { additionalExtensionPaths?: string[] } {
const raw = params.cfg?.agents?.defaults?.contextPruning;
if (raw?.mode !== "adaptive" && raw?.mode !== "aggressive") return {};
if (raw?.mode !== "cache-ttl") return {};
if (!isCacheTtlEligibleProvider(params.provider, params.modelId)) return {};
const settings = computeEffectiveSettings(raw);
if (!settings) return {};
@@ -52,6 +54,7 @@ function buildContextPruningExtension(params: {
settings,
contextWindowTokens: resolveContextWindowTokens(params),
isToolPrunable: makeToolPrunablePredicate(settings.tools),
lastCacheTouchAt: readLastCacheTtlTimestamp(params.sessionManager),
});
return {

View File

@@ -21,7 +21,7 @@ export function resolveExtraParams(params: {
return modelConfig?.params ? { ...modelConfig.params } : undefined;
}
type CacheControlTtl = "5m" | "1h";
type CacheControlTtl = "5m";
function resolveCacheControlTtl(
extraParams: Record<string, unknown> | undefined,
@@ -29,7 +29,7 @@ function resolveCacheControlTtl(
modelId: string,
): CacheControlTtl | undefined {
const raw = extraParams?.cacheControlTtl;
if (raw !== "5m" && raw !== "1h") return undefined;
if (raw !== "5m") return undefined;
if (provider === "anthropic") return raw;
if (provider === "openrouter" && modelId.startsWith("anthropic/")) return raw;
return undefined;

View File

@@ -36,8 +36,10 @@ describe("injectHistoryImagesIntoMessages", () => {
const didMutate = injectHistoryImagesIntoMessages(messages, new Map([[0, [image]]]));
expect(didMutate).toBe(false);
const content = messages[0]?.content as unknown[] | undefined;
expect(content).toBeDefined();
const content = messages[0]?.content;
if (!Array.isArray(content)) {
throw new Error("expected array content");
}
expect(content).toHaveLength(2);
});

View File

@@ -49,6 +49,7 @@ import { resolveDefaultModelForAgent } from "../../model-selection.js";
import { isAbortError } from "../abort.js";
import { buildEmbeddedExtensionPaths } from "../extensions.js";
import { applyExtraParamsToAgent } from "../extra-params.js";
import { appendCacheTtlTimestamp, isCacheTtlEligibleProvider } from "../cache-ttl.js";
import {
logToolSchemasForGoogle,
sanitizeSessionHistory,
@@ -685,6 +686,17 @@ export async function runEmbeddedAttempt(
note: `images: prompt=${imageResult.images.length} history=${imageResult.historyImagesByIndex.size}`,
});
const shouldTrackCacheTtl =
params.config?.agents?.defaults?.contextPruning?.mode === "cache-ttl" &&
isCacheTtlEligibleProvider(params.provider, params.modelId);
if (shouldTrackCacheTtl) {
appendCacheTtlTimestamp(sessionManager, {
timestamp: Date.now(),
provider: params.provider,
modelId: params.modelId,
});
}
// Only pass images option if there are actually images to pass
// This avoids potential issues with models that don't expect the images parameter
if (imageResult.images.length > 0) {

View File

@@ -135,12 +135,15 @@ describe("context-pruning", () => {
});
it("never prunes tool results before the first user message", () => {
const settings = computeEffectiveSettings({
mode: "aggressive",
const settings = {
...DEFAULT_CONTEXT_PRUNING_SETTINGS,
keepLastAssistants: 0,
hardClear: { placeholder: "[cleared]" },
});
if (!settings) throw new Error("expected settings");
softTrimRatio: 0.0,
hardClearRatio: 0.0,
minPrunableToolChars: 0,
hardClear: { enabled: true, placeholder: "[cleared]" },
softTrim: { maxChars: 10, headChars: 3, tailChars: 3 },
};
const messages: AgentMessage[] = [
makeAssistant("bootstrap tool calls"),
@@ -170,7 +173,7 @@ describe("context-pruning", () => {
expect(toolText(findToolResult(next, "t1"))).toBe("[cleared]");
});
it("mode aggressive clears eligible tool results before cutoff", () => {
it("hard-clear removes eligible tool results before cutoff", () => {
const messages: AgentMessage[] = [
makeUser("u1"),
makeAssistant("a1"),
@@ -195,9 +198,11 @@ describe("context-pruning", () => {
const settings = {
...DEFAULT_CONTEXT_PRUNING_SETTINGS,
mode: "aggressive",
keepLastAssistants: 1,
hardClear: { enabled: false, placeholder: "[cleared]" },
softTrimRatio: 10.0,
hardClearRatio: 0.0,
minPrunableToolChars: 0,
hardClear: { enabled: true, placeholder: "[cleared]" },
};
const ctx = {
@@ -258,6 +263,7 @@ describe("context-pruning", () => {
},
contextWindowTokens: 1000,
isToolPrunable: () => true,
lastCacheTouchAt: Date.now() - DEFAULT_CONTEXT_PRUNING_SETTINGS.ttlMs - 1000,
});
const messages: AgentMessage[] = [

View File

@@ -9,6 +9,17 @@ export default function contextPruningExtension(api: ExtensionAPI): void {
const runtime = getContextPruningRuntime(ctx.sessionManager);
if (!runtime) return undefined;
if (runtime.settings.mode === "cache-ttl") {
const ttlMs = runtime.settings.ttlMs;
const lastTouch = runtime.lastCacheTouchAt ?? null;
if (!lastTouch || ttlMs <= 0) {
return undefined;
}
if (ttlMs > 0 && Date.now() - lastTouch < ttlMs) {
return undefined;
}
}
const next = pruneContextMessages({
messages: event.messages as AgentMessage[],
settings: runtime.settings,

View File

@@ -211,34 +211,6 @@ export function pruneContextMessages(params: {
const isToolPrunable = params.isToolPrunable ?? makeToolPrunablePredicate(settings.tools);
if (settings.mode === "aggressive") {
let next: AgentMessage[] | null = null;
for (let i = pruneStartIndex; i < cutoffIndex; i++) {
const msg = messages[i];
if (!msg || msg.role !== "toolResult") continue;
if (!isToolPrunable(msg.toolName)) continue;
if (hasImageBlocks(msg.content)) {
continue;
}
const alreadyCleared =
msg.content.length === 1 &&
msg.content[0]?.type === "text" &&
msg.content[0].text === settings.hardClear.placeholder;
if (alreadyCleared) continue;
const cleared: ToolResultMessage = {
...msg,
content: [asText(settings.hardClear.placeholder)],
};
if (!next) next = messages.slice();
next[i] = cleared as unknown as AgentMessage;
}
return next ?? messages;
}
const totalCharsBefore = estimateContextChars(messages);
let totalChars = totalCharsBefore;
let ratio = totalChars / charWindow;

View File

@@ -4,6 +4,7 @@ export type ContextPruningRuntimeValue = {
settings: EffectiveContextPruningSettings;
contextWindowTokens?: number | null;
isToolPrunable: (toolName: string) => boolean;
lastCacheTouchAt?: number | null;
};
// Session-scoped runtime registry keyed by object identity.

View File

@@ -1,12 +1,15 @@
import { parseDurationMs } from "../../../cli/parse-duration.js";
export type ContextPruningToolMatch = {
allow?: string[];
deny?: string[];
};
export type ContextPruningMode = "off" | "adaptive" | "aggressive";
export type ContextPruningMode = "off" | "cache-ttl";
export type ContextPruningConfig = {
mode?: ContextPruningMode;
/** TTL to consider cache expired (duration string, default unit: minutes). */
ttl?: string;
keepLastAssistants?: number;
softTrimRatio?: number;
hardClearRatio?: number;
@@ -25,6 +28,7 @@ export type ContextPruningConfig = {
export type EffectiveContextPruningSettings = {
mode: Exclude<ContextPruningMode, "off">;
ttlMs: number;
keepLastAssistants: number;
softTrimRatio: number;
hardClearRatio: number;
@@ -42,7 +46,8 @@ export type EffectiveContextPruningSettings = {
};
export const DEFAULT_CONTEXT_PRUNING_SETTINGS: EffectiveContextPruningSettings = {
mode: "adaptive",
mode: "cache-ttl",
ttlMs: 5 * 60 * 1000,
keepLastAssistants: 3,
softTrimRatio: 0.3,
hardClearRatio: 0.5,
@@ -62,11 +67,19 @@ export const DEFAULT_CONTEXT_PRUNING_SETTINGS: EffectiveContextPruningSettings =
export function computeEffectiveSettings(raw: unknown): EffectiveContextPruningSettings | null {
if (!raw || typeof raw !== "object") return null;
const cfg = raw as ContextPruningConfig;
if (cfg.mode !== "adaptive" && cfg.mode !== "aggressive") return null;
if (cfg.mode !== "cache-ttl") return null;
const s: EffectiveContextPruningSettings = structuredClone(DEFAULT_CONTEXT_PRUNING_SETTINGS);
s.mode = cfg.mode;
if (typeof cfg.ttl === "string") {
try {
s.ttlMs = parseDurationMs(cfg.ttl, { defaultUnit: "m" });
} catch {
// keep default ttl
}
}
if (typeof cfg.keepLastAssistants === "number" && Number.isFinite(cfg.keepLastAssistants)) {
s.keepLastAssistants = Math.max(0, Math.floor(cfg.keepLastAssistants));
}
@@ -94,7 +107,7 @@ export function computeEffectiveSettings(raw: unknown): EffectiveContextPruningS
}
}
if (cfg.hardClear) {
if (s.mode === "adaptive" && typeof cfg.hardClear.enabled === "boolean") {
if (typeof cfg.hardClear.enabled === "boolean") {
s.hardClear.enabled = cfg.hardClear.enabled;
}
if (typeof cfg.hardClear.placeholder === "string" && cfg.hardClear.placeholder.trim()) {

View File

@@ -4,7 +4,7 @@ import { describe, expect, it, vi } from "vitest";
import { withTempHome } from "./test-helpers.js";
describe("config pruning defaults", () => {
it("defaults contextPruning mode to adaptive", async () => {
it("does not enable contextPruning by default", async () => {
await withTempHome(async (home) => {
const configDir = path.join(home, ".clawdbot");
await fs.mkdir(configDir, { recursive: true });
@@ -18,7 +18,7 @@ describe("config pruning defaults", () => {
const { loadConfig } = await import("./config.js");
const cfg = loadConfig();
expect(cfg.agents?.defaults?.contextPruning?.mode).toBe("adaptive");
expect(cfg.agents?.defaults?.contextPruning?.mode).toBeUndefined();
});
});

View File

@@ -157,24 +157,7 @@ export function applyLoggingDefaults(cfg: ClawdbotConfig): ClawdbotConfig {
}
export function applyContextPruningDefaults(cfg: ClawdbotConfig): ClawdbotConfig {
const defaults = cfg.agents?.defaults;
if (!defaults) return cfg;
const contextPruning = defaults?.contextPruning;
if (contextPruning?.mode) return cfg;
return {
...cfg,
agents: {
...cfg.agents,
defaults: {
...defaults,
contextPruning: {
...contextPruning,
mode: "adaptive",
},
},
},
};
return cfg;
}
export function applyCompactionDefaults(cfg: ClawdbotConfig): ClawdbotConfig {

View File

@@ -23,7 +23,9 @@ export type AgentModelListConfig = {
};
export type AgentContextPruningConfig = {
mode?: "off" | "adaptive" | "aggressive";
mode?: "off" | "cache-ttl";
/** TTL to consider cache expired (duration string, default unit: minutes). */
ttl?: string;
keepLastAssistants?: number;
softTrimRatio?: number;
hardClearRatio?: number;

View File

@@ -54,9 +54,8 @@ export const AgentDefaultsSchema = z
memorySearch: MemorySearchSchema,
contextPruning: z
.object({
mode: z
.union([z.literal("off"), z.literal("adaptive"), z.literal("aggressive")])
.optional(),
mode: z.union([z.literal("off"), z.literal("cache-ttl")]).optional(),
ttl: z.string().optional(),
keepLastAssistants: z.number().int().nonnegative().optional(),
softTrimRatio: z.number().min(0).max(1).optional(),
hardClearRatio: z.number().min(0).max(1).optional(),