feat: add cache-ttl pruning mode
This commit is contained in:
@@ -135,12 +135,15 @@ describe("context-pruning", () => {
|
||||
});
|
||||
|
||||
it("never prunes tool results before the first user message", () => {
|
||||
const settings = computeEffectiveSettings({
|
||||
mode: "aggressive",
|
||||
const settings = {
|
||||
...DEFAULT_CONTEXT_PRUNING_SETTINGS,
|
||||
keepLastAssistants: 0,
|
||||
hardClear: { placeholder: "[cleared]" },
|
||||
});
|
||||
if (!settings) throw new Error("expected settings");
|
||||
softTrimRatio: 0.0,
|
||||
hardClearRatio: 0.0,
|
||||
minPrunableToolChars: 0,
|
||||
hardClear: { enabled: true, placeholder: "[cleared]" },
|
||||
softTrim: { maxChars: 10, headChars: 3, tailChars: 3 },
|
||||
};
|
||||
|
||||
const messages: AgentMessage[] = [
|
||||
makeAssistant("bootstrap tool calls"),
|
||||
@@ -170,7 +173,7 @@ describe("context-pruning", () => {
|
||||
expect(toolText(findToolResult(next, "t1"))).toBe("[cleared]");
|
||||
});
|
||||
|
||||
it("mode aggressive clears eligible tool results before cutoff", () => {
|
||||
it("hard-clear removes eligible tool results before cutoff", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
makeUser("u1"),
|
||||
makeAssistant("a1"),
|
||||
@@ -195,9 +198,11 @@ describe("context-pruning", () => {
|
||||
|
||||
const settings = {
|
||||
...DEFAULT_CONTEXT_PRUNING_SETTINGS,
|
||||
mode: "aggressive",
|
||||
keepLastAssistants: 1,
|
||||
hardClear: { enabled: false, placeholder: "[cleared]" },
|
||||
softTrimRatio: 10.0,
|
||||
hardClearRatio: 0.0,
|
||||
minPrunableToolChars: 0,
|
||||
hardClear: { enabled: true, placeholder: "[cleared]" },
|
||||
};
|
||||
|
||||
const ctx = {
|
||||
@@ -258,6 +263,7 @@ describe("context-pruning", () => {
|
||||
},
|
||||
contextWindowTokens: 1000,
|
||||
isToolPrunable: () => true,
|
||||
lastCacheTouchAt: Date.now() - DEFAULT_CONTEXT_PRUNING_SETTINGS.ttlMs - 1000,
|
||||
});
|
||||
|
||||
const messages: AgentMessage[] = [
|
||||
|
||||
@@ -9,6 +9,17 @@ export default function contextPruningExtension(api: ExtensionAPI): void {
|
||||
const runtime = getContextPruningRuntime(ctx.sessionManager);
|
||||
if (!runtime) return undefined;
|
||||
|
||||
// In "cache-ttl" mode, return undefined before the pruneContextMessages call below
// (i.e. skip pruning) unless a last cache touch is recorded and at least ttlMs
// milliseconds have elapsed since it.
if (runtime.settings.mode === "cache-ttl") {
|
||||
const ttlMs = runtime.settings.ttlMs;
|
||||
// A missing (undefined/null) timestamp is normalised to null; a zero timestamp is
// also treated as "never touched" by the truthiness check that follows.
const lastTouch = runtime.lastCacheTouchAt ?? null;
|
||||
// No recorded touch, or a non-positive TTL: nothing to expire, so do not prune.
if (!lastTouch || ttlMs <= 0) {
|
||||
return undefined;
|
||||
}
|
||||
// Less than ttlMs has passed since the last touch: do not prune yet.
// NOTE(review): the `ttlMs > 0` test looks redundant here, since non-positive values
// already returned in the guard above — confirm and simplify.
if (ttlMs > 0 && Date.now() - lastTouch < ttlMs) {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
const next = pruneContextMessages({
|
||||
messages: event.messages as AgentMessage[],
|
||||
settings: runtime.settings,
|
||||
|
||||
@@ -211,34 +211,6 @@ export function pruneContextMessages(params: {
|
||||
|
||||
const isToolPrunable = params.isToolPrunable ?? makeToolPrunablePredicate(settings.tools);
|
||||
|
||||
if (settings.mode === "aggressive") {
|
||||
let next: AgentMessage[] | null = null;
|
||||
|
||||
for (let i = pruneStartIndex; i < cutoffIndex; i++) {
|
||||
const msg = messages[i];
|
||||
if (!msg || msg.role !== "toolResult") continue;
|
||||
if (!isToolPrunable(msg.toolName)) continue;
|
||||
if (hasImageBlocks(msg.content)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const alreadyCleared =
|
||||
msg.content.length === 1 &&
|
||||
msg.content[0]?.type === "text" &&
|
||||
msg.content[0].text === settings.hardClear.placeholder;
|
||||
if (alreadyCleared) continue;
|
||||
|
||||
const cleared: ToolResultMessage = {
|
||||
...msg,
|
||||
content: [asText(settings.hardClear.placeholder)],
|
||||
};
|
||||
if (!next) next = messages.slice();
|
||||
next[i] = cleared as unknown as AgentMessage;
|
||||
}
|
||||
|
||||
return next ?? messages;
|
||||
}
|
||||
|
||||
const totalCharsBefore = estimateContextChars(messages);
|
||||
let totalChars = totalCharsBefore;
|
||||
let ratio = totalChars / charWindow;
|
||||
|
||||
@@ -4,6 +4,7 @@ export type ContextPruningRuntimeValue = {
|
||||
settings: EffectiveContextPruningSettings;
|
||||
contextWindowTokens?: number | null;
|
||||
isToolPrunable: (toolName: string) => boolean;
|
||||
lastCacheTouchAt?: number | null;
|
||||
};
|
||||
|
||||
// Session-scoped runtime registry keyed by object identity.
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
import { parseDurationMs } from "../../../cli/parse-duration.js";
|
||||
|
||||
/**
 * Pair of optional string lists, `allow` and `deny`.
 *
 * NOTE(review): presumably tool-name filters for context pruning (going by the type
 * name); how entries are matched and which list takes precedence is not visible
 * here — confirm against the code that consumes this type.
 */
export type ContextPruningToolMatch = {
|
||||
allow?: string[];
|
||||
deny?: string[];
|
||||
};
|
||||
|
||||
export type ContextPruningMode = "off" | "adaptive" | "aggressive";
|
||||
/**
 * Pruning mode accepted in configuration.
 *
 * `computeEffectiveSettings` returns null for any mode other than "cache-ttl", and
 * `EffectiveContextPruningSettings.mode` excludes "off", so "cache-ttl" is the only
 * mode that yields effective settings.
 */
export type ContextPruningMode = "off" | "cache-ttl";
|
||||
|
||||
export type ContextPruningConfig = {
|
||||
mode?: ContextPruningMode;
|
||||
/** TTL to consider cache expired (duration string, default unit: minutes). */
|
||||
ttl?: string;
|
||||
keepLastAssistants?: number;
|
||||
softTrimRatio?: number;
|
||||
hardClearRatio?: number;
|
||||
@@ -25,6 +28,7 @@ export type ContextPruningConfig = {
|
||||
|
||||
export type EffectiveContextPruningSettings = {
|
||||
mode: Exclude<ContextPruningMode, "off">;
|
||||
ttlMs: number;
|
||||
keepLastAssistants: number;
|
||||
softTrimRatio: number;
|
||||
hardClearRatio: number;
|
||||
@@ -42,7 +46,8 @@ export type EffectiveContextPruningSettings = {
|
||||
};
|
||||
|
||||
export const DEFAULT_CONTEXT_PRUNING_SETTINGS: EffectiveContextPruningSettings = {
|
||||
mode: "adaptive",
|
||||
mode: "cache-ttl",
|
||||
ttlMs: 5 * 60 * 1000,
|
||||
keepLastAssistants: 3,
|
||||
softTrimRatio: 0.3,
|
||||
hardClearRatio: 0.5,
|
||||
@@ -62,11 +67,19 @@ export const DEFAULT_CONTEXT_PRUNING_SETTINGS: EffectiveContextPruningSettings =
|
||||
export function computeEffectiveSettings(raw: unknown): EffectiveContextPruningSettings | null {
|
||||
if (!raw || typeof raw !== "object") return null;
|
||||
const cfg = raw as ContextPruningConfig;
|
||||
if (cfg.mode !== "adaptive" && cfg.mode !== "aggressive") return null;
|
||||
if (cfg.mode !== "cache-ttl") return null;
|
||||
|
||||
const s: EffectiveContextPruningSettings = structuredClone(DEFAULT_CONTEXT_PRUNING_SETTINGS);
|
||||
s.mode = cfg.mode;
|
||||
|
||||
if (typeof cfg.ttl === "string") {
|
||||
try {
|
||||
s.ttlMs = parseDurationMs(cfg.ttl, { defaultUnit: "m" });
|
||||
} catch {
|
||||
// keep default ttl
|
||||
}
|
||||
}
|
||||
|
||||
if (typeof cfg.keepLastAssistants === "number" && Number.isFinite(cfg.keepLastAssistants)) {
|
||||
s.keepLastAssistants = Math.max(0, Math.floor(cfg.keepLastAssistants));
|
||||
}
|
||||
@@ -94,7 +107,7 @@ export function computeEffectiveSettings(raw: unknown): EffectiveContextPruningS
|
||||
}
|
||||
}
|
||||
if (cfg.hardClear) {
|
||||
if (s.mode === "adaptive" && typeof cfg.hardClear.enabled === "boolean") {
|
||||
if (typeof cfg.hardClear.enabled === "boolean") {
|
||||
s.hardClear.enabled = cfg.hardClear.enabled;
|
||||
}
|
||||
if (typeof cfg.hardClear.placeholder === "string" && cfg.hardClear.placeholder.trim()) {
|
||||
|
||||
Reference in New Issue
Block a user