Agents: summarize dropped messages during compaction safeguard pruning (#2418)
This commit is contained in:
@@ -6,6 +6,7 @@ Docs: https://docs.clawd.bot
|
||||
Status: unreleased.
|
||||
|
||||
### Changes
|
||||
- Agents: summarize dropped messages during compaction safeguard pruning. (#2418)
|
||||
- Skills: add multi-image input support to Nano Banana Pro skill. (#1958) Thanks @tyler6204.
|
||||
- Agents: honor tools.exec.safeBins in exec allowlist checks. (#2281)
|
||||
- Matrix: switch plugin SDK to @vector-im/matrix-bot-sdk.
|
||||
|
||||
@@ -103,5 +103,47 @@ describe("pruneHistoryForContextShare", () => {
|
||||
expect(pruned.droppedChunks).toBe(0);
|
||||
expect(pruned.messages.length).toBe(messages.length);
|
||||
expect(pruned.keptTokens).toBe(estimateMessagesTokens(messages));
|
||||
expect(pruned.droppedMessagesList).toEqual([]);
|
||||
});
|
||||
|
||||
it("returns droppedMessagesList containing dropped messages", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
makeMessage(1, 4000),
|
||||
makeMessage(2, 4000),
|
||||
makeMessage(3, 4000),
|
||||
makeMessage(4, 4000),
|
||||
];
|
||||
const maxContextTokens = 2000; // budget is 1000 tokens (50%)
|
||||
const pruned = pruneHistoryForContextShare({
|
||||
messages,
|
||||
maxContextTokens,
|
||||
maxHistoryShare: 0.5,
|
||||
parts: 2,
|
||||
});
|
||||
|
||||
expect(pruned.droppedChunks).toBeGreaterThan(0);
|
||||
expect(pruned.droppedMessagesList.length).toBe(pruned.droppedMessages);
|
||||
|
||||
// All messages accounted for: kept + dropped = original
|
||||
const allIds = [
|
||||
...pruned.droppedMessagesList.map((m) => m.timestamp),
|
||||
...pruned.messages.map((m) => m.timestamp),
|
||||
].sort((a, b) => a - b);
|
||||
const originalIds = messages.map((m) => m.timestamp).sort((a, b) => a - b);
|
||||
expect(allIds).toEqual(originalIds);
|
||||
});
|
||||
|
||||
it("returns empty droppedMessagesList when no pruning needed", () => {
|
||||
const messages: AgentMessage[] = [makeMessage(1, 100)];
|
||||
const pruned = pruneHistoryForContextShare({
|
||||
messages,
|
||||
maxContextTokens: 100_000,
|
||||
maxHistoryShare: 0.5,
|
||||
parts: 2,
|
||||
});
|
||||
|
||||
expect(pruned.droppedChunks).toBe(0);
|
||||
expect(pruned.droppedMessagesList).toEqual([]);
|
||||
expect(pruned.messages.length).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -301,6 +301,7 @@ export function pruneHistoryForContextShare(params: {
|
||||
parts?: number;
|
||||
}): {
|
||||
messages: AgentMessage[];
|
||||
droppedMessagesList: AgentMessage[];
|
||||
droppedChunks: number;
|
||||
droppedMessages: number;
|
||||
droppedTokens: number;
|
||||
@@ -310,6 +311,7 @@ export function pruneHistoryForContextShare(params: {
|
||||
const maxHistoryShare = params.maxHistoryShare ?? 0.5;
|
||||
const budgetTokens = Math.max(1, Math.floor(params.maxContextTokens * maxHistoryShare));
|
||||
let keptMessages = params.messages;
|
||||
const allDroppedMessages: AgentMessage[] = [];
|
||||
let droppedChunks = 0;
|
||||
let droppedMessages = 0;
|
||||
let droppedTokens = 0;
|
||||
@@ -323,11 +325,13 @@ export function pruneHistoryForContextShare(params: {
|
||||
droppedChunks += 1;
|
||||
droppedMessages += dropped.length;
|
||||
droppedTokens += estimateMessagesTokens(dropped);
|
||||
allDroppedMessages.push(...dropped);
|
||||
keptMessages = rest.flat();
|
||||
}
|
||||
|
||||
return {
|
||||
messages: keptMessages,
|
||||
droppedMessagesList: allDroppedMessages,
|
||||
droppedChunks,
|
||||
droppedMessages,
|
||||
droppedTokens,
|
||||
|
||||
@@ -7,6 +7,7 @@ import type { SessionManager } from "@mariozechner/pi-coding-agent";
|
||||
import type { ClawdbotConfig } from "../../config/config.js";
|
||||
import { resolveContextWindowInfo } from "../context-window-guard.js";
|
||||
import { DEFAULT_CONTEXT_TOKENS } from "../defaults.js";
|
||||
import { setCompactionSafeguardRuntime } from "../pi-extensions/compaction-safeguard-runtime.js";
|
||||
import { setContextPruningRuntime } from "../pi-extensions/context-pruning/runtime.js";
|
||||
import { computeEffectiveSettings } from "../pi-extensions/context-pruning/settings.js";
|
||||
import { makeToolPrunablePredicate } from "../pi-extensions/context-pruning/tools.js";
|
||||
@@ -75,6 +76,10 @@ export function buildEmbeddedExtensionPaths(params: {
|
||||
}): string[] {
|
||||
const paths: string[] = [];
|
||||
if (resolveCompactionMode(params.cfg) === "safeguard") {
|
||||
const compactionCfg = params.cfg?.agents?.defaults?.compaction;
|
||||
setCompactionSafeguardRuntime(params.sessionManager, {
|
||||
maxHistoryShare: compactionCfg?.maxHistoryShare,
|
||||
});
|
||||
paths.push(resolvePiExtensionPath("compaction-safeguard"));
|
||||
}
|
||||
const pruning = buildContextPruningExtension(params);
|
||||
|
||||
34
src/agents/pi-extensions/compaction-safeguard-runtime.ts
Normal file
34
src/agents/pi-extensions/compaction-safeguard-runtime.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
/**
 * Per-session settings handed to the compaction-safeguard extension.
 */
export type CompactionSafeguardRuntimeValue = {
  /** Optional history-share override; left undefined when the caller supplies none. */
  maxHistoryShare?: number;
};

// Session-scoped runtime registry keyed by object identity.
// Follows the same WeakMap pattern as context-pruning/runtime.ts.
// A WeakMap does not keep its keys alive, so an entry disappears together
// with the session manager it was registered for.
const REGISTRY = new WeakMap<object, CompactionSafeguardRuntimeValue>();

/** True when `candidate` is a non-null object and therefore usable as a registry key. */
function isRegistryKey(candidate: unknown): candidate is object {
  return typeof candidate === "object" && candidate !== null;
}

/**
 * Registers, replaces, or clears the safeguard settings for a session manager.
 *
 * @param sessionManager - Registry key; anything that is not a non-null object is ignored.
 * @param value - Settings to store, or `null` to remove the existing entry.
 */
export function setCompactionSafeguardRuntime(
  sessionManager: unknown,
  value: CompactionSafeguardRuntimeValue | null,
): void {
  if (!isRegistryKey(sessionManager)) {
    return;
  }

  if (value === null) {
    REGISTRY.delete(sessionManager);
    return;
  }

  REGISTRY.set(sessionManager, value);
}

/**
 * Looks up the safeguard settings registered for a session manager.
 *
 * @param sessionManager - Registry key previously passed to `setCompactionSafeguardRuntime`.
 * @returns The stored settings, or `null` when the key is not a non-null object
 *   or nothing is registered for it.
 */
export function getCompactionSafeguardRuntime(
  sessionManager: unknown,
): CompactionSafeguardRuntimeValue | null {
  if (!isRegistryKey(sessionManager)) {
    return null;
  }

  return REGISTRY.get(sessionManager) ?? null;
}
|
||||
@@ -1,6 +1,10 @@
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import {
|
||||
getCompactionSafeguardRuntime,
|
||||
setCompactionSafeguardRuntime,
|
||||
} from "./compaction-safeguard-runtime.js";
|
||||
import { __testing } from "./compaction-safeguard.js";
|
||||
|
||||
const {
|
||||
@@ -208,3 +212,41 @@ describe("isOversizedForSummary", () => {
|
||||
expect(typeof isOversized).toBe("boolean");
|
||||
});
|
||||
});
|
||||
|
||||
// Exercises the session-scoped registry exposed by compaction-safeguard-runtime:
// round-tripping, unknown keys, clearing, rejected keys, and per-key isolation.
describe("compaction-safeguard runtime registry", () => {
  it("stores and retrieves config by session manager identity", () => {
    const sessionManager = {};
    setCompactionSafeguardRuntime(sessionManager, { maxHistoryShare: 0.3 });

    expect(getCompactionSafeguardRuntime(sessionManager)).toEqual({ maxHistoryShare: 0.3 });
  });

  it("returns null for unknown session manager", () => {
    const neverRegistered = {};
    const lookedUp = getCompactionSafeguardRuntime(neverRegistered);

    expect(lookedUp).toBeNull();
  });

  it("clears entry when value is null", () => {
    const sessionManager = {};

    // Register first so the subsequent clear has something to remove.
    setCompactionSafeguardRuntime(sessionManager, { maxHistoryShare: 0.7 });
    expect(getCompactionSafeguardRuntime(sessionManager)).not.toBeNull();

    setCompactionSafeguardRuntime(sessionManager, null);
    expect(getCompactionSafeguardRuntime(sessionManager)).toBeNull();
  });

  it("ignores non-object session managers", () => {
    // Neither null nor undefined can act as a registry key.
    for (const rejectedKey of [null, undefined]) {
      setCompactionSafeguardRuntime(rejectedKey, { maxHistoryShare: 0.5 });
      expect(getCompactionSafeguardRuntime(rejectedKey)).toBeNull();
    }
  });

  it("isolates different session managers", () => {
    const first = {};
    const second = {};

    setCompactionSafeguardRuntime(first, { maxHistoryShare: 0.3 });
    setCompactionSafeguardRuntime(second, { maxHistoryShare: 0.8 });

    const firstRuntime = getCompactionSafeguardRuntime(first);
    const secondRuntime = getCompactionSafeguardRuntime(second);
    expect(firstRuntime).toEqual({ maxHistoryShare: 0.3 });
    expect(secondRuntime).toEqual({ maxHistoryShare: 0.8 });
  });
});
|
||||
|
||||
@@ -11,6 +11,7 @@ import {
|
||||
resolveContextWindowTokens,
|
||||
summarizeInStages,
|
||||
} from "../compaction.js";
|
||||
import { getCompactionSafeguardRuntime } from "./compaction-safeguard-runtime.js";
|
||||
const FALLBACK_SUMMARY =
|
||||
"Summary unavailable due to context limits. Older messages were truncated.";
|
||||
const TURN_PREFIX_INSTRUCTIONS =
|
||||
@@ -174,21 +175,28 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
||||
const turnPrefixMessages = preparation.turnPrefixMessages ?? [];
|
||||
let messagesToSummarize = preparation.messagesToSummarize;
|
||||
|
||||
const runtime = getCompactionSafeguardRuntime(ctx.sessionManager);
|
||||
const maxHistoryShare = runtime?.maxHistoryShare ?? 0.5;
|
||||
|
||||
const tokensBefore =
|
||||
typeof preparation.tokensBefore === "number" && Number.isFinite(preparation.tokensBefore)
|
||||
? preparation.tokensBefore
|
||||
: undefined;
|
||||
|
||||
let droppedSummary: string | undefined;
|
||||
|
||||
if (tokensBefore !== undefined) {
|
||||
const summarizableTokens =
|
||||
estimateMessagesTokens(messagesToSummarize) + estimateMessagesTokens(turnPrefixMessages);
|
||||
const newContentTokens = Math.max(0, Math.floor(tokensBefore - summarizableTokens));
|
||||
const maxHistoryTokens = Math.floor(contextWindowTokens * 0.5);
|
||||
// Apply SAFETY_MARGIN so token underestimates don't trigger unnecessary pruning
|
||||
const maxHistoryTokens = Math.floor(contextWindowTokens * maxHistoryShare * SAFETY_MARGIN);
|
||||
|
||||
if (newContentTokens > maxHistoryTokens) {
|
||||
const pruned = pruneHistoryForContextShare({
|
||||
messages: messagesToSummarize,
|
||||
maxContextTokens: contextWindowTokens,
|
||||
maxHistoryShare: 0.5,
|
||||
maxHistoryShare,
|
||||
parts: 2,
|
||||
});
|
||||
if (pruned.droppedChunks > 0) {
|
||||
@@ -200,6 +208,37 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
||||
`(${pruned.droppedMessages} messages) to fit history budget.`,
|
||||
);
|
||||
messagesToSummarize = pruned.messages;
|
||||
|
||||
// Summarize dropped messages so context isn't lost
|
||||
if (pruned.droppedMessagesList.length > 0) {
|
||||
try {
|
||||
const droppedChunkRatio = computeAdaptiveChunkRatio(
|
||||
pruned.droppedMessagesList,
|
||||
contextWindowTokens,
|
||||
);
|
||||
const droppedMaxChunkTokens = Math.max(
|
||||
1,
|
||||
Math.floor(contextWindowTokens * droppedChunkRatio),
|
||||
);
|
||||
droppedSummary = await summarizeInStages({
|
||||
messages: pruned.droppedMessagesList,
|
||||
model,
|
||||
apiKey,
|
||||
signal,
|
||||
reserveTokens: Math.max(1, Math.floor(preparation.settings.reserveTokens)),
|
||||
maxChunkTokens: droppedMaxChunkTokens,
|
||||
contextWindow: contextWindowTokens,
|
||||
customInstructions,
|
||||
previousSummary: preparation.previousSummary,
|
||||
});
|
||||
} catch (droppedError) {
|
||||
console.warn(
|
||||
`Compaction safeguard: failed to summarize dropped messages, continuing without: ${
|
||||
droppedError instanceof Error ? droppedError.message : String(droppedError)
|
||||
}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -210,6 +249,10 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
||||
const maxChunkTokens = Math.max(1, Math.floor(contextWindowTokens * adaptiveRatio));
|
||||
const reserveTokens = Math.max(1, Math.floor(preparation.settings.reserveTokens));
|
||||
|
||||
// Feed dropped-messages summary as previousSummary so the main summarization
|
||||
// incorporates context from pruned messages instead of losing it entirely.
|
||||
const effectivePreviousSummary = droppedSummary ?? preparation.previousSummary;
|
||||
|
||||
const historySummary = await summarizeInStages({
|
||||
messages: messagesToSummarize,
|
||||
model,
|
||||
@@ -219,7 +262,7 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
||||
maxChunkTokens,
|
||||
contextWindow: contextWindowTokens,
|
||||
customInstructions,
|
||||
previousSummary: preparation.previousSummary,
|
||||
previousSummary: effectivePreviousSummary,
|
||||
});
|
||||
|
||||
let summary = historySummary;
|
||||
|
||||
@@ -244,6 +244,8 @@ export type AgentCompactionConfig = {
|
||||
mode?: AgentCompactionMode;
|
||||
/** Minimum reserve tokens enforced for Pi compaction (0 disables the floor). */
|
||||
reserveTokensFloor?: number;
|
||||
/** Max share of context window for history during safeguard pruning (0.1–0.9, default 0.5). */
|
||||
maxHistoryShare?: number;
|
||||
/** Pre-compaction memory flush (agentic turn). Default: enabled. */
|
||||
memoryFlush?: AgentCompactionMemoryFlushConfig;
|
||||
};
|
||||
|
||||
@@ -90,6 +90,7 @@ export const AgentDefaultsSchema = z
|
||||
.object({
|
||||
mode: z.union([z.literal("default"), z.literal("safeguard")]).optional(),
|
||||
reserveTokensFloor: z.number().int().nonnegative().optional(),
|
||||
maxHistoryShare: z.number().min(0.1).max(0.9).optional(),
|
||||
memoryFlush: z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
|
||||
Reference in New Issue
Block a user