Agents: summarize dropped messages during compaction safeguard pruning (#2418)
This commit is contained in:
@@ -6,6 +6,7 @@ Docs: https://docs.clawd.bot
|
|||||||
Status: unreleased.
|
Status: unreleased.
|
||||||
|
|
||||||
### Changes
|
### Changes
|
||||||
|
- Agents: summarize dropped messages during compaction safeguard pruning. (#2418)
|
||||||
- Skills: add multi-image input support to Nano Banana Pro skill. (#1958) Thanks @tyler6204.
|
- Skills: add multi-image input support to Nano Banana Pro skill. (#1958) Thanks @tyler6204.
|
||||||
- Agents: honor tools.exec.safeBins in exec allowlist checks. (#2281)
|
- Agents: honor tools.exec.safeBins in exec allowlist checks. (#2281)
|
||||||
- Matrix: switch plugin SDK to @vector-im/matrix-bot-sdk.
|
- Matrix: switch plugin SDK to @vector-im/matrix-bot-sdk.
|
||||||
|
|||||||
@@ -103,5 +103,47 @@ describe("pruneHistoryForContextShare", () => {
|
|||||||
expect(pruned.droppedChunks).toBe(0);
|
expect(pruned.droppedChunks).toBe(0);
|
||||||
expect(pruned.messages.length).toBe(messages.length);
|
expect(pruned.messages.length).toBe(messages.length);
|
||||||
expect(pruned.keptTokens).toBe(estimateMessagesTokens(messages));
|
expect(pruned.keptTokens).toBe(estimateMessagesTokens(messages));
|
||||||
|
expect(pruned.droppedMessagesList).toEqual([]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns droppedMessagesList containing dropped messages", () => {
|
||||||
|
const messages: AgentMessage[] = [
|
||||||
|
makeMessage(1, 4000),
|
||||||
|
makeMessage(2, 4000),
|
||||||
|
makeMessage(3, 4000),
|
||||||
|
makeMessage(4, 4000),
|
||||||
|
];
|
||||||
|
const maxContextTokens = 2000; // budget is 1000 tokens (50%)
|
||||||
|
const pruned = pruneHistoryForContextShare({
|
||||||
|
messages,
|
||||||
|
maxContextTokens,
|
||||||
|
maxHistoryShare: 0.5,
|
||||||
|
parts: 2,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(pruned.droppedChunks).toBeGreaterThan(0);
|
||||||
|
expect(pruned.droppedMessagesList.length).toBe(pruned.droppedMessages);
|
||||||
|
|
||||||
|
// All messages accounted for: kept + dropped = original
|
||||||
|
const allIds = [
|
||||||
|
...pruned.droppedMessagesList.map((m) => m.timestamp),
|
||||||
|
...pruned.messages.map((m) => m.timestamp),
|
||||||
|
].sort((a, b) => a - b);
|
||||||
|
const originalIds = messages.map((m) => m.timestamp).sort((a, b) => a - b);
|
||||||
|
expect(allIds).toEqual(originalIds);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns empty droppedMessagesList when no pruning needed", () => {
|
||||||
|
const messages: AgentMessage[] = [makeMessage(1, 100)];
|
||||||
|
const pruned = pruneHistoryForContextShare({
|
||||||
|
messages,
|
||||||
|
maxContextTokens: 100_000,
|
||||||
|
maxHistoryShare: 0.5,
|
||||||
|
parts: 2,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(pruned.droppedChunks).toBe(0);
|
||||||
|
expect(pruned.droppedMessagesList).toEqual([]);
|
||||||
|
expect(pruned.messages.length).toBe(1);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -301,6 +301,7 @@ export function pruneHistoryForContextShare(params: {
|
|||||||
parts?: number;
|
parts?: number;
|
||||||
}): {
|
}): {
|
||||||
messages: AgentMessage[];
|
messages: AgentMessage[];
|
||||||
|
droppedMessagesList: AgentMessage[];
|
||||||
droppedChunks: number;
|
droppedChunks: number;
|
||||||
droppedMessages: number;
|
droppedMessages: number;
|
||||||
droppedTokens: number;
|
droppedTokens: number;
|
||||||
@@ -310,6 +311,7 @@ export function pruneHistoryForContextShare(params: {
|
|||||||
const maxHistoryShare = params.maxHistoryShare ?? 0.5;
|
const maxHistoryShare = params.maxHistoryShare ?? 0.5;
|
||||||
const budgetTokens = Math.max(1, Math.floor(params.maxContextTokens * maxHistoryShare));
|
const budgetTokens = Math.max(1, Math.floor(params.maxContextTokens * maxHistoryShare));
|
||||||
let keptMessages = params.messages;
|
let keptMessages = params.messages;
|
||||||
|
const allDroppedMessages: AgentMessage[] = [];
|
||||||
let droppedChunks = 0;
|
let droppedChunks = 0;
|
||||||
let droppedMessages = 0;
|
let droppedMessages = 0;
|
||||||
let droppedTokens = 0;
|
let droppedTokens = 0;
|
||||||
@@ -323,11 +325,13 @@ export function pruneHistoryForContextShare(params: {
|
|||||||
droppedChunks += 1;
|
droppedChunks += 1;
|
||||||
droppedMessages += dropped.length;
|
droppedMessages += dropped.length;
|
||||||
droppedTokens += estimateMessagesTokens(dropped);
|
droppedTokens += estimateMessagesTokens(dropped);
|
||||||
|
allDroppedMessages.push(...dropped);
|
||||||
keptMessages = rest.flat();
|
keptMessages = rest.flat();
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
messages: keptMessages,
|
messages: keptMessages,
|
||||||
|
droppedMessagesList: allDroppedMessages,
|
||||||
droppedChunks,
|
droppedChunks,
|
||||||
droppedMessages,
|
droppedMessages,
|
||||||
droppedTokens,
|
droppedTokens,
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import type { SessionManager } from "@mariozechner/pi-coding-agent";
|
|||||||
import type { ClawdbotConfig } from "../../config/config.js";
|
import type { ClawdbotConfig } from "../../config/config.js";
|
||||||
import { resolveContextWindowInfo } from "../context-window-guard.js";
|
import { resolveContextWindowInfo } from "../context-window-guard.js";
|
||||||
import { DEFAULT_CONTEXT_TOKENS } from "../defaults.js";
|
import { DEFAULT_CONTEXT_TOKENS } from "../defaults.js";
|
||||||
|
import { setCompactionSafeguardRuntime } from "../pi-extensions/compaction-safeguard-runtime.js";
|
||||||
import { setContextPruningRuntime } from "../pi-extensions/context-pruning/runtime.js";
|
import { setContextPruningRuntime } from "../pi-extensions/context-pruning/runtime.js";
|
||||||
import { computeEffectiveSettings } from "../pi-extensions/context-pruning/settings.js";
|
import { computeEffectiveSettings } from "../pi-extensions/context-pruning/settings.js";
|
||||||
import { makeToolPrunablePredicate } from "../pi-extensions/context-pruning/tools.js";
|
import { makeToolPrunablePredicate } from "../pi-extensions/context-pruning/tools.js";
|
||||||
@@ -75,6 +76,10 @@ export function buildEmbeddedExtensionPaths(params: {
|
|||||||
}): string[] {
|
}): string[] {
|
||||||
const paths: string[] = [];
|
const paths: string[] = [];
|
||||||
if (resolveCompactionMode(params.cfg) === "safeguard") {
|
if (resolveCompactionMode(params.cfg) === "safeguard") {
|
||||||
|
const compactionCfg = params.cfg?.agents?.defaults?.compaction;
|
||||||
|
setCompactionSafeguardRuntime(params.sessionManager, {
|
||||||
|
maxHistoryShare: compactionCfg?.maxHistoryShare,
|
||||||
|
});
|
||||||
paths.push(resolvePiExtensionPath("compaction-safeguard"));
|
paths.push(resolvePiExtensionPath("compaction-safeguard"));
|
||||||
}
|
}
|
||||||
const pruning = buildContextPruningExtension(params);
|
const pruning = buildContextPruningExtension(params);
|
||||||
|
|||||||
34
src/agents/pi-extensions/compaction-safeguard-runtime.ts
Normal file
34
src/agents/pi-extensions/compaction-safeguard-runtime.ts
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
/**
 * Per-session settings stored in the compaction safeguard runtime registry.
 */
export type CompactionSafeguardRuntimeValue = {
  /** Optional share of the context window allotted to history during safeguard pruning. */
  maxHistoryShare?: number;
};
|
||||||
|
|
||||||
|
// Runtime values are tracked per session, with the session manager object itself
// (by identity) serving as the key.
// Mirrors the WeakMap approach used in context-pruning/runtime.ts.
const REGISTRY: WeakMap<object, CompactionSafeguardRuntimeValue> = new WeakMap();
|
||||||
|
|
||||||
|
/**
 * Registers, replaces, or removes the compaction safeguard runtime value for a session.
 *
 * @param sessionManager - Object whose identity keys the registry entry. Anything
 *   that is not a non-null object is ignored and the call does nothing.
 * @param value - Value to associate with the session, or `null` to delete the
 *   session's existing entry.
 */
export function setCompactionSafeguardRuntime(
  sessionManager: unknown,
  value: CompactionSafeguardRuntimeValue | null,
): void {
  // Only non-null objects can key the registry; everything else is silently skipped.
  if (typeof sessionManager !== "object" || sessionManager === null) {
    return;
  }

  if (value !== null) {
    REGISTRY.set(sessionManager, value);
  } else {
    // A null value means "forget this session".
    REGISTRY.delete(sessionManager);
  }
}
|
||||||
|
|
||||||
|
/**
 * Looks up the compaction safeguard runtime value registered for a session.
 *
 * @param sessionManager - Object whose identity was used as the registry key.
 * @returns The stored value, or `null` when `sessionManager` is not a non-null
 *   object or has no entry.
 */
export function getCompactionSafeguardRuntime(
  sessionManager: unknown,
): CompactionSafeguardRuntimeValue | null {
  if (typeof sessionManager !== "object" || sessionManager === null) {
    return null;
  }

  const stored = REGISTRY.get(sessionManager);
  // Normalize a missing entry to null so callers see a single "absent" value.
  return stored === undefined ? null : stored;
}
|
||||||
@@ -1,6 +1,10 @@
|
|||||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
|
import {
|
||||||
|
getCompactionSafeguardRuntime,
|
||||||
|
setCompactionSafeguardRuntime,
|
||||||
|
} from "./compaction-safeguard-runtime.js";
|
||||||
import { __testing } from "./compaction-safeguard.js";
|
import { __testing } from "./compaction-safeguard.js";
|
||||||
|
|
||||||
const {
|
const {
|
||||||
@@ -208,3 +212,41 @@ describe("isOversizedForSummary", () => {
|
|||||||
expect(typeof isOversized).toBe("boolean");
|
expect(typeof isOversized).toBe("boolean");
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Exercises the set/get pair of the compaction safeguard runtime registry.
describe("compaction-safeguard runtime registry", () => {
  it("stores and retrieves config by session manager identity", () => {
    const sessionKey = {};
    setCompactionSafeguardRuntime(sessionKey, { maxHistoryShare: 0.3 });

    expect(getCompactionSafeguardRuntime(sessionKey)).toEqual({ maxHistoryShare: 0.3 });
  });

  it("returns null for unknown session manager", () => {
    const neverRegistered = {};

    expect(getCompactionSafeguardRuntime(neverRegistered)).toBeNull();
  });

  it("clears entry when value is null", () => {
    const sessionKey = {};

    setCompactionSafeguardRuntime(sessionKey, { maxHistoryShare: 0.7 });
    expect(getCompactionSafeguardRuntime(sessionKey)).not.toBeNull();

    // Passing null removes the entry again.
    setCompactionSafeguardRuntime(sessionKey, null);
    expect(getCompactionSafeguardRuntime(sessionKey)).toBeNull();
  });

  it("ignores non-object session managers", () => {
    // Same sequence as before: null first, then undefined.
    for (const notAnObject of [null, undefined]) {
      setCompactionSafeguardRuntime(notAnObject, { maxHistoryShare: 0.5 });
      expect(getCompactionSafeguardRuntime(notAnObject)).toBeNull();
    }
  });

  it("isolates different session managers", () => {
    const firstSession = {};
    const secondSession = {};

    setCompactionSafeguardRuntime(firstSession, { maxHistoryShare: 0.3 });
    setCompactionSafeguardRuntime(secondSession, { maxHistoryShare: 0.8 });

    expect(getCompactionSafeguardRuntime(firstSession)).toEqual({ maxHistoryShare: 0.3 });
    expect(getCompactionSafeguardRuntime(secondSession)).toEqual({ maxHistoryShare: 0.8 });
  });
});
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import {
|
|||||||
resolveContextWindowTokens,
|
resolveContextWindowTokens,
|
||||||
summarizeInStages,
|
summarizeInStages,
|
||||||
} from "../compaction.js";
|
} from "../compaction.js";
|
||||||
|
import { getCompactionSafeguardRuntime } from "./compaction-safeguard-runtime.js";
|
||||||
const FALLBACK_SUMMARY =
|
const FALLBACK_SUMMARY =
|
||||||
"Summary unavailable due to context limits. Older messages were truncated.";
|
"Summary unavailable due to context limits. Older messages were truncated.";
|
||||||
const TURN_PREFIX_INSTRUCTIONS =
|
const TURN_PREFIX_INSTRUCTIONS =
|
||||||
@@ -174,21 +175,28 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
|||||||
const turnPrefixMessages = preparation.turnPrefixMessages ?? [];
|
const turnPrefixMessages = preparation.turnPrefixMessages ?? [];
|
||||||
let messagesToSummarize = preparation.messagesToSummarize;
|
let messagesToSummarize = preparation.messagesToSummarize;
|
||||||
|
|
||||||
|
const runtime = getCompactionSafeguardRuntime(ctx.sessionManager);
|
||||||
|
const maxHistoryShare = runtime?.maxHistoryShare ?? 0.5;
|
||||||
|
|
||||||
const tokensBefore =
|
const tokensBefore =
|
||||||
typeof preparation.tokensBefore === "number" && Number.isFinite(preparation.tokensBefore)
|
typeof preparation.tokensBefore === "number" && Number.isFinite(preparation.tokensBefore)
|
||||||
? preparation.tokensBefore
|
? preparation.tokensBefore
|
||||||
: undefined;
|
: undefined;
|
||||||
|
|
||||||
|
let droppedSummary: string | undefined;
|
||||||
|
|
||||||
if (tokensBefore !== undefined) {
|
if (tokensBefore !== undefined) {
|
||||||
const summarizableTokens =
|
const summarizableTokens =
|
||||||
estimateMessagesTokens(messagesToSummarize) + estimateMessagesTokens(turnPrefixMessages);
|
estimateMessagesTokens(messagesToSummarize) + estimateMessagesTokens(turnPrefixMessages);
|
||||||
const newContentTokens = Math.max(0, Math.floor(tokensBefore - summarizableTokens));
|
const newContentTokens = Math.max(0, Math.floor(tokensBefore - summarizableTokens));
|
||||||
const maxHistoryTokens = Math.floor(contextWindowTokens * 0.5);
|
// Apply SAFETY_MARGIN so token underestimates don't trigger unnecessary pruning
|
||||||
|
const maxHistoryTokens = Math.floor(contextWindowTokens * maxHistoryShare * SAFETY_MARGIN);
|
||||||
|
|
||||||
if (newContentTokens > maxHistoryTokens) {
|
if (newContentTokens > maxHistoryTokens) {
|
||||||
const pruned = pruneHistoryForContextShare({
|
const pruned = pruneHistoryForContextShare({
|
||||||
messages: messagesToSummarize,
|
messages: messagesToSummarize,
|
||||||
maxContextTokens: contextWindowTokens,
|
maxContextTokens: contextWindowTokens,
|
||||||
maxHistoryShare: 0.5,
|
maxHistoryShare,
|
||||||
parts: 2,
|
parts: 2,
|
||||||
});
|
});
|
||||||
if (pruned.droppedChunks > 0) {
|
if (pruned.droppedChunks > 0) {
|
||||||
@@ -200,6 +208,37 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
|||||||
`(${pruned.droppedMessages} messages) to fit history budget.`,
|
`(${pruned.droppedMessages} messages) to fit history budget.`,
|
||||||
);
|
);
|
||||||
messagesToSummarize = pruned.messages;
|
messagesToSummarize = pruned.messages;
|
||||||
|
|
||||||
|
// Summarize dropped messages so context isn't lost
|
||||||
|
if (pruned.droppedMessagesList.length > 0) {
|
||||||
|
try {
|
||||||
|
const droppedChunkRatio = computeAdaptiveChunkRatio(
|
||||||
|
pruned.droppedMessagesList,
|
||||||
|
contextWindowTokens,
|
||||||
|
);
|
||||||
|
const droppedMaxChunkTokens = Math.max(
|
||||||
|
1,
|
||||||
|
Math.floor(contextWindowTokens * droppedChunkRatio),
|
||||||
|
);
|
||||||
|
droppedSummary = await summarizeInStages({
|
||||||
|
messages: pruned.droppedMessagesList,
|
||||||
|
model,
|
||||||
|
apiKey,
|
||||||
|
signal,
|
||||||
|
reserveTokens: Math.max(1, Math.floor(preparation.settings.reserveTokens)),
|
||||||
|
maxChunkTokens: droppedMaxChunkTokens,
|
||||||
|
contextWindow: contextWindowTokens,
|
||||||
|
customInstructions,
|
||||||
|
previousSummary: preparation.previousSummary,
|
||||||
|
});
|
||||||
|
} catch (droppedError) {
|
||||||
|
console.warn(
|
||||||
|
`Compaction safeguard: failed to summarize dropped messages, continuing without: ${
|
||||||
|
droppedError instanceof Error ? droppedError.message : String(droppedError)
|
||||||
|
}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -210,6 +249,10 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
|||||||
const maxChunkTokens = Math.max(1, Math.floor(contextWindowTokens * adaptiveRatio));
|
const maxChunkTokens = Math.max(1, Math.floor(contextWindowTokens * adaptiveRatio));
|
||||||
const reserveTokens = Math.max(1, Math.floor(preparation.settings.reserveTokens));
|
const reserveTokens = Math.max(1, Math.floor(preparation.settings.reserveTokens));
|
||||||
|
|
||||||
|
// Feed dropped-messages summary as previousSummary so the main summarization
|
||||||
|
// incorporates context from pruned messages instead of losing it entirely.
|
||||||
|
const effectivePreviousSummary = droppedSummary ?? preparation.previousSummary;
|
||||||
|
|
||||||
const historySummary = await summarizeInStages({
|
const historySummary = await summarizeInStages({
|
||||||
messages: messagesToSummarize,
|
messages: messagesToSummarize,
|
||||||
model,
|
model,
|
||||||
@@ -219,7 +262,7 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
|||||||
maxChunkTokens,
|
maxChunkTokens,
|
||||||
contextWindow: contextWindowTokens,
|
contextWindow: contextWindowTokens,
|
||||||
customInstructions,
|
customInstructions,
|
||||||
previousSummary: preparation.previousSummary,
|
previousSummary: effectivePreviousSummary,
|
||||||
});
|
});
|
||||||
|
|
||||||
let summary = historySummary;
|
let summary = historySummary;
|
||||||
|
|||||||
@@ -244,6 +244,8 @@ export type AgentCompactionConfig = {
|
|||||||
mode?: AgentCompactionMode;
|
mode?: AgentCompactionMode;
|
||||||
/** Minimum reserve tokens enforced for Pi compaction (0 disables the floor). */
|
/** Minimum reserve tokens enforced for Pi compaction (0 disables the floor). */
|
||||||
reserveTokensFloor?: number;
|
reserveTokensFloor?: number;
|
||||||
|
/** Max share of context window for history during safeguard pruning (0.1–0.9, default 0.5). */
|
||||||
|
maxHistoryShare?: number;
|
||||||
/** Pre-compaction memory flush (agentic turn). Default: enabled. */
|
/** Pre-compaction memory flush (agentic turn). Default: enabled. */
|
||||||
memoryFlush?: AgentCompactionMemoryFlushConfig;
|
memoryFlush?: AgentCompactionMemoryFlushConfig;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -90,6 +90,7 @@ export const AgentDefaultsSchema = z
|
|||||||
.object({
|
.object({
|
||||||
mode: z.union([z.literal("default"), z.literal("safeguard")]).optional(),
|
mode: z.union([z.literal("default"), z.literal("safeguard")]).optional(),
|
||||||
reserveTokensFloor: z.number().int().nonnegative().optional(),
|
reserveTokensFloor: z.number().int().nonnegative().optional(),
|
||||||
|
maxHistoryShare: z.number().min(0.1).max(0.9).optional(),
|
||||||
memoryFlush: z
|
memoryFlush: z
|
||||||
.object({
|
.object({
|
||||||
enabled: z.boolean().optional(),
|
enabled: z.boolean().optional(),
|
||||||
|
|||||||
Reference in New Issue
Block a user