Files
clawdbot/src/agents/compaction.ts

346 lines
10 KiB
TypeScript

import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
import { estimateTokens, generateSummary } from "@mariozechner/pi-coding-agent";
import { DEFAULT_CONTEXT_TOKENS } from "./defaults.js";
export const BASE_CHUNK_RATIO = 0.4;
export const MIN_CHUNK_RATIO = 0.15;
export const SAFETY_MARGIN = 1.2; // 20% buffer for estimateTokens() inaccuracy
const DEFAULT_SUMMARY_FALLBACK = "No prior history.";
const DEFAULT_PARTS = 2;
const MERGE_SUMMARIES_INSTRUCTIONS =
"Merge these partial summaries into a single cohesive summary. Preserve decisions," +
" TODOs, open questions, and any constraints.";
export function estimateMessagesTokens(messages: AgentMessage[]): number {
return messages.reduce((sum, message) => sum + estimateTokens(message), 0);
}
function normalizeParts(parts: number, messageCount: number): number {
if (!Number.isFinite(parts) || parts <= 1) return 1;
return Math.min(Math.max(1, Math.floor(parts)), Math.max(1, messageCount));
}
export function splitMessagesByTokenShare(
messages: AgentMessage[],
parts = DEFAULT_PARTS,
): AgentMessage[][] {
if (messages.length === 0) return [];
const normalizedParts = normalizeParts(parts, messages.length);
if (normalizedParts <= 1) return [messages];
const totalTokens = estimateMessagesTokens(messages);
const targetTokens = totalTokens / normalizedParts;
const chunks: AgentMessage[][] = [];
let current: AgentMessage[] = [];
let currentTokens = 0;
for (const message of messages) {
const messageTokens = estimateTokens(message);
if (
chunks.length < normalizedParts - 1 &&
current.length > 0 &&
currentTokens + messageTokens > targetTokens
) {
chunks.push(current);
current = [];
currentTokens = 0;
}
current.push(message);
currentTokens += messageTokens;
}
if (current.length > 0) {
chunks.push(current);
}
return chunks;
}
export function chunkMessagesByMaxTokens(
messages: AgentMessage[],
maxTokens: number,
): AgentMessage[][] {
if (messages.length === 0) return [];
const chunks: AgentMessage[][] = [];
let currentChunk: AgentMessage[] = [];
let currentTokens = 0;
for (const message of messages) {
const messageTokens = estimateTokens(message);
if (currentChunk.length > 0 && currentTokens + messageTokens > maxTokens) {
chunks.push(currentChunk);
currentChunk = [];
currentTokens = 0;
}
currentChunk.push(message);
currentTokens += messageTokens;
if (messageTokens > maxTokens) {
// Split oversized messages to avoid unbounded chunk growth.
chunks.push(currentChunk);
currentChunk = [];
currentTokens = 0;
}
}
if (currentChunk.length > 0) {
chunks.push(currentChunk);
}
return chunks;
}
/**
* Compute adaptive chunk ratio based on average message size.
* When messages are large, we use smaller chunks to avoid exceeding model limits.
*/
export function computeAdaptiveChunkRatio(messages: AgentMessage[], contextWindow: number): number {
if (messages.length === 0) return BASE_CHUNK_RATIO;
const totalTokens = estimateMessagesTokens(messages);
const avgTokens = totalTokens / messages.length;
// Apply safety margin to account for estimation inaccuracy
const safeAvgTokens = avgTokens * SAFETY_MARGIN;
const avgRatio = safeAvgTokens / contextWindow;
// If average message is > 10% of context, reduce chunk ratio
if (avgRatio > 0.1) {
const reduction = Math.min(avgRatio * 2, BASE_CHUNK_RATIO - MIN_CHUNK_RATIO);
return Math.max(MIN_CHUNK_RATIO, BASE_CHUNK_RATIO - reduction);
}
return BASE_CHUNK_RATIO;
}
/**
* Check if a single message is too large to summarize.
* If single message > 50% of context, it can't be summarized safely.
*/
export function isOversizedForSummary(msg: AgentMessage, contextWindow: number): boolean {
const tokens = estimateTokens(msg) * SAFETY_MARGIN;
return tokens > contextWindow * 0.5;
}
async function summarizeChunks(params: {
messages: AgentMessage[];
model: NonNullable<ExtensionContext["model"]>;
apiKey: string;
signal: AbortSignal;
reserveTokens: number;
maxChunkTokens: number;
customInstructions?: string;
previousSummary?: string;
}): Promise<string> {
if (params.messages.length === 0) {
return params.previousSummary ?? DEFAULT_SUMMARY_FALLBACK;
}
const chunks = chunkMessagesByMaxTokens(params.messages, params.maxChunkTokens);
let summary = params.previousSummary;
for (const chunk of chunks) {
summary = await generateSummary(
chunk,
params.model,
params.reserveTokens,
params.apiKey,
params.signal,
params.customInstructions,
summary,
);
}
return summary ?? DEFAULT_SUMMARY_FALLBACK;
}
/**
* Summarize with progressive fallback for handling oversized messages.
* If full summarization fails, tries partial summarization excluding oversized messages.
*/
export async function summarizeWithFallback(params: {
messages: AgentMessage[];
model: NonNullable<ExtensionContext["model"]>;
apiKey: string;
signal: AbortSignal;
reserveTokens: number;
maxChunkTokens: number;
contextWindow: number;
customInstructions?: string;
previousSummary?: string;
}): Promise<string> {
const { messages, contextWindow } = params;
if (messages.length === 0) {
return params.previousSummary ?? DEFAULT_SUMMARY_FALLBACK;
}
// Try full summarization first
try {
return await summarizeChunks(params);
} catch (fullError) {
console.warn(
`Full summarization failed, trying partial: ${
fullError instanceof Error ? fullError.message : String(fullError)
}`,
);
}
// Fallback 1: Summarize only small messages, note oversized ones
const smallMessages: AgentMessage[] = [];
const oversizedNotes: string[] = [];
for (const msg of messages) {
if (isOversizedForSummary(msg, contextWindow)) {
const role = (msg as { role?: string }).role ?? "message";
const tokens = estimateTokens(msg);
oversizedNotes.push(
`[Large ${role} (~${Math.round(tokens / 1000)}K tokens) omitted from summary]`,
);
} else {
smallMessages.push(msg);
}
}
if (smallMessages.length > 0) {
try {
const partialSummary = await summarizeChunks({
...params,
messages: smallMessages,
});
const notes = oversizedNotes.length > 0 ? `\n\n${oversizedNotes.join("\n")}` : "";
return partialSummary + notes;
} catch (partialError) {
console.warn(
`Partial summarization also failed: ${
partialError instanceof Error ? partialError.message : String(partialError)
}`,
);
}
}
// Final fallback: Just note what was there
return (
`Context contained ${messages.length} messages (${oversizedNotes.length} oversized). ` +
`Summary unavailable due to size limits.`
);
}
export async function summarizeInStages(params: {
messages: AgentMessage[];
model: NonNullable<ExtensionContext["model"]>;
apiKey: string;
signal: AbortSignal;
reserveTokens: number;
maxChunkTokens: number;
contextWindow: number;
customInstructions?: string;
previousSummary?: string;
parts?: number;
minMessagesForSplit?: number;
}): Promise<string> {
const { messages } = params;
if (messages.length === 0) {
return params.previousSummary ?? DEFAULT_SUMMARY_FALLBACK;
}
const minMessagesForSplit = Math.max(2, params.minMessagesForSplit ?? 4);
const parts = normalizeParts(params.parts ?? DEFAULT_PARTS, messages.length);
const totalTokens = estimateMessagesTokens(messages);
if (parts <= 1 || messages.length < minMessagesForSplit || totalTokens <= params.maxChunkTokens) {
return summarizeWithFallback(params);
}
const splits = splitMessagesByTokenShare(messages, parts).filter((chunk) => chunk.length > 0);
if (splits.length <= 1) {
return summarizeWithFallback(params);
}
const partialSummaries: string[] = [];
for (const chunk of splits) {
partialSummaries.push(
await summarizeWithFallback({
...params,
messages: chunk,
previousSummary: undefined,
}),
);
}
if (partialSummaries.length === 1) {
return partialSummaries[0];
}
const summaryMessages: AgentMessage[] = partialSummaries.map((summary) => ({
role: "user",
content: summary,
timestamp: Date.now(),
}));
const mergeInstructions = params.customInstructions
? `${MERGE_SUMMARIES_INSTRUCTIONS}\n\nAdditional focus:\n${params.customInstructions}`
: MERGE_SUMMARIES_INSTRUCTIONS;
return summarizeWithFallback({
...params,
messages: summaryMessages,
customInstructions: mergeInstructions,
});
}
export function pruneHistoryForContextShare(params: {
messages: AgentMessage[];
maxContextTokens: number;
maxHistoryShare?: number;
parts?: number;
}): {
messages: AgentMessage[];
droppedMessagesList: AgentMessage[];
droppedChunks: number;
droppedMessages: number;
droppedTokens: number;
keptTokens: number;
budgetTokens: number;
} {
const maxHistoryShare = params.maxHistoryShare ?? 0.5;
const budgetTokens = Math.max(1, Math.floor(params.maxContextTokens * maxHistoryShare));
let keptMessages = params.messages;
const allDroppedMessages: AgentMessage[] = [];
let droppedChunks = 0;
let droppedMessages = 0;
let droppedTokens = 0;
const parts = normalizeParts(params.parts ?? DEFAULT_PARTS, keptMessages.length);
while (keptMessages.length > 0 && estimateMessagesTokens(keptMessages) > budgetTokens) {
const chunks = splitMessagesByTokenShare(keptMessages, parts);
if (chunks.length <= 1) break;
const [dropped, ...rest] = chunks;
droppedChunks += 1;
droppedMessages += dropped.length;
droppedTokens += estimateMessagesTokens(dropped);
allDroppedMessages.push(...dropped);
keptMessages = rest.flat();
}
return {
messages: keptMessages,
droppedMessagesList: allDroppedMessages,
droppedChunks,
droppedMessages,
droppedTokens,
keptTokens: estimateMessagesTokens(keptMessages),
budgetTokens,
};
}
export function resolveContextWindowTokens(model?: ExtensionContext["model"]): number {
return Math.max(1, Math.floor(model?.contextWindow ?? DEFAULT_CONTEXT_TOKENS));
}