feat: add pre-compaction memory flush
This commit is contained in:
@@ -73,7 +73,10 @@ import {
|
||||
import { normalizeModelCompat } from "./model-compat.js";
|
||||
import { ensureClawdbotModelsJson } from "./models-config.js";
|
||||
import type { MessagingToolSend } from "./pi-embedded-messaging.js";
|
||||
import { ensurePiCompactionReserveTokens } from "./pi-settings.js";
|
||||
import {
|
||||
ensurePiCompactionReserveTokens,
|
||||
resolveCompactionReserveTokensFloor,
|
||||
} from "./pi-settings.js";
|
||||
import { acquireSessionWriteLock } from "./session-write-lock.js";
|
||||
|
||||
export type { MessagingToolSend } from "./pi-embedded-messaging.js";
|
||||
@@ -1184,7 +1187,12 @@ export async function compactEmbeddedPiSession(params: {
|
||||
effectiveWorkspace,
|
||||
agentDir,
|
||||
);
|
||||
ensurePiCompactionReserveTokens({ settingsManager });
|
||||
ensurePiCompactionReserveTokens({
|
||||
settingsManager,
|
||||
minReserveTokens: resolveCompactionReserveTokensFloor(
|
||||
params.config,
|
||||
),
|
||||
});
|
||||
const additionalExtensionPaths = buildEmbeddedExtensionPaths({
|
||||
cfg: params.config,
|
||||
sessionManager,
|
||||
@@ -1584,7 +1592,12 @@ export async function runEmbeddedPiAgent(params: {
|
||||
effectiveWorkspace,
|
||||
agentDir,
|
||||
);
|
||||
ensurePiCompactionReserveTokens({ settingsManager });
|
||||
ensurePiCompactionReserveTokens({
|
||||
settingsManager,
|
||||
minReserveTokens: resolveCompactionReserveTokensFloor(
|
||||
params.config,
|
||||
),
|
||||
});
|
||||
const additionalExtensionPaths = buildEmbeddedExtensionPaths({
|
||||
cfg: params.config,
|
||||
sessionManager,
|
||||
|
||||
@@ -3,6 +3,7 @@ import { describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR,
|
||||
ensurePiCompactionReserveTokens,
|
||||
resolveCompactionReserveTokensFloor,
|
||||
} from "./pi-settings.js";
|
||||
|
||||
describe("ensurePiCompactionReserveTokens", () => {
|
||||
@@ -35,3 +36,24 @@ describe("ensurePiCompactionReserveTokens", () => {
|
||||
expect(settingsManager.applyOverrides).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
// Covers the config-driven reserve-token floor: the fallback default and explicit values.
describe("resolveCompactionReserveTokensFloor", () => {
  it("returns the default when config is missing", () => {
    const floor = resolveCompactionReserveTokensFloor();
    expect(floor).toBe(DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR);
  });

  it("accepts configured floors, including zero", () => {
    // Builds the minimal config shape that carries a reserve-token floor.
    const configWithFloor = (reserveTokensFloor: number) => ({
      agents: { defaults: { compaction: { reserveTokensFloor } } },
    });

    const explicitFloor = resolveCompactionReserveTokensFloor(
      configWithFloor(24_000),
    );
    expect(explicitFloor).toBe(24_000);

    // Zero is a valid configured value and must not fall back to the default.
    const zeroFloor = resolveCompactionReserveTokensFloor(configWithFloor(0));
    expect(zeroFloor).toBe(0);
  });
});
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import type { ClawdbotConfig } from "../config/config.js";
|
||||
|
||||
export const DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR = 20_000;
|
||||
|
||||
type PiSettingsManagerLike = {
|
||||
@@ -25,3 +27,13 @@ export function ensurePiCompactionReserveTokens(params: {
|
||||
|
||||
return { didOverride: true, reserveTokens: minReserveTokens };
|
||||
}
|
||||
|
||||
/**
 * Resolves the reserve-token floor for Pi compaction from the config.
 *
 * Reads `agents.defaults.compaction.reserveTokensFloor`. A finite,
 * non-negative number is accepted and rounded down to an integer (zero is a
 * valid value). Anything else, including a missing config, yields
 * `DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR`.
 *
 * @param cfg - Optional Clawdbot config to read the floor from.
 * @returns The configured floor, or the default when unset or invalid.
 */
export function resolveCompactionReserveTokensFloor(
  cfg?: ClawdbotConfig,
): number {
  const configured = cfg?.agents?.defaults?.compaction?.reserveTokensFloor;
  if (typeof configured !== "number") {
    return DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR;
  }
  if (!Number.isFinite(configured) || configured < 0) {
    return DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR;
  }
  return Math.floor(configured);
}
|
||||
|
||||
@@ -53,6 +53,11 @@ import {
|
||||
} from "./block-reply-pipeline.js";
|
||||
import { resolveBlockStreamingCoalescing } from "./block-streaming.js";
|
||||
import { createFollowupRunner } from "./followup-runner.js";
|
||||
import {
|
||||
resolveMemoryFlushContextWindowTokens,
|
||||
resolveMemoryFlushSettings,
|
||||
shouldRunMemoryFlush,
|
||||
} from "./memory-flush.js";
|
||||
import {
|
||||
enqueueFollowupRun,
|
||||
type FollowupRun,
|
||||
@@ -337,6 +342,122 @@ export async function runReplyAgent(params: {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const memoryFlushSettings = resolveMemoryFlushSettings(cfg);
|
||||
const shouldFlushMemory =
|
||||
memoryFlushSettings &&
|
||||
!isHeartbeat &&
|
||||
!isCliProvider(followupRun.run.provider, cfg) &&
|
||||
shouldRunMemoryFlush({
|
||||
entry:
|
||||
activeSessionEntry ??
|
||||
(sessionKey ? activeSessionStore?.[sessionKey] : undefined),
|
||||
contextWindowTokens: resolveMemoryFlushContextWindowTokens({
|
||||
modelId: followupRun.run.model ?? defaultModel,
|
||||
agentCfgContextTokens,
|
||||
}),
|
||||
reserveTokensFloor: memoryFlushSettings.reserveTokensFloor,
|
||||
softThresholdTokens: memoryFlushSettings.softThresholdTokens,
|
||||
});
|
||||
if (shouldFlushMemory) {
|
||||
const flushRunId = crypto.randomUUID();
|
||||
if (sessionKey) {
|
||||
registerAgentRunContext(flushRunId, {
|
||||
sessionKey,
|
||||
verboseLevel: resolvedVerboseLevel,
|
||||
});
|
||||
}
|
||||
let memoryCompactionCompleted = false;
|
||||
const flushSystemPrompt = [
|
||||
followupRun.run.extraSystemPrompt,
|
||||
memoryFlushSettings.systemPrompt,
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join("\n\n");
|
||||
try {
|
||||
await runWithModelFallback({
|
||||
cfg: followupRun.run.config,
|
||||
provider: followupRun.run.provider,
|
||||
model: followupRun.run.model,
|
||||
run: (provider, model) =>
|
||||
runEmbeddedPiAgent({
|
||||
sessionId: followupRun.run.sessionId,
|
||||
sessionKey,
|
||||
messageProvider:
|
||||
sessionCtx.Provider?.trim().toLowerCase() || undefined,
|
||||
agentAccountId: sessionCtx.AccountId,
|
||||
// Provider threading context for tool auto-injection
|
||||
...buildThreadingToolContext({
|
||||
sessionCtx,
|
||||
config: followupRun.run.config,
|
||||
hasRepliedRef: opts?.hasRepliedRef,
|
||||
}),
|
||||
sessionFile: followupRun.run.sessionFile,
|
||||
workspaceDir: followupRun.run.workspaceDir,
|
||||
agentDir: followupRun.run.agentDir,
|
||||
config: followupRun.run.config,
|
||||
skillsSnapshot: followupRun.run.skillsSnapshot,
|
||||
prompt: memoryFlushSettings.prompt,
|
||||
extraSystemPrompt: flushSystemPrompt,
|
||||
ownerNumbers: followupRun.run.ownerNumbers,
|
||||
enforceFinalTag: followupRun.run.enforceFinalTag,
|
||||
provider,
|
||||
model,
|
||||
authProfileId: followupRun.run.authProfileId,
|
||||
thinkLevel: followupRun.run.thinkLevel,
|
||||
verboseLevel: followupRun.run.verboseLevel,
|
||||
reasoningLevel: followupRun.run.reasoningLevel,
|
||||
bashElevated: followupRun.run.bashElevated,
|
||||
timeoutMs: followupRun.run.timeoutMs,
|
||||
runId: flushRunId,
|
||||
onAgentEvent: (evt) => {
|
||||
if (evt.stream === "compaction") {
|
||||
const phase =
|
||||
typeof evt.data.phase === "string" ? evt.data.phase : "";
|
||||
const willRetry = Boolean(evt.data.willRetry);
|
||||
if (phase === "end" && !willRetry) {
|
||||
memoryCompactionCompleted = true;
|
||||
}
|
||||
}
|
||||
},
|
||||
}),
|
||||
});
|
||||
let memoryFlushCompactionCount =
|
||||
activeSessionEntry?.compactionCount ??
|
||||
(sessionKey ? activeSessionStore?.[sessionKey]?.compactionCount : 0) ??
|
||||
0;
|
||||
if (memoryCompactionCompleted) {
|
||||
const nextCount = await incrementCompactionCount({
|
||||
sessionEntry: activeSessionEntry,
|
||||
sessionStore: activeSessionStore,
|
||||
sessionKey,
|
||||
storePath,
|
||||
});
|
||||
if (typeof nextCount === "number") {
|
||||
memoryFlushCompactionCount = nextCount;
|
||||
}
|
||||
}
|
||||
if (storePath && sessionKey) {
|
||||
try {
|
||||
const updatedEntry = await updateSessionStoreEntry({
|
||||
storePath,
|
||||
sessionKey,
|
||||
update: async () => ({
|
||||
memoryFlushAt: Date.now(),
|
||||
memoryFlushCompactionCount,
|
||||
}),
|
||||
});
|
||||
if (updatedEntry) {
|
||||
activeSessionEntry = updatedEntry;
|
||||
}
|
||||
} catch (err) {
|
||||
logVerbose(`failed to persist memory flush metadata: ${String(err)}`);
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
logVerbose(`memory flush run failed: ${String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
const runFollowupTurn = createFollowupRunner({
|
||||
opts,
|
||||
typing,
|
||||
|
||||
103
src/auto-reply/reply/memory-flush.test.ts
Normal file
103
src/auto-reply/reply/memory-flush.test.ts
Normal file
@@ -0,0 +1,103 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import {
|
||||
DEFAULT_MEMORY_FLUSH_SOFT_TOKENS,
|
||||
resolveMemoryFlushContextWindowTokens,
|
||||
resolveMemoryFlushSettings,
|
||||
shouldRunMemoryFlush,
|
||||
} from "./memory-flush.js";
|
||||
|
||||
// Exercises resolveMemoryFlushSettings: defaults, the disable flag, and the NO_REPLY hint.
describe("memory flush settings", () => {
  it("defaults to enabled with fallback prompt and system prompt", () => {
    const resolved = resolveMemoryFlushSettings();

    expect(resolved).not.toBeNull();
    expect(resolved?.enabled).toBe(true);
    expect(resolved?.prompt.length).toBeGreaterThan(0);
    expect(resolved?.systemPrompt.length).toBeGreaterThan(0);
  });

  it("respects disable flag", () => {
    const disabledConfig = {
      agents: {
        defaults: { compaction: { memoryFlush: { enabled: false } } },
      },
    };

    expect(resolveMemoryFlushSettings(disabledConfig)).toBeNull();
  });

  it("appends NO_REPLY hint when missing", () => {
    // Neither custom prompt mentions NO_REPLY, so both should get the hint.
    const memoryFlush = {
      prompt: "Write memories now.",
      systemPrompt: "Flush memory.",
    };
    const resolved = resolveMemoryFlushSettings({
      agents: { defaults: { compaction: { memoryFlush } } },
    });

    expect(resolved?.prompt).toContain("NO_REPLY");
    expect(resolved?.systemPrompt).toContain("NO_REPLY");
  });
});
|
||||
|
||||
// Exercises the flush decision: missing usage, below threshold, already flushed, and the go case.
describe("shouldRunMemoryFlush", () => {
  it("requires totalTokens and threshold", () => {
    // No recorded usage, and the reserve exceeds the context window.
    const decision = shouldRunMemoryFlush({
      entry: { totalTokens: 0 },
      contextWindowTokens: 16_000,
      reserveTokensFloor: 20_000,
      softThresholdTokens: DEFAULT_MEMORY_FLUSH_SOFT_TOKENS,
    });

    expect(decision).toBe(false);
  });

  it("skips when under threshold", () => {
    // Threshold is 100k - 20k - 10k = 70k; usage is far below it.
    const decision = shouldRunMemoryFlush({
      entry: { totalTokens: 10_000 },
      contextWindowTokens: 100_000,
      reserveTokensFloor: 20_000,
      softThresholdTokens: 10_000,
    });

    expect(decision).toBe(false);
  });

  it("skips when already flushed for current compaction count", () => {
    const alreadyFlushedEntry = {
      totalTokens: 90_000,
      compactionCount: 2,
      memoryFlushCompactionCount: 2,
    };
    const decision = shouldRunMemoryFlush({
      entry: alreadyFlushedEntry,
      contextWindowTokens: 100_000,
      reserveTokensFloor: 5_000,
      softThresholdTokens: 2_000,
    });

    expect(decision).toBe(false);
  });

  it("runs when above threshold and not flushed", () => {
    // Threshold is 100k - 5k - 2k = 93k; usage exceeds it and no flush is recorded.
    const decision = shouldRunMemoryFlush({
      entry: { totalTokens: 96_000, compactionCount: 1 },
      contextWindowTokens: 100_000,
      reserveTokensFloor: 5_000,
      softThresholdTokens: 2_000,
    });

    expect(decision).toBe(true);
  });
});
|
||||
|
||||
// With no model id supplied, the agent-config context size is expected to be used.
describe("resolveMemoryFlushContextWindowTokens", () => {
  it("falls back to agent config or default tokens", () => {
    const resolved = resolveMemoryFlushContextWindowTokens({
      agentCfgContextTokens: 42_000,
    });

    expect(resolved).toBe(42_000);
  });
});
|
||||
103
src/auto-reply/reply/memory-flush.ts
Normal file
103
src/auto-reply/reply/memory-flush.ts
Normal file
@@ -0,0 +1,103 @@
|
||||
import { lookupContextTokens } from "../../agents/context.js";
|
||||
import { DEFAULT_CONTEXT_TOKENS } from "../../agents/defaults.js";
|
||||
import { DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR } from "../../agents/pi-settings.js";
|
||||
import type { ClawdbotConfig } from "../../config/config.js";
|
||||
import type { SessionEntry } from "../../config/sessions.js";
|
||||
import { SILENT_REPLY_TOKEN } from "../tokens.js";
|
||||
|
||||
export const DEFAULT_MEMORY_FLUSH_SOFT_TOKENS = 4000;
|
||||
|
||||
export const DEFAULT_MEMORY_FLUSH_PROMPT = [
|
||||
"Pre-compaction memory flush.",
|
||||
"Store durable memories now (use memory/YYYY-MM-DD.md; create memory/ if needed).",
|
||||
`If nothing to store, reply with ${SILENT_REPLY_TOKEN}.`,
|
||||
].join(" ");
|
||||
|
||||
export const DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT = [
|
||||
"Pre-compaction memory flush turn.",
|
||||
"The session is near auto-compaction; capture durable memories to disk.",
|
||||
`You may reply, but usually ${SILENT_REPLY_TOKEN} is correct.`,
|
||||
].join(" ");
|
||||
|
||||
/**
 * Fully resolved settings for the pre-compaction memory flush turn, as
 * produced by `resolveMemoryFlushSettings`.
 */
export type MemoryFlushSettings = {
  /** Whether the memory flush is enabled. */
  enabled: boolean;
  /** User prompt sent for the memory flush turn. */
  prompt: string;
  /** System prompt text supplied for the memory flush turn. */
  systemPrompt: string;
  /** Reserve-token floor subtracted from the context window when computing the flush threshold. */
  reserveTokensFloor: number;
  /** Extra token margin subtracted from the context window when computing the flush threshold. */
  softThresholdTokens: number;
};
|
||||
|
||||
/**
 * Coerces an arbitrary value into a non-negative integer.
 *
 * Finite numbers are rounded down. The result is returned only when it is
 * not negative. Non-numbers, NaN, infinities, and values that floor below
 * zero all yield `null`.
 */
const normalizeNonNegativeInt = (value: unknown): number | null => {
  if (typeof value === "number" && Number.isFinite(value)) {
    const floored = Math.floor(value);
    if (floored >= 0) {
      return floored;
    }
  }
  return null;
};
|
||||
|
||||
/**
 * Resolves the pre-compaction memory flush settings from the config.
 *
 * Reads `agents.defaults.compaction.memoryFlush` (plus the sibling
 * `reserveTokensFloor`) and fills in defaults for anything missing or
 * invalid. Blank or missing prompts fall back to the built-in defaults, and
 * both prompts are passed through `ensureNoReplyHint`.
 *
 * @param cfg - Optional Clawdbot config. Omitting it yields the defaults.
 * @returns The resolved settings, or `null` when the flush is disabled.
 */
export function resolveMemoryFlushSettings(
  cfg?: ClawdbotConfig,
): MemoryFlushSettings | null {
  const compactionCfg = cfg?.agents?.defaults?.compaction;
  const flushCfg = compactionCfg?.memoryFlush;

  // The flush is on unless the config turns it off.
  const enabled = flushCfg?.enabled ?? true;
  if (!enabled) {
    return null;
  }

  // `||` is deliberate: a whitespace-only prompt trims to "" and uses the default.
  const basePrompt = flushCfg?.prompt?.trim() || DEFAULT_MEMORY_FLUSH_PROMPT;
  const baseSystemPrompt =
    flushCfg?.systemPrompt?.trim() || DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT;

  const configuredSoftThreshold = normalizeNonNegativeInt(
    flushCfg?.softThresholdTokens,
  );
  const configuredReserveFloor = normalizeNonNegativeInt(
    compactionCfg?.reserveTokensFloor,
  );

  return {
    enabled,
    softThresholdTokens:
      configuredSoftThreshold ?? DEFAULT_MEMORY_FLUSH_SOFT_TOKENS,
    prompt: ensureNoReplyHint(basePrompt),
    systemPrompt: ensureNoReplyHint(baseSystemPrompt),
    reserveTokensFloor:
      configuredReserveFloor ?? DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR,
  };
}
|
||||
|
||||
/**
 * Ensures a prompt mentions the silent-reply token.
 *
 * Text that already contains `SILENT_REPLY_TOKEN` is returned unchanged.
 * Otherwise a trailing paragraph naming the token is appended.
 */
function ensureNoReplyHint(text: string): string {
  const alreadyHinted = text.includes(SILENT_REPLY_TOKEN);
  return alreadyHinted
    ? text
    : `${text}\n\nIf no user-visible reply is needed, start with ${SILENT_REPLY_TOKEN}.`;
}
|
||||
|
||||
/**
 * Picks the context-window size (in tokens) used for the memory flush decision.
 *
 * Precedence: the value returned by `lookupContextTokens` for the model id,
 * then the agent-config value, then `DEFAULT_CONTEXT_TOKENS`. A source is
 * skipped only when it is `null` or `undefined`.
 *
 * @param params.modelId - Model id passed to `lookupContextTokens`.
 * @param params.agentCfgContextTokens - Context size from the agent config, if any.
 * @returns The first available context-window size.
 */
export function resolveMemoryFlushContextWindowTokens(params: {
  modelId?: string;
  agentCfgContextTokens?: number;
}): number {
  const fromModel = lookupContextTokens(params.modelId);
  if (fromModel !== null && fromModel !== undefined) {
    return fromModel;
  }

  const fromAgentConfig = params.agentCfgContextTokens;
  if (fromAgentConfig !== null && fromAgentConfig !== undefined) {
    return fromAgentConfig;
  }

  return DEFAULT_CONTEXT_TOKENS;
}
|
||||
|
||||
/**
 * Decides whether a pre-compaction memory flush turn should run for a session.
 *
 * The flush threshold is
 * `contextWindowTokens - reserveTokensFloor - softThresholdTokens`. Each input
 * is floored; the window is clamped to at least 1 and the other two to at
 * least 0. A flush runs only when all of the following hold:
 *
 * - the session entry reports a positive `totalTokens`;
 * - the threshold is a finite, positive number;
 * - `totalTokens` has reached the threshold;
 * - no flush is recorded for the current `compactionCount`, i.e.
 *   `memoryFlushCompactionCount` differs from it.
 *
 * @param params.entry - Session entry fields used for the decision. May be absent.
 * @param params.contextWindowTokens - Context window size in tokens.
 * @param params.reserveTokensFloor - Reserve tokens subtracted from the window.
 * @param params.softThresholdTokens - Extra margin subtracted from the window.
 * @returns `true` when a memory flush should run now.
 */
export function shouldRunMemoryFlush(params: {
  entry?: Pick<
    SessionEntry,
    "totalTokens" | "compactionCount" | "memoryFlushCompactionCount"
  >;
  contextWindowTokens: number;
  reserveTokensFloor: number;
  softThresholdTokens: number;
}): boolean {
  const totalTokens = params.entry?.totalTokens;
  // Covers undefined, 0, NaN, and negative usage.
  if (!totalTokens || totalTokens <= 0) return false;

  const contextWindow = Math.max(1, Math.floor(params.contextWindowTokens));
  const reserveTokens = Math.max(0, Math.floor(params.reserveTokensFloor));
  const softThreshold = Math.max(0, Math.floor(params.softThresholdTokens));
  const threshold = contextWindow - reserveTokens - softThreshold;

  // A non-finite input (e.g. a NaN context window) makes the threshold NaN.
  // Every comparison with NaN is false, so without this check the guards below
  // would be skipped and the flush would run. Treat it as "no usable threshold".
  if (!Number.isFinite(threshold) || threshold <= 0) return false;
  if (totalTokens < threshold) return false;

  // Flush at most once per compaction cycle.
  const compactionCount = params.entry?.compactionCount ?? 0;
  const lastFlushAt = params.entry?.memoryFlushCompactionCount;
  if (typeof lastFlushAt === "number" && lastFlushAt === compactionCount) {
    return false;
  }

  return true;
}
|
||||
@@ -109,6 +109,8 @@ export type SessionEntry = {
|
||||
model?: string;
|
||||
contextTokens?: number;
|
||||
compactionCount?: number;
|
||||
memoryFlushAt?: number;
|
||||
memoryFlushCompactionCount?: number;
|
||||
cliSessionIds?: Record<string, string>;
|
||||
claudeCliSessionId?: string;
|
||||
label?: string;
|
||||
|
||||
@@ -1523,6 +1523,8 @@ export type AgentDefaultsConfig = {
|
||||
cliBackends?: Record<string, CliBackendConfig>;
|
||||
/** Opt-in: prune old tool results from the LLM context to reduce token usage. */
|
||||
contextPruning?: AgentContextPruningConfig;
|
||||
/** Compaction tuning and pre-compaction memory flush behavior. */
|
||||
compaction?: AgentCompactionConfig;
|
||||
/** Default thinking level when no /think directive is present. */
|
||||
thinkingDefault?: "off" | "minimal" | "low" | "medium" | "high";
|
||||
/** Default verbose level when no /verbose directive is present. */
|
||||
@@ -1624,6 +1626,24 @@ export type AgentDefaultsConfig = {
|
||||
};
|
||||
};
|
||||
|
||||
/** Compaction tuning under `agents.defaults.compaction`. */
export type AgentCompactionConfig = {
  /**
   * Pre-compaction memory flush, run as an agentic turn.
   * Enabled by default.
   */
  memoryFlush?: AgentCompactionMemoryFlushConfig;
  /**
   * Minimum reserve tokens enforced for Pi compaction.
   * A value of 0 disables the floor.
   */
  reserveTokensFloor?: number;
};
|
||||
|
||||
/** Options for the memory flush turn that runs shortly before compaction. */
export type AgentCompactionMemoryFlushConfig = {
  /** Turns the pre-compaction memory flush on or off. Defaults to `true`. */
  enabled?: boolean;
  /**
   * User prompt for the memory flush turn. A blank value uses the built-in
   * default, and a NO_REPLY hint is appended when the text lacks one.
   */
  prompt?: string;
  /**
   * System prompt appended for the memory flush turn. A blank value uses the
   * built-in default, and a NO_REPLY hint is appended when the text lacks one.
   */
  systemPrompt?: string;
  /**
   * The memory flush runs once the context is within this many tokens of the
   * compaction threshold.
   */
  softThresholdTokens?: number;
};
|
||||
|
||||
export type ClawdbotConfig = {
|
||||
auth?: AuthConfig;
|
||||
env?: {
|
||||
|
||||
@@ -1130,6 +1130,19 @@ const AgentDefaultsSchema = z
|
||||
.optional(),
|
||||
})
|
||||
.optional(),
|
||||
compaction: z
|
||||
.object({
|
||||
reserveTokensFloor: z.number().int().nonnegative().optional(),
|
||||
memoryFlush: z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
softThresholdTokens: z.number().int().nonnegative().optional(),
|
||||
prompt: z.string().optional(),
|
||||
systemPrompt: z.string().optional(),
|
||||
})
|
||||
.optional(),
|
||||
})
|
||||
.optional(),
|
||||
thinkingDefault: z
|
||||
.union([
|
||||
z.literal("off"),
|
||||
|
||||
Reference in New Issue
Block a user