From 7dbb21be8e5778deeb63ed3db9abc243ffa83cac Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 12 Jan 2026 05:28:17 +0000 Subject: [PATCH] feat: add pre-compaction memory flush --- CHANGELOG.md | 30 ++++- docs/concepts/agent-workspace.md | 2 + docs/concepts/compaction.md | 3 + docs/concepts/memory.md | 66 ++++++++++ docs/docs.json | 1 + docs/gateway/configuration.md | 34 +++++ .../session-management-compaction.md | 43 +++++-- docs/start/clawd.md | 1 + docs/start/faq.md | 9 ++ docs/start/hubs.md | 1 + src/agents/pi-embedded-runner.ts | 19 ++- src/agents/pi-settings.test.ts | 22 ++++ src/agents/pi-settings.ts | 12 ++ src/auto-reply/reply/agent-runner.ts | 121 ++++++++++++++++++ src/auto-reply/reply/memory-flush.test.ts | 103 +++++++++++++++ src/auto-reply/reply/memory-flush.ts | 103 +++++++++++++++ src/config/sessions.ts | 2 + src/config/types.ts | 20 +++ src/config/zod-schema.ts | 13 ++ 19 files changed, 583 insertions(+), 22 deletions(-) create mode 100644 docs/concepts/memory.md create mode 100644 src/auto-reply/reply/memory-flush.test.ts create mode 100644 src/auto-reply/reply/memory-flush.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e40ae463..a32535230 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ ## 2026.1.11 +### Highlights +- Plugins are now first-class: loader + CLI management, plus the new Voice Call plugin. +- Config: modular `$include` support for split config files. (#731) — thanks @pasogott. +- Agents/Pi: reserve compaction headroom so pre-compaction memory writes can run before auto-compaction. +- Agents: automatic pre-compaction memory flush turn to store durable memories before compaction. + ### Changes - CLI: simplify configure section selection (single-select with optional add-more). - Onboarding/CLI: group model/auth choice by provider and label Z.AI as GLM 4.7. @@ -12,21 +18,26 @@ - Docs: add beginner-friendly plugin quick start + expand Voice Call plugin docs. - Tests: add Docker plugin loader + tgz-install smoke test. - Tests: extend Docker plugin E2E to cover installing from local folders (`plugins.load.paths`) and `file:` npm specs. +- Tests: add coverage for pre-compaction memory flush settings. - Agents/Tools: add `apply_patch` tool for multi-file edits (experimental; gated by tools.exec.applyPatch; OpenAI-only). - Agents/Tools: rename the bash tool to exec (config alias maintained). (#748) — thanks @myfunc. +- Agents: add pre-compaction memory flush config (`agents.defaults.compaction.*`) with a soft threshold + system prompt. - Config: add `$include` directive for modular config files. (#731) — thanks @pasogott. - Build: set pnpm minimum release age to 2880 minutes (2 days). (#718) — thanks @dan-dr. - macOS: prompt to install the global `clawdbot` CLI when missing in local mode; install via `clawd.bot/install-cli.sh` (no onboarding) and use external launchd/CLI instead of the embedded gateway runtime. - Docs: add gog calendar event color IDs from `gog calendar colors`. (#715) — thanks @mjrussell. +- Cron/CLI: add `--model` flag to cron add/edit commands. (#711) — thanks @mjrussell. - Cron/CLI: trim model overrides on cron edits and document main-session guidance. (#711) — thanks @mjrussell. - Skills: bundle `skill-creator` to guide creating and packaging skills. -- Discord: expose channel/category management actions in the message tool. (#730) — thanks @NicholasSpisak +- Providers: add per-DM history limit overrides (`dmHistoryLimit`) with provider-level config. (#728) — thanks @pkrmf. +- Discord: expose channel/category management actions in the message tool. (#730) — thanks @NicholasSpisak. - Docs: rename README “macOS app” section to “Apps”. (#733) — thanks @AbhisekBasu1. - Gateway: require `client.id` in WebSocket connect params; use `client.instanceId` for presence de-dupe; update docs/tests. - macOS: remove the attach-only gateway setting; local mode now always manages launchd while still attaching to an existing gateway if present. ### Installer - Postinstall: replace `git apply` with builtin JS patcher (works npm/pnpm/bun; no git dependency) plus regression tests. +- Postinstall: skip pnpm patch fallback when the new patcher is active. - Installer tests: add root+non-root docker smokes, CI workflow to fetch clawd.bot scripts and run install sh/cli with onboarding skipped. - Installer UX: support `CLAWDBOT_NO_ONBOARD=1` for non-interactive installs; fix npm prefix on Linux and auto-install git. - Installer UX: add `install.sh --help` with flags/env and git install hint. @@ -34,21 +45,31 @@ ### Fixes - Models/Onboarding: configure MiniMax (minimax.io) via Anthropic-compatible `/anthropic` endpoint by default (keep `minimax-api` as a legacy alias). -- Gateway/WebChat: include handshake validation details in the WebSocket close reason for easier debugging. +- Gateway/WebChat: include handshake validation details in the WebSocket close reason for easier debugging; preserve close codes. - Gateway/Auth: send invalid connect responses before closing the handshake; stabilize invalid-connect auth test. +- Gateway: tighten gateway listener detection. +- Control UI: hide onboarding chat when configured and guard the mobile chat sidebar overlay. +- Auth: read Codex keychain credentials and make the lookup platform-aware. +- macOS/Release: avoid bundling dist artifacts in relay builds and generate appcasts from zip-only sources. - Doctor: surface plugin diagnostics in the report. -- Plugins: treat `plugins.load.paths` directory entries as package roots when they contain `package.json` + `clawdbot.extensions`. -- Config: expand `~` in `CLAWDBOT_CONFIG_PATH` and common path-like config fields (including `plugins.load.paths`). +- Plugins: treat `plugins.load.paths` directory entries as package roots when they contain `package.json` + `clawdbot.extensions`; load plugin packages from config dirs; extract archives without system tar. +- Config: expand `~` in `CLAWDBOT_CONFIG_PATH` and common path-like config fields (including `plugins.load.paths`); guard invalid `$include` paths. (#731) — thanks @pasogott. - Agents: stop pre-creating session transcripts so first user messages persist in JSONL history. - Auto-reply: align `/think` default display with model reasoning defaults. (#751) — thanks @gabriel-trigo. - Auto-reply: flush block reply buffers on tool boundaries. (#750) — thanks @sebslight. +- Heartbeat: refresh prompt text for updated defaults. +- Agents/Tools: use PowerShell on Windows to capture system utility output. (#748) — thanks @myfunc. - Docker: tolerate unset optional env vars in docker-setup.sh under strict mode. (#725) — thanks @petradonka. - CLI/Update: preserve base environment when passing overrides to update subprocesses. (#713) — thanks @danielz1z. - Agents: treat message tool errors as failures so fallback replies still send; require `to` + `message` for `action=send`. (#717) — thanks @theglove44. +- Agents: preserve reasoning items on tool-only turns. +- Agents/Subagents: wait for completion before announcing, align wait timeout with run timeout, and make announce prompts more emphatic. - Agents: route subagent transcripts to the target agent sessions directory and add regression coverage. (#708) — thanks @xMikeMickelson. - Agents/Tools: preserve action enums when flattening tool schemas. (#708) — thanks @xMikeMickelson. - Gateway/Agents: canonicalize main session aliases for store writes and add regression coverage. (#709) — thanks @xMikeMickelson. - Agents: reset sessions and retry when auto-compaction overflows instead of crashing the gateway. +- Providers/Telegram: normalize command mentions for consistent parsing. (#729) — thanks @obviyus. +- Providers: skip DM history limit handling for non-DM sessions. (#728) — thanks @pkrmf. - Sandbox: fix non-main mode incorrectly sandboxing the main DM session and align `/status` runtime reporting with effective sandbox state. - Sandbox/Gateway: treat `agent::main` as a main-session alias when `session.mainKey` is customized (backwards compatible). @@ -74,7 +95,6 @@ - Docker: allow optional home volume + extra bind mounts in `docker-setup.sh`. (#679) — thanks @gabriel-trigo. ### Fixes -- Agents/Pi: raise compaction `reserveTokens` floor to 20k to leave enough headroom for pre-compaction “memory write” turns. - Auto-reply: suppress draft/typing streaming for `NO_REPLY` (silent system ops) so it doesn’t leak partial output. - CLI/Status: expand tables to full terminal width; clarify provider setup vs runtime warnings; richer per-provider detail; token previews in `status` while keeping `status --all` redacted; add troubleshooting link footer; keep log tails pasteable; show gateway auth used when reachable; surface provider runtime errors (Signal/iMessage/Slack); harden `tailscale status --json` parsing; make `status --all` scan progress determinate; and replace the footer with a 3-line “Next steps” recommendation (share/debug/probe). - CLI/Gateway: clarify that `clawdbot gateway status` reports RPC health (connect + RPC) and shows RPC failures separately from connect failures. diff --git a/docs/concepts/agent-workspace.md b/docs/concepts/agent-workspace.md index 1432e2efd..d5f8e36ea 100644 --- a/docs/concepts/agent-workspace.md +++ b/docs/concepts/agent-workspace.md @@ -99,6 +99,8 @@ These are the standard files Clawdbot expects inside the workspace: - Curated long-term memory. - Only load in the main, private session (not shared/group contexts). +See [Memory](/concepts/memory) for the workflow and automatic memory flush. + - `skills/` (optional) - Workspace-specific skills. - Overrides managed/bundled skills when names collide. diff --git a/docs/concepts/compaction.md b/docs/concepts/compaction.md index 4ec1bfdfd..b4edc6591 100644 --- a/docs/concepts/compaction.md +++ b/docs/concepts/compaction.md @@ -22,6 +22,9 @@ You’ll see: - `🧹 Auto-compaction complete` in verbose mode - `/status` showing `🧹 Compactions: ` +Before compaction, Clawdbot can run a **silent memory flush** turn to store +durable notes to disk. See [Memory](/concepts/memory) for details and config. + ## Manual compaction Use `/compact` (optionally with instructions) to force a compaction pass: ``` diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md new file mode 100644 index 000000000..4300d49f2 --- /dev/null +++ b/docs/concepts/memory.md @@ -0,0 +1,66 @@ +--- +summary: "How Clawdbot memory works (workspace files + automatic memory flush)" +read_when: + - You want the memory file layout and workflow + - You want to tune the automatic pre-compaction memory flush +--- +# Memory + +Clawdbot memory is **plain Markdown in the agent workspace**. The files are the +source of truth; the model only “remembers” what gets written to disk. + +## Memory files (Markdown) + +The default workspace layout uses two memory layers: + +- `memory/YYYY-MM-DD.md` + - Daily log (append-only). + - Read today + yesterday at session start. +- `MEMORY.md` (optional) + - Curated long-term memory. + - **Only load in the main, private session** (never in group contexts). + +These files live under the workspace (`agents.defaults.workspace`, default +`~/clawd`). See [Agent workspace](/concepts/agent-workspace) for the full layout. + +## When to write memory + +- Decisions, preferences, and durable facts go to `MEMORY.md`. +- Day-to-day notes and running context go to `memory/YYYY-MM-DD.md`. +- If someone says “remember this,” write it down (don’t keep it in RAM). + +## Automatic memory flush (pre-compaction ping) + +When a session is **close to auto-compaction**, Clawdbot triggers a **silent +agentic turn** that reminds the model to write durable memory **before** the +context is compacted. The default prompt encourages the model to respond with +`NO_REPLY` when there’s nothing to store, so the user never sees this turn. + +This is controlled by `agents.defaults.compaction.memoryFlush`: + +```json5 +{ + agents: { + defaults: { + compaction: { + reserveTokensFloor: 20000, + memoryFlush: { + enabled: true, + softThresholdTokens: 4000, + systemPrompt: "Session nearing compaction. Store durable memories now.", + prompt: "Write any lasting notes to memory/YYYY-MM-DD.md; reply with NO_REPLY if nothing to store." + } + } + } + } +} +``` + +Details: +- **Soft threshold**: flush triggers when the session token estimate crosses + `contextWindow - reserveTokensFloor - softThresholdTokens`. +- **Silent** by default: prompts include `NO_REPLY` so nothing is delivered. +- **One flush per compaction cycle** (tracked in `sessions.json`). + +For the full compaction lifecycle, see +[Session management + compaction](/reference/session-management-compaction). diff --git a/docs/docs.json b/docs/docs.json index a216b7823..41c7d8768 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -608,6 +608,7 @@ "token-use", "concepts/oauth", "concepts/agent-workspace", + "concepts/memory", "concepts/multi-agent", "concepts/compaction", "concepts/session", diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 680e65028..0b6c1654f 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -1368,6 +1368,40 @@ Example (adaptive tuned): See [/concepts/session-pruning](/concepts/session-pruning) for behavior details. +#### `agents.defaults.compaction` (reserve headroom + memory flush) + +`agents.defaults.compaction.reserveTokensFloor` enforces a minimum `reserveTokens` +value for Pi compaction (default: `20000`). Set it to `0` to disable the floor. + +`agents.defaults.compaction.memoryFlush` runs a **silent** agentic turn before +auto-compaction, instructing the model to store durable memories on disk (e.g. +`memory/YYYY-MM-DD.md`). It triggers when the session token estimate crosses a +soft threshold below the compaction limit. + +Defaults: +- `memoryFlush.enabled`: `true` +- `memoryFlush.softThresholdTokens`: `4000` +- `memoryFlush.prompt` / `memoryFlush.systemPrompt`: built-in defaults with `NO_REPLY` + +Example (tuned): +```json5 +{ + agents: { + defaults: { + compaction: { + reserveTokensFloor: 24000, + memoryFlush: { + enabled: true, + softThresholdTokens: 6000, + systemPrompt: "Session nearing compaction. Store durable memories now.", + prompt: "Write any lasting notes to memory/YYYY-MM-DD.md; reply with NO_REPLY if nothing to store." + } + } + } + } +} +``` + Block streaming: - `agents.defaults.blockStreamingDefault`: `"on"`/`"off"` (default off). - Provider overrides: `*.blockStreaming` (and per-account variants) to force block streaming on/off. diff --git a/docs/reference/session-management-compaction.md b/docs/reference/session-management-compaction.md index fbd8b0999..e807d3a21 100644 --- a/docs/reference/session-management-compaction.md +++ b/docs/reference/session-management-compaction.md @@ -107,6 +107,8 @@ Key fields (not exhaustive): - Token counters (best-effort / provider-dependent): - `inputTokens`, `outputTokens`, `totalTokens`, `contextTokens` - `compactionCount`: how often auto-compaction completed for this session key +- `memoryFlushAt`: timestamp for the last pre-compaction memory flush +- `memoryFlushCompactionCount`: compaction count when the last flush ran The store is safe to edit, but the Gateway is the authority: it may rewrite or rehydrate entries as sessions run. @@ -191,12 +193,15 @@ Pi’s compaction settings live in Pi settings: Clawdbot also enforces a safety floor for embedded runs: -- If `compaction.reserveTokens < 20000`, Clawdbot bumps it to 20000. +- If `compaction.reserveTokens < reserveTokensFloor`, Clawdbot bumps it. +- Default floor is `20000` tokens. +- Set `agents.defaults.compaction.reserveTokensFloor: 0` to disable the floor. - If it’s already higher, Clawdbot leaves it alone. Why: leave enough headroom for multi-turn “housekeeping” (like memory writes) before compaction becomes unavoidable. -Implementation: `ensurePiCompactionReserveTokens()` in `src/agents/pi-settings.ts` (called from `src/agents/pi-embedded-runner.ts`). +Implementation: `ensurePiCompactionReserveTokens()` in `src/agents/pi-settings.ts` +(called from `src/agents/pi-embedded-runner.ts`). --- @@ -223,22 +228,33 @@ As of `2026.1.10`, Clawdbot also suppresses **draft/typing streaming** when a pa --- -## Pre-compaction “memory flush” (design) +## Pre-compaction “memory flush” (implemented) -Goal: before auto-compaction happens, run a short sequence of turns that writes durable state to disk (e.g. `memory/YYYY-MM-DD.md` in the agent workspace) so compaction can’t erase critical context. +Goal: before auto-compaction happens, run a silent agentic turn that writes durable +state to disk (e.g. `memory/YYYY-MM-DD.md` in the agent workspace) so compaction can’t +erase critical context. -Two viable hooks: +Clawdbot uses the **pre-threshold flush** approach: -1) **Pre-threshold flush (Clawdbot-side)** - - Monitor session context usage. - - When it crosses a “soft threshold” (below Pi’s real compaction threshold), enqueue a silent “write memory now” directive to the agent. - - Use `NO_REPLY` so the user sees nothing. +1) Monitor session context usage. +2) When it crosses a “soft threshold” (below Pi’s compaction threshold), run a silent + “write memory now” directive to the agent. +3) Use `NO_REPLY` so the user sees nothing. -2) **Pi extension hook (`session_before_compact`)** - - Pi’s extension API exposes a `session_before_compact` event that receives compaction preparation details and can cancel or replace compaction. - - Clawdbot can ship an extension that reacts here and performs housekeeping (and/or produces a custom compaction result). +Config (`agents.defaults.compaction.memoryFlush`): +- `enabled` (default: `true`) +- `softThresholdTokens` (default: `4000`) +- `prompt` (user message for the flush turn) +- `systemPrompt` (extra system prompt appended for the flush turn) -Clawdbot currently documents the *concept* of daily memory in the workspace template (see [/concepts/agent-workspace](/concepts/agent-workspace)) but does not yet ship an automated pre-compaction flush loop. +Notes: +- The default prompt/system prompt include a `NO_REPLY` hint to suppress delivery. +- The flush runs once per compaction cycle (tracked in `sessions.json`). +- The flush runs only for embedded Pi sessions (CLI backends skip it). +- See [Memory](/concepts/memory) for the workspace file layout and write patterns. + +Pi also exposes a `session_before_compact` hook in the extension API, but Clawdbot’s +flush logic lives on the Gateway side today. --- @@ -251,4 +267,3 @@ Clawdbot currently documents the *concept* of daily memory in the workspace temp - compaction settings (`reserveTokens` too high for the model window can cause earlier compaction) - tool-result bloat: enable/tune session pruning - Silent turns leaking? Confirm the reply starts with `NO_REPLY` (exact token) and you’re on a build that includes the streaming suppression fix. - diff --git a/docs/start/clawd.md b/docs/start/clawd.md index 837ef5c6c..8a6083401 100644 --- a/docs/start/clawd.md +++ b/docs/start/clawd.md @@ -102,6 +102,7 @@ clawdbot setup ``` Full workspace layout + backup guide: [Agent workspace](/concepts/agent-workspace) +Memory workflow: [Memory](/concepts/memory) Optional: choose a different workspace with `agents.defaults.workspace` (supports `~`). diff --git a/docs/start/faq.md b/docs/start/faq.md index 4eaf0c2cd..cd596a44e 100644 --- a/docs/start/faq.md +++ b/docs/start/faq.md @@ -197,6 +197,15 @@ ClawdHub installs into `./skills` under your current directory; Clawdbot treats Yes. See [Sandboxing](/gateway/sandboxing). For Docker-specific setup (full gateway in Docker or sandbox images), see [Docker](/install/docker). +### How does memory work? + +Clawdbot memory is just Markdown files in the agent workspace: +- Daily notes in `memory/YYYY-MM-DD.md` +- Curated long-term notes in `MEMORY.md` (main/private sessions only) + +Clawdbot also runs a **silent pre-compaction memory flush** to remind the model +to write durable notes before auto-compaction. See [Memory](/concepts/memory). + ## Where things live on disk ### Where does Clawdbot store its data? diff --git a/docs/start/hubs.md b/docs/start/hubs.md index 3c996e2ad..e639674c1 100644 --- a/docs/start/hubs.md +++ b/docs/start/hubs.md @@ -34,6 +34,7 @@ Use these hubs to discover every page, including deep dives and reference docs t - [Architecture](/concepts/architecture) - [Agent runtime](/concepts/agent) - [Agent workspace](/concepts/agent-workspace) +- [Memory](/concepts/memory) - [Agent loop](/concepts/agent-loop) - [Streaming + chunking](/concepts/streaming) - [Multi-agent routing](/concepts/multi-agent) diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts index 068685d00..49ee41a4b 100644 --- a/src/agents/pi-embedded-runner.ts +++ b/src/agents/pi-embedded-runner.ts @@ -73,7 +73,10 @@ import { import { normalizeModelCompat } from "./model-compat.js"; import { ensureClawdbotModelsJson } from "./models-config.js"; import type { MessagingToolSend } from "./pi-embedded-messaging.js"; -import { ensurePiCompactionReserveTokens } from "./pi-settings.js"; +import { + ensurePiCompactionReserveTokens, + resolveCompactionReserveTokensFloor, +} from "./pi-settings.js"; import { acquireSessionWriteLock } from "./session-write-lock.js"; export type { MessagingToolSend } from "./pi-embedded-messaging.js"; @@ -1184,7 +1187,12 @@ export async function compactEmbeddedPiSession(params: { effectiveWorkspace, agentDir, ); - ensurePiCompactionReserveTokens({ settingsManager }); + ensurePiCompactionReserveTokens({ + settingsManager, + minReserveTokens: resolveCompactionReserveTokensFloor( + params.config, + ), + }); const additionalExtensionPaths = buildEmbeddedExtensionPaths({ cfg: params.config, sessionManager, @@ -1584,7 +1592,12 @@ export async function runEmbeddedPiAgent(params: { effectiveWorkspace, agentDir, ); - ensurePiCompactionReserveTokens({ settingsManager }); + ensurePiCompactionReserveTokens({ + settingsManager, + minReserveTokens: resolveCompactionReserveTokensFloor( + params.config, + ), + }); const additionalExtensionPaths = buildEmbeddedExtensionPaths({ cfg: params.config, sessionManager, diff --git a/src/agents/pi-settings.test.ts b/src/agents/pi-settings.test.ts index 139dac3f4..9a7332540 100644 --- a/src/agents/pi-settings.test.ts +++ b/src/agents/pi-settings.test.ts @@ -3,6 +3,7 @@ import { describe, expect, it, vi } from "vitest"; import { DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR, ensurePiCompactionReserveTokens, + resolveCompactionReserveTokensFloor, } from "./pi-settings.js"; describe("ensurePiCompactionReserveTokens", () => { @@ -35,3 +36,24 @@ describe("ensurePiCompactionReserveTokens", () => { expect(settingsManager.applyOverrides).not.toHaveBeenCalled(); }); }); + +describe("resolveCompactionReserveTokensFloor", () => { + it("returns the default when config is missing", () => { + expect(resolveCompactionReserveTokensFloor()).toBe( + DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR, + ); + }); + + it("accepts configured floors, including zero", () => { + expect( + resolveCompactionReserveTokensFloor({ + agents: { defaults: { compaction: { reserveTokensFloor: 24_000 } } }, + }), + ).toBe(24_000); + expect( + resolveCompactionReserveTokensFloor({ + agents: { defaults: { compaction: { reserveTokensFloor: 0 } } }, + }), + ).toBe(0); + }); +}); diff --git a/src/agents/pi-settings.ts b/src/agents/pi-settings.ts index 056c318e4..6ed6fcc22 100644 --- a/src/agents/pi-settings.ts +++ b/src/agents/pi-settings.ts @@ -1,3 +1,5 @@ +import type { ClawdbotConfig } from "../config/config.js"; + export const DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR = 20_000; type PiSettingsManagerLike = { @@ -25,3 +27,13 @@ export function ensurePiCompactionReserveTokens(params: { return { didOverride: true, reserveTokens: minReserveTokens }; } + +export function resolveCompactionReserveTokensFloor( + cfg?: ClawdbotConfig, +): number { + const raw = cfg?.agents?.defaults?.compaction?.reserveTokensFloor; + if (typeof raw === "number" && Number.isFinite(raw) && raw >= 0) { + return Math.floor(raw); + } + return DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR; +} diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 23f7f8986..c42d45b11 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -53,6 +53,11 @@ import { } from "./block-reply-pipeline.js"; import { resolveBlockStreamingCoalescing } from "./block-streaming.js"; import { createFollowupRunner } from "./followup-runner.js"; +import { + resolveMemoryFlushContextWindowTokens, + resolveMemoryFlushSettings, + shouldRunMemoryFlush, +} from "./memory-flush.js"; import { enqueueFollowupRun, type FollowupRun, @@ -337,6 +342,122 @@ export async function runReplyAgent(params: { return undefined; } + const memoryFlushSettings = resolveMemoryFlushSettings(cfg); + const shouldFlushMemory = + memoryFlushSettings && + !isHeartbeat && + !isCliProvider(followupRun.run.provider, cfg) && + shouldRunMemoryFlush({ + entry: + activeSessionEntry ?? + (sessionKey ? activeSessionStore?.[sessionKey] : undefined), + contextWindowTokens: resolveMemoryFlushContextWindowTokens({ + modelId: followupRun.run.model ?? defaultModel, + agentCfgContextTokens, + }), + reserveTokensFloor: memoryFlushSettings.reserveTokensFloor, + softThresholdTokens: memoryFlushSettings.softThresholdTokens, + }); + if (shouldFlushMemory) { + const flushRunId = crypto.randomUUID(); + if (sessionKey) { + registerAgentRunContext(flushRunId, { + sessionKey, + verboseLevel: resolvedVerboseLevel, + }); + } + let memoryCompactionCompleted = false; + const flushSystemPrompt = [ + followupRun.run.extraSystemPrompt, + memoryFlushSettings.systemPrompt, + ] + .filter(Boolean) + .join("\n\n"); + try { + await runWithModelFallback({ + cfg: followupRun.run.config, + provider: followupRun.run.provider, + model: followupRun.run.model, + run: (provider, model) => + runEmbeddedPiAgent({ + sessionId: followupRun.run.sessionId, + sessionKey, + messageProvider: + sessionCtx.Provider?.trim().toLowerCase() || undefined, + agentAccountId: sessionCtx.AccountId, + // Provider threading context for tool auto-injection + ...buildThreadingToolContext({ + sessionCtx, + config: followupRun.run.config, + hasRepliedRef: opts?.hasRepliedRef, + }), + sessionFile: followupRun.run.sessionFile, + workspaceDir: followupRun.run.workspaceDir, + agentDir: followupRun.run.agentDir, + config: followupRun.run.config, + skillsSnapshot: followupRun.run.skillsSnapshot, + prompt: memoryFlushSettings.prompt, + extraSystemPrompt: flushSystemPrompt, + ownerNumbers: followupRun.run.ownerNumbers, + enforceFinalTag: followupRun.run.enforceFinalTag, + provider, + model, + authProfileId: followupRun.run.authProfileId, + thinkLevel: followupRun.run.thinkLevel, + verboseLevel: followupRun.run.verboseLevel, + reasoningLevel: followupRun.run.reasoningLevel, + bashElevated: followupRun.run.bashElevated, + timeoutMs: followupRun.run.timeoutMs, + runId: flushRunId, + onAgentEvent: (evt) => { + if (evt.stream === "compaction") { + const phase = + typeof evt.data.phase === "string" ? evt.data.phase : ""; + const willRetry = Boolean(evt.data.willRetry); + if (phase === "end" && !willRetry) { + memoryCompactionCompleted = true; + } + } + }, + }), + }); + let memoryFlushCompactionCount = + activeSessionEntry?.compactionCount ?? + (sessionKey ? activeSessionStore?.[sessionKey]?.compactionCount : 0) ?? + 0; + if (memoryCompactionCompleted) { + const nextCount = await incrementCompactionCount({ + sessionEntry: activeSessionEntry, + sessionStore: activeSessionStore, + sessionKey, + storePath, + }); + if (typeof nextCount === "number") { + memoryFlushCompactionCount = nextCount; + } + } + if (storePath && sessionKey) { + try { + const updatedEntry = await updateSessionStoreEntry({ + storePath, + sessionKey, + update: async () => ({ + memoryFlushAt: Date.now(), + memoryFlushCompactionCount, + }), + }); + if (updatedEntry) { + activeSessionEntry = updatedEntry; + } + } catch (err) { + logVerbose(`failed to persist memory flush metadata: ${String(err)}`); + } + } + } catch (err) { + logVerbose(`memory flush run failed: ${String(err)}`); + } + } + const runFollowupTurn = createFollowupRunner({ opts, typing, diff --git a/src/auto-reply/reply/memory-flush.test.ts b/src/auto-reply/reply/memory-flush.test.ts new file mode 100644 index 000000000..8f8aae2fb --- /dev/null +++ b/src/auto-reply/reply/memory-flush.test.ts @@ -0,0 +1,103 @@ +import { describe, expect, it } from "vitest"; + +import { + DEFAULT_MEMORY_FLUSH_SOFT_TOKENS, + resolveMemoryFlushContextWindowTokens, + resolveMemoryFlushSettings, + shouldRunMemoryFlush, +} from "./memory-flush.js"; + +describe("memory flush settings", () => { + it("defaults to enabled with fallback prompt and system prompt", () => { + const settings = resolveMemoryFlushSettings(); + expect(settings).not.toBeNull(); + expect(settings?.enabled).toBe(true); + expect(settings?.prompt.length).toBeGreaterThan(0); + expect(settings?.systemPrompt.length).toBeGreaterThan(0); + }); + + it("respects disable flag", () => { + expect( + resolveMemoryFlushSettings({ + agents: { + defaults: { compaction: { memoryFlush: { enabled: false } } }, + }, + }), + ).toBeNull(); + }); + + it("appends NO_REPLY hint when missing", () => { + const settings = resolveMemoryFlushSettings({ + agents: { + defaults: { + compaction: { + memoryFlush: { + prompt: "Write memories now.", + systemPrompt: "Flush memory.", + }, + }, + }, + }, + }); + expect(settings?.prompt).toContain("NO_REPLY"); + expect(settings?.systemPrompt).toContain("NO_REPLY"); + }); +}); + +describe("shouldRunMemoryFlush", () => { + it("requires totalTokens and threshold", () => { + expect( + shouldRunMemoryFlush({ + entry: { totalTokens: 0 }, + contextWindowTokens: 16_000, + reserveTokensFloor: 20_000, + softThresholdTokens: DEFAULT_MEMORY_FLUSH_SOFT_TOKENS, + }), + ).toBe(false); + }); + + it("skips when under threshold", () => { + expect( + shouldRunMemoryFlush({ + entry: { totalTokens: 10_000 }, + contextWindowTokens: 100_000, + reserveTokensFloor: 20_000, + softThresholdTokens: 10_000, + }), + ).toBe(false); + }); + + it("skips when already flushed for current compaction count", () => { + expect( + shouldRunMemoryFlush({ + entry: { + totalTokens: 90_000, + compactionCount: 2, + memoryFlushCompactionCount: 2, + }, + contextWindowTokens: 100_000, + reserveTokensFloor: 5_000, + softThresholdTokens: 2_000, + }), + ).toBe(false); + }); + + it("runs when above threshold and not flushed", () => { + expect( + shouldRunMemoryFlush({ + entry: { totalTokens: 96_000, compactionCount: 1 }, + contextWindowTokens: 100_000, + reserveTokensFloor: 5_000, + softThresholdTokens: 2_000, + }), + ).toBe(true); + }); +}); + +describe("resolveMemoryFlushContextWindowTokens", () => { + it("falls back to agent config or default tokens", () => { + expect( + resolveMemoryFlushContextWindowTokens({ agentCfgContextTokens: 42_000 }), + ).toBe(42_000); + }); +}); diff --git a/src/auto-reply/reply/memory-flush.ts b/src/auto-reply/reply/memory-flush.ts new file mode 100644 index 000000000..d1b0a5d32 --- /dev/null +++ b/src/auto-reply/reply/memory-flush.ts @@ -0,0 +1,103 @@ +import { lookupContextTokens } from "../../agents/context.js"; +import { DEFAULT_CONTEXT_TOKENS } from "../../agents/defaults.js"; +import { DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR } from "../../agents/pi-settings.js"; +import type { ClawdbotConfig } from "../../config/config.js"; +import type { SessionEntry } from "../../config/sessions.js"; +import { SILENT_REPLY_TOKEN } from "../tokens.js"; + +export const DEFAULT_MEMORY_FLUSH_SOFT_TOKENS = 4000; + +export const DEFAULT_MEMORY_FLUSH_PROMPT = [ + "Pre-compaction memory flush.", + "Store durable memories now (use memory/YYYY-MM-DD.md; create memory/ if needed).", + `If nothing to store, reply with ${SILENT_REPLY_TOKEN}.`, +].join(" "); + +export const DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT = [ + "Pre-compaction memory flush turn.", + "The session is near auto-compaction; capture durable memories to disk.", + `You may reply, but usually ${SILENT_REPLY_TOKEN} is correct.`, +].join(" "); + +export type MemoryFlushSettings = { + enabled: boolean; + softThresholdTokens: number; + prompt: string; + systemPrompt: string; + reserveTokensFloor: number; +}; + +const normalizeNonNegativeInt = (value: unknown): number | null => { + if (typeof value !== "number" || !Number.isFinite(value)) return null; + const int = Math.floor(value); + return int >= 0 ? int : null; +}; + +export function resolveMemoryFlushSettings( + cfg?: ClawdbotConfig, +): MemoryFlushSettings | null { + const defaults = cfg?.agents?.defaults?.compaction?.memoryFlush; + const enabled = defaults?.enabled ?? true; + if (!enabled) return null; + const softThresholdTokens = + normalizeNonNegativeInt(defaults?.softThresholdTokens) ?? + DEFAULT_MEMORY_FLUSH_SOFT_TOKENS; + const prompt = defaults?.prompt?.trim() || DEFAULT_MEMORY_FLUSH_PROMPT; + const systemPrompt = + defaults?.systemPrompt?.trim() || DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT; + const reserveTokensFloor = + normalizeNonNegativeInt( + cfg?.agents?.defaults?.compaction?.reserveTokensFloor, + ) ?? DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR; + + return { + enabled, + softThresholdTokens, + prompt: ensureNoReplyHint(prompt), + systemPrompt: ensureNoReplyHint(systemPrompt), + reserveTokensFloor, + }; +} + +function ensureNoReplyHint(text: string): string { + if (text.includes(SILENT_REPLY_TOKEN)) return text; + return `${text}\n\nIf no user-visible reply is needed, start with ${SILENT_REPLY_TOKEN}.`; +} + +export function resolveMemoryFlushContextWindowTokens(params: { + modelId?: string; + agentCfgContextTokens?: number; +}): number { + return ( + lookupContextTokens(params.modelId) ?? + params.agentCfgContextTokens ?? + DEFAULT_CONTEXT_TOKENS + ); +} + +export function shouldRunMemoryFlush(params: { + entry?: Pick< + SessionEntry, + "totalTokens" | "compactionCount" | "memoryFlushCompactionCount" + >; + contextWindowTokens: number; + reserveTokensFloor: number; + softThresholdTokens: number; +}): boolean { + const totalTokens = params.entry?.totalTokens; + if (!totalTokens || totalTokens <= 0) return false; + const contextWindow = Math.max(1, Math.floor(params.contextWindowTokens)); + const reserveTokens = Math.max(0, Math.floor(params.reserveTokensFloor)); + const softThreshold = Math.max(0, Math.floor(params.softThresholdTokens)); + const threshold = Math.max(0, contextWindow - reserveTokens - softThreshold); + if (threshold <= 0) return false; + if (totalTokens < threshold) return false; + + const compactionCount = params.entry?.compactionCount ?? 0; + const lastFlushAt = params.entry?.memoryFlushCompactionCount; + if (typeof lastFlushAt === "number" && lastFlushAt === compactionCount) { + return false; + } + + return true; +} diff --git a/src/config/sessions.ts b/src/config/sessions.ts index 074e7f983..5e91dc0e5 100644 --- a/src/config/sessions.ts +++ b/src/config/sessions.ts @@ -109,6 +109,8 @@ export type SessionEntry = { model?: string; contextTokens?: number; compactionCount?: number; + memoryFlushAt?: number; + memoryFlushCompactionCount?: number; cliSessionIds?: Record; claudeCliSessionId?: string; label?: string; diff --git a/src/config/types.ts b/src/config/types.ts index 7600d0104..476578c74 100644 --- a/src/config/types.ts +++ b/src/config/types.ts @@ -1523,6 +1523,8 @@ export type AgentDefaultsConfig = { cliBackends?: Record; /** Opt-in: prune old tool results from the LLM context to reduce token usage. */ contextPruning?: AgentContextPruningConfig; + /** Compaction tuning and pre-compaction memory flush behavior. */ + compaction?: AgentCompactionConfig; /** Default thinking level when no /think directive is present. */ thinkingDefault?: "off" | "minimal" | "low" | "medium" | "high"; /** Default verbose level when no /verbose directive is present. */ @@ -1624,6 +1626,24 @@ export type AgentDefaultsConfig = { }; }; +export type AgentCompactionConfig = { + /** Minimum reserve tokens enforced for Pi compaction (0 disables the floor). */ + reserveTokensFloor?: number; + /** Pre-compaction memory flush (agentic turn). Default: enabled. */ + memoryFlush?: AgentCompactionMemoryFlushConfig; +}; + +export type AgentCompactionMemoryFlushConfig = { + /** Enable the pre-compaction memory flush (default: true). */ + enabled?: boolean; + /** Run the memory flush when context is within this many tokens of the compaction threshold. */ + softThresholdTokens?: number; + /** User prompt used for the memory flush turn (NO_REPLY is enforced if missing). */ + prompt?: string; + /** System prompt appended for the memory flush turn. */ + systemPrompt?: string; +}; + export type ClawdbotConfig = { auth?: AuthConfig; env?: { diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index 6a5665f03..9cc2af1c2 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -1130,6 +1130,19 @@ const AgentDefaultsSchema = z .optional(), }) .optional(), + compaction: z + .object({ + reserveTokensFloor: z.number().int().nonnegative().optional(), + memoryFlush: z + .object({ + enabled: z.boolean().optional(), + softThresholdTokens: z.number().int().nonnegative().optional(), + prompt: z.string().optional(), + systemPrompt: z.string().optional(), + }) + .optional(), + }) + .optional(), thinkingDefault: z .union([ z.literal("off"),