From a3747b1ee3ae3d1bcdb29a7306daf48c7236d0bf Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 11 Jan 2026 10:46:20 +0000 Subject: [PATCH] fix: add compaction headroom for memory writes --- CHANGELOG.md | 2 + src/agents/pi-embedded-runner.ts | 3 ++ src/agents/pi-settings.test.ts | 37 +++++++++++++++++++ src/agents/pi-settings.ts | 27 ++++++++++++++ .../agent-runner.heartbeat-typing.test.ts | 19 ++++++++++ src/auto-reply/reply/agent-runner.ts | 3 +- 6 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 src/agents/pi-settings.test.ts create mode 100644 src/agents/pi-settings.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a891dfa8..a152dc10e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,8 @@ - Docker: allow optional home volume + extra bind mounts in `docker-setup.sh`. (#679) — thanks @gabriel-trigo. ### Fixes +- Agents/Pi: raise compaction `reserveTokens` floor to 20k to leave enough headroom for pre-compaction “memory write” turns. +- Auto-reply: suppress draft/typing streaming for `NO_REPLY` (silent system ops) so it doesn’t leak partial output. - CLI/Status: expand tables to full terminal width; clarify provider setup vs runtime warnings; richer per-provider detail; token previews in `status` while keeping `status --all` redacted; add troubleshooting link footer; keep log tails pasteable; show gateway auth used when reachable; surface provider runtime errors (Signal/iMessage/Slack); harden `tailscale status --json` parsing; make `status --all` scan progress determinate; and replace the footer with a 3-line “Next steps” recommendation (share/debug/probe). - CLI/Gateway: clarify that `clawdbot gateway status` reports RPC health (connect + RPC) and shows RPC failures separately from connect failures. - CLI/Update: gate progress spinner on stdout TTY and align clean-check step label. (#701) — thanks @bjesuiter. diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts index 67a63ca9c..4cd8ce07c 100644 --- a/src/agents/pi-embedded-runner.ts +++ b/src/agents/pi-embedded-runner.ts @@ -69,6 +69,7 @@ import { import { normalizeModelCompat } from "./model-compat.js"; import { ensureClawdbotModelsJson } from "./models-config.js"; import type { MessagingToolSend } from "./pi-embedded-messaging.js"; +import { ensurePiCompactionReserveTokens } from "./pi-settings.js"; import { acquireSessionWriteLock } from "./session-write-lock.js"; export type { MessagingToolSend } from "./pi-embedded-messaging.js"; @@ -981,6 +982,7 @@ export async function compactEmbeddedPiSession(params: { effectiveWorkspace, agentDir, ); + ensurePiCompactionReserveTokens({ settingsManager }); const additionalExtensionPaths = buildEmbeddedExtensionPaths({ cfg: params.config, sessionManager, @@ -1369,6 +1371,7 @@ export async function runEmbeddedPiAgent(params: { effectiveWorkspace, agentDir, ); + ensurePiCompactionReserveTokens({ settingsManager }); const additionalExtensionPaths = buildEmbeddedExtensionPaths({ cfg: params.config, sessionManager, diff --git a/src/agents/pi-settings.test.ts b/src/agents/pi-settings.test.ts new file mode 100644 index 000000000..139dac3f4 --- /dev/null +++ b/src/agents/pi-settings.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, it, vi } from "vitest"; + +import { + DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR, + ensurePiCompactionReserveTokens, +} from "./pi-settings.js"; + +describe("ensurePiCompactionReserveTokens", () => { + it("bumps reserveTokens when below floor", () => { + const settingsManager = { + getCompactionReserveTokens: () => 16_384, + applyOverrides: vi.fn(), + }; + + const result = ensurePiCompactionReserveTokens({ settingsManager }); + + expect(result).toEqual({ + didOverride: true, + reserveTokens: DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR, + }); + expect(settingsManager.applyOverrides).toHaveBeenCalledWith({ + compaction: { reserveTokens: DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR }, + }); + }); + + it("does not override when already above floor", () => { + const settingsManager = { + getCompactionReserveTokens: () => 32_000, + applyOverrides: vi.fn(), + }; + + const result = ensurePiCompactionReserveTokens({ settingsManager }); + + expect(result).toEqual({ didOverride: false, reserveTokens: 32_000 }); + expect(settingsManager.applyOverrides).not.toHaveBeenCalled(); + }); +}); diff --git a/src/agents/pi-settings.ts b/src/agents/pi-settings.ts new file mode 100644 index 000000000..056c318e4 --- /dev/null +++ b/src/agents/pi-settings.ts @@ -0,0 +1,27 @@ +export const DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR = 20_000; + +type PiSettingsManagerLike = { + getCompactionReserveTokens: () => number; + applyOverrides: (overrides: { + compaction: { reserveTokens: number }; + }) => void; +}; + +export function ensurePiCompactionReserveTokens(params: { + settingsManager: PiSettingsManagerLike; + minReserveTokens?: number; +}): { didOverride: boolean; reserveTokens: number } { + const minReserveTokens = + params.minReserveTokens ?? DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR; + const current = params.settingsManager.getCompactionReserveTokens(); + + if (current >= minReserveTokens) { + return { didOverride: false, reserveTokens: current }; + } + + params.settingsManager.applyOverrides({ + compaction: { reserveTokens: minReserveTokens }, + }); + + return { didOverride: true, reserveTokens: minReserveTokens }; +} diff --git a/src/auto-reply/reply/agent-runner.heartbeat-typing.test.ts b/src/auto-reply/reply/agent-runner.heartbeat-typing.test.ts index 19e0f362f..91f3086ca 100644 --- a/src/auto-reply/reply/agent-runner.heartbeat-typing.test.ts +++ b/src/auto-reply/reply/agent-runner.heartbeat-typing.test.ts @@ -164,6 +164,25 @@ describe("runReplyAgent typing (heartbeat)", () => { expect(typing.startTypingLoop).not.toHaveBeenCalled(); }); + it("suppresses partial streaming for NO_REPLY", async () => { + const onPartialReply = vi.fn(); + runEmbeddedPiAgentMock.mockImplementationOnce( + async (params: EmbeddedPiAgentParams) => { + await params.onPartialReply?.({ text: "NO_REPLY" }); + return { payloads: [{ text: "NO_REPLY" }], meta: {} }; + }, + ); + + const { run, typing } = createMinimalRun({ + opts: { isHeartbeat: false, onPartialReply }, + }); + await run(); + + expect(onPartialReply).not.toHaveBeenCalled(); + expect(typing.startTypingOnText).not.toHaveBeenCalled(); + expect(typing.startTypingLoop).not.toHaveBeenCalled(); + }); + it("starts typing only on deltas in message mode", async () => { runEmbeddedPiAgentMock.mockImplementationOnce(async () => ({ payloads: [{ text: "final" }], diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 1222e5c33..701d75b8e 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -36,7 +36,7 @@ import { import { stripHeartbeatToken } from "../heartbeat.js"; import type { OriginatingChannelType, TemplateContext } from "../templating.js"; import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js"; -import { SILENT_REPLY_TOKEN } from "../tokens.js"; +import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../tokens.js"; import type { GetReplyOptions, ReplyPayload } from "../types.js"; import { createAudioAsVoiceBuffer, @@ -485,6 +485,7 @@ export async function runReplyAgent(params: { } text = stripped.text; } + if (isSilentReplyText(text, SILENT_REPLY_TOKEN)) return; await typingSignals.signalTextDelta(text); await opts.onPartialReply?.({ text,