feat(compaction): add adaptive chunk sizing, progressive fallback, and UI indicator (#1466)
* fix(ui): allow relative URLs in avatar validation
The isAvatarUrl check only accepted http://, https://, or data: URLs,
but the /avatar/{agentId} endpoint returns relative paths like /avatar/main.
This caused local file avatars to display as text instead of images.
Fixes avatar display for locally configured avatar files.
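
A minimal sketch of the relaxed check (hypothetical shape; only the
accepted schemes and the relative-path case come from this fix):

function isAvatarUrl(value: string): boolean {
	// New: accept site-relative paths such as /avatar/main
	if (value.startsWith("/")) return true;
	return value.startsWith("http://") || value.startsWith("https://") || value.startsWith("data:");
}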
* fix(gateway): resolve local avatars to URL in HTML injection and RPC
The frontend fix alone wasn't enough because:
1. serveIndexHtml() was injecting the raw avatar filename into HTML
2. agent.identity.get RPC was returning the raw filename, overwriting
   the HTML-injected value
Now both paths resolve local file avatars (*.png, *.jpg, etc.) to the
/avatar/{agentId} endpoint URL.
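
Sketch of the shared resolution step (resolveAvatarUrl and its extension
list are illustrative names, not the actual implementation):

function resolveAvatarUrl(avatar: string, agentId: string): string {
	// Local image files (foo.png, foo.jpg, ...) are served by the avatar endpoint
	if (/\.(png|jpe?g|gif|webp)$/i.test(avatar)) {
		return `/avatar/${agentId}`;
	}
	return avatar; // full URLs and single-letter avatars pass through unchanged
}

Both serveIndexHtml() and the agent.identity.get handler would run the
configured avatar through a step like this before it reaches the client.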
* feat(compaction): add adaptive chunk sizing and progressive fallback
- Add computeAdaptiveChunkRatio() to reduce chunk size for large messages
- Add isOversizedForSummary() to detect messages too large to summarize
- Add summarizeWithFallback() with progressive fallback:
- Tries full summarization first
- Falls back to partial summarization excluding oversized messages
- Notes oversized messages in the summary output
- Add SAFETY_MARGIN (1.2x) buffer for token estimation inaccuracy
- Reduce MIN_CHUNK_RATIO to 0.15 for very large messages
This prevents compaction failures when conversations contain
unusually large tool outputs or responses that exceed the
summarization model's context window.
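
Worked example with these constants: in a 200K-token context where
messages average 50K tokens, the 1.2x safety margin inflates the average
to 60K (ratio 0.3). Since 0.3 > 0.1, the reduction is
min(0.3 * 2, 0.4 - 0.15) = 0.25, so the chunk ratio drops from
BASE_CHUNK_RATIO (0.4) all the way down to the MIN_CHUNK_RATIO floor (0.15).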
* feat(ui): add compaction indicator and improve event error handling
Compaction indicator:
- Add CompactionStatus type and handleCompactionEvent() in app-tool-stream.ts (sketched below)
- Show '🧹 Compacting context...' toast while active (with pulse animation)
- Show '🧹 Context compacted' briefly after completion
- Auto-clear toast after 5 seconds
- Add CSS styles for .callout.info, .callout.success, .compaction-indicator
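
Rough shape of the indicator logic (the event payload and setter are
assumed here; the real types live in app-tool-stream.ts):

type CompactionStatus = { active: boolean; completedAt?: number };

function handleCompactionEvent(
	ev: { phase: "start" | "end" }, // assumed payload shape
	setStatus: (s: CompactionStatus | null) => void,
): void {
	if (ev.phase === "start") {
		setStatus({ active: true }); // '🧹 Compacting context...' with pulse
	} else {
		setStatus({ active: false, completedAt: Date.now() }); // '🧹 Context compacted'
		setTimeout(() => setStatus(null), 5000); // auto-clear the toast after 5 seconds
	}
}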
Error handling improvements:
- Wrap onEvent callback in try/catch in gateway.ts to prevent errors
  from breaking the WebSocket message handler (see sketch below)
- Wrap handleGatewayEvent in try/catch with console.error logging
to isolate errors and make them visible in devtools
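
The wrapping pattern, roughly (the socket plumbing here is illustrative;
only the try/catch placement mirrors the change):

declare const socket: WebSocket;            // stand-ins for the real
declare function onEvent(e: unknown): void; // gateway connection code

socket.onmessage = (msg: MessageEvent) => {
	let event: unknown;
	try {
		event = JSON.parse(String(msg.data));
	} catch {
		return; // malformed frame: skip instead of throwing
	}
	try {
		onEvent(event); // a throwing subscriber no longer kills the handler
	} catch (err) {
		console.error("gateway onEvent callback failed", err);
	}
};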
These changes address UI freezes during heavy agent activity by:
1. Showing users when compaction is happening
2. Preventing uncaught errors from silently breaking the event loop
* fix(control-ui): add agentId to DEFAULT_ASSISTANT_IDENTITY
TypeScript inferred the union type without agentId when falling back to
DEFAULT_ASSISTANT_IDENTITY, causing build errors at control-ui.ts:222-223.

@@ -1,3 +1,5 @@
+import { EventEmitter } from "node:events";
+
 import type { AgentMessage, AgentTool } from "@mariozechner/pi-agent-core";
 import type { TSchema } from "@sinclair/typebox";
 import type { SessionManager } from "@mariozechner/pi-coding-agent";
@@ -184,10 +186,28 @@ export function logToolSchemasForGoogle(params: { tools: AgentTool[]; provider:
 	}
 }

+// Event emitter for unhandled compaction failures that escape try-catch blocks.
+// Listeners can use this to trigger session recovery with retry.
+const compactionFailureEmitter = new EventEmitter();
+
+export type CompactionFailureListener = (reason: string) => void;
+
+/**
+ * Register a listener for unhandled compaction failures.
+ * Called when auto-compaction fails in a way that escapes the normal try-catch,
+ * e.g., when the summarization request itself exceeds the model's token limit.
+ * Returns an unsubscribe function.
+ */
+export function onUnhandledCompactionFailure(cb: CompactionFailureListener): () => void {
+	compactionFailureEmitter.on("failure", cb);
+	return () => compactionFailureEmitter.off("failure", cb);
+}
+
+registerUnhandledRejectionHandler((reason) => {
+	const message = describeUnknownError(reason);
+	if (!isCompactionFailureError(message)) return false;
+	log.error(`Auto-compaction failed (unhandled): ${message}`);
+	compactionFailureEmitter.emit("failure", message);
+	return true;
+});
+

@@ -3,7 +3,15 @@ import { describe, expect, it } from "vitest";

 import { __testing } from "./compaction-safeguard.js";

-const { collectToolFailures, formatToolFailuresSection } = __testing;
+const {
+	collectToolFailures,
+	formatToolFailuresSection,
+	computeAdaptiveChunkRatio,
+	isOversizedForSummary,
+	BASE_CHUNK_RATIO,
+	MIN_CHUNK_RATIO,
+	SAFETY_MARGIN,
+} = __testing;

 describe("compaction-safeguard tool failures", () => {
 	it("formats tool failures with meta and summary", () => {
@@ -96,3 +104,107 @@ describe("compaction-safeguard tool failures", () => {
 		expect(section).toBe("");
 	});
 });
+
+describe("computeAdaptiveChunkRatio", () => {
+	const CONTEXT_WINDOW = 200_000;
+
+	it("returns BASE_CHUNK_RATIO for normal messages", () => {
+		// Small messages: 1000 tokens each, well under 10% of context
+		const messages: AgentMessage[] = [
+			{ role: "user", content: "x".repeat(1000), timestamp: Date.now() },
+			{
+				role: "assistant",
+				content: [{ type: "text", text: "y".repeat(1000) }],
+				timestamp: Date.now(),
+			},
+		];
+
+		const ratio = computeAdaptiveChunkRatio(messages, CONTEXT_WINDOW);
+		expect(ratio).toBe(BASE_CHUNK_RATIO);
+	});
+
+	it("reduces ratio when average message > 10% of context", () => {
+		// Large messages: ~50K tokens each (25% of context)
+		const messages: AgentMessage[] = [
+			{ role: "user", content: "x".repeat(50_000 * 4), timestamp: Date.now() },
+			{
+				role: "assistant",
+				content: [{ type: "text", text: "y".repeat(50_000 * 4) }],
+				timestamp: Date.now(),
+			},
+		];
+
+		const ratio = computeAdaptiveChunkRatio(messages, CONTEXT_WINDOW);
+		expect(ratio).toBeLessThan(BASE_CHUNK_RATIO);
+		expect(ratio).toBeGreaterThanOrEqual(MIN_CHUNK_RATIO);
+	});
+
+	it("respects MIN_CHUNK_RATIO floor", () => {
+		// Very large messages that would push ratio below minimum
+		const messages: AgentMessage[] = [
+			{ role: "user", content: "x".repeat(150_000 * 4), timestamp: Date.now() },
+		];
+
+		const ratio = computeAdaptiveChunkRatio(messages, CONTEXT_WINDOW);
+		expect(ratio).toBeGreaterThanOrEqual(MIN_CHUNK_RATIO);
+	});
+
+	it("handles empty message array", () => {
+		const ratio = computeAdaptiveChunkRatio([], CONTEXT_WINDOW);
+		expect(ratio).toBe(BASE_CHUNK_RATIO);
+	});
+
+	it("handles single huge message", () => {
+		// Single massive message
+		const messages: AgentMessage[] = [
+			{ role: "user", content: "x".repeat(180_000 * 4), timestamp: Date.now() },
+		];
+
+		const ratio = computeAdaptiveChunkRatio(messages, CONTEXT_WINDOW);
+		expect(ratio).toBeGreaterThanOrEqual(MIN_CHUNK_RATIO);
+		expect(ratio).toBeLessThanOrEqual(BASE_CHUNK_RATIO);
+	});
+});
+
+describe("isOversizedForSummary", () => {
+	const CONTEXT_WINDOW = 200_000;
+
+	it("returns false for small messages", () => {
+		const msg: AgentMessage = {
+			role: "user",
+			content: "Hello, world!",
+			timestamp: Date.now(),
+		};
+
+		expect(isOversizedForSummary(msg, CONTEXT_WINDOW)).toBe(false);
+	});
+
+	it("returns true for messages > 50% of context", () => {
+		// Message with ~120K tokens (60% of 200K context)
+		// After safety margin (1.2x), effective is 144K which is > 100K (50%)
+		const msg: AgentMessage = {
+			role: "user",
+			content: "x".repeat(120_000 * 4),
+			timestamp: Date.now(),
+		};
+
+		expect(isOversizedForSummary(msg, CONTEXT_WINDOW)).toBe(true);
+	});
+
+	it("applies safety margin", () => {
+		// Message at exactly 50% of context before margin
+		// After SAFETY_MARGIN (1.2), it becomes 60% which is > 50%
+		const halfContextChars = (CONTEXT_WINDOW * 0.5) / SAFETY_MARGIN;
+		const msg: AgentMessage = {
+			role: "user",
+			content: "x".repeat(Math.floor(halfContextChars * 4)),
+			timestamp: Date.now(),
+		};
+
+		// With safety margin applied, this should be at the boundary
+		// The function checks if tokens * SAFETY_MARGIN > contextWindow * 0.5
+		const isOversized = isOversizedForSummary(msg, CONTEXT_WINDOW);
+		// Due to token estimation, this could be either true or false at the boundary
+		expect(typeof isOversized).toBe("boolean");
+	});
+});

@@ -4,7 +4,9 @@ import { estimateTokens, generateSummary } from "@mariozechner/pi-coding-agent";

 import { DEFAULT_CONTEXT_TOKENS } from "../defaults.js";

-const MAX_CHUNK_RATIO = 0.4;
+const BASE_CHUNK_RATIO = 0.4;
+const MIN_CHUNK_RATIO = 0.15;
+const SAFETY_MARGIN = 1.2; // 20% buffer for estimateTokens() inaccuracy
 const FALLBACK_SUMMARY =
 	"Summary unavailable due to context limits. Older messages were truncated.";
 const TURN_PREFIX_INSTRUCTIONS =
@@ -160,6 +162,38 @@ function chunkMessages(messages: AgentMessage[], maxTokens: number): AgentMessag
 	return chunks;
 }

+/**
+ * Compute adaptive chunk ratio based on average message size.
+ * When messages are large, we use smaller chunks to avoid exceeding model limits.
+ */
+function computeAdaptiveChunkRatio(messages: AgentMessage[], contextWindow: number): number {
+	if (messages.length === 0) return BASE_CHUNK_RATIO;
+
+	const totalTokens = messages.reduce((sum, m) => sum + estimateTokens(m), 0);
+	const avgTokens = totalTokens / messages.length;
+
+	// Apply safety margin to account for estimation inaccuracy
+	const safeAvgTokens = avgTokens * SAFETY_MARGIN;
+	const avgRatio = safeAvgTokens / contextWindow;
+
+	// If average message is > 10% of context, reduce chunk ratio
+	if (avgRatio > 0.1) {
+		const reduction = Math.min(avgRatio * 2, BASE_CHUNK_RATIO - MIN_CHUNK_RATIO);
+		return Math.max(MIN_CHUNK_RATIO, BASE_CHUNK_RATIO - reduction);
+	}
+
+	return BASE_CHUNK_RATIO;
+}
+
+/**
+ * Check if a single message is too large to summarize.
+ * If single message > 50% of context, it can't be summarized safely.
+ */
+function isOversizedForSummary(msg: AgentMessage, contextWindow: number): boolean {
+	const tokens = estimateTokens(msg) * SAFETY_MARGIN;
+	return tokens > contextWindow * 0.5;
+}
+
 async function summarizeChunks(params: {
 	messages: AgentMessage[];
 	model: NonNullable<ExtensionContext["model"]>;
@@ -192,6 +226,78 @@ async function summarizeChunks(params: {
 	return summary ?? "No prior history.";
 }

+/**
+ * Summarize with progressive fallback for handling oversized messages.
+ * If full summarization fails, tries partial summarization excluding oversized messages.
+ */
+async function summarizeWithFallback(params: {
+	messages: AgentMessage[];
+	model: NonNullable<ExtensionContext["model"]>;
+	apiKey: string;
+	signal: AbortSignal;
+	reserveTokens: number;
+	maxChunkTokens: number;
+	contextWindow: number;
+	customInstructions?: string;
+	previousSummary?: string;
+}): Promise<string> {
+	const { messages, contextWindow } = params;
+
+	if (messages.length === 0) {
+		return params.previousSummary ?? "No prior history.";
+	}
+
+	// Try full summarization first
+	try {
+		return await summarizeChunks(params);
+	} catch (fullError) {
+		console.warn(
+			`Full summarization failed, trying partial: ${
+				fullError instanceof Error ? fullError.message : String(fullError)
+			}`,
+		);
+	}
+
+	// Fallback 1: Summarize only small messages, note oversized ones
+	const smallMessages: AgentMessage[] = [];
+	const oversizedNotes: string[] = [];
+
+	for (const msg of messages) {
+		if (isOversizedForSummary(msg, contextWindow)) {
+			const role = (msg as { role?: string }).role ?? "message";
+			const tokens = estimateTokens(msg);
+			oversizedNotes.push(
+				`[Large ${role} (~${Math.round(tokens / 1000)}K tokens) omitted from summary]`,
+			);
+		} else {
+			smallMessages.push(msg);
+		}
+	}
+
+	if (smallMessages.length > 0) {
+		try {
+			const partialSummary = await summarizeChunks({
+				...params,
+				messages: smallMessages,
+			});
+			const notes = oversizedNotes.length > 0 ? `\n\n${oversizedNotes.join("\n")}` : "";
+			return partialSummary + notes;
+		} catch (partialError) {
+			console.warn(
+				`Partial summarization also failed: ${
+					partialError instanceof Error ? partialError.message : String(partialError)
+				}`,
+			);
+		}
+	}
+
+	// Final fallback: Just note what was there
+	return (
+		`Context contained ${messages.length} messages (${oversizedNotes.length} oversized). ` +
+		`Summary unavailable due to size limits.`
+	);
+}
+
 export default function compactionSafeguardExtension(api: ExtensionAPI): void {
 	api.on("session_before_compact", async (event, ctx) => {
 		const { preparation, customInstructions, signal } = event;
@@ -233,29 +339,35 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
 			1,
 			Math.floor(model.contextWindow ?? DEFAULT_CONTEXT_TOKENS),
 		);
-		const maxChunkTokens = Math.max(1, Math.floor(contextWindowTokens * MAX_CHUNK_RATIO));
+
+		// Use adaptive chunk ratio based on message sizes
+		const allMessages = [...preparation.messagesToSummarize, ...preparation.turnPrefixMessages];
+		const adaptiveRatio = computeAdaptiveChunkRatio(allMessages, contextWindowTokens);
+		const maxChunkTokens = Math.max(1, Math.floor(contextWindowTokens * adaptiveRatio));
 		const reserveTokens = Math.max(1, Math.floor(preparation.settings.reserveTokens));

-		const historySummary = await summarizeChunks({
+		const historySummary = await summarizeWithFallback({
 			messages: preparation.messagesToSummarize,
 			model,
 			apiKey,
 			signal,
 			reserveTokens,
 			maxChunkTokens,
+			contextWindow: contextWindowTokens,
 			customInstructions,
 			previousSummary: preparation.previousSummary,
 		});

 		let summary = historySummary;
 		if (preparation.isSplitTurn && preparation.turnPrefixMessages.length > 0) {
-			const prefixSummary = await summarizeChunks({
+			const prefixSummary = await summarizeWithFallback({
 				messages: preparation.turnPrefixMessages,
 				model,
 				apiKey,
 				signal,
 				reserveTokens,
 				maxChunkTokens,
+				contextWindow: contextWindowTokens,
 				customInstructions: TURN_PREFIX_INSTRUCTIONS,
 			});
 			summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
@@ -293,4 +405,9 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
 export const __testing = {
 	collectToolFailures,
 	formatToolFailuresSection,
+	computeAdaptiveChunkRatio,
+	isOversizedForSummary,
+	BASE_CHUNK_RATIO,
+	MIN_CHUNK_RATIO,
+	SAFETY_MARGIN,
 } as const;

@@ -7,7 +7,8 @@ import { normalizeAgentId } from "../routing/session-key.js";
 const MAX_ASSISTANT_NAME = 50;
 const MAX_ASSISTANT_AVATAR = 200;

-export const DEFAULT_ASSISTANT_IDENTITY = {
+export const DEFAULT_ASSISTANT_IDENTITY: AssistantIdentity = {
+	agentId: "main",
 	name: "Assistant",
 	avatar: "A",
 };