feat: add configurable bootstrap truncation

2026-01-13 04:24:17 +00:00
parent ea5597b483
commit 755a7e1b20
12 changed files with 154 additions and 24 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
 ### Changes
 - Models/Moonshot: add Kimi K2 0905 + turbo/thinking variants to the preset + docs. (#818 — thanks @mickahouan)
 - Memory: allow custom OpenAI-compatible embedding endpoints for memory search (remote baseUrl/apiKey/headers). (#819 — thanks @mukhtharcm)
 - Agents: make workspace bootstrap truncation configurable (default 20k) and warn when files are truncated.
 ### Fixes
 - Typing: keep typing indicators alive during tool execution. (#450, #447 — thanks @thewilloftheshadow)
--- a/docs/concepts/agent-workspace.md
+++ b/docs/concepts/agent-workspace.md
@@ -109,8 +109,10 @@ See [Memory](/concepts/memory) for the workflow and automatic memory flush.
  - Canvas UI files for node displays (for example `canvas/index.html`).
 If any bootstrap file is missing, Clawdbot injects a "missing file" marker into
-the session and continues. `clawdbot setup` can recreate missing defaults
+the session and continues. Large bootstrap files are truncated when injected;
-without overwriting existing files.
+adjust the limit with `agents.defaults.bootstrapMaxChars` (default: 20000).
 `clawdbot setup` can recreate missing defaults without overwriting existing
 files.
 ## What is NOT in the workspace
--- a/docs/concepts/system-prompt.md
+++ b/docs/concepts/system-prompt.md
@@ -38,7 +38,9 @@ Bootstrap files are trimmed and appended under **Project Context** so the model
 - `HEARTBEAT.md`
 - `BOOTSTRAP.md` (only on brand-new workspaces)
-Large files are truncated with a marker. Missing files inject a short missing-file marker.
+Large files are truncated with a marker. The max per-file size is controlled by
 `agents.defaults.bootstrapMaxChars` (default: 20000). Missing files inject a
 short missing-file marker.
 ## Time handling
--- a/docs/gateway/configuration.md
+++ b/docs/gateway/configuration.md
@@ -1075,6 +1075,20 @@ Use this for pre-seeded deployments where your workspace files come from a repo.
 }
 ```
 ### `agents.defaults.bootstrapMaxChars`
 Max characters of each workspace bootstrap file injected into the system prompt
 before truncation. Default: `20000`.
 When a file exceeds this limit, Clawdbot logs a warning and injects a truncated
 head/tail with a marker.
 ```json5
 {
  agents: { defaults: { bootstrapMaxChars: 20000 } }
 }
 ```
 ### `agents.defaults.userTimezone`
 Sets the user’s timezone for **system prompt context** (not for timestamps in
--- a/docs/token-use.md
+++ b/docs/token-use.md
@@ -16,7 +16,7 @@ Clawdbot assembles its own system prompt on every run. It includes:
 - Tool list + short descriptions
 - Skills list (only metadata; instructions are loaded on demand with `read`)
 - Self-update instructions
- Workspace + bootstrap files (`AGENTS.md`, `SOUL.md`, `TOOLS.md`, `IDENTITY.md`, `USER.md`, `HEARTBEAT.md`, `BOOTSTRAP.md` when new)
+- Workspace + bootstrap files (`AGENTS.md`, `SOUL.md`, `TOOLS.md`, `IDENTITY.md`, `USER.md`, `HEARTBEAT.md`, `BOOTSTRAP.md` when new). Large files are truncated to the per-file limit set by `agents.defaults.bootstrapMaxChars` (default: 20000 characters).
 - Time (UTC + user timezone)
 - Reply tags + heartbeat behavior
 - Runtime metadata (host/OS/model/thinking)
--- a/src/agents/cli-runner.ts
+++ b/src/agents/cli-runner.ts
@@ -21,6 +21,7 @@ import {
  classifyFailoverReason,
  type EmbeddedContextFile,
  isFailoverErrorMessage,
  resolveBootstrapMaxChars,
 } from "./pi-embedded-helpers.js";
 import type { EmbeddedPiRunResult } from "./pi-embedded-runner.js";
 import { buildAgentSystemPrompt } from "./system-prompt.js";
@@ -493,7 +494,11 @@ export async function runCliAgent(params: {
    await loadWorkspaceBootstrapFiles(workspaceDir),
    params.sessionKey ?? params.sessionId,
  );
-  const contextFiles = buildBootstrapContextFiles(bootstrapFiles);
+  const sessionLabel = params.sessionKey ?? params.sessionId;
  const contextFiles = buildBootstrapContextFiles(bootstrapFiles, {
    maxChars: resolveBootstrapMaxChars(params.config),
    warn: (message) => log.warn(`${message} (sessionKey=${sessionLabel})`),
  });
  const { defaultAgentId, sessionAgentId } = resolveSessionAgentIds({
    sessionKey: params.sessionKey,
    config: params.config,
--- a/src/agents/pi-embedded-helpers.test.ts
+++ b/src/agents/pi-embedded-helpers.test.ts
@@ -1,9 +1,11 @@
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 import type { AssistantMessage } from "@mariozechner/pi-ai";
 import { describe, expect, it } from "vitest";
 import type { ClawdbotConfig } from "../config/config.js";
 import {
  buildBootstrapContextFiles,
  classifyFailoverReason,
  DEFAULT_BOOTSTRAP_MAX_CHARS,
  formatAssistantErrorText,
  isAuthErrorMessage,
  isBillingErrorMessage,
@@ -13,6 +15,7 @@ import {
  isMessagingToolDuplicate,
  isFailoverErrorMessage,
  normalizeTextForComparison,
  resolveBootstrapMaxChars,
  sanitizeGoogleTurnOrdering,
  sanitizeSessionMessagesImages,
  sanitizeToolCallId,
@@ -49,17 +52,58 @@ describe("buildBootstrapContextFiles", () => {
  });
  it("truncates large bootstrap content", () => {
-    const head = `HEAD-${"a".repeat(6000)}`;
+    const head = `HEAD-${"a".repeat(600)}`;
-    const tail = `${"b".repeat(3000)}-TAIL`;
+    const tail = `${"b".repeat(300)}-TAIL`;
    const long = `${head}${tail}`;
-    const files = [makeFile({ content: long })];
+    const files = [makeFile({ name: "TOOLS.md", content: long })];
-    const [result] = buildBootstrapContextFiles(files);
+    const warnings: string[] = [];
    const maxChars = 200;
    const expectedTailChars = Math.floor(maxChars * 0.2);
    const [result] = buildBootstrapContextFiles(files, {
      maxChars,
      warn: (message) => warnings.push(message),
    });
    expect(result?.content).toContain(
-      "[...truncated, read AGENTS.md for full content...]",
+      "[...truncated, read TOOLS.md for full content...]",
    );
    expect(result?.content.length).toBeLessThan(long.length);
    expect(result?.content.startsWith(long.slice(0, 120))).toBe(true);
-    expect(result?.content.endsWith(long.slice(-120))).toBe(true);
+    expect(result?.content.endsWith(long.slice(-expectedTailChars))).toBe(
      true,
    );
    expect(warnings).toHaveLength(1);
    expect(warnings[0]).toContain("TOOLS.md");
    expect(warnings[0]).toContain("limit 200");
  });
  it("keeps content under the default limit", () => {
    const long = "a".repeat(DEFAULT_BOOTSTRAP_MAX_CHARS - 10);
    const files = [makeFile({ content: long })];
    const [result] = buildBootstrapContextFiles(files);
    expect(result?.content).toBe(long);
    expect(result?.content).not.toContain(
      "[...truncated, read AGENTS.md for full content...]",
    );
  });
 });
 // Exercises resolveBootstrapMaxChars with no config, a valid override, and an invalid override.
 describe("resolveBootstrapMaxChars", () => {
  // Builds a config that sets only agents.defaults.bootstrapMaxChars.
  const configWithLimit = (bootstrapMaxChars: number) =>
    ({ agents: { defaults: { bootstrapMaxChars } } }) as ClawdbotConfig;

  it("returns default when unset", () => {
    const resolved = resolveBootstrapMaxChars();
    expect(resolved).toBe(DEFAULT_BOOTSTRAP_MAX_CHARS);
  });

  it("uses configured value when valid", () => {
    const resolved = resolveBootstrapMaxChars(configWithLimit(12345));
    expect(resolved).toBe(12345);
  });

  it("falls back when invalid", () => {
    const resolved = resolveBootstrapMaxChars(configWithLimit(-1));
    expect(resolved).toBe(DEFAULT_BOOTSTRAP_MAX_CHARS);
  });
 });
--- a/src/agents/pi-embedded-helpers.ts
+++ b/src/agents/pi-embedded-helpers.ts
@@ -53,23 +53,57 @@ export function stripThoughtSignatures<T>(content: T): T {
  }) as T;
 }
-const MAX_BOOTSTRAP_CHARS = 4000;
+export const DEFAULT_BOOTSTRAP_MAX_CHARS = 20_000;
-const BOOTSTRAP_HEAD_CHARS = 2800;
+const BOOTSTRAP_HEAD_RATIO = 0.7;
-const BOOTSTRAP_TAIL_CHARS = 800;
+const BOOTSTRAP_TAIL_RATIO = 0.2;
-function trimBootstrapContent(content: string, fileName: string): string {
+type TrimBootstrapResult = {
  content: string; // text to inject: the trimmed original, or head + marker + tail when truncated
  truncated: boolean; // true only when the content exceeded maxChars and was cut down
  maxChars: number; // the limit that was applied, echoed back for callers' messages
  originalLength: number; // content length after trailing whitespace was stripped, before any truncation
 };
 /**
  * Resolves the per-file character limit for injected workspace bootstrap files.
  *
  * Reads `agents.defaults.bootstrapMaxChars` from the config and returns its
  * integer floor when that floor is a finite number of at least 1. Any other
  * value (unset, non-number, NaN, infinite, zero, negative, or a fraction below
  * 1) falls back to `DEFAULT_BOOTSTRAP_MAX_CHARS`.
  *
  * @param cfg - Optional config; may be omitted entirely.
  * @returns A positive integer character limit.
  */
 export function resolveBootstrapMaxChars(cfg?: ClawdbotConfig): number {
  const raw = cfg?.agents?.defaults?.bootstrapMaxChars;
  if (typeof raw !== "number" || !Number.isFinite(raw)) {
    return DEFAULT_BOOTSTRAP_MAX_CHARS;
  }
  // Floor before validating: a fraction in (0, 1) is > 0 but floors to 0,
  // which would otherwise be returned as a zero-character limit.
  const limit = Math.floor(raw);
  return limit >= 1 ? limit : DEFAULT_BOOTSTRAP_MAX_CHARS;
 }
 /**
  * Prepares one bootstrap file's text for injection, truncating it when it
  * exceeds `maxChars`.
  *
  * Trailing whitespace is stripped first. Content at or under the limit is
  * returned unchanged. Longer content is reduced to a head slice and a tail
  * slice (sized from BOOTSTRAP_HEAD_RATIO / BOOTSTRAP_TAIL_RATIO of `maxChars`,
  * at least 1 char each) joined around a marker that names the file. The
  * marker and separator lines are added on top of the head and tail slices.
  *
  * @param content - Raw file content.
  * @param fileName - Name shown in the truncation marker.
  * @param maxChars - Length threshold above which the content is truncated.
  * @returns The resulting text plus truncation metadata; `originalLength` is
  *   measured after trailing whitespace is removed.
  */
 function trimBootstrapContent(
  content: string,
  fileName: string,
  maxChars: number,
 ): TrimBootstrapResult {
  const trimmed = content.trimEnd();
-  if (trimmed.length <= MAX_BOOTSTRAP_CHARS) return trimmed;
+  if (trimmed.length <= maxChars) {
    return {
      content: trimmed,
      truncated: false,
      maxChars,
      originalLength: trimmed.length,
    };
  }
-  const head = trimmed.slice(0, BOOTSTRAP_HEAD_CHARS);
+  const headChars = Math.max(1, Math.floor(maxChars * BOOTSTRAP_HEAD_RATIO));
-  const tail = trimmed.slice(-BOOTSTRAP_TAIL_CHARS);
+  const tailChars = Math.max(1, Math.floor(maxChars * BOOTSTRAP_TAIL_RATIO));
-  return [
+  const head = trimmed.slice(0, headChars);
  const tail = trimmed.slice(-tailChars);
  // Empty entries put a blank line on each side of the marker once joined.
  const contentWithMarker = [
    head,
    "",
    `[...truncated, read ${fileName} for full content...]`,
    "",
    tail,
  ].join("\n");
  return {
    content: contentWithMarker,
    truncated: true,
    maxChars,
    originalLength: trimmed.length,
  };
 }
 export async function ensureSessionHeader(params: {
@@ -254,7 +288,9 @@ export function sanitizeGoogleTurnOrdering(
 export function buildBootstrapContextFiles(
  files: WorkspaceBootstrapFile[],
  opts?: { warn?: (message: string) => void; maxChars?: number },
 ): EmbeddedContextFile[] {
  const maxChars = opts?.maxChars ?? DEFAULT_BOOTSTRAP_MAX_CHARS;
  const result: EmbeddedContextFile[] = [];
  for (const file of files) {
    if (file.missing) {
@@ -264,11 +300,20 @@ export function buildBootstrapContextFiles(
      });
      continue;
    }
-    const trimmed = trimBootstrapContent(file.content ?? "", file.name);
+    const trimmed = trimBootstrapContent(
-    if (!trimmed) continue;
+      file.content ?? "",
      file.name,
      maxChars,
    );
    if (!trimmed.content) continue;
    if (trimmed.truncated) {
      opts?.warn?.(
        `workspace bootstrap file ${file.name} is ${trimmed.originalLength} chars (limit ${trimmed.maxChars}); truncating in injected context`,
      );
    }
    result.push({
      path: file.name,
-      content: trimmed,
+      content: trimmed.content,
    });
  }
  return result;
--- a/src/agents/pi-embedded-runner.ts
+++ b/src/agents/pi-embedded-runner.ts
@@ -99,6 +99,7 @@ import {
  isRateLimitAssistantError,
  isTimeoutErrorMessage,
  pickFallbackThinkingLevel,
  resolveBootstrapMaxChars,
  sanitizeGoogleTurnOrdering,
  sanitizeSessionMessagesImages,
  validateAnthropicTurns,
@@ -1152,7 +1153,12 @@ export async function compactEmbeddedPiSession(params: {
          await loadWorkspaceBootstrapFiles(effectiveWorkspace),
          params.sessionKey ?? params.sessionId,
        );
-        const contextFiles = buildBootstrapContextFiles(bootstrapFiles);
+        const sessionLabel = params.sessionKey ?? params.sessionId;
        const contextFiles = buildBootstrapContextFiles(bootstrapFiles, {
          maxChars: resolveBootstrapMaxChars(params.config),
          warn: (message) =>
            log.warn(`${message} (sessionKey=${sessionLabel})`),
        });
        const runAbortController = new AbortController();
        const tools = createClawdbotCodingTools({
          exec: {
@@ -1584,7 +1590,12 @@ export async function runEmbeddedPiAgent(params: {
            await loadWorkspaceBootstrapFiles(effectiveWorkspace),
            params.sessionKey ?? params.sessionId,
          );
-          const contextFiles = buildBootstrapContextFiles(bootstrapFiles);
+          const sessionLabel = params.sessionKey ?? params.sessionId;
          const contextFiles = buildBootstrapContextFiles(bootstrapFiles, {
            maxChars: resolveBootstrapMaxChars(params.config),
            warn: (message) =>
              log.warn(`${message} (sessionKey=${sessionLabel})`),
          });
          // Tool schemas must be provider-compatible (OpenAI requires top-level `type: "object"`).
          // `createClawdbotCodingTools()` normalizes schemas so the session can pass them through unchanged.
          const tools = createClawdbotCodingTools({
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -115,6 +115,7 @@ const FIELD_LABELS: Record<string, string> = {
  "gateway.reload.mode": "Config Reload Mode",
  "gateway.reload.debounceMs": "Config Reload Debounce (ms)",
  "agents.defaults.workspace": "Workspace",
  "agents.defaults.bootstrapMaxChars": "Bootstrap Max Chars",
  "agents.defaults.memorySearch": "Memory Search",
  "agents.defaults.memorySearch.enabled": "Enable Memory Search",
  "agents.defaults.memorySearch.provider": "Memory Search Provider",
@@ -233,6 +234,8 @@ const FIELD_HELP: Record<string, string> = {
    "Cap (hours) for billing backoff (default: 24).",
  "auth.cooldowns.failureWindowHours":
    "Failure window (hours) for backoff counters (default: 24).",
  "agents.defaults.bootstrapMaxChars":
    "Max characters of each workspace bootstrap file injected into the system prompt before truncation (default: 20000).",
  "agents.defaults.models":
    "Configured model catalog (keys are full provider/model IDs).",
  "agents.defaults.memorySearch":
--- a/src/config/types.ts
+++ b/src/config/types.ts
@@ -1593,6 +1593,8 @@ export type AgentDefaultsConfig = {
  workspace?: string;
  /** Skip bootstrap (BOOTSTRAP.md creation, etc.) for pre-configured deployments. */
  skipBootstrap?: boolean;
  /** Max chars for injected bootstrap files before truncation (default: 20000). */
  bootstrapMaxChars?: number;
  /** Optional IANA timezone for the user (used in system prompt; defaults to host timezone). */
  userTimezone?: string;
  /** Optional display-only context window override (used for % in status UIs). */
--- a/src/config/zod-schema.ts
+++ b/src/config/zod-schema.ts
@@ -1169,6 +1169,7 @@ const AgentDefaultsSchema = z
      .optional(),
    workspace: z.string().optional(),
    skipBootstrap: z.boolean().optional(),
    bootstrapMaxChars: z.number().int().positive().optional(),
    userTimezone: z.string().optional(),
    contextTokens: z.number().int().positive().optional(),
    cliBackends: z.record(z.string(), CliBackendSchema).optional(),