feat: add configurable bootstrap truncation

This commit is contained in:
Peter Steinberger
2026-01-13 04:24:17 +00:00
parent ea5597b483
commit 755a7e1b20
12 changed files with 154 additions and 24 deletions

View File

@@ -5,6 +5,7 @@
### Changes ### Changes
- Models/Moonshot: add Kimi K2 0905 + turbo/thinking variants to the preset + docs. (#818 — thanks @mickahouan) - Models/Moonshot: add Kimi K2 0905 + turbo/thinking variants to the preset + docs. (#818 — thanks @mickahouan)
- Memory: allow custom OpenAI-compatible embedding endpoints for memory search (remote baseUrl/apiKey/headers). (#819 — thanks @mukhtharcm) - Memory: allow custom OpenAI-compatible embedding endpoints for memory search (remote baseUrl/apiKey/headers). (#819 — thanks @mukhtharcm)
- Agents: make workspace bootstrap truncation configurable (default 20k) and warn when files are truncated.
### Fixes ### Fixes
- Typing: keep typing indicators alive during tool execution. (#450, #447 — thanks @thewilloftheshadow) - Typing: keep typing indicators alive during tool execution. (#450, #447 — thanks @thewilloftheshadow)

View File

@@ -109,8 +109,10 @@ See [Memory](/concepts/memory) for the workflow and automatic memory flush.
- Canvas UI files for node displays (for example `canvas/index.html`). - Canvas UI files for node displays (for example `canvas/index.html`).
If any bootstrap file is missing, Clawdbot injects a "missing file" marker into If any bootstrap file is missing, Clawdbot injects a "missing file" marker into
the session and continues. `clawdbot setup` can recreate missing defaults the session and continues. Large bootstrap files are truncated when injected;
without overwriting existing files. adjust the limit with `agents.defaults.bootstrapMaxChars` (default: 20000).
`clawdbot setup` can recreate missing defaults without overwriting existing
files.
## What is NOT in the workspace ## What is NOT in the workspace

View File

@@ -38,7 +38,9 @@ Bootstrap files are trimmed and appended under **Project Context** so the model
- `HEARTBEAT.md` - `HEARTBEAT.md`
- `BOOTSTRAP.md` (only on brand-new workspaces) - `BOOTSTRAP.md` (only on brand-new workspaces)
Large files are truncated with a marker. Missing files inject a short missing-file marker. Large files are truncated with a marker. The max per-file size is controlled by
`agents.defaults.bootstrapMaxChars` (default: 20000). Missing files inject a
short missing-file marker.
## Time handling ## Time handling

View File

@@ -1075,6 +1075,20 @@ Use this for pre-seeded deployments where your workspace files come from a repo.
} }
``` ```
### `agents.defaults.bootstrapMaxChars`
Max characters of each workspace bootstrap file injected into the system prompt
before truncation. Default: `20000`.
When a file exceeds this limit, Clawdbot logs a warning and injects a truncated
copy: the head and tail of the file, joined by a truncation marker.
```json5
{
agents: { defaults: { bootstrapMaxChars: 20000 } }
}
```
### `agents.defaults.userTimezone` ### `agents.defaults.userTimezone`
Sets the user's timezone for **system prompt context** (not for timestamps in Sets the user's timezone for **system prompt context** (not for timestamps in

View File

@@ -16,7 +16,7 @@ Clawdbot assembles its own system prompt on every run. It includes:
- Tool list + short descriptions - Tool list + short descriptions
- Skills list (only metadata; instructions are loaded on demand with `read`) - Skills list (only metadata; instructions are loaded on demand with `read`)
- Self-update instructions - Self-update instructions
- Workspace + bootstrap files (`AGENTS.md`, `SOUL.md`, `TOOLS.md`, `IDENTITY.md`, `USER.md`, `HEARTBEAT.md`, `BOOTSTRAP.md` when new) - Workspace + bootstrap files (`AGENTS.md`, `SOUL.md`, `TOOLS.md`, `IDENTITY.md`, `USER.md`, `HEARTBEAT.md`, `BOOTSTRAP.md` when new). Large files are truncated to at most `agents.defaults.bootstrapMaxChars` characters each (default: 20000).
- Time (UTC + user timezone) - Time (UTC + user timezone)
- Reply tags + heartbeat behavior - Reply tags + heartbeat behavior
- Runtime metadata (host/OS/model/thinking) - Runtime metadata (host/OS/model/thinking)

View File

@@ -21,6 +21,7 @@ import {
classifyFailoverReason, classifyFailoverReason,
type EmbeddedContextFile, type EmbeddedContextFile,
isFailoverErrorMessage, isFailoverErrorMessage,
resolveBootstrapMaxChars,
} from "./pi-embedded-helpers.js"; } from "./pi-embedded-helpers.js";
import type { EmbeddedPiRunResult } from "./pi-embedded-runner.js"; import type { EmbeddedPiRunResult } from "./pi-embedded-runner.js";
import { buildAgentSystemPrompt } from "./system-prompt.js"; import { buildAgentSystemPrompt } from "./system-prompt.js";
@@ -493,7 +494,11 @@ export async function runCliAgent(params: {
await loadWorkspaceBootstrapFiles(workspaceDir), await loadWorkspaceBootstrapFiles(workspaceDir),
params.sessionKey ?? params.sessionId, params.sessionKey ?? params.sessionId,
); );
const contextFiles = buildBootstrapContextFiles(bootstrapFiles); const sessionLabel = params.sessionKey ?? params.sessionId;
const contextFiles = buildBootstrapContextFiles(bootstrapFiles, {
maxChars: resolveBootstrapMaxChars(params.config),
warn: (message) => log.warn(`${message} (sessionKey=${sessionLabel})`),
});
const { defaultAgentId, sessionAgentId } = resolveSessionAgentIds({ const { defaultAgentId, sessionAgentId } = resolveSessionAgentIds({
sessionKey: params.sessionKey, sessionKey: params.sessionKey,
config: params.config, config: params.config,

View File

@@ -1,9 +1,11 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage } from "@mariozechner/pi-ai"; import type { AssistantMessage } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest"; import { describe, expect, it } from "vitest";
import type { ClawdbotConfig } from "../config/config.js";
import { import {
buildBootstrapContextFiles, buildBootstrapContextFiles,
classifyFailoverReason, classifyFailoverReason,
DEFAULT_BOOTSTRAP_MAX_CHARS,
formatAssistantErrorText, formatAssistantErrorText,
isAuthErrorMessage, isAuthErrorMessage,
isBillingErrorMessage, isBillingErrorMessage,
@@ -13,6 +15,7 @@ import {
isMessagingToolDuplicate, isMessagingToolDuplicate,
isFailoverErrorMessage, isFailoverErrorMessage,
normalizeTextForComparison, normalizeTextForComparison,
resolveBootstrapMaxChars,
sanitizeGoogleTurnOrdering, sanitizeGoogleTurnOrdering,
sanitizeSessionMessagesImages, sanitizeSessionMessagesImages,
sanitizeToolCallId, sanitizeToolCallId,
@@ -49,17 +52,58 @@ describe("buildBootstrapContextFiles", () => {
}); });
it("truncates large bootstrap content", () => { it("truncates large bootstrap content", () => {
const head = `HEAD-${"a".repeat(6000)}`; const head = `HEAD-${"a".repeat(600)}`;
const tail = `${"b".repeat(3000)}-TAIL`; const tail = `${"b".repeat(300)}-TAIL`;
const long = `${head}${tail}`; const long = `${head}${tail}`;
const files = [makeFile({ content: long })]; const files = [makeFile({ name: "TOOLS.md", content: long })];
const [result] = buildBootstrapContextFiles(files); const warnings: string[] = [];
const maxChars = 200;
const expectedTailChars = Math.floor(maxChars * 0.2);
const [result] = buildBootstrapContextFiles(files, {
maxChars,
warn: (message) => warnings.push(message),
});
expect(result?.content).toContain( expect(result?.content).toContain(
"[...truncated, read AGENTS.md for full content...]", "[...truncated, read TOOLS.md for full content...]",
); );
expect(result?.content.length).toBeLessThan(long.length); expect(result?.content.length).toBeLessThan(long.length);
expect(result?.content.startsWith(long.slice(0, 120))).toBe(true); expect(result?.content.startsWith(long.slice(0, 120))).toBe(true);
expect(result?.content.endsWith(long.slice(-120))).toBe(true); expect(result?.content.endsWith(long.slice(-expectedTailChars))).toBe(
true,
);
expect(warnings).toHaveLength(1);
expect(warnings[0]).toContain("TOOLS.md");
expect(warnings[0]).toContain("limit 200");
});
it("keeps content under the default limit", () => {
const long = "a".repeat(DEFAULT_BOOTSTRAP_MAX_CHARS - 10);
const files = [makeFile({ content: long })];
const [result] = buildBootstrapContextFiles(files);
expect(result?.content).toBe(long);
expect(result?.content).not.toContain(
"[...truncated, read AGENTS.md for full content...]",
);
});
});
describe("resolveBootstrapMaxChars", () => {
it("returns default when unset", () => {
expect(resolveBootstrapMaxChars()).toBe(DEFAULT_BOOTSTRAP_MAX_CHARS);
});
it("uses configured value when valid", () => {
const cfg = {
agents: { defaults: { bootstrapMaxChars: 12345 } },
} as ClawdbotConfig;
expect(resolveBootstrapMaxChars(cfg)).toBe(12345);
});
it("falls back when invalid", () => {
const cfg = {
agents: { defaults: { bootstrapMaxChars: -1 } },
} as ClawdbotConfig;
expect(resolveBootstrapMaxChars(cfg)).toBe(DEFAULT_BOOTSTRAP_MAX_CHARS);
}); });
}); });

View File

@@ -53,23 +53,57 @@ export function stripThoughtSignatures<T>(content: T): T {
}) as T; }) as T;
} }
const MAX_BOOTSTRAP_CHARS = 4000; export const DEFAULT_BOOTSTRAP_MAX_CHARS = 20_000;
const BOOTSTRAP_HEAD_CHARS = 2800; const BOOTSTRAP_HEAD_RATIO = 0.7;
const BOOTSTRAP_TAIL_CHARS = 800; const BOOTSTRAP_TAIL_RATIO = 0.2;
function trimBootstrapContent(content: string, fileName: string): string { type TrimBootstrapResult = {
content: string;
truncated: boolean;
maxChars: number;
originalLength: number;
};
/**
 * Resolves the per-file character limit applied to workspace bootstrap files
 * before they are truncated for injection.
 *
 * Reads `agents.defaults.bootstrapMaxChars` from the given config. The value is
 * rounded down to a whole number of characters and accepted only when the
 * rounded result is a finite number of at least 1; otherwise
 * `DEFAULT_BOOTSTRAP_MAX_CHARS` is returned.
 *
 * @param cfg - Optional config; may be omitted entirely.
 * @returns The configured limit, or the default when unset or invalid.
 */
export function resolveBootstrapMaxChars(cfg?: ClawdbotConfig): number {
  const raw = cfg?.agents?.defaults?.bootstrapMaxChars;
  if (typeof raw !== "number" || !Number.isFinite(raw)) {
    return DEFAULT_BOOTSTRAP_MAX_CHARS;
  }
  // Floor before validating: a fractional value in (0, 1) would otherwise pass
  // a `> 0` check and then round down to a limit of 0.
  const limit = Math.floor(raw);
  return limit >= 1 ? limit : DEFAULT_BOOTSTRAP_MAX_CHARS;
}
function trimBootstrapContent(
content: string,
fileName: string,
maxChars: number,
): TrimBootstrapResult {
const trimmed = content.trimEnd(); const trimmed = content.trimEnd();
if (trimmed.length <= MAX_BOOTSTRAP_CHARS) return trimmed; if (trimmed.length <= maxChars) {
return {
content: trimmed,
truncated: false,
maxChars,
originalLength: trimmed.length,
};
}
const head = trimmed.slice(0, BOOTSTRAP_HEAD_CHARS); const headChars = Math.max(1, Math.floor(maxChars * BOOTSTRAP_HEAD_RATIO));
const tail = trimmed.slice(-BOOTSTRAP_TAIL_CHARS); const tailChars = Math.max(1, Math.floor(maxChars * BOOTSTRAP_TAIL_RATIO));
return [ const head = trimmed.slice(0, headChars);
const tail = trimmed.slice(-tailChars);
const contentWithMarker = [
head, head,
"", "",
`[...truncated, read ${fileName} for full content...]`, `[...truncated, read ${fileName} for full content...]`,
"", "",
tail, tail,
].join("\n"); ].join("\n");
return {
content: contentWithMarker,
truncated: true,
maxChars,
originalLength: trimmed.length,
};
} }
export async function ensureSessionHeader(params: { export async function ensureSessionHeader(params: {
@@ -254,7 +288,9 @@ export function sanitizeGoogleTurnOrdering(
export function buildBootstrapContextFiles( export function buildBootstrapContextFiles(
files: WorkspaceBootstrapFile[], files: WorkspaceBootstrapFile[],
opts?: { warn?: (message: string) => void; maxChars?: number },
): EmbeddedContextFile[] { ): EmbeddedContextFile[] {
const maxChars = opts?.maxChars ?? DEFAULT_BOOTSTRAP_MAX_CHARS;
const result: EmbeddedContextFile[] = []; const result: EmbeddedContextFile[] = [];
for (const file of files) { for (const file of files) {
if (file.missing) { if (file.missing) {
@@ -264,11 +300,20 @@ export function buildBootstrapContextFiles(
}); });
continue; continue;
} }
const trimmed = trimBootstrapContent(file.content ?? "", file.name); const trimmed = trimBootstrapContent(
if (!trimmed) continue; file.content ?? "",
file.name,
maxChars,
);
if (!trimmed.content) continue;
if (trimmed.truncated) {
opts?.warn?.(
`workspace bootstrap file ${file.name} is ${trimmed.originalLength} chars (limit ${trimmed.maxChars}); truncating in injected context`,
);
}
result.push({ result.push({
path: file.name, path: file.name,
content: trimmed, content: trimmed.content,
}); });
} }
return result; return result;

View File

@@ -99,6 +99,7 @@ import {
isRateLimitAssistantError, isRateLimitAssistantError,
isTimeoutErrorMessage, isTimeoutErrorMessage,
pickFallbackThinkingLevel, pickFallbackThinkingLevel,
resolveBootstrapMaxChars,
sanitizeGoogleTurnOrdering, sanitizeGoogleTurnOrdering,
sanitizeSessionMessagesImages, sanitizeSessionMessagesImages,
validateAnthropicTurns, validateAnthropicTurns,
@@ -1152,7 +1153,12 @@ export async function compactEmbeddedPiSession(params: {
await loadWorkspaceBootstrapFiles(effectiveWorkspace), await loadWorkspaceBootstrapFiles(effectiveWorkspace),
params.sessionKey ?? params.sessionId, params.sessionKey ?? params.sessionId,
); );
const contextFiles = buildBootstrapContextFiles(bootstrapFiles); const sessionLabel = params.sessionKey ?? params.sessionId;
const contextFiles = buildBootstrapContextFiles(bootstrapFiles, {
maxChars: resolveBootstrapMaxChars(params.config),
warn: (message) =>
log.warn(`${message} (sessionKey=${sessionLabel})`),
});
const runAbortController = new AbortController(); const runAbortController = new AbortController();
const tools = createClawdbotCodingTools({ const tools = createClawdbotCodingTools({
exec: { exec: {
@@ -1584,7 +1590,12 @@ export async function runEmbeddedPiAgent(params: {
await loadWorkspaceBootstrapFiles(effectiveWorkspace), await loadWorkspaceBootstrapFiles(effectiveWorkspace),
params.sessionKey ?? params.sessionId, params.sessionKey ?? params.sessionId,
); );
const contextFiles = buildBootstrapContextFiles(bootstrapFiles); const sessionLabel = params.sessionKey ?? params.sessionId;
const contextFiles = buildBootstrapContextFiles(bootstrapFiles, {
maxChars: resolveBootstrapMaxChars(params.config),
warn: (message) =>
log.warn(`${message} (sessionKey=${sessionLabel})`),
});
// Tool schemas must be provider-compatible (OpenAI requires top-level `type: "object"`). // Tool schemas must be provider-compatible (OpenAI requires top-level `type: "object"`).
// `createClawdbotCodingTools()` normalizes schemas so the session can pass them through unchanged. // `createClawdbotCodingTools()` normalizes schemas so the session can pass them through unchanged.
const tools = createClawdbotCodingTools({ const tools = createClawdbotCodingTools({

View File

@@ -115,6 +115,7 @@ const FIELD_LABELS: Record<string, string> = {
"gateway.reload.mode": "Config Reload Mode", "gateway.reload.mode": "Config Reload Mode",
"gateway.reload.debounceMs": "Config Reload Debounce (ms)", "gateway.reload.debounceMs": "Config Reload Debounce (ms)",
"agents.defaults.workspace": "Workspace", "agents.defaults.workspace": "Workspace",
"agents.defaults.bootstrapMaxChars": "Bootstrap Max Chars",
"agents.defaults.memorySearch": "Memory Search", "agents.defaults.memorySearch": "Memory Search",
"agents.defaults.memorySearch.enabled": "Enable Memory Search", "agents.defaults.memorySearch.enabled": "Enable Memory Search",
"agents.defaults.memorySearch.provider": "Memory Search Provider", "agents.defaults.memorySearch.provider": "Memory Search Provider",
@@ -233,6 +234,8 @@ const FIELD_HELP: Record<string, string> = {
"Cap (hours) for billing backoff (default: 24).", "Cap (hours) for billing backoff (default: 24).",
"auth.cooldowns.failureWindowHours": "auth.cooldowns.failureWindowHours":
"Failure window (hours) for backoff counters (default: 24).", "Failure window (hours) for backoff counters (default: 24).",
"agents.defaults.bootstrapMaxChars":
"Max characters of each workspace bootstrap file injected into the system prompt before truncation (default: 20000).",
"agents.defaults.models": "agents.defaults.models":
"Configured model catalog (keys are full provider/model IDs).", "Configured model catalog (keys are full provider/model IDs).",
"agents.defaults.memorySearch": "agents.defaults.memorySearch":

View File

@@ -1593,6 +1593,8 @@ export type AgentDefaultsConfig = {
workspace?: string; workspace?: string;
/** Skip bootstrap (BOOTSTRAP.md creation, etc.) for pre-configured deployments. */ /** Skip bootstrap (BOOTSTRAP.md creation, etc.) for pre-configured deployments. */
skipBootstrap?: boolean; skipBootstrap?: boolean;
/** Max chars for injected bootstrap files before truncation (default: 20000). */
bootstrapMaxChars?: number;
/** Optional IANA timezone for the user (used in system prompt; defaults to host timezone). */ /** Optional IANA timezone for the user (used in system prompt; defaults to host timezone). */
userTimezone?: string; userTimezone?: string;
/** Optional display-only context window override (used for % in status UIs). */ /** Optional display-only context window override (used for % in status UIs). */

View File

@@ -1169,6 +1169,7 @@ const AgentDefaultsSchema = z
.optional(), .optional(),
workspace: z.string().optional(), workspace: z.string().optional(),
skipBootstrap: z.boolean().optional(), skipBootstrap: z.boolean().optional(),
bootstrapMaxChars: z.number().int().positive().optional(),
userTimezone: z.string().optional(), userTimezone: z.string().optional(),
contextTokens: z.number().int().positive().optional(), contextTokens: z.number().int().positive().optional(),
cliBackends: z.record(z.string(), CliBackendSchema).optional(), cliBackends: z.record(z.string(), CliBackendSchema).optional(),