diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ea3fea46..9be72e8d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Changes - Models/Moonshot: add Kimi K2 0905 + turbo/thinking variants to the preset + docs. (#818 — thanks @mickahouan) - Memory: allow custom OpenAI-compatible embedding endpoints for memory search (remote baseUrl/apiKey/headers). (#819 — thanks @mukhtharcm) +- Agents: make workspace bootstrap truncation configurable (default 20k) and warn when files are truncated. ### Fixes - Typing: keep typing indicators alive during tool execution. (#450, #447 — thanks @thewilloftheshadow) diff --git a/docs/concepts/agent-workspace.md b/docs/concepts/agent-workspace.md index d5f8e36ea..789097bc5 100644 --- a/docs/concepts/agent-workspace.md +++ b/docs/concepts/agent-workspace.md @@ -109,8 +109,10 @@ See [Memory](/concepts/memory) for the workflow and automatic memory flush. - Canvas UI files for node displays (for example `canvas/index.html`). If any bootstrap file is missing, Clawdbot injects a "missing file" marker into -the session and continues. `clawdbot setup` can recreate missing defaults -without overwriting existing files. +the session and continues. Large bootstrap files are truncated when injected; +adjust the limit with `agents.defaults.bootstrapMaxChars` (default: 20000). +`clawdbot setup` can recreate missing defaults without overwriting existing +files. ## What is NOT in the workspace diff --git a/docs/concepts/system-prompt.md b/docs/concepts/system-prompt.md index a81b1583b..75de873c1 100644 --- a/docs/concepts/system-prompt.md +++ b/docs/concepts/system-prompt.md @@ -38,7 +38,9 @@ Bootstrap files are trimmed and appended under **Project Context** so the model - `HEARTBEAT.md` - `BOOTSTRAP.md` (only on brand-new workspaces) -Large files are truncated with a marker. Missing files inject a short missing-file marker. +Large files are truncated with a marker. The max per-file size is controlled by +`agents.defaults.bootstrapMaxChars` (default: 20000). Missing files inject a +short missing-file marker. ## Time handling diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 85a018c2b..2582a737d 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -1075,6 +1075,20 @@ Use this for pre-seeded deployments where your workspace files come from a repo. } ``` +### `agents.defaults.bootstrapMaxChars` + +Max characters of each workspace bootstrap file injected into the system prompt +before truncation. Default: `20000`. + +When a file exceeds this limit, Clawdbot logs a warning and injects a truncated +head/tail with a marker. + +```json5 +{ + agents: { defaults: { bootstrapMaxChars: 20000 } } +} +``` + ### `agents.defaults.userTimezone` Sets the user’s timezone for **system prompt context** (not for timestamps in diff --git a/docs/token-use.md b/docs/token-use.md index d628b1fb6..c75108964 100644 --- a/docs/token-use.md +++ b/docs/token-use.md @@ -16,7 +16,7 @@ Clawdbot assembles its own system prompt on every run. It includes: - Tool list + short descriptions - Skills list (only metadata; instructions are loaded on demand with `read`) - Self-update instructions -- Workspace + bootstrap files (`AGENTS.md`, `SOUL.md`, `TOOLS.md`, `IDENTITY.md`, `USER.md`, `HEARTBEAT.md`, `BOOTSTRAP.md` when new) +- Workspace + bootstrap files (`AGENTS.md`, `SOUL.md`, `TOOLS.md`, `IDENTITY.md`, `USER.md`, `HEARTBEAT.md`, `BOOTSTRAP.md` when new). Large files are truncated by `agents.defaults.bootstrapMaxChars` (default: 20000). - Time (UTC + user timezone) - Reply tags + heartbeat behavior - Runtime metadata (host/OS/model/thinking) diff --git a/src/agents/cli-runner.ts b/src/agents/cli-runner.ts index 9bc7e2528..0706bbeb7 100644 --- a/src/agents/cli-runner.ts +++ b/src/agents/cli-runner.ts @@ -21,6 +21,7 @@ import { classifyFailoverReason, type EmbeddedContextFile, isFailoverErrorMessage, + resolveBootstrapMaxChars, } from "./pi-embedded-helpers.js"; import type { EmbeddedPiRunResult } from "./pi-embedded-runner.js"; import { buildAgentSystemPrompt } from "./system-prompt.js"; @@ -493,7 +494,11 @@ export async function runCliAgent(params: { await loadWorkspaceBootstrapFiles(workspaceDir), params.sessionKey ?? params.sessionId, ); - const contextFiles = buildBootstrapContextFiles(bootstrapFiles); + const sessionLabel = params.sessionKey ?? params.sessionId; + const contextFiles = buildBootstrapContextFiles(bootstrapFiles, { + maxChars: resolveBootstrapMaxChars(params.config), + warn: (message) => log.warn(`${message} (sessionKey=${sessionLabel})`), + }); const { defaultAgentId, sessionAgentId } = resolveSessionAgentIds({ sessionKey: params.sessionKey, config: params.config, diff --git a/src/agents/pi-embedded-helpers.test.ts b/src/agents/pi-embedded-helpers.test.ts index dcb19ed83..74d898c98 100644 --- a/src/agents/pi-embedded-helpers.test.ts +++ b/src/agents/pi-embedded-helpers.test.ts @@ -1,9 +1,11 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { AssistantMessage } from "@mariozechner/pi-ai"; import { describe, expect, it } from "vitest"; +import type { ClawdbotConfig } from "../config/config.js"; import { buildBootstrapContextFiles, classifyFailoverReason, + DEFAULT_BOOTSTRAP_MAX_CHARS, formatAssistantErrorText, isAuthErrorMessage, isBillingErrorMessage, @@ -13,6 +15,7 @@ import { isMessagingToolDuplicate, isFailoverErrorMessage, normalizeTextForComparison, + resolveBootstrapMaxChars, sanitizeGoogleTurnOrdering, sanitizeSessionMessagesImages, sanitizeToolCallId, @@ -49,17 +52,58 @@ describe("buildBootstrapContextFiles", () => { }); it("truncates large bootstrap content", () => { - const head = `HEAD-${"a".repeat(6000)}`; - const tail = `${"b".repeat(3000)}-TAIL`; + const head = `HEAD-${"a".repeat(600)}`; + const tail = `${"b".repeat(300)}-TAIL`; const long = `${head}${tail}`; - const files = [makeFile({ content: long })]; - const [result] = buildBootstrapContextFiles(files); + const files = [makeFile({ name: "TOOLS.md", content: long })]; + const warnings: string[] = []; + const maxChars = 200; + const expectedTailChars = Math.floor(maxChars * 0.2); + const [result] = buildBootstrapContextFiles(files, { + maxChars, + warn: (message) => warnings.push(message), + }); expect(result?.content).toContain( - "[...truncated, read AGENTS.md for full content...]", + "[...truncated, read TOOLS.md for full content...]", ); expect(result?.content.length).toBeLessThan(long.length); expect(result?.content.startsWith(long.slice(0, 120))).toBe(true); - expect(result?.content.endsWith(long.slice(-120))).toBe(true); + expect(result?.content.endsWith(long.slice(-expectedTailChars))).toBe( + true, + ); + expect(warnings).toHaveLength(1); + expect(warnings[0]).toContain("TOOLS.md"); + expect(warnings[0]).toContain("limit 200"); + }); + + it("keeps content under the default limit", () => { + const long = "a".repeat(DEFAULT_BOOTSTRAP_MAX_CHARS - 10); + const files = [makeFile({ content: long })]; + const [result] = buildBootstrapContextFiles(files); + expect(result?.content).toBe(long); + expect(result?.content).not.toContain( + "[...truncated, read AGENTS.md for full content...]", + ); + }); +}); + +describe("resolveBootstrapMaxChars", () => { + it("returns default when unset", () => { + expect(resolveBootstrapMaxChars()).toBe(DEFAULT_BOOTSTRAP_MAX_CHARS); + }); + + it("uses configured value when valid", () => { + const cfg = { + agents: { defaults: { bootstrapMaxChars: 12345 } }, + } as ClawdbotConfig; + expect(resolveBootstrapMaxChars(cfg)).toBe(12345); + }); + + it("falls back when invalid", () => { + const cfg = { + agents: { defaults: { bootstrapMaxChars: -1 } }, + } as ClawdbotConfig; + expect(resolveBootstrapMaxChars(cfg)).toBe(DEFAULT_BOOTSTRAP_MAX_CHARS); }); }); diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts index 88e1b1447..02a05d4a5 100644 --- a/src/agents/pi-embedded-helpers.ts +++ b/src/agents/pi-embedded-helpers.ts @@ -53,23 +53,57 @@ export function stripThoughtSignatures(content: T): T { }) as T; } -const MAX_BOOTSTRAP_CHARS = 4000; -const BOOTSTRAP_HEAD_CHARS = 2800; -const BOOTSTRAP_TAIL_CHARS = 800; +export const DEFAULT_BOOTSTRAP_MAX_CHARS = 20_000; +const BOOTSTRAP_HEAD_RATIO = 0.7; +const BOOTSTRAP_TAIL_RATIO = 0.2; -function trimBootstrapContent(content: string, fileName: string): string { +type TrimBootstrapResult = { + content: string; + truncated: boolean; + maxChars: number; + originalLength: number; +}; + +export function resolveBootstrapMaxChars(cfg?: ClawdbotConfig): number { + const raw = cfg?.agents?.defaults?.bootstrapMaxChars; + if (typeof raw === "number" && Number.isFinite(raw) && raw > 0) { + return Math.floor(raw); + } + return DEFAULT_BOOTSTRAP_MAX_CHARS; +} + +function trimBootstrapContent( + content: string, + fileName: string, + maxChars: number, +): TrimBootstrapResult { const trimmed = content.trimEnd(); - if (trimmed.length <= MAX_BOOTSTRAP_CHARS) return trimmed; + if (trimmed.length <= maxChars) { + return { + content: trimmed, + truncated: false, + maxChars, + originalLength: trimmed.length, + }; + } - const head = trimmed.slice(0, BOOTSTRAP_HEAD_CHARS); - const tail = trimmed.slice(-BOOTSTRAP_TAIL_CHARS); - return [ + const headChars = Math.max(1, Math.floor(maxChars * BOOTSTRAP_HEAD_RATIO)); + const tailChars = Math.max(1, Math.floor(maxChars * BOOTSTRAP_TAIL_RATIO)); + const head = trimmed.slice(0, headChars); + const tail = trimmed.slice(-tailChars); + const contentWithMarker = [ head, "", `[...truncated, read ${fileName} for full content...]`, "", tail, ].join("\n"); + return { + content: contentWithMarker, + truncated: true, + maxChars, + originalLength: trimmed.length, + }; } export async function ensureSessionHeader(params: { @@ -254,7 +288,9 @@ export function sanitizeGoogleTurnOrdering( export function buildBootstrapContextFiles( files: WorkspaceBootstrapFile[], + opts?: { warn?: (message: string) => void; maxChars?: number }, ): EmbeddedContextFile[] { + const maxChars = opts?.maxChars ?? DEFAULT_BOOTSTRAP_MAX_CHARS; const result: EmbeddedContextFile[] = []; for (const file of files) { if (file.missing) { @@ -264,11 +300,20 @@ export function buildBootstrapContextFiles( }); continue; } - const trimmed = trimBootstrapContent(file.content ?? "", file.name); - if (!trimmed) continue; + const trimmed = trimBootstrapContent( + file.content ?? "", + file.name, + maxChars, + ); + if (!trimmed.content) continue; + if (trimmed.truncated) { + opts?.warn?.( + `workspace bootstrap file ${file.name} is ${trimmed.originalLength} chars (limit ${trimmed.maxChars}); truncating in injected context`, + ); + } result.push({ path: file.name, - content: trimmed, + content: trimmed.content, }); } return result; diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts index 51f711e74..521d22212 100644 --- a/src/agents/pi-embedded-runner.ts +++ b/src/agents/pi-embedded-runner.ts @@ -99,6 +99,7 @@ import { isRateLimitAssistantError, isTimeoutErrorMessage, pickFallbackThinkingLevel, + resolveBootstrapMaxChars, sanitizeGoogleTurnOrdering, sanitizeSessionMessagesImages, validateAnthropicTurns, @@ -1152,7 +1153,12 @@ export async function compactEmbeddedPiSession(params: { await loadWorkspaceBootstrapFiles(effectiveWorkspace), params.sessionKey ?? params.sessionId, ); - const contextFiles = buildBootstrapContextFiles(bootstrapFiles); + const sessionLabel = params.sessionKey ?? params.sessionId; + const contextFiles = buildBootstrapContextFiles(bootstrapFiles, { + maxChars: resolveBootstrapMaxChars(params.config), + warn: (message) => + log.warn(`${message} (sessionKey=${sessionLabel})`), + }); const runAbortController = new AbortController(); const tools = createClawdbotCodingTools({ exec: { @@ -1584,7 +1590,12 @@ export async function runEmbeddedPiAgent(params: { await loadWorkspaceBootstrapFiles(effectiveWorkspace), params.sessionKey ?? params.sessionId, ); - const contextFiles = buildBootstrapContextFiles(bootstrapFiles); + const sessionLabel = params.sessionKey ?? params.sessionId; + const contextFiles = buildBootstrapContextFiles(bootstrapFiles, { + maxChars: resolveBootstrapMaxChars(params.config), + warn: (message) => + log.warn(`${message} (sessionKey=${sessionLabel})`), + }); // Tool schemas must be provider-compatible (OpenAI requires top-level `type: "object"`). // `createClawdbotCodingTools()` normalizes schemas so the session can pass them through unchanged. const tools = createClawdbotCodingTools({ diff --git a/src/config/schema.ts b/src/config/schema.ts index 738420b2d..b213b0ea2 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -115,6 +115,7 @@ const FIELD_LABELS: Record = { "gateway.reload.mode": "Config Reload Mode", "gateway.reload.debounceMs": "Config Reload Debounce (ms)", "agents.defaults.workspace": "Workspace", + "agents.defaults.bootstrapMaxChars": "Bootstrap Max Chars", "agents.defaults.memorySearch": "Memory Search", "agents.defaults.memorySearch.enabled": "Enable Memory Search", "agents.defaults.memorySearch.provider": "Memory Search Provider", @@ -233,6 +234,8 @@ const FIELD_HELP: Record = { "Cap (hours) for billing backoff (default: 24).", "auth.cooldowns.failureWindowHours": "Failure window (hours) for backoff counters (default: 24).", + "agents.defaults.bootstrapMaxChars": + "Max characters of each workspace bootstrap file injected into the system prompt before truncation (default: 20000).", "agents.defaults.models": "Configured model catalog (keys are full provider/model IDs).", "agents.defaults.memorySearch": diff --git a/src/config/types.ts b/src/config/types.ts index ba008a610..16c43578b 100644 --- a/src/config/types.ts +++ b/src/config/types.ts @@ -1593,6 +1593,8 @@ export type AgentDefaultsConfig = { workspace?: string; /** Skip bootstrap (BOOTSTRAP.md creation, etc.) for pre-configured deployments. */ skipBootstrap?: boolean; + /** Max chars for injected bootstrap files before truncation (default: 20000). */ + bootstrapMaxChars?: number; /** Optional IANA timezone for the user (used in system prompt; defaults to host timezone). */ userTimezone?: string; /** Optional display-only context window override (used for % in status UIs). */ diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index 3f78fad7f..d6a020411 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -1169,6 +1169,7 @@ const AgentDefaultsSchema = z .optional(), workspace: z.string().optional(), skipBootstrap: z.boolean().optional(), + bootstrapMaxChars: z.number().int().positive().optional(), userTimezone: z.string().optional(), contextTokens: z.number().int().positive().optional(), cliBackends: z.record(z.string(), CliBackendSchema).optional(),