feat: add configurable bootstrap truncation

This commit is contained in:
Peter Steinberger
2026-01-13 04:24:17 +00:00
parent ea5597b483
commit 755a7e1b20
12 changed files with 154 additions and 24 deletions

View File

@@ -21,6 +21,7 @@ import {
classifyFailoverReason,
type EmbeddedContextFile,
isFailoverErrorMessage,
resolveBootstrapMaxChars,
} from "./pi-embedded-helpers.js";
import type { EmbeddedPiRunResult } from "./pi-embedded-runner.js";
import { buildAgentSystemPrompt } from "./system-prompt.js";
@@ -493,7 +494,11 @@ export async function runCliAgent(params: {
await loadWorkspaceBootstrapFiles(workspaceDir),
params.sessionKey ?? params.sessionId,
);
const contextFiles = buildBootstrapContextFiles(bootstrapFiles);
const sessionLabel = params.sessionKey ?? params.sessionId;
const contextFiles = buildBootstrapContextFiles(bootstrapFiles, {
maxChars: resolveBootstrapMaxChars(params.config),
warn: (message) => log.warn(`${message} (sessionKey=${sessionLabel})`),
});
const { defaultAgentId, sessionAgentId } = resolveSessionAgentIds({
sessionKey: params.sessionKey,
config: params.config,

View File

@@ -1,9 +1,11 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import type { ClawdbotConfig } from "../config/config.js";
import {
buildBootstrapContextFiles,
classifyFailoverReason,
DEFAULT_BOOTSTRAP_MAX_CHARS,
formatAssistantErrorText,
isAuthErrorMessage,
isBillingErrorMessage,
@@ -13,6 +15,7 @@ import {
isMessagingToolDuplicate,
isFailoverErrorMessage,
normalizeTextForComparison,
resolveBootstrapMaxChars,
sanitizeGoogleTurnOrdering,
sanitizeSessionMessagesImages,
sanitizeToolCallId,
@@ -49,17 +52,58 @@ describe("buildBootstrapContextFiles", () => {
});
it("truncates large bootstrap content", () => {
const head = `HEAD-${"a".repeat(6000)}`;
const tail = `${"b".repeat(3000)}-TAIL`;
const head = `HEAD-${"a".repeat(600)}`;
const tail = `${"b".repeat(300)}-TAIL`;
const long = `${head}${tail}`;
const files = [makeFile({ content: long })];
const [result] = buildBootstrapContextFiles(files);
const files = [makeFile({ name: "TOOLS.md", content: long })];
const warnings: string[] = [];
const maxChars = 200;
const expectedTailChars = Math.floor(maxChars * 0.2);
const [result] = buildBootstrapContextFiles(files, {
maxChars,
warn: (message) => warnings.push(message),
});
expect(result?.content).toContain(
"[...truncated, read AGENTS.md for full content...]",
"[...truncated, read TOOLS.md for full content...]",
);
expect(result?.content.length).toBeLessThan(long.length);
expect(result?.content.startsWith(long.slice(0, 120))).toBe(true);
expect(result?.content.endsWith(long.slice(-120))).toBe(true);
expect(result?.content.endsWith(long.slice(-expectedTailChars))).toBe(
true,
);
expect(warnings).toHaveLength(1);
expect(warnings[0]).toContain("TOOLS.md");
expect(warnings[0]).toContain("limit 200");
});
// Content shorter than the default limit must pass through untouched, with no truncation marker.
it("keeps content under the default limit", () => {
  const underLimit = "a".repeat(DEFAULT_BOOTSTRAP_MAX_CHARS - 10);
  const contextFiles = buildBootstrapContextFiles([
    makeFile({ content: underLimit }),
  ]);
  const first = contextFiles[0];
  expect(first?.content).toBe(underLimit);
  expect(first?.content).not.toContain(
    "[...truncated, read AGENTS.md for full content...]",
  );
});
});
// Exercises resolveBootstrapMaxChars: default when no config is given, configured
// value when it is valid, and fallback to the default for an invalid value.
describe("resolveBootstrapMaxChars", () => {
  // Builds a minimal config carrying only the bootstrap limit under test.
  const configWithLimit = (bootstrapMaxChars: number): ClawdbotConfig =>
    ({
      agents: { defaults: { bootstrapMaxChars } },
    }) as ClawdbotConfig;

  it("returns default when unset", () => {
    const resolved = resolveBootstrapMaxChars();
    expect(resolved).toBe(DEFAULT_BOOTSTRAP_MAX_CHARS);
  });

  it("uses configured value when valid", () => {
    const resolved = resolveBootstrapMaxChars(configWithLimit(12345));
    expect(resolved).toBe(12345);
  });

  it("falls back when invalid", () => {
    const resolved = resolveBootstrapMaxChars(configWithLimit(-1));
    expect(resolved).toBe(DEFAULT_BOOTSTRAP_MAX_CHARS);
  });
});

View File

@@ -53,23 +53,57 @@ export function stripThoughtSignatures<T>(content: T): T {
}) as T;
}
const MAX_BOOTSTRAP_CHARS = 4000;
const BOOTSTRAP_HEAD_CHARS = 2800;
const BOOTSTRAP_TAIL_CHARS = 800;
export const DEFAULT_BOOTSTRAP_MAX_CHARS = 20_000;
const BOOTSTRAP_HEAD_RATIO = 0.7;
const BOOTSTRAP_TAIL_RATIO = 0.2;
function trimBootstrapContent(content: string, fileName: string): string {
/** Outcome of trimming one bootstrap file's content against a character limit. */
type TrimBootstrapResult = {
/** Text to inject: the end-trimmed content, or head + truncation marker + tail when truncated. */
content: string;
/** True when the end-trimmed content exceeded `maxChars` and was cut down. */
truncated: boolean;
/** The character limit that was applied. */
maxChars: number;
/** Length of the content after `trimEnd()`, before any truncation. */
originalLength: number;
};
/**
 * Resolves the per-file character limit for injected workspace bootstrap content.
 *
 * Reads `agents.defaults.bootstrapMaxChars` from the config. A finite number
 * that is at least 1 after flooring is returned as that floored integer; any
 * other value (missing, non-number, NaN/Infinity, zero, negative, or a
 * fraction below 1) falls back to `DEFAULT_BOOTSTRAP_MAX_CHARS`.
 *
 * @param cfg - Optional config; when omitted the default limit is returned.
 * @returns A whole-number character limit of at least 1.
 */
export function resolveBootstrapMaxChars(cfg?: ClawdbotConfig): number {
  const raw = cfg?.agents?.defaults?.bootstrapMaxChars;
  if (typeof raw !== "number" || !Number.isFinite(raw)) {
    return DEFAULT_BOOTSTRAP_MAX_CHARS;
  }
  // Floor before the range check: a fractional value in (0, 1) would otherwise
  // pass a `> 0` test and then floor to 0, yielding a zero-character limit.
  const limit = Math.floor(raw);
  return limit >= 1 ? limit : DEFAULT_BOOTSTRAP_MAX_CHARS;
}
function trimBootstrapContent(
content: string,
fileName: string,
maxChars: number,
): TrimBootstrapResult {
const trimmed = content.trimEnd();
if (trimmed.length <= MAX_BOOTSTRAP_CHARS) return trimmed;
if (trimmed.length <= maxChars) {
return {
content: trimmed,
truncated: false,
maxChars,
originalLength: trimmed.length,
};
}
const head = trimmed.slice(0, BOOTSTRAP_HEAD_CHARS);
const tail = trimmed.slice(-BOOTSTRAP_TAIL_CHARS);
return [
const headChars = Math.max(1, Math.floor(maxChars * BOOTSTRAP_HEAD_RATIO));
const tailChars = Math.max(1, Math.floor(maxChars * BOOTSTRAP_TAIL_RATIO));
const head = trimmed.slice(0, headChars);
const tail = trimmed.slice(-tailChars);
const contentWithMarker = [
head,
"",
`[...truncated, read ${fileName} for full content...]`,
"",
tail,
].join("\n");
return {
content: contentWithMarker,
truncated: true,
maxChars,
originalLength: trimmed.length,
};
}
export async function ensureSessionHeader(params: {
@@ -254,7 +288,9 @@ export function sanitizeGoogleTurnOrdering(
export function buildBootstrapContextFiles(
files: WorkspaceBootstrapFile[],
opts?: { warn?: (message: string) => void; maxChars?: number },
): EmbeddedContextFile[] {
const maxChars = opts?.maxChars ?? DEFAULT_BOOTSTRAP_MAX_CHARS;
const result: EmbeddedContextFile[] = [];
for (const file of files) {
if (file.missing) {
@@ -264,11 +300,20 @@ export function buildBootstrapContextFiles(
});
continue;
}
const trimmed = trimBootstrapContent(file.content ?? "", file.name);
if (!trimmed) continue;
const trimmed = trimBootstrapContent(
file.content ?? "",
file.name,
maxChars,
);
if (!trimmed.content) continue;
if (trimmed.truncated) {
opts?.warn?.(
`workspace bootstrap file ${file.name} is ${trimmed.originalLength} chars (limit ${trimmed.maxChars}); truncating in injected context`,
);
}
result.push({
path: file.name,
content: trimmed,
content: trimmed.content,
});
}
return result;

View File

@@ -99,6 +99,7 @@ import {
isRateLimitAssistantError,
isTimeoutErrorMessage,
pickFallbackThinkingLevel,
resolveBootstrapMaxChars,
sanitizeGoogleTurnOrdering,
sanitizeSessionMessagesImages,
validateAnthropicTurns,
@@ -1152,7 +1153,12 @@ export async function compactEmbeddedPiSession(params: {
await loadWorkspaceBootstrapFiles(effectiveWorkspace),
params.sessionKey ?? params.sessionId,
);
const contextFiles = buildBootstrapContextFiles(bootstrapFiles);
const sessionLabel = params.sessionKey ?? params.sessionId;
const contextFiles = buildBootstrapContextFiles(bootstrapFiles, {
maxChars: resolveBootstrapMaxChars(params.config),
warn: (message) =>
log.warn(`${message} (sessionKey=${sessionLabel})`),
});
const runAbortController = new AbortController();
const tools = createClawdbotCodingTools({
exec: {
@@ -1584,7 +1590,12 @@ export async function runEmbeddedPiAgent(params: {
await loadWorkspaceBootstrapFiles(effectiveWorkspace),
params.sessionKey ?? params.sessionId,
);
const contextFiles = buildBootstrapContextFiles(bootstrapFiles);
const sessionLabel = params.sessionKey ?? params.sessionId;
const contextFiles = buildBootstrapContextFiles(bootstrapFiles, {
maxChars: resolveBootstrapMaxChars(params.config),
warn: (message) =>
log.warn(`${message} (sessionKey=${sessionLabel})`),
});
// Tool schemas must be provider-compatible (OpenAI requires top-level `type: "object"`).
// `createClawdbotCodingTools()` normalizes schemas so the session can pass them through unchanged.
const tools = createClawdbotCodingTools({

View File

@@ -115,6 +115,7 @@ const FIELD_LABELS: Record<string, string> = {
"gateway.reload.mode": "Config Reload Mode",
"gateway.reload.debounceMs": "Config Reload Debounce (ms)",
"agents.defaults.workspace": "Workspace",
"agents.defaults.bootstrapMaxChars": "Bootstrap Max Chars",
"agents.defaults.memorySearch": "Memory Search",
"agents.defaults.memorySearch.enabled": "Enable Memory Search",
"agents.defaults.memorySearch.provider": "Memory Search Provider",
@@ -233,6 +234,8 @@ const FIELD_HELP: Record<string, string> = {
"Cap (hours) for billing backoff (default: 24).",
"auth.cooldowns.failureWindowHours":
"Failure window (hours) for backoff counters (default: 24).",
"agents.defaults.bootstrapMaxChars":
"Max characters of each workspace bootstrap file injected into the system prompt before truncation (default: 20000).",
"agents.defaults.models":
"Configured model catalog (keys are full provider/model IDs).",
"agents.defaults.memorySearch":

View File

@@ -1593,6 +1593,8 @@ export type AgentDefaultsConfig = {
workspace?: string;
/** Skip bootstrap (BOOTSTRAP.md creation, etc.) for pre-configured deployments. */
skipBootstrap?: boolean;
/** Max chars for injected bootstrap files before truncation (default: 20000). */
bootstrapMaxChars?: number;
/** Optional IANA timezone for the user (used in system prompt; defaults to host timezone). */
userTimezone?: string;
/** Optional display-only context window override (used for % in status UIs). */

View File

@@ -1169,6 +1169,7 @@ const AgentDefaultsSchema = z
.optional(),
workspace: z.string().optional(),
skipBootstrap: z.boolean().optional(),
bootstrapMaxChars: z.number().int().positive().optional(),
userTimezone: z.string().optional(),
contextTokens: z.number().int().positive().optional(),
cliBackends: z.record(z.string(), CliBackendSchema).optional(),