diff --git a/CHANGELOG.md b/CHANGELOG.md index 35bddccfe..b785c2ee8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Docs: https://docs.clawd.bot - Exec: prefer bash when fish is default shell, falling back to sh if bash is missing. (#1297) — thanks @ysqander. - Exec: merge login-shell PATH for host=gateway exec while keeping daemon PATH minimal. (#1304) - Plugins: add Nextcloud Talk manifest for plugin config validation. (#1297) — thanks @ysqander. +- Anthropic: default API prompt caching to 1h with configurable TTL override; ignore TTL for OAuth. ## 2026.1.19-3 diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 742fd53a4..65d46932b 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -1414,7 +1414,7 @@ Each `agents.defaults.models` entry can include: - `alias` (optional model shortcut, e.g. `/opus`). - `params` (optional provider-specific API params passed through to the model request). -`params` is also applied to streaming runs (embedded agent + compaction). Supported keys today: `temperature`, `maxTokens`. These merge with call-time options; caller-supplied values win. `temperature` is an advanced knob—leave unset unless you know the model’s defaults and need a change. +`params` is also applied to streaming runs (embedded agent + compaction). Supported keys today: `temperature`, `maxTokens`, `cacheControlTtl` (`"5m"` or `"1h"`, Anthropic API + OpenRouter Anthropic models only; ignored for Anthropic OAuth/Claude Code tokens). These merge with call-time options; caller-supplied values win. `temperature` is an advanced knob—leave unset unless you know the model’s defaults and need a change. Anthropic API defaults to `"1h"` unless you override (`cacheControlTtl: "5m"`). Clawdbot includes the `extended-cache-ttl-2025-04-11` beta flag for Anthropic API; keep it if you override provider headers. Example: diff --git a/docs/providers/anthropic.md b/docs/providers/anthropic.md index 5c93e65c2..9c72b990a 100644 --- a/docs/providers/anthropic.md +++ b/docs/providers/anthropic.md @@ -34,6 +34,30 @@ clawdbot onboard --anthropic-api-key "$ANTHROPIC_API_KEY" } ``` +## Prompt caching (Anthropic API) + +Clawdbot enables **1-hour prompt caching by default** for Anthropic API keys. +This is **API-only**; Claude Code CLI OAuth ignores TTL settings. + +To override the TTL per model, set `cacheControlTtl` in the model `params`: + +```json5 +{ + agents: { + defaults: { + models: { + "anthropic/claude-opus-4-5": { + params: { cacheControlTtl: "5m" } // or "1h" + } + } + } + } +} +``` + +Clawdbot includes the `extended-cache-ttl-2025-04-11` beta flag for Anthropic API +requests; keep it if you override provider headers (see [/gateway/configuration](/gateway/configuration)). + ## Option B: Claude Code CLI (setup-token or OAuth) **Best for:** using your Claude subscription or existing Claude Code CLI login. diff --git a/package.json b/package.json index 97439fffd..84129343b 100644 --- a/package.json +++ b/package.json @@ -152,8 +152,8 @@ "@lydell/node-pty": "1.2.0-beta.3", "@mariozechner/pi-agent-core": "0.49.2", "@mariozechner/pi-ai": "0.49.2", - "@mariozechner/pi-coding-agent": "^0.49.2", - "@mariozechner/pi-tui": "^0.49.2", + "@mariozechner/pi-coding-agent": "0.49.2", + "@mariozechner/pi-tui": "0.49.2", "@mozilla/readability": "^0.6.0", "@sinclair/typebox": "0.34.47", "@slack/bolt": "^4.6.0", @@ -232,6 +232,9 @@ "@sinclair/typebox": "0.34.47", "hono": "4.11.4", "tar": "7.5.3" + }, + "patchedDependencies": { + "@mariozechner/pi-ai@0.49.2": "patches/@mariozechner__pi-ai@0.49.2.patch" } }, "vitest": { diff --git a/patches/@mariozechner__pi-ai@0.49.2.patch b/patches/@mariozechner__pi-ai@0.49.2.patch new file mode 100644 index 000000000..57fb965f8 --- /dev/null +++ b/patches/@mariozechner__pi-ai@0.49.2.patch @@ -0,0 +1,135 @@ +diff --git a/dist/providers/anthropic.js b/dist/providers/anthropic.js +index 1cba2f1365812fd2f88993009c9cc06e9c348279..664dd6d8b400ec523fb735480741b9ad64f9a68c 100644 +--- a/dist/providers/anthropic.js ++++ b/dist/providers/anthropic.js +@@ -298,10 +298,11 @@ function createClient(model, apiKey, interleavedThinking) { + }); + return { client, isOAuthToken: true }; + } ++ const apiBetaFeatures = ["extended-cache-ttl-2025-04-11", ...betaFeatures]; + const defaultHeaders = { + accept: "application/json", + "anthropic-dangerous-direct-browser-access": "true", +- "anthropic-beta": betaFeatures.join(","), ++ "anthropic-beta": apiBetaFeatures.join(","), + ...(model.headers || {}), + }; + const client = new Anthropic({ +@@ -313,9 +314,11 @@ function createClient(model, apiKey, interleavedThinking) { + return { client, isOAuthToken: false }; + } + function buildParams(model, context, isOAuthToken, options) { ++ const cacheControlTtl = !isOAuthToken ? (options?.cacheControlTtl ?? "1h") : undefined; ++ const cacheControl = cacheControlTtl ? { type: "ephemeral", ttl: cacheControlTtl } : { type: "ephemeral" }; + const params = { + model: model.id, +- messages: convertMessages(context.messages, model, isOAuthToken), ++ messages: convertMessages(context.messages, model, isOAuthToken, cacheControl), + max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0, + stream: true, + }; +@@ -325,18 +328,14 @@ function buildParams(model, context, isOAuthToken, options) { + { + type: "text", + text: "You are Claude Code, Anthropic's official CLI for Claude.", +- cache_control: { +- type: "ephemeral", +- }, ++ cache_control: cacheControl, + }, + ]; + if (context.systemPrompt) { + params.system.push({ + type: "text", + text: sanitizeSurrogates(context.systemPrompt), +- cache_control: { +- type: "ephemeral", +- }, ++ cache_control: cacheControl, + }); + } + } +@@ -346,9 +345,7 @@ function buildParams(model, context, isOAuthToken, options) { + { + type: "text", + text: sanitizeSurrogates(context.systemPrompt), +- cache_control: { +- type: "ephemeral", +- }, ++ cache_control: cacheControl, + }, + ]; + } +@@ -378,7 +375,7 @@ function buildParams(model, context, isOAuthToken, options) { + function normalizeToolCallId(id) { + return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64); + } +-function convertMessages(messages, model, isOAuthToken) { ++function convertMessages(messages, model, isOAuthToken, cacheControl) { + const params = []; + // Transform messages for cross-provider compatibility + const transformedMessages = transformMessages(messages, model, normalizeToolCallId); +@@ -514,7 +511,7 @@ function convertMessages(messages, model, isOAuthToken) { + const lastBlock = lastMessage.content[lastMessage.content.length - 1]; + if (lastBlock && + (lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result")) { +- lastBlock.cache_control = { type: "ephemeral" }; ++ lastBlock.cache_control = cacheControl; + } + } + } +diff --git a/dist/providers/openai-completions.js b/dist/providers/openai-completions.js +index ee5c88d8e280ceeff45ed075f2c7357d40005578..89daad7b0e53753e094028291226d32da9446440 100644 +--- a/dist/providers/openai-completions.js ++++ b/dist/providers/openai-completions.js +@@ -305,7 +305,7 @@ function createClient(model, context, apiKey) { + function buildParams(model, context, options) { + const compat = getCompat(model); + const messages = convertMessages(model, context, compat); +- maybeAddOpenRouterAnthropicCacheControl(model, messages); ++ maybeAddOpenRouterAnthropicCacheControl(model, messages, options?.cacheControlTtl); + const params = { + model: model.id, + messages, +@@ -349,9 +349,10 @@ function buildParams(model, context, options) { + } + return params; + } +-function maybeAddOpenRouterAnthropicCacheControl(model, messages) { ++function maybeAddOpenRouterAnthropicCacheControl(model, messages, cacheControlTtl) { + if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) + return; ++ const cacheControl = cacheControlTtl ? { type: "ephemeral", ttl: cacheControlTtl } : { type: "ephemeral" }; + // Anthropic-style caching requires cache_control on a text part. Add a breakpoint + // on the last user/assistant message (walking backwards until we find text content). + for (let i = messages.length - 1; i >= 0; i--) { +@@ -361,7 +362,7 @@ function maybeAddOpenRouterAnthropicCacheControl(model, messages) { + const content = msg.content; + if (typeof content === "string") { + msg.content = [ +- Object.assign({ type: "text", text: content }, { cache_control: { type: "ephemeral" } }), ++ Object.assign({ type: "text", text: content }, { cache_control: cacheControl }), + ]; + return; + } +@@ -371,7 +372,7 @@ function maybeAddOpenRouterAnthropicCacheControl(model, messages) { + for (let j = content.length - 1; j >= 0; j--) { + const part = content[j]; + if (part?.type === "text") { +- Object.assign(part, { cache_control: { type: "ephemeral" } }); ++ Object.assign(part, { cache_control: cacheControl }); + return; + } + } +diff --git a/dist/stream.js b/dist/stream.js +index d23fdd9f226a949fac4f2c7160af76f7f5fe71d1..3500f074bd88b85f4c7dd9bf42279f80fdf264d1 100644 +--- a/dist/stream.js ++++ b/dist/stream.js +@@ -146,6 +146,7 @@ function mapOptionsForApi(model, options, apiKey) { + signal: options?.signal, + apiKey: apiKey || options?.apiKey, + sessionId: options?.sessionId, ++ cacheControlTtl: options?.cacheControlTtl, + }; + // Helper to clamp xhigh to high for providers that don't support it + const clampReasoning = (effort) => (effort === "xhigh" ? "high" : effort); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 59cb34d7a..2c17a6a5d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,6 +9,11 @@ overrides: hono: 4.11.4 tar: 7.5.3 +patchedDependencies: + '@mariozechner/pi-ai@0.49.2': + hash: 4ae0a92a4b2c74703711e2a62b745ca8af6a9948ea7fa923097e875c76354d7e + path: patches/@mariozechner__pi-ai@0.49.2.patch + importers: .: @@ -39,12 +44,12 @@ importers: version: 0.49.2(ws@8.19.0)(zod@4.3.5) '@mariozechner/pi-ai': specifier: 0.49.2 - version: 0.49.2(ws@8.19.0)(zod@4.3.5) + version: 0.49.2(patch_hash=4ae0a92a4b2c74703711e2a62b745ca8af6a9948ea7fa923097e875c76354d7e)(ws@8.19.0)(zod@4.3.5) '@mariozechner/pi-coding-agent': - specifier: ^0.49.2 + specifier: 0.49.2 version: 0.49.2(ws@8.19.0)(zod@4.3.5) '@mariozechner/pi-tui': - specifier: ^0.49.2 + specifier: 0.49.2 version: 0.49.2 '@mozilla/readability': specifier: ^0.6.0 @@ -5931,7 +5936,7 @@ snapshots: '@mariozechner/pi-agent-core@0.49.2(ws@8.19.0)(zod@4.3.5)': dependencies: - '@mariozechner/pi-ai': 0.49.2(ws@8.19.0)(zod@4.3.5) + '@mariozechner/pi-ai': 0.49.2(patch_hash=4ae0a92a4b2c74703711e2a62b745ca8af6a9948ea7fa923097e875c76354d7e)(ws@8.19.0)(zod@4.3.5) '@mariozechner/pi-tui': 0.49.2 transitivePeerDependencies: - '@modelcontextprotocol/sdk' @@ -5942,7 +5947,7 @@ snapshots: - ws - zod - '@mariozechner/pi-ai@0.49.2(ws@8.19.0)(zod@4.3.5)': + '@mariozechner/pi-ai@0.49.2(patch_hash=4ae0a92a4b2c74703711e2a62b745ca8af6a9948ea7fa923097e875c76354d7e)(ws@8.19.0)(zod@4.3.5)': dependencies: '@anthropic-ai/sdk': 0.71.2(zod@4.3.5) '@aws-sdk/client-bedrock-runtime': 3.971.0 @@ -5969,7 +5974,7 @@ snapshots: '@mariozechner/clipboard': 0.3.0 '@mariozechner/jiti': 2.6.5 '@mariozechner/pi-agent-core': 0.49.2(ws@8.19.0)(zod@4.3.5) - '@mariozechner/pi-ai': 0.49.2(ws@8.19.0)(zod@4.3.5) + '@mariozechner/pi-ai': 0.49.2(patch_hash=4ae0a92a4b2c74703711e2a62b745ca8af6a9948ea7fa923097e875c76354d7e)(ws@8.19.0)(zod@4.3.5) '@mariozechner/pi-tui': 0.49.2 '@silvia-odwyer/photon-node': 0.3.4 chalk: 5.6.2 diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts index fbb1e161b..11f2ab83a 100644 --- a/src/agents/pi-embedded-runner/extra-params.ts +++ b/src/agents/pi-embedded-runner/extra-params.ts @@ -21,21 +21,41 @@ export function resolveExtraParams(params: { return modelConfig?.params ? { ...modelConfig.params } : undefined; } +type CacheControlTtl = "5m" | "1h"; + +function resolveCacheControlTtl( + extraParams: Record | undefined, + provider: string, + modelId: string, +): CacheControlTtl | undefined { + const raw = extraParams?.cacheControlTtl; + if (raw !== "5m" && raw !== "1h") return undefined; + if (provider === "anthropic") return raw; + if (provider === "openrouter" && modelId.startsWith("anthropic/")) return raw; + return undefined; +} + function createStreamFnWithExtraParams( baseStreamFn: StreamFn | undefined, extraParams: Record | undefined, + provider: string, + modelId: string, ): StreamFn | undefined { if (!extraParams || Object.keys(extraParams).length === 0) { return undefined; } - const streamParams: Partial = {}; + const streamParams: Partial & { cacheControlTtl?: CacheControlTtl } = {}; if (typeof extraParams.temperature === "number") { streamParams.temperature = extraParams.temperature; } if (typeof extraParams.maxTokens === "number") { streamParams.maxTokens = extraParams.maxTokens; } + const cacheControlTtl = resolveCacheControlTtl(extraParams, provider, modelId); + if (cacheControlTtl) { + streamParams.cacheControlTtl = cacheControlTtl; + } if (Object.keys(streamParams).length === 0) { return undefined; @@ -77,7 +97,7 @@ export function applyExtraParamsToAgent( ) : undefined; const merged = Object.assign({}, extraParams, override); - const wrappedStreamFn = createStreamFnWithExtraParams(agent.streamFn, merged); + const wrappedStreamFn = createStreamFnWithExtraParams(agent.streamFn, merged, provider, modelId); if (wrappedStreamFn) { log.debug(`applying extraParams to agent streamFn for ${provider}/${modelId}`);