fix: default Anthropic API cache TTL to 1h

This commit is contained in:
Peter Steinberger
2026-01-20 15:07:46 +00:00
parent a76aea1bc0
commit 74f382f732
7 changed files with 199 additions and 11 deletions

View File

@@ -23,6 +23,7 @@ Docs: https://docs.clawd.bot
- Exec: prefer bash when fish is default shell, falling back to sh if bash is missing. (#1297) — thanks @ysqander.
- Exec: merge login-shell PATH for host=gateway exec while keeping daemon PATH minimal. (#1304)
- Plugins: add Nextcloud Talk manifest for plugin config validation. (#1297) — thanks @ysqander.
- Anthropic: default API prompt caching to 1h with configurable TTL override; ignore TTL for OAuth.
## 2026.1.19-3

View File

@@ -1414,7 +1414,7 @@ Each `agents.defaults.models` entry can include:
- `alias` (optional model shortcut, e.g. `/opus`).
- `params` (optional provider-specific API params passed through to the model request).
`params` is also applied to streaming runs (embedded agent + compaction). Supported keys today: `temperature`, `maxTokens`. These merge with call-time options; caller-supplied values win. `temperature` is an advanced knob—leave unset unless you know the models defaults and need a change.
`params` is also applied to streaming runs (embedded agent + compaction). Supported keys today: `temperature`, `maxTokens`, `cacheControlTtl` (`"5m"` or `"1h"`, Anthropic API + OpenRouter Anthropic models only; ignored for Anthropic OAuth/Claude Code tokens). These merge with call-time options; caller-supplied values win. `temperature` is an advanced knob—leave unset unless you know the model's defaults and need a change. Anthropic API defaults to `"1h"` unless you override (`cacheControlTtl: "5m"`). Clawdbot includes the `extended-cache-ttl-2025-04-11` beta flag for Anthropic API; keep it if you override provider headers.
Example:

View File

@@ -34,6 +34,30 @@ clawdbot onboard --anthropic-api-key "$ANTHROPIC_API_KEY"
}
```
## Prompt caching (Anthropic API)
Clawdbot enables **1-hour prompt caching by default** for Anthropic API keys.
This is **API-only**; Claude Code CLI OAuth ignores TTL settings.
To override the TTL per model, set `cacheControlTtl` in the model `params`:
```json5
{
agents: {
defaults: {
models: {
"anthropic/claude-opus-4-5": {
params: { cacheControlTtl: "5m" } // or "1h"
}
}
}
}
}
```
Clawdbot includes the `extended-cache-ttl-2025-04-11` beta flag for Anthropic API
requests; keep it if you override provider headers (see [/gateway/configuration](/gateway/configuration)).
## Option B: Claude Code CLI (setup-token or OAuth)
**Best for:** using your Claude subscription or existing Claude Code CLI login.

View File

@@ -152,8 +152,8 @@
"@lydell/node-pty": "1.2.0-beta.3",
"@mariozechner/pi-agent-core": "0.49.2",
"@mariozechner/pi-ai": "0.49.2",
"@mariozechner/pi-coding-agent": "^0.49.2",
"@mariozechner/pi-tui": "^0.49.2",
"@mariozechner/pi-coding-agent": "0.49.2",
"@mariozechner/pi-tui": "0.49.2",
"@mozilla/readability": "^0.6.0",
"@sinclair/typebox": "0.34.47",
"@slack/bolt": "^4.6.0",
@@ -232,6 +232,9 @@
"@sinclair/typebox": "0.34.47",
"hono": "4.11.4",
"tar": "7.5.3"
},
"patchedDependencies": {
"@mariozechner/pi-ai@0.49.2": "patches/@mariozechner__pi-ai@0.49.2.patch"
}
},
"vitest": {

View File

@@ -0,0 +1,135 @@
diff --git a/dist/providers/anthropic.js b/dist/providers/anthropic.js
index 1cba2f1365812fd2f88993009c9cc06e9c348279..664dd6d8b400ec523fb735480741b9ad64f9a68c 100644
--- a/dist/providers/anthropic.js
+++ b/dist/providers/anthropic.js
@@ -298,10 +298,11 @@ function createClient(model, apiKey, interleavedThinking) {
});
return { client, isOAuthToken: true };
}
+ const apiBetaFeatures = ["extended-cache-ttl-2025-04-11", ...betaFeatures];
const defaultHeaders = {
accept: "application/json",
"anthropic-dangerous-direct-browser-access": "true",
- "anthropic-beta": betaFeatures.join(","),
+ "anthropic-beta": apiBetaFeatures.join(","),
...(model.headers || {}),
};
const client = new Anthropic({
@@ -313,9 +314,11 @@ function createClient(model, apiKey, interleavedThinking) {
return { client, isOAuthToken: false };
}
function buildParams(model, context, isOAuthToken, options) {
+ const cacheControlTtl = !isOAuthToken ? (options?.cacheControlTtl ?? "1h") : undefined;
+ const cacheControl = cacheControlTtl ? { type: "ephemeral", ttl: cacheControlTtl } : { type: "ephemeral" };
const params = {
model: model.id,
- messages: convertMessages(context.messages, model, isOAuthToken),
+ messages: convertMessages(context.messages, model, isOAuthToken, cacheControl),
max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
stream: true,
};
@@ -325,18 +328,14 @@ function buildParams(model, context, isOAuthToken, options) {
{
type: "text",
text: "You are Claude Code, Anthropic's official CLI for Claude.",
- cache_control: {
- type: "ephemeral",
- },
+ cache_control: cacheControl,
},
];
if (context.systemPrompt) {
params.system.push({
type: "text",
text: sanitizeSurrogates(context.systemPrompt),
- cache_control: {
- type: "ephemeral",
- },
+ cache_control: cacheControl,
});
}
}
@@ -346,9 +345,7 @@ function buildParams(model, context, isOAuthToken, options) {
{
type: "text",
text: sanitizeSurrogates(context.systemPrompt),
- cache_control: {
- type: "ephemeral",
- },
+ cache_control: cacheControl,
},
];
}
@@ -378,7 +375,7 @@ function buildParams(model, context, isOAuthToken, options) {
function normalizeToolCallId(id) {
return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64);
}
-function convertMessages(messages, model, isOAuthToken) {
+function convertMessages(messages, model, isOAuthToken, cacheControl) {
const params = [];
// Transform messages for cross-provider compatibility
const transformedMessages = transformMessages(messages, model, normalizeToolCallId);
@@ -514,7 +511,7 @@ function convertMessages(messages, model, isOAuthToken) {
const lastBlock = lastMessage.content[lastMessage.content.length - 1];
if (lastBlock &&
(lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result")) {
- lastBlock.cache_control = { type: "ephemeral" };
+ lastBlock.cache_control = cacheControl;
}
}
}
diff --git a/dist/providers/openai-completions.js b/dist/providers/openai-completions.js
index ee5c88d8e280ceeff45ed075f2c7357d40005578..89daad7b0e53753e094028291226d32da9446440 100644
--- a/dist/providers/openai-completions.js
+++ b/dist/providers/openai-completions.js
@@ -305,7 +305,7 @@ function createClient(model, context, apiKey) {
function buildParams(model, context, options) {
const compat = getCompat(model);
const messages = convertMessages(model, context, compat);
- maybeAddOpenRouterAnthropicCacheControl(model, messages);
+ maybeAddOpenRouterAnthropicCacheControl(model, messages, options?.cacheControlTtl);
const params = {
model: model.id,
messages,
@@ -349,9 +349,10 @@ function buildParams(model, context, options) {
}
return params;
}
-function maybeAddOpenRouterAnthropicCacheControl(model, messages) {
+function maybeAddOpenRouterAnthropicCacheControl(model, messages, cacheControlTtl) {
if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/"))
return;
+ const cacheControl = cacheControlTtl ? { type: "ephemeral", ttl: cacheControlTtl } : { type: "ephemeral" };
// Anthropic-style caching requires cache_control on a text part. Add a breakpoint
// on the last user/assistant message (walking backwards until we find text content).
for (let i = messages.length - 1; i >= 0; i--) {
@@ -361,7 +362,7 @@ function maybeAddOpenRouterAnthropicCacheControl(model, messages) {
const content = msg.content;
if (typeof content === "string") {
msg.content = [
- Object.assign({ type: "text", text: content }, { cache_control: { type: "ephemeral" } }),
+ Object.assign({ type: "text", text: content }, { cache_control: cacheControl }),
];
return;
}
@@ -371,7 +372,7 @@ function maybeAddOpenRouterAnthropicCacheControl(model, messages) {
for (let j = content.length - 1; j >= 0; j--) {
const part = content[j];
if (part?.type === "text") {
- Object.assign(part, { cache_control: { type: "ephemeral" } });
+ Object.assign(part, { cache_control: cacheControl });
return;
}
}
diff --git a/dist/stream.js b/dist/stream.js
index d23fdd9f226a949fac4f2c7160af76f7f5fe71d1..3500f074bd88b85f4c7dd9bf42279f80fdf264d1 100644
--- a/dist/stream.js
+++ b/dist/stream.js
@@ -146,6 +146,7 @@ function mapOptionsForApi(model, options, apiKey) {
signal: options?.signal,
apiKey: apiKey || options?.apiKey,
sessionId: options?.sessionId,
+ cacheControlTtl: options?.cacheControlTtl,
};
// Helper to clamp xhigh to high for providers that don't support it
const clampReasoning = (effort) => (effort === "xhigh" ? "high" : effort);

17
pnpm-lock.yaml generated
View File

@@ -9,6 +9,11 @@ overrides:
hono: 4.11.4
tar: 7.5.3
patchedDependencies:
'@mariozechner/pi-ai@0.49.2':
hash: 4ae0a92a4b2c74703711e2a62b745ca8af6a9948ea7fa923097e875c76354d7e
path: patches/@mariozechner__pi-ai@0.49.2.patch
importers:
.:
@@ -39,12 +44,12 @@ importers:
version: 0.49.2(ws@8.19.0)(zod@4.3.5)
'@mariozechner/pi-ai':
specifier: 0.49.2
version: 0.49.2(ws@8.19.0)(zod@4.3.5)
version: 0.49.2(patch_hash=4ae0a92a4b2c74703711e2a62b745ca8af6a9948ea7fa923097e875c76354d7e)(ws@8.19.0)(zod@4.3.5)
'@mariozechner/pi-coding-agent':
specifier: ^0.49.2
specifier: 0.49.2
version: 0.49.2(ws@8.19.0)(zod@4.3.5)
'@mariozechner/pi-tui':
specifier: ^0.49.2
specifier: 0.49.2
version: 0.49.2
'@mozilla/readability':
specifier: ^0.6.0
@@ -5931,7 +5936,7 @@ snapshots:
'@mariozechner/pi-agent-core@0.49.2(ws@8.19.0)(zod@4.3.5)':
dependencies:
'@mariozechner/pi-ai': 0.49.2(ws@8.19.0)(zod@4.3.5)
'@mariozechner/pi-ai': 0.49.2(patch_hash=4ae0a92a4b2c74703711e2a62b745ca8af6a9948ea7fa923097e875c76354d7e)(ws@8.19.0)(zod@4.3.5)
'@mariozechner/pi-tui': 0.49.2
transitivePeerDependencies:
- '@modelcontextprotocol/sdk'
@@ -5942,7 +5947,7 @@ snapshots:
- ws
- zod
'@mariozechner/pi-ai@0.49.2(ws@8.19.0)(zod@4.3.5)':
'@mariozechner/pi-ai@0.49.2(patch_hash=4ae0a92a4b2c74703711e2a62b745ca8af6a9948ea7fa923097e875c76354d7e)(ws@8.19.0)(zod@4.3.5)':
dependencies:
'@anthropic-ai/sdk': 0.71.2(zod@4.3.5)
'@aws-sdk/client-bedrock-runtime': 3.971.0
@@ -5969,7 +5974,7 @@ snapshots:
'@mariozechner/clipboard': 0.3.0
'@mariozechner/jiti': 2.6.5
'@mariozechner/pi-agent-core': 0.49.2(ws@8.19.0)(zod@4.3.5)
'@mariozechner/pi-ai': 0.49.2(ws@8.19.0)(zod@4.3.5)
'@mariozechner/pi-ai': 0.49.2(patch_hash=4ae0a92a4b2c74703711e2a62b745ca8af6a9948ea7fa923097e875c76354d7e)(ws@8.19.0)(zod@4.3.5)
'@mariozechner/pi-tui': 0.49.2
'@silvia-odwyer/photon-node': 0.3.4
chalk: 5.6.2

View File

@@ -21,21 +21,41 @@ export function resolveExtraParams(params: {
return modelConfig?.params ? { ...modelConfig.params } : undefined;
}
type CacheControlTtl = "5m" | "1h";
function resolveCacheControlTtl(
extraParams: Record<string, unknown> | undefined,
provider: string,
modelId: string,
): CacheControlTtl | undefined {
const raw = extraParams?.cacheControlTtl;
if (raw !== "5m" && raw !== "1h") return undefined;
if (provider === "anthropic") return raw;
if (provider === "openrouter" && modelId.startsWith("anthropic/")) return raw;
return undefined;
}
function createStreamFnWithExtraParams(
baseStreamFn: StreamFn | undefined,
extraParams: Record<string, unknown> | undefined,
provider: string,
modelId: string,
): StreamFn | undefined {
if (!extraParams || Object.keys(extraParams).length === 0) {
return undefined;
}
const streamParams: Partial<SimpleStreamOptions> = {};
const streamParams: Partial<SimpleStreamOptions> & { cacheControlTtl?: CacheControlTtl } = {};
if (typeof extraParams.temperature === "number") {
streamParams.temperature = extraParams.temperature;
}
if (typeof extraParams.maxTokens === "number") {
streamParams.maxTokens = extraParams.maxTokens;
}
const cacheControlTtl = resolveCacheControlTtl(extraParams, provider, modelId);
if (cacheControlTtl) {
streamParams.cacheControlTtl = cacheControlTtl;
}
if (Object.keys(streamParams).length === 0) {
return undefined;
@@ -77,7 +97,7 @@ export function applyExtraParamsToAgent(
)
: undefined;
const merged = Object.assign({}, extraParams, override);
const wrappedStreamFn = createStreamFnWithExtraParams(agent.streamFn, merged);
const wrappedStreamFn = createStreamFnWithExtraParams(agent.streamFn, merged, provider, modelId);
if (wrappedStreamFn) {
log.debug(`applying extraParams to agent streamFn for ${provider}/${modelId}`);