From c3aed2543e0d80092020b8c7860ad295fb76b047 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 12 Dec 2025 23:22:05 +0000 Subject: [PATCH] fix(status): account cached prompt tokens --- src/agents/agents.test.ts | 7 +++- src/agents/pi.ts | 5 ++- src/agents/usage.ts | 69 +++++++++++++++++++++++++++++++++++ src/auto-reply/reply.ts | 11 ++++-- src/auto-reply/status.test.ts | 55 +++++++++++++++++++++++++++- src/auto-reply/status.ts | 51 ++++++++++++++++---------- 6 files changed, 171 insertions(+), 27 deletions(-) create mode 100644 src/agents/usage.ts diff --git a/src/agents/agents.test.ts b/src/agents/agents.test.ts index e513528f6..3c1bcbe4b 100644 --- a/src/agents/agents.test.ts +++ b/src/agents/agents.test.ts @@ -36,12 +36,17 @@ describe("pi agent helpers", () => { it("parses final assistant message and preserves usage meta", () => { const stdout = [ '{"type":"message_start","message":{"role":"assistant"}}', - '{"type":"message_end","message":{"role":"assistant","content":[{"type":"text","text":"hello world"}],"usage":{"input":10,"output":5},"model":"pi-1","provider":"inflection","stopReason":"end"}}', + '{"type":"message_end","message":{"role":"assistant","content":[{"type":"text","text":"hello world"}],"usage":{"input":10,"output":5,"cacheRead":100,"cacheWrite":20,"totalTokens":135},"model":"pi-1","provider":"inflection","stopReason":"end"}}', ].join("\n"); const parsed = piSpec.parseOutput(stdout); expect(parsed.texts?.[0]).toBe("hello world"); expect(parsed.meta?.provider).toBe("inflection"); expect((parsed.meta?.usage as { output?: number })?.output).toBe(5); + expect((parsed.meta?.usage as { cacheRead?: number })?.cacheRead).toBe(100); + expect((parsed.meta?.usage as { cacheWrite?: number })?.cacheWrite).toBe( + 20, + ); + expect((parsed.meta?.usage as { total?: number })?.total).toBe(135); }); it("piSpec carries tool names when present", () => { diff --git a/src/agents/pi.ts b/src/agents/pi.ts index 779dd1f88..745aff090 100644 --- 
a/src/agents/pi.ts +++ b/src/agents/pi.ts @@ -6,11 +6,12 @@ import type { AgentSpec, AgentToolResult, } from "./types.js"; +import { normalizeUsage, type UsageLike } from "./usage.js"; type PiAssistantMessage = { role?: string; content?: Array<{ type?: string; text?: string }>; - usage?: { input?: number; output?: number }; + usage?: UsageLike; model?: string; provider?: string; stopReason?: string; @@ -153,7 +154,7 @@ function parsePiJson(raw: string): AgentParseResult { model: lastAssistant.model, provider: lastAssistant.provider, stopReason: lastAssistant.stopReason, - usage: lastAssistant.usage, + usage: normalizeUsage(lastAssistant.usage), } : undefined; diff --git a/src/agents/usage.ts b/src/agents/usage.ts new file mode 100644 index 000000000..bc33a942b --- /dev/null +++ b/src/agents/usage.ts @@ -0,0 +1,69 @@ +export type UsageLike = { + input?: number; + output?: number; + cacheRead?: number; + cacheWrite?: number; + total?: number; + // Some agents/logs emit alternate naming. + totalTokens?: number; + total_tokens?: number; + cache_read?: number; + cache_write?: number; +}; + +const asFiniteNumber = (value: unknown): number | undefined => { + if (typeof value !== "number") return undefined; + if (!Number.isFinite(value)) return undefined; + return value; +}; + +export function normalizeUsage(raw?: UsageLike | null): + | { + input?: number; + output?: number; + cacheRead?: number; + cacheWrite?: number; + total?: number; + } + | undefined { + if (!raw) return undefined; + + const input = asFiniteNumber(raw.input); + const output = asFiniteNumber(raw.output); + const cacheRead = asFiniteNumber(raw.cacheRead ?? raw.cache_read); + const cacheWrite = asFiniteNumber(raw.cacheWrite ?? raw.cache_write); + const total = asFiniteNumber( + raw.total ?? raw.totalTokens ?? 
raw.total_tokens, + ); + + if ( + input === undefined && + output === undefined && + cacheRead === undefined && + cacheWrite === undefined && + total === undefined + ) { + return undefined; + } + + return { + input, + output, + cacheRead, + cacheWrite, + total, + }; +} + +export function derivePromptTokens(usage?: { + input?: number; + cacheRead?: number; + cacheWrite?: number; +}): number | undefined { + if (!usage) return undefined; + const input = usage.input ?? 0; + const cacheRead = usage.cacheRead ?? 0; + const cacheWrite = usage.cacheWrite ?? 0; + const sum = input + cacheRead + cacheWrite; + return sum > 0 ? sum : undefined; +} diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index e0828b82a..6591bf79f 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -822,12 +822,15 @@ export async function getReplyFromConfig( if (entry) { const input = usage.input ?? 0; const output = usage.output ?? 0; - const total = usage.total ?? input + output; + const promptTokens = + input + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0); sessionEntry = { ...entry, - inputTokens: (entry.inputTokens ?? 0) + input, - outputTokens: (entry.outputTokens ?? 0) + output, - totalTokens: (entry.totalTokens ?? 0) + total, + inputTokens: input, + outputTokens: output, + // Track the effective prompt/context size (cached + uncached input). + totalTokens: + promptTokens > 0 ? promptTokens : (usage.total ?? input), model, contextTokens: contextTokens ?? 
entry.contextTokens, updatedAt: Date.now(), diff --git a/src/auto-reply/status.test.ts b/src/auto-reply/status.test.ts index 948ad0d4b..5d2dd9be3 100644 --- a/src/auto-reply/status.test.ts +++ b/src/auto-reply/status.test.ts @@ -1,5 +1,7 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; import { afterEach, describe, expect, it, vi } from "vitest"; - import { buildStatusMessage } from "./status.js"; afterEach(() => { @@ -60,4 +62,55 @@ describe("buildStatusMessage", () => { expect(text).toContain("Context:"); expect(text).toContain("Web: not linked"); }); + + it("prefers cached prompt tokens from the session log", () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "clawdis-status-")); + const storePath = path.join(dir, "sessions.json"); + const sessionId = "sess-1"; + const logPath = path.join(dir, `${sessionId}.jsonl`); + + fs.writeFileSync( + logPath, + [ + JSON.stringify({ + type: "message", + message: { + role: "assistant", + model: "claude-opus-4-5", + usage: { + input: 1, + output: 2, + cacheRead: 1000, + cacheWrite: 0, + totalTokens: 1003, + }, + }, + }), + ].join("\n"), + "utf-8", + ); + + const text = buildStatusMessage({ + reply: { + mode: "command", + command: ["echo", "{{Body}}"], + agent: { kind: "pi", model: "claude-opus-4-5", contextTokens: 32_000 }, + session: { scope: "per-sender" }, + }, + sessionEntry: { + sessionId, + updatedAt: 0, + totalTokens: 3, // would be wrong if cached prompt tokens exist + contextTokens: 32_000, + }, + sessionKey: "main", + sessionScope: "per-sender", + storePath, + webLinked: true, + }); + + expect(text).toContain("Context: 1.0k/32k"); + + fs.rmSync(dir, { recursive: true, force: true }); + }); }); diff --git a/src/auto-reply/status.ts b/src/auto-reply/status.ts index bc9d083c6..e8203fc87 100644 --- a/src/auto-reply/status.ts +++ b/src/auto-reply/status.ts @@ -5,6 +5,11 @@ import path from "node:path"; import { lookupContextTokens } from "../agents/context.js"; import { 
DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL } from "../agents/defaults.js"; +import { + derivePromptTokens, + normalizeUsage, + type UsageLike, +} from "../agents/usage.js"; import type { ClawdisConfig } from "../config/config.js"; import type { SessionEntry, SessionScope } from "../config/sessions.js"; import type { ThinkLevel, VerboseLevel } from "./thinking.js"; @@ -117,6 +122,7 @@ const readUsageFromSessionLog = ( | { input: number; output: number; + promptTokens: number; total: number; model?: string; } @@ -144,33 +150,38 @@ const readUsageFromSessionLog = ( const lines = fs.readFileSync(logPath, "utf-8").split(/\n+/); let input = 0; let output = 0; + let promptTokens = 0; let model: string | undefined; + let lastUsage: ReturnType<typeof normalizeUsage> | undefined; for (const line of lines) { if (!line.trim()) continue; try { const parsed = JSON.parse(line) as { message?: { - usage?: { input?: number; output?: number; total?: number }; + usage?: UsageLike; model?: string; }; - usage?: { input?: number; output?: number; total?: number }; + usage?: UsageLike; model?: string; }; - const usage = parsed.message?.usage ?? parsed.usage; - if (usage) { - input += usage.input ?? 0; - output += usage.output ?? 0; - } + const usageRaw = parsed.message?.usage ?? parsed.usage; + const usage = normalizeUsage(usageRaw); + if (usage) lastUsage = usage; model = parsed.message?.model ?? parsed.model ?? model; } catch { // ignore bad lines } } - const total = input + output; - if (total === 0) return undefined; - return { input, output, total, model }; + if (!lastUsage) return undefined; + input = lastUsage.input ?? 0; + output = lastUsage.output ?? 0; + promptTokens = + derivePromptTokens(lastUsage) ?? lastUsage.total ?? input + output; + const total = lastUsage.total ?? 
promptTokens + output; + if (promptTokens === 0 && total === 0) return undefined; + return { input, output, promptTokens, total, model }; } catch { return undefined; } @@ -190,15 +201,17 @@ export function buildStatusMessage(args: StatusArgs): string { entry?.totalTokens ?? (entry?.inputTokens ?? 0) + (entry?.outputTokens ?? 0); - // Fallback: derive usage from the session transcript if the store lacks it - if (!totalTokens || totalTokens === 0) { - const logUsage = readUsageFromSessionLog(entry?.sessionId, args.storePath); - if (logUsage) { - totalTokens = logUsage.total; - if (!model) model = logUsage.model ?? model; - if (!contextTokens && logUsage.model) { - contextTokens = lookupContextTokens(logUsage.model) ?? contextTokens; - } + // Prefer prompt-size tokens from the session transcript when it looks larger + // (cached prompt tokens are often missing from agent meta/store). + const logUsage = readUsageFromSessionLog(entry?.sessionId, args.storePath); + if (logUsage) { + const candidate = logUsage.promptTokens || logUsage.total; + if (!totalTokens || totalTokens === 0 || candidate > totalTokens) { + totalTokens = candidate; + } + if (!model) model = logUsage.model ?? model; + if (!contextTokens && logUsage.model) { + contextTokens = lookupContextTokens(logUsage.model) ?? contextTokens; } } const agentProbe = probeAgentCommand(args.reply?.command);