fix(status): account for cached prompt tokens
This commit is contained in:
@@ -36,12 +36,17 @@ describe("pi agent helpers", () => {
|
||||
it("parses final assistant message and preserves usage meta", () => {
|
||||
const stdout = [
|
||||
'{"type":"message_start","message":{"role":"assistant"}}',
|
||||
'{"type":"message_end","message":{"role":"assistant","content":[{"type":"text","text":"hello world"}],"usage":{"input":10,"output":5},"model":"pi-1","provider":"inflection","stopReason":"end"}}',
|
||||
'{"type":"message_end","message":{"role":"assistant","content":[{"type":"text","text":"hello world"}],"usage":{"input":10,"output":5,"cacheRead":100,"cacheWrite":20,"totalTokens":135},"model":"pi-1","provider":"inflection","stopReason":"end"}}',
|
||||
].join("\n");
|
||||
const parsed = piSpec.parseOutput(stdout);
|
||||
expect(parsed.texts?.[0]).toBe("hello world");
|
||||
expect(parsed.meta?.provider).toBe("inflection");
|
||||
expect((parsed.meta?.usage as { output?: number })?.output).toBe(5);
|
||||
expect((parsed.meta?.usage as { cacheRead?: number })?.cacheRead).toBe(100);
|
||||
expect((parsed.meta?.usage as { cacheWrite?: number })?.cacheWrite).toBe(
|
||||
20,
|
||||
);
|
||||
expect((parsed.meta?.usage as { total?: number })?.total).toBe(135);
|
||||
});
|
||||
|
||||
it("piSpec carries tool names when present", () => {
|
||||
|
||||
@@ -6,11 +6,12 @@ import type {
|
||||
AgentSpec,
|
||||
AgentToolResult,
|
||||
} from "./types.js";
|
||||
import { normalizeUsage, type UsageLike } from "./usage.js";
|
||||
|
||||
type PiAssistantMessage = {
|
||||
role?: string;
|
||||
content?: Array<{ type?: string; text?: string }>;
|
||||
usage?: { input?: number; output?: number };
|
||||
usage?: UsageLike;
|
||||
model?: string;
|
||||
provider?: string;
|
||||
stopReason?: string;
|
||||
@@ -153,7 +154,7 @@ function parsePiJson(raw: string): AgentParseResult {
|
||||
model: lastAssistant.model,
|
||||
provider: lastAssistant.provider,
|
||||
stopReason: lastAssistant.stopReason,
|
||||
usage: lastAssistant.usage,
|
||||
usage: normalizeUsage(lastAssistant.usage),
|
||||
}
|
||||
: undefined;
|
||||
|
||||
|
||||
69
src/agents/usage.ts
Normal file
69
src/agents/usage.ts
Normal file
@@ -0,0 +1,69 @@
|
||||
/**
 * Loose shape of a token-usage record as it may appear in agent output or
 * session logs. Every field is optional; several fields have alternate
 * spellings that `normalizeUsage` folds into one canonical key.
 */
export type UsageLike = {
  input?: number;
  output?: number;
  cacheRead?: number;
  cacheWrite?: number;
  total?: number;
  // Some agents/logs emit alternate naming.
  totalTokens?: number;
  total_tokens?: number;
  cache_read?: number;
  cache_write?: number;
};

/** Returns `value` when it is a finite number, otherwise `undefined`. */
const asFiniteNumber = (value: unknown): number | undefined => {
  if (typeof value !== "number") return undefined;
  if (!Number.isFinite(value)) return undefined;
  return value;
};

/**
 * Returns the first candidate that is a finite number, in argument order.
 * Used for alias resolution so that an unusable primary key (NaN, a string,
 * null, ...) does not hide a valid value under an alternate key.
 */
const firstFiniteNumber = (...candidates: unknown[]): number | undefined => {
  for (const candidate of candidates) {
    const value = asFiniteNumber(candidate);
    if (value !== undefined) return value;
  }
  return undefined;
};

/**
 * Normalizes a raw usage record into canonical keys.
 *
 * Alias precedence (first finite number wins):
 * - `cacheRead`, then `cache_read`
 * - `cacheWrite`, then `cache_write`
 * - `total`, then `totalTokens`, then `total_tokens`
 *
 * @param raw - Usage record to normalize; may be `undefined` or `null`.
 * @returns An object carrying all five canonical keys (individual keys are
 *   `undefined` when no finite value was found), or `undefined` when `raw` is
 *   missing or none of the fields holds a finite number.
 */
export function normalizeUsage(raw?: UsageLike | null):
  | {
      input?: number;
      output?: number;
      cacheRead?: number;
      cacheWrite?: number;
      total?: number;
    }
  | undefined {
  if (!raw) return undefined;

  const input = asFiniteNumber(raw.input);
  const output = asFiniteNumber(raw.output);
  // Validate each alias before falling through to the next one; a plain `??`
  // chain would stop at a present-but-invalid value and drop the field.
  const cacheRead = firstFiniteNumber(raw.cacheRead, raw.cache_read);
  const cacheWrite = firstFiniteNumber(raw.cacheWrite, raw.cache_write);
  const total = firstFiniteNumber(raw.total, raw.totalTokens, raw.total_tokens);

  if (
    input === undefined &&
    output === undefined &&
    cacheRead === undefined &&
    cacheWrite === undefined &&
    total === undefined
  ) {
    return undefined;
  }

  return {
    input,
    output,
    cacheRead,
    cacheWrite,
    total,
  };
}
|
||||
|
||||
/**
 * Derives the effective prompt size as the sum of uncached input tokens,
 * cache-read tokens and cache-write tokens.
 *
 * Components that are missing or not finite numbers count as 0, so one bad
 * field does not discard the remaining valid counts.
 *
 * @param usage - Usage record; only `input`, `cacheRead` and `cacheWrite` are read.
 * @returns The summed token count when it is greater than 0, otherwise
 *   `undefined` (including when `usage` itself is missing).
 */
export function derivePromptTokens(usage?: {
  input?: number;
  cacheRead?: number;
  cacheWrite?: number;
}): number | undefined {
  if (!usage) return undefined;

  // Runtime values may come straight from parsed JSON, so check the actual
  // type rather than trusting the declared one.
  const finiteOrZero = (value: unknown): number =>
    typeof value === "number" && Number.isFinite(value) ? value : 0;

  const sum =
    finiteOrZero(usage.input) +
    finiteOrZero(usage.cacheRead) +
    finiteOrZero(usage.cacheWrite);

  return sum > 0 ? sum : undefined;
}
|
||||
@@ -822,12 +822,15 @@ export async function getReplyFromConfig(
|
||||
if (entry) {
|
||||
const input = usage.input ?? 0;
|
||||
const output = usage.output ?? 0;
|
||||
const total = usage.total ?? input + output;
|
||||
const promptTokens =
|
||||
input + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
|
||||
sessionEntry = {
|
||||
...entry,
|
||||
inputTokens: (entry.inputTokens ?? 0) + input,
|
||||
outputTokens: (entry.outputTokens ?? 0) + output,
|
||||
totalTokens: (entry.totalTokens ?? 0) + total,
|
||||
inputTokens: input,
|
||||
outputTokens: output,
|
||||
// Track the effective prompt/context size (cached + uncached input).
|
||||
totalTokens:
|
||||
promptTokens > 0 ? promptTokens : (usage.total ?? input),
|
||||
model,
|
||||
contextTokens: contextTokens ?? entry.contextTokens,
|
||||
updatedAt: Date.now(),
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import fs from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
import { buildStatusMessage } from "./status.js";
|
||||
|
||||
afterEach(() => {
|
||||
@@ -60,4 +62,55 @@ describe("buildStatusMessage", () => {
|
||||
expect(text).toContain("Context:");
|
||||
expect(text).toContain("Web: not linked");
|
||||
});
|
||||
|
||||
// Writes a one-line session transcript whose usage is dominated by cacheRead
// (input 1 + cacheRead 1000) and passes a session entry whose stored
// totalTokens is only 3, then expects the status text to show the
// transcript-derived figure ("1.0k") against the 32k context window.
it("prefers cached prompt tokens from the session log", () => {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), "clawdis-status-"));
  // Clean up in `finally` so a failing expectation does not leak the temp dir.
  try {
    const storePath = path.join(dir, "sessions.json");
    const sessionId = "sess-1";
    const logPath = path.join(dir, `${sessionId}.jsonl`);

    fs.writeFileSync(
      logPath,
      [
        JSON.stringify({
          type: "message",
          message: {
            role: "assistant",
            model: "claude-opus-4-5",
            usage: {
              input: 1,
              output: 2,
              cacheRead: 1000,
              cacheWrite: 0,
              totalTokens: 1003,
            },
          },
        }),
      ].join("\n"),
      "utf-8",
    );

    const text = buildStatusMessage({
      reply: {
        mode: "command",
        command: ["echo", "{{Body}}"],
        agent: { kind: "pi", model: "claude-opus-4-5", contextTokens: 32_000 },
        session: { scope: "per-sender" },
      },
      sessionEntry: {
        sessionId,
        updatedAt: 0,
        totalTokens: 3, // would be wrong if cached prompt tokens exist
        contextTokens: 32_000,
      },
      sessionKey: "main",
      sessionScope: "per-sender",
      storePath,
      webLinked: true,
    });

    expect(text).toContain("Context: 1.0k/32k");
  } finally {
    fs.rmSync(dir, { recursive: true, force: true });
  }
});
|
||||
});
|
||||
|
||||
@@ -5,6 +5,11 @@ import path from "node:path";
|
||||
|
||||
import { lookupContextTokens } from "../agents/context.js";
|
||||
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL } from "../agents/defaults.js";
|
||||
import {
|
||||
derivePromptTokens,
|
||||
normalizeUsage,
|
||||
type UsageLike,
|
||||
} from "../agents/usage.js";
|
||||
import type { ClawdisConfig } from "../config/config.js";
|
||||
import type { SessionEntry, SessionScope } from "../config/sessions.js";
|
||||
import type { ThinkLevel, VerboseLevel } from "./thinking.js";
|
||||
@@ -117,6 +122,7 @@ const readUsageFromSessionLog = (
|
||||
| {
|
||||
input: number;
|
||||
output: number;
|
||||
promptTokens: number;
|
||||
total: number;
|
||||
model?: string;
|
||||
}
|
||||
@@ -144,33 +150,38 @@ const readUsageFromSessionLog = (
|
||||
const lines = fs.readFileSync(logPath, "utf-8").split(/\n+/);
|
||||
let input = 0;
|
||||
let output = 0;
|
||||
let promptTokens = 0;
|
||||
let model: string | undefined;
|
||||
let lastUsage: ReturnType<typeof normalizeUsage> | undefined;
|
||||
|
||||
for (const line of lines) {
|
||||
if (!line.trim()) continue;
|
||||
try {
|
||||
const parsed = JSON.parse(line) as {
|
||||
message?: {
|
||||
usage?: { input?: number; output?: number; total?: number };
|
||||
usage?: UsageLike;
|
||||
model?: string;
|
||||
};
|
||||
usage?: { input?: number; output?: number; total?: number };
|
||||
usage?: UsageLike;
|
||||
model?: string;
|
||||
};
|
||||
const usage = parsed.message?.usage ?? parsed.usage;
|
||||
if (usage) {
|
||||
input += usage.input ?? 0;
|
||||
output += usage.output ?? 0;
|
||||
}
|
||||
const usageRaw = parsed.message?.usage ?? parsed.usage;
|
||||
const usage = normalizeUsage(usageRaw);
|
||||
if (usage) lastUsage = usage;
|
||||
model = parsed.message?.model ?? parsed.model ?? model;
|
||||
} catch {
|
||||
// ignore bad lines
|
||||
}
|
||||
}
|
||||
|
||||
const total = input + output;
|
||||
if (total === 0) return undefined;
|
||||
return { input, output, total, model };
|
||||
if (!lastUsage) return undefined;
|
||||
input = lastUsage.input ?? 0;
|
||||
output = lastUsage.output ?? 0;
|
||||
promptTokens =
|
||||
derivePromptTokens(lastUsage) ?? lastUsage.total ?? input + output;
|
||||
const total = lastUsage.total ?? promptTokens + output;
|
||||
if (promptTokens === 0 && total === 0) return undefined;
|
||||
return { input, output, promptTokens, total, model };
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
@@ -190,15 +201,17 @@ export function buildStatusMessage(args: StatusArgs): string {
|
||||
entry?.totalTokens ??
|
||||
(entry?.inputTokens ?? 0) + (entry?.outputTokens ?? 0);
|
||||
|
||||
// Fallback: derive usage from the session transcript if the store lacks it
|
||||
if (!totalTokens || totalTokens === 0) {
|
||||
const logUsage = readUsageFromSessionLog(entry?.sessionId, args.storePath);
|
||||
if (logUsage) {
|
||||
totalTokens = logUsage.total;
|
||||
if (!model) model = logUsage.model ?? model;
|
||||
if (!contextTokens && logUsage.model) {
|
||||
contextTokens = lookupContextTokens(logUsage.model) ?? contextTokens;
|
||||
}
|
||||
// Prefer prompt-size tokens from the session transcript when it looks larger
|
||||
// (cached prompt tokens are often missing from agent meta/store).
|
||||
const logUsage = readUsageFromSessionLog(entry?.sessionId, args.storePath);
|
||||
if (logUsage) {
|
||||
const candidate = logUsage.promptTokens || logUsage.total;
|
||||
if (!totalTokens || totalTokens === 0 || candidate > totalTokens) {
|
||||
totalTokens = candidate;
|
||||
}
|
||||
if (!model) model = logUsage.model ?? model;
|
||||
if (!contextTokens && logUsage.model) {
|
||||
contextTokens = lookupContextTokens(logUsage.model) ?? contextTokens;
|
||||
}
|
||||
}
|
||||
const agentProbe = probeAgentCommand(args.reply?.command);
|
||||
|
||||
Reference in New Issue
Block a user