fix(status): account for cached prompt tokens
This commit is contained in:
@@ -36,12 +36,17 @@ describe("pi agent helpers", () => {
|
|||||||
it("parses final assistant message and preserves usage meta", () => {
|
it("parses final assistant message and preserves usage meta", () => {
|
||||||
const stdout = [
|
const stdout = [
|
||||||
'{"type":"message_start","message":{"role":"assistant"}}',
|
'{"type":"message_start","message":{"role":"assistant"}}',
|
||||||
'{"type":"message_end","message":{"role":"assistant","content":[{"type":"text","text":"hello world"}],"usage":{"input":10,"output":5},"model":"pi-1","provider":"inflection","stopReason":"end"}}',
|
'{"type":"message_end","message":{"role":"assistant","content":[{"type":"text","text":"hello world"}],"usage":{"input":10,"output":5,"cacheRead":100,"cacheWrite":20,"totalTokens":135},"model":"pi-1","provider":"inflection","stopReason":"end"}}',
|
||||||
].join("\n");
|
].join("\n");
|
||||||
const parsed = piSpec.parseOutput(stdout);
|
const parsed = piSpec.parseOutput(stdout);
|
||||||
expect(parsed.texts?.[0]).toBe("hello world");
|
expect(parsed.texts?.[0]).toBe("hello world");
|
||||||
expect(parsed.meta?.provider).toBe("inflection");
|
expect(parsed.meta?.provider).toBe("inflection");
|
||||||
expect((parsed.meta?.usage as { output?: number })?.output).toBe(5);
|
expect((parsed.meta?.usage as { output?: number })?.output).toBe(5);
|
||||||
|
expect((parsed.meta?.usage as { cacheRead?: number })?.cacheRead).toBe(100);
|
||||||
|
expect((parsed.meta?.usage as { cacheWrite?: number })?.cacheWrite).toBe(
|
||||||
|
20,
|
||||||
|
);
|
||||||
|
expect((parsed.meta?.usage as { total?: number })?.total).toBe(135);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("piSpec carries tool names when present", () => {
|
it("piSpec carries tool names when present", () => {
|
||||||
|
|||||||
@@ -6,11 +6,12 @@ import type {
|
|||||||
AgentSpec,
|
AgentSpec,
|
||||||
AgentToolResult,
|
AgentToolResult,
|
||||||
} from "./types.js";
|
} from "./types.js";
|
||||||
|
import { normalizeUsage, type UsageLike } from "./usage.js";
|
||||||
|
|
||||||
type PiAssistantMessage = {
|
type PiAssistantMessage = {
|
||||||
role?: string;
|
role?: string;
|
||||||
content?: Array<{ type?: string; text?: string }>;
|
content?: Array<{ type?: string; text?: string }>;
|
||||||
usage?: { input?: number; output?: number };
|
usage?: UsageLike;
|
||||||
model?: string;
|
model?: string;
|
||||||
provider?: string;
|
provider?: string;
|
||||||
stopReason?: string;
|
stopReason?: string;
|
||||||
@@ -153,7 +154,7 @@ function parsePiJson(raw: string): AgentParseResult {
|
|||||||
model: lastAssistant.model,
|
model: lastAssistant.model,
|
||||||
provider: lastAssistant.provider,
|
provider: lastAssistant.provider,
|
||||||
stopReason: lastAssistant.stopReason,
|
stopReason: lastAssistant.stopReason,
|
||||||
usage: lastAssistant.usage,
|
usage: normalizeUsage(lastAssistant.usage),
|
||||||
}
|
}
|
||||||
: undefined;
|
: undefined;
|
||||||
|
|
||||||
|
|||||||
69
src/agents/usage.ts
Normal file
69
src/agents/usage.ts
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
/**
 * Loosely-shaped token usage record as found on agent messages and in
 * session logs.
 *
 * Every field is optional. The same quantity may arrive under more than one
 * spelling (camelCase or snake_case) depending on the producer.
 */
export type UsageLike = Partial<
  Record<
    // Canonical names.
    | "input"
    | "output"
    | "cacheRead"
    | "cacheWrite"
    | "total"
    // Some agents/logs emit alternate naming.
    | "totalTokens"
    | "total_tokens"
    | "cache_read"
    | "cache_write",
    number
  >
>;
|
||||||
|
|
||||||
|
/**
 * Narrows an arbitrary value to a finite number.
 *
 * @param value - Anything, typically a field read from parsed JSON.
 * @returns `value` itself when it is a number other than `NaN`/`±Infinity`;
 *   otherwise `undefined`.
 */
const asFiniteNumber = (value: unknown): number | undefined =>
  typeof value === "number" && Number.isFinite(value) ? value : undefined;
|
||||||
|
|
||||||
|
/** Returns the first candidate that is a finite number, or `undefined`. */
const firstFiniteNumber = (...candidates: unknown[]): number | undefined =>
  candidates.find(
    (candidate): candidate is number =>
      typeof candidate === "number" && Number.isFinite(candidate),
  );

/**
 * Collapses a loosely-shaped usage record into canonical field names.
 *
 * Alternate spellings are folded into their canonical field
 * (`cache_read` → `cacheRead`, `cache_write` → `cacheWrite`,
 * `totalTokens`/`total_tokens` → `total`). For each field the first spelling
 * that holds a finite number wins, so a malformed primary value (for example
 * `NaN` or a string) does not hide a valid alias.
 *
 * @param raw - Usage record to normalize; may be `null` or `undefined`.
 * @returns An object carrying all five canonical keys (each possibly
 *   `undefined`), or `undefined` when `raw` is missing or holds no finite
 *   number under any recognized key.
 */
export function normalizeUsage(raw?: UsageLike | null):
  | {
      input?: number;
      output?: number;
      cacheRead?: number;
      cacheWrite?: number;
      total?: number;
    }
  | undefined {
  if (!raw) return undefined;

  const input = firstFiniteNumber(raw.input);
  const output = firstFiniteNumber(raw.output);
  const cacheRead = firstFiniteNumber(raw.cacheRead, raw.cache_read);
  const cacheWrite = firstFiniteNumber(raw.cacheWrite, raw.cache_write);
  const total = firstFiniteNumber(
    raw.total,
    raw.totalTokens,
    raw.total_tokens,
  );

  // Nothing usable at all: report "no usage" rather than an all-undefined object.
  const fields = [input, output, cacheRead, cacheWrite, total];
  if (fields.every((field) => field === undefined)) return undefined;

  return { input, output, cacheRead, cacheWrite, total };
}
|
||||||
|
|
||||||
|
/**
 * Computes the effective prompt size: uncached input tokens plus cache-read
 * and cache-write tokens.
 *
 * Missing or non-finite components count as zero, so a single bad field
 * neither discards the valid ones nor leaks `NaN`/`Infinity` to callers.
 *
 * @param usage - Usage counts; may be omitted.
 * @returns The summed prompt tokens, or `undefined` when `usage` is missing
 *   or the sum is not positive.
 */
export function derivePromptTokens(usage?: {
  input?: number;
  cacheRead?: number;
  cacheWrite?: number;
}): number | undefined {
  if (!usage) return undefined;

  const finiteOrZero = (value: number | undefined): number =>
    typeof value === "number" && Number.isFinite(value) ? value : 0;

  const sum =
    finiteOrZero(usage.input) +
    finiteOrZero(usage.cacheRead) +
    finiteOrZero(usage.cacheWrite);

  return sum > 0 ? sum : undefined;
}
|
||||||
@@ -822,12 +822,15 @@ export async function getReplyFromConfig(
|
|||||||
if (entry) {
|
if (entry) {
|
||||||
const input = usage.input ?? 0;
|
const input = usage.input ?? 0;
|
||||||
const output = usage.output ?? 0;
|
const output = usage.output ?? 0;
|
||||||
const total = usage.total ?? input + output;
|
const promptTokens =
|
||||||
|
input + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
|
||||||
sessionEntry = {
|
sessionEntry = {
|
||||||
...entry,
|
...entry,
|
||||||
inputTokens: (entry.inputTokens ?? 0) + input,
|
inputTokens: input,
|
||||||
outputTokens: (entry.outputTokens ?? 0) + output,
|
outputTokens: output,
|
||||||
totalTokens: (entry.totalTokens ?? 0) + total,
|
// Track the effective prompt/context size (cached + uncached input).
|
||||||
|
totalTokens:
|
||||||
|
promptTokens > 0 ? promptTokens : (usage.total ?? input),
|
||||||
model,
|
model,
|
||||||
contextTokens: contextTokens ?? entry.contextTokens,
|
contextTokens: contextTokens ?? entry.contextTokens,
|
||||||
updatedAt: Date.now(),
|
updatedAt: Date.now(),
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
|
import fs from "node:fs";
|
||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||||
|
|
||||||
import { buildStatusMessage } from "./status.js";
|
import { buildStatusMessage } from "./status.js";
|
||||||
|
|
||||||
afterEach(() => {
|
afterEach(() => {
|
||||||
@@ -60,4 +62,55 @@ describe("buildStatusMessage", () => {
|
|||||||
expect(text).toContain("Context:");
|
expect(text).toContain("Context:");
|
||||||
expect(text).toContain("Web: not linked");
|
expect(text).toContain("Web: not linked");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("prefers cached prompt tokens from the session log", () => {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), "clawdis-status-"));
  // try/finally so the temp directory is removed even when an assertion throws.
  try {
    const storePath = path.join(dir, "sessions.json");
    const sessionId = "sess-1";
    // NOTE(review): presumably the status code looks for `<sessionId>.jsonl`
    // next to the store file; confirm against readUsageFromSessionLog.
    const logPath = path.join(dir, `${sessionId}.jsonl`);

    // One assistant message whose prompt is almost entirely cache reads.
    fs.writeFileSync(
      logPath,
      [
        JSON.stringify({
          type: "message",
          message: {
            role: "assistant",
            model: "claude-opus-4-5",
            usage: {
              input: 1,
              output: 2,
              cacheRead: 1000,
              cacheWrite: 0,
              totalTokens: 1003,
            },
          },
        }),
      ].join("\n"),
      "utf-8",
    );

    const text = buildStatusMessage({
      reply: {
        mode: "command",
        command: ["echo", "{{Body}}"],
        agent: { kind: "pi", model: "claude-opus-4-5", contextTokens: 32_000 },
        session: { scope: "per-sender" },
      },
      sessionEntry: {
        sessionId,
        updatedAt: 0,
        totalTokens: 3, // would be wrong if cached prompt tokens exist
        contextTokens: 32_000,
      },
      sessionKey: "main",
      sessionScope: "per-sender",
      storePath,
      webLinked: true,
    });

    // The larger prompt size from the log must win over the stored total of 3.
    expect(text).toContain("Context: 1.0k/32k");
  } finally {
    fs.rmSync(dir, { recursive: true, force: true });
  }
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -5,6 +5,11 @@ import path from "node:path";
|
|||||||
|
|
||||||
import { lookupContextTokens } from "../agents/context.js";
|
import { lookupContextTokens } from "../agents/context.js";
|
||||||
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL } from "../agents/defaults.js";
|
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL } from "../agents/defaults.js";
|
||||||
|
import {
|
||||||
|
derivePromptTokens,
|
||||||
|
normalizeUsage,
|
||||||
|
type UsageLike,
|
||||||
|
} from "../agents/usage.js";
|
||||||
import type { ClawdisConfig } from "../config/config.js";
|
import type { ClawdisConfig } from "../config/config.js";
|
||||||
import type { SessionEntry, SessionScope } from "../config/sessions.js";
|
import type { SessionEntry, SessionScope } from "../config/sessions.js";
|
||||||
import type { ThinkLevel, VerboseLevel } from "./thinking.js";
|
import type { ThinkLevel, VerboseLevel } from "./thinking.js";
|
||||||
@@ -117,6 +122,7 @@ const readUsageFromSessionLog = (
|
|||||||
| {
|
| {
|
||||||
input: number;
|
input: number;
|
||||||
output: number;
|
output: number;
|
||||||
|
promptTokens: number;
|
||||||
total: number;
|
total: number;
|
||||||
model?: string;
|
model?: string;
|
||||||
}
|
}
|
||||||
@@ -144,33 +150,38 @@ const readUsageFromSessionLog = (
|
|||||||
const lines = fs.readFileSync(logPath, "utf-8").split(/\n+/);
|
const lines = fs.readFileSync(logPath, "utf-8").split(/\n+/);
|
||||||
let input = 0;
|
let input = 0;
|
||||||
let output = 0;
|
let output = 0;
|
||||||
|
let promptTokens = 0;
|
||||||
let model: string | undefined;
|
let model: string | undefined;
|
||||||
|
let lastUsage: ReturnType<typeof normalizeUsage> | undefined;
|
||||||
|
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
if (!line.trim()) continue;
|
if (!line.trim()) continue;
|
||||||
try {
|
try {
|
||||||
const parsed = JSON.parse(line) as {
|
const parsed = JSON.parse(line) as {
|
||||||
message?: {
|
message?: {
|
||||||
usage?: { input?: number; output?: number; total?: number };
|
usage?: UsageLike;
|
||||||
model?: string;
|
model?: string;
|
||||||
};
|
};
|
||||||
usage?: { input?: number; output?: number; total?: number };
|
usage?: UsageLike;
|
||||||
model?: string;
|
model?: string;
|
||||||
};
|
};
|
||||||
const usage = parsed.message?.usage ?? parsed.usage;
|
const usageRaw = parsed.message?.usage ?? parsed.usage;
|
||||||
if (usage) {
|
const usage = normalizeUsage(usageRaw);
|
||||||
input += usage.input ?? 0;
|
if (usage) lastUsage = usage;
|
||||||
output += usage.output ?? 0;
|
|
||||||
}
|
|
||||||
model = parsed.message?.model ?? parsed.model ?? model;
|
model = parsed.message?.model ?? parsed.model ?? model;
|
||||||
} catch {
|
} catch {
|
||||||
// ignore bad lines
|
// ignore bad lines
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const total = input + output;
|
if (!lastUsage) return undefined;
|
||||||
if (total === 0) return undefined;
|
input = lastUsage.input ?? 0;
|
||||||
return { input, output, total, model };
|
output = lastUsage.output ?? 0;
|
||||||
|
promptTokens =
|
||||||
|
derivePromptTokens(lastUsage) ?? lastUsage.total ?? input + output;
|
||||||
|
const total = lastUsage.total ?? promptTokens + output;
|
||||||
|
if (promptTokens === 0 && total === 0) return undefined;
|
||||||
|
return { input, output, promptTokens, total, model };
|
||||||
} catch {
|
} catch {
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
@@ -190,17 +201,19 @@ export function buildStatusMessage(args: StatusArgs): string {
|
|||||||
entry?.totalTokens ??
|
entry?.totalTokens ??
|
||||||
(entry?.inputTokens ?? 0) + (entry?.outputTokens ?? 0);
|
(entry?.inputTokens ?? 0) + (entry?.outputTokens ?? 0);
|
||||||
|
|
||||||
// Fallback: derive usage from the session transcript if the store lacks it
|
// Prefer prompt-size tokens from the session transcript when it looks larger
|
||||||
if (!totalTokens || totalTokens === 0) {
|
// (cached prompt tokens are often missing from agent meta/store).
|
||||||
const logUsage = readUsageFromSessionLog(entry?.sessionId, args.storePath);
|
const logUsage = readUsageFromSessionLog(entry?.sessionId, args.storePath);
|
||||||
if (logUsage) {
|
if (logUsage) {
|
||||||
totalTokens = logUsage.total;
|
const candidate = logUsage.promptTokens || logUsage.total;
|
||||||
|
if (!totalTokens || totalTokens === 0 || candidate > totalTokens) {
|
||||||
|
totalTokens = candidate;
|
||||||
|
}
|
||||||
if (!model) model = logUsage.model ?? model;
|
if (!model) model = logUsage.model ?? model;
|
||||||
if (!contextTokens && logUsage.model) {
|
if (!contextTokens && logUsage.model) {
|
||||||
contextTokens = lookupContextTokens(logUsage.model) ?? contextTokens;
|
contextTokens = lookupContextTokens(logUsage.model) ?? contextTokens;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
const agentProbe = probeAgentCommand(args.reply?.command);
|
const agentProbe = probeAgentCommand(args.reply?.command);
|
||||||
|
|
||||||
const thinkLevel =
|
const thinkLevel =
|
||||||
|
|||||||
Reference in New Issue
Block a user