feat: add usage cost reporting

This commit is contained in:
Peter Steinberger
2026-01-09 02:21:17 +00:00
parent dfbee10377
commit 151523f47b
29 changed files with 696 additions and 184 deletions

View File

@@ -2,12 +2,13 @@ import crypto from "node:crypto";
import fs from "node:fs";
import { lookupContextTokens } from "../../agents/context.js";
import { DEFAULT_CONTEXT_TOKENS } from "../../agents/defaults.js";
import { resolveModelAuthMode } from "../../agents/model-auth.js";
import { runWithModelFallback } from "../../agents/model-fallback.js";
import {
queueEmbeddedPiMessage,
runEmbeddedPiAgent,
} from "../../agents/pi-embedded.js";
import { hasNonzeroUsage } from "../../agents/usage.js";
import { hasNonzeroUsage, type NormalizedUsage } from "../../agents/usage.js";
import {
loadSessionStore,
resolveSessionTranscriptPath,
@@ -18,6 +19,12 @@ import type { TypingMode } from "../../config/types.js";
import { logVerbose } from "../../globals.js";
import { registerAgentRunContext } from "../../infra/agent-events.js";
import { defaultRuntime } from "../../runtime.js";
import {
estimateUsageCost,
formatTokenCount,
formatUsd,
resolveModelCostConfig,
} from "../../utils/usage-format.js";
import { stripHeartbeatToken } from "../heartbeat.js";
import type { OriginatingChannelType, TemplateContext } from "../templating.js";
import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js";
@@ -62,6 +69,65 @@ const formatBunFetchSocketError = (message: string) => {
].join("\n");
};
/**
 * Builds the one-line "Usage: …" summary for a completed agent run.
 *
 * Returns `null` when there is no usage object, or when neither the input nor
 * the output token count is a number. A single missing count is rendered as
 * "?". The cost suffix is only attempted when `showCost` is set, and the
 * estimate itself is only computed when both counts are numbers; the suffix
 * is omitted whenever `formatUsd` yields a falsy label.
 */
const formatResponseUsageLine = (params: {
  usage?: NormalizedUsage;
  showCost: boolean;
  costConfig?: {
    input: number;
    output: number;
    cacheRead: number;
    cacheWrite: number;
  };
}): string | null => {
  const { usage, showCost, costConfig } = params;
  if (!usage) return null;

  const { input, output, cacheRead, cacheWrite } = usage;
  if (typeof input !== "number" && typeof output !== "number") return null;

  // Missing counts are shown as a placeholder rather than dropped.
  const describeCount = (count: unknown): string =>
    typeof count === "number" ? formatTokenCount(count) : "?";
  const summary = `Usage: ${describeCount(input)} in / ${describeCount(output)} out`;
  if (!showCost) return summary;

  // An estimate needs both token counts; otherwise formatUsd receives undefined.
  let estimate: ReturnType<typeof estimateUsageCost> | undefined;
  if (typeof input === "number" && typeof output === "number") {
    estimate = estimateUsageCost({
      usage: { input, output, cacheRead, cacheWrite },
      cost: costConfig,
    });
  }
  const usdLabel = formatUsd(estimate);
  return usdLabel ? `${summary} · est ${usdLabel}` : summary;
};
/**
 * Returns a new payload list with `line` attached to the reply.
 *
 * The line is appended (newline-separated) to the text of the last payload
 * that already has non-empty text. When no payload carries text, a new
 * text-only payload holding `line` is added at the end instead. The input
 * array and its payload objects are never mutated.
 */
const appendUsageLine = (
  payloads: ReplyPayload[],
  line: string,
): ReplyPayload[] => {
  // Scan from the end for the last payload with truthy text.
  let target = payloads.length - 1;
  while (target >= 0 && !payloads[target]?.text) {
    target -= 1;
  }
  if (target < 0) return [...payloads, { text: line }];

  const base = payloads[target];
  const baseText = base.text ?? "";
  // Avoid a blank line when the existing text already ends with a newline.
  const joined = baseText.endsWith("\n")
    ? `${baseText}${line}`
    : `${baseText}\n${line}`;
  return payloads.map((payload, position) =>
    position === target ? { ...base, text: joined } : payload,
  );
};
const withTimeout = async <T>(
promise: Promise<T>,
timeoutMs: number,
@@ -191,6 +257,7 @@ export async function runReplyAgent(params: {
replyToChannel,
);
const applyReplyToMode = createReplyToModeFilter(replyToMode);
const cfg = followupRun.run.config;
if (shouldSteer && isStreaming) {
const steered = queueEmbeddedPiMessage(
@@ -242,6 +309,7 @@ export async function runReplyAgent(params: {
let didLogHeartbeatStrip = false;
let autoCompactionCompleted = false;
let responseUsageLine: string | undefined;
try {
const runId = crypto.randomUUID();
if (sessionKey) {
@@ -641,20 +709,20 @@ export async function runReplyAgent(params: {
await typingSignals.signalRunStart();
}
if (sessionStore && sessionKey) {
const usage = runResult.meta.agentMeta?.usage;
const modelUsed =
runResult.meta.agentMeta?.model ?? fallbackModel ?? defaultModel;
const providerUsed =
runResult.meta.agentMeta?.provider ??
fallbackProvider ??
followupRun.run.provider;
const contextTokensUsed =
agentCfgContextTokens ??
lookupContextTokens(modelUsed) ??
sessionEntry?.contextTokens ??
DEFAULT_CONTEXT_TOKENS;
const usage = runResult.meta.agentMeta?.usage;
const modelUsed =
runResult.meta.agentMeta?.model ?? fallbackModel ?? defaultModel;
const providerUsed =
runResult.meta.agentMeta?.provider ??
fallbackProvider ??
followupRun.run.provider;
const contextTokensUsed =
agentCfgContextTokens ??
lookupContextTokens(modelUsed) ??
sessionEntry?.contextTokens ??
DEFAULT_CONTEXT_TOKENS;
if (sessionStore && sessionKey) {
if (hasNonzeroUsage(usage)) {
const entry = sessionEntry ?? sessionStore[sessionKey];
if (entry) {
@@ -694,6 +762,29 @@ export async function runReplyAgent(params: {
}
}
const responseUsageEnabled =
(sessionEntry?.responseUsage ??
(sessionKey
? sessionStore?.[sessionKey]?.responseUsage
: undefined)) === "on";
if (responseUsageEnabled && hasNonzeroUsage(usage)) {
const authMode = resolveModelAuthMode(providerUsed, cfg);
const showCost = authMode === "api-key";
const costConfig = showCost
? resolveModelCostConfig({
provider: providerUsed,
model: modelUsed,
config: cfg,
})
: undefined;
const formatted = formatResponseUsageLine({
usage,
showCost,
costConfig,
});
if (formatted) responseUsageLine = formatted;
}
// If verbose is enabled and this is a new session, prepend a session hint.
let finalPayloads = replyPayloads;
if (autoCompactionCompleted) {
@@ -717,6 +808,9 @@ export async function runReplyAgent(params: {
...finalPayloads,
];
}
if (responseUsageLine) {
finalPayloads = appendUsageLine(finalPayloads, responseUsageLine);
}
return finalizeWithFollowup(
finalPayloads.length === 1 ? finalPayloads[0] : finalPayloads,

View File

@@ -1,11 +1,5 @@
import {
ensureAuthProfileStore,
listProfilesForProvider,
} from "../../agents/auth-profiles.js";
import {
getCustomProviderApiKey,
resolveEnvApiKey,
} from "../../agents/model-auth.js";
import crypto from "node:crypto";
import { resolveModelAuthMode } from "../../agents/model-auth.js";
import { normalizeProviderId } from "../../agents/model-selection.js";
import {
abortEmbeddedPiRun,
@@ -55,8 +49,10 @@ import type {
ElevatedLevel,
ReasoningLevel,
ThinkLevel,
UsageDisplayLevel,
VerboseLevel,
} from "../thinking.js";
import { normalizeUsageDisplay } from "../thinking.js";
import type { ReplyPayload } from "../types.js";
import { isAbortTrigger, setAbortMemory } from "./abort.js";
import type { InlineDirectives } from "./directive-handling.js";
@@ -109,36 +105,6 @@ export type CommandContext = {
to?: string;
};
/**
 * Describes how the given provider is authenticated, as a short label.
 *
 * Resolution order:
 *  1. Stored auth profiles for the provider: "mixed" when both oauth and
 *     api_key profiles exist, otherwise "oauth" or "api-key".
 *  2. An environment-resolved key: "oauth" when its source mentions
 *     `OAUTH_TOKEN`, otherwise "api-key".
 *  3. A custom provider API key from the config: "api-key".
 *
 * Returns `undefined` for a missing/blank provider and "unknown" when none of
 * the lookups above produce a credential.
 */
function resolveModelAuthLabel(
  provider?: string,
  cfg?: ClawdbotConfig,
): string | undefined {
  const providerId = provider?.trim();
  if (!providerId) return undefined;

  const store = ensureAuthProfileStore();
  let sawOauth = false;
  let sawApiKey = false;
  for (const profileId of listProfilesForProvider(store, providerId)) {
    const kind = store.profiles[profileId]?.type;
    if (kind === "oauth") sawOauth = true;
    else if (kind === "api_key") sawApiKey = true;
  }
  if (sawOauth && sawApiKey) return "mixed";
  if (sawOauth) return "oauth";
  if (sawApiKey) return "api-key";

  // No usable stored profile: fall back to environment credentials.
  const envKey = resolveEnvApiKey(providerId);
  if (envKey?.apiKey) {
    return envKey.source.includes("OAUTH_TOKEN") ? "oauth" : "api-key";
  }

  return getCustomProviderApiKey(cfg, providerId) ? "api-key" : "unknown";
}
function extractCompactInstructions(params: {
rawBody?: string;
ctx: MsgContext;
@@ -468,6 +434,7 @@ export async function handleCommands(params: {
defaultGroupActivation())
: undefined;
const statusText = buildStatusMessage({
config: cfg,
agent: {
...cfg.agent,
model: {
@@ -488,7 +455,7 @@ export async function handleCommands(params: {
resolvedVerbose: resolvedVerboseLevel,
resolvedReasoning: resolvedReasoningLevel,
resolvedElevated: resolvedElevatedLevel,
modelAuth: resolveModelAuthLabel(provider, cfg),
modelAuth: resolveModelAuthMode(provider, cfg),
usageLine: usageLine ?? undefined,
queue: {
mode: queueSettings.mode,
@@ -503,6 +470,51 @@ export async function handleCommands(params: {
return { shouldContinue: false, reply: { text: statusText } };
}
const costRequested =
command.commandBodyNormalized === "/cost" ||
command.commandBodyNormalized.startsWith("/cost ");
if (allowTextCommands && costRequested) {
if (!command.isAuthorizedSender) {
logVerbose(
`Ignoring /cost from unauthorized sender: ${command.senderE164 || "<unknown>"}`,
);
return { shouldContinue: false };
}
const rawArgs = command.commandBodyNormalized.slice("/cost".length).trim();
const normalized =
rawArgs.length > 0 ? normalizeUsageDisplay(rawArgs) : undefined;
if (rawArgs.length > 0 && !normalized) {
return {
shouldContinue: false,
reply: { text: "⚙️ Usage: /cost on|off" },
};
}
const current: UsageDisplayLevel =
sessionEntry?.responseUsage === "on" ? "on" : "off";
const next = normalized ?? (current === "on" ? "off" : "on");
if (sessionStore && sessionKey) {
const entry = sessionEntry ??
sessionStore[sessionKey] ?? {
sessionId: crypto.randomUUID(),
updatedAt: Date.now(),
};
if (next === "off") delete entry.responseUsage;
else entry.responseUsage = next;
entry.updatedAt = Date.now();
sessionStore[sessionKey] = entry;
if (storePath) {
await saveSessionStore(storePath, sessionStore);
}
}
return {
shouldContinue: false,
reply: {
text:
next === "on" ? "⚙️ Usage line enabled." : "⚙️ Usage line disabled.",
},
};
}
const stopRequested = command.commandBodyNormalized === "/stop";
if (allowTextCommands && stopRequested) {
if (!command.isAuthorizedSender) {

View File

@@ -194,6 +194,7 @@ export async function initSessionState(params: {
// Persist previously stored thinking/verbose levels when present.
thinkingLevel: persistedThinking ?? baseEntry?.thinkingLevel,
verboseLevel: persistedVerbose ?? baseEntry?.verboseLevel,
responseUsage: baseEntry?.responseUsage,
modelOverride: persistedModelOverride ?? baseEntry?.modelOverride,
providerOverride: persistedProviderOverride ?? baseEntry?.providerOverride,
sendPolicy: baseEntry?.sendPolicy,