feat: add diagnostics events and otel exporter

This commit is contained in:
Peter Steinberger
2026-01-20 18:56:10 +00:00
parent b1f086b536
commit 5c4079f66c
14 changed files with 1030 additions and 13 deletions

View File

@@ -18,7 +18,7 @@ import {
import type { TypingMode } from "../../config/types.js";
import { logVerbose } from "../../globals.js";
import { defaultRuntime } from "../../runtime.js";
import { resolveModelCostConfig } from "../../utils/usage-format.js";
import { estimateUsageCost, resolveModelCostConfig } from "../../utils/usage-format.js";
import type { OriginatingChannelType, TemplateContext } from "../templating.js";
import { resolveResponseUsageMode, type VerboseLevel } from "../thinking.js";
import type { GetReplyOptions, ReplyPayload } from "../types.js";
@@ -41,6 +41,7 @@ import { createReplyToModeFilterForChannel, resolveReplyToMode } from "./reply-t
import { incrementCompactionCount } from "./session-updates.js";
import type { TypingController } from "./typing.js";
import { createTypingSignaler } from "./typing-mode.js";
import { emitDiagnosticEvent, isDiagnosticsEnabled } from "../../infra/diagnostic-events.js";
const BLOCK_REPLY_SEND_TIMEOUT_MS = 15_000;
@@ -296,6 +297,7 @@ export async function runReplyAgent(params: {
cleanupTranscripts: true,
});
try {
const runStartedAt = Date.now();
const runOutcome = await runAgentTurnWithFallback({
commandBody,
followupRun,
@@ -403,6 +405,43 @@ export async function runReplyAgent(params: {
activeSessionEntry?.contextTokens ??
DEFAULT_CONTEXT_TOKENS;
// Diagnostics: publish a "model.usage" event for this run, but only when
// diagnostics are enabled in the config and the run reported nonzero usage.
if (isDiagnosticsEnabled(cfg) && hasNonzeroUsage(usage)) {
// Counters that were not reported are treated as 0.
const input = usage.input ?? 0;
const output = usage.output ?? 0;
const cacheRead = usage.cacheRead ?? 0;
const cacheWrite = usage.cacheWrite ?? 0;
// Prompt-side tokens: input plus cache reads and cache writes.
const promptTokens = input + cacheRead + cacheWrite;
// Use the reported total when present; otherwise derive it as prompt + output.
const totalTokens = usage.total ?? promptTokens + output;
// Cost is computed by estimateUsageCost from the raw usage and the cost
// config resolved for the provider/model that was actually used.
const costConfig = resolveModelCostConfig({
provider: providerUsed,
model: modelUsed,
config: cfg,
});
const costUsd = estimateUsageCost({ usage, cost: costConfig });
emitDiagnosticEvent({
type: "model.usage",
sessionKey,
sessionId: followupRun.run.sessionId,
channel: replyToChannel,
provider: providerUsed,
model: modelUsed,
usage: {
input,
output,
cacheRead,
cacheWrite,
promptTokens,
total: totalTokens,
},
context: {
limit: contextTokensUsed,
// NOTE(review): context usage is reported as the total token count
// (which includes output tokens) — confirm this is the intended measure.
used: totalTokens,
},
costUsd,
// Elapsed wall-clock time since runStartedAt, which is captured right
// before runAgentTurnWithFallback is awaited.
durationMs: Date.now() - runStartedAt,
});
}
if (storePath && sessionKey) {
if (hasNonzeroUsage(usage)) {
try {

View File

@@ -47,6 +47,7 @@ export type ChannelUiMetadata = {
const GROUP_LABELS: Record<string, string> = {
wizard: "Wizard",
update: "Update",
diagnostics: "Diagnostics",
logging: "Logging",
gateway: "Gateway",
agents: "Agents",
@@ -73,6 +74,7 @@ const GROUP_LABELS: Record<string, string> = {
const GROUP_ORDER: Record<string, number> = {
wizard: 20,
update: 25,
diagnostics: 27,
gateway: 30,
agents: 40,
tools: 50,
@@ -101,6 +103,17 @@ const FIELD_LABELS: Record<string, string> = {
"meta.lastTouchedAt": "Config Last Touched At",
"update.channel": "Update Channel",
"update.checkOnStart": "Update Check on Start",
"diagnostics.enabled": "Diagnostics Enabled",
"diagnostics.otel.enabled": "OpenTelemetry Enabled",
"diagnostics.otel.endpoint": "OpenTelemetry Endpoint",
"diagnostics.otel.protocol": "OpenTelemetry Protocol",
"diagnostics.otel.headers": "OpenTelemetry Headers",
"diagnostics.otel.serviceName": "OpenTelemetry Service Name",
"diagnostics.otel.traces": "OpenTelemetry Traces Enabled",
"diagnostics.otel.metrics": "OpenTelemetry Metrics Enabled",
"diagnostics.otel.logs": "OpenTelemetry Logs Enabled",
"diagnostics.otel.sampleRate": "OpenTelemetry Trace Sample Rate",
"diagnostics.otel.flushIntervalMs": "OpenTelemetry Flush Interval (ms)",
"gateway.remote.url": "Remote Gateway URL",
"gateway.remote.sshTarget": "Remote Gateway SSH Target",
"gateway.remote.sshIdentity": "Remote Gateway SSH Identity",

View File

@@ -102,6 +102,26 @@ export type LoggingConfig = {
redactPatterns?: string[];
};
/**
 * OpenTelemetry settings nested under the `diagnostics.otel` config key.
 * Every field is optional; this type declares no defaults.
 */
export type DiagnosticsOtelConfig = {
/** Whether OpenTelemetry export is enabled. */
enabled?: boolean;
/** Export endpoint. NOTE(review): presumably an OTLP collector URL — confirm against the exporter. */
endpoint?: string;
/** Export protocol; only these two literal values are accepted. */
protocol?: "http/protobuf" | "grpc";
/** String key/value headers. NOTE(review): presumably attached to export requests — confirm. */
headers?: Record<string, string>;
/** Service name for the exported telemetry. */
serviceName?: string;
/** Per-signal toggle for traces. */
traces?: boolean;
/** Per-signal toggle for metrics. */
metrics?: boolean;
/** Per-signal toggle for logs. */
logs?: boolean;
/** Trace sample rate (0.0 - 1.0). */
sampleRate?: number;
/** Metric export interval (ms). */
flushIntervalMs?: number;
};
/** Top-level `diagnostics` section of the config. */
export type DiagnosticsConfig = {
/** Master switch for diagnostics; both fields are optional. */
enabled?: boolean;
/** OpenTelemetry-specific settings. */
otel?: DiagnosticsOtelConfig;
};
export type WebReconnectConfig = {
initialMs?: number;
maxMs?: number;

View File

@@ -1,6 +1,6 @@
import type { AgentBinding, AgentsConfig } from "./types.agents.js";
import type { AuthConfig } from "./types.auth.js";
import type { LoggingConfig, SessionConfig, WebConfig } from "./types.base.js";
import type { DiagnosticsConfig, LoggingConfig, SessionConfig, WebConfig } from "./types.base.js";
import type { BrowserConfig } from "./types.browser.js";
import type { ChannelsConfig } from "./types.channels.js";
import type { CronConfig } from "./types.cron.js";
@@ -53,6 +53,7 @@ export type ClawdbotConfig = {
lastRunCommand?: string;
lastRunMode?: "local" | "remote";
};
diagnostics?: DiagnosticsConfig;
logging?: LoggingConfig;
update?: {
/** Update channel for git + npm installs ("stable", "beta", or "dev"). */

View File

@@ -38,6 +38,27 @@ export const ClawdbotSchema = z
})
.strict()
.optional(),
diagnostics: z
.object({
enabled: z.boolean().optional(),
otel: z
.object({
enabled: z.boolean().optional(),
endpoint: z.string().optional(),
protocol: z.union([z.literal("http/protobuf"), z.literal("grpc")]).optional(),
headers: z.record(z.string(), z.string()).optional(),
serviceName: z.string().optional(),
traces: z.boolean().optional(),
metrics: z.boolean().optional(),
logs: z.boolean().optional(),
sampleRate: z.number().min(0).max(1).optional(),
flushIntervalMs: z.number().int().nonnegative().optional(),
})
.strict()
.optional(),
})
.strict()
.optional(),
logging: z
.object({
level: z

View File

@@ -0,0 +1,28 @@
import { describe, expect, test } from "vitest";
import {
emitDiagnosticEvent,
onDiagnosticEvent,
resetDiagnosticEventsForTest,
} from "./diagnostic-events.js";
describe("diagnostic-events", () => {
  // Two consecutive emits must be observed with sequence numbers 1 and 2,
  // in that order, after the module state has been reset.
  test("emits monotonic seq", async () => {
    resetDiagnosticEventsForTest();

    const observed: number[] = [];
    const unsubscribe = onDiagnosticEvent((event) => {
      observed.push(event.seq);
    });

    for (const total of [1, 2]) {
      emitDiagnosticEvent({
        type: "model.usage",
        usage: { total },
      });
    }
    unsubscribe();

    expect(observed).toEqual([1, 2]);
  });
});

View File

@@ -0,0 +1,60 @@
import type { ClawdbotConfig } from "../config/config.js";
/**
 * Diagnostic event describing token usage for a model run.
 * `ts` and `seq` are filled in by `emitDiagnosticEvent`; callers supply the
 * remaining fields.
 */
export type DiagnosticUsageEvent = {
// Discriminator identifying this event shape.
type: "model.usage";
// Emission time taken from Date.now() (milliseconds since the Unix epoch).
ts: number;
// Counter that is incremented by one for every event emitted by this module.
seq: number;
// Optional identifiers describing where the usage came from.
sessionKey?: string;
sessionId?: string;
channel?: string;
provider?: string;
model?: string;
// Token counters; each one is optional, so consumers must handle missing values.
usage: {
input?: number;
output?: number;
cacheRead?: number;
cacheWrite?: number;
promptTokens?: number;
total?: number;
};
// Context-window accounting: the limit and the amount used.
context?: {
limit?: number;
used?: number;
};
// Cost of the run. NOTE(review): US dollars per the field name — confirm with the emitter.
costUsd?: number;
// Duration of the run in milliseconds.
durationMs?: number;
};
/** Every diagnostic event shape that can be emitted; currently only usage events. */
export type DiagnosticEventPayload = DiagnosticUsageEvent;

/** Callback invoked with each fully populated diagnostic event. */
type DiagnosticEventListener = (evt: DiagnosticEventPayload) => void;

// Module-level state shared by every emitter and subscriber of this module.
let seq = 0;
const listeners = new Set<DiagnosticEventListener>();

/**
 * Reports whether diagnostics are switched on.
 *
 * @param config - Application config; may be omitted.
 * @returns `true` only when `config.diagnostics.enabled` is exactly `true`;
 *   a missing config, missing section, or any other value yields `false`.
 */
export function isDiagnosticsEnabled(config?: ClawdbotConfig): boolean {
  return config?.diagnostics?.enabled === true;
}

/**
 * Stamps an event with the next sequence number and the current time, then
 * delivers it synchronously to the registered listeners.
 *
 * Delivery works on a snapshot of the listener set taken before the first
 * callback runs, so a listener subscribed while an event is being dispatched
 * only sees later events (and cannot extend the current dispatch loop). A
 * listener that gets unsubscribed mid-dispatch before its turn is skipped.
 *
 * @param event - Event fields without `seq` and `ts`, which are assigned here.
 */
export function emitDiagnosticEvent(event: Omit<DiagnosticEventPayload, "seq" | "ts">): void {
  const enriched: DiagnosticEventPayload = {
    ...event,
    seq: (seq += 1),
    ts: Date.now(),
  };
  for (const listener of Array.from(listeners)) {
    // Honor unsubscriptions that happened earlier in this same dispatch.
    if (!listeners.has(listener)) continue;
    try {
      listener(enriched);
    } catch {
      // Deliberately best-effort: a failing listener must neither break the
      // emitter nor prevent the remaining listeners from being notified.
    }
  }
}

/**
 * Registers a listener for all diagnostic events.
 *
 * @param listener - Callback invoked synchronously for each emitted event.
 * @returns A function that unregisters the listener.
 */
export function onDiagnosticEvent(listener: (evt: DiagnosticEventPayload) => void): () => void {
  listeners.add(listener);
  return () => listeners.delete(listener);
}

/** Test helper: resets the sequence counter to 0 and drops every listener. */
export function resetDiagnosticEventsForTest(): void {
  seq = 0;
  listeners.clear();
}

View File

@@ -58,7 +58,11 @@ export type {
ChannelToolSend,
} from "../channels/plugins/types.js";
export type { ChannelConfigSchema, ChannelPlugin } from "../channels/plugins/types.plugin.js";
export type { ClawdbotPluginApi } from "../plugins/types.js";
export type {
ClawdbotPluginApi,
ClawdbotPluginService,
ClawdbotPluginServiceContext,
} from "../plugins/types.js";
export type { PluginRuntime } from "../plugins/runtime/types.js";
export { emptyPluginConfigSchema } from "../plugins/config-schema.js";
export type { ClawdbotConfig } from "../config/config.js";
@@ -178,6 +182,12 @@ export { formatDocsLink } from "../terminal/links.js";
export type { HookEntry } from "../hooks/types.js";
export { normalizeE164 } from "../utils.js";
export { missingTargetError } from "../infra/outbound/target-errors.js";
export {
emitDiagnosticEvent,
isDiagnosticsEnabled,
onDiagnosticEvent,
} from "../infra/diagnostic-events.js";
export type { DiagnosticEventPayload, DiagnosticUsageEvent } from "../infra/diagnostic-events.js";
// Channel: Discord
export {