feat: add diagnostics events and otel exporter
This commit is contained in:
@@ -18,7 +18,7 @@ import {
|
||||
import type { TypingMode } from "../../config/types.js";
|
||||
import { logVerbose } from "../../globals.js";
|
||||
import { defaultRuntime } from "../../runtime.js";
|
||||
import { resolveModelCostConfig } from "../../utils/usage-format.js";
|
||||
import { estimateUsageCost, resolveModelCostConfig } from "../../utils/usage-format.js";
|
||||
import type { OriginatingChannelType, TemplateContext } from "../templating.js";
|
||||
import { resolveResponseUsageMode, type VerboseLevel } from "../thinking.js";
|
||||
import type { GetReplyOptions, ReplyPayload } from "../types.js";
|
||||
@@ -41,6 +41,7 @@ import { createReplyToModeFilterForChannel, resolveReplyToMode } from "./reply-t
|
||||
import { incrementCompactionCount } from "./session-updates.js";
|
||||
import type { TypingController } from "./typing.js";
|
||||
import { createTypingSignaler } from "./typing-mode.js";
|
||||
import { emitDiagnosticEvent, isDiagnosticsEnabled } from "../../infra/diagnostic-events.js";
|
||||
|
||||
const BLOCK_REPLY_SEND_TIMEOUT_MS = 15_000;
|
||||
|
||||
@@ -296,6 +297,7 @@ export async function runReplyAgent(params: {
|
||||
cleanupTranscripts: true,
|
||||
});
|
||||
try {
|
||||
const runStartedAt = Date.now();
|
||||
const runOutcome = await runAgentTurnWithFallback({
|
||||
commandBody,
|
||||
followupRun,
|
||||
@@ -403,6 +405,43 @@ export async function runReplyAgent(params: {
|
||||
activeSessionEntry?.contextTokens ??
|
||||
DEFAULT_CONTEXT_TOKENS;
|
||||
|
||||
if (isDiagnosticsEnabled(cfg) && hasNonzeroUsage(usage)) {
|
||||
const input = usage.input ?? 0;
|
||||
const output = usage.output ?? 0;
|
||||
const cacheRead = usage.cacheRead ?? 0;
|
||||
const cacheWrite = usage.cacheWrite ?? 0;
|
||||
const promptTokens = input + cacheRead + cacheWrite;
|
||||
const totalTokens = usage.total ?? promptTokens + output;
|
||||
const costConfig = resolveModelCostConfig({
|
||||
provider: providerUsed,
|
||||
model: modelUsed,
|
||||
config: cfg,
|
||||
});
|
||||
const costUsd = estimateUsageCost({ usage, cost: costConfig });
|
||||
emitDiagnosticEvent({
|
||||
type: "model.usage",
|
||||
sessionKey,
|
||||
sessionId: followupRun.run.sessionId,
|
||||
channel: replyToChannel,
|
||||
provider: providerUsed,
|
||||
model: modelUsed,
|
||||
usage: {
|
||||
input,
|
||||
output,
|
||||
cacheRead,
|
||||
cacheWrite,
|
||||
promptTokens,
|
||||
total: totalTokens,
|
||||
},
|
||||
context: {
|
||||
limit: contextTokensUsed,
|
||||
used: totalTokens,
|
||||
},
|
||||
costUsd,
|
||||
durationMs: Date.now() - runStartedAt,
|
||||
});
|
||||
}
|
||||
|
||||
if (storePath && sessionKey) {
|
||||
if (hasNonzeroUsage(usage)) {
|
||||
try {
|
||||
|
||||
@@ -47,6 +47,7 @@ export type ChannelUiMetadata = {
|
||||
const GROUP_LABELS: Record<string, string> = {
|
||||
wizard: "Wizard",
|
||||
update: "Update",
|
||||
diagnostics: "Diagnostics",
|
||||
logging: "Logging",
|
||||
gateway: "Gateway",
|
||||
agents: "Agents",
|
||||
@@ -73,6 +74,7 @@ const GROUP_LABELS: Record<string, string> = {
|
||||
const GROUP_ORDER: Record<string, number> = {
|
||||
wizard: 20,
|
||||
update: 25,
|
||||
diagnostics: 27,
|
||||
gateway: 30,
|
||||
agents: 40,
|
||||
tools: 50,
|
||||
@@ -101,6 +103,17 @@ const FIELD_LABELS: Record<string, string> = {
|
||||
"meta.lastTouchedAt": "Config Last Touched At",
|
||||
"update.channel": "Update Channel",
|
||||
"update.checkOnStart": "Update Check on Start",
|
||||
"diagnostics.enabled": "Diagnostics Enabled",
|
||||
"diagnostics.otel.enabled": "OpenTelemetry Enabled",
|
||||
"diagnostics.otel.endpoint": "OpenTelemetry Endpoint",
|
||||
"diagnostics.otel.protocol": "OpenTelemetry Protocol",
|
||||
"diagnostics.otel.headers": "OpenTelemetry Headers",
|
||||
"diagnostics.otel.serviceName": "OpenTelemetry Service Name",
|
||||
"diagnostics.otel.traces": "OpenTelemetry Traces Enabled",
|
||||
"diagnostics.otel.metrics": "OpenTelemetry Metrics Enabled",
|
||||
"diagnostics.otel.logs": "OpenTelemetry Logs Enabled",
|
||||
"diagnostics.otel.sampleRate": "OpenTelemetry Trace Sample Rate",
|
||||
"diagnostics.otel.flushIntervalMs": "OpenTelemetry Flush Interval (ms)",
|
||||
"gateway.remote.url": "Remote Gateway URL",
|
||||
"gateway.remote.sshTarget": "Remote Gateway SSH Target",
|
||||
"gateway.remote.sshIdentity": "Remote Gateway SSH Identity",
|
||||
|
||||
@@ -102,6 +102,26 @@ export type LoggingConfig = {
|
||||
redactPatterns?: string[];
|
||||
};
|
||||
|
||||
/**
 * OpenTelemetry exporter settings, nested under `diagnostics.otel`.
 *
 * Every field is optional; this type does not define defaults.
 */
export type DiagnosticsOtelConfig = {
  /** Switches the OpenTelemetry exporter on or off. */
  enabled?: boolean;
  /** Endpoint the exporter sends telemetry to. */
  endpoint?: string;
  /** Export protocol. */
  protocol?: "http/protobuf" | "grpc";
  /** Extra headers, keyed by header name. */
  headers?: Record<string, string>;
  /** Service name reported with the telemetry. */
  serviceName?: string;
  /** Enables trace export. */
  traces?: boolean;
  /** Enables metric export. */
  metrics?: boolean;
  /** Enables log export. */
  logs?: boolean;
  /** Trace sample rate (0.0 - 1.0). */
  sampleRate?: number;
  /** Metric export interval (ms). */
  flushIntervalMs?: number;
};
|
||||
|
||||
/** Top-level `diagnostics` section of the configuration. */
export type DiagnosticsConfig = {
  /** Master switch; diagnostics count as enabled only when this is exactly `true`. */
  enabled?: boolean;
  /** OpenTelemetry exporter settings. */
  otel?: DiagnosticsOtelConfig;
};
|
||||
|
||||
export type WebReconnectConfig = {
|
||||
initialMs?: number;
|
||||
maxMs?: number;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { AgentBinding, AgentsConfig } from "./types.agents.js";
|
||||
import type { AuthConfig } from "./types.auth.js";
|
||||
import type { LoggingConfig, SessionConfig, WebConfig } from "./types.base.js";
|
||||
import type { DiagnosticsConfig, LoggingConfig, SessionConfig, WebConfig } from "./types.base.js";
|
||||
import type { BrowserConfig } from "./types.browser.js";
|
||||
import type { ChannelsConfig } from "./types.channels.js";
|
||||
import type { CronConfig } from "./types.cron.js";
|
||||
@@ -53,6 +53,7 @@ export type ClawdbotConfig = {
|
||||
lastRunCommand?: string;
|
||||
lastRunMode?: "local" | "remote";
|
||||
};
|
||||
diagnostics?: DiagnosticsConfig;
|
||||
logging?: LoggingConfig;
|
||||
update?: {
|
||||
/** Update channel for git + npm installs ("stable", "beta", or "dev"). */
|
||||
|
||||
@@ -38,6 +38,27 @@ export const ClawdbotSchema = z
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
diagnostics: z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
otel: z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
endpoint: z.string().optional(),
|
||||
protocol: z.union([z.literal("http/protobuf"), z.literal("grpc")]).optional(),
|
||||
headers: z.record(z.string(), z.string()).optional(),
|
||||
serviceName: z.string().optional(),
|
||||
traces: z.boolean().optional(),
|
||||
metrics: z.boolean().optional(),
|
||||
logs: z.boolean().optional(),
|
||||
sampleRate: z.number().min(0).max(1).optional(),
|
||||
flushIntervalMs: z.number().int().nonnegative().optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
logging: z
|
||||
.object({
|
||||
level: z
|
||||
|
||||
28
src/infra/diagnostic-events.test.ts
Normal file
28
src/infra/diagnostic-events.test.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import { describe, expect, test } from "vitest";
|
||||
|
||||
import {
|
||||
emitDiagnosticEvent,
|
||||
onDiagnosticEvent,
|
||||
resetDiagnosticEventsForTest,
|
||||
} from "./diagnostic-events.js";
|
||||
|
||||
describe("diagnostic-events", () => {
  // Each emitted event must carry the next sequence number, starting at 1
  // after a reset.
  test("emits monotonic seq", () => {
    resetDiagnosticEventsForTest();
    const seqs: number[] = [];
    const stop = onDiagnosticEvent((evt) => seqs.push(evt.seq));

    emitDiagnosticEvent({
      type: "model.usage",
      usage: { total: 1 },
    });
    emitDiagnosticEvent({
      type: "model.usage",
      usage: { total: 2 },
    });

    stop();

    expect(seqs).toEqual([1, 2]);
  });
});
|
||||
60
src/infra/diagnostic-events.ts
Normal file
60
src/infra/diagnostic-events.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import type { ClawdbotConfig } from "../config/config.js";
|
||||
|
||||
/**
 * Diagnostic event describing the token usage of one model run.
 *
 * `seq` and `ts` are assigned by `emitDiagnosticEvent`; emitters supply the
 * remaining fields.
 */
export type DiagnosticUsageEvent = {
  /** Discriminator for this event shape. */
  type: "model.usage";
  /** Emission time from `Date.now()` (milliseconds since the Unix epoch). */
  ts: number;
  /** Sequence number; incremented by one for every emitted event. */
  seq: number;
  sessionKey?: string;
  sessionId?: string;
  channel?: string;
  provider?: string;
  model?: string;
  /** Token counts for the run. */
  usage: {
    input?: number;
    output?: number;
    cacheRead?: number;
    cacheWrite?: number;
    /** Prompt-side tokens; the reply agent reports input + cacheRead + cacheWrite. */
    promptTokens?: number;
    total?: number;
  };
  /** Context-window figures: the token limit and how many tokens were used. */
  context?: {
    limit?: number;
    used?: number;
  };
  /** Estimated cost of the run in USD. */
  costUsd?: number;
  /** Duration of the run in milliseconds. */
  durationMs?: number;
};
|
||||
|
||||
/** Payload type delivered to diagnostic listeners; at present an alias of `DiagnosticUsageEvent`. */
export type DiagnosticEventPayload = DiagnosticUsageEvent;
|
||||
|
||||
// Sequence number of the most recently emitted event; 0 means none emitted yet.
let seq = 0;
// Currently subscribed diagnostic listeners.
const listeners = new Set<(evt: DiagnosticEventPayload) => void>();
|
||||
|
||||
/**
 * Reports whether diagnostics are switched on in the given configuration.
 *
 * @param config - Configuration to inspect; may be omitted.
 * @returns `true` only when `config.diagnostics.enabled` is exactly `true`;
 *   a missing config, missing section, or any other value yields `false`.
 */
export function isDiagnosticsEnabled(config?: ClawdbotConfig): boolean {
  return config?.diagnostics?.enabled === true;
}
|
||||
|
||||
/**
 * Stamps a diagnostic event with the next sequence number and the current
 * time, then delivers it synchronously to every registered listener.
 *
 * Exceptions thrown by a listener are swallowed so that one failing listener
 * cannot affect the emitter or the remaining listeners.
 *
 * @param event - Event fields without `seq` and `ts`; both are assigned here.
 */
export function emitDiagnosticEvent(event: Omit<DiagnosticEventPayload, "seq" | "ts">) {
  seq += 1;
  const enriched: DiagnosticEventPayload = {
    ...event,
    seq,
    ts: Date.now(),
  };
  // Dispatch over a snapshot. Iterating the live Set would revisit any listener
  // that is added (or removed and re-added) during dispatch, so a listener that
  // resubscribes from inside its own handler could keep the loop running.
  for (const listener of [...listeners]) {
    // Skip listeners that an earlier listener unsubscribed during this dispatch.
    if (!listeners.has(listener)) continue;
    try {
      listener(enriched);
    } catch {
      // Ignore listener failures.
    }
  }
}
|
||||
|
||||
/**
 * Subscribes a listener to diagnostic events.
 *
 * Listeners are held in a Set, so registering the same function twice keeps a
 * single subscription.
 *
 * @param listener - Callback invoked with each emitted event.
 * @returns A function that removes this listener when called.
 */
export function onDiagnosticEvent(listener: (evt: DiagnosticEventPayload) => void): () => void {
  listeners.add(listener);
  return () => listeners.delete(listener);
}
|
||||
|
||||
/**
 * Test helper: resets the sequence counter to 0 and removes every registered
 * listener, so the next emitted event has `seq` 1 and no subscribers.
 */
export function resetDiagnosticEventsForTest(): void {
  seq = 0;
  listeners.clear();
}
|
||||
@@ -58,7 +58,11 @@ export type {
|
||||
ChannelToolSend,
|
||||
} from "../channels/plugins/types.js";
|
||||
export type { ChannelConfigSchema, ChannelPlugin } from "../channels/plugins/types.plugin.js";
|
||||
export type { ClawdbotPluginApi } from "../plugins/types.js";
|
||||
export type {
|
||||
ClawdbotPluginApi,
|
||||
ClawdbotPluginService,
|
||||
ClawdbotPluginServiceContext,
|
||||
} from "../plugins/types.js";
|
||||
export type { PluginRuntime } from "../plugins/runtime/types.js";
|
||||
export { emptyPluginConfigSchema } from "../plugins/config-schema.js";
|
||||
export type { ClawdbotConfig } from "../config/config.js";
|
||||
@@ -178,6 +182,12 @@ export { formatDocsLink } from "../terminal/links.js";
|
||||
export type { HookEntry } from "../hooks/types.js";
|
||||
export { normalizeE164 } from "../utils.js";
|
||||
export { missingTargetError } from "../infra/outbound/target-errors.js";
|
||||
export {
|
||||
emitDiagnosticEvent,
|
||||
isDiagnosticsEnabled,
|
||||
onDiagnosticEvent,
|
||||
} from "../infra/diagnostic-events.js";
|
||||
export type { DiagnosticEventPayload, DiagnosticUsageEvent } from "../infra/diagnostic-events.js";
|
||||
|
||||
// Channel: Discord
|
||||
export {
|
||||
|
||||
Reference in New Issue
Block a user