fix: emit diagnostics across channels

This commit is contained in:
Peter Steinberger
2026-01-21 00:29:42 +00:00
parent e447233533
commit ec01e5c7e6
8 changed files with 318 additions and 275 deletions

View File

@@ -13,6 +13,11 @@ const mocks = vi.hoisted(() => ({
aborted: false, aborted: false,
})), })),
})); }));
// Mock functions standing in for the diagnostic logging helpers.
// Created through vi.hoisted so they can be referenced from a vi.mock factory.
const diagnosticMocks = vi.hoisted(() => ({
logMessageQueued: vi.fn(),
logMessageProcessed: vi.fn(),
logSessionStateChange: vi.fn(),
}));
vi.mock("./route-reply.js", () => ({ vi.mock("./route-reply.js", () => ({
isRoutableChannel: (channel: string | undefined) => isRoutableChannel: (channel: string | undefined) =>
@@ -34,6 +39,12 @@ vi.mock("./abort.js", () => ({
}, },
})); }));
// Replace the diagnostic logging module with the hoisted mock functions so
// tests can assert on which diagnostics were emitted.
vi.mock("../../logging/diagnostic.js", () => ({
logMessageQueued: diagnosticMocks.logMessageQueued,
logMessageProcessed: diagnosticMocks.logMessageProcessed,
logSessionStateChange: diagnosticMocks.logSessionStateChange,
}));
const { dispatchReplyFromConfig } = await import("./dispatch-from-config.js"); const { dispatchReplyFromConfig } = await import("./dispatch-from-config.js");
const { resetInboundDedupe } = await import("./inbound-dedupe.js"); const { resetInboundDedupe } = await import("./inbound-dedupe.js");
@@ -50,6 +61,9 @@ function createDispatcher(): ReplyDispatcher {
describe("dispatchReplyFromConfig", () => { describe("dispatchReplyFromConfig", () => {
beforeEach(() => { beforeEach(() => {
resetInboundDedupe(); resetInboundDedupe();
diagnosticMocks.logMessageQueued.mockReset();
diagnosticMocks.logMessageProcessed.mockReset();
diagnosticMocks.logSessionStateChange.mockReset();
}); });
it("does not route when Provider matches OriginatingChannel (even if Surface is missing)", async () => { it("does not route when Provider matches OriginatingChannel (even if Surface is missing)", async () => {
mocks.tryFastAbortFromMessage.mockResolvedValue({ mocks.tryFastAbortFromMessage.mockResolvedValue({
@@ -186,4 +200,74 @@ describe("dispatchReplyFromConfig", () => {
expect(replyResolver).toHaveBeenCalledTimes(1); expect(replyResolver).toHaveBeenCalledTimes(1);
}); });
// With diagnostics enabled and a SessionKey on the context, one dispatch is
// expected to log a queued message, a "processing" state change and a
// "completed" processed event for the slack channel.
it("emits diagnostics when enabled", async () => {
// Fast-abort reports "not handled", so its early-return branch is not taken.
mocks.tryFastAbortFromMessage.mockResolvedValue({
handled: false,
aborted: false,
});
const cfg = { diagnostics: { enabled: true } } as ClawdbotConfig;
const dispatcher = createDispatcher();
const ctx = buildTestCtx({
Provider: "slack",
Surface: "slack",
SessionKey: "agent:main:main",
MessageSid: "msg-1",
To: "slack:C123",
});
const replyResolver = async () => ({ text: "hi" }) satisfies ReplyPayload;
await dispatchReplyFromConfig({ ctx, cfg, dispatcher, replyResolver });
expect(diagnosticMocks.logMessageQueued).toHaveBeenCalledTimes(1);
expect(diagnosticMocks.logSessionStateChange).toHaveBeenCalledWith({
sessionKey: "agent:main:main",
state: "processing",
reason: "message_start",
});
// Only the fields of interest are pinned; other payload fields are not checked.
expect(diagnosticMocks.logMessageProcessed).toHaveBeenCalledWith(
expect.objectContaining({
channel: "slack",
outcome: "completed",
sessionKey: "agent:main:main",
}),
);
});
// Dispatches the same context twice. The reply resolver is expected to run
// only once, and a processed event with outcome "skipped" / reason
// "duplicate" is expected to be logged.
it("marks diagnostics skipped for duplicate inbound messages", async () => {
mocks.tryFastAbortFromMessage.mockResolvedValue({
handled: false,
aborted: false,
});
const cfg = { diagnostics: { enabled: true } } as ClawdbotConfig;
const ctx = buildTestCtx({
Provider: "whatsapp",
OriginatingChannel: "whatsapp",
OriginatingTo: "whatsapp:+15555550123",
MessageSid: "msg-dup",
});
const replyResolver = vi.fn(async () => ({ text: "hi" }) as ReplyPayload);
// First delivery of the message.
await dispatchReplyFromConfig({
ctx,
cfg,
dispatcher: createDispatcher(),
replyResolver,
});
// Same ctx (same MessageSid) again, with a fresh dispatcher.
await dispatchReplyFromConfig({
ctx,
cfg,
dispatcher: createDispatcher(),
replyResolver,
});
expect(replyResolver).toHaveBeenCalledTimes(1);
expect(diagnosticMocks.logMessageProcessed).toHaveBeenCalledWith(
expect.objectContaining({
channel: "whatsapp",
outcome: "skipped",
reason: "duplicate",
}),
);
});
}); });

View File

@@ -1,5 +1,11 @@
import type { ClawdbotConfig } from "../../config/config.js"; import type { ClawdbotConfig } from "../../config/config.js";
import { logVerbose } from "../../globals.js"; import { logVerbose } from "../../globals.js";
import { isDiagnosticsEnabled } from "../../infra/diagnostic-events.js";
import {
logMessageProcessed,
logMessageQueued,
logSessionStateChange,
} from "../../logging/diagnostic.js";
import { getReplyFromConfig } from "../reply.js"; import { getReplyFromConfig } from "../reply.js";
import type { FinalizedMsgContext } from "../templating.js"; import type { FinalizedMsgContext } from "../templating.js";
import type { GetReplyOptions, ReplyPayload } from "../types.js"; import type { GetReplyOptions, ReplyPayload } from "../types.js";
@@ -21,8 +27,55 @@ export async function dispatchReplyFromConfig(params: {
replyResolver?: typeof getReplyFromConfig; replyResolver?: typeof getReplyFromConfig;
}): Promise<DispatchFromConfigResult> { }): Promise<DispatchFromConfigResult> {
const { ctx, cfg, dispatcher } = params; const { ctx, cfg, dispatcher } = params;
// Diagnostics context, computed once per dispatch call.
const diagnosticsEnabled = isDiagnosticsEnabled(cfg);
// Channel label: Surface first, then Provider, then "unknown"; always lower-cased.
const channel = String(ctx.Surface ?? ctx.Provider ?? "unknown").toLowerCase();
// Chat identifier: the destination if present, otherwise the sender.
const chatId = ctx.To ?? ctx.From;
// First available message id among the single, first and last sid fields.
const messageId = ctx.MessageSid ?? ctx.MessageSidFirst ?? ctx.MessageSidLast;
const sessionKey = ctx.SessionKey;
// Start timestamp for duration reporting; left at 0 when diagnostics are disabled.
const startTime = diagnosticsEnabled ? Date.now() : 0;
// Session state events need diagnostics enabled and a truthy session key.
const canTrackSession = diagnosticsEnabled && Boolean(sessionKey);
/**
 * Emits a "message processed" diagnostic for this dispatch, including the
 * time elapsed since `startTime`. Does nothing when diagnostics are disabled.
 *
 * @param outcome - Final result of handling the message.
 * @param opts - Optional `reason` and/or `error` text forwarded with the event.
 */
const recordProcessed = (
  outcome: "completed" | "skipped" | "error",
  opts?: { reason?: string; error?: string },
) => {
  if (diagnosticsEnabled) {
    const durationMs = Date.now() - startTime;
    logMessageProcessed({
      channel,
      chatId,
      messageId,
      sessionKey,
      durationMs,
      outcome,
      reason: opts?.reason,
      error: opts?.error,
    });
  }
};
/**
 * Logs the message as queued (source "dispatch") and then records the session
 * entering the "processing" state. Does nothing unless session tracking is
 * possible, i.e. diagnostics are enabled and a session key is present.
 */
const markProcessing = () => {
  // The explicit sessionKey check also narrows its type for the calls below.
  if (canTrackSession && sessionKey) {
    logMessageQueued({ sessionKey, channel, source: "dispatch" });
    logSessionStateChange({ sessionKey, state: "processing", reason: "message_start" });
  }
};
/**
 * Records the session returning to the "idle" state. Does nothing unless
 * diagnostics are enabled and a session key is present.
 *
 * @param reason - Reason string attached to the state change event.
 */
const markIdle = (reason: string) => {
  // The explicit sessionKey check also narrows its type for the call below.
  if (canTrackSession && sessionKey) {
    logSessionStateChange({ sessionKey, state: "idle", reason });
  }
};
if (shouldSkipDuplicateInbound(ctx)) { if (shouldSkipDuplicateInbound(ctx)) {
recordProcessed("skipped", { reason: "duplicate" });
return { queuedFinal: false, counts: dispatcher.getQueuedCounts() }; return { queuedFinal: false, counts: dispatcher.getQueuedCounts() };
} }
@@ -68,6 +121,9 @@ export async function dispatchReplyFromConfig(params: {
} }
}; };
markProcessing();
try {
const fastAbort = await tryFastAbortFromMessage({ ctx, cfg }); const fastAbort = await tryFastAbortFromMessage({ ctx, cfg });
if (fastAbort.handled) { if (fastAbort.handled) {
const payload = { const payload = {
@@ -98,6 +154,8 @@ export async function dispatchReplyFromConfig(params: {
await dispatcher.waitForIdle(); await dispatcher.waitForIdle();
const counts = dispatcher.getQueuedCounts(); const counts = dispatcher.getQueuedCounts();
counts.final += routedFinalCount; counts.final += routedFinalCount;
recordProcessed("completed", { reason: "fast_abort" });
markIdle("message_completed");
return { queuedFinal, counts }; return { queuedFinal, counts };
} }
@@ -158,5 +216,12 @@ export async function dispatchReplyFromConfig(params: {
const counts = dispatcher.getQueuedCounts(); const counts = dispatcher.getQueuedCounts();
counts.final += routedFinalCount; counts.final += routedFinalCount;
recordProcessed("completed");
markIdle("message_completed");
return { queuedFinal, counts }; return { queuedFinal, counts };
} catch (err) {
recordProcessed("error", { error: String(err) });
markIdle("message_error");
throw err;
}
} }

View File

@@ -13,6 +13,7 @@ import {
readConfigFileSnapshot, readConfigFileSnapshot,
writeConfigFile, writeConfigFile,
} from "../config/config.js"; } from "../config/config.js";
import { isDiagnosticsEnabled } from "../infra/diagnostic-events.js";
import { applyPluginAutoEnable } from "../config/plugin-auto-enable.js"; import { applyPluginAutoEnable } from "../config/plugin-auto-enable.js";
import { clearAgentRunContext, onAgentEvent } from "../infra/agent-events.js"; import { clearAgentRunContext, onAgentEvent } from "../infra/agent-events.js";
import { onHeartbeatEvent } from "../infra/heartbeat-events.js"; import { onHeartbeatEvent } from "../infra/heartbeat-events.js";
@@ -26,6 +27,7 @@ import {
} from "../infra/skills-remote.js"; } from "../infra/skills-remote.js";
import { scheduleGatewayUpdateCheck } from "../infra/update-startup.js"; import { scheduleGatewayUpdateCheck } from "../infra/update-startup.js";
import { setGatewaySigusr1RestartPolicy } from "../infra/restart.js"; import { setGatewaySigusr1RestartPolicy } from "../infra/restart.js";
import { startDiagnosticHeartbeat, stopDiagnosticHeartbeat } from "../logging/diagnostic.js";
import { createSubsystemLogger, runtimeForLogger } from "../logging/subsystem.js"; import { createSubsystemLogger, runtimeForLogger } from "../logging/subsystem.js";
import type { PluginServicesHandle } from "../plugins/services.js"; import type { PluginServicesHandle } from "../plugins/services.js";
import type { RuntimeEnv } from "../runtime.js"; import type { RuntimeEnv } from "../runtime.js";
@@ -198,6 +200,10 @@ export async function startGatewayServer(
} }
const cfgAtStart = loadConfig(); const cfgAtStart = loadConfig();
const diagnosticsEnabled = isDiagnosticsEnabled(cfgAtStart);
if (diagnosticsEnabled) {
startDiagnosticHeartbeat();
}
setGatewaySigusr1RestartPolicy({ allowExternal: cfgAtStart.commands?.restart === true }); setGatewaySigusr1RestartPolicy({ allowExternal: cfgAtStart.commands?.restart === true });
initSubagentRegistry(); initSubagentRegistry();
const defaultAgentId = resolveDefaultAgentId(cfgAtStart); const defaultAgentId = resolveDefaultAgentId(cfgAtStart);
@@ -533,5 +539,12 @@ export async function startGatewayServer(
httpServer, httpServer,
}); });
return { close }; return {
close: async (opts) => {
if (diagnosticsEnabled) {
stopDiagnosticHeartbeat();
}
await close(opts);
},
};
} }

View File

@@ -141,9 +141,11 @@ export type DiagnosticEventPayload =
| DiagnosticRunAttemptEvent | DiagnosticRunAttemptEvent
| DiagnosticHeartbeatEvent; | DiagnosticHeartbeatEvent;
type DiagnosticEventInput<T extends DiagnosticEventPayload = DiagnosticEventPayload> = export type DiagnosticEventInput = DiagnosticEventPayload extends infer Event
T extends DiagnosticEventPayload ? Omit<T, "seq" | "ts"> : never; ? Event extends DiagnosticEventPayload
? Omit<Event, "seq" | "ts">
: never
: never;
let seq = 0; let seq = 0;
const listeners = new Set<(evt: DiagnosticEventPayload) => void>(); const listeners = new Set<(evt: DiagnosticEventPayload) => void>();
@@ -151,14 +153,12 @@ export function isDiagnosticsEnabled(config?: ClawdbotConfig): boolean {
return config?.diagnostics?.enabled === true; return config?.diagnostics?.enabled === true;
} }
export function emitDiagnosticEvent<T extends DiagnosticEventPayload>( export function emitDiagnosticEvent(event: DiagnosticEventInput) {
event: DiagnosticEventInput<T>,
) {
const enriched = { const enriched = {
...event, ...event,
seq: (seq += 1), seq: (seq += 1),
ts: Date.now(), ts: Date.now(),
} as DiagnosticEventPayload; } satisfies DiagnosticEventPayload;
for (const listener of listeners) { for (const listener of listeners) {
try { try {
listener(enriched); listener(enriched);

View File

@@ -338,6 +338,7 @@ export function startDiagnosticHeartbeat() {
} }
} }
}, 30_000); }, 30_000);
heartbeatInterval.unref?.();
} }
export function stopDiagnosticHeartbeat() { export function stopDiagnosticHeartbeat() {

View File

@@ -2,14 +2,6 @@ import { beforeEach, describe, expect, it, vi } from "vitest";
const buildTelegramMessageContext = vi.hoisted(() => vi.fn()); const buildTelegramMessageContext = vi.hoisted(() => vi.fn());
const dispatchTelegramMessage = vi.hoisted(() => vi.fn()); const dispatchTelegramMessage = vi.hoisted(() => vi.fn());
const logMessageQueued = vi.hoisted(() => vi.fn());
const logMessageProcessed = vi.hoisted(() => vi.fn());
const logSessionStateChange = vi.hoisted(() => vi.fn());
const diagnosticLogger = vi.hoisted(() => ({
info: vi.fn(),
debug: vi.fn(),
error: vi.fn(),
}));
vi.mock("./bot-message-context.js", () => ({ vi.mock("./bot-message-context.js", () => ({
buildTelegramMessageContext, buildTelegramMessageContext,
@@ -19,25 +11,12 @@ vi.mock("./bot-message-dispatch.js", () => ({
dispatchTelegramMessage, dispatchTelegramMessage,
})); }));
vi.mock("../logging/diagnostic.js", () => ({
diagnosticLogger,
logMessageQueued,
logMessageProcessed,
logSessionStateChange,
}));
import { createTelegramMessageProcessor } from "./bot-message.js"; import { createTelegramMessageProcessor } from "./bot-message.js";
describe("telegram bot message diagnostics", () => { describe("telegram bot message processor", () => {
beforeEach(() => { beforeEach(() => {
buildTelegramMessageContext.mockReset(); buildTelegramMessageContext.mockReset();
dispatchTelegramMessage.mockReset(); dispatchTelegramMessage.mockReset();
logMessageQueued.mockReset();
logMessageProcessed.mockReset();
logSessionStateChange.mockReset();
diagnosticLogger.info.mockReset();
diagnosticLogger.debug.mockReset();
diagnosticLogger.error.mockReset();
}); });
const baseDeps = { const baseDeps = {
@@ -63,39 +42,19 @@ describe("telegram bot message diagnostics", () => {
resolveBotTopicsEnabled: () => false, resolveBotTopicsEnabled: () => false,
}; };
it("decrements queue depth after successful processing", async () => { it("dispatches when context is available", async () => {
buildTelegramMessageContext.mockResolvedValue({ buildTelegramMessageContext.mockResolvedValue({ route: { sessionKey: "agent:main:main" } });
route: { sessionKey: "agent:main:main" },
});
const processMessage = createTelegramMessageProcessor(baseDeps); const processMessage = createTelegramMessageProcessor(baseDeps);
await processMessage({ message: { chat: { id: 123 }, message_id: 456 } }, [], [], {}); await processMessage({ message: { chat: { id: 123 }, message_id: 456 } }, [], [], {});
expect(logMessageQueued).toHaveBeenCalledTimes(1); expect(dispatchTelegramMessage).toHaveBeenCalledTimes(1);
expect(logSessionStateChange).toHaveBeenCalledWith({
sessionKey: "agent:main:main",
state: "idle",
reason: "message_completed",
});
}); });
it("decrements queue depth after processing error", async () => { it("skips dispatch when no context is produced", async () => {
buildTelegramMessageContext.mockResolvedValue({ buildTelegramMessageContext.mockResolvedValue(null);
route: { sessionKey: "agent:main:main" },
});
dispatchTelegramMessage.mockRejectedValue(new Error("boom"));
const processMessage = createTelegramMessageProcessor(baseDeps); const processMessage = createTelegramMessageProcessor(baseDeps);
await processMessage({ message: { chat: { id: 123 }, message_id: 456 } }, [], [], {});
await expect( expect(dispatchTelegramMessage).not.toHaveBeenCalled();
processMessage({ message: { chat: { id: 123 }, message_id: 456 } }, [], [], {}),
).rejects.toThrow("boom");
expect(logMessageQueued).toHaveBeenCalledTimes(1);
expect(logSessionStateChange).toHaveBeenCalledWith({
sessionKey: "agent:main:main",
state: "idle",
reason: "message_error",
});
}); });
}); });

View File

@@ -1,12 +1,6 @@
// @ts-nocheck // @ts-nocheck
import { buildTelegramMessageContext } from "./bot-message-context.js"; import { buildTelegramMessageContext } from "./bot-message-context.js";
import { dispatchTelegramMessage } from "./bot-message-dispatch.js"; import { dispatchTelegramMessage } from "./bot-message-dispatch.js";
import {
diagnosticLogger as diag,
logMessageProcessed,
logMessageQueued,
logSessionStateChange,
} from "../logging/diagnostic.js";
export const createTelegramMessageProcessor = (deps) => { export const createTelegramMessageProcessor = (deps) => {
const { const {
@@ -33,19 +27,6 @@ export const createTelegramMessageProcessor = (deps) => {
} = deps; } = deps;
return async (primaryCtx, allMedia, storeAllowFrom, options) => { return async (primaryCtx, allMedia, storeAllowFrom, options) => {
const chatId = primaryCtx?.message?.chat?.id ?? primaryCtx?.chat?.id ?? "unknown";
const messageId = primaryCtx?.message?.message_id ?? "unknown";
const startTime = Date.now();
diag.info(
`process message start: channel=telegram chatId=${chatId} messageId=${messageId} mediaCount=${
allMedia?.length ?? 0
}`,
);
let sessionKey: string | undefined;
try {
const context = await buildTelegramMessageContext({ const context = await buildTelegramMessageContext({
primaryCtx, primaryCtx,
allMedia, allMedia,
@@ -65,32 +46,7 @@ export const createTelegramMessageProcessor = (deps) => {
resolveGroupRequireMention, resolveGroupRequireMention,
resolveTelegramGroupConfig, resolveTelegramGroupConfig,
}); });
if (!context) { if (!context) return;
const durationMs = Date.now() - startTime;
diag.debug(
`process message skipped: channel=telegram chatId=${chatId} messageId=${messageId} reason=no_context`,
);
logMessageProcessed({
channel: "telegram",
chatId,
messageId,
durationMs,
outcome: "skipped",
reason: "no_context",
});
return;
}
sessionKey = context?.route?.sessionKey;
diag.info(
`process message dispatching: channel=telegram chatId=${chatId} messageId=${messageId} sessionKey=${
sessionKey ?? "unknown"
}`,
);
if (sessionKey) {
logMessageQueued({ sessionKey, channel: "telegram", source: "telegram" });
}
await dispatchTelegramMessage({ await dispatchTelegramMessage({
context, context,
bot, bot,
@@ -103,52 +59,5 @@ export const createTelegramMessageProcessor = (deps) => {
opts, opts,
resolveBotTopicsEnabled, resolveBotTopicsEnabled,
}); });
const durationMs = Date.now() - startTime;
logMessageProcessed({
channel: "telegram",
chatId,
messageId,
sessionKey,
durationMs,
outcome: "completed",
});
if (sessionKey) {
logSessionStateChange({
sessionKey,
state: "idle",
reason: "message_completed",
});
}
diag.info(
`process message complete: channel=telegram chatId=${chatId} messageId=${messageId} sessionKey=${
sessionKey ?? "unknown"
} durationMs=${durationMs}`,
);
} catch (err) {
const durationMs = Date.now() - startTime;
logMessageProcessed({
channel: "telegram",
chatId,
messageId,
sessionKey,
durationMs,
outcome: "error",
error: String(err),
});
if (sessionKey) {
logSessionStateChange({
sessionKey,
state: "idle",
reason: "message_error",
});
}
diag.error(
`process message error: channel=telegram chatId=${chatId} messageId=${messageId} durationMs=${durationMs} error="${String(
err,
)}"`,
);
throw err;
}
}; };
}; };

View File

@@ -2,6 +2,7 @@ import { createServer } from "node:http";
import { webhookCallback } from "grammy"; import { webhookCallback } from "grammy";
import type { ClawdbotConfig } from "../config/config.js"; import type { ClawdbotConfig } from "../config/config.js";
import { isDiagnosticsEnabled } from "../infra/diagnostic-events.js";
import { formatErrorMessage } from "../infra/errors.js"; import { formatErrorMessage } from "../infra/errors.js";
import type { RuntimeEnv } from "../runtime.js"; import type { RuntimeEnv } from "../runtime.js";
import { defaultRuntime } from "../runtime.js"; import { defaultRuntime } from "../runtime.js";
@@ -34,6 +35,7 @@ export async function startTelegramWebhook(opts: {
const port = opts.port ?? 8787; const port = opts.port ?? 8787;
const host = opts.host ?? "0.0.0.0"; const host = opts.host ?? "0.0.0.0";
const runtime = opts.runtime ?? defaultRuntime; const runtime = opts.runtime ?? defaultRuntime;
const diagnosticsEnabled = isDiagnosticsEnabled(opts.config);
const bot = createTelegramBot({ const bot = createTelegramBot({
token: opts.token, token: opts.token,
runtime, runtime,
@@ -45,7 +47,9 @@ export async function startTelegramWebhook(opts: {
secretToken: opts.secret, secretToken: opts.secret,
}); });
if (diagnosticsEnabled) {
startDiagnosticHeartbeat(); startDiagnosticHeartbeat();
}
const server = createServer((req, res) => { const server = createServer((req, res) => {
if (req.url === healthPath) { if (req.url === healthPath) {
@@ -59,24 +63,30 @@ export async function startTelegramWebhook(opts: {
return; return;
} }
const startTime = Date.now(); const startTime = Date.now();
if (diagnosticsEnabled) {
logWebhookReceived({ channel: "telegram", updateType: "telegram-post" }); logWebhookReceived({ channel: "telegram", updateType: "telegram-post" });
}
const handled = handler(req, res); const handled = handler(req, res);
if (handled && typeof (handled as Promise<void>).catch === "function") { if (handled && typeof (handled as Promise<void>).catch === "function") {
void (handled as Promise<void>) void (handled as Promise<void>)
.then(() => { .then(() => {
if (diagnosticsEnabled) {
logWebhookProcessed({ logWebhookProcessed({
channel: "telegram", channel: "telegram",
updateType: "telegram-post", updateType: "telegram-post",
durationMs: Date.now() - startTime, durationMs: Date.now() - startTime,
}); });
}
}) })
.catch((err) => { .catch((err) => {
const errMsg = formatErrorMessage(err); const errMsg = formatErrorMessage(err);
if (diagnosticsEnabled) {
logWebhookError({ logWebhookError({
channel: "telegram", channel: "telegram",
updateType: "telegram-post", updateType: "telegram-post",
error: errMsg, error: errMsg,
}); });
}
runtime.log?.(`webhook handler failed: ${errMsg}`); runtime.log?.(`webhook handler failed: ${errMsg}`);
if (!res.headersSent) res.writeHead(500); if (!res.headersSent) res.writeHead(500);
res.end(); res.end();
@@ -98,7 +108,9 @@ export async function startTelegramWebhook(opts: {
const shutdown = () => { const shutdown = () => {
server.close(); server.close();
void bot.stop(); void bot.stop();
if (diagnosticsEnabled) {
stopDiagnosticHeartbeat(); stopDiagnosticHeartbeat();
}
}; };
if (opts.abortSignal) { if (opts.abortSignal) {
opts.abortSignal.addEventListener("abort", shutdown, { once: true }); opts.abortSignal.addEventListener("abort", shutdown, { once: true });