feat: add diagnostics flags

2026-01-25 10:38:49 +00:00
parent 737037129e
commit 612a27f3dd
9 changed files with 309 additions and 50 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ Docs: https://docs.clawd.bot
 - Telegram: add `channels.telegram.linkPreview` to toggle outbound link previews. (#1700) Thanks @zerone0x. https://docs.clawd.bot/channels/telegram
 - Telegram: treat DM topics as separate sessions and keep DM history limits stable with thread suffixes. (#1597) Thanks @rohannagpal.
 - Telegram: add verbose raw-update logging for inbound Telegram updates. (#1597) Thanks @rohannagpal.
+- Diagnostics: add diagnostic flags for targeted debug logs (config + env override). https://docs.clawd.bot/diagnostics/flags

 ### Fixes
 - Gateway: include inline config env vars in service install environments. (#1735) Thanks @Seredeep.
--- a/docs/diagnostics/flags.md
+++ b/docs/diagnostics/flags.md
@@ -0,0 +1,89 @@
+---
+summary: "Diagnostics flags for targeted debug logs"
+read_when:
+  - You need targeted debug logs without raising global logging levels
+  - You need to capture subsystem-specific logs for support
+---
+# Diagnostics Flags
+
+Diagnostics flags let you enable targeted debug logs without turning on verbose logging everywhere. Flags are opt-in and have no effect unless a subsystem checks them.
+
+## How it works
+
+- Flags are strings (case-insensitive).
+- You can enable flags in config, via the `CLAWDBOT_DIAGNOSTICS` env var, or both; the two sets are merged.
+- Wildcards are supported:
+  - `telegram.*` matches `telegram.http` (and the bare `telegram` flag)
+  - `*` (or `all`) enables all flags; the env var also accepts `1` and `true`
+
+## Enable via config
+
+```json
+{
+  "diagnostics": {
+    "flags": ["telegram.http"]
+  }
+}
+```
+
+Multiple flags:
+
+```json
+{
+  "diagnostics": {
+    "flags": ["telegram.http", "gateway.*"]
+  }
+}
+```
+
+Restart the gateway after changing flags.
+
+## Env override (one-off)
+
+```bash
+CLAWDBOT_DIAGNOSTICS=telegram.http,telegram.payload
+```
+
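+Clear the env flags (`0`, `false`, `off`, and `none` all mean "no env flags"; flags listed in config stay enabled because env and config flags are merged):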
+
+```bash
+CLAWDBOT_DIAGNOSTICS=0
+```
+
+## Where logs go
+
+Flags emit logs into the standard diagnostics log file. By default:
+
+```
+/tmp/clawdbot/clawdbot-YYYY-MM-DD.log
+```
+
+If you set `logging.file`, use that path instead. Logs are JSONL (one JSON object per line). Redaction still applies based on `logging.redactSensitive`.
+
+## Extract logs
+
+Pick the latest log file:
+
+```bash
+ls -t /tmp/clawdbot/clawdbot-*.log | head -n 1
+```
+
+Filter for Telegram HTTP diagnostics:
+
+```bash
+rg "telegram http error" /tmp/clawdbot/clawdbot-*.log
+```
+
+Or tail while reproducing:
+
+```bash
+tail -f /tmp/clawdbot/clawdbot-$(date +%F).log | rg "telegram http error"
+```
+
+For remote gateways, you can also use `clawdbot logs --follow` (see [/cli/logs](/cli/logs)).
+
+## Notes
+
+- The `telegram.http` diagnostics are logged at `warn` level, so they may be suppressed when `logging.level` is set higher than `warn`. The default `info` is fine.
+- Flags are safe to leave enabled; they only affect log volume for the specific subsystem.
+- Use [/logging](/logging) to change log destinations, levels, and redaction.
--- a/docs/logging.md
+++ b/docs/logging.md
@@ -192,6 +192,30 @@ Use this if you want diagnostics events available to plugins or custom sinks:
 }
 ```

+### Diagnostics flags (targeted logs)
+
+Use flags to turn on extra, targeted debug logs without raising `logging.level`.
+Flags are case-insensitive and support wildcards (e.g. `telegram.*` or `*`).
+
+```json
+{
+  "diagnostics": {
+    "flags": ["telegram.http"]
+  }
+}
+```
+
+Env flags (one-off; merged with any config flags):
+
+```
+CLAWDBOT_DIAGNOSTICS=telegram.http,telegram.payload
+```
+
+Notes:
+- Flag logs go to the standard log file (same as `logging.file`).
+- Output is still redacted according to `logging.redactSensitive`.
+- Full guide: [/diagnostics/flags](/diagnostics/flags).
+
 ### Export to OpenTelemetry

 Diagnostics can be exported via the `diagnostics-otel` plugin (OTLP/HTTP). This
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -107,6 +107,7 @@ const FIELD_LABELS: Record<string, string> = {
  "update.channel": "Update Channel",
  "update.checkOnStart": "Update Check on Start",
  "diagnostics.enabled": "Diagnostics Enabled",
+  "diagnostics.flags": "Diagnostics Flags",
  "diagnostics.otel.enabled": "OpenTelemetry Enabled",
  "diagnostics.otel.endpoint": "OpenTelemetry Endpoint",
  "diagnostics.otel.protocol": "OpenTelemetry Protocol",
@@ -388,6 +389,8 @@ const FIELD_HELP: Record<string, string> = {
  "nodeHost.browserProxy.enabled": "Expose the local browser control server via node proxy.",
  "nodeHost.browserProxy.allowProfiles":
    "Optional allowlist of browser profile names exposed via the node proxy.",
+  "diagnostics.flags":
+    'Enable targeted diagnostics logs by flag (e.g. ["telegram.http"]). Supports wildcards like "telegram.*" or "*".',
  "diagnostics.cacheTrace.enabled":
    "Log cache trace snapshots for embedded agent runs (default: false).",
  "diagnostics.cacheTrace.filePath":
--- a/src/config/types.base.ts
+++ b/src/config/types.base.ts
@@ -135,6 +135,8 @@ export type DiagnosticsCacheTraceConfig = {

 export type DiagnosticsConfig = {
  enabled?: boolean;
+  /** Optional ad-hoc diagnostics flags (e.g. "telegram.http"). */
+  flags?: string[];
  otel?: DiagnosticsOtelConfig;
  cacheTrace?: DiagnosticsCacheTraceConfig;
 };
--- a/src/config/zod-schema.ts
+++ b/src/config/zod-schema.ts
@@ -62,6 +62,7 @@ export const ClawdbotSchema = z
    diagnostics: z
      .object({
        enabled: z.boolean().optional(),
+        flags: z.array(z.string()).optional(),
        otel: z
          .object({
            enabled: z.boolean().optional(),
--- a/src/infra/diagnostic-flags.test.ts
+++ b/src/infra/diagnostic-flags.test.ts
@@ -0,0 +1,31 @@
+import { describe, expect, it } from "vitest";
+
+import type { ClawdbotConfig } from "../config/config.js";
+import { isDiagnosticFlagEnabled, resolveDiagnosticFlags } from "./diagnostic-flags.js";
+
+describe("diagnostic flags", () => {
+  it("merges config + env flags", () => {
+    const cfg = {
+      diagnostics: { flags: ["telegram.http", "cache.*"] },
+    } as ClawdbotConfig;
+    const env = {
+      CLAWDBOT_DIAGNOSTICS: "foo,bar",
+    } as NodeJS.ProcessEnv;
+
+    const flags = resolveDiagnosticFlags(cfg, env);
+    expect(flags).toEqual(expect.arrayContaining(["telegram.http", "cache.*", "foo", "bar"])); // membership only; order is not asserted
+    expect(isDiagnosticFlagEnabled("telegram.http", cfg, env)).toBe(true);
+    expect(isDiagnosticFlagEnabled("cache.hit", cfg, env)).toBe(true); // enabled through the "cache.*" wildcard
+    expect(isDiagnosticFlagEnabled("foo", cfg, env)).toBe(true); // enabled through the env var
+  });
+
+  it("treats env true as wildcard", () => {
+    const env = { CLAWDBOT_DIAGNOSTICS: "1" } as NodeJS.ProcessEnv;
+    expect(isDiagnosticFlagEnabled("anything.here", undefined, env)).toBe(true);
+  });
+
+  it("treats env false as disabled", () => {
+    const env = { CLAWDBOT_DIAGNOSTICS: "0" } as NodeJS.ProcessEnv;
+    expect(isDiagnosticFlagEnabled("telegram.http", undefined, env)).toBe(false); // no config is passed, so env "0" combined with config flags is not covered here
+  });
+});
--- a/src/infra/diagnostic-flags.ts
+++ b/src/infra/diagnostic-flags.ts
@@ -0,0 +1,70 @@
+import type { ClawdbotConfig } from "../config/config.js";
+
+const DIAGNOSTICS_ENV = "CLAWDBOT_DIAGNOSTICS";
+
+function normalizeFlag(value: string): string {
+  return value.trim().toLowerCase(); // canonical form: surrounding whitespace removed, lowercased
+}
+
+function parseEnvFlags(raw?: string): string[] {
+  if (!raw) return []; // unset or empty env value: no env flags
+  const trimmed = raw.trim();
+  if (!trimmed) return []; // whitespace-only value: no env flags
+  const lowered = trimmed.toLowerCase();
+  if (["0", "false", "off", "none"].includes(lowered)) return []; // explicit "off" spellings contribute no env flags
+  if (["1", "true", "all", "*"].includes(lowered)) return ["*"]; // explicit "on" spellings collapse to the catch-all wildcard
+  return trimmed
+    .split(/[,\s]+/) // flags may be separated by commas and/or whitespace
+    .map(normalizeFlag)
+    .filter(Boolean); // drop empty entries left by leading/trailing separators
+}
+
+function uniqueFlags(flags: string[]): string[] {
+  const seen = new Set<string>();
+  const out: string[] = []; // keeps normalized flags in first-seen order
+  for (const flag of flags) {
+    const normalized = normalizeFlag(flag);
+    if (!normalized || seen.has(normalized)) continue; // skip blanks and duplicates (compared after normalization)
+    seen.add(normalized);
+    out.push(normalized);
+  }
+  return out;
+}
+
+export function resolveDiagnosticFlags(
+  cfg?: ClawdbotConfig,
+  env: NodeJS.ProcessEnv = process.env,
+): string[] {
+  const configFlags = Array.isArray(cfg?.diagnostics?.flags) ? cfg?.diagnostics?.flags : []; // missing or non-array config value: no config flags
+  const envFlags = parseEnvFlags(env[DIAGNOSTICS_ENV]);
+  return uniqueFlags([...configFlags, ...envFlags]); // union of both sources; NOTE(review): an "off" env value yields no env flags but does not remove config flags
+}
+
+export function matchesDiagnosticFlag(flag: string, enabledFlags: string[]): boolean {
+  const target = normalizeFlag(flag);
+  if (!target) return false; // a blank flag name never matches
+  for (const raw of enabledFlags) {
+    const enabled = normalizeFlag(raw);
+    if (!enabled) continue;
+    if (enabled === "*" || enabled === "all") return true; // catch-all entries enable every flag
+    if (enabled.endsWith(".*")) {
+      const prefix = enabled.slice(0, -2);
+      if (target === prefix || target.startsWith(`${prefix}.`)) return true; // "a.*" matches "a" itself and anything under "a."
+    }
+    if (enabled.endsWith("*")) {
+      const prefix = enabled.slice(0, -1);
+      if (target.startsWith(prefix)) return true; // bare trailing "*" is a plain prefix match ("tele*" matches "telegram.http")
+    }
+    if (enabled === target) return true; // exact match
+  }
+  return false;
+}
+
+export function isDiagnosticFlagEnabled(
+  flag: string,
+  cfg?: ClawdbotConfig,
+  env: NodeJS.ProcessEnv = process.env,
+): boolean {
+  const flags = resolveDiagnosticFlags(cfg, env); // resolved on every call from the given config and env
+  return matchesDiagnosticFlag(flag, flags);
+}
--- a/src/telegram/send.ts
+++ b/src/telegram/send.ts
@@ -4,13 +4,16 @@ import type {
  ReactionType,
  ReactionTypeEmoji,
 } from "@grammyjs/types";
-import { type ApiClientOptions, Bot, InputFile } from "grammy";
+import { type ApiClientOptions, Bot, HttpError, InputFile } from "grammy";
 import { loadConfig } from "../config/config.js";
 import { logVerbose } from "../globals.js";
 import { recordChannelActivity } from "../infra/channel-activity.js";
-import { formatErrorMessage } from "../infra/errors.js";
+import { formatErrorMessage, formatUncaughtError } from "../infra/errors.js";
+import { isDiagnosticFlagEnabled } from "../infra/diagnostic-flags.js";
 import type { RetryConfig } from "../infra/retry.js";
 import { createTelegramRetryRunner } from "../infra/retry-policy.js";
+import { redactSensitiveText } from "../logging/redact.js";
+import { createSubsystemLogger } from "../logging/subsystem.js";
 import { mediaKindFromMime } from "../media/constants.js";
 import { isGifMedia } from "../media/mime.js";
 import { loadWebMedia } from "../web/media.js";
@@ -59,6 +62,19 @@ type TelegramReactionOpts = {
 };

 const PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i;
+const diagLogger = createSubsystemLogger("telegram/diagnostic");
+
+function createTelegramHttpLogger(cfg: ReturnType<typeof loadConfig>) {
+  const enabled = isDiagnosticFlagEnabled("telegram.http", cfg); // flag is evaluated once, when the logger is created
+  if (!enabled) {
+    return () => {}; // flag off: hand back a no-op so call sites need no conditional
+  }
+  return (label: string, err: unknown) => {
+    if (!(err instanceof HttpError)) return; // only grammY HttpError instances are logged here
+    const detail = redactSensitiveText(formatUncaughtError(err.error ?? err)); // uses err.error when set, otherwise the HttpError itself; redacted before logging
+    diagLogger.warn(`telegram http error (${label}): ${detail}`);
+  };
+}

 function resolveToken(explicit: string | undefined, params: { accountId: string; token: string }) {
  if (explicit?.trim()) return explicit.trim();
@@ -178,7 +194,12 @@ export async function sendMessageTelegram(
    configRetry: account.config.retry,
    verbose: opts.verbose,
  });
-
+  const logHttpError = createTelegramHttpLogger(cfg);
+  const requestWithDiag = <T>(fn: () => Promise<T>, label?: string) =>
+    request(fn, label).catch((err) => {
+      logHttpError(label ?? "request", err);
+      throw err;
+    });
  const wrapChatNotFound = (err: unknown) => {
    if (!/400: Bad Request: chat not found/i.test(formatErrorMessage(err))) return err;
    return new Error(
@@ -217,30 +238,31 @@ export async function sendMessageTelegram(
      parse_mode: "HTML" as const,
      ...baseParams,
    };
-    const res = await request(() => api.sendMessage(chatId, htmlText, sendParams), "message").catch(
-      async (err) => {
-        // Telegram rejects malformed HTML (e.g., unsupported tags or entities).
-        // When that happens, fall back to plain text so the message still delivers.
-        const errText = formatErrorMessage(err);
-        if (PARSE_ERR_RE.test(errText)) {
-          if (opts.verbose) {
-            console.warn(`telegram HTML parse failed, retrying as plain text: ${errText}`);
-          }
-          const fallback = fallbackText ?? rawText;
-          const plainParams = hasBaseParams ? baseParams : undefined;
-          return await request(
-            () =>
-              plainParams
-                ? api.sendMessage(chatId, fallback, plainParams)
-                : api.sendMessage(chatId, fallback),
-            "message-plain",
-          ).catch((err2) => {
-            throw wrapChatNotFound(err2);
-          });
+    const res = await requestWithDiag(
+      () => api.sendMessage(chatId, htmlText, sendParams),
+      "message",
+    ).catch(async (err) => {
+      // Telegram rejects malformed HTML (e.g., unsupported tags or entities).
+      // When that happens, fall back to plain text so the message still delivers.
+      const errText = formatErrorMessage(err);
+      if (PARSE_ERR_RE.test(errText)) {
+        if (opts.verbose) {
+          console.warn(`telegram HTML parse failed, retrying as plain text: ${errText}`);
        }
-        throw wrapChatNotFound(err);
-      },
-    );
+        const fallback = fallbackText ?? rawText;
+        const plainParams = hasBaseParams ? baseParams : undefined;
+        return await requestWithDiag(
+          () =>
+            plainParams
+              ? api.sendMessage(chatId, fallback, plainParams)
+              : api.sendMessage(chatId, fallback),
+          "message-plain",
+        ).catch((err2) => {
+          throw wrapChatNotFound(err2);
+        });
+      }
+      throw wrapChatNotFound(err);
+    });
    return res;
  };

@@ -277,19 +299,20 @@ export async function sendMessageTelegram(
      | Awaited<ReturnType<typeof api.sendAnimation>>
      | Awaited<ReturnType<typeof api.sendDocument>>;
    if (isGif) {
-      result = await request(() => api.sendAnimation(chatId, file, mediaParams), "animation").catch(
-        (err) => {
-          throw wrapChatNotFound(err);
-        },
-      );
+      result = await requestWithDiag(
+        () => api.sendAnimation(chatId, file, mediaParams),
+        "animation",
+      ).catch((err) => {
+        throw wrapChatNotFound(err);
+      });
    } else if (kind === "image") {
-      result = await request(() => api.sendPhoto(chatId, file, mediaParams), "photo").catch(
+      result = await requestWithDiag(() => api.sendPhoto(chatId, file, mediaParams), "photo").catch(
        (err) => {
          throw wrapChatNotFound(err);
        },
      );
    } else if (kind === "video") {
-      result = await request(() => api.sendVideo(chatId, file, mediaParams), "video").catch(
+      result = await requestWithDiag(() => api.sendVideo(chatId, file, mediaParams), "video").catch(
        (err) => {
          throw wrapChatNotFound(err);
        },
@@ -302,24 +325,27 @@ export async function sendMessageTelegram(
        logFallback: logVerbose,
      });
      if (useVoice) {
-        result = await request(() => api.sendVoice(chatId, file, mediaParams), "voice").catch(
-          (err) => {
-            throw wrapChatNotFound(err);
-          },
-        );
+        result = await requestWithDiag(
+          () => api.sendVoice(chatId, file, mediaParams),
+          "voice",
+        ).catch((err) => {
+          throw wrapChatNotFound(err);
+        });
      } else {
-        result = await request(() => api.sendAudio(chatId, file, mediaParams), "audio").catch(
-          (err) => {
-            throw wrapChatNotFound(err);
-          },
-        );
+        result = await requestWithDiag(
+          () => api.sendAudio(chatId, file, mediaParams),
+          "audio",
+        ).catch((err) => {
+          throw wrapChatNotFound(err);
+        });
      }
    } else {
-      result = await request(() => api.sendDocument(chatId, file, mediaParams), "document").catch(
-        (err) => {
-          throw wrapChatNotFound(err);
-        },
-      );
+      result = await requestWithDiag(
+        () => api.sendDocument(chatId, file, mediaParams),
+        "document",
+      ).catch((err) => {
+        throw wrapChatNotFound(err);
+      });
    }
    const mediaMessageId = String(result?.message_id ?? "unknown");
    const resolvedChatId = String(result?.chat?.id ?? chatId);
@@ -400,6 +426,12 @@ export async function reactMessageTelegram(
    configRetry: account.config.retry,
    verbose: opts.verbose,
  });
+  const logHttpError = createTelegramHttpLogger(cfg);
+  const requestWithDiag = <T>(fn: () => Promise<T>, label?: string) =>
+    request(fn, label).catch((err) => {
+      logHttpError(label ?? "request", err);
+      throw err;
+    });
  const remove = opts.remove === true;
  const trimmedEmoji = emoji.trim();
  // Build the reaction array. We cast emoji to the grammY union type since
@@ -411,7 +443,7 @@ export async function reactMessageTelegram(
  if (typeof api.setMessageReaction !== "function") {
    throw new Error("Telegram reactions are unavailable in this bot API.");
  }
-  await request(() => api.setMessageReaction(chatId, messageId, reactions), "reaction");
+  await requestWithDiag(() => api.setMessageReaction(chatId, messageId, reactions), "reaction");
  return { ok: true };
 }

@@ -446,7 +478,13 @@ export async function deleteMessageTelegram(
    configRetry: account.config.retry,
    verbose: opts.verbose,
  });
-  await request(() => api.deleteMessage(chatId, messageId), "deleteMessage");
+  const logHttpError = createTelegramHttpLogger(cfg);
+  const requestWithDiag = <T>(fn: () => Promise<T>, label?: string) =>
+    request(fn, label).catch((err) => {
+      logHttpError(label ?? "request", err);
+      throw err;
+    });
+  await requestWithDiag(() => api.deleteMessage(chatId, messageId), "deleteMessage");
  logVerbose(`[telegram] Deleted message ${messageId} from chat ${chatId}`);
  return { ok: true };
 }