refactor: tighten media diagnostics

2026-01-17 07:27:38 +00:00
parent 0c0e1e4226
commit 2ee45d50a4
5 changed files with 252 additions and 10 deletions
--- a/src/media-understanding/resolve.test.ts
+++ b/src/media-understanding/resolve.test.ts
@@ -0,0 +1,136 @@
+import { describe, expect, it } from "vitest";
+
+import type { ClawdbotConfig } from "../config/config.js";
+import {
+  resolveEntriesWithActiveFallback,
+  resolveModelEntries,
+} from "./resolve.js";
+
+const providerRegistry = new Map([ // Test fixture: provider id -> capability list, passed to every resolve call in this file.
+  ["openai", { capabilities: ["image"] }],
+  ["groq", { capabilities: ["audio"] }],
+]);
+
+describe("resolveModelEntries", () => { // Checks which configured entries resolveModelEntries returns for a requested capability.
+  it("uses provider capabilities for shared entries without explicit caps", () => {
+    const cfg: ClawdbotConfig = {
+      tools: {
+        media: {
+          models: [{ provider: "openai", model: "gpt-5.2" }], // shared (tools.media.models) entry with no capabilities listed on it
+        },
+      },
+    };
+
+    const imageEntries = resolveModelEntries({
+      cfg,
+      capability: "image",
+      providerRegistry,
+    });
+    expect(imageEntries).toHaveLength(1); // the registry fixture lists "image" for openai, so the entry is expected to be kept
+
+    const audioEntries = resolveModelEntries({
+      cfg,
+      capability: "audio",
+      providerRegistry,
+    });
+    expect(audioEntries).toHaveLength(0); // the registry fixture lists no "audio" for openai, so nothing is expected
+  });
+
+  it("keeps per-capability entries even without explicit caps", () => {
+    const cfg: ClawdbotConfig = {
+      tools: {
+        media: {
+          image: {
+            models: [{ provider: "openai", model: "gpt-5.2" }], // entry lives under tools.media.image rather than the shared list
+          },
+        },
+      },
+    };
+
+    const imageEntries = resolveModelEntries({
+      cfg,
+      capability: "image",
+      config: cfg.tools?.media?.image, // per-capability config is handed in explicitly
+      providerRegistry,
+    });
+    expect(imageEntries).toHaveLength(1);
+  });
+
+  it("skips shared CLI entries without capabilities", () => {
+    const cfg: ClawdbotConfig = {
+      tools: {
+        media: {
+          models: [{ type: "cli", command: "gemini", args: ["--file", "{{MediaPath}}"] }], // CLI entry: no provider and no capabilities given
+        },
+      },
+    };
+
+    const entries = resolveModelEntries({
+      cfg,
+      capability: "image",
+      providerRegistry,
+    });
+    expect(entries).toHaveLength(0); // expected to be dropped for the "image" request
+  });
+});
+
+describe("resolveEntriesWithActiveFallback", () => { // Checks when the caller-supplied activeModel is used in place of configured entries.
+  it("uses active model when enabled and no models are configured", () => {
+    const cfg: ClawdbotConfig = {
+      tools: {
+        media: {
+          audio: { enabled: true }, // enabled, but no models list
+        },
+      },
+    };
+
+    const entries = resolveEntriesWithActiveFallback({
+      cfg,
+      capability: "audio",
+      config: cfg.tools?.media?.audio,
+      providerRegistry,
+      activeModel: { provider: "groq", model: "whisper-large-v3" }, // groq is listed with "audio" in the registry fixture
+    });
+    expect(entries).toHaveLength(1);
+    expect(entries[0]?.provider).toBe("groq"); // the single entry is expected to come from activeModel
+  });
+
+  it("ignores active model when configured entries exist", () => {
+    const cfg: ClawdbotConfig = {
+      tools: {
+        media: {
+          audio: { enabled: true, models: [{ provider: "openai", model: "whisper-1" }] }, // explicit per-capability entry
+        },
+      },
+    };
+
+    const entries = resolveEntriesWithActiveFallback({
+      cfg,
+      capability: "audio",
+      config: cfg.tools?.media?.audio,
+      providerRegistry,
+      activeModel: { provider: "groq", model: "whisper-large-v3" },
+    });
+    expect(entries).toHaveLength(1);
+    expect(entries[0]?.provider).toBe("openai"); // the configured entry is expected, not the groq activeModel
+  });
+
+  it("skips active model when provider lacks capability", () => {
+    const cfg: ClawdbotConfig = {
+      tools: {
+        media: {
+          video: { enabled: true }, // enabled, but no models list
+        },
+      },
+    };
+
+    const entries = resolveEntriesWithActiveFallback({
+      cfg,
+      capability: "video",
+      config: cfg.tools?.media?.video,
+      providerRegistry,
+      activeModel: { provider: "groq", model: "whisper-large-v3" }, // the registry fixture lists only "audio" for groq
+    });
+    expect(entries).toHaveLength(0); // no fallback entry is expected for "video"
+  });
+});
--- a/src/media-understanding/runner.ts
+++ b/src/media-understanding/runner.ts
@@ -98,6 +98,23 @@ function buildModelDecision(params: {
  };
 }

+function formatDecisionSummary(decision: MediaUnderstandingDecision): string { // Builds a one-line summary: "<capability>: <outcome>[ (<success>/<total>)][ via <provider>[/<model>]][ reason=<short>]".
+  const total = decision.attachments.length;
+  const success = decision.attachments.filter((entry) => entry.chosen?.outcome === "success").length; // attachments whose chosen attempt has outcome "success"
+  const chosen = decision.attachments.find((entry) => entry.chosen)?.chosen; // only the first attachment with a chosen attempt supplies the label
+  const provider = chosen?.provider?.trim();
+  const model = chosen?.model?.trim();
+  const modelLabel = provider ? (model ? `${provider}/${model}` : provider) : undefined; // provider alone when model is blank; no label when provider is blank
+  const reason = decision.attachments
+    .flatMap((entry) => entry.attempts.map((attempt) => attempt.reason).filter(Boolean))
+    .find(Boolean); // first truthy reason across all attempts, in attachment order
+  const shortReason = reason ? reason.split(":")[0]?.trim() : undefined; // keep only the text before the first ":"
+  const countLabel = total > 0 ? ` (${success}/${total})` : ""; // omitted when there are no attachments
+  const viaLabel = modelLabel ? ` via ${modelLabel}` : "";
+  const reasonLabel = shortReason ? ` reason=${shortReason}` : "";
+  return `${decision.capability}: ${decision.outcome}${countLabel}${viaLabel}${reasonLabel}`;
+}
+
 async function runProviderEntry(params: {
  capability: MediaUnderstandingCapability;
  entry: MediaUnderstandingModelConfig;
@@ -495,12 +512,16 @@ export async function runCapability(params: {
      chosen: attempts.find((attempt) => attempt.outcome === "success"),
    });
  }
+  const decision: MediaUnderstandingDecision = {
+    capability,
+    outcome: outputs.length > 0 ? "success" : "skipped",
+    attachments: attachmentDecisions,
+  };
+  if (shouldLogVerbose()) {
+    logVerbose(`Media understanding ${formatDecisionSummary(decision)}`);
+  }
  return {
    outputs,
-    decision: {
-      capability,
-      outcome: outputs.length > 0 ? "success" : "skipped",
-      attachments: attachmentDecisions,
-    },
+    decision,
  };
 }