refactor: tighten media diagnostics
@@ -256,6 +256,15 @@ When `mode: "all"`, outputs are labeled `[Image 1/2]`, `[Audio 2/2]`, etc.
 }
 ```
 
+## Status output
+When media understanding runs, `/status` includes a short summary line:
+
+```
+📎 Media: image ok (openai/gpt-5.2) · audio skipped (maxBytes)
+```
+
+This shows per‑capability outcomes and the chosen provider/model when applicable.
+
 ## Notes
 - Understanding is **best‑effort**. Errors do not block replies.
 - Attachments are still passed to models even when understanding is disabled.
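The `📎 Media:` line above is assembled from the per-capability decisions this commit starts recording. A minimal sketch of how such a status line could be rendered; the `StatusDecision` shape and the `renderMediaStatusLine` name are illustrative assumptions, not the project's actual implementation:

```ts
// Illustrative only: assumed shape, reduced to the fields the status line needs.
type StatusDecision = {
  capability: "image" | "audio" | "video";
  outcome: "success" | "skipped" | "error";
  provider?: string; // e.g. "openai"
  model?: string; // e.g. "gpt-5.2"
  reason?: string; // e.g. "maxBytes: too large"
};

// Hypothetical renderer for a `/status` line like the docs example above.
function renderMediaStatusLine(decisions: StatusDecision[]): string {
  const parts = decisions.map((d) => {
    if (d.outcome === "success") {
      const label = d.model ? `${d.provider}/${d.model}` : d.provider;
      return `${d.capability} ok${label ? ` (${label})` : ""}`;
    }
    // Keep only the reason prefix, mirroring "audio skipped (maxBytes)".
    const short = d.reason?.split(":")[0]?.trim();
    return `${d.capability} ${d.outcome}${short ? ` (${short})` : ""}`;
  });
  return `📎 Media: ${parts.join(" · ")}`;
}
```

With the docs example as input (image success via openai/gpt-5.2, audio skipped for maxBytes), this returns exactly the line shown above.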
@@ -41,4 +41,69 @@ describe("buildInboundMediaNote", () => {
     });
     expect(note).toBe("[media attached: /tmp/b.png | https://example.com/b.png]");
   });
+
+  it("only suppresses attachments when media understanding succeeded", () => {
+    const note = buildInboundMediaNote({
+      MediaPaths: ["/tmp/a.png", "/tmp/b.png"],
+      MediaUrls: ["https://example.com/a.png", "https://example.com/b.png"],
+      MediaUnderstandingDecisions: [
+        {
+          capability: "image",
+          outcome: "skipped",
+          attachments: [
+            {
+              attachmentIndex: 0,
+              attempts: [
+                {
+                  type: "provider",
+                  outcome: "skipped",
+                  reason: "maxBytes: too large",
+                },
+              ],
+            },
+          ],
+        },
+      ],
+    });
+    expect(note).toBe(
+      [
+        "[media attached: 2 files]",
+        "[media attached 1/2: /tmp/a.png | https://example.com/a.png]",
+        "[media attached 2/2: /tmp/b.png | https://example.com/b.png]",
+      ].join("\n"),
+    );
+  });
+
+  it("suppresses attachments when media understanding succeeds via decisions", () => {
+    const note = buildInboundMediaNote({
+      MediaPaths: ["/tmp/a.png", "/tmp/b.png"],
+      MediaUrls: ["https://example.com/a.png", "https://example.com/b.png"],
+      MediaUnderstandingDecisions: [
+        {
+          capability: "image",
+          outcome: "success",
+          attachments: [
+            {
+              attachmentIndex: 0,
+              attempts: [
+                {
+                  type: "provider",
+                  outcome: "success",
+                  provider: "openai",
+                  model: "gpt-5.2",
+                },
+              ],
+              chosen: {
+                type: "provider",
+                outcome: "success",
+                provider: "openai",
+                model: "gpt-5.2",
+              },
+            },
+          ],
+        },
+      ],
+    });
+    expect(note).toBe("[media attached: /tmp/b.png | https://example.com/b.png]");
+  });
 });
@@ -19,11 +19,22 @@ function formatMediaAttachedLine(params: {
 
 export function buildInboundMediaNote(ctx: MsgContext): string | undefined {
   // Attachment indices follow MediaPaths/MediaUrls ordering as supplied by the channel.
-  const suppressed = new Set(
-    Array.isArray(ctx.MediaUnderstanding)
-      ? ctx.MediaUnderstanding.map((output) => output.attachmentIndex)
-      : [],
-  );
+  const suppressed = new Set<number>();
+  if (Array.isArray(ctx.MediaUnderstanding)) {
+    for (const output of ctx.MediaUnderstanding) {
+      suppressed.add(output.attachmentIndex);
+    }
+  }
+  if (Array.isArray(ctx.MediaUnderstandingDecisions)) {
+    for (const decision of ctx.MediaUnderstandingDecisions) {
+      if (decision.outcome !== "success") continue;
+      for (const attachment of decision.attachments) {
+        if (attachment.chosen?.outcome === "success") {
+          suppressed.add(attachment.attachmentIndex);
+        }
+      }
+    }
+  }
   const pathsFromArray = Array.isArray(ctx.MediaPaths) ? ctx.MediaPaths : undefined;
   const paths =
     pathsFromArray && pathsFromArray.length > 0
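A worked example of the suppression rule above, with hypothetical values mirroring the tests earlier in this commit:

```ts
// Hypothetical: two attachments, one image decision. Decision outcome is
// "success" and attachment 0's chosen attempt succeeded, so index 0 is
// suppressed; attachment 1 has no decision entry and is still listed.
const decisions = [
  {
    capability: "image",
    outcome: "success",
    attachments: [
      { attachmentIndex: 0, chosen: { type: "provider", outcome: "success" } },
    ],
  },
];
// With the loops above, suppressed becomes Set { 0 }, so the note would
// mention only the second attachment, e.g.
// "[media attached: /tmp/b.png | https://example.com/b.png]".
```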
src/media-understanding/resolve.test.ts (new file, +136)
@@ -0,0 +1,136 @@
+import { describe, expect, it } from "vitest";
+
+import type { ClawdbotConfig } from "../config/config.js";
+import {
+  resolveEntriesWithActiveFallback,
+  resolveModelEntries,
+} from "./resolve.js";
+
+const providerRegistry = new Map([
+  ["openai", { capabilities: ["image"] }],
+  ["groq", { capabilities: ["audio"] }],
+]);
+
+describe("resolveModelEntries", () => {
+  it("uses provider capabilities for shared entries without explicit caps", () => {
+    const cfg: ClawdbotConfig = {
+      tools: {
+        media: {
+          models: [{ provider: "openai", model: "gpt-5.2" }],
+        },
+      },
+    };
+
+    const imageEntries = resolveModelEntries({
+      cfg,
+      capability: "image",
+      providerRegistry,
+    });
+    expect(imageEntries).toHaveLength(1);
+
+    const audioEntries = resolveModelEntries({
+      cfg,
+      capability: "audio",
+      providerRegistry,
+    });
+    expect(audioEntries).toHaveLength(0);
+  });
+
+  it("keeps per-capability entries even without explicit caps", () => {
+    const cfg: ClawdbotConfig = {
+      tools: {
+        media: {
+          image: {
+            models: [{ provider: "openai", model: "gpt-5.2" }],
+          },
+        },
+      },
+    };
+
+    const imageEntries = resolveModelEntries({
+      cfg,
+      capability: "image",
+      config: cfg.tools?.media?.image,
+      providerRegistry,
+    });
+    expect(imageEntries).toHaveLength(1);
+  });
+
+  it("skips shared CLI entries without capabilities", () => {
+    const cfg: ClawdbotConfig = {
+      tools: {
+        media: {
+          models: [{ type: "cli", command: "gemini", args: ["--file", "{{MediaPath}}"] }],
+        },
+      },
+    };
+
+    const entries = resolveModelEntries({
+      cfg,
+      capability: "image",
+      providerRegistry,
+    });
+    expect(entries).toHaveLength(0);
+  });
+});
+
+describe("resolveEntriesWithActiveFallback", () => {
+  it("uses active model when enabled and no models are configured", () => {
+    const cfg: ClawdbotConfig = {
+      tools: {
+        media: {
+          audio: { enabled: true },
+        },
+      },
+    };
+
+    const entries = resolveEntriesWithActiveFallback({
+      cfg,
+      capability: "audio",
+      config: cfg.tools?.media?.audio,
+      providerRegistry,
+      activeModel: { provider: "groq", model: "whisper-large-v3" },
+    });
+    expect(entries).toHaveLength(1);
+    expect(entries[0]?.provider).toBe("groq");
+  });
+
+  it("ignores active model when configured entries exist", () => {
+    const cfg: ClawdbotConfig = {
+      tools: {
+        media: {
+          audio: { enabled: true, models: [{ provider: "openai", model: "whisper-1" }] },
+        },
+      },
+    };
+
+    const entries = resolveEntriesWithActiveFallback({
+      cfg,
+      capability: "audio",
+      config: cfg.tools?.media?.audio,
+      providerRegistry,
+      activeModel: { provider: "groq", model: "whisper-large-v3" },
+    });
+    expect(entries).toHaveLength(1);
+    expect(entries[0]?.provider).toBe("openai");
+  });
+
+  it("skips active model when provider lacks capability", () => {
+    const cfg: ClawdbotConfig = {
+      tools: {
+        media: {
+          video: { enabled: true },
+        },
+      },
+    };
+
+    const entries = resolveEntriesWithActiveFallback({
+      cfg,
+      capability: "video",
+      config: cfg.tools?.media?.video,
+      providerRegistry,
+      activeModel: { provider: "groq", model: "whisper-large-v3" },
+    });
+    expect(entries).toHaveLength(0);
+  });
+});
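Together these tests pin down the fallback order: explicitly configured entries always win; the active model is used only when the capability is enabled, no entries are configured, and the active provider actually advertises the capability. A minimal sketch consistent with that behavior; the types and the simplified signature are assumptions inferred from the tests, not the real `resolve.ts`:

```ts
// Shapes inferred from the tests above; illustrative, not the actual API.
type ModelEntry = { provider: string; model: string };
type CapabilityConfig = { enabled?: boolean; models?: ModelEntry[] };
type ProviderInfo = { capabilities: string[] };

function resolveEntriesWithActiveFallbackSketch(params: {
  capability: string;
  config?: CapabilityConfig;
  providerRegistry: Map<string, ProviderInfo>;
  activeModel?: ModelEntry;
}): ModelEntry[] {
  const { capability, config, providerRegistry, activeModel } = params;
  // Explicitly configured entries always win over the active-model fallback.
  const configured = config?.models ?? [];
  if (configured.length > 0) return configured;
  // The fallback applies only when the capability is enabled and a model is active.
  if (!config?.enabled || !activeModel) return [];
  // The active provider must advertise this capability in the registry.
  const caps = providerRegistry.get(activeModel.provider)?.capabilities ?? [];
  return caps.includes(capability) ? [activeModel] : [];
}
```

Under this sketch, `audio: { enabled: true }` with an active `groq/whisper-large-v3` yields the groq entry, configured `whisper-1` shadows it, and a `video` request returns nothing because groq only advertises `audio`, matching the three tests above.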
@@ -98,6 +98,23 @@ function buildModelDecision(params: {
   };
 }
 
+function formatDecisionSummary(decision: MediaUnderstandingDecision): string {
+  const total = decision.attachments.length;
+  const success = decision.attachments.filter((entry) => entry.chosen?.outcome === "success").length;
+  const chosen = decision.attachments.find((entry) => entry.chosen)?.chosen;
+  const provider = chosen?.provider?.trim();
+  const model = chosen?.model?.trim();
+  const modelLabel = provider ? (model ? `${provider}/${model}` : provider) : undefined;
+  const reason = decision.attachments
+    .flatMap((entry) => entry.attempts.map((attempt) => attempt.reason).filter(Boolean))
+    .find(Boolean);
+  const shortReason = reason ? reason.split(":")[0]?.trim() : undefined;
+  const countLabel = total > 0 ? ` (${success}/${total})` : "";
+  const viaLabel = modelLabel ? ` via ${modelLabel}` : "";
+  const reasonLabel = shortReason ? ` reason=${shortReason}` : "";
+  return `${decision.capability}: ${decision.outcome}${countLabel}${viaLabel}${reasonLabel}`;
+}
+
 async function runProviderEntry(params: {
   capability: MediaUnderstandingCapability;
   entry: MediaUnderstandingModelConfig;
@@ -495,12 +512,16 @@ export async function runCapability(params: {
       chosen: attempts.find((attempt) => attempt.outcome === "success"),
     });
   }
+  const decision: MediaUnderstandingDecision = {
+    capability,
+    outcome: outputs.length > 0 ? "success" : "skipped",
+    attachments: attachmentDecisions,
+  };
+  if (shouldLogVerbose()) {
+    logVerbose(`Media understanding ${formatDecisionSummary(decision)}`);
+  }
   return {
     outputs,
-    decision: {
-      capability,
-      outcome: outputs.length > 0 ? "success" : "skipped",
-      attachments: attachmentDecisions,
-    },
+    decision,
   };
 }
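To make the log format concrete, here is a hypothetical decision traced through `formatDecisionSummary` above:

```ts
// Hypothetical input: two attachments, one understood, one skipped for size.
const decision = {
  capability: "image",
  outcome: "success",
  attachments: [
    {
      attachmentIndex: 0,
      attempts: [{ type: "provider", outcome: "success", provider: "openai", model: "gpt-5.2" }],
      chosen: { type: "provider", outcome: "success", provider: "openai", model: "gpt-5.2" },
    },
    {
      attachmentIndex: 1,
      attempts: [{ type: "provider", outcome: "skipped", reason: "maxBytes: too large" }],
    },
  ],
};
// total=2, success=1, modelLabel="openai/gpt-5.2", shortReason="maxBytes", so
// the verbose log reads:
// "Media understanding image: success (1/2) via openai/gpt-5.2 reason=maxBytes"
```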