refactor: tighten media diagnostics
This commit is contained in:
@@ -256,6 +256,15 @@ When `mode: "all"`, outputs are labeled `[Image 1/2]`, `[Audio 2/2]`, etc.
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Status output
|
||||||
|
When media understanding runs, `/status` includes a short summary line:
|
||||||
|
|
||||||
|
```
|
||||||
|
📎 Media: image ok (openai/gpt-5.2) · audio skipped (maxBytes)
|
||||||
|
```
|
||||||
|
|
||||||
|
This shows per‑capability outcomes and the chosen provider/model when applicable.
|
||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
- Understanding is **best‑effort**. Errors do not block replies.
|
- Understanding is **best‑effort**. Errors do not block replies.
|
||||||
- Attachments are still passed to models even when understanding is disabled.
|
- Attachments are still passed to models even when understanding is disabled.
|
||||||
|
|||||||
@@ -41,4 +41,69 @@ describe("buildInboundMediaNote", () => {
|
|||||||
});
|
});
|
||||||
expect(note).toBe("[media attached: /tmp/b.png | https://example.com/b.png]");
|
expect(note).toBe("[media attached: /tmp/b.png | https://example.com/b.png]");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("only suppresses attachments when media understanding succeeded", () => {
  // The only decision recorded here was skipped (maxBytes), so neither attachment
  // may be dropped from the note: both files must still be listed.
  const mediaPaths = ["/tmp/a.png", "/tmp/b.png"];
  const mediaUrls = ["https://example.com/a.png", "https://example.com/b.png"];
  const expectedLines = [
    "[media attached: 2 files]",
    "[media attached 1/2: /tmp/a.png | https://example.com/a.png]",
    "[media attached 2/2: /tmp/b.png | https://example.com/b.png]",
  ];

  const note = buildInboundMediaNote({
    MediaPaths: mediaPaths,
    MediaUrls: mediaUrls,
    MediaUnderstandingDecisions: [
      {
        capability: "image",
        outcome: "skipped",
        attachments: [
          {
            attachmentIndex: 0,
            attempts: [{ type: "provider", outcome: "skipped", reason: "maxBytes: too large" }],
          },
        ],
      },
    ],
  });

  expect(note).toBe(expectedLines.join("\n"));
});
|
||||||
|
|
||||||
|
it("suppresses attachments when media understanding succeeds via decisions", () => {
  // Attachment 0 has a successful decision with a chosen attempt, so only the
  // second attachment is expected to remain in the note.
  const mediaPaths = ["/tmp/a.png", "/tmp/b.png"];
  const mediaUrls = ["https://example.com/a.png", "https://example.com/b.png"];
  const expectedNote = "[media attached: /tmp/b.png | https://example.com/b.png]";

  const note = buildInboundMediaNote({
    MediaPaths: mediaPaths,
    MediaUrls: mediaUrls,
    MediaUnderstandingDecisions: [
      {
        capability: "image",
        outcome: "success",
        attachments: [
          {
            attachmentIndex: 0,
            attempts: [
              { type: "provider", outcome: "success", provider: "openai", model: "gpt-5.2" },
            ],
            chosen: { type: "provider", outcome: "success", provider: "openai", model: "gpt-5.2" },
          },
        ],
      },
    ],
  });

  expect(note).toBe(expectedNote);
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -19,11 +19,22 @@ function formatMediaAttachedLine(params: {
|
|||||||
|
|
||||||
export function buildInboundMediaNote(ctx: MsgContext): string | undefined {
|
export function buildInboundMediaNote(ctx: MsgContext): string | undefined {
|
||||||
// Attachment indices follow MediaPaths/MediaUrls ordering as supplied by the channel.
|
// Attachment indices follow MediaPaths/MediaUrls ordering as supplied by the channel.
|
||||||
const suppressed = new Set(
|
const suppressed = new Set<number>();
|
||||||
Array.isArray(ctx.MediaUnderstanding)
|
if (Array.isArray(ctx.MediaUnderstanding)) {
|
||||||
? ctx.MediaUnderstanding.map((output) => output.attachmentIndex)
|
for (const output of ctx.MediaUnderstanding) {
|
||||||
: [],
|
suppressed.add(output.attachmentIndex);
|
||||||
);
|
}
|
||||||
|
}
|
||||||
|
if (Array.isArray(ctx.MediaUnderstandingDecisions)) {
|
||||||
|
for (const decision of ctx.MediaUnderstandingDecisions) {
|
||||||
|
if (decision.outcome !== "success") continue;
|
||||||
|
for (const attachment of decision.attachments) {
|
||||||
|
if (attachment.chosen?.outcome === "success") {
|
||||||
|
suppressed.add(attachment.attachmentIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
const pathsFromArray = Array.isArray(ctx.MediaPaths) ? ctx.MediaPaths : undefined;
|
const pathsFromArray = Array.isArray(ctx.MediaPaths) ? ctx.MediaPaths : undefined;
|
||||||
const paths =
|
const paths =
|
||||||
pathsFromArray && pathsFromArray.length > 0
|
pathsFromArray && pathsFromArray.length > 0
|
||||||
|
|||||||
136
src/media-understanding/resolve.test.ts
Normal file
136
src/media-understanding/resolve.test.ts
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
|
import type { ClawdbotConfig } from "../config/config.js";
|
||||||
|
import {
|
||||||
|
resolveEntriesWithActiveFallback,
|
||||||
|
resolveModelEntries,
|
||||||
|
} from "./resolve.js";
|
||||||
|
|
||||||
|
const providerRegistry = new Map([
|
||||||
|
["openai", { capabilities: ["image"] }],
|
||||||
|
["groq", { capabilities: ["audio"] }],
|
||||||
|
]);
|
||||||
|
|
||||||
|
describe("resolveModelEntries", () => {
  it("uses provider capabilities for shared entries without explicit caps", () => {
    // Shared entry (tools.media.models) that declares no capabilities of its own.
    const sharedCfg: ClawdbotConfig = {
      tools: { media: { models: [{ provider: "openai", model: "gpt-5.2" }] } },
    };

    // The registry lists "openai" as image-capable only.
    const forImage = resolveModelEntries({
      cfg: sharedCfg,
      capability: "image",
      providerRegistry,
    });
    expect(forImage).toHaveLength(1);

    const forAudio = resolveModelEntries({
      cfg: sharedCfg,
      capability: "audio",
      providerRegistry,
    });
    expect(forAudio).toHaveLength(0);
  });

  it("keeps per-capability entries even without explicit caps", () => {
    // Entry configured directly under tools.media.image rather than the shared list.
    const imageCfg: ClawdbotConfig = {
      tools: { media: { image: { models: [{ provider: "openai", model: "gpt-5.2" }] } } },
    };

    const resolved = resolveModelEntries({
      cfg: imageCfg,
      capability: "image",
      config: imageCfg.tools?.media?.image,
      providerRegistry,
    });

    expect(resolved).toHaveLength(1);
  });

  it("skips shared CLI entries without capabilities", () => {
    // A shared CLI entry carries neither a provider nor a capability list.
    const cliCfg: ClawdbotConfig = {
      tools: {
        media: {
          models: [{ type: "cli", command: "gemini", args: ["--file", "{{MediaPath}}"] }],
        },
      },
    };

    const resolved = resolveModelEntries({
      cfg: cliCfg,
      capability: "image",
      providerRegistry,
    });

    expect(resolved).toHaveLength(0);
  });
});
|
||||||
|
|
||||||
|
describe("resolveEntriesWithActiveFallback", () => {
  it("uses active model when enabled and no models are configured", () => {
    // Audio is enabled but lists no models, so the active model is the only candidate.
    const audioCfg: ClawdbotConfig = {
      tools: { media: { audio: { enabled: true } } },
    };

    const resolved = resolveEntriesWithActiveFallback({
      cfg: audioCfg,
      capability: "audio",
      config: audioCfg.tools?.media?.audio,
      providerRegistry,
      activeModel: { provider: "groq", model: "whisper-large-v3" },
    });

    expect(resolved).toHaveLength(1);
    const [first] = resolved;
    expect(first?.provider).toBe("groq");
  });

  it("ignores active model when configured entries exist", () => {
    // An explicitly configured audio model is expected to win over the active model.
    const audioCfg: ClawdbotConfig = {
      tools: {
        media: {
          audio: { enabled: true, models: [{ provider: "openai", model: "whisper-1" }] },
        },
      },
    };

    const resolved = resolveEntriesWithActiveFallback({
      cfg: audioCfg,
      capability: "audio",
      config: audioCfg.tools?.media?.audio,
      providerRegistry,
      activeModel: { provider: "groq", model: "whisper-large-v3" },
    });

    expect(resolved).toHaveLength(1);
    const [first] = resolved;
    expect(first?.provider).toBe("openai");
  });

  it("skips active model when provider lacks capability", () => {
    // The registry lists "groq" for audio only, so it cannot serve as a video fallback.
    const videoCfg: ClawdbotConfig = {
      tools: { media: { video: { enabled: true } } },
    };

    const resolved = resolveEntriesWithActiveFallback({
      cfg: videoCfg,
      capability: "video",
      config: videoCfg.tools?.media?.video,
      providerRegistry,
      activeModel: { provider: "groq", model: "whisper-large-v3" },
    });

    expect(resolved).toHaveLength(0);
  });
});
|
||||||
@@ -98,6 +98,23 @@ function buildModelDecision(params: {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds a one-line, human-readable summary of a media-understanding decision
 * for verbose logging.
 *
 * Shape: `<capability>: <outcome> (<succeeded>/<total>) via <provider>/<model> reason=<short>`.
 * Each optional segment is omitted when it has nothing to show: the count when the
 * decision has no attachments, `via` when no chosen attempt names a provider, and
 * `reason` when no attempt carries a usable reason.
 *
 * @param decision - The per-capability decision to summarize.
 * @returns The formatted summary line (without any log prefix).
 */
function formatDecisionSummary(decision: MediaUnderstandingDecision): string {
  const { attachments } = decision;
  const total = attachments.length;
  const success = attachments.filter((entry) => entry.chosen?.outcome === "success").length;

  // Label the summary with the first attachment that has a chosen attempt.
  const chosen = attachments.find((entry) => entry.chosen)?.chosen;
  const provider = chosen?.provider?.trim();
  const model = chosen?.model?.trim();
  const modelLabel = provider ? (model ? `${provider}/${model}` : provider) : undefined;

  // Reasons look like "maxBytes: too large"; only the part before the first colon is
  // shown. Shorten every reason BEFORE picking one, so a blank reason (or one that
  // starts with ":") cannot hide a usable reason from a later attempt.
  const shortReason = attachments
    .flatMap((entry) => entry.attempts)
    .map((attempt) => attempt.reason?.split(":")[0]?.trim())
    .find((candidate) => Boolean(candidate));

  const countLabel = total > 0 ? ` (${success}/${total})` : "";
  const viaLabel = modelLabel ? ` via ${modelLabel}` : "";
  const reasonLabel = shortReason ? ` reason=${shortReason}` : "";
  return `${decision.capability}: ${decision.outcome}${countLabel}${viaLabel}${reasonLabel}`;
}
|
||||||
|
|
||||||
async function runProviderEntry(params: {
|
async function runProviderEntry(params: {
|
||||||
capability: MediaUnderstandingCapability;
|
capability: MediaUnderstandingCapability;
|
||||||
entry: MediaUnderstandingModelConfig;
|
entry: MediaUnderstandingModelConfig;
|
||||||
@@ -495,12 +512,16 @@ export async function runCapability(params: {
|
|||||||
chosen: attempts.find((attempt) => attempt.outcome === "success"),
|
chosen: attempts.find((attempt) => attempt.outcome === "success"),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
const decision: MediaUnderstandingDecision = {
|
||||||
|
capability,
|
||||||
|
outcome: outputs.length > 0 ? "success" : "skipped",
|
||||||
|
attachments: attachmentDecisions,
|
||||||
|
};
|
||||||
|
if (shouldLogVerbose()) {
|
||||||
|
logVerbose(`Media understanding ${formatDecisionSummary(decision)}`);
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
outputs,
|
outputs,
|
||||||
decision: {
|
decision,
|
||||||
capability,
|
|
||||||
outcome: outputs.length > 0 ? "success" : "skipped",
|
|
||||||
attachments: attachmentDecisions,
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user