refactor: normalize media attachment selection
This commit is contained in:
@@ -424,4 +424,64 @@ describe("applyMediaUnderstanding", () => {
|
||||
["[Audio 1/2]\nTranscript:\nnote-a.ogg", "[Audio 2/2]\nTranscript:\nnote-b.ogg"].join("\n\n"),
|
||||
);
|
||||
});
|
||||
|
||||
// Test: one image, one audio and one video attachment are processed in a
// single applyMediaUnderstanding call. It asserts that all three "applied"
// flags are set, that ctx.Body contains the three sections in the order
// image, audio, video, and that Transcript / CommandBody / BodyForCommands
// all equal the audio transcript text.
it("orders mixed media outputs as image, audio, video", async () => {
|
||||
const { applyMediaUnderstanding } = await loadApply();
|
||||
// Create a fresh temp directory and write one placeholder file per media kind.
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-media-"));
|
||||
const imagePath = path.join(dir, "photo.jpg");
|
||||
const audioPath = path.join(dir, "note.ogg");
|
||||
const videoPath = path.join(dir, "clip.mp4");
|
||||
await fs.writeFile(imagePath, "image-bytes");
|
||||
await fs.writeFile(audioPath, "audio-bytes");
|
||||
await fs.writeFile(videoPath, "video-bytes");
|
||||
|
||||
// Message context: MediaPaths and MediaTypes are parallel arrays (same index = same attachment).
const ctx: MsgContext = {
|
||||
Body: "<media:mixed>",
|
||||
MediaPaths: [imagePath, audioPath, videoPath],
|
||||
MediaTypes: ["image/jpeg", "audio/ogg", "video/mp4"],
|
||||
};
|
||||
// Enable all three capabilities, each with a single model entry.
// The audio entry names only a provider, without a model.
const cfg: ClawdbotConfig = {
|
||||
tools: {
|
||||
media: {
|
||||
image: { enabled: true, models: [{ provider: "openai", model: "gpt-5.2" }] },
|
||||
audio: { enabled: true, models: [{ provider: "groq" }] },
|
||||
video: { enabled: true, models: [{ provider: "google", model: "gemini-3" }] },
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
// Provider stubs return fixed strings so the expected Body text below is deterministic.
const result = await applyMediaUnderstanding({
|
||||
ctx,
|
||||
cfg,
|
||||
agentDir: dir,
|
||||
providers: {
|
||||
openai: {
|
||||
id: "openai",
|
||||
describeImage: async () => ({ text: "image ok" }),
|
||||
},
|
||||
groq: {
|
||||
id: "groq",
|
||||
transcribeAudio: async () => ({ text: "audio ok" }),
|
||||
},
|
||||
google: {
|
||||
id: "google",
|
||||
describeVideo: async () => ({ text: "video ok" }),
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
// Every capability must report that it was applied.
expect(result.appliedImage).toBe(true);
|
||||
expect(result.appliedAudio).toBe(true);
|
||||
expect(result.appliedVideo).toBe(true);
|
||||
// Body is the three sections joined by a blank line, in image -> audio -> video order.
expect(ctx.Body).toBe(
|
||||
[
|
||||
"[Image]\nDescription:\nimage ok",
|
||||
"[Audio]\nTranscript:\naudio ok",
|
||||
"[Video]\nDescription:\nvideo ok",
|
||||
].join("\n\n"),
|
||||
);
|
||||
// The transcript-derived fields carry only the audio text.
expect(ctx.Transcript).toBe("audio ok");
|
||||
expect(ctx.CommandBody).toBe("audio ok");
|
||||
expect(ctx.BodyForCommands).toBe("audio ok");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -7,7 +7,7 @@ import { fileURLToPath } from "node:url";
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import type { MediaUnderstandingAttachmentsConfig } from "../config/types.tools.js";
|
||||
import { fetchRemoteMedia, MediaFetchError } from "../media/fetch.js";
|
||||
import { detectMime, getFileExtension, isAudioFileName } from "../media/mime.js";
|
||||
import { detectMime, getFileExtension, isAudioFileName, kindFromMime } from "../media/mime.js";
|
||||
import { logVerbose, shouldLogVerbose } from "../globals.js";
|
||||
import { fetchWithTimeout } from "./providers/shared.js";
|
||||
import type { MediaAttachment, MediaUnderstandingCapability } from "./types.js";
|
||||
@@ -100,23 +100,32 @@ export function normalizeAttachments(ctx: MsgContext): MediaAttachment[] {
|
||||
];
|
||||
}
|
||||
|
||||
export function isVideoAttachment(attachment: MediaAttachment): boolean {
|
||||
if (attachment.mime?.startsWith("video/")) return true;
|
||||
/**
 * Classifies an attachment as "image", "audio", "video" or "unknown".
 *
 * The MIME type is checked first through kindFromMime. If that yields
 * "unknown", the file extension of `attachment.path` (falling back to
 * `attachment.url`) is tested in this order: video extensions, audio file
 * names (isAudioFileName), image extensions.
 *
 * @param attachment - The attachment whose `mime`, `path` and `url` are inspected.
 * @returns The resolved kind, or "unknown" when neither MIME nor extension matches.
 */
export function resolveAttachmentKind(
|
||||
attachment: MediaAttachment,
|
||||
): "image" | "audio" | "video" | "unknown" {
|
||||
const kind = kindFromMime(attachment.mime);
|
||||
// NOTE(review): any value other than "unknown" is returned unchanged; confirm
// kindFromMime can only produce kinds listed in this function's return type.
if (kind !== "unknown") return kind;
|
||||
|
||||
// MIME was inconclusive: fall back to the extension of the path, or of the URL when no path is set.
const ext = getFileExtension(attachment.path ?? attachment.url);
|
||||
// NOTE(review): the next two statements (returning a boolean) look like rows of the
// pre-refactor isVideoAttachment body retained by the diff view — confirm against the real file.
if (!ext) return false;
|
||||
return [".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v"].includes(ext);
|
||||
// No extension available: nothing left to classify by.
if (!ext) return "unknown";
|
||||
if ([".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v"].includes(ext)) return "video";
|
||||
// Audio detection is delegated to isAudioFileName rather than a local extension list.
if (isAudioFileName(attachment.path ?? attachment.url)) return "audio";
|
||||
if ([".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tiff", ".tif"].includes(ext)) {
|
||||
return "image";
|
||||
}
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
/**
 * Reports whether an attachment is a video.
 *
 * @param attachment - The attachment to classify.
 * @returns `true` when resolveAttachmentKind returns "video" for it.
 */
export function isVideoAttachment(attachment: MediaAttachment): boolean {
|
||||
return resolveAttachmentKind(attachment) === "video";
|
||||
}
|
||||
|
||||
/**
 * Reports whether an attachment is audio.
 *
 * @param attachment - The attachment to classify.
 * @returns `true` when the attachment is recognised as audio.
 */
export function isAudioAttachment(attachment: MediaAttachment): boolean {
|
||||
// NOTE(review): the next two statements (MIME prefix check, then isAudioFileName) look like
// the pre-refactor body retained by the diff view — confirm against the real file.
if (attachment.mime?.startsWith("audio/")) return true;
|
||||
return isAudioFileName(attachment.path ?? attachment.url);
|
||||
// Delegates classification to resolveAttachmentKind.
return resolveAttachmentKind(attachment) === "audio";
|
||||
}
|
||||
|
||||
/**
 * Reports whether an attachment is an image.
 *
 * @param attachment - The attachment to classify.
 * @returns `true` when the attachment is recognised as an image.
 */
export function isImageAttachment(attachment: MediaAttachment): boolean {
|
||||
// NOTE(review): the next four statements (MIME prefix check plus a local extension list) look
// like the pre-refactor body retained by the diff view — confirm against the real file.
if (attachment.mime?.startsWith("image/")) return true;
|
||||
const ext = getFileExtension(attachment.path ?? attachment.url);
|
||||
if (!ext) return false;
|
||||
return [".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tiff", ".tif"].includes(ext);
|
||||
// Delegates classification to resolveAttachmentKind.
return resolveAttachmentKind(attachment) === "image";
|
||||
}
|
||||
|
||||
function isAbortError(err: unknown): boolean {
|
||||
|
||||
Reference in New Issue
Block a user