refactor: normalize media attachment selection
This commit is contained in:
@@ -424,4 +424,64 @@ describe("applyMediaUnderstanding", () => {
|
|||||||
["[Audio 1/2]\nTranscript:\nnote-a.ogg", "[Audio 2/2]\nTranscript:\nnote-b.ogg"].join("\n\n"),
|
["[Audio 1/2]\nTranscript:\nnote-a.ogg", "[Audio 2/2]\nTranscript:\nnote-b.ogg"].join("\n\n"),
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Verifies that when one message carries an image, an audio clip, and a video,
// the rewritten body lists the outputs in image -> audio -> video order and the
// audio transcript is what ends up in the command-related context fields.
it("orders mixed media outputs as image, audio, video", async () => {
  const { applyMediaUnderstanding } = await loadApply();
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-media-"));
  try {
    const imagePath = path.join(dir, "photo.jpg");
    const audioPath = path.join(dir, "note.ogg");
    const videoPath = path.join(dir, "clip.mp4");
    await fs.writeFile(imagePath, "image-bytes");
    await fs.writeFile(audioPath, "audio-bytes");
    await fs.writeFile(videoPath, "video-bytes");

    const ctx: MsgContext = {
      Body: "<media:mixed>",
      MediaPaths: [imagePath, audioPath, videoPath],
      MediaTypes: ["image/jpeg", "audio/ogg", "video/mp4"],
    };
    const cfg: ClawdbotConfig = {
      tools: {
        media: {
          image: { enabled: true, models: [{ provider: "openai", model: "gpt-5.2" }] },
          audio: { enabled: true, models: [{ provider: "groq" }] },
          video: { enabled: true, models: [{ provider: "google", model: "gemini-3" }] },
        },
      },
    };

    // Each provider stub implements exactly one capability and returns a fixed
    // text, so the expected body below is fully determined.
    const result = await applyMediaUnderstanding({
      ctx,
      cfg,
      agentDir: dir,
      providers: {
        openai: {
          id: "openai",
          describeImage: async () => ({ text: "image ok" }),
        },
        groq: {
          id: "groq",
          transcribeAudio: async () => ({ text: "audio ok" }),
        },
        google: {
          id: "google",
          describeVideo: async () => ({ text: "video ok" }),
        },
      },
    });

    expect(result.appliedImage).toBe(true);
    expect(result.appliedAudio).toBe(true);
    expect(result.appliedVideo).toBe(true);
    expect(ctx.Body).toBe(
      [
        "[Image]\nDescription:\nimage ok",
        "[Audio]\nTranscript:\naudio ok",
        "[Video]\nDescription:\nvideo ok",
      ].join("\n\n"),
    );
    expect(ctx.Transcript).toBe("audio ok");
    expect(ctx.CommandBody).toBe("audio ok");
    expect(ctx.BodyForCommands).toBe("audio ok");
  } finally {
    // Remove the scratch directory even when an assertion above throws, so
    // repeated runs do not accumulate "clawdbot-media-*" folders in the OS temp dir.
    await fs.rm(dir, { recursive: true, force: true });
  }
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import { fileURLToPath } from "node:url";
|
|||||||
import type { MsgContext } from "../auto-reply/templating.js";
|
import type { MsgContext } from "../auto-reply/templating.js";
|
||||||
import type { MediaUnderstandingAttachmentsConfig } from "../config/types.tools.js";
|
import type { MediaUnderstandingAttachmentsConfig } from "../config/types.tools.js";
|
||||||
import { fetchRemoteMedia, MediaFetchError } from "../media/fetch.js";
|
import { fetchRemoteMedia, MediaFetchError } from "../media/fetch.js";
|
||||||
import { detectMime, getFileExtension, isAudioFileName } from "../media/mime.js";
|
import { detectMime, getFileExtension, isAudioFileName, kindFromMime } from "../media/mime.js";
|
||||||
import { logVerbose, shouldLogVerbose } from "../globals.js";
|
import { logVerbose, shouldLogVerbose } from "../globals.js";
|
||||||
import { fetchWithTimeout } from "./providers/shared.js";
|
import { fetchWithTimeout } from "./providers/shared.js";
|
||||||
import type { MediaAttachment, MediaUnderstandingCapability } from "./types.js";
|
import type { MediaAttachment, MediaUnderstandingCapability } from "./types.js";
|
||||||
@@ -100,23 +100,32 @@ export function normalizeAttachments(ctx: MsgContext): MediaAttachment[] {
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Classifies an attachment as an image, an audio clip, or a video.
 *
 * The MIME type is consulted first through `kindFromMime`. When that does not
 * yield one of the three media kinds, the file extension of `path` (or `url`
 * when `path` is nullish) decides, checked in the order video, audio, image.
 *
 * @param attachment - Attachment whose `mime`, `path`, and `url` are inspected.
 * @returns `"image"`, `"audio"`, or `"video"` when a kind can be determined,
 *   otherwise `"unknown"`.
 */
export function resolveAttachmentKind(
  attachment: MediaAttachment,
): "image" | "audio" | "video" | "unknown" {
  const kind = kindFromMime(attachment.mime);
  // Only the three media kinds short-circuit. NOTE(review): the full set of
  // values kindFromMime can return is not visible here; anything else (for
  // example a generic, non-media classification) falls through to the extension
  // check so the result always stays inside the declared return union.
  if (kind === "image" || kind === "audio" || kind === "video") return kind;

  const location = attachment.path ?? attachment.url;
  const ext = getFileExtension(location);
  if (!ext) return "unknown";
  if ([".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v"].includes(ext)) return "video";
  if (isAudioFileName(location)) return "audio";
  if ([".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tiff", ".tif"].includes(ext)) {
    return "image";
  }
  return "unknown";
}
|
||||||
|
|
||||||
|
/**
 * Reports whether the attachment is classified as a video.
 *
 * @param attachment - Attachment to classify.
 * @returns `true` when `resolveAttachmentKind` yields `"video"`.
 */
export function isVideoAttachment(attachment: MediaAttachment): boolean {
  return resolveAttachmentKind(attachment) === "video";
}
|
||||||
|
|
||||||
/**
 * Reports whether the attachment is classified as audio.
 *
 * @param attachment - Attachment to classify.
 * @returns `true` when `resolveAttachmentKind` yields `"audio"`.
 */
export function isAudioAttachment(attachment: MediaAttachment): boolean {
  return resolveAttachmentKind(attachment) === "audio";
}
|
||||||
|
|
||||||
/**
 * Reports whether the attachment is classified as an image.
 *
 * @param attachment - Attachment to classify.
 * @returns `true` when `resolveAttachmentKind` yields `"image"`.
 */
export function isImageAttachment(attachment: MediaAttachment): boolean {
  return resolveAttachmentKind(attachment) === "image";
}
|
||||||
|
|
||||||
function isAbortError(err: unknown): boolean {
|
function isAbortError(err: unknown): boolean {
|
||||||
|
|||||||
Reference in New Issue
Block a user