fix(media): sniff mime and keep extensions
This commit is contained in:
130
src/media/mime.ts
Normal file
130
src/media/mime.ts
Normal file
@@ -0,0 +1,130 @@
|
||||
import path from "node:path";
|
||||
|
||||
import { mediaKindFromMime, type MediaKind } from "./constants.js";
|
||||
|
||||
// Map common mimes to preferred file extensions.
|
||||
const EXT_BY_MIME: Record<string, string> = {
|
||||
"image/jpeg": ".jpg",
|
||||
"image/png": ".png",
|
||||
"image/webp": ".webp",
|
||||
"image/gif": ".gif",
|
||||
"audio/ogg": ".ogg",
|
||||
"audio/mpeg": ".mp3",
|
||||
"video/mp4": ".mp4",
|
||||
"application/pdf": ".pdf",
|
||||
"text/plain": ".txt",
|
||||
};
|
||||
|
||||
const MIME_BY_EXT: Record<string, string> = Object.fromEntries(
|
||||
Object.entries(EXT_BY_MIME).map(([mime, ext]) => [ext, mime]),
|
||||
);
|
||||
|
||||
function normalizeHeaderMime(mime?: string | null): string | undefined {
|
||||
if (!mime) return undefined;
|
||||
const cleaned = mime.split(";")[0]?.trim().toLowerCase();
|
||||
return cleaned || undefined;
|
||||
}
|
||||
|
||||
function sniffMime(buffer?: Buffer): string | undefined {
|
||||
if (!buffer || buffer.length < 4) return undefined;
|
||||
|
||||
// JPEG: FF D8 FF
|
||||
if (buffer[0] === 0xff && buffer[1] === 0xd8 && buffer[2] === 0xff) {
|
||||
return "image/jpeg";
|
||||
}
|
||||
|
||||
// PNG: 89 50 4E 47 0D 0A 1A 0A
|
||||
if (
|
||||
buffer.length >= 8 &&
|
||||
buffer[0] === 0x89 &&
|
||||
buffer[1] === 0x50 &&
|
||||
buffer[2] === 0x4e &&
|
||||
buffer[3] === 0x47 &&
|
||||
buffer[4] === 0x0d &&
|
||||
buffer[5] === 0x0a &&
|
||||
buffer[6] === 0x1a &&
|
||||
buffer[7] === 0x0a
|
||||
) {
|
||||
return "image/png";
|
||||
}
|
||||
|
||||
// GIF: GIF87a / GIF89a
|
||||
if (buffer.length >= 6) {
|
||||
const sig = buffer.subarray(0, 6).toString("ascii");
|
||||
if (sig === "GIF87a" || sig === "GIF89a") return "image/gif";
|
||||
}
|
||||
|
||||
// WebP: RIFF....WEBP
|
||||
if (
|
||||
buffer.length >= 12 &&
|
||||
buffer.subarray(0, 4).toString("ascii") === "RIFF" &&
|
||||
buffer.subarray(8, 12).toString("ascii") === "WEBP"
|
||||
) {
|
||||
return "image/webp";
|
||||
}
|
||||
|
||||
// PDF: %PDF-
|
||||
if (buffer.subarray(0, 5).toString("ascii") === "%PDF-") {
|
||||
return "application/pdf";
|
||||
}
|
||||
|
||||
// Ogg / Opus: OggS
|
||||
if (buffer.subarray(0, 4).toString("ascii") === "OggS") {
|
||||
return "audio/ogg";
|
||||
}
|
||||
|
||||
// MP3: ID3 tag or frame sync FF E0+.
|
||||
if (buffer.subarray(0, 3).toString("ascii") === "ID3") {
|
||||
return "audio/mpeg";
|
||||
}
|
||||
if (buffer[0] === 0xff && (buffer[1] & 0xe0) === 0xe0) {
|
||||
return "audio/mpeg";
|
||||
}
|
||||
|
||||
// MP4: "ftyp" at offset 4.
|
||||
if (buffer.length >= 12 && buffer.subarray(4, 8).toString("ascii") === "ftyp") {
|
||||
return "video/mp4";
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function extFromPath(filePath?: string): string | undefined {
|
||||
if (!filePath) return undefined;
|
||||
try {
|
||||
if (/^https?:\/\//i.test(filePath)) {
|
||||
const url = new URL(filePath);
|
||||
return path.extname(url.pathname).toLowerCase() || undefined;
|
||||
}
|
||||
} catch {
|
||||
// fall back to plain path parsing
|
||||
}
|
||||
const ext = path.extname(filePath).toLowerCase();
|
||||
return ext || undefined;
|
||||
}
|
||||
|
||||
export function detectMime(opts: {
|
||||
buffer?: Buffer;
|
||||
headerMime?: string | null;
|
||||
filePath?: string;
|
||||
}): string | undefined {
|
||||
const sniffed = sniffMime(opts.buffer);
|
||||
if (sniffed) return sniffed;
|
||||
|
||||
const headerMime = normalizeHeaderMime(opts.headerMime);
|
||||
if (headerMime) return headerMime;
|
||||
|
||||
const ext = extFromPath(opts.filePath);
|
||||
if (ext && MIME_BY_EXT[ext]) return MIME_BY_EXT[ext];
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export function extensionForMime(mime?: string | null): string | undefined {
|
||||
if (!mime) return undefined;
|
||||
return EXT_BY_MIME[mime.toLowerCase()];
|
||||
}
|
||||
|
||||
export function kindFromMime(mime?: string | null): MediaKind {
|
||||
return mediaKindFromMime(mime);
|
||||
}
|
||||
@@ -2,6 +2,7 @@ import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import sharp from "sharp";
|
||||
|
||||
const realOs = await vi.importActual<typeof import("node:os")>("node:os");
|
||||
const HOME = path.join(realOs.tmpdir(), "warelay-home-test");
|
||||
@@ -35,6 +36,16 @@ describe("media store", () => {
|
||||
const savedStat = await fs.stat(saved.path);
|
||||
expect(savedStat.size).toBe(buf.length);
|
||||
expect(saved.contentType).toBe("text/plain");
|
||||
expect(saved.path.endsWith(".txt")).toBe(true);
|
||||
|
||||
const jpeg = await sharp({
|
||||
create: { width: 2, height: 2, channels: 3, background: "#123456" },
|
||||
})
|
||||
.jpeg({ quality: 80 })
|
||||
.toBuffer();
|
||||
const savedJpeg = await store.saveMediaBuffer(jpeg, "image/jpeg");
|
||||
expect(savedJpeg.contentType).toBe("image/jpeg");
|
||||
expect(savedJpeg.path.endsWith(".jpg")).toBe(true);
|
||||
|
||||
const huge = Buffer.alloc(5 * 1024 * 1024 + 1);
|
||||
await expect(store.saveMediaBuffer(huge)).rejects.toThrow(
|
||||
@@ -50,6 +61,7 @@ describe("media store", () => {
|
||||
expect(saved.size).toBe(10);
|
||||
const savedStat = await fs.stat(saved.path);
|
||||
expect(savedStat.isFile()).toBe(true);
|
||||
expect(path.extname(saved.path)).toBe(".txt");
|
||||
|
||||
// make the file look old and ensure cleanOldMedia removes it
|
||||
const past = Date.now() - 10_000;
|
||||
@@ -57,4 +69,21 @@ describe("media store", () => {
|
||||
await store.cleanOldMedia(1);
|
||||
await expect(fs.stat(saved.path)).rejects.toThrow();
|
||||
});
|
||||
|
||||
it("renames media based on detected mime even when extension is wrong", async () => {
|
||||
const pngBytes = await sharp({
|
||||
create: { width: 2, height: 2, channels: 3, background: "#00ff00" },
|
||||
})
|
||||
.png()
|
||||
.toBuffer();
|
||||
const bogusExt = path.join(HOME, "image-wrong.bin");
|
||||
await fs.writeFile(bogusExt, pngBytes);
|
||||
|
||||
const saved = await store.saveMediaSource(bogusExt);
|
||||
expect(saved.contentType).toBe("image/png");
|
||||
expect(path.extname(saved.path)).toBe(".png");
|
||||
|
||||
const buf = await fs.readFile(saved.path);
|
||||
expect(buf.equals(pngBytes)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -6,6 +6,8 @@ import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { pipeline } from "node:stream/promises";
|
||||
|
||||
import { detectMime, extensionForMime } from "./mime.js";
|
||||
|
||||
const MEDIA_DIR = path.join(os.homedir(), ".warelay", "media");
|
||||
const MAX_BYTES = 5 * 1024 * 1024; // 5MB
|
||||
const DEFAULT_TTL_MS = 2 * 60 * 1000; // 2 minutes
|
||||
@@ -43,23 +45,42 @@ async function downloadToFile(
|
||||
url: string,
|
||||
dest: string,
|
||||
headers?: Record<string, string>,
|
||||
) {
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
): Promise<{ headerMime?: string; sniffBuffer: Buffer; size: number }>
|
||||
/**
|
||||
* Download media to disk while capturing the first few KB for mime sniffing.
|
||||
*/
|
||||
{
|
||||
return await new Promise((resolve, reject) => {
|
||||
const req = request(url, { headers }, (res) => {
|
||||
if (!res.statusCode || res.statusCode >= 400) {
|
||||
reject(new Error(`HTTP ${res.statusCode ?? "?"} downloading media`));
|
||||
return;
|
||||
}
|
||||
let total = 0;
|
||||
const sniffChunks: Buffer[] = [];
|
||||
let sniffLen = 0;
|
||||
const out = createWriteStream(dest);
|
||||
res.on("data", (chunk) => {
|
||||
total += chunk.length;
|
||||
if (sniffLen < 16384) {
|
||||
sniffChunks.push(chunk);
|
||||
sniffLen += chunk.length;
|
||||
}
|
||||
if (total > MAX_BYTES) {
|
||||
req.destroy(new Error("Media exceeds 5MB limit"));
|
||||
}
|
||||
});
|
||||
pipeline(res, out)
|
||||
.then(() => resolve())
|
||||
.then(() => {
|
||||
const sniffBuffer = Buffer.concat(sniffChunks, Math.min(sniffLen, 16384));
|
||||
const rawHeader = res.headers["content-type"];
|
||||
const headerMime = Array.isArray(rawHeader) ? rawHeader[0] : rawHeader;
|
||||
resolve({
|
||||
headerMime,
|
||||
sniffBuffer,
|
||||
size: total,
|
||||
});
|
||||
})
|
||||
.catch(reject);
|
||||
});
|
||||
req.on("error", reject);
|
||||
@@ -83,11 +104,22 @@ export async function saveMediaSource(
|
||||
await fs.mkdir(dir, { recursive: true });
|
||||
await cleanOldMedia();
|
||||
const id = crypto.randomUUID();
|
||||
const dest = path.join(dir, id);
|
||||
if (looksLikeUrl(source)) {
|
||||
await downloadToFile(source, dest, headers);
|
||||
const stat = await fs.stat(dest);
|
||||
return { id, path: dest, size: stat.size };
|
||||
const tempDest = path.join(dir, `${id}.tmp`);
|
||||
const { headerMime, sniffBuffer, size } = await downloadToFile(
|
||||
source,
|
||||
tempDest,
|
||||
headers,
|
||||
);
|
||||
const mime = detectMime({
|
||||
buffer: sniffBuffer,
|
||||
headerMime,
|
||||
filePath: source,
|
||||
});
|
||||
const ext = extensionForMime(mime) ?? path.extname(new URL(source).pathname);
|
||||
const finalDest = path.join(dir, ext ? `${id}${ext}` : id);
|
||||
await fs.rename(tempDest, finalDest);
|
||||
return { id, path: finalDest, size, contentType: mime };
|
||||
}
|
||||
// local path
|
||||
const stat = await fs.stat(source);
|
||||
@@ -97,8 +129,12 @@ export async function saveMediaSource(
|
||||
if (stat.size > MAX_BYTES) {
|
||||
throw new Error("Media exceeds 5MB limit");
|
||||
}
|
||||
await fs.copyFile(source, dest);
|
||||
return { id, path: dest, size: stat.size };
|
||||
const buffer = await fs.readFile(source);
|
||||
const mime = detectMime({ buffer, filePath: source });
|
||||
const ext = extensionForMime(mime) ?? path.extname(source);
|
||||
const dest = path.join(dir, ext ? `${id}${ext}` : id);
|
||||
await fs.writeFile(dest, buffer);
|
||||
return { id, path: dest, size: stat.size, contentType: mime };
|
||||
}
|
||||
|
||||
export async function saveMediaBuffer(
|
||||
@@ -112,7 +148,9 @@ export async function saveMediaBuffer(
|
||||
const dir = path.join(MEDIA_DIR, subdir);
|
||||
await fs.mkdir(dir, { recursive: true });
|
||||
const id = crypto.randomUUID();
|
||||
const dest = path.join(dir, id);
|
||||
const mime = detectMime({ buffer, headerMime: contentType });
|
||||
const ext = extensionForMime(mime);
|
||||
const dest = path.join(dir, ext ? `${id}${ext}` : id);
|
||||
await fs.writeFile(dest, buffer);
|
||||
return { id, path: dest, size: buffer.byteLength, contentType };
|
||||
return { id, path: dest, size: buffer.byteLength, contentType: mime };
|
||||
}
|
||||
|
||||
93
src/web/inbound.media.test.ts
Normal file
93
src/web/inbound.media.test.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
import crypto from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
|
||||
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const HOME = path.join(os.tmpdir(), `warelay-inbound-media-${crypto.randomUUID()}`);
|
||||
process.env.HOME = HOME;
|
||||
|
||||
vi.mock("@whiskeysockets/baileys", async () => {
|
||||
const actual = await vi.importActual<typeof import("@whiskeysockets/baileys")>(
|
||||
"@whiskeysockets/baileys",
|
||||
);
|
||||
const jpegBuffer = Buffer.from([
|
||||
0xff, 0xd8, 0xff, 0xdb, 0x00, 0x43, 0x00, 0x03, 0x02, 0x02, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02, 0x03, 0x03,
|
||||
0x03, 0x03, 0x04, 0x06, 0x04, 0x04, 0x04, 0x04, 0x04, 0x08, 0x06, 0x06, 0x05, 0x06, 0x09, 0x08, 0x0a, 0x0a, 0x09,
|
||||
0x08, 0x09, 0x09, 0x0a, 0x0c, 0x0f, 0x0c, 0x0a, 0x0b, 0x0e, 0x0b, 0x09, 0x09, 0x0d, 0x11, 0x0d, 0x0e, 0x0f, 0x10,
|
||||
0x10, 0x11, 0x10, 0x0a, 0x0c, 0x12, 0x13, 0x12, 0x10, 0x13, 0x0f, 0x10, 0x10, 0x10, 0xff, 0xc0, 0x00, 0x11, 0x08,
|
||||
0x00, 0x01, 0x00, 0x01, 0x03, 0x01, 0x11, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, 0x14, 0x00,
|
||||
0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xc4, 0x00,
|
||||
0x14, 0x10, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xda, 0x00,
|
||||
0x0c, 0x03, 0x01, 0x00, 0x02, 0x11, 0x03, 0x11, 0x00, 0x3f, 0x00, 0xff, 0xd9,
|
||||
]);
|
||||
return {
|
||||
...actual,
|
||||
downloadMediaMessage: vi.fn().mockResolvedValue(jpegBuffer),
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock("./session.js", () => {
|
||||
const { EventEmitter } = require("node:events");
|
||||
const ev = new EventEmitter();
|
||||
const sock = {
|
||||
ev,
|
||||
ws: { close: vi.fn() },
|
||||
sendPresenceUpdate: vi.fn().mockResolvedValue(undefined),
|
||||
sendMessage: vi.fn().mockResolvedValue(undefined),
|
||||
readMessages: vi.fn().mockResolvedValue(undefined),
|
||||
updateMediaMessage: vi.fn(),
|
||||
logger: {},
|
||||
user: { id: "me@s.whatsapp.net" },
|
||||
};
|
||||
return {
|
||||
createWaSocket: vi.fn().mockResolvedValue(sock),
|
||||
waitForWaConnection: vi.fn().mockResolvedValue(undefined),
|
||||
getStatusCode: vi.fn(() => 200),
|
||||
};
|
||||
});
|
||||
|
||||
import { monitorWebInbox } from "./inbound.js";
|
||||
|
||||
describe("web inbound media saves with extension", () => {
|
||||
beforeAll(async () => {
|
||||
await fs.rm(HOME, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await fs.rm(HOME, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("stores inbound image with jpeg extension", async () => {
|
||||
const onMessage = vi.fn();
|
||||
const listener = await monitorWebInbox({ verbose: false, onMessage });
|
||||
const { createWaSocket } = await import("./session.js");
|
||||
const realSock = await (createWaSocket as unknown as () => Promise<{
|
||||
ev: import("node:events").EventEmitter;
|
||||
}>)();
|
||||
|
||||
const upsert = {
|
||||
type: "notify",
|
||||
messages: [
|
||||
{
|
||||
key: { id: "img1", fromMe: false, remoteJid: "111@s.whatsapp.net" },
|
||||
message: { imageMessage: { mimetype: "image/jpeg" } },
|
||||
messageTimestamp: 1_700_000_001,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
realSock.ev.emit("messages.upsert", upsert);
|
||||
await new Promise((resolve) => setTimeout(resolve, 5));
|
||||
|
||||
expect(onMessage).toHaveBeenCalledTimes(1);
|
||||
const msg = onMessage.mock.calls[0][0];
|
||||
expect(msg.mediaPath).toBeDefined();
|
||||
expect(path.extname(msg.mediaPath!)).toBe(".jpg");
|
||||
const stat = await fs.stat(msg.mediaPath!);
|
||||
expect(stat.size).toBeGreaterThan(0);
|
||||
|
||||
await listener.close();
|
||||
});
|
||||
});
|
||||
@@ -38,4 +38,20 @@ describe("web media loading", () => {
|
||||
expect(result.buffer.length).toBeLessThanOrEqual(cap);
|
||||
expect(result.buffer.length).toBeLessThan(buffer.length);
|
||||
});
|
||||
|
||||
it("sniffs mime before extension when loading local files", async () => {
|
||||
const pngBuffer = await sharp({
|
||||
create: { width: 2, height: 2, channels: 3, background: "#00ff00" },
|
||||
})
|
||||
.png()
|
||||
.toBuffer();
|
||||
const wrongExt = path.join(os.tmpdir(), `warelay-media-${Date.now()}.bin`);
|
||||
tmpFiles.push(wrongExt);
|
||||
await fs.writeFile(wrongExt, pngBuffer);
|
||||
|
||||
const result = await loadWebMedia(wrongExt, 1024 * 1024);
|
||||
|
||||
expect(result.kind).toBe("image");
|
||||
expect(result.contentType).toBe("image/jpeg");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import sharp from "sharp";
|
||||
|
||||
import { isVerbose, logVerbose } from "../globals.js";
|
||||
@@ -8,6 +7,7 @@ import {
|
||||
maxBytesForKind,
|
||||
mediaKindFromMime,
|
||||
} from "../media/constants.js";
|
||||
import { detectMime } from "../media/mime.js";
|
||||
|
||||
export async function loadWebMedia(
|
||||
mediaUrl: string,
|
||||
@@ -45,7 +45,11 @@ export async function loadWebMedia(
|
||||
throw new Error(`Failed to fetch media: HTTP ${res.status}`);
|
||||
}
|
||||
const array = Buffer.from(await res.arrayBuffer());
|
||||
const contentType = res.headers.get("content-type");
|
||||
const contentType = detectMime({
|
||||
buffer: array,
|
||||
headerMime: res.headers.get("content-type"),
|
||||
filePath: mediaUrl,
|
||||
});
|
||||
const kind = mediaKindFromMime(contentType);
|
||||
const cap = Math.min(
|
||||
maxBytes ?? maxBytesForKind(kind),
|
||||
@@ -66,24 +70,7 @@ export async function loadWebMedia(
|
||||
|
||||
// Local path
|
||||
const data = await fs.readFile(mediaUrl);
|
||||
const ext = path.extname(mediaUrl);
|
||||
const mime =
|
||||
(ext &&
|
||||
(
|
||||
{
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".png": "image/png",
|
||||
".webp": "image/webp",
|
||||
".gif": "image/gif",
|
||||
".ogg": "audio/ogg",
|
||||
".opus": "audio/ogg",
|
||||
".mp3": "audio/mpeg",
|
||||
".mp4": "video/mp4",
|
||||
".pdf": "application/pdf",
|
||||
} as Record<string, string | undefined>
|
||||
)[ext.toLowerCase()]) ??
|
||||
undefined;
|
||||
const mime = detectMime({ buffer: data, filePath: mediaUrl });
|
||||
const kind = mediaKindFromMime(mime);
|
||||
const cap = Math.min(
|
||||
maxBytes ?? maxBytesForKind(kind),
|
||||
|
||||
Reference in New Issue
Block a user