fix: use file-type for mime sniffing
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import path from "node:path";
|
||||
|
||||
import { fileTypeFromBuffer } from "file-type";
|
||||
import { type MediaKind, mediaKindFromMime } from "./constants.js";
|
||||
|
||||
// Map common mimes to preferred file extensions.
|
||||
@@ -12,7 +13,23 @@ const EXT_BY_MIME: Record<string, string> = {
|
||||
"audio/mpeg": ".mp3",
|
||||
"video/mp4": ".mp4",
|
||||
"application/pdf": ".pdf",
|
||||
"application/json": ".json",
|
||||
"application/zip": ".zip",
|
||||
"application/gzip": ".gz",
|
||||
"application/x-tar": ".tar",
|
||||
"application/x-7z-compressed": ".7z",
|
||||
"application/vnd.rar": ".rar",
|
||||
"application/msword": ".doc",
|
||||
"application/vnd.ms-excel": ".xls",
|
||||
"application/vnd.ms-powerpoint": ".ppt",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document":
|
||||
".docx",
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation":
|
||||
".pptx",
|
||||
"text/csv": ".csv",
|
||||
"text/plain": ".txt",
|
||||
"text/markdown": ".md",
|
||||
};
|
||||
|
||||
const MIME_BY_EXT: Record<string, string> = Object.fromEntries(
|
||||
@@ -25,71 +42,14 @@ function normalizeHeaderMime(mime?: string | null): string | undefined {
|
||||
return cleaned || undefined;
|
||||
}
|
||||
|
||||
/**
 * Sniff a MIME type from the content of `buffer` using the `file-type` package.
 *
 * Replaces the earlier hand-rolled magic-byte checks (JPEG, PNG, GIF, WebP,
 * PDF, Ogg, MP3, MP4) with a single call to `fileTypeFromBuffer`.
 *
 * @param buffer - Raw file bytes; may be omitted when no content is available.
 * @returns The detected MIME type, or `undefined` when there is no buffer, the
 *   buffer is empty, the type is not recognised, or detection throws.
 */
async function sniffMime(buffer?: Buffer): Promise<string | undefined> {
  // Nothing to inspect: skip the library call entirely.
  if (!buffer || buffer.length === 0) return undefined;
  try {
    const type = await fileTypeFromBuffer(buffer);
    return type?.mime;
  } catch {
    // Sniffing is best-effort: a parse failure must not break MIME detection,
    // so callers fall back to extension/header information instead.
    return undefined;
  }
}
|
||||
|
||||
function extFromPath(filePath?: string): string | undefined {
|
||||
@@ -110,15 +70,34 @@ export function detectMime(opts: {
|
||||
buffer?: Buffer;
|
||||
headerMime?: string | null;
|
||||
filePath?: string;
|
||||
}): string | undefined {
|
||||
const sniffed = sniffMime(opts.buffer);
|
||||
if (sniffed) return sniffed;
|
||||
}): Promise<string | undefined> {
|
||||
return detectMimeImpl(opts);
|
||||
}
|
||||
|
||||
/**
 * Report whether a MIME type is too generic to be trusted over a more specific
 * source of information.
 *
 * A missing/empty value counts as generic, as do `application/octet-stream`
 * and `application/zip` (compared case-insensitively).
 *
 * @param mime - MIME type to classify; may be undefined.
 * @returns `true` when the value is absent or one of the generic types.
 */
function isGenericMime(mime?: string): boolean {
  if (!mime) return true;
  const m = mime.toLowerCase();
  return m === "application/octet-stream" || m === "application/zip";
}
|
||||
|
||||
/**
 * Resolve the best MIME type from sniffed content, the file extension and a
 * header-provided MIME type.
 *
 * Precedence:
 *   1. sniffed type, when it is specific, or when no extension mapping exists;
 *   2. extension mapping from `MIME_BY_EXT`;
 *   3. header MIME type, when it is specific;
 *   4. header MIME type, even if generic.
 *
 * @param opts.buffer - File bytes used for content sniffing.
 * @param opts.headerMime - MIME type reported by a header, if any.
 * @param opts.filePath - Path whose extension is looked up in `MIME_BY_EXT`.
 * @returns The chosen MIME type, or `undefined` when no source yields one.
 */
async function detectMimeImpl(opts: {
  buffer?: Buffer;
  headerMime?: string | null;
  filePath?: string;
}): Promise<string | undefined> {
  const ext = extFromPath(opts.filePath);
  const extMime = ext ? MIME_BY_EXT[ext] : undefined;

  const headerMime = normalizeHeaderMime(opts.headerMime);
  const sniffed = await sniffMime(opts.buffer);

  // Prefer sniffed types, but don't let generic container types override a more
  // specific extension mapping (e.g. XLSX vs ZIP).
  // NOTE(review): a generic sniffed type with no extension mapping still wins
  // over a specific header type here — confirm that ordering is intended.
  if (sniffed && (!isGenericMime(sniffed) || !extMime)) return sniffed;
  if (extMime) return extMime;

  // From here on `sniffed` is necessarily undefined (any sniffed value has
  // already returned above), so only the header value remains.
  if (headerMime && !isGenericMime(headerMime)) return headerMime;
  if (headerMime) return headerMime;

  return undefined;
}
|
||||
|
||||
Reference in New Issue
Block a user