fix: use file-type for mime sniffing

This commit is contained in:
Peter Steinberger
2025-12-20 19:13:50 +01:00
parent 1356498ee1
commit 36c85a617a
14 changed files with 5291 additions and 91 deletions

View File

@@ -1,5 +1,6 @@
import path from "node:path";
import { fileTypeFromBuffer } from "file-type";
import { type MediaKind, mediaKindFromMime } from "./constants.js";
// Map common MIME types to their preferred file extensions.
@@ -12,7 +13,23 @@ const EXT_BY_MIME: Record<string, string> = {
"audio/mpeg": ".mp3",
"video/mp4": ".mp4",
"application/pdf": ".pdf",
"application/json": ".json",
"application/zip": ".zip",
"application/gzip": ".gz",
"application/x-tar": ".tar",
"application/x-7z-compressed": ".7z",
"application/vnd.rar": ".rar",
"application/msword": ".doc",
"application/vnd.ms-excel": ".xls",
"application/vnd.ms-powerpoint": ".ppt",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document":
".docx",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
"application/vnd.openxmlformats-officedocument.presentationml.presentation":
".pptx",
"text/csv": ".csv",
"text/plain": ".txt",
"text/markdown": ".md",
};
const MIME_BY_EXT: Record<string, string> = Object.fromEntries(
@@ -25,71 +42,14 @@ function normalizeHeaderMime(mime?: string | null): string | undefined {
return cleaned || undefined;
}
/**
 * Detects a MIME type from file content by delegating to `file-type`.
 *
 * Detection is best-effort: any failure is reported as "unknown" rather than
 * surfaced to the caller, so callers can fall back to other hints.
 *
 * @param buffer - File contents to inspect.
 * @returns The detected MIME type, or `undefined` when no buffer is given, the
 *   buffer is empty, the content is not recognized, or detection throws.
 */
async function sniffMime(buffer?: Buffer): Promise<string | undefined> {
  // Nothing to inspect: skip the library call entirely.
  if (!buffer || buffer.length === 0) return undefined;
  try {
    const type = await fileTypeFromBuffer(buffer);
    return type?.mime;
  } catch {
    // Deliberately swallowed: a sniffing failure means "no sniffed type",
    // not a hard error.
    return undefined;
  }
}
function extFromPath(filePath?: string): string | undefined {
@@ -110,15 +70,34 @@ export function detectMime(opts: {
buffer?: Buffer;
headerMime?: string | null;
filePath?: string;
}): string | undefined {
const sniffed = sniffMime(opts.buffer);
if (sniffed) return sniffed;
}): Promise<string | undefined> {
return detectMimeImpl(opts);
}
/**
 * Reports whether a MIME type is too generic to pin down a file's real format.
 *
 * A missing or empty value counts as generic, as do
 * `application/octet-stream` and `application/zip`. The comparison ignores
 * letter case.
 *
 * @param mime - MIME type to classify.
 * @returns `true` when the value is absent or one of the generic types.
 */
function isGenericMime(mime?: string): boolean {
  if (!mime) {
    return true;
  }
  switch (mime.toLowerCase()) {
    case "application/octet-stream":
    case "application/zip":
      return true;
    default:
      return false;
  }
}
/**
 * Resolves the best MIME type for a file from up to three hints: sniffed
 * content, the file extension, and a header-supplied MIME type.
 *
 * Priority:
 *   1. The sniffed type, unless it is generic (see `isGenericMime`) and the
 *      extension maps to something more specific (e.g. XLSX vs ZIP).
 *   2. The MIME type mapped from the file extension.
 *   3. The normalized header MIME type.
 *
 * @param opts.buffer - File contents used for content sniffing.
 * @param opts.headerMime - MIME type reported by a header, if any.
 * @param opts.filePath - Path whose extension is looked up in `MIME_BY_EXT`.
 * @returns The chosen MIME type, or `undefined` when no hint yields one.
 */
async function detectMimeImpl(opts: {
  buffer?: Buffer;
  headerMime?: string | null;
  filePath?: string;
}): Promise<string | undefined> {
  const ext = extFromPath(opts.filePath);
  const extMime = ext ? MIME_BY_EXT[ext] : undefined;
  const headerMime = normalizeHeaderMime(opts.headerMime);
  const sniffed = await sniffMime(opts.buffer);

  // Prefer sniffed types, but don't let generic container types override a
  // more specific extension mapping (e.g. XLSX vs ZIP).
  if (sniffed && (!isGenericMime(sniffed) || !extMime)) return sniffed;
  if (extMime) return extMime;

  // Here there is neither a usable sniffed type nor an extension mapping, so
  // the header value (generic or not) is the only remaining hint.
  return headerMime;
}