Files
clawdbot/src/media/store.ts
2026-01-22 03:37:29 +00:00

219 lines
7.2 KiB
TypeScript

import crypto from "node:crypto";
import { createWriteStream } from "node:fs";
import fs from "node:fs/promises";
import { request } from "node:https";
import path from "node:path";
import { pipeline } from "node:stream/promises";
import { resolveConfigDir } from "../utils.js";
import { detectMime, extensionForMime } from "./mime.js";
/** Resolve the media storage directory: the "media" folder inside the config directory. */
function resolveMediaDir(): string {
  return path.join(resolveConfigDir(), "media");
}
/** Default upper bound for a single media payload, in bytes (5MB). */
const MAX_BYTES = 5 * 1024 * 1024;
/** Default age, in milliseconds, after which stored media is removed by cleanup (2 minutes). */
const DEFAULT_TTL_MS = 2 * 60 * 1000;
/**
 * Sanitize a filename for cross-platform safety.
 *
 * Replaces characters that are unsafe on Windows/SharePoint/all platforms
 * (`< > : " / \ | ? *` and control chars U+0000-U+001F) as well as whitespace
 * runs with a single underscore. Every other character (including dots,
 * hyphens and non-ASCII letters/numbers) is kept as-is.
 *
 * @param name - Raw, possibly user-supplied file name (without directory).
 * @returns The sanitized name: at most 60 UTF-16 code units, with no leading
 *   or trailing underscore and never cut in the middle of a surrogate pair.
 *   May be an empty string when nothing usable remains.
 */
function sanitizeFilename(name: string): string {
  const maxLength = 60;
  // Remove: < > : " / \ | ? * and control chars (U+0000-U+001F)
  // oxlint-disable-next-line no-control-regex -- Intentionally matching control chars
  const unsafe = /[<>:"/\\|?*\x00-\x1f]/g;
  const cleaned = name
    .trim()
    .replace(unsafe, "_")
    .replace(/\s+/g, "_") // Replace whitespace runs with underscore
    .replace(/_+/g, "_") // Collapse multiple underscores
    .replace(/^_|_$/g, ""); // Trim leading/trailing underscore
  if (cleaned.length <= maxLength) return cleaned;

  let truncated = cleaned.slice(0, maxLength);
  // slice() counts UTF-16 code units, so the cut may land between the two
  // halves of a surrogate pair; drop the orphaned high surrogate.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    truncated = truncated.slice(0, -1);
  }
  // Truncation can expose an underscore at the end again.
  return truncated.replace(/_$/, "");
}
/**
 * Recover the original file name from a stored media path.
 *
 * Stored names may embed the original name as `{original}---{uuid}.{ext}`;
 * for those, `{original}.{ext}` is returned. Any other name is returned
 * unchanged (basename only), and an empty basename yields "file.bin".
 *
 * @param filePath - Path or bare file name of a stored media file.
 * @returns The original file name, the basename, or "file.bin".
 */
export function extractOriginalFilename(filePath: string): string {
  const fileName = path.basename(filePath);
  if (fileName.length === 0) {
    return "file.bin"; // Nothing to work with
  }

  const extension = path.extname(fileName);
  const stem = path.basename(fileName, extension);

  // "---" followed by a hyphenated UUID (8-4-4-4-12 hex digits) at the end of the stem
  const embeddedUuid =
    /^(.+)---[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$/i;
  const original = embeddedUuid.exec(stem)?.[1];

  return original ? `${original}${extension}` : fileName;
}
/** Return the path of the media storage directory without creating it. */
export function getMediaDir(): string {
  const mediaDir = resolveMediaDir();
  return mediaDir;
}
/**
 * Create the media storage directory (including missing parents) if needed.
 *
 * @returns The path of the media directory.
 */
export async function ensureMediaDir(): Promise<string> {
  const target = resolveMediaDir();
  await fs.mkdir(target, { recursive: true });
  return target;
}
/**
 * Remove entries directly inside the media directory whose modification time
 * is older than `ttlMs`.
 *
 * Best-effort: a failing directory listing is treated as empty, and entries
 * that cannot be stat'ed or removed are skipped silently.
 *
 * @param ttlMs - Maximum allowed age in milliseconds.
 */
export async function cleanOldMedia(ttlMs = DEFAULT_TTL_MS): Promise<void> {
  const root = await ensureMediaDir();

  let names: string[];
  try {
    names = await fs.readdir(root);
  } catch {
    names = [];
  }

  // One reference timestamp for the whole sweep.
  const startedAt = Date.now();

  const removeIfExpired = async (name: string): Promise<void> => {
    const target = path.join(root, name);
    const info = await fs.stat(target).catch(() => null);
    if (!info) return;
    const ageMs = startedAt - info.mtimeMs;
    if (ageMs > ttlMs) {
      try {
        await fs.rm(target);
      } catch {
        // Removal is best-effort.
      }
    }
  };

  await Promise.all(names.map((name) => removeIfExpired(name)));
}
/** Tell whether `src` starts with an `http://` or `https://` scheme (case-insensitive). */
function looksLikeUrl(src: string): boolean {
  const httpScheme = /^https?:\/\//i;
  return httpScheme.test(src);
}
/**
 * Download media to disk while capturing the first few KB for mime sniffing.
 *
 * Sends the request through `node:https`, follows 3xx responses (at most
 * `maxRedirects` hops) and streams the body of the final response into `dest`.
 *
 * NOTE(review): `headers` are re-sent unchanged to every redirect target,
 * including other hosts — confirm callers never pass credentials here.
 * NOTE(review): `request` comes from `node:https`, so plain `http://` URLs are
 * presumably rejected by Node itself — confirm this is intended.
 *
 * @param url - Absolute URL to fetch.
 * @param dest - File path the response body is written to.
 * @param headers - Optional request headers, sent on every hop.
 * @param maxRedirects - Number of redirects that may still be followed.
 * @returns The raw `content-type` header (if any), up to 16 KiB of the leading
 *   body bytes for sniffing, and the total number of body bytes received.
 * @throws Error when a redirect has no usable `Location` or the redirect
 *   budget is used up, when the status is missing or >= 400, when the body
 *   exceeds `MAX_BYTES`, or when the request or the file stream fails.
 */
async function downloadToFile(
  url: string,
  dest: string,
  headers?: Record<string, string>,
  maxRedirects = 5,
): Promise<{ headerMime?: string; sniffBuffer: Buffer; size: number }> {
  const sniffLimit = 16384;
  return await new Promise((resolve, reject) => {
    const req = request(url, { headers }, (res) => {
      const status = res.statusCode;
      // Follow redirects
      if (status && status >= 300 && status < 400) {
        // The redirect body is never read; drain it so the connection is released.
        res.resume();
        const location = res.headers.location;
        if (!location || maxRedirects <= 0) {
          reject(new Error(`Redirect loop or missing Location header`));
          return;
        }
        let redirectUrl: string;
        try {
          redirectUrl = new URL(location, url).href;
        } catch (err) {
          // A malformed Location would otherwise throw inside this event
          // callback, outside the promise, and surface as an uncaught exception.
          reject(err);
          return;
        }
        resolve(downloadToFile(redirectUrl, dest, headers, maxRedirects - 1));
        return;
      }
      if (!status || status >= 400) {
        // Drain the unused error body as well.
        res.resume();
        reject(new Error(`HTTP ${status ?? "?"} downloading media`));
        return;
      }
      let total = 0;
      let sniffLen = 0;
      let limitError: Error | undefined;
      const sniffChunks: Buffer[] = [];
      const out = createWriteStream(dest);
      res.on("data", (chunk: Buffer) => {
        total += chunk.length;
        if (sniffLen < sniffLimit) {
          sniffChunks.push(chunk);
          sniffLen += chunk.length;
        }
        if (total > MAX_BYTES && !limitError) {
          limitError = new Error("Media exceeds 5MB limit");
          req.destroy(limitError);
        }
      });
      pipeline(res, out)
        .then(() => {
          if (limitError) {
            // The body finished while the limit was being enforced; still a failure.
            reject(limitError);
            return;
          }
          // The last captured chunk may overshoot; concat truncates to the limit.
          const sniffBuffer = Buffer.concat(sniffChunks, Math.min(sniffLen, sniffLimit));
          const rawHeader = res.headers["content-type"];
          const headerMime = Array.isArray(rawHeader) ? rawHeader[0] : rawHeader;
          resolve({
            headerMime,
            sniffBuffer,
            size: total,
          });
        })
        // Prefer the size-limit error over the generic "aborted" stream error
        // that destroying the request produces.
        .catch((err) => reject(limitError ?? err));
    });
    req.on("error", reject);
    req.end();
  });
}
/** Description of a media file that was written to the media directory. */
export type SavedMedia = {
  // File name of the stored media (UUID-based, with an extension when one is known).
  id: string;
  // Full path of the stored file (target directory joined with `id`).
  path: string;
  // Size of the media in bytes.
  size: number;
  // Mime type as reported by `detectMime`, if any.
  contentType?: string;
};
/**
 * Copy media from a URL or a local file into the media directory.
 *
 * Also runs `cleanOldMedia()` (default TTL) before saving.
 *
 * - `http(s)://` sources are downloaded to a `{uuid}.tmp` file first and then
 *   renamed to their final name; the temp file is removed again when any step
 *   of that sequence fails.
 * - Any other source is treated as a local file path; it must be a regular
 *   file no larger than `MAX_BYTES`.
 *
 * The stored name is a fresh UUID plus the extension derived from the detected
 * mime type, falling back to the extension of the source path.
 *
 * @param source - URL or local file path of the media.
 * @param headers - Optional request headers used for URL downloads.
 * @param subdir - Optional sub-directory of the media directory to store into.
 * @returns Id, path, size and detected content type of the stored file.
 * @throws Error when the download fails, the local path is not a file, or the
 *   media exceeds the size limit.
 */
export async function saveMediaSource(
  source: string,
  headers?: Record<string, string>,
  subdir = "",
): Promise<SavedMedia> {
  const baseDir = resolveMediaDir();
  const dir = subdir ? path.join(baseDir, subdir) : baseDir;
  await fs.mkdir(dir, { recursive: true });
  await cleanOldMedia();
  const baseId = crypto.randomUUID();
  if (looksLikeUrl(source)) {
    const tempDest = path.join(dir, `${baseId}.tmp`);
    try {
      const { headerMime, sniffBuffer, size } = await downloadToFile(source, tempDest, headers);
      const mime = await detectMime({
        buffer: sniffBuffer,
        headerMime,
        filePath: source,
      });
      const ext = extensionForMime(mime) ?? path.extname(new URL(source).pathname);
      const id = ext ? `${baseId}${ext}` : baseId;
      const finalDest = path.join(dir, id);
      await fs.rename(tempDest, finalDest);
      return { id, path: finalDest, size, contentType: mime };
    } catch (err) {
      // Do not leave a partial or orphaned temp file behind; `force` makes this
      // a no-op when the file was never created. Removal itself is best-effort.
      await fs.rm(tempDest, { force: true }).catch(() => {});
      throw err;
    }
  }
  // local path
  const stat = await fs.stat(source);
  if (!stat.isFile()) {
    throw new Error("Media path is not a file");
  }
  if (stat.size > MAX_BYTES) {
    throw new Error("Media exceeds 5MB limit");
  }
  const buffer = await fs.readFile(source);
  const mime = await detectMime({ buffer, filePath: source });
  const ext = extensionForMime(mime) ?? path.extname(source);
  const id = ext ? `${baseId}${ext}` : baseId;
  const dest = path.join(dir, id);
  await fs.writeFile(dest, buffer);
  return { id, path: dest, size: stat.size, contentType: mime };
}
/**
 * Write an in-memory media payload into a sub-directory of the media directory.
 *
 * The extension comes from the declared content type when it maps to one,
 * otherwise from the detected mime type, otherwise none. When
 * `originalFilename` is given and still has a usable name after sanitizing,
 * the stored name is `{sanitized}---{uuid}{ext}`; otherwise it is `{uuid}{ext}`.
 *
 * @param buffer - Media bytes to store.
 * @param contentType - Declared content type; parameters after ";" are ignored
 *   for the extension lookup.
 * @param subdir - Sub-directory of the media directory to store into.
 * @param maxBytes - Maximum accepted payload size in bytes.
 * @param originalFilename - Original file name to embed in the stored name.
 * @returns Id, path, size and detected content type of the stored file.
 * @throws Error when the buffer is larger than `maxBytes`.
 */
export async function saveMediaBuffer(
  buffer: Buffer,
  contentType?: string,
  subdir = "inbound",
  maxBytes = MAX_BYTES,
  originalFilename?: string,
): Promise<SavedMedia> {
  const size = buffer.byteLength;
  if (size > maxBytes) {
    const limitMb = (maxBytes / (1024 * 1024)).toFixed(0);
    throw new Error(`Media exceeds ${limitMb}MB limit`);
  }

  const targetDir = path.join(resolveMediaDir(), subdir);
  await fs.mkdir(targetDir, { recursive: true });

  const uuid = crypto.randomUUID();
  const declaredType = contentType?.split(";")[0]?.trim() ?? undefined;
  const headerExt = extensionForMime(declaredType);
  const mime = await detectMime({ buffer, headerMime: contentType });
  // The declared type wins over the detected one; no extension if neither maps.
  const ext = headerExt ?? extensionForMime(mime) ?? "";

  // Empty when no original name was given or nothing survived sanitizing.
  const safeStem = originalFilename ? sanitizeFilename(path.parse(originalFilename).name) : "";
  // Embed original name: {sanitized}---{uuid}.ext, else just the UUID
  const id = safeStem ? `${safeStem}---${uuid}${ext}` : `${uuid}${ext}`;

  const dest = path.join(targetDir, id);
  await fs.writeFile(dest, buffer);
  return { id, path: dest, size, contentType: mime };
}