mac: bundle web chat assets

2025-12-06 05:01:28 +01:00
parent 15cdeeddaf
commit 42d843297d
315 changed files with 16618 additions and 20 deletions
--- a/apps/macos/Sources/Clawdis/Resources/WebChat/utils/attachment-utils.js
+++ b/apps/macos/Sources/Clawdis/Resources/WebChat/utils/attachment-utils.js
@@ -0,0 +1,415 @@
+import { parseAsync } from "docx-preview";
+import JSZip from "jszip";
+import * as pdfjsLib from "pdfjs-dist";
+import * as XLSX from "xlsx";
+import { i18n } from "./i18n.js";
+// Configure PDF.js worker - we'll need to bundle this
+pdfjsLib.GlobalWorkerOptions.workerSrc = new URL("pdfjs-dist/build/pdf.worker.min.mjs", import.meta.url).toString();
+/**
+ * Load an attachment from various sources
+ * @param source - URL string, File, Blob, or ArrayBuffer
+ * @param fileName - Optional filename override
+ * @returns Promise<Attachment>
+ * @throws Error if loading fails
+ */
+export async function loadAttachment(source, fileName) {
+    let arrayBuffer;
+    let detectedFileName = fileName || "unnamed";
+    let mimeType = "application/octet-stream";
+    let size = 0;
+    // Convert source to ArrayBuffer
+    if (typeof source === "string") {
+        // It's a URL - fetch it
+        const response = await fetch(source);
+        if (!response.ok) {
+            throw new Error(i18n("Failed to fetch file"));
+        }
+        arrayBuffer = await response.arrayBuffer();
+        size = arrayBuffer.byteLength;
+        mimeType = response.headers.get("content-type") || mimeType;
+        if (!fileName) {
+            // Try to extract filename from URL
+            const urlParts = source.split("/");
+            detectedFileName = urlParts[urlParts.length - 1] || "document";
+        }
+    }
+    else if (source instanceof File) {
+        arrayBuffer = await source.arrayBuffer();
+        size = source.size;
+        mimeType = source.type || mimeType;
+        detectedFileName = fileName || source.name;
+    }
+    else if (source instanceof Blob) {
+        arrayBuffer = await source.arrayBuffer();
+        size = source.size;
+        mimeType = source.type || mimeType;
+    }
+    else if (source instanceof ArrayBuffer) {
+        arrayBuffer = source;
+        size = source.byteLength;
+    }
+    else {
+        throw new Error(i18n("Invalid source type"));
+    }
+    // Convert ArrayBuffer to base64 - handle large files properly
+    const uint8Array = new Uint8Array(arrayBuffer);
+    let binary = "";
+    const chunkSize = 0x8000; // Process in 32KB chunks to avoid stack overflow
+    for (let i = 0; i < uint8Array.length; i += chunkSize) {
+        const chunk = uint8Array.slice(i, i + chunkSize);
+        binary += String.fromCharCode(...chunk);
+    }
+    const base64Content = btoa(binary);
+    // Detect type and process accordingly
+    const id = `${detectedFileName}_${Date.now()}_${Math.random()}`;
+    // Check if it's a PDF
+    if (mimeType === "application/pdf" || detectedFileName.toLowerCase().endsWith(".pdf")) {
+        const { extractedText, preview } = await processPdf(arrayBuffer, detectedFileName);
+        return {
+            id,
+            type: "document",
+            fileName: detectedFileName,
+            mimeType: "application/pdf",
+            size,
+            content: base64Content,
+            extractedText,
+            preview,
+        };
+    }
+    // Check if it's a DOCX file
+    if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ||
+        detectedFileName.toLowerCase().endsWith(".docx")) {
+        const { extractedText } = await processDocx(arrayBuffer, detectedFileName);
+        return {
+            id,
+            type: "document",
+            fileName: detectedFileName,
+            mimeType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            size,
+            content: base64Content,
+            extractedText,
+        };
+    }
+    // Check if it's a PPTX file
+    if (mimeType === "application/vnd.openxmlformats-officedocument.presentationml.presentation" ||
+        detectedFileName.toLowerCase().endsWith(".pptx")) {
+        const { extractedText } = await processPptx(arrayBuffer, detectedFileName);
+        return {
+            id,
+            type: "document",
+            fileName: detectedFileName,
+            mimeType: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+            size,
+            content: base64Content,
+            extractedText,
+        };
+    }
+    // Check if it's an Excel file (XLSX/XLS)
+    const excelMimeTypes = [
+        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        "application/vnd.ms-excel",
+    ];
+    if (excelMimeTypes.includes(mimeType) ||
+        detectedFileName.toLowerCase().endsWith(".xlsx") ||
+        detectedFileName.toLowerCase().endsWith(".xls")) {
+        const { extractedText } = await processExcel(arrayBuffer, detectedFileName);
+        return {
+            id,
+            type: "document",
+            fileName: detectedFileName,
+            mimeType: mimeType.startsWith("application/vnd")
+                ? mimeType
+                : "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            size,
+            content: base64Content,
+            extractedText,
+        };
+    }
+    // Check if it's an image
+    if (mimeType.startsWith("image/")) {
+        return {
+            id,
+            type: "image",
+            fileName: detectedFileName,
+            mimeType,
+            size,
+            content: base64Content,
+            preview: base64Content, // For images, preview is the same as content
+        };
+    }
+    // Check if it's a text document
+    const textExtensions = [
+        ".txt",
+        ".md",
+        ".json",
+        ".xml",
+        ".html",
+        ".css",
+        ".js",
+        ".ts",
+        ".jsx",
+        ".tsx",
+        ".yml",
+        ".yaml",
+    ];
+    const isTextFile = mimeType.startsWith("text/") || textExtensions.some((ext) => detectedFileName.toLowerCase().endsWith(ext));
+    if (isTextFile) {
+        const decoder = new TextDecoder();
+        const text = decoder.decode(arrayBuffer);
+        return {
+            id,
+            type: "document",
+            fileName: detectedFileName,
+            mimeType: mimeType.startsWith("text/") ? mimeType : "text/plain",
+            size,
+            content: base64Content,
+            extractedText: text,
+        };
+    }
+    throw new Error(`Unsupported file type: ${mimeType}`);
+}
+async function processPdf(arrayBuffer, fileName) {
+    let pdf = null;
+    try {
+        pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
+        // Extract text with page structure
+        let extractedText = `<pdf filename="${fileName}">`;
+        for (let i = 1; i <= pdf.numPages; i++) {
+            const page = await pdf.getPage(i);
+            const textContent = await page.getTextContent();
+            const pageText = textContent.items
+                .map((item) => item.str)
+                .filter((str) => str.trim())
+                .join(" ");
+            extractedText += `\n<page number="${i}">\n${pageText}\n</page>`;
+        }
+        extractedText += "\n</pdf>";
+        // Generate preview from first page
+        const preview = await generatePdfPreview(pdf);
+        return { extractedText, preview };
+    }
+    catch (error) {
+        console.error("Error processing PDF:", error);
+        throw new Error(`Failed to process PDF: ${String(error)}`);
+    }
+    finally {
+        // Clean up PDF resources
+        if (pdf) {
+            pdf.destroy();
+        }
+    }
+}
+async function generatePdfPreview(pdf) {
+    try {
+        const page = await pdf.getPage(1);
+        const viewport = page.getViewport({ scale: 1.0 });
+        // Create canvas with reasonable size for thumbnail (160x160 max)
+        const scale = Math.min(160 / viewport.width, 160 / viewport.height);
+        const scaledViewport = page.getViewport({ scale });
+        const canvas = document.createElement("canvas");
+        const context = canvas.getContext("2d");
+        if (!context) {
+            return undefined;
+        }
+        canvas.height = scaledViewport.height;
+        canvas.width = scaledViewport.width;
+        const renderContext = {
+            canvasContext: context,
+            viewport: scaledViewport,
+            canvas: canvas,
+        };
+        await page.render(renderContext).promise;
+        // Return base64 without data URL prefix
+        return canvas.toDataURL("image/png").split(",")[1];
+    }
+    catch (error) {
+        console.error("Error generating PDF preview:", error);
+        return undefined;
+    }
+}
+async function processDocx(arrayBuffer, fileName) {
+    try {
+        // Parse document structure
+        const wordDoc = await parseAsync(arrayBuffer);
+        // Extract structured text from document body
+        let extractedText = `<docx filename="${fileName}">\n<page number="1">\n`;
+        const body = wordDoc.documentPart?.body;
+        if (body?.children) {
+            // Walk through document elements and extract text
+            const texts = [];
+            for (const element of body.children) {
+                const text = extractTextFromElement(element);
+                if (text) {
+                    texts.push(text);
+                }
+            }
+            extractedText += texts.join("\n");
+        }
+        extractedText += `\n</page>\n</docx>`;
+        return { extractedText };
+    }
+    catch (error) {
+        console.error("Error processing DOCX:", error);
+        throw new Error(`Failed to process DOCX: ${String(error)}`);
+    }
+}
+function extractTextFromElement(element) {
+    let text = "";
+    // Check type with lowercase
+    const elementType = element.type?.toLowerCase() || "";
+    // Handle paragraphs
+    if (elementType === "paragraph" && element.children) {
+        for (const child of element.children) {
+            const childType = child.type?.toLowerCase() || "";
+            if (childType === "run" && child.children) {
+                for (const textChild of child.children) {
+                    const textType = textChild.type?.toLowerCase() || "";
+                    if (textType === "text") {
+                        text += textChild.text || "";
+                    }
+                }
+            }
+            else if (childType === "text") {
+                text += child.text || "";
+            }
+        }
+    }
+    // Handle tables
+    else if (elementType === "table") {
+        if (element.children) {
+            const tableTexts = [];
+            for (const row of element.children) {
+                const rowType = row.type?.toLowerCase() || "";
+                if (rowType === "tablerow" && row.children) {
+                    const rowTexts = [];
+                    for (const cell of row.children) {
+                        const cellType = cell.type?.toLowerCase() || "";
+                        if (cellType === "tablecell" && cell.children) {
+                            const cellTexts = [];
+                            for (const cellElement of cell.children) {
+                                const cellText = extractTextFromElement(cellElement);
+                                if (cellText)
+                                    cellTexts.push(cellText);
+                            }
+                            if (cellTexts.length > 0)
+                                rowTexts.push(cellTexts.join(" "));
+                        }
+                    }
+                    if (rowTexts.length > 0)
+                        tableTexts.push(rowTexts.join(" | "));
+                }
+            }
+            if (tableTexts.length > 0) {
+                text = "\n[Table]\n" + tableTexts.join("\n") + "\n[/Table]\n";
+            }
+        }
+    }
+    // Recursively handle other container elements
+    else if (element.children && Array.isArray(element.children)) {
+        const childTexts = [];
+        for (const child of element.children) {
+            const childText = extractTextFromElement(child);
+            if (childText)
+                childTexts.push(childText);
+        }
+        text = childTexts.join(" ");
+    }
+    return text.trim();
+}
+async function processPptx(arrayBuffer, fileName) {
+    try {
+        // Load the PPTX file as a ZIP
+        const zip = await JSZip.loadAsync(arrayBuffer);
+        // PPTX slides are stored in ppt/slides/slide[n].xml
+        let extractedText = `<pptx filename="${fileName}">`;
+        // Get all slide files and sort them numerically
+        const slideFiles = Object.keys(zip.files)
+            .filter((name) => name.match(/ppt\/slides\/slide\d+\.xml$/))
+            .sort((a, b) => {
+            const numA = Number.parseInt(a.match(/slide(\d+)\.xml$/)?.[1] || "0", 10);
+            const numB = Number.parseInt(b.match(/slide(\d+)\.xml$/)?.[1] || "0", 10);
+            return numA - numB;
+        });
+        // Extract text from each slide
+        for (let i = 0; i < slideFiles.length; i++) {
+            const slideFile = zip.file(slideFiles[i]);
+            if (slideFile) {
+                const slideXml = await slideFile.async("text");
+                // Extract text from XML (simple regex approach)
+                // Looking for <a:t> tags which contain text in PPTX
+                const textMatches = slideXml.match(/<a:t[^>]*>([^<]+)<\/a:t>/g);
+                if (textMatches) {
+                    extractedText += `\n<slide number="${i + 1}">`;
+                    const slideTexts = textMatches
+                        .map((match) => {
+                        const textMatch = match.match(/<a:t[^>]*>([^<]+)<\/a:t>/);
+                        return textMatch ? textMatch[1] : "";
+                    })
+                        .filter((t) => t.trim());
+                    if (slideTexts.length > 0) {
+                        extractedText += "\n" + slideTexts.join("\n");
+                    }
+                    extractedText += "\n</slide>";
+                }
+            }
+        }
+        // Also try to extract text from notes
+        const notesFiles = Object.keys(zip.files)
+            .filter((name) => name.match(/ppt\/notesSlides\/notesSlide\d+\.xml$/))
+            .sort((a, b) => {
+            const numA = Number.parseInt(a.match(/notesSlide(\d+)\.xml$/)?.[1] || "0", 10);
+            const numB = Number.parseInt(b.match(/notesSlide(\d+)\.xml$/)?.[1] || "0", 10);
+            return numA - numB;
+        });
+        if (notesFiles.length > 0) {
+            extractedText += "\n<notes>";
+            for (const noteFile of notesFiles) {
+                const file = zip.file(noteFile);
+                if (file) {
+                    const noteXml = await file.async("text");
+                    const textMatches = noteXml.match(/<a:t[^>]*>([^<]+)<\/a:t>/g);
+                    if (textMatches) {
+                        const noteTexts = textMatches
+                            .map((match) => {
+                            const textMatch = match.match(/<a:t[^>]*>([^<]+)<\/a:t>/);
+                            return textMatch ? textMatch[1] : "";
+                        })
+                            .filter((t) => t.trim());
+                        if (noteTexts.length > 0) {
+                            const slideNum = noteFile.match(/notesSlide(\d+)\.xml$/)?.[1];
+                            extractedText += `\n[Slide ${slideNum} notes]: ${noteTexts.join(" ")}`;
+                        }
+                    }
+                }
+            }
+            extractedText += "\n</notes>";
+        }
+        extractedText += "\n</pptx>";
+        return { extractedText };
+    }
+    catch (error) {
+        console.error("Error processing PPTX:", error);
+        throw new Error(`Failed to process PPTX: ${String(error)}`);
+    }
+}
+async function processExcel(arrayBuffer, fileName) {
+    try {
+        // Read the workbook
+        const workbook = XLSX.read(arrayBuffer, { type: "array" });
+        let extractedText = `<excel filename="${fileName}">`;
+        // Process each sheet
+        for (const [index, sheetName] of workbook.SheetNames.entries()) {
+            const worksheet = workbook.Sheets[sheetName];
+            // Extract text as CSV for the extractedText field
+            const csvText = XLSX.utils.sheet_to_csv(worksheet);
+            extractedText += `\n<sheet name="${sheetName}" index="${index + 1}">\n${csvText}\n</sheet>`;
+        }
+        extractedText += "\n</excel>";
+        return { extractedText };
+    }
+    catch (error) {
+        console.error("Error processing Excel:", error);
+        throw new Error(`Failed to process Excel: ${String(error)}`);
+    }
+}
+//# sourceMappingURL=attachment-utils.js.map