mac: bundle web chat assets
This commit is contained in:
@@ -0,0 +1,415 @@
|
||||
import { parseAsync } from "docx-preview";
|
||||
import JSZip from "jszip";
|
||||
import * as pdfjsLib from "pdfjs-dist";
|
||||
import * as XLSX from "xlsx";
|
||||
import { i18n } from "./i18n.js";
|
||||
// Configure PDF.js worker - we'll need to bundle this
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = new URL("pdfjs-dist/build/pdf.worker.min.mjs", import.meta.url).toString();
|
||||
/**
|
||||
* Load an attachment from various sources
|
||||
* @param source - URL string, File, Blob, or ArrayBuffer
|
||||
* @param fileName - Optional filename override
|
||||
* @returns Promise<Attachment>
|
||||
* @throws Error if loading fails
|
||||
*/
|
||||
export async function loadAttachment(source, fileName) {
|
||||
let arrayBuffer;
|
||||
let detectedFileName = fileName || "unnamed";
|
||||
let mimeType = "application/octet-stream";
|
||||
let size = 0;
|
||||
// Convert source to ArrayBuffer
|
||||
if (typeof source === "string") {
|
||||
// It's a URL - fetch it
|
||||
const response = await fetch(source);
|
||||
if (!response.ok) {
|
||||
throw new Error(i18n("Failed to fetch file"));
|
||||
}
|
||||
arrayBuffer = await response.arrayBuffer();
|
||||
size = arrayBuffer.byteLength;
|
||||
mimeType = response.headers.get("content-type") || mimeType;
|
||||
if (!fileName) {
|
||||
// Try to extract filename from URL
|
||||
const urlParts = source.split("/");
|
||||
detectedFileName = urlParts[urlParts.length - 1] || "document";
|
||||
}
|
||||
}
|
||||
else if (source instanceof File) {
|
||||
arrayBuffer = await source.arrayBuffer();
|
||||
size = source.size;
|
||||
mimeType = source.type || mimeType;
|
||||
detectedFileName = fileName || source.name;
|
||||
}
|
||||
else if (source instanceof Blob) {
|
||||
arrayBuffer = await source.arrayBuffer();
|
||||
size = source.size;
|
||||
mimeType = source.type || mimeType;
|
||||
}
|
||||
else if (source instanceof ArrayBuffer) {
|
||||
arrayBuffer = source;
|
||||
size = source.byteLength;
|
||||
}
|
||||
else {
|
||||
throw new Error(i18n("Invalid source type"));
|
||||
}
|
||||
// Convert ArrayBuffer to base64 - handle large files properly
|
||||
const uint8Array = new Uint8Array(arrayBuffer);
|
||||
let binary = "";
|
||||
const chunkSize = 0x8000; // Process in 32KB chunks to avoid stack overflow
|
||||
for (let i = 0; i < uint8Array.length; i += chunkSize) {
|
||||
const chunk = uint8Array.slice(i, i + chunkSize);
|
||||
binary += String.fromCharCode(...chunk);
|
||||
}
|
||||
const base64Content = btoa(binary);
|
||||
// Detect type and process accordingly
|
||||
const id = `${detectedFileName}_${Date.now()}_${Math.random()}`;
|
||||
// Check if it's a PDF
|
||||
if (mimeType === "application/pdf" || detectedFileName.toLowerCase().endsWith(".pdf")) {
|
||||
const { extractedText, preview } = await processPdf(arrayBuffer, detectedFileName);
|
||||
return {
|
||||
id,
|
||||
type: "document",
|
||||
fileName: detectedFileName,
|
||||
mimeType: "application/pdf",
|
||||
size,
|
||||
content: base64Content,
|
||||
extractedText,
|
||||
preview,
|
||||
};
|
||||
}
|
||||
// Check if it's a DOCX file
|
||||
if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ||
|
||||
detectedFileName.toLowerCase().endsWith(".docx")) {
|
||||
const { extractedText } = await processDocx(arrayBuffer, detectedFileName);
|
||||
return {
|
||||
id,
|
||||
type: "document",
|
||||
fileName: detectedFileName,
|
||||
mimeType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
size,
|
||||
content: base64Content,
|
||||
extractedText,
|
||||
};
|
||||
}
|
||||
// Check if it's a PPTX file
|
||||
if (mimeType === "application/vnd.openxmlformats-officedocument.presentationml.presentation" ||
|
||||
detectedFileName.toLowerCase().endsWith(".pptx")) {
|
||||
const { extractedText } = await processPptx(arrayBuffer, detectedFileName);
|
||||
return {
|
||||
id,
|
||||
type: "document",
|
||||
fileName: detectedFileName,
|
||||
mimeType: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
size,
|
||||
content: base64Content,
|
||||
extractedText,
|
||||
};
|
||||
}
|
||||
// Check if it's an Excel file (XLSX/XLS)
|
||||
const excelMimeTypes = [
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
"application/vnd.ms-excel",
|
||||
];
|
||||
if (excelMimeTypes.includes(mimeType) ||
|
||||
detectedFileName.toLowerCase().endsWith(".xlsx") ||
|
||||
detectedFileName.toLowerCase().endsWith(".xls")) {
|
||||
const { extractedText } = await processExcel(arrayBuffer, detectedFileName);
|
||||
return {
|
||||
id,
|
||||
type: "document",
|
||||
fileName: detectedFileName,
|
||||
mimeType: mimeType.startsWith("application/vnd")
|
||||
? mimeType
|
||||
: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
size,
|
||||
content: base64Content,
|
||||
extractedText,
|
||||
};
|
||||
}
|
||||
// Check if it's an image
|
||||
if (mimeType.startsWith("image/")) {
|
||||
return {
|
||||
id,
|
||||
type: "image",
|
||||
fileName: detectedFileName,
|
||||
mimeType,
|
||||
size,
|
||||
content: base64Content,
|
||||
preview: base64Content, // For images, preview is the same as content
|
||||
};
|
||||
}
|
||||
// Check if it's a text document
|
||||
const textExtensions = [
|
||||
".txt",
|
||||
".md",
|
||||
".json",
|
||||
".xml",
|
||||
".html",
|
||||
".css",
|
||||
".js",
|
||||
".ts",
|
||||
".jsx",
|
||||
".tsx",
|
||||
".yml",
|
||||
".yaml",
|
||||
];
|
||||
const isTextFile = mimeType.startsWith("text/") || textExtensions.some((ext) => detectedFileName.toLowerCase().endsWith(ext));
|
||||
if (isTextFile) {
|
||||
const decoder = new TextDecoder();
|
||||
const text = decoder.decode(arrayBuffer);
|
||||
return {
|
||||
id,
|
||||
type: "document",
|
||||
fileName: detectedFileName,
|
||||
mimeType: mimeType.startsWith("text/") ? mimeType : "text/plain",
|
||||
size,
|
||||
content: base64Content,
|
||||
extractedText: text,
|
||||
};
|
||||
}
|
||||
throw new Error(`Unsupported file type: ${mimeType}`);
|
||||
}
|
||||
async function processPdf(arrayBuffer, fileName) {
|
||||
let pdf = null;
|
||||
try {
|
||||
pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
|
||||
// Extract text with page structure
|
||||
let extractedText = `<pdf filename="${fileName}">`;
|
||||
for (let i = 1; i <= pdf.numPages; i++) {
|
||||
const page = await pdf.getPage(i);
|
||||
const textContent = await page.getTextContent();
|
||||
const pageText = textContent.items
|
||||
.map((item) => item.str)
|
||||
.filter((str) => str.trim())
|
||||
.join(" ");
|
||||
extractedText += `\n<page number="${i}">\n${pageText}\n</page>`;
|
||||
}
|
||||
extractedText += "\n</pdf>";
|
||||
// Generate preview from first page
|
||||
const preview = await generatePdfPreview(pdf);
|
||||
return { extractedText, preview };
|
||||
}
|
||||
catch (error) {
|
||||
console.error("Error processing PDF:", error);
|
||||
throw new Error(`Failed to process PDF: ${String(error)}`);
|
||||
}
|
||||
finally {
|
||||
// Clean up PDF resources
|
||||
if (pdf) {
|
||||
pdf.destroy();
|
||||
}
|
||||
}
|
||||
}
|
||||
async function generatePdfPreview(pdf) {
|
||||
try {
|
||||
const page = await pdf.getPage(1);
|
||||
const viewport = page.getViewport({ scale: 1.0 });
|
||||
// Create canvas with reasonable size for thumbnail (160x160 max)
|
||||
const scale = Math.min(160 / viewport.width, 160 / viewport.height);
|
||||
const scaledViewport = page.getViewport({ scale });
|
||||
const canvas = document.createElement("canvas");
|
||||
const context = canvas.getContext("2d");
|
||||
if (!context) {
|
||||
return undefined;
|
||||
}
|
||||
canvas.height = scaledViewport.height;
|
||||
canvas.width = scaledViewport.width;
|
||||
const renderContext = {
|
||||
canvasContext: context,
|
||||
viewport: scaledViewport,
|
||||
canvas: canvas,
|
||||
};
|
||||
await page.render(renderContext).promise;
|
||||
// Return base64 without data URL prefix
|
||||
return canvas.toDataURL("image/png").split(",")[1];
|
||||
}
|
||||
catch (error) {
|
||||
console.error("Error generating PDF preview:", error);
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
async function processDocx(arrayBuffer, fileName) {
|
||||
try {
|
||||
// Parse document structure
|
||||
const wordDoc = await parseAsync(arrayBuffer);
|
||||
// Extract structured text from document body
|
||||
let extractedText = `<docx filename="${fileName}">\n<page number="1">\n`;
|
||||
const body = wordDoc.documentPart?.body;
|
||||
if (body?.children) {
|
||||
// Walk through document elements and extract text
|
||||
const texts = [];
|
||||
for (const element of body.children) {
|
||||
const text = extractTextFromElement(element);
|
||||
if (text) {
|
||||
texts.push(text);
|
||||
}
|
||||
}
|
||||
extractedText += texts.join("\n");
|
||||
}
|
||||
extractedText += `\n</page>\n</docx>`;
|
||||
return { extractedText };
|
||||
}
|
||||
catch (error) {
|
||||
console.error("Error processing DOCX:", error);
|
||||
throw new Error(`Failed to process DOCX: ${String(error)}`);
|
||||
}
|
||||
}
|
||||
function extractTextFromElement(element) {
|
||||
let text = "";
|
||||
// Check type with lowercase
|
||||
const elementType = element.type?.toLowerCase() || "";
|
||||
// Handle paragraphs
|
||||
if (elementType === "paragraph" && element.children) {
|
||||
for (const child of element.children) {
|
||||
const childType = child.type?.toLowerCase() || "";
|
||||
if (childType === "run" && child.children) {
|
||||
for (const textChild of child.children) {
|
||||
const textType = textChild.type?.toLowerCase() || "";
|
||||
if (textType === "text") {
|
||||
text += textChild.text || "";
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (childType === "text") {
|
||||
text += child.text || "";
|
||||
}
|
||||
}
|
||||
}
|
||||
// Handle tables
|
||||
else if (elementType === "table") {
|
||||
if (element.children) {
|
||||
const tableTexts = [];
|
||||
for (const row of element.children) {
|
||||
const rowType = row.type?.toLowerCase() || "";
|
||||
if (rowType === "tablerow" && row.children) {
|
||||
const rowTexts = [];
|
||||
for (const cell of row.children) {
|
||||
const cellType = cell.type?.toLowerCase() || "";
|
||||
if (cellType === "tablecell" && cell.children) {
|
||||
const cellTexts = [];
|
||||
for (const cellElement of cell.children) {
|
||||
const cellText = extractTextFromElement(cellElement);
|
||||
if (cellText)
|
||||
cellTexts.push(cellText);
|
||||
}
|
||||
if (cellTexts.length > 0)
|
||||
rowTexts.push(cellTexts.join(" "));
|
||||
}
|
||||
}
|
||||
if (rowTexts.length > 0)
|
||||
tableTexts.push(rowTexts.join(" | "));
|
||||
}
|
||||
}
|
||||
if (tableTexts.length > 0) {
|
||||
text = "\n[Table]\n" + tableTexts.join("\n") + "\n[/Table]\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
// Recursively handle other container elements
|
||||
else if (element.children && Array.isArray(element.children)) {
|
||||
const childTexts = [];
|
||||
for (const child of element.children) {
|
||||
const childText = extractTextFromElement(child);
|
||||
if (childText)
|
||||
childTexts.push(childText);
|
||||
}
|
||||
text = childTexts.join(" ");
|
||||
}
|
||||
return text.trim();
|
||||
}
|
||||
async function processPptx(arrayBuffer, fileName) {
|
||||
try {
|
||||
// Load the PPTX file as a ZIP
|
||||
const zip = await JSZip.loadAsync(arrayBuffer);
|
||||
// PPTX slides are stored in ppt/slides/slide[n].xml
|
||||
let extractedText = `<pptx filename="${fileName}">`;
|
||||
// Get all slide files and sort them numerically
|
||||
const slideFiles = Object.keys(zip.files)
|
||||
.filter((name) => name.match(/ppt\/slides\/slide\d+\.xml$/))
|
||||
.sort((a, b) => {
|
||||
const numA = Number.parseInt(a.match(/slide(\d+)\.xml$/)?.[1] || "0", 10);
|
||||
const numB = Number.parseInt(b.match(/slide(\d+)\.xml$/)?.[1] || "0", 10);
|
||||
return numA - numB;
|
||||
});
|
||||
// Extract text from each slide
|
||||
for (let i = 0; i < slideFiles.length; i++) {
|
||||
const slideFile = zip.file(slideFiles[i]);
|
||||
if (slideFile) {
|
||||
const slideXml = await slideFile.async("text");
|
||||
// Extract text from XML (simple regex approach)
|
||||
// Looking for <a:t> tags which contain text in PPTX
|
||||
const textMatches = slideXml.match(/<a:t[^>]*>([^<]+)<\/a:t>/g);
|
||||
if (textMatches) {
|
||||
extractedText += `\n<slide number="${i + 1}">`;
|
||||
const slideTexts = textMatches
|
||||
.map((match) => {
|
||||
const textMatch = match.match(/<a:t[^>]*>([^<]+)<\/a:t>/);
|
||||
return textMatch ? textMatch[1] : "";
|
||||
})
|
||||
.filter((t) => t.trim());
|
||||
if (slideTexts.length > 0) {
|
||||
extractedText += "\n" + slideTexts.join("\n");
|
||||
}
|
||||
extractedText += "\n</slide>";
|
||||
}
|
||||
}
|
||||
}
|
||||
// Also try to extract text from notes
|
||||
const notesFiles = Object.keys(zip.files)
|
||||
.filter((name) => name.match(/ppt\/notesSlides\/notesSlide\d+\.xml$/))
|
||||
.sort((a, b) => {
|
||||
const numA = Number.parseInt(a.match(/notesSlide(\d+)\.xml$/)?.[1] || "0", 10);
|
||||
const numB = Number.parseInt(b.match(/notesSlide(\d+)\.xml$/)?.[1] || "0", 10);
|
||||
return numA - numB;
|
||||
});
|
||||
if (notesFiles.length > 0) {
|
||||
extractedText += "\n<notes>";
|
||||
for (const noteFile of notesFiles) {
|
||||
const file = zip.file(noteFile);
|
||||
if (file) {
|
||||
const noteXml = await file.async("text");
|
||||
const textMatches = noteXml.match(/<a:t[^>]*>([^<]+)<\/a:t>/g);
|
||||
if (textMatches) {
|
||||
const noteTexts = textMatches
|
||||
.map((match) => {
|
||||
const textMatch = match.match(/<a:t[^>]*>([^<]+)<\/a:t>/);
|
||||
return textMatch ? textMatch[1] : "";
|
||||
})
|
||||
.filter((t) => t.trim());
|
||||
if (noteTexts.length > 0) {
|
||||
const slideNum = noteFile.match(/notesSlide(\d+)\.xml$/)?.[1];
|
||||
extractedText += `\n[Slide ${slideNum} notes]: ${noteTexts.join(" ")}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
extractedText += "\n</notes>";
|
||||
}
|
||||
extractedText += "\n</pptx>";
|
||||
return { extractedText };
|
||||
}
|
||||
catch (error) {
|
||||
console.error("Error processing PPTX:", error);
|
||||
throw new Error(`Failed to process PPTX: ${String(error)}`);
|
||||
}
|
||||
}
|
||||
async function processExcel(arrayBuffer, fileName) {
|
||||
try {
|
||||
// Read the workbook
|
||||
const workbook = XLSX.read(arrayBuffer, { type: "array" });
|
||||
let extractedText = `<excel filename="${fileName}">`;
|
||||
// Process each sheet
|
||||
for (const [index, sheetName] of workbook.SheetNames.entries()) {
|
||||
const worksheet = workbook.Sheets[sheetName];
|
||||
// Extract text as CSV for the extractedText field
|
||||
const csvText = XLSX.utils.sheet_to_csv(worksheet);
|
||||
extractedText += `\n<sheet name="${sheetName}" index="${index + 1}">\n${csvText}\n</sheet>`;
|
||||
}
|
||||
extractedText += "\n</excel>";
|
||||
return { extractedText };
|
||||
}
|
||||
catch (error) {
|
||||
console.error("Error processing Excel:", error);
|
||||
throw new Error(`Failed to process Excel: ${String(error)}`);
|
||||
}
|
||||
}
|
||||
//# sourceMappingURL=attachment-utils.js.map
|
||||
Reference in New Issue
Block a user