fix: lazy-load pdf/image input deps
This commit is contained in:
@@ -1,7 +1,35 @@
|
|||||||
import { lookup } from "node:dns/promises";
|
import { lookup } from "node:dns/promises";
|
||||||
|
|
||||||
import { createCanvas } from "@napi-rs/canvas";
|
type CanvasModule = typeof import("@napi-rs/canvas");
|
||||||
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
type PdfJsModule = typeof import("pdfjs-dist/legacy/build/pdf.mjs");
|
||||||
|
|
||||||
|
let canvasModulePromise: Promise<CanvasModule> | null = null;
|
||||||
|
let pdfJsModulePromise: Promise<PdfJsModule> | null = null;
|
||||||
|
|
||||||
|
// Lazy-load optional PDF/image deps so non-PDF paths don't require native installs.
|
||||||
|
/**
 * Resolves the `@napi-rs/canvas` module, importing it on first use.
 *
 * The pending import is stored in the module-level `canvasModulePromise`, so
 * repeated and concurrent callers share one import. If the import rejects,
 * the stored promise is cleared so that the next call attempts the import
 * again.
 *
 * @returns The loaded `@napi-rs/canvas` module namespace.
 * @throws Error when the import fails; the message embeds the stringified
 *   underlying failure.
 */
async function loadCanvasModule(): Promise<CanvasModule> {
  // Fast path: an import is already in flight or has completed.
  if (canvasModulePromise) {
    return canvasModulePromise;
  }

  const pending = import("@napi-rs/canvas").catch((reason) => {
    // Forget the failed attempt so a later call is not stuck on a rejected promise.
    canvasModulePromise = null;
    const detail = String(reason);
    throw new Error(
      `Optional dependency @napi-rs/canvas is required for PDF image extraction: ${detail}`,
    );
  });

  canvasModulePromise = pending;
  return pending;
}
|
||||||
|
|
||||||
|
/**
 * Resolves the `pdfjs-dist` legacy build module, importing it on first use.
 *
 * The pending import is stored in the module-level `pdfJsModulePromise`, so
 * repeated and concurrent callers share one import. If the import rejects,
 * the stored promise is cleared so that the next call attempts the import
 * again.
 *
 * @returns The loaded `pdfjs-dist/legacy/build/pdf.mjs` module namespace.
 * @throws Error when the import fails; the message embeds the stringified
 *   underlying failure.
 */
async function loadPdfJsModule(): Promise<PdfJsModule> {
  // Fast path: an import is already in flight or has completed.
  if (pdfJsModulePromise) {
    return pdfJsModulePromise;
  }

  const pending = import("pdfjs-dist/legacy/build/pdf.mjs").catch((reason) => {
    // Forget the failed attempt so a later call is not stuck on a rejected promise.
    pdfJsModulePromise = null;
    const detail = String(reason);
    throw new Error(
      `Optional dependency pdfjs-dist is required for PDF extraction: ${detail}`,
    );
  });

  pdfJsModulePromise = pending;
  return pending;
}
|
||||||
|
|
||||||
export type InputImageContent = {
|
export type InputImageContent = {
|
||||||
type: "image";
|
type: "image";
|
||||||
@@ -238,9 +266,9 @@ async function extractPdfContent(params: {
|
|||||||
limits: InputFileLimits;
|
limits: InputFileLimits;
|
||||||
}): Promise<{ text: string; images: InputImageContent[] }> {
|
}): Promise<{ text: string; images: InputImageContent[] }> {
|
||||||
const { buffer, limits } = params;
|
const { buffer, limits } = params;
|
||||||
|
const { getDocument } = await loadPdfJsModule();
|
||||||
const pdf = await getDocument({
|
const pdf = await getDocument({
|
||||||
data: new Uint8Array(buffer),
|
data: new Uint8Array(buffer),
|
||||||
// @ts-expect-error pdfjs-dist legacy option not in current type defs.
|
|
||||||
disableWorker: true,
|
disableWorker: true,
|
||||||
}).promise;
|
}).promise;
|
||||||
const maxPages = Math.min(pdf.numPages, limits.pdf.maxPages);
|
const maxPages = Math.min(pdf.numPages, limits.pdf.maxPages);
|
||||||
@@ -261,6 +289,7 @@ async function extractPdfContent(params: {
|
|||||||
return { text, images: [] };
|
return { text, images: [] };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const { createCanvas } = await loadCanvasModule();
|
||||||
const images: InputImageContent[] = [];
|
const images: InputImageContent[] = [];
|
||||||
for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) {
|
for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) {
|
||||||
const page = await pdf.getPage(pageNum);
|
const page = await pdf.getPage(pageNum);
|
||||||
|
|||||||
Reference in New Issue
Block a user