From f06ad4502b4acdbb2d1eab4a2ea34d2bec5df5a4 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 20 Jan 2026 07:59:25 +0000 Subject: [PATCH] refactor: share responses input handling --- docs/gateway/openresponses-http-api.md | 21 + src/gateway/http-common.ts | 57 +++ src/gateway/openai-http.ts | 45 +-- src/gateway/openresponses-http.ts | 525 +++++-------------------- src/media/input-files.ts | 385 ++++++++++++++++++ 5 files changed, 566 insertions(+), 467 deletions(-) create mode 100644 src/gateway/http-common.ts create mode 100644 src/media/input-files.ts diff --git a/docs/gateway/openresponses-http-api.md b/docs/gateway/openresponses-http-api.md index 5abf1f445..031555dd2 100644 --- a/docs/gateway/openresponses-http-api.md +++ b/docs/gateway/openresponses-http-api.md @@ -173,6 +173,14 @@ Current behavior: - PDFs are parsed for text. If little text is found, the first pages are rasterized into images and passed to the model. +PDF parsing uses the Node-friendly `pdfjs-dist` legacy build (no worker). The modern +PDF.js build expects browser workers/DOM globals, so it is not used in the Gateway. + +URL fetch defaults: +- `files.allowUrl`: `true` +- `images.allowUrl`: `true` +- Requests are guarded (DNS resolution, private IP blocking, redirect caps, timeouts). + ## File + image limits (config) Defaults can be tuned under `gateway.http.endpoints.responses`: @@ -212,6 +220,19 @@ Defaults can be tuned under `gateway.http.endpoints.responses`: } ``` +Defaults when omitted: +- `maxBodyBytes`: 20MB +- `files.maxBytes`: 5MB +- `files.maxChars`: 200k +- `files.maxRedirects`: 3 +- `files.timeoutMs`: 10s +- `files.pdf.maxPages`: 4 +- `files.pdf.maxPixels`: 4,000,000 +- `files.pdf.minTextChars`: 200 +- `images.maxBytes`: 10MB +- `images.maxRedirects`: 3 +- `images.timeoutMs`: 10s + ## Streaming (SSE) Set `stream: true` to receive Server-Sent Events (SSE): diff --git a/src/gateway/http-common.ts b/src/gateway/http-common.ts new file mode 100644 index 000000000..993a3ac36 --- /dev/null +++ b/src/gateway/http-common.ts @@ -0,0 +1,57 @@ +import type { IncomingMessage, ServerResponse } from "node:http"; + +import { readJsonBody } from "./hooks.js"; + +export function sendJson(res: ServerResponse, status: number, body: unknown) { + res.statusCode = status; + res.setHeader("Content-Type", "application/json; charset=utf-8"); + res.end(JSON.stringify(body)); +} + +export function sendText(res: ServerResponse, status: number, body: string) { + res.statusCode = status; + res.setHeader("Content-Type", "text/plain; charset=utf-8"); + res.end(body); +} + +export function sendMethodNotAllowed(res: ServerResponse, allow = "POST") { + res.setHeader("Allow", allow); + sendText(res, 405, "Method Not Allowed"); +} + +export function sendUnauthorized(res: ServerResponse) { + sendJson(res, 401, { + error: { message: "Unauthorized", type: "unauthorized" }, + }); +} + +export function sendInvalidRequest(res: ServerResponse, message: string) { + sendJson(res, 400, { + error: { message, type: "invalid_request_error" }, + }); +} + +export async function readJsonBodyOrError( + req: IncomingMessage, + res: ServerResponse, + maxBytes: number, +): Promise { + const body = await readJsonBody(req, maxBytes); + if (!body.ok) { + sendInvalidRequest(res, body.error); + return undefined; + } + return body.value; +} + +export function writeDone(res: ServerResponse) { + res.write("data: [DONE]\n\n"); +} + +export function setSseHeaders(res: ServerResponse) { + res.statusCode = 200; + res.setHeader("Content-Type", "text/event-stream; charset=utf-8"); + res.setHeader("Cache-Control", "no-cache"); + res.setHeader("Connection", "keep-alive"); + res.flushHeaders?.(); +} diff --git a/src/gateway/openai-http.ts b/src/gateway/openai-http.ts index 6e7f1d521..49c87231a 100644 --- a/src/gateway/openai-http.ts +++ b/src/gateway/openai-http.ts @@ -7,8 +7,15 @@ import { agentCommand } from "../commands/agent.js"; import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js"; import { defaultRuntime } from "../runtime.js"; import { authorizeGatewayConnect, type ResolvedGatewayAuth } from "./auth.js"; +import { + readJsonBodyOrError, + sendJson, + sendMethodNotAllowed, + sendUnauthorized, + setSseHeaders, + writeDone, +} from "./http-common.js"; import { getBearerToken, resolveAgentIdForRequest, resolveSessionKey } from "./http-utils.js"; -import { readJsonBody } from "./hooks.js"; type OpenAiHttpOptions = { auth: ResolvedGatewayAuth; @@ -28,20 +35,10 @@ type OpenAiChatCompletionRequest = { user?: unknown; }; -function sendJson(res: ServerResponse, status: number, body: unknown) { - res.statusCode = status; - res.setHeader("Content-Type", "application/json; charset=utf-8"); - res.end(JSON.stringify(body)); -} - function writeSse(res: ServerResponse, data: unknown) { res.write(`data: ${JSON.stringify(data)}\n\n`); } -function writeDone(res: ServerResponse) { - res.write("data: [DONE]\n\n"); -} - function asMessages(val: unknown): OpenAiChatMessage[] { return Array.isArray(val) ? (val as OpenAiChatMessage[]) : []; } @@ -162,10 +159,7 @@ export async function handleOpenAiHttpRequest( if (url.pathname !== "/v1/chat/completions") return false; if (req.method !== "POST") { - res.statusCode = 405; - res.setHeader("Allow", "POST"); - res.setHeader("Content-Type", "text/plain; charset=utf-8"); - res.end("Method Not Allowed"); + sendMethodNotAllowed(res); return true; } @@ -176,21 +170,14 @@ export async function handleOpenAiHttpRequest( req, }); if (!authResult.ok) { - sendJson(res, 401, { - error: { message: "Unauthorized", type: "unauthorized" }, - }); + sendUnauthorized(res); return true; } - const body = await readJsonBody(req, opts.maxBodyBytes ?? 1024 * 1024); - if (!body.ok) { - sendJson(res, 400, { - error: { message: body.error, type: "invalid_request_error" }, - }); - return true; - } + const body = await readJsonBodyOrError(req, res, opts.maxBodyBytes ?? 1024 * 1024); + if (body === undefined) return true; - const payload = coerceRequest(body.value); + const payload = coerceRequest(body); const stream = Boolean(payload.stream); const model = typeof payload.model === "string" ? payload.model : "clawdbot"; const user = typeof payload.user === "string" ? payload.user : undefined; @@ -258,11 +245,7 @@ export async function handleOpenAiHttpRequest( return true; } - res.statusCode = 200; - res.setHeader("Content-Type", "text/event-stream; charset=utf-8"); - res.setHeader("Cache-Control", "no-cache"); - res.setHeader("Connection", "keep-alive"); - res.flushHeaders?.(); + setSseHeaders(res); let wroteRole = false; let sawAssistantDelta = false; diff --git a/src/gateway/openresponses-http.ts b/src/gateway/openresponses-http.ts index c7dd4e9de..110eee7e8 100644 --- a/src/gateway/openresponses-http.ts +++ b/src/gateway/openresponses-http.ts @@ -7,7 +7,6 @@ */ import { randomUUID } from "node:crypto"; -import { lookup } from "node:dns/promises"; import type { IncomingMessage, ServerResponse } from "node:http"; import { buildHistoryContextFromEntries, type HistoryEntry } from "../auto-reply/reply/history.js"; @@ -17,7 +16,14 @@ import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js"; import { defaultRuntime } from "../runtime.js"; import { authorizeGatewayConnect, type ResolvedGatewayAuth } from "./auth.js"; import { getBearerToken, resolveAgentIdForRequest, resolveSessionKey } from "./http-utils.js"; -import { readJsonBody } from "./hooks.js"; +import { + readJsonBodyOrError, + sendJson, + sendMethodNotAllowed, + sendUnauthorized, + setSseHeaders, + writeDone, +} from "./http-common.js"; import { CreateResponseBodySchema, type ContentPart, @@ -31,6 +37,24 @@ import { import type { GatewayHttpResponsesConfig } from "../config/types.gateway.js"; import type { ClientToolDefinition } from "../agents/pi-embedded-runner/run/params.js"; import type { ImageContent } from "../commands/agent/types.js"; +import { + DEFAULT_INPUT_FILE_MAX_BYTES, + DEFAULT_INPUT_FILE_MAX_CHARS, + DEFAULT_INPUT_FILE_MIMES, + DEFAULT_INPUT_IMAGE_MAX_BYTES, + DEFAULT_INPUT_IMAGE_MIMES, + DEFAULT_INPUT_MAX_REDIRECTS, + DEFAULT_INPUT_PDF_MAX_PAGES, + DEFAULT_INPUT_PDF_MAX_PIXELS, + DEFAULT_INPUT_PDF_MIN_TEXT_CHARS, + DEFAULT_INPUT_TIMEOUT_MS, + extractFileContentFromSource, + extractImageContentFromSource, + normalizeMimeList, + type InputFileLimits, + type InputImageLimits, + type InputImageSource, +} from "../media/input-files.js"; type OpenResponsesHttpOptions = { auth: ResolvedGatewayAuth; @@ -38,31 +62,6 @@ type OpenResponsesHttpOptions = { config?: GatewayHttpResponsesConfig; }; -type CanvasModule = typeof import("@napi-rs/canvas"); -type PdfJsModule = typeof import("pdfjs-dist/legacy/build/pdf.mjs"); - -async function loadCanvasModule(): Promise { - try { - return await import("@napi-rs/canvas"); - } catch { - return null; - } -} - -async function loadPdfJsModule(): Promise { - try { - return await import("pdfjs-dist/legacy/build/pdf.mjs"); - } catch { - return null; - } -} - -function sendJson(res: ServerResponse, status: number, body: unknown) { - res.statusCode = status; - res.setHeader("Content-Type", "application/json; charset=utf-8"); - res.end(JSON.stringify(body)); -} - const DEFAULT_BODY_BYTES = 20 * 1024 * 1024; function writeSseEvent(res: ServerResponse, event: StreamingEvent) { @@ -70,10 +69,6 @@ function writeSseEvent(res: ServerResponse, event: StreamingEvent) { res.write(`data: ${JSON.stringify(event)}\n\n`); } -function writeDone(res: ServerResponse) { - res.write("data: [DONE]\n\n"); -} - function extractTextContent(content: string | ContentPart[]): string { if (typeof content === "string") return content; return content @@ -88,68 +83,10 @@ function extractTextContent(content: string | ContentPart[]): string { type ResolvedResponsesLimits = { maxBodyBytes: number; - files: { - allowUrl: boolean; - allowedMimes: Set; - maxBytes: number; - maxChars: number; - maxRedirects: number; - timeoutMs: number; - pdf: { - maxPages: number; - maxPixels: number; - minTextChars: number; - }; - }; - images: { - allowUrl: boolean; - allowedMimes: Set; - maxBytes: number; - maxRedirects: number; - timeoutMs: number; - }; + files: InputFileLimits; + images: InputImageLimits; }; -const DEFAULT_IMAGE_MIMES = ["image/jpeg", "image/png", "image/gif", "image/webp"]; -const DEFAULT_FILE_MIMES = [ - "text/plain", - "text/markdown", - "text/html", - "text/csv", - "application/json", - "application/pdf", -]; -const DEFAULT_IMAGE_MAX_BYTES = 10 * 1024 * 1024; -const DEFAULT_FILE_MAX_BYTES = 5 * 1024 * 1024; -const DEFAULT_FILE_MAX_CHARS = 200_000; -const DEFAULT_MAX_REDIRECTS = 3; -const DEFAULT_TIMEOUT_MS = 10_000; -const DEFAULT_PDF_MAX_PAGES = 4; -const DEFAULT_PDF_MAX_PIXELS = 4_000_000; -const DEFAULT_PDF_MIN_TEXT_CHARS = 200; - -function normalizeMimeType(value: string | undefined): string | undefined { - if (!value) return undefined; - const [raw] = value.split(";"); - const normalized = raw?.trim().toLowerCase(); - return normalized || undefined; -} - -function parseContentType(value: string | undefined): { mimeType?: string; charset?: string } { - if (!value) return {}; - const parts = value.split(";").map((part) => part.trim()); - const mimeType = normalizeMimeType(parts[0]); - const charset = parts - .map((part) => part.match(/^charset=(.+)$/i)?.[1]?.trim()) - .find((part) => part && part.length > 0); - return { mimeType, charset }; -} - -function normalizeMimeList(values: string[] | undefined, fallback: string[]): Set { - const input = values && values.length > 0 ? values : fallback; - return new Set(input.map((value) => normalizeMimeType(value)).filter(Boolean) as string[]); -} - function resolveResponsesLimits( config: GatewayHttpResponsesConfig | undefined, ): ResolvedResponsesLimits { @@ -159,336 +96,27 @@ function resolveResponsesLimits( maxBodyBytes: config?.maxBodyBytes ?? DEFAULT_BODY_BYTES, files: { allowUrl: files?.allowUrl ?? true, - allowedMimes: normalizeMimeList(files?.allowedMimes, DEFAULT_FILE_MIMES), - maxBytes: files?.maxBytes ?? DEFAULT_FILE_MAX_BYTES, - maxChars: files?.maxChars ?? DEFAULT_FILE_MAX_CHARS, - maxRedirects: files?.maxRedirects ?? DEFAULT_MAX_REDIRECTS, - timeoutMs: files?.timeoutMs ?? DEFAULT_TIMEOUT_MS, + allowedMimes: normalizeMimeList(files?.allowedMimes, DEFAULT_INPUT_FILE_MIMES), + maxBytes: files?.maxBytes ?? DEFAULT_INPUT_FILE_MAX_BYTES, + maxChars: files?.maxChars ?? DEFAULT_INPUT_FILE_MAX_CHARS, + maxRedirects: files?.maxRedirects ?? DEFAULT_INPUT_MAX_REDIRECTS, + timeoutMs: files?.timeoutMs ?? DEFAULT_INPUT_TIMEOUT_MS, pdf: { - maxPages: files?.pdf?.maxPages ?? DEFAULT_PDF_MAX_PAGES, - maxPixels: files?.pdf?.maxPixels ?? DEFAULT_PDF_MAX_PIXELS, - minTextChars: files?.pdf?.minTextChars ?? DEFAULT_PDF_MIN_TEXT_CHARS, + maxPages: files?.pdf?.maxPages ?? DEFAULT_INPUT_PDF_MAX_PAGES, + maxPixels: files?.pdf?.maxPixels ?? DEFAULT_INPUT_PDF_MAX_PIXELS, + minTextChars: files?.pdf?.minTextChars ?? DEFAULT_INPUT_PDF_MIN_TEXT_CHARS, }, }, images: { allowUrl: images?.allowUrl ?? true, - allowedMimes: normalizeMimeList(images?.allowedMimes, DEFAULT_IMAGE_MIMES), - maxBytes: images?.maxBytes ?? DEFAULT_IMAGE_MAX_BYTES, - maxRedirects: images?.maxRedirects ?? DEFAULT_MAX_REDIRECTS, - timeoutMs: images?.timeoutMs ?? DEFAULT_TIMEOUT_MS, + allowedMimes: normalizeMimeList(images?.allowedMimes, DEFAULT_INPUT_IMAGE_MIMES), + maxBytes: images?.maxBytes ?? DEFAULT_INPUT_IMAGE_MAX_BYTES, + maxRedirects: images?.maxRedirects ?? DEFAULT_INPUT_MAX_REDIRECTS, + timeoutMs: images?.timeoutMs ?? DEFAULT_INPUT_TIMEOUT_MS, }, }; } -const PRIVATE_IPV4_PATTERNS = [ - /^127\./, - /^10\./, - /^192\.168\./, - /^172\.(1[6-9]|2[0-9]|3[0-1])\./, - /^0\./, -]; -const PRIVATE_IPV6_PREFIXES = ["::1", "fe80:", "fec0:", "fc", "fd"]; - -function isPrivateIpAddress(address: string): boolean { - if (address.includes(":")) { - const lower = address.toLowerCase(); - if (lower === "::1") return true; - return PRIVATE_IPV6_PREFIXES.some((prefix) => lower.startsWith(prefix)); - } - return PRIVATE_IPV4_PATTERNS.some((pattern) => pattern.test(address)); -} - -function isBlockedHostname(hostname: string): boolean { - const lower = hostname.toLowerCase(); - return ( - lower === "localhost" || - lower.endsWith(".localhost") || - lower.endsWith(".local") || - lower.endsWith(".internal") - ); -} - -async function assertPublicHostname(hostname: string): Promise { - if (isBlockedHostname(hostname)) { - throw new Error(`Blocked hostname: ${hostname}`); - } - - const results = await lookup(hostname, { all: true }); - if (results.length === 0) { - throw new Error(`Unable to resolve hostname: ${hostname}`); - } - for (const entry of results) { - if (isPrivateIpAddress(entry.address)) { - throw new Error(`Private IP addresses are not allowed: ${entry.address}`); - } - } -} - -function isRedirectStatus(status: number): boolean { - return status === 301 || status === 302 || status === 303 || status === 307 || status === 308; -} - -// Fetch with SSRF protection, timeout, redirect limits, and size limits. -async function fetchWithGuard(params: { - url: string; - maxBytes: number; - timeoutMs: number; - maxRedirects: number; -}): Promise<{ data: string; mimeType: string; contentType?: string }> { - let currentUrl = params.url; - let redirectCount = 0; - - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), params.timeoutMs); - - try { - while (true) { - const parsedUrl = new URL(currentUrl); - if (!["http:", "https:"].includes(parsedUrl.protocol)) { - throw new Error(`Invalid URL protocol: ${parsedUrl.protocol}. Only HTTP/HTTPS allowed.`); - } - await assertPublicHostname(parsedUrl.hostname); - - const response = await fetch(parsedUrl, { - signal: controller.signal, - headers: { "User-Agent": "Clawdbot-Gateway/1.0" }, - redirect: "manual", - }); - - if (isRedirectStatus(response.status)) { - const location = response.headers.get("location"); - if (!location) { - throw new Error(`Redirect missing location header (${response.status})`); - } - redirectCount += 1; - if (redirectCount > params.maxRedirects) { - throw new Error(`Too many redirects (limit: ${params.maxRedirects})`); - } - currentUrl = new URL(location, parsedUrl).toString(); - continue; - } - - if (!response.ok) { - throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`); - } - - const contentLength = response.headers.get("content-length"); - if (contentLength) { - const size = parseInt(contentLength, 10); - if (size > params.maxBytes) { - throw new Error(`Content too large: ${size} bytes (limit: ${params.maxBytes} bytes)`); - } - } - - const buffer = await response.arrayBuffer(); - if (buffer.byteLength > params.maxBytes) { - throw new Error( - `Content too large: ${buffer.byteLength} bytes (limit: ${params.maxBytes} bytes)`, - ); - } - - const contentType = response.headers.get("content-type") || undefined; - const parsed = parseContentType(contentType); - const mimeType = parsed.mimeType ?? "application/octet-stream"; - return { data: Buffer.from(buffer).toString("base64"), mimeType, contentType }; - } - } finally { - clearTimeout(timeoutId); - } -} - -type FileExtractResult = { - filename: string; - text?: string; - images?: ImageContent[]; -}; - -function decodeTextContent(buffer: Buffer, charset: string | undefined): string { - const encoding = charset?.trim().toLowerCase() || "utf-8"; - try { - return new TextDecoder(encoding).decode(buffer); - } catch { - return new TextDecoder("utf-8").decode(buffer); - } -} - -function clampText(text: string, maxChars: number): string { - if (text.length <= maxChars) return text; - return text.slice(0, maxChars); -} - -async function extractPdfContent(params: { - buffer: Buffer; - limits: ResolvedResponsesLimits; -}): Promise<{ text: string; images: ImageContent[] }> { - const { buffer, limits } = params; - const pdfjs = await loadPdfJsModule(); - if (!pdfjs) { - throw new Error("PDF parsing requires pdfjs-dist; install it to enable PDF support."); - } - const pdf = await pdfjs.getDocument({ - data: new Uint8Array(buffer), - disableWorker: true, - }).promise; - const maxPages = Math.min(pdf.numPages, limits.files.pdf.maxPages); - const textParts: string[] = []; - - for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) { - const page = await pdf.getPage(pageNum); - const textContent = await page.getTextContent(); - const pageText = textContent.items - .map((item) => ("str" in item ? String(item.str) : "")) - .filter(Boolean) - .join(" "); - if (pageText) textParts.push(pageText); - } - - const text = textParts.join("\n\n"); - if (text.trim().length >= limits.files.pdf.minTextChars) { - return { text, images: [] }; - } - - const images: ImageContent[] = []; - const canvasModule = await loadCanvasModule(); - if (!canvasModule) { - throw new Error("PDF image extraction requires @napi-rs/canvas; install it to enable images."); - } - for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) { - const page = await pdf.getPage(pageNum); - const viewport = page.getViewport({ scale: 1 }); - const maxPixels = limits.files.pdf.maxPixels; - const pixelBudget = Math.max(1, maxPixels); - const pagePixels = viewport.width * viewport.height; - const scale = Math.min(1, Math.sqrt(pixelBudget / pagePixels)); - const scaled = page.getViewport({ scale: Math.max(0.1, scale) }); - const canvas = canvasModule.createCanvas(Math.ceil(scaled.width), Math.ceil(scaled.height)); - await page.render({ - canvas: canvas as unknown as HTMLCanvasElement, - viewport: scaled, - }).promise; - const png = canvas.toBuffer("image/png"); - images.push({ type: "image", data: png.toString("base64"), mimeType: "image/png" }); - } - - return { text, images }; -} - -async function extractImageContent( - part: ContentPart, - limits: ResolvedResponsesLimits, -): Promise { - if (part.type !== "input_image") return null; - - const source = part.source as { type: string; url?: string; data?: string; media_type?: string }; - - if (source.type === "base64") { - if (!source.data) { - throw new Error("input_image base64 source missing 'data' field"); - } - const mimeType = normalizeMimeType(source.media_type) ?? "image/png"; - if (!limits.images.allowedMimes.has(mimeType)) { - throw new Error(`Unsupported image MIME type: ${mimeType}`); - } - const buffer = Buffer.from(source.data, "base64"); - if (buffer.byteLength > limits.images.maxBytes) { - throw new Error( - `Image too large: ${buffer.byteLength} bytes (limit: ${limits.images.maxBytes} bytes)`, - ); - } - return { type: "image", data: source.data, mimeType }; - } - - if (source.type === "url" && source.url) { - if (!limits.images.allowUrl) { - throw new Error("input_image URL sources are disabled by config"); - } - const result = await fetchWithGuard({ - url: source.url, - maxBytes: limits.images.maxBytes, - timeoutMs: limits.images.timeoutMs, - maxRedirects: limits.images.maxRedirects, - }); - if (!limits.images.allowedMimes.has(result.mimeType)) { - throw new Error(`Unsupported image MIME type from URL: ${result.mimeType}`); - } - return { type: "image", data: result.data, mimeType: result.mimeType }; - } - - throw new Error("input_image must have 'source.url' or 'source.data'"); -} - -async function extractFileContent( - part: ContentPart, - limits: ResolvedResponsesLimits, -): Promise { - if (part.type !== "input_file") return null; - - const source = part.source as { - type: string; - url?: string; - data?: string; - media_type?: string; - filename?: string; - }; - const filename = source.filename || "file"; - - let buffer: Buffer; - let mimeType: string | undefined; - let charset: string | undefined; - - if (source.type === "base64") { - if (!source.data) { - throw new Error("input_file base64 source missing 'data' field"); - } - const parsed = parseContentType(source.media_type); - mimeType = parsed.mimeType; - charset = parsed.charset; - buffer = Buffer.from(source.data, "base64"); - } else if (source.type === "url" && source.url) { - if (!limits.files.allowUrl) { - throw new Error("input_file URL sources are disabled by config"); - } - const result = await fetchWithGuard({ - url: source.url, - maxBytes: limits.files.maxBytes, - timeoutMs: limits.files.timeoutMs, - maxRedirects: limits.files.maxRedirects, - }); - const parsed = parseContentType(result.contentType); - mimeType = parsed.mimeType ?? normalizeMimeType(result.mimeType); - charset = parsed.charset; - buffer = Buffer.from(result.data, "base64"); - } else { - throw new Error("input_file must have 'source.url' or 'source.data'"); - } - - if (buffer.byteLength > limits.files.maxBytes) { - throw new Error( - `File too large: ${buffer.byteLength} bytes (limit: ${limits.files.maxBytes} bytes)`, - ); - } - - if (!mimeType) { - throw new Error("input_file missing media type"); - } - if (!limits.files.allowedMimes.has(mimeType)) { - throw new Error(`Unsupported file MIME type: ${mimeType}`); - } - - if (mimeType === "application/pdf") { - const extracted = await extractPdfContent({ buffer, limits }); - const text = extracted.text ? clampText(extracted.text, limits.files.maxChars) : ""; - return { - filename, - text, - images: extracted.images.length > 0 ? extracted.images : undefined, - }; - } - - const text = clampText(decodeTextContent(buffer, charset), limits.files.maxChars); - return { filename, text }; -} - function extractClientTools(body: CreateResponseBody): ClientToolDefinition[] { return (body.tools ?? []) as ClientToolDefinition[]; } @@ -694,10 +322,7 @@ export async function handleOpenResponsesHttpRequest( if (url.pathname !== "/v1/responses") return false; if (req.method !== "POST") { - res.statusCode = 405; - res.setHeader("Allow", "POST"); - res.setHeader("Content-Type", "text/plain; charset=utf-8"); - res.end("Method Not Allowed"); + sendMethodNotAllowed(res); return true; } @@ -708,9 +333,7 @@ export async function handleOpenResponsesHttpRequest( req, }); if (!authResult.ok) { - sendJson(res, 401, { - error: { message: "Unauthorized", type: "unauthorized" }, - }); + sendUnauthorized(res); return true; } @@ -720,16 +343,11 @@ export async function handleOpenResponsesHttpRequest( (opts.config?.maxBodyBytes ? limits.maxBodyBytes : Math.max(limits.maxBodyBytes, limits.files.maxBytes * 2, limits.images.maxBytes * 2)); - const body = await readJsonBody(req, maxBodyBytes); - if (!body.ok) { - sendJson(res, 400, { - error: { message: body.error, type: "invalid_request_error" }, - }); - return true; - } + const body = await readJsonBodyOrError(req, res, maxBodyBytes); + if (body === undefined) return true; // Validate request body with Zod - const parseResult = CreateResponseBodySchema.safeParse(body.value); + const parseResult = CreateResponseBodySchema.safeParse(body); if (!parseResult.success) { const issue = parseResult.error.issues[0]; const message = issue ? `${issue.path.join(".")}: ${issue.message}` : "Invalid request body"; @@ -752,13 +370,52 @@ export async function handleOpenResponsesHttpRequest( for (const item of payload.input) { if (item.type === "message" && typeof item.content !== "string") { for (const part of item.content) { - const image = await extractImageContent(part, limits); - if (image) { + if (part.type === "input_image") { + const source = part.source as { + type?: string; + url?: string; + data?: string; + media_type?: string; + }; + const sourceType = + source.type === "base64" || source.type === "url" ? source.type : undefined; + if (!sourceType) { + throw new Error("input_image must have 'source.url' or 'source.data'"); + } + const imageSource: InputImageSource = { + type: sourceType, + url: source.url, + data: source.data, + mediaType: source.media_type, + }; + const image = await extractImageContentFromSource(imageSource, limits.images); images.push(image); continue; } - const file = await extractFileContent(part, limits); - if (file) { + + if (part.type === "input_file") { + const source = part.source as { + type?: string; + url?: string; + data?: string; + media_type?: string; + filename?: string; + }; + const sourceType = + source.type === "base64" || source.type === "url" ? source.type : undefined; + if (!sourceType) { + throw new Error("input_file must have 'source.url' or 'source.data'"); + } + const file = await extractFileContentFromSource({ + source: { + type: sourceType, + url: source.url, + data: source.data, + mediaType: source.media_type, + filename: source.filename, + }, + limits: limits.files, + }); if (file.text?.trim()) { fileContexts.push(`\n${file.text}\n`); } else if (file.images && file.images.length > 0) { @@ -923,11 +580,7 @@ export async function handleOpenResponsesHttpRequest( // Streaming mode // ───────────────────────────────────────────────────────────────────────── - res.statusCode = 200; - res.setHeader("Content-Type", "text/event-stream; charset=utf-8"); - res.setHeader("Cache-Control", "no-cache"); - res.setHeader("Connection", "keep-alive"); - res.flushHeaders?.(); + setSseHeaders(res); let accumulatedText = ""; let sawAssistantDelta = false; diff --git a/src/media/input-files.ts b/src/media/input-files.ts new file mode 100644 index 000000000..74aae0c50 --- /dev/null +++ b/src/media/input-files.ts @@ -0,0 +1,385 @@ +import { lookup } from "node:dns/promises"; + +import { createCanvas } from "@napi-rs/canvas"; +import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs"; + +export type InputImageContent = { + type: "image"; + data: string; + mimeType: string; +}; + +export type InputFileExtractResult = { + filename: string; + text?: string; + images?: InputImageContent[]; +}; + +export type InputPdfLimits = { + maxPages: number; + maxPixels: number; + minTextChars: number; +}; + +export type InputFileLimits = { + allowUrl: boolean; + allowedMimes: Set; + maxBytes: number; + maxChars: number; + maxRedirects: number; + timeoutMs: number; + pdf: InputPdfLimits; +}; + +export type InputImageLimits = { + allowUrl: boolean; + allowedMimes: Set; + maxBytes: number; + maxRedirects: number; + timeoutMs: number; +}; + +export type InputImageSource = { + type: "base64" | "url"; + data?: string; + url?: string; + mediaType?: string; +}; + +export type InputFileSource = { + type: "base64" | "url"; + data?: string; + url?: string; + mediaType?: string; + filename?: string; +}; + +export type InputFetchResult = { + buffer: Buffer; + mimeType: string; + contentType?: string; +}; + +export const DEFAULT_INPUT_IMAGE_MIMES = ["image/jpeg", "image/png", "image/gif", "image/webp"]; +export const DEFAULT_INPUT_FILE_MIMES = [ + "text/plain", + "text/markdown", + "text/html", + "text/csv", + "application/json", + "application/pdf", +]; +export const DEFAULT_INPUT_IMAGE_MAX_BYTES = 10 * 1024 * 1024; +export const DEFAULT_INPUT_FILE_MAX_BYTES = 5 * 1024 * 1024; +export const DEFAULT_INPUT_FILE_MAX_CHARS = 200_000; +export const DEFAULT_INPUT_MAX_REDIRECTS = 3; +export const DEFAULT_INPUT_TIMEOUT_MS = 10_000; +export const DEFAULT_INPUT_PDF_MAX_PAGES = 4; +export const DEFAULT_INPUT_PDF_MAX_PIXELS = 4_000_000; +export const DEFAULT_INPUT_PDF_MIN_TEXT_CHARS = 200; + +const PRIVATE_IPV4_PATTERNS = [ + /^127\./, + /^10\./, + /^192\.168\./, + /^172\.(1[6-9]|2[0-9]|3[0-1])\./, + /^0\./, +]; +const PRIVATE_IPV6_PREFIXES = ["::1", "fe80:", "fec0:", "fc", "fd"]; + +function isPrivateIpAddress(address: string): boolean { + if (address.includes(":")) { + const lower = address.toLowerCase(); + if (lower === "::1") return true; + return PRIVATE_IPV6_PREFIXES.some((prefix) => lower.startsWith(prefix)); + } + return PRIVATE_IPV4_PATTERNS.some((pattern) => pattern.test(address)); +} + +function isBlockedHostname(hostname: string): boolean { + const lower = hostname.toLowerCase(); + return ( + lower === "localhost" || + lower.endsWith(".localhost") || + lower.endsWith(".local") || + lower.endsWith(".internal") + ); +} + +async function assertPublicHostname(hostname: string): Promise { + if (isBlockedHostname(hostname)) { + throw new Error(`Blocked hostname: ${hostname}`); + } + + const results = await lookup(hostname, { all: true }); + if (results.length === 0) { + throw new Error(`Unable to resolve hostname: ${hostname}`); + } + for (const entry of results) { + if (isPrivateIpAddress(entry.address)) { + throw new Error(`Private IP addresses are not allowed: ${entry.address}`); + } + } +} + +function isRedirectStatus(status: number): boolean { + return status === 301 || status === 302 || status === 303 || status === 307 || status === 308; +} + +export function normalizeMimeType(value: string | undefined): string | undefined { + if (!value) return undefined; + const [raw] = value.split(";"); + const normalized = raw?.trim().toLowerCase(); + return normalized || undefined; +} + +export function parseContentType(value: string | undefined): { + mimeType?: string; + charset?: string; +} { + if (!value) return {}; + const parts = value.split(";").map((part) => part.trim()); + const mimeType = normalizeMimeType(parts[0]); + const charset = parts + .map((part) => part.match(/^charset=(.+)$/i)?.[1]?.trim()) + .find((part) => part && part.length > 0); + return { mimeType, charset }; +} + +export function normalizeMimeList(values: string[] | undefined, fallback: string[]): Set { + const input = values && values.length > 0 ? values : fallback; + return new Set(input.map((value) => normalizeMimeType(value)).filter(Boolean) as string[]); +} + +export async function fetchWithGuard(params: { + url: string; + maxBytes: number; + timeoutMs: number; + maxRedirects: number; +}): Promise { + let currentUrl = params.url; + let redirectCount = 0; + + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), params.timeoutMs); + + try { + while (true) { + const parsedUrl = new URL(currentUrl); + if (!["http:", "https:"].includes(parsedUrl.protocol)) { + throw new Error(`Invalid URL protocol: ${parsedUrl.protocol}. Only HTTP/HTTPS allowed.`); + } + await assertPublicHostname(parsedUrl.hostname); + + const response = await fetch(parsedUrl, { + signal: controller.signal, + headers: { "User-Agent": "Clawdbot-Gateway/1.0" }, + redirect: "manual", + }); + + if (isRedirectStatus(response.status)) { + const location = response.headers.get("location"); + if (!location) { + throw new Error(`Redirect missing location header (${response.status})`); + } + redirectCount += 1; + if (redirectCount > params.maxRedirects) { + throw new Error(`Too many redirects (limit: ${params.maxRedirects})`); + } + currentUrl = new URL(location, parsedUrl).toString(); + continue; + } + + if (!response.ok) { + throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`); + } + + const contentLength = response.headers.get("content-length"); + if (contentLength) { + const size = parseInt(contentLength, 10); + if (size > params.maxBytes) { + throw new Error(`Content too large: ${size} bytes (limit: ${params.maxBytes} bytes)`); + } + } + + const buffer = Buffer.from(await response.arrayBuffer()); + if (buffer.byteLength > params.maxBytes) { + throw new Error( + `Content too large: ${buffer.byteLength} bytes (limit: ${params.maxBytes} bytes)`, + ); + } + + const contentType = response.headers.get("content-type") || undefined; + const parsed = parseContentType(contentType); + const mimeType = parsed.mimeType ?? "application/octet-stream"; + return { buffer, mimeType, contentType }; + } + } finally { + clearTimeout(timeoutId); + } +} + +function decodeTextContent(buffer: Buffer, charset: string | undefined): string { + const encoding = charset?.trim().toLowerCase() || "utf-8"; + try { + return new TextDecoder(encoding).decode(buffer); + } catch { + return new TextDecoder("utf-8").decode(buffer); + } +} + +function clampText(text: string, maxChars: number): string { + if (text.length <= maxChars) return text; + return text.slice(0, maxChars); +} + +async function extractPdfContent(params: { + buffer: Buffer; + limits: InputFileLimits; +}): Promise<{ text: string; images: InputImageContent[] }> { + const { buffer, limits } = params; + const pdf = await getDocument({ + data: new Uint8Array(buffer), + // @ts-expect-error pdfjs-dist legacy option not in current type defs. + disableWorker: true, + }).promise; + const maxPages = Math.min(pdf.numPages, limits.pdf.maxPages); + const textParts: string[] = []; + + for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) { + const page = await pdf.getPage(pageNum); + const textContent = await page.getTextContent(); + const pageText = textContent.items + .map((item) => ("str" in item ? String(item.str) : "")) + .filter(Boolean) + .join(" "); + if (pageText) textParts.push(pageText); + } + + const text = textParts.join("\n\n"); + if (text.trim().length >= limits.pdf.minTextChars) { + return { text, images: [] }; + } + + const images: InputImageContent[] = []; + for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) { + const page = await pdf.getPage(pageNum); + const viewport = page.getViewport({ scale: 1 }); + const maxPixels = limits.pdf.maxPixels; + const pixelBudget = Math.max(1, maxPixels); + const pagePixels = viewport.width * viewport.height; + const scale = Math.min(1, Math.sqrt(pixelBudget / pagePixels)); + const scaled = page.getViewport({ scale: Math.max(0.1, scale) }); + const canvas = createCanvas(Math.ceil(scaled.width), Math.ceil(scaled.height)); + await page.render({ + canvas: canvas as unknown as HTMLCanvasElement, + viewport: scaled, + }).promise; + const png = canvas.toBuffer("image/png"); + images.push({ type: "image", data: png.toString("base64"), mimeType: "image/png" }); + } + + return { text, images }; +} + +export async function extractImageContentFromSource( + source: InputImageSource, + limits: InputImageLimits, +): Promise { + if (source.type === "base64") { + if (!source.data) { + throw new Error("input_image base64 source missing 'data' field"); + } + const mimeType = normalizeMimeType(source.mediaType) ?? "image/png"; + if (!limits.allowedMimes.has(mimeType)) { + throw new Error(`Unsupported image MIME type: ${mimeType}`); + } + const buffer = Buffer.from(source.data, "base64"); + if (buffer.byteLength > limits.maxBytes) { + throw new Error( + `Image too large: ${buffer.byteLength} bytes (limit: ${limits.maxBytes} bytes)`, + ); + } + return { type: "image", data: source.data, mimeType }; + } + + if (source.type === "url" && source.url) { + if (!limits.allowUrl) { + throw new Error("input_image URL sources are disabled by config"); + } + const result = await fetchWithGuard({ + url: source.url, + maxBytes: limits.maxBytes, + timeoutMs: limits.timeoutMs, + maxRedirects: limits.maxRedirects, + }); + if (!limits.allowedMimes.has(result.mimeType)) { + throw new Error(`Unsupported image MIME type from URL: ${result.mimeType}`); + } + return { type: "image", data: result.buffer.toString("base64"), mimeType: result.mimeType }; + } + + throw new Error("input_image must have 'source.url' or 'source.data'"); +} + +export async function extractFileContentFromSource(params: { + source: InputFileSource; + limits: InputFileLimits; +}): Promise { + const { source, limits } = params; + const filename = source.filename || "file"; + + let buffer: Buffer; + let mimeType: string | undefined; + let charset: string | undefined; + + if (source.type === "base64") { + if (!source.data) { + throw new Error("input_file base64 source missing 'data' field"); + } + const parsed = parseContentType(source.mediaType); + mimeType = parsed.mimeType; + charset = parsed.charset; + buffer = Buffer.from(source.data, "base64"); + } else if (source.type === "url" && source.url) { + if (!limits.allowUrl) { + throw new Error("input_file URL sources are disabled by config"); + } + const result = await fetchWithGuard({ + url: source.url, + maxBytes: limits.maxBytes, + timeoutMs: limits.timeoutMs, + maxRedirects: limits.maxRedirects, + }); + const parsed = parseContentType(result.contentType); + mimeType = parsed.mimeType ?? normalizeMimeType(result.mimeType); + charset = parsed.charset; + buffer = result.buffer; + } else { + throw new Error("input_file must have 'source.url' or 'source.data'"); + } + + if (buffer.byteLength > limits.maxBytes) { + throw new Error(`File too large: ${buffer.byteLength} bytes (limit: ${limits.maxBytes} bytes)`); + } + + if (!mimeType) { + throw new Error("input_file missing media type"); + } + if (!limits.allowedMimes.has(mimeType)) { + throw new Error(`Unsupported file MIME type: ${mimeType}`); + } + + if (mimeType === "application/pdf") { + const extracted = await extractPdfContent({ buffer, limits }); + const text = extracted.text ? clampText(extracted.text, limits.maxChars) : ""; + return { + filename, + text, + images: extracted.images.length > 0 ? extracted.images : undefined, + }; + } + + const text = clampText(decodeTextContent(buffer, charset), limits.maxChars); + return { filename, text }; +}