refactor: share responses input handling

This commit is contained in:
Peter Steinberger
2026-01-20 07:59:25 +00:00
parent e26c647828
commit f06ad4502b
5 changed files with 566 additions and 467 deletions

View File

@@ -173,6 +173,14 @@ Current behavior:
- PDFs are parsed for text. If little text is found, the first pages are rasterized - PDFs are parsed for text. If little text is found, the first pages are rasterized
into images and passed to the model. into images and passed to the model.
PDF parsing uses the Node-friendly `pdfjs-dist` legacy build (no worker). The modern
PDF.js build expects browser workers/DOM globals, so it is not used in the Gateway.
URL fetch defaults:
- `files.allowUrl`: `true`
- `images.allowUrl`: `true`
- Requests are guarded (DNS resolution, private IP blocking, redirect caps, timeouts).
## File + image limits (config) ## File + image limits (config)
Defaults can be tuned under `gateway.http.endpoints.responses`: Defaults can be tuned under `gateway.http.endpoints.responses`:
@@ -212,6 +220,19 @@ Defaults can be tuned under `gateway.http.endpoints.responses`:
} }
``` ```
Defaults when omitted:
- `maxBodyBytes`: 20MB
- `files.maxBytes`: 5MB
- `files.maxChars`: 200k
- `files.maxRedirects`: 3
- `files.timeoutMs`: 10s
- `files.pdf.maxPages`: 4
- `files.pdf.maxPixels`: 4,000,000
- `files.pdf.minTextChars`: 200
- `images.maxBytes`: 10MB
- `images.maxRedirects`: 3
- `images.timeoutMs`: 10s
## Streaming (SSE) ## Streaming (SSE)
Set `stream: true` to receive Server-Sent Events (SSE): Set `stream: true` to receive Server-Sent Events (SSE):

View File

@@ -0,0 +1,57 @@
import type { IncomingMessage, ServerResponse } from "node:http";
import { readJsonBody } from "./hooks.js";
export function sendJson(res: ServerResponse, status: number, body: unknown) {
res.statusCode = status;
res.setHeader("Content-Type", "application/json; charset=utf-8");
res.end(JSON.stringify(body));
}
export function sendText(res: ServerResponse, status: number, body: string) {
res.statusCode = status;
res.setHeader("Content-Type", "text/plain; charset=utf-8");
res.end(body);
}
export function sendMethodNotAllowed(res: ServerResponse, allow = "POST") {
res.setHeader("Allow", allow);
sendText(res, 405, "Method Not Allowed");
}
export function sendUnauthorized(res: ServerResponse) {
sendJson(res, 401, {
error: { message: "Unauthorized", type: "unauthorized" },
});
}
export function sendInvalidRequest(res: ServerResponse, message: string) {
sendJson(res, 400, {
error: { message, type: "invalid_request_error" },
});
}
export async function readJsonBodyOrError(
req: IncomingMessage,
res: ServerResponse,
maxBytes: number,
): Promise<unknown> {
const body = await readJsonBody(req, maxBytes);
if (!body.ok) {
sendInvalidRequest(res, body.error);
return undefined;
}
return body.value;
}
export function writeDone(res: ServerResponse) {
res.write("data: [DONE]\n\n");
}
export function setSseHeaders(res: ServerResponse) {
res.statusCode = 200;
res.setHeader("Content-Type", "text/event-stream; charset=utf-8");
res.setHeader("Cache-Control", "no-cache");
res.setHeader("Connection", "keep-alive");
res.flushHeaders?.();
}

View File

@@ -7,8 +7,15 @@ import { agentCommand } from "../commands/agent.js";
import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js"; import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js";
import { defaultRuntime } from "../runtime.js"; import { defaultRuntime } from "../runtime.js";
import { authorizeGatewayConnect, type ResolvedGatewayAuth } from "./auth.js"; import { authorizeGatewayConnect, type ResolvedGatewayAuth } from "./auth.js";
import {
readJsonBodyOrError,
sendJson,
sendMethodNotAllowed,
sendUnauthorized,
setSseHeaders,
writeDone,
} from "./http-common.js";
import { getBearerToken, resolveAgentIdForRequest, resolveSessionKey } from "./http-utils.js"; import { getBearerToken, resolveAgentIdForRequest, resolveSessionKey } from "./http-utils.js";
import { readJsonBody } from "./hooks.js";
type OpenAiHttpOptions = { type OpenAiHttpOptions = {
auth: ResolvedGatewayAuth; auth: ResolvedGatewayAuth;
@@ -28,20 +35,10 @@ type OpenAiChatCompletionRequest = {
user?: unknown; user?: unknown;
}; };
function sendJson(res: ServerResponse, status: number, body: unknown) {
res.statusCode = status;
res.setHeader("Content-Type", "application/json; charset=utf-8");
res.end(JSON.stringify(body));
}
function writeSse(res: ServerResponse, data: unknown) { function writeSse(res: ServerResponse, data: unknown) {
res.write(`data: ${JSON.stringify(data)}\n\n`); res.write(`data: ${JSON.stringify(data)}\n\n`);
} }
function writeDone(res: ServerResponse) {
res.write("data: [DONE]\n\n");
}
function asMessages(val: unknown): OpenAiChatMessage[] { function asMessages(val: unknown): OpenAiChatMessage[] {
return Array.isArray(val) ? (val as OpenAiChatMessage[]) : []; return Array.isArray(val) ? (val as OpenAiChatMessage[]) : [];
} }
@@ -162,10 +159,7 @@ export async function handleOpenAiHttpRequest(
if (url.pathname !== "/v1/chat/completions") return false; if (url.pathname !== "/v1/chat/completions") return false;
if (req.method !== "POST") { if (req.method !== "POST") {
res.statusCode = 405; sendMethodNotAllowed(res);
res.setHeader("Allow", "POST");
res.setHeader("Content-Type", "text/plain; charset=utf-8");
res.end("Method Not Allowed");
return true; return true;
} }
@@ -176,21 +170,14 @@ export async function handleOpenAiHttpRequest(
req, req,
}); });
if (!authResult.ok) { if (!authResult.ok) {
sendJson(res, 401, { sendUnauthorized(res);
error: { message: "Unauthorized", type: "unauthorized" },
});
return true; return true;
} }
const body = await readJsonBody(req, opts.maxBodyBytes ?? 1024 * 1024); const body = await readJsonBodyOrError(req, res, opts.maxBodyBytes ?? 1024 * 1024);
if (!body.ok) { if (body === undefined) return true;
sendJson(res, 400, {
error: { message: body.error, type: "invalid_request_error" },
});
return true;
}
const payload = coerceRequest(body.value); const payload = coerceRequest(body);
const stream = Boolean(payload.stream); const stream = Boolean(payload.stream);
const model = typeof payload.model === "string" ? payload.model : "clawdbot"; const model = typeof payload.model === "string" ? payload.model : "clawdbot";
const user = typeof payload.user === "string" ? payload.user : undefined; const user = typeof payload.user === "string" ? payload.user : undefined;
@@ -258,11 +245,7 @@ export async function handleOpenAiHttpRequest(
return true; return true;
} }
res.statusCode = 200; setSseHeaders(res);
res.setHeader("Content-Type", "text/event-stream; charset=utf-8");
res.setHeader("Cache-Control", "no-cache");
res.setHeader("Connection", "keep-alive");
res.flushHeaders?.();
let wroteRole = false; let wroteRole = false;
let sawAssistantDelta = false; let sawAssistantDelta = false;

View File

@@ -7,7 +7,6 @@
*/ */
import { randomUUID } from "node:crypto"; import { randomUUID } from "node:crypto";
import { lookup } from "node:dns/promises";
import type { IncomingMessage, ServerResponse } from "node:http"; import type { IncomingMessage, ServerResponse } from "node:http";
import { buildHistoryContextFromEntries, type HistoryEntry } from "../auto-reply/reply/history.js"; import { buildHistoryContextFromEntries, type HistoryEntry } from "../auto-reply/reply/history.js";
@@ -17,7 +16,14 @@ import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js";
import { defaultRuntime } from "../runtime.js"; import { defaultRuntime } from "../runtime.js";
import { authorizeGatewayConnect, type ResolvedGatewayAuth } from "./auth.js"; import { authorizeGatewayConnect, type ResolvedGatewayAuth } from "./auth.js";
import { getBearerToken, resolveAgentIdForRequest, resolveSessionKey } from "./http-utils.js"; import { getBearerToken, resolveAgentIdForRequest, resolveSessionKey } from "./http-utils.js";
import { readJsonBody } from "./hooks.js"; import {
readJsonBodyOrError,
sendJson,
sendMethodNotAllowed,
sendUnauthorized,
setSseHeaders,
writeDone,
} from "./http-common.js";
import { import {
CreateResponseBodySchema, CreateResponseBodySchema,
type ContentPart, type ContentPart,
@@ -31,6 +37,24 @@ import {
import type { GatewayHttpResponsesConfig } from "../config/types.gateway.js"; import type { GatewayHttpResponsesConfig } from "../config/types.gateway.js";
import type { ClientToolDefinition } from "../agents/pi-embedded-runner/run/params.js"; import type { ClientToolDefinition } from "../agents/pi-embedded-runner/run/params.js";
import type { ImageContent } from "../commands/agent/types.js"; import type { ImageContent } from "../commands/agent/types.js";
import {
DEFAULT_INPUT_FILE_MAX_BYTES,
DEFAULT_INPUT_FILE_MAX_CHARS,
DEFAULT_INPUT_FILE_MIMES,
DEFAULT_INPUT_IMAGE_MAX_BYTES,
DEFAULT_INPUT_IMAGE_MIMES,
DEFAULT_INPUT_MAX_REDIRECTS,
DEFAULT_INPUT_PDF_MAX_PAGES,
DEFAULT_INPUT_PDF_MAX_PIXELS,
DEFAULT_INPUT_PDF_MIN_TEXT_CHARS,
DEFAULT_INPUT_TIMEOUT_MS,
extractFileContentFromSource,
extractImageContentFromSource,
normalizeMimeList,
type InputFileLimits,
type InputImageLimits,
type InputImageSource,
} from "../media/input-files.js";
type OpenResponsesHttpOptions = { type OpenResponsesHttpOptions = {
auth: ResolvedGatewayAuth; auth: ResolvedGatewayAuth;
@@ -38,31 +62,6 @@ type OpenResponsesHttpOptions = {
config?: GatewayHttpResponsesConfig; config?: GatewayHttpResponsesConfig;
}; };
type CanvasModule = typeof import("@napi-rs/canvas");
type PdfJsModule = typeof import("pdfjs-dist/legacy/build/pdf.mjs");
async function loadCanvasModule(): Promise<CanvasModule | null> {
try {
return await import("@napi-rs/canvas");
} catch {
return null;
}
}
async function loadPdfJsModule(): Promise<PdfJsModule | null> {
try {
return await import("pdfjs-dist/legacy/build/pdf.mjs");
} catch {
return null;
}
}
function sendJson(res: ServerResponse, status: number, body: unknown) {
res.statusCode = status;
res.setHeader("Content-Type", "application/json; charset=utf-8");
res.end(JSON.stringify(body));
}
const DEFAULT_BODY_BYTES = 20 * 1024 * 1024; const DEFAULT_BODY_BYTES = 20 * 1024 * 1024;
function writeSseEvent(res: ServerResponse, event: StreamingEvent) { function writeSseEvent(res: ServerResponse, event: StreamingEvent) {
@@ -70,10 +69,6 @@ function writeSseEvent(res: ServerResponse, event: StreamingEvent) {
res.write(`data: ${JSON.stringify(event)}\n\n`); res.write(`data: ${JSON.stringify(event)}\n\n`);
} }
function writeDone(res: ServerResponse) {
res.write("data: [DONE]\n\n");
}
function extractTextContent(content: string | ContentPart[]): string { function extractTextContent(content: string | ContentPart[]): string {
if (typeof content === "string") return content; if (typeof content === "string") return content;
return content return content
@@ -88,68 +83,10 @@ function extractTextContent(content: string | ContentPart[]): string {
type ResolvedResponsesLimits = { type ResolvedResponsesLimits = {
maxBodyBytes: number; maxBodyBytes: number;
files: { files: InputFileLimits;
allowUrl: boolean; images: InputImageLimits;
allowedMimes: Set<string>;
maxBytes: number;
maxChars: number;
maxRedirects: number;
timeoutMs: number;
pdf: {
maxPages: number;
maxPixels: number;
minTextChars: number;
};
};
images: {
allowUrl: boolean;
allowedMimes: Set<string>;
maxBytes: number;
maxRedirects: number;
timeoutMs: number;
};
}; };
const DEFAULT_IMAGE_MIMES = ["image/jpeg", "image/png", "image/gif", "image/webp"];
const DEFAULT_FILE_MIMES = [
"text/plain",
"text/markdown",
"text/html",
"text/csv",
"application/json",
"application/pdf",
];
const DEFAULT_IMAGE_MAX_BYTES = 10 * 1024 * 1024;
const DEFAULT_FILE_MAX_BYTES = 5 * 1024 * 1024;
const DEFAULT_FILE_MAX_CHARS = 200_000;
const DEFAULT_MAX_REDIRECTS = 3;
const DEFAULT_TIMEOUT_MS = 10_000;
const DEFAULT_PDF_MAX_PAGES = 4;
const DEFAULT_PDF_MAX_PIXELS = 4_000_000;
const DEFAULT_PDF_MIN_TEXT_CHARS = 200;
function normalizeMimeType(value: string | undefined): string | undefined {
if (!value) return undefined;
const [raw] = value.split(";");
const normalized = raw?.trim().toLowerCase();
return normalized || undefined;
}
function parseContentType(value: string | undefined): { mimeType?: string; charset?: string } {
if (!value) return {};
const parts = value.split(";").map((part) => part.trim());
const mimeType = normalizeMimeType(parts[0]);
const charset = parts
.map((part) => part.match(/^charset=(.+)$/i)?.[1]?.trim())
.find((part) => part && part.length > 0);
return { mimeType, charset };
}
function normalizeMimeList(values: string[] | undefined, fallback: string[]): Set<string> {
const input = values && values.length > 0 ? values : fallback;
return new Set(input.map((value) => normalizeMimeType(value)).filter(Boolean) as string[]);
}
function resolveResponsesLimits( function resolveResponsesLimits(
config: GatewayHttpResponsesConfig | undefined, config: GatewayHttpResponsesConfig | undefined,
): ResolvedResponsesLimits { ): ResolvedResponsesLimits {
@@ -159,336 +96,27 @@ function resolveResponsesLimits(
maxBodyBytes: config?.maxBodyBytes ?? DEFAULT_BODY_BYTES, maxBodyBytes: config?.maxBodyBytes ?? DEFAULT_BODY_BYTES,
files: { files: {
allowUrl: files?.allowUrl ?? true, allowUrl: files?.allowUrl ?? true,
allowedMimes: normalizeMimeList(files?.allowedMimes, DEFAULT_FILE_MIMES), allowedMimes: normalizeMimeList(files?.allowedMimes, DEFAULT_INPUT_FILE_MIMES),
maxBytes: files?.maxBytes ?? DEFAULT_FILE_MAX_BYTES, maxBytes: files?.maxBytes ?? DEFAULT_INPUT_FILE_MAX_BYTES,
maxChars: files?.maxChars ?? DEFAULT_FILE_MAX_CHARS, maxChars: files?.maxChars ?? DEFAULT_INPUT_FILE_MAX_CHARS,
maxRedirects: files?.maxRedirects ?? DEFAULT_MAX_REDIRECTS, maxRedirects: files?.maxRedirects ?? DEFAULT_INPUT_MAX_REDIRECTS,
timeoutMs: files?.timeoutMs ?? DEFAULT_TIMEOUT_MS, timeoutMs: files?.timeoutMs ?? DEFAULT_INPUT_TIMEOUT_MS,
pdf: { pdf: {
maxPages: files?.pdf?.maxPages ?? DEFAULT_PDF_MAX_PAGES, maxPages: files?.pdf?.maxPages ?? DEFAULT_INPUT_PDF_MAX_PAGES,
maxPixels: files?.pdf?.maxPixels ?? DEFAULT_PDF_MAX_PIXELS, maxPixels: files?.pdf?.maxPixels ?? DEFAULT_INPUT_PDF_MAX_PIXELS,
minTextChars: files?.pdf?.minTextChars ?? DEFAULT_PDF_MIN_TEXT_CHARS, minTextChars: files?.pdf?.minTextChars ?? DEFAULT_INPUT_PDF_MIN_TEXT_CHARS,
}, },
}, },
images: { images: {
allowUrl: images?.allowUrl ?? true, allowUrl: images?.allowUrl ?? true,
allowedMimes: normalizeMimeList(images?.allowedMimes, DEFAULT_IMAGE_MIMES), allowedMimes: normalizeMimeList(images?.allowedMimes, DEFAULT_INPUT_IMAGE_MIMES),
maxBytes: images?.maxBytes ?? DEFAULT_IMAGE_MAX_BYTES, maxBytes: images?.maxBytes ?? DEFAULT_INPUT_IMAGE_MAX_BYTES,
maxRedirects: images?.maxRedirects ?? DEFAULT_MAX_REDIRECTS, maxRedirects: images?.maxRedirects ?? DEFAULT_INPUT_MAX_REDIRECTS,
timeoutMs: images?.timeoutMs ?? DEFAULT_TIMEOUT_MS, timeoutMs: images?.timeoutMs ?? DEFAULT_INPUT_TIMEOUT_MS,
}, },
}; };
} }
const PRIVATE_IPV4_PATTERNS = [
/^127\./,
/^10\./,
/^192\.168\./,
/^172\.(1[6-9]|2[0-9]|3[0-1])\./,
/^0\./,
];
const PRIVATE_IPV6_PREFIXES = ["::1", "fe80:", "fec0:", "fc", "fd"];
function isPrivateIpAddress(address: string): boolean {
if (address.includes(":")) {
const lower = address.toLowerCase();
if (lower === "::1") return true;
return PRIVATE_IPV6_PREFIXES.some((prefix) => lower.startsWith(prefix));
}
return PRIVATE_IPV4_PATTERNS.some((pattern) => pattern.test(address));
}
function isBlockedHostname(hostname: string): boolean {
const lower = hostname.toLowerCase();
return (
lower === "localhost" ||
lower.endsWith(".localhost") ||
lower.endsWith(".local") ||
lower.endsWith(".internal")
);
}
async function assertPublicHostname(hostname: string): Promise<void> {
if (isBlockedHostname(hostname)) {
throw new Error(`Blocked hostname: ${hostname}`);
}
const results = await lookup(hostname, { all: true });
if (results.length === 0) {
throw new Error(`Unable to resolve hostname: ${hostname}`);
}
for (const entry of results) {
if (isPrivateIpAddress(entry.address)) {
throw new Error(`Private IP addresses are not allowed: ${entry.address}`);
}
}
}
function isRedirectStatus(status: number): boolean {
return status === 301 || status === 302 || status === 303 || status === 307 || status === 308;
}
// Fetch with SSRF protection, timeout, redirect limits, and size limits.
async function fetchWithGuard(params: {
url: string;
maxBytes: number;
timeoutMs: number;
maxRedirects: number;
}): Promise<{ data: string; mimeType: string; contentType?: string }> {
let currentUrl = params.url;
let redirectCount = 0;
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), params.timeoutMs);
try {
while (true) {
const parsedUrl = new URL(currentUrl);
if (!["http:", "https:"].includes(parsedUrl.protocol)) {
throw new Error(`Invalid URL protocol: ${parsedUrl.protocol}. Only HTTP/HTTPS allowed.`);
}
await assertPublicHostname(parsedUrl.hostname);
const response = await fetch(parsedUrl, {
signal: controller.signal,
headers: { "User-Agent": "Clawdbot-Gateway/1.0" },
redirect: "manual",
});
if (isRedirectStatus(response.status)) {
const location = response.headers.get("location");
if (!location) {
throw new Error(`Redirect missing location header (${response.status})`);
}
redirectCount += 1;
if (redirectCount > params.maxRedirects) {
throw new Error(`Too many redirects (limit: ${params.maxRedirects})`);
}
currentUrl = new URL(location, parsedUrl).toString();
continue;
}
if (!response.ok) {
throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`);
}
const contentLength = response.headers.get("content-length");
if (contentLength) {
const size = parseInt(contentLength, 10);
if (size > params.maxBytes) {
throw new Error(`Content too large: ${size} bytes (limit: ${params.maxBytes} bytes)`);
}
}
const buffer = await response.arrayBuffer();
if (buffer.byteLength > params.maxBytes) {
throw new Error(
`Content too large: ${buffer.byteLength} bytes (limit: ${params.maxBytes} bytes)`,
);
}
const contentType = response.headers.get("content-type") || undefined;
const parsed = parseContentType(contentType);
const mimeType = parsed.mimeType ?? "application/octet-stream";
return { data: Buffer.from(buffer).toString("base64"), mimeType, contentType };
}
} finally {
clearTimeout(timeoutId);
}
}
type FileExtractResult = {
filename: string;
text?: string;
images?: ImageContent[];
};
function decodeTextContent(buffer: Buffer, charset: string | undefined): string {
const encoding = charset?.trim().toLowerCase() || "utf-8";
try {
return new TextDecoder(encoding).decode(buffer);
} catch {
return new TextDecoder("utf-8").decode(buffer);
}
}
function clampText(text: string, maxChars: number): string {
if (text.length <= maxChars) return text;
return text.slice(0, maxChars);
}
async function extractPdfContent(params: {
buffer: Buffer;
limits: ResolvedResponsesLimits;
}): Promise<{ text: string; images: ImageContent[] }> {
const { buffer, limits } = params;
const pdfjs = await loadPdfJsModule();
if (!pdfjs) {
throw new Error("PDF parsing requires pdfjs-dist; install it to enable PDF support.");
}
const pdf = await pdfjs.getDocument({
data: new Uint8Array(buffer),
disableWorker: true,
}).promise;
const maxPages = Math.min(pdf.numPages, limits.files.pdf.maxPages);
const textParts: string[] = [];
for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) {
const page = await pdf.getPage(pageNum);
const textContent = await page.getTextContent();
const pageText = textContent.items
.map((item) => ("str" in item ? String(item.str) : ""))
.filter(Boolean)
.join(" ");
if (pageText) textParts.push(pageText);
}
const text = textParts.join("\n\n");
if (text.trim().length >= limits.files.pdf.minTextChars) {
return { text, images: [] };
}
const images: ImageContent[] = [];
const canvasModule = await loadCanvasModule();
if (!canvasModule) {
throw new Error("PDF image extraction requires @napi-rs/canvas; install it to enable images.");
}
for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) {
const page = await pdf.getPage(pageNum);
const viewport = page.getViewport({ scale: 1 });
const maxPixels = limits.files.pdf.maxPixels;
const pixelBudget = Math.max(1, maxPixels);
const pagePixels = viewport.width * viewport.height;
const scale = Math.min(1, Math.sqrt(pixelBudget / pagePixels));
const scaled = page.getViewport({ scale: Math.max(0.1, scale) });
const canvas = canvasModule.createCanvas(Math.ceil(scaled.width), Math.ceil(scaled.height));
await page.render({
canvas: canvas as unknown as HTMLCanvasElement,
viewport: scaled,
}).promise;
const png = canvas.toBuffer("image/png");
images.push({ type: "image", data: png.toString("base64"), mimeType: "image/png" });
}
return { text, images };
}
async function extractImageContent(
part: ContentPart,
limits: ResolvedResponsesLimits,
): Promise<ImageContent | null> {
if (part.type !== "input_image") return null;
const source = part.source as { type: string; url?: string; data?: string; media_type?: string };
if (source.type === "base64") {
if (!source.data) {
throw new Error("input_image base64 source missing 'data' field");
}
const mimeType = normalizeMimeType(source.media_type) ?? "image/png";
if (!limits.images.allowedMimes.has(mimeType)) {
throw new Error(`Unsupported image MIME type: ${mimeType}`);
}
const buffer = Buffer.from(source.data, "base64");
if (buffer.byteLength > limits.images.maxBytes) {
throw new Error(
`Image too large: ${buffer.byteLength} bytes (limit: ${limits.images.maxBytes} bytes)`,
);
}
return { type: "image", data: source.data, mimeType };
}
if (source.type === "url" && source.url) {
if (!limits.images.allowUrl) {
throw new Error("input_image URL sources are disabled by config");
}
const result = await fetchWithGuard({
url: source.url,
maxBytes: limits.images.maxBytes,
timeoutMs: limits.images.timeoutMs,
maxRedirects: limits.images.maxRedirects,
});
if (!limits.images.allowedMimes.has(result.mimeType)) {
throw new Error(`Unsupported image MIME type from URL: ${result.mimeType}`);
}
return { type: "image", data: result.data, mimeType: result.mimeType };
}
throw new Error("input_image must have 'source.url' or 'source.data'");
}
async function extractFileContent(
part: ContentPart,
limits: ResolvedResponsesLimits,
): Promise<FileExtractResult | null> {
if (part.type !== "input_file") return null;
const source = part.source as {
type: string;
url?: string;
data?: string;
media_type?: string;
filename?: string;
};
const filename = source.filename || "file";
let buffer: Buffer;
let mimeType: string | undefined;
let charset: string | undefined;
if (source.type === "base64") {
if (!source.data) {
throw new Error("input_file base64 source missing 'data' field");
}
const parsed = parseContentType(source.media_type);
mimeType = parsed.mimeType;
charset = parsed.charset;
buffer = Buffer.from(source.data, "base64");
} else if (source.type === "url" && source.url) {
if (!limits.files.allowUrl) {
throw new Error("input_file URL sources are disabled by config");
}
const result = await fetchWithGuard({
url: source.url,
maxBytes: limits.files.maxBytes,
timeoutMs: limits.files.timeoutMs,
maxRedirects: limits.files.maxRedirects,
});
const parsed = parseContentType(result.contentType);
mimeType = parsed.mimeType ?? normalizeMimeType(result.mimeType);
charset = parsed.charset;
buffer = Buffer.from(result.data, "base64");
} else {
throw new Error("input_file must have 'source.url' or 'source.data'");
}
if (buffer.byteLength > limits.files.maxBytes) {
throw new Error(
`File too large: ${buffer.byteLength} bytes (limit: ${limits.files.maxBytes} bytes)`,
);
}
if (!mimeType) {
throw new Error("input_file missing media type");
}
if (!limits.files.allowedMimes.has(mimeType)) {
throw new Error(`Unsupported file MIME type: ${mimeType}`);
}
if (mimeType === "application/pdf") {
const extracted = await extractPdfContent({ buffer, limits });
const text = extracted.text ? clampText(extracted.text, limits.files.maxChars) : "";
return {
filename,
text,
images: extracted.images.length > 0 ? extracted.images : undefined,
};
}
const text = clampText(decodeTextContent(buffer, charset), limits.files.maxChars);
return { filename, text };
}
function extractClientTools(body: CreateResponseBody): ClientToolDefinition[] { function extractClientTools(body: CreateResponseBody): ClientToolDefinition[] {
return (body.tools ?? []) as ClientToolDefinition[]; return (body.tools ?? []) as ClientToolDefinition[];
} }
@@ -694,10 +322,7 @@ export async function handleOpenResponsesHttpRequest(
if (url.pathname !== "/v1/responses") return false; if (url.pathname !== "/v1/responses") return false;
if (req.method !== "POST") { if (req.method !== "POST") {
res.statusCode = 405; sendMethodNotAllowed(res);
res.setHeader("Allow", "POST");
res.setHeader("Content-Type", "text/plain; charset=utf-8");
res.end("Method Not Allowed");
return true; return true;
} }
@@ -708,9 +333,7 @@ export async function handleOpenResponsesHttpRequest(
req, req,
}); });
if (!authResult.ok) { if (!authResult.ok) {
sendJson(res, 401, { sendUnauthorized(res);
error: { message: "Unauthorized", type: "unauthorized" },
});
return true; return true;
} }
@@ -720,16 +343,11 @@ export async function handleOpenResponsesHttpRequest(
(opts.config?.maxBodyBytes (opts.config?.maxBodyBytes
? limits.maxBodyBytes ? limits.maxBodyBytes
: Math.max(limits.maxBodyBytes, limits.files.maxBytes * 2, limits.images.maxBytes * 2)); : Math.max(limits.maxBodyBytes, limits.files.maxBytes * 2, limits.images.maxBytes * 2));
const body = await readJsonBody(req, maxBodyBytes); const body = await readJsonBodyOrError(req, res, maxBodyBytes);
if (!body.ok) { if (body === undefined) return true;
sendJson(res, 400, {
error: { message: body.error, type: "invalid_request_error" },
});
return true;
}
// Validate request body with Zod // Validate request body with Zod
const parseResult = CreateResponseBodySchema.safeParse(body.value); const parseResult = CreateResponseBodySchema.safeParse(body);
if (!parseResult.success) { if (!parseResult.success) {
const issue = parseResult.error.issues[0]; const issue = parseResult.error.issues[0];
const message = issue ? `${issue.path.join(".")}: ${issue.message}` : "Invalid request body"; const message = issue ? `${issue.path.join(".")}: ${issue.message}` : "Invalid request body";
@@ -752,13 +370,52 @@ export async function handleOpenResponsesHttpRequest(
for (const item of payload.input) { for (const item of payload.input) {
if (item.type === "message" && typeof item.content !== "string") { if (item.type === "message" && typeof item.content !== "string") {
for (const part of item.content) { for (const part of item.content) {
const image = await extractImageContent(part, limits); if (part.type === "input_image") {
if (image) { const source = part.source as {
type?: string;
url?: string;
data?: string;
media_type?: string;
};
const sourceType =
source.type === "base64" || source.type === "url" ? source.type : undefined;
if (!sourceType) {
throw new Error("input_image must have 'source.url' or 'source.data'");
}
const imageSource: InputImageSource = {
type: sourceType,
url: source.url,
data: source.data,
mediaType: source.media_type,
};
const image = await extractImageContentFromSource(imageSource, limits.images);
images.push(image); images.push(image);
continue; continue;
} }
const file = await extractFileContent(part, limits);
if (file) { if (part.type === "input_file") {
const source = part.source as {
type?: string;
url?: string;
data?: string;
media_type?: string;
filename?: string;
};
const sourceType =
source.type === "base64" || source.type === "url" ? source.type : undefined;
if (!sourceType) {
throw new Error("input_file must have 'source.url' or 'source.data'");
}
const file = await extractFileContentFromSource({
source: {
type: sourceType,
url: source.url,
data: source.data,
mediaType: source.media_type,
filename: source.filename,
},
limits: limits.files,
});
if (file.text?.trim()) { if (file.text?.trim()) {
fileContexts.push(`<file name="${file.filename}">\n${file.text}\n</file>`); fileContexts.push(`<file name="${file.filename}">\n${file.text}\n</file>`);
} else if (file.images && file.images.length > 0) { } else if (file.images && file.images.length > 0) {
@@ -923,11 +580,7 @@ export async function handleOpenResponsesHttpRequest(
// Streaming mode // Streaming mode
// ───────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────
res.statusCode = 200; setSseHeaders(res);
res.setHeader("Content-Type", "text/event-stream; charset=utf-8");
res.setHeader("Cache-Control", "no-cache");
res.setHeader("Connection", "keep-alive");
res.flushHeaders?.();
let accumulatedText = ""; let accumulatedText = "";
let sawAssistantDelta = false; let sawAssistantDelta = false;

385
src/media/input-files.ts Normal file
View File

@@ -0,0 +1,385 @@
import { lookup } from "node:dns/promises";
import { createCanvas } from "@napi-rs/canvas";
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
export type InputImageContent = {
type: "image";
data: string;
mimeType: string;
};
export type InputFileExtractResult = {
filename: string;
text?: string;
images?: InputImageContent[];
};
export type InputPdfLimits = {
maxPages: number;
maxPixels: number;
minTextChars: number;
};
export type InputFileLimits = {
allowUrl: boolean;
allowedMimes: Set<string>;
maxBytes: number;
maxChars: number;
maxRedirects: number;
timeoutMs: number;
pdf: InputPdfLimits;
};
export type InputImageLimits = {
allowUrl: boolean;
allowedMimes: Set<string>;
maxBytes: number;
maxRedirects: number;
timeoutMs: number;
};
export type InputImageSource = {
type: "base64" | "url";
data?: string;
url?: string;
mediaType?: string;
};
export type InputFileSource = {
type: "base64" | "url";
data?: string;
url?: string;
mediaType?: string;
filename?: string;
};
export type InputFetchResult = {
buffer: Buffer;
mimeType: string;
contentType?: string;
};
export const DEFAULT_INPUT_IMAGE_MIMES = ["image/jpeg", "image/png", "image/gif", "image/webp"];
export const DEFAULT_INPUT_FILE_MIMES = [
"text/plain",
"text/markdown",
"text/html",
"text/csv",
"application/json",
"application/pdf",
];
export const DEFAULT_INPUT_IMAGE_MAX_BYTES = 10 * 1024 * 1024;
export const DEFAULT_INPUT_FILE_MAX_BYTES = 5 * 1024 * 1024;
export const DEFAULT_INPUT_FILE_MAX_CHARS = 200_000;
export const DEFAULT_INPUT_MAX_REDIRECTS = 3;
export const DEFAULT_INPUT_TIMEOUT_MS = 10_000;
export const DEFAULT_INPUT_PDF_MAX_PAGES = 4;
export const DEFAULT_INPUT_PDF_MAX_PIXELS = 4_000_000;
export const DEFAULT_INPUT_PDF_MIN_TEXT_CHARS = 200;
const PRIVATE_IPV4_PATTERNS = [
/^127\./,
/^10\./,
/^192\.168\./,
/^172\.(1[6-9]|2[0-9]|3[0-1])\./,
/^0\./,
];
const PRIVATE_IPV6_PREFIXES = ["::1", "fe80:", "fec0:", "fc", "fd"];
function isPrivateIpAddress(address: string): boolean {
if (address.includes(":")) {
const lower = address.toLowerCase();
if (lower === "::1") return true;
return PRIVATE_IPV6_PREFIXES.some((prefix) => lower.startsWith(prefix));
}
return PRIVATE_IPV4_PATTERNS.some((pattern) => pattern.test(address));
}
function isBlockedHostname(hostname: string): boolean {
const lower = hostname.toLowerCase();
return (
lower === "localhost" ||
lower.endsWith(".localhost") ||
lower.endsWith(".local") ||
lower.endsWith(".internal")
);
}
async function assertPublicHostname(hostname: string): Promise<void> {
if (isBlockedHostname(hostname)) {
throw new Error(`Blocked hostname: ${hostname}`);
}
const results = await lookup(hostname, { all: true });
if (results.length === 0) {
throw new Error(`Unable to resolve hostname: ${hostname}`);
}
for (const entry of results) {
if (isPrivateIpAddress(entry.address)) {
throw new Error(`Private IP addresses are not allowed: ${entry.address}`);
}
}
}
function isRedirectStatus(status: number): boolean {
return status === 301 || status === 302 || status === 303 || status === 307 || status === 308;
}
export function normalizeMimeType(value: string | undefined): string | undefined {
if (!value) return undefined;
const [raw] = value.split(";");
const normalized = raw?.trim().toLowerCase();
return normalized || undefined;
}
export function parseContentType(value: string | undefined): {
mimeType?: string;
charset?: string;
} {
if (!value) return {};
const parts = value.split(";").map((part) => part.trim());
const mimeType = normalizeMimeType(parts[0]);
const charset = parts
.map((part) => part.match(/^charset=(.+)$/i)?.[1]?.trim())
.find((part) => part && part.length > 0);
return { mimeType, charset };
}
export function normalizeMimeList(values: string[] | undefined, fallback: string[]): Set<string> {
const input = values && values.length > 0 ? values : fallback;
return new Set(input.map((value) => normalizeMimeType(value)).filter(Boolean) as string[]);
}
export async function fetchWithGuard(params: {
url: string;
maxBytes: number;
timeoutMs: number;
maxRedirects: number;
}): Promise<InputFetchResult> {
let currentUrl = params.url;
let redirectCount = 0;
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), params.timeoutMs);
try {
while (true) {
const parsedUrl = new URL(currentUrl);
if (!["http:", "https:"].includes(parsedUrl.protocol)) {
throw new Error(`Invalid URL protocol: ${parsedUrl.protocol}. Only HTTP/HTTPS allowed.`);
}
await assertPublicHostname(parsedUrl.hostname);
const response = await fetch(parsedUrl, {
signal: controller.signal,
headers: { "User-Agent": "Clawdbot-Gateway/1.0" },
redirect: "manual",
});
if (isRedirectStatus(response.status)) {
const location = response.headers.get("location");
if (!location) {
throw new Error(`Redirect missing location header (${response.status})`);
}
redirectCount += 1;
if (redirectCount > params.maxRedirects) {
throw new Error(`Too many redirects (limit: ${params.maxRedirects})`);
}
currentUrl = new URL(location, parsedUrl).toString();
continue;
}
if (!response.ok) {
throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`);
}
const contentLength = response.headers.get("content-length");
if (contentLength) {
const size = parseInt(contentLength, 10);
if (size > params.maxBytes) {
throw new Error(`Content too large: ${size} bytes (limit: ${params.maxBytes} bytes)`);
}
}
const buffer = Buffer.from(await response.arrayBuffer());
if (buffer.byteLength > params.maxBytes) {
throw new Error(
`Content too large: ${buffer.byteLength} bytes (limit: ${params.maxBytes} bytes)`,
);
}
const contentType = response.headers.get("content-type") || undefined;
const parsed = parseContentType(contentType);
const mimeType = parsed.mimeType ?? "application/octet-stream";
return { buffer, mimeType, contentType };
}
} finally {
clearTimeout(timeoutId);
}
}
function decodeTextContent(buffer: Buffer, charset: string | undefined): string {
const encoding = charset?.trim().toLowerCase() || "utf-8";
try {
return new TextDecoder(encoding).decode(buffer);
} catch {
return new TextDecoder("utf-8").decode(buffer);
}
}
function clampText(text: string, maxChars: number): string {
if (text.length <= maxChars) return text;
return text.slice(0, maxChars);
}
async function extractPdfContent(params: {
buffer: Buffer;
limits: InputFileLimits;
}): Promise<{ text: string; images: InputImageContent[] }> {
const { buffer, limits } = params;
const pdf = await getDocument({
data: new Uint8Array(buffer),
// @ts-expect-error pdfjs-dist legacy option not in current type defs.
disableWorker: true,
}).promise;
const maxPages = Math.min(pdf.numPages, limits.pdf.maxPages);
const textParts: string[] = [];
for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) {
const page = await pdf.getPage(pageNum);
const textContent = await page.getTextContent();
const pageText = textContent.items
.map((item) => ("str" in item ? String(item.str) : ""))
.filter(Boolean)
.join(" ");
if (pageText) textParts.push(pageText);
}
const text = textParts.join("\n\n");
if (text.trim().length >= limits.pdf.minTextChars) {
return { text, images: [] };
}
const images: InputImageContent[] = [];
for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) {
const page = await pdf.getPage(pageNum);
const viewport = page.getViewport({ scale: 1 });
const maxPixels = limits.pdf.maxPixels;
const pixelBudget = Math.max(1, maxPixels);
const pagePixels = viewport.width * viewport.height;
const scale = Math.min(1, Math.sqrt(pixelBudget / pagePixels));
const scaled = page.getViewport({ scale: Math.max(0.1, scale) });
const canvas = createCanvas(Math.ceil(scaled.width), Math.ceil(scaled.height));
await page.render({
canvas: canvas as unknown as HTMLCanvasElement,
viewport: scaled,
}).promise;
const png = canvas.toBuffer("image/png");
images.push({ type: "image", data: png.toString("base64"), mimeType: "image/png" });
}
return { text, images };
}
export async function extractImageContentFromSource(
source: InputImageSource,
limits: InputImageLimits,
): Promise<InputImageContent> {
if (source.type === "base64") {
if (!source.data) {
throw new Error("input_image base64 source missing 'data' field");
}
const mimeType = normalizeMimeType(source.mediaType) ?? "image/png";
if (!limits.allowedMimes.has(mimeType)) {
throw new Error(`Unsupported image MIME type: ${mimeType}`);
}
const buffer = Buffer.from(source.data, "base64");
if (buffer.byteLength > limits.maxBytes) {
throw new Error(
`Image too large: ${buffer.byteLength} bytes (limit: ${limits.maxBytes} bytes)`,
);
}
return { type: "image", data: source.data, mimeType };
}
if (source.type === "url" && source.url) {
if (!limits.allowUrl) {
throw new Error("input_image URL sources are disabled by config");
}
const result = await fetchWithGuard({
url: source.url,
maxBytes: limits.maxBytes,
timeoutMs: limits.timeoutMs,
maxRedirects: limits.maxRedirects,
});
if (!limits.allowedMimes.has(result.mimeType)) {
throw new Error(`Unsupported image MIME type from URL: ${result.mimeType}`);
}
return { type: "image", data: result.buffer.toString("base64"), mimeType: result.mimeType };
}
throw new Error("input_image must have 'source.url' or 'source.data'");
}
export async function extractFileContentFromSource(params: {
source: InputFileSource;
limits: InputFileLimits;
}): Promise<InputFileExtractResult> {
const { source, limits } = params;
const filename = source.filename || "file";
let buffer: Buffer;
let mimeType: string | undefined;
let charset: string | undefined;
if (source.type === "base64") {
if (!source.data) {
throw new Error("input_file base64 source missing 'data' field");
}
const parsed = parseContentType(source.mediaType);
mimeType = parsed.mimeType;
charset = parsed.charset;
buffer = Buffer.from(source.data, "base64");
} else if (source.type === "url" && source.url) {
if (!limits.allowUrl) {
throw new Error("input_file URL sources are disabled by config");
}
const result = await fetchWithGuard({
url: source.url,
maxBytes: limits.maxBytes,
timeoutMs: limits.timeoutMs,
maxRedirects: limits.maxRedirects,
});
const parsed = parseContentType(result.contentType);
mimeType = parsed.mimeType ?? normalizeMimeType(result.mimeType);
charset = parsed.charset;
buffer = result.buffer;
} else {
throw new Error("input_file must have 'source.url' or 'source.data'");
}
if (buffer.byteLength > limits.maxBytes) {
throw new Error(`File too large: ${buffer.byteLength} bytes (limit: ${limits.maxBytes} bytes)`);
}
if (!mimeType) {
throw new Error("input_file missing media type");
}
if (!limits.allowedMimes.has(mimeType)) {
throw new Error(`Unsupported file MIME type: ${mimeType}`);
}
if (mimeType === "application/pdf") {
const extracted = await extractPdfContent({ buffer, limits });
const text = extracted.text ? clampText(extracted.text, limits.maxChars) : "";
return {
filename,
text,
images: extracted.images.length > 0 ? extracted.images : undefined,
};
}
const text = clampText(decodeTextContent(buffer, charset), limits.maxChars);
return { filename, text };
}