diff --git a/CHANGELOG.md b/CHANGELOG.md index e5a68c047..f229bbca0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Docs: https://docs.clawd.bot ### Changes - Android: remove legacy bridge transport code now that nodes use the gateway protocol. - Android: send structured payloads in node events/invokes and include user-agent metadata in gateway connects. +- Gateway: expand `/v1/responses` to support file/image inputs, tool_choice, usage, and output limits. (#1229) — thanks @RyanLisse. - Docs: surface Amazon Bedrock in provider lists and clarify Bedrock auth env vars. (#1289) — thanks @steipete. ### Fixes diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 0d7f7ce74..e89af4b54 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -2669,6 +2669,7 @@ Notes: - `clawdbot gateway` refuses to start unless `gateway.mode` is set to `local` (or you pass the override flag). - `gateway.port` controls the single multiplexed port used for WebSocket + HTTP (control UI, hooks, A2UI). - OpenAI Chat Completions endpoint: **disabled by default**; enable with `gateway.http.endpoints.chatCompletions.enabled: true`. +- OpenResponses endpoint: **disabled by default**; enable with `gateway.http.endpoints.responses.enabled: true`. - Precedence: `--port` > `CLAWDBOT_GATEWAY_PORT` > `gateway.port` > default `18789`. - Non-loopback binds (`lan`/`tailnet`/`auto`) require auth. Use `gateway.auth.token` (or `CLAWDBOT_GATEWAY_TOKEN`). - The onboarding wizard generates a gateway token by default (even on loopback). diff --git a/docs/gateway/index.md b/docs/gateway/index.md index fb19a1263..83ed93952 100644 --- a/docs/gateway/index.md +++ b/docs/gateway/index.md @@ -29,6 +29,7 @@ pnpm gateway:watch - Binds WebSocket control plane to `127.0.0.1:` (default 18789). - The same port also serves HTTP (control UI, hooks, A2UI). Single-port multiplex. - OpenAI Chat Completions (HTTP): [`/v1/chat/completions`](/gateway/openai-http-api). + - OpenResponses (HTTP): [`/v1/responses`](/gateway/openresponses-http-api). - Starts a Canvas file server by default on `canvasHost.port` (default `18793`), serving `http://:18793/__clawdbot__/canvas/` from `~/clawd/canvas`. Disable with `canvasHost.enabled=false` or `CLAWDBOT_SKIP_CANVAS_HOST=1`. - Logs to stdout; use launchd/systemd to keep it alive and rotate logs. - Pass `--verbose` to mirror debug logging (handshakes, req/res, events) from the log file into stdio when troubleshooting. diff --git a/docs/gateway/openresponses-http-api.md b/docs/gateway/openresponses-http-api.md new file mode 100644 index 000000000..5abf1f445 --- /dev/null +++ b/docs/gateway/openresponses-http-api.md @@ -0,0 +1,277 @@ +--- +summary: "Expose an OpenResponses-compatible /v1/responses HTTP endpoint from the Gateway" +read_when: + - Integrating clients that speak the OpenResponses API + - You want item-based inputs, client tool calls, or SSE events +--- +# OpenResponses API (HTTP) + +Clawdbot’s Gateway can serve an OpenResponses-compatible `POST /v1/responses` endpoint. + +This endpoint is **disabled by default**. Enable it in config first. + +- `POST /v1/responses` +- Same port as the Gateway (WS + HTTP multiplex): `http://:/v1/responses` + +Under the hood, requests are executed as a normal Gateway agent run (same codepath as +`clawdbot agent`), so routing/permissions/config match your Gateway. + +## Authentication + +Uses the Gateway auth configuration. Send a bearer token: + +- `Authorization: Bearer ` + +Notes: +- When `gateway.auth.mode="token"`, use `gateway.auth.token` (or `CLAWDBOT_GATEWAY_TOKEN`). +- When `gateway.auth.mode="password"`, use `gateway.auth.password` (or `CLAWDBOT_GATEWAY_PASSWORD`). + +## Choosing an agent + +No custom headers required: encode the agent id in the OpenResponses `model` field: + +- `model: "clawdbot:"` (example: `"clawdbot:main"`, `"clawdbot:beta"`) +- `model: "agent:"` (alias) + +Or target a specific Clawdbot agent by header: + +- `x-clawdbot-agent-id: ` (default: `main`) + +Advanced: +- `x-clawdbot-session-key: ` to fully control session routing. + +## Enabling the endpoint + +Set `gateway.http.endpoints.responses.enabled` to `true`: + +```json5 +{ + gateway: { + http: { + endpoints: { + responses: { enabled: true } + } + } + } +} +``` + +## Disabling the endpoint + +Set `gateway.http.endpoints.responses.enabled` to `false`: + +```json5 +{ + gateway: { + http: { + endpoints: { + responses: { enabled: false } + } + } + } +} +``` + +## Session behavior + +By default the endpoint is **stateless per request** (a new session key is generated each call). + +If the request includes an OpenResponses `user` string, the Gateway derives a stable session key +from it, so repeated calls can share an agent session. + +## Request shape (supported) + +The request follows the OpenResponses API with item-based input. Current support: + +- `input`: string or array of item objects. +- `instructions`: merged into the system prompt. +- `tools`: client tool definitions (function tools). +- `tool_choice`: filter or require client tools. +- `stream`: enables SSE streaming. +- `max_output_tokens`: best-effort output limit (provider dependent). +- `user`: stable session routing. + +Accepted but **currently ignored**: + +- `max_tool_calls` +- `reasoning` +- `metadata` +- `store` +- `previous_response_id` +- `truncation` + +## Items (input) + +### `message` +Roles: `system`, `developer`, `user`, `assistant`. + +- `system` and `developer` are appended to the system prompt. +- The most recent `user` or `function_call_output` item becomes the “current message.” +- Earlier user/assistant messages are included as history for context. + +### `function_call_output` (turn-based tools) + +Send tool results back to the model: + +```json +{ + "type": "function_call_output", + "call_id": "call_123", + "output": "{\"temperature\": \"72F\"}" +} +``` + +### `reasoning` and `item_reference` + +Accepted for schema compatibility but ignored when building the prompt. + +## Tools (client-side function tools) + +Provide tools with `tools: [{ type: "function", function: { name, description?, parameters? } }]`. + +If the agent decides to call a tool, the response returns a `function_call` output item. +You then send a follow-up request with `function_call_output` to continue the turn. + +## Images (`input_image`) + +Supports base64 or URL sources: + +```json +{ + "type": "input_image", + "source": { "type": "url", "url": "https://example.com/image.png" } +} +``` + +Allowed MIME types (current): `image/jpeg`, `image/png`, `image/gif`, `image/webp`. +Max size (current): 10MB. + +## Files (`input_file`) + +Supports base64 or URL sources: + +```json +{ + "type": "input_file", + "source": { + "type": "base64", + "media_type": "text/plain", + "data": "SGVsbG8gV29ybGQh", + "filename": "hello.txt" + } +} +``` + +Allowed MIME types (current): `text/plain`, `text/markdown`, `text/html`, `text/csv`, +`application/json`, `application/pdf`. + +Max size (current): 5MB. + +Current behavior: +- File content is decoded and added to the **system prompt**, not the user message, + so it stays ephemeral (not persisted in session history). +- PDFs are parsed for text. If little text is found, the first pages are rasterized + into images and passed to the model. + +## File + image limits (config) + +Defaults can be tuned under `gateway.http.endpoints.responses`: + +```json5 +{ + gateway: { + http: { + endpoints: { + responses: { + enabled: true, + maxBodyBytes: 20000000, + files: { + allowUrl: true, + allowedMimes: ["text/plain", "text/markdown", "text/html", "text/csv", "application/json", "application/pdf"], + maxBytes: 5242880, + maxChars: 200000, + maxRedirects: 3, + timeoutMs: 10000, + pdf: { + maxPages: 4, + maxPixels: 4000000, + minTextChars: 200 + } + }, + images: { + allowUrl: true, + allowedMimes: ["image/jpeg", "image/png", "image/gif", "image/webp"], + maxBytes: 10485760, + maxRedirects: 3, + timeoutMs: 10000 + } + } + } + } + } +} +``` + +## Streaming (SSE) + +Set `stream: true` to receive Server-Sent Events (SSE): + +- `Content-Type: text/event-stream` +- Each event line is `event: ` and `data: ` +- Stream ends with `data: [DONE]` + +Event types currently emitted: +- `response.created` +- `response.in_progress` +- `response.output_item.added` +- `response.content_part.added` +- `response.output_text.delta` +- `response.output_text.done` +- `response.content_part.done` +- `response.output_item.done` +- `response.completed` +- `response.failed` (on error) + +## Usage + +`usage` is populated when the underlying provider reports token counts. + +## Errors + +Errors use a JSON object like: + +```json +{ "error": { "message": "...", "type": "invalid_request_error" } } +``` + +Common cases: +- `401` missing/invalid auth +- `400` invalid request body +- `405` wrong method + +## Examples + +Non-streaming: +```bash +curl -sS http://127.0.0.1:18789/v1/responses \ + -H 'Authorization: Bearer YOUR_TOKEN' \ + -H 'Content-Type: application/json' \ + -H 'x-clawdbot-agent-id: main' \ + -d '{ + "model": "clawdbot", + "input": "hi" + }' +``` + +Streaming: +```bash +curl -N http://127.0.0.1:18789/v1/responses \ + -H 'Authorization: Bearer YOUR_TOKEN' \ + -H 'Content-Type: application/json' \ + -H 'x-clawdbot-agent-id: main' \ + -d '{ + "model": "clawdbot", + "stream": true, + "input": "hi" + }' +``` diff --git a/package.json b/package.json index bf274dee1..c31058980 100644 --- a/package.json +++ b/package.json @@ -155,6 +155,7 @@ "@mariozechner/pi-coding-agent": "^0.46.0", "@mariozechner/pi-tui": "^0.46.0", "@mozilla/readability": "^0.6.0", + "@napi-rs/canvas": "^0.1.88", "@sinclair/typebox": "0.34.47", "@slack/bolt": "^4.6.0", "@slack/web-api": "^7.13.0", @@ -181,6 +182,7 @@ "long": "5.3.2", "markdown-it": "^14.1.0", "osc-progress": "^0.2.0", + "pdfjs-dist": "^5.4.530", "playwright-core": "1.57.0", "proper-lockfile": "^4.1.2", "qrcode-terminal": "^0.12.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c60188274..23e901215 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -49,6 +49,9 @@ importers: '@mozilla/readability': specifier: ^0.6.0 version: 0.6.0 + '@napi-rs/canvas': + specifier: ^0.1.88 + version: 0.1.88 '@sinclair/typebox': specifier: 0.34.47 version: 0.34.47 @@ -127,6 +130,9 @@ importers: osc-progress: specifier: ^0.2.0 version: 0.2.0 + pdfjs-dist: + specifier: ^5.4.530 + version: 5.4.530 playwright-core: specifier: 1.57.0 version: 1.57.0 @@ -1205,6 +1211,76 @@ packages: resolution: {integrity: sha512-juG5VWh4qAivzTAeMzvY9xs9HY5rAcr2E4I7tiSSCokRFi7XIZCAu92ZkSTsIj1OPceCifL3cpfteP3pDT9/QQ==} engines: {node: '>=14.0.0'} + '@napi-rs/canvas-android-arm64@0.1.88': + resolution: {integrity: sha512-KEaClPnZuVxJ8smUWjV1wWFkByBO/D+vy4lN+Dm5DFH514oqwukxKGeck9xcKJhaWJGjfruGmYGiwRe//+/zQQ==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [android] + + '@napi-rs/canvas-darwin-arm64@0.1.88': + resolution: {integrity: sha512-Xgywz0dDxOKSgx3eZnK85WgGMmGrQEW7ZLA/E7raZdlEE+xXCozobgqz2ZvYigpB6DJFYkqnwHjqCOTSDGlFdg==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [darwin] + + '@napi-rs/canvas-darwin-x64@0.1.88': + resolution: {integrity: sha512-Yz4wSCIQOUgNucgk+8NFtQxQxZV5NO8VKRl9ePKE6XoNyNVC8JDqtvhh3b3TPqKK8W5p2EQpAr1rjjm0mfBxdg==} + engines: {node: '>= 10'} + cpu: [x64] + os: [darwin] + + '@napi-rs/canvas-linux-arm-gnueabihf@0.1.88': + resolution: {integrity: sha512-9gQM2SlTo76hYhxHi2XxWTAqpTOb+JtxMPEIr+H5nAhHhyEtNmTSDRtz93SP7mGd2G3Ojf2oF5tP9OdgtgXyKg==} + engines: {node: '>= 10'} + cpu: [arm] + os: [linux] + + '@napi-rs/canvas-linux-arm64-gnu@0.1.88': + resolution: {integrity: sha512-7qgaOBMXuVRk9Fzztzr3BchQKXDxGbY+nwsovD3I/Sx81e+sX0ReEDYHTItNb0Je4NHbAl7D0MKyd4SvUc04sg==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [linux] + + '@napi-rs/canvas-linux-arm64-musl@0.1.88': + resolution: {integrity: sha512-kYyNrUsHLkoGHBc77u4Unh067GrfiCUMbGHC2+OTxbeWfZkPt2o32UOQkhnSswKd9Fko/wSqqGkY956bIUzruA==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [linux] + + '@napi-rs/canvas-linux-riscv64-gnu@0.1.88': + resolution: {integrity: sha512-HVuH7QgzB0yavYdNZDRyAsn/ejoXB0hn8twwFnOqUbCCdkV+REna7RXjSR7+PdfW0qMQ2YYWsLvVBT5iL/mGpw==} + engines: {node: '>= 10'} + cpu: [riscv64] + os: [linux] + + '@napi-rs/canvas-linux-x64-gnu@0.1.88': + resolution: {integrity: sha512-hvcvKIcPEQrvvJtJnwD35B3qk6umFJ8dFIr8bSymfrSMem0EQsfn1ztys8ETIFndTwdNWJKWluvxztA41ivsEw==} + engines: {node: '>= 10'} + cpu: [x64] + os: [linux] + + '@napi-rs/canvas-linux-x64-musl@0.1.88': + resolution: {integrity: sha512-eSMpGYY2xnZSQ6UxYJ6plDboxq4KeJ4zT5HaVkUnbObNN6DlbJe0Mclh3wifAmquXfrlgTZt6zhHsUgz++AK6g==} + engines: {node: '>= 10'} + cpu: [x64] + os: [linux] + + '@napi-rs/canvas-win32-arm64-msvc@0.1.88': + resolution: {integrity: sha512-qcIFfEgHrchyYqRrxsCeTQgpJZ/GqHiqPcU/Fvw/ARVlQeDX1VyFH+X+0gCR2tca6UJrq96vnW+5o7buCq+erA==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [win32] + + '@napi-rs/canvas-win32-x64-msvc@0.1.88': + resolution: {integrity: sha512-ROVqbfS4QyZxYkqmaIBBpbz/BQvAR+05FXM5PAtTYVc0uyY8Y4BHJSMdGAaMf6TdIVRsQsiq+FG/dH9XhvWCFQ==} + engines: {node: '>= 10'} + cpu: [x64] + os: [win32] + + '@napi-rs/canvas@0.1.88': + resolution: {integrity: sha512-/p08f93LEbsL5mDZFQ3DBxcPv/I4QG9EDYRRq1WNlCOXVfAHBTHMSVMwxlqG/AtnSfUr9+vgfN7MKiyDo0+Weg==} + engines: {node: '>= 10'} + '@napi-rs/wasm-runtime@1.1.1': resolution: {integrity: sha512-p64ah1M1ld8xjWv3qbvFwHiFVWrq1yFvV4f7w+mzaqiR4IlSgkqhcRdHwsGgomwzBH51sRY4NEowLxnaBjcW/A==} @@ -3756,6 +3832,10 @@ packages: pathe@2.0.3: resolution: {integrity: sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==} + pdfjs-dist@5.4.530: + resolution: {integrity: sha512-r1hWsSIGGmyYUAHR26zSXkxYWLXLMd6AwqcaFYG9YUZ0GBf5GvcjJSeo512tabM4GYFhxhl5pMCmPr7Q72Rq2Q==} + engines: {node: '>=20.16.0 || >=22.3.0'} + picocolors@1.1.1: resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==} @@ -5671,6 +5751,53 @@ snapshots: '@mozilla/readability@0.6.0': {} + '@napi-rs/canvas-android-arm64@0.1.88': + optional: true + + '@napi-rs/canvas-darwin-arm64@0.1.88': + optional: true + + '@napi-rs/canvas-darwin-x64@0.1.88': + optional: true + + '@napi-rs/canvas-linux-arm-gnueabihf@0.1.88': + optional: true + + '@napi-rs/canvas-linux-arm64-gnu@0.1.88': + optional: true + + '@napi-rs/canvas-linux-arm64-musl@0.1.88': + optional: true + + '@napi-rs/canvas-linux-riscv64-gnu@0.1.88': + optional: true + + '@napi-rs/canvas-linux-x64-gnu@0.1.88': + optional: true + + '@napi-rs/canvas-linux-x64-musl@0.1.88': + optional: true + + '@napi-rs/canvas-win32-arm64-msvc@0.1.88': + optional: true + + '@napi-rs/canvas-win32-x64-msvc@0.1.88': + optional: true + + '@napi-rs/canvas@0.1.88': + optionalDependencies: + '@napi-rs/canvas-android-arm64': 0.1.88 + '@napi-rs/canvas-darwin-arm64': 0.1.88 + '@napi-rs/canvas-darwin-x64': 0.1.88 + '@napi-rs/canvas-linux-arm-gnueabihf': 0.1.88 + '@napi-rs/canvas-linux-arm64-gnu': 0.1.88 + '@napi-rs/canvas-linux-arm64-musl': 0.1.88 + '@napi-rs/canvas-linux-riscv64-gnu': 0.1.88 + '@napi-rs/canvas-linux-x64-gnu': 0.1.88 + '@napi-rs/canvas-linux-x64-musl': 0.1.88 + '@napi-rs/canvas-win32-arm64-msvc': 0.1.88 + '@napi-rs/canvas-win32-x64-msvc': 0.1.88 + '@napi-rs/wasm-runtime@1.1.1': dependencies: '@emnapi/core': 1.8.1 @@ -8462,6 +8589,10 @@ snapshots: pathe@2.0.3: {} + pdfjs-dist@5.4.530: + optionalDependencies: + '@napi-rs/canvas': 0.1.88 + picocolors@1.1.1: {} picomatch@2.3.1: {} diff --git a/src/agents/cli-runner.ts b/src/agents/cli-runner.ts index 0f1b27e86..01d5432fc 100644 --- a/src/agents/cli-runner.ts +++ b/src/agents/cli-runner.ts @@ -45,6 +45,7 @@ export async function runCliAgent(params: { timeoutMs: number; runId: string; extraSystemPrompt?: string; + streamParams?: import("../commands/agent/types.js").AgentStreamParams; ownerNumbers?: string[]; cliSessionId?: string; images?: ImageContent[]; diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts index a58ba1c71..fbb1e161b 100644 --- a/src/agents/pi-embedded-runner/extra-params.ts +++ b/src/agents/pi-embedded-runner/extra-params.ts @@ -63,13 +63,21 @@ export function applyExtraParamsToAgent( cfg: ClawdbotConfig | undefined, provider: string, modelId: string, + extraParamsOverride?: Record, ): void { const extraParams = resolveExtraParams({ cfg, provider, modelId, }); - const wrappedStreamFn = createStreamFnWithExtraParams(agent.streamFn, extraParams); + const override = + extraParamsOverride && Object.keys(extraParamsOverride).length > 0 + ? Object.fromEntries( + Object.entries(extraParamsOverride).filter(([, value]) => value !== undefined), + ) + : undefined; + const merged = Object.assign({}, extraParams, override); + const wrappedStreamFn = createStreamFnWithExtraParams(agent.streamFn, merged); if (wrappedStreamFn) { log.debug(`applying extraParams to agent streamFn for ${provider}/${modelId}`); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 67f769665..3e59ff5ed 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -239,6 +239,7 @@ export async function runEmbeddedPiAgent( onToolResult: params.onToolResult, onAgentEvent: params.onAgentEvent, extraSystemPrompt: params.extraSystemPrompt, + streamParams: params.streamParams, ownerNumbers: params.ownerNumbers, enforceFinalTag: params.enforceFinalTag, }); diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index eff5afa22..92fdc9ee5 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -349,7 +349,13 @@ export async function runEmbeddedAttempt( // Force a stable streamFn reference so vitest can reliably mock @mariozechner/pi-ai. activeSession.agent.streamFn = streamSimple; - applyExtraParamsToAgent(activeSession.agent, params.config, params.provider, params.modelId); + applyExtraParamsToAgent( + activeSession.agent, + params.config, + params.provider, + params.modelId, + params.streamParams, + ); try { const prior = await sanitizeSessionHistory({ diff --git a/src/agents/pi-embedded-runner/run/params.ts b/src/agents/pi-embedded-runner/run/params.ts index ada8656d6..1ebc129b1 100644 --- a/src/agents/pi-embedded-runner/run/params.ts +++ b/src/agents/pi-embedded-runner/run/params.ts @@ -1,6 +1,7 @@ import type { ImageContent } from "@mariozechner/pi-ai"; import type { ReasoningLevel, ThinkLevel, VerboseLevel } from "../../../auto-reply/thinking.js"; import type { ClawdbotConfig } from "../../../config/config.js"; +import type { AgentStreamParams } from "../../../commands/agent/types.js"; import type { enqueueCommand } from "../../../process/command-queue.js"; import type { ExecElevatedDefaults, ExecToolDefaults } from "../../bash-tools.js"; import type { BlockReplyChunking, ToolResultFormat } from "../../pi-embedded-subscribe.js"; @@ -70,6 +71,7 @@ export type RunEmbeddedPiAgentParams = { lane?: string; enqueue?: typeof enqueueCommand; extraSystemPrompt?: string; + streamParams?: AgentStreamParams; ownerNumbers?: string[]; enforceFinalTag?: boolean; }; diff --git a/src/agents/pi-embedded-runner/run/types.ts b/src/agents/pi-embedded-runner/run/types.ts index 005f118e0..87940f4d4 100644 --- a/src/agents/pi-embedded-runner/run/types.ts +++ b/src/agents/pi-embedded-runner/run/types.ts @@ -4,6 +4,7 @@ import type { discoverAuthStorage, discoverModels } from "@mariozechner/pi-codin import type { ReasoningLevel, ThinkLevel, VerboseLevel } from "../../../auto-reply/thinking.js"; import type { ClawdbotConfig } from "../../../config/config.js"; +import type { AgentStreamParams } from "../../../commands/agent/types.js"; import type { ExecElevatedDefaults, ExecToolDefaults } from "../../bash-tools.js"; import type { MessagingToolSend } from "../../pi-embedded-messaging.js"; import type { BlockReplyChunking, ToolResultFormat } from "../../pi-embedded-subscribe.js"; @@ -63,6 +64,7 @@ export type EmbeddedRunAttemptParams = { onToolResult?: (payload: { text?: string; mediaUrls?: string[] }) => void | Promise; onAgentEvent?: (evt: { stream: string; data: Record }) => void; extraSystemPrompt?: string; + streamParams?: AgentStreamParams; ownerNumbers?: string[]; enforceFinalTag?: boolean; }; diff --git a/src/commands/agent.ts b/src/commands/agent.ts index f0ef72638..63258a4b8 100644 --- a/src/commands/agent.ts +++ b/src/commands/agent.ts @@ -396,6 +396,7 @@ export async function agentCommand( extraSystemPrompt: opts.extraSystemPrompt, cliSessionId, images: opts.images, + streamParams: opts.streamParams, }); } const authProfileId = @@ -429,6 +430,7 @@ export async function agentCommand( lane: opts.lane, abortSignal: opts.abortSignal, extraSystemPrompt: opts.extraSystemPrompt, + streamParams: opts.streamParams, agentDir, onAgentEvent: (evt) => { if ( diff --git a/src/commands/agent/types.ts b/src/commands/agent/types.ts index 900534911..deb1b7bc8 100644 --- a/src/commands/agent/types.ts +++ b/src/commands/agent/types.ts @@ -8,6 +8,12 @@ export type ImageContent = { mimeType: string; }; +export type AgentStreamParams = { + /** Provider stream params override (best-effort). */ + temperature?: number; + maxTokens?: number; +}; + export type AgentRunContext = { messageChannel?: string; accountId?: string; @@ -53,4 +59,6 @@ export type AgentCommandOpts = { lane?: string; runId?: string; extraSystemPrompt?: string; + /** Per-call stream param overrides (best-effort). */ + streamParams?: AgentStreamParams; }; diff --git a/src/config/types.gateway.ts b/src/config/types.gateway.ts index 6157b072f..77e6c88a2 100644 --- a/src/config/types.gateway.ts +++ b/src/config/types.gateway.ts @@ -111,6 +111,54 @@ export type GatewayHttpResponsesConfig = { * Default: false when absent. */ enabled?: boolean; + /** + * Max request body size in bytes for `/v1/responses`. + * Default: 20MB. + */ + maxBodyBytes?: number; + /** File inputs (input_file). */ + files?: GatewayHttpResponsesFilesConfig; + /** Image inputs (input_image). */ + images?: GatewayHttpResponsesImagesConfig; +}; + +export type GatewayHttpResponsesFilesConfig = { + /** Allow URL fetches for input_file. Default: true. */ + allowUrl?: boolean; + /** Allowed MIME types (case-insensitive). */ + allowedMimes?: string[]; + /** Max bytes per file. Default: 5MB. */ + maxBytes?: number; + /** Max decoded characters per file. Default: 200k. */ + maxChars?: number; + /** Max redirects when fetching a URL. Default: 3. */ + maxRedirects?: number; + /** Fetch timeout in ms. Default: 10s. */ + timeoutMs?: number; + /** PDF handling (application/pdf). */ + pdf?: GatewayHttpResponsesPdfConfig; +}; + +export type GatewayHttpResponsesPdfConfig = { + /** Max pages to parse/render. Default: 4. */ + maxPages?: number; + /** Max pixels per rendered page. Default: 4M. */ + maxPixels?: number; + /** Minimum extracted text length to skip rasterization. Default: 200 chars. */ + minTextChars?: number; +}; + +export type GatewayHttpResponsesImagesConfig = { + /** Allow URL fetches for input_image. Default: true. */ + allowUrl?: boolean; + /** Allowed MIME types (case-insensitive). */ + allowedMimes?: string[]; + /** Max bytes per image. Default: 10MB. */ + maxBytes?: number; + /** Max redirects when fetching a URL. Default: 3. */ + maxRedirects?: number; + /** Fetch timeout in ms. Default: 10s. */ + timeoutMs?: number; }; export type GatewayHttpEndpointsConfig = { diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index e0811c8e9..58612d459 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -302,6 +302,36 @@ export const ClawdbotSchema = z responses: z .object({ enabled: z.boolean().optional(), + maxBodyBytes: z.number().int().positive().optional(), + files: z + .object({ + allowUrl: z.boolean().optional(), + allowedMimes: z.array(z.string()).optional(), + maxBytes: z.number().int().positive().optional(), + maxChars: z.number().int().positive().optional(), + maxRedirects: z.number().int().nonnegative().optional(), + timeoutMs: z.number().int().positive().optional(), + pdf: z + .object({ + maxPages: z.number().int().positive().optional(), + maxPixels: z.number().int().positive().optional(), + minTextChars: z.number().int().nonnegative().optional(), + }) + .strict() + .optional(), + }) + .strict() + .optional(), + images: z + .object({ + allowUrl: z.boolean().optional(), + allowedMimes: z.array(z.string()).optional(), + maxBytes: z.number().int().positive().optional(), + maxRedirects: z.number().int().nonnegative().optional(), + timeoutMs: z.number().int().positive().optional(), + }) + .strict() + .optional(), }) .strict() .optional(), diff --git a/src/gateway/http-utils.ts b/src/gateway/http-utils.ts new file mode 100644 index 000000000..dde9dd3f6 --- /dev/null +++ b/src/gateway/http-utils.ts @@ -0,0 +1,64 @@ +import { randomUUID } from "node:crypto"; +import type { IncomingMessage } from "node:http"; + +import { buildAgentMainSessionKey, normalizeAgentId } from "../routing/session-key.js"; + +export function getHeader(req: IncomingMessage, name: string): string | undefined { + const raw = req.headers[name.toLowerCase()]; + if (typeof raw === "string") return raw; + if (Array.isArray(raw)) return raw[0]; + return undefined; +} + +export function getBearerToken(req: IncomingMessage): string | undefined { + const raw = getHeader(req, "authorization")?.trim() ?? ""; + if (!raw.toLowerCase().startsWith("bearer ")) return undefined; + const token = raw.slice(7).trim(); + return token || undefined; +} + +export function resolveAgentIdFromHeader(req: IncomingMessage): string | undefined { + const raw = + getHeader(req, "x-clawdbot-agent-id")?.trim() || + getHeader(req, "x-clawdbot-agent")?.trim() || + ""; + if (!raw) return undefined; + return normalizeAgentId(raw); +} + +export function resolveAgentIdFromModel(model: string | undefined): string | undefined { + const raw = model?.trim(); + if (!raw) return undefined; + + const m = + raw.match(/^clawdbot[:/](?[a-z0-9][a-z0-9_-]{0,63})$/i) ?? + raw.match(/^agent:(?[a-z0-9][a-z0-9_-]{0,63})$/i); + const agentId = m?.groups?.agentId; + if (!agentId) return undefined; + return normalizeAgentId(agentId); +} + +export function resolveAgentIdForRequest(params: { + req: IncomingMessage; + model: string | undefined; +}): string { + const fromHeader = resolveAgentIdFromHeader(params.req); + if (fromHeader) return fromHeader; + + const fromModel = resolveAgentIdFromModel(params.model); + return fromModel ?? "main"; +} + +export function resolveSessionKey(params: { + req: IncomingMessage; + agentId: string; + user?: string | undefined; + prefix: string; +}): string { + const explicit = getHeader(params.req, "x-clawdbot-session-key")?.trim(); + if (explicit) return explicit; + + const user = params.user?.trim(); + const mainKey = user ? `${params.prefix}-user:${user}` : `${params.prefix}:${randomUUID()}`; + return buildAgentMainSessionKey({ agentId: params.agentId, mainKey }); +} diff --git a/src/gateway/openai-http.ts b/src/gateway/openai-http.ts index 64709c4df..6e7f1d521 100644 --- a/src/gateway/openai-http.ts +++ b/src/gateway/openai-http.ts @@ -5,9 +5,9 @@ import { buildHistoryContextFromEntries, type HistoryEntry } from "../auto-reply import { createDefaultDeps } from "../cli/deps.js"; import { agentCommand } from "../commands/agent.js"; import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js"; -import { buildAgentMainSessionKey, normalizeAgentId } from "../routing/session-key.js"; import { defaultRuntime } from "../runtime.js"; import { authorizeGatewayConnect, type ResolvedGatewayAuth } from "./auth.js"; +import { getBearerToken, resolveAgentIdForRequest, resolveSessionKey } from "./http-utils.js"; import { readJsonBody } from "./hooks.js"; type OpenAiHttpOptions = { @@ -34,20 +34,6 @@ function sendJson(res: ServerResponse, status: number, body: unknown) { res.end(JSON.stringify(body)); } -function getHeader(req: IncomingMessage, name: string): string | undefined { - const raw = req.headers[name.toLowerCase()]; - if (typeof raw === "string") return raw; - if (Array.isArray(raw)) return raw[0]; - return undefined; -} - -function getBearerToken(req: IncomingMessage): string | undefined { - const raw = getHeader(req, "authorization")?.trim() ?? ""; - if (!raw.toLowerCase().startsWith("bearer ")) return undefined; - const token = raw.slice(7).trim(); - return token || undefined; -} - function writeSse(res: ServerResponse, data: unknown) { res.write(`data: ${JSON.stringify(data)}\n\n`); } @@ -154,50 +140,12 @@ function buildAgentPrompt(messagesUnknown: unknown): { }; } -function resolveAgentIdFromHeader(req: IncomingMessage): string | undefined { - const raw = - getHeader(req, "x-clawdbot-agent-id")?.trim() || - getHeader(req, "x-clawdbot-agent")?.trim() || - ""; - if (!raw) return undefined; - return normalizeAgentId(raw); -} - -function resolveAgentIdFromModel(model: string | undefined): string | undefined { - const raw = model?.trim(); - if (!raw) return undefined; - - const m = - raw.match(/^clawdbot[:/](?[a-z0-9][a-z0-9_-]{0,63})$/i) ?? - raw.match(/^agent:(?[a-z0-9][a-z0-9_-]{0,63})$/i); - const agentId = m?.groups?.agentId; - if (!agentId) return undefined; - return normalizeAgentId(agentId); -} - -function resolveAgentIdForRequest(params: { - req: IncomingMessage; - model: string | undefined; -}): string { - const fromHeader = resolveAgentIdFromHeader(params.req); - if (fromHeader) return fromHeader; - - const fromModel = resolveAgentIdFromModel(params.model); - return fromModel ?? "main"; -} - -function resolveSessionKey(params: { +function resolveOpenAiSessionKey(params: { req: IncomingMessage; agentId: string; user?: string | undefined; }): string { - const explicit = getHeader(params.req, "x-clawdbot-session-key")?.trim(); - if (explicit) return explicit; - - // Default: stateless per-request session key, but stable if OpenAI "user" is provided. - const user = params.user?.trim(); - const mainKey = user ? `openai-user:${user}` : `openai:${randomUUID()}`; - return buildAgentMainSessionKey({ agentId: params.agentId, mainKey }); + return resolveSessionKey({ ...params, prefix: "openai" }); } function coerceRequest(val: unknown): OpenAiChatCompletionRequest { @@ -248,7 +196,7 @@ export async function handleOpenAiHttpRequest( const user = typeof payload.user === "string" ? payload.user : undefined; const agentId = resolveAgentIdForRequest({ req, model }); - const sessionKey = resolveSessionKey({ req, agentId, user }); + const sessionKey = resolveOpenAiSessionKey({ req, agentId, user }); const prompt = buildAgentPrompt(payload.messages); if (!prompt.message) { sendJson(res, 400, { diff --git a/src/gateway/openresponses-http.e2e.test.ts b/src/gateway/openresponses-http.e2e.test.ts index 4664d0237..189d67864 100644 --- a/src/gateway/openresponses-http.e2e.test.ts +++ b/src/gateway/openresponses-http.e2e.test.ts @@ -358,6 +358,182 @@ describe("OpenResponses HTTP API (e2e)", () => { } }); + it("moves input_file content into extraSystemPrompt", async () => { + agentCommand.mockResolvedValueOnce({ + payloads: [{ text: "ok" }], + } as never); + + const port = await getFreePort(); + const server = await startServer(port); + try { + const res = await postResponses(port, { + model: "clawdbot", + input: [ + { + type: "message", + role: "user", + content: [ + { type: "input_text", text: "read this" }, + { + type: "input_file", + source: { + type: "base64", + media_type: "text/plain", + data: Buffer.from("hello").toString("base64"), + filename: "hello.txt", + }, + }, + ], + }, + ], + }); + expect(res.status).toBe(200); + + const [opts] = agentCommand.mock.calls[0] ?? []; + const message = (opts as { message?: string } | undefined)?.message ?? ""; + const extraSystemPrompt = + (opts as { extraSystemPrompt?: string } | undefined)?.extraSystemPrompt ?? ""; + expect(message).toBe("read this"); + expect(extraSystemPrompt).toContain(''); + } finally { + await server.close({ reason: "test done" }); + } + }); + + it("applies tool_choice=none by dropping tools", async () => { + agentCommand.mockResolvedValueOnce({ + payloads: [{ text: "ok" }], + } as never); + + const port = await getFreePort(); + const server = await startServer(port); + try { + const res = await postResponses(port, { + model: "clawdbot", + input: "hi", + tools: [ + { + type: "function", + function: { name: "get_weather", description: "Get weather" }, + }, + ], + tool_choice: "none", + }); + expect(res.status).toBe(200); + + const [opts] = agentCommand.mock.calls[0] ?? []; + expect((opts as { clientTools?: unknown[] } | undefined)?.clientTools).toBeUndefined(); + } finally { + await server.close({ reason: "test done" }); + } + }); + + it("applies tool_choice to a specific tool", async () => { + agentCommand.mockResolvedValueOnce({ + payloads: [{ text: "ok" }], + } as never); + + const port = await getFreePort(); + const server = await startServer(port); + try { + const res = await postResponses(port, { + model: "clawdbot", + input: "hi", + tools: [ + { + type: "function", + function: { name: "get_weather", description: "Get weather" }, + }, + { + type: "function", + function: { name: "get_time", description: "Get time" }, + }, + ], + tool_choice: { type: "function", function: { name: "get_time" } }, + }); + expect(res.status).toBe(200); + + const [opts] = agentCommand.mock.calls[0] ?? []; + const clientTools = + (opts as { clientTools?: Array<{ function?: { name?: string } }> })?.clientTools ?? []; + expect(clientTools).toHaveLength(1); + expect(clientTools[0]?.function?.name).toBe("get_time"); + } finally { + await server.close({ reason: "test done" }); + } + }); + + it("rejects tool_choice that references an unknown tool", async () => { + const port = await getFreePort(); + const server = await startServer(port); + try { + const res = await postResponses(port, { + model: "clawdbot", + input: "hi", + tools: [ + { + type: "function", + function: { name: "get_weather", description: "Get weather" }, + }, + ], + tool_choice: { type: "function", function: { name: "unknown_tool" } }, + }); + expect(res.status).toBe(400); + } finally { + await server.close({ reason: "test done" }); + } + }); + + it("passes max_output_tokens through to the agent stream params", async () => { + agentCommand.mockResolvedValueOnce({ + payloads: [{ text: "ok" }], + } as never); + + const port = await getFreePort(); + const server = await startServer(port); + try { + const res = await postResponses(port, { + model: "clawdbot", + input: "hi", + max_output_tokens: 123, + }); + expect(res.status).toBe(200); + + const [opts] = agentCommand.mock.calls[0] ?? []; + expect( + (opts as { streamParams?: { maxTokens?: number } } | undefined)?.streamParams?.maxTokens, + ).toBe(123); + } finally { + await server.close({ reason: "test done" }); + } + }); + + it("returns usage when available", async () => { + agentCommand.mockResolvedValueOnce({ + payloads: [{ text: "ok" }], + meta: { + agentMeta: { + usage: { input: 3, output: 5, cacheRead: 1, cacheWrite: 1 }, + }, + }, + } as never); + + const port = await getFreePort(); + const server = await startServer(port); + try { + const res = await postResponses(port, { + stream: false, + model: "clawdbot", + input: "hi", + }); + expect(res.status).toBe(200); + const json = (await res.json()) as Record; + expect(json.usage).toEqual({ input_tokens: 3, output_tokens: 5, total_tokens: 10 }); + } finally { + await server.close({ reason: "test done" }); + } + }); + it("returns a non-streaming response with correct shape", async () => { agentCommand.mockResolvedValueOnce({ payloads: [{ text: "hello" }], @@ -436,6 +612,7 @@ describe("OpenResponses HTTP API (e2e)", () => { const eventTypes = events.map((e) => e.event).filter(Boolean); expect(eventTypes).toContain("response.created"); expect(eventTypes).toContain("response.output_item.added"); + expect(eventTypes).toContain("response.in_progress"); expect(eventTypes).toContain("response.content_part.added"); expect(eventTypes).toContain("response.output_text.delta"); expect(eventTypes).toContain("response.output_text.done"); diff --git a/src/gateway/openresponses-http.ts b/src/gateway/openresponses-http.ts index 9274eb47f..af2a44a28 100644 --- a/src/gateway/openresponses-http.ts +++ b/src/gateway/openresponses-http.ts @@ -7,15 +7,17 @@ */ import { randomUUID } from "node:crypto"; +import { lookup } from "node:dns/promises"; import type { IncomingMessage, ServerResponse } from "node:http"; +import { createCanvas } from "@napi-rs/canvas"; import { buildHistoryContextFromEntries, type HistoryEntry } from "../auto-reply/reply/history.js"; import { createDefaultDeps } from "../cli/deps.js"; import { agentCommand } from "../commands/agent.js"; import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js"; -import { buildAgentMainSessionKey, normalizeAgentId } from "../routing/session-key.js"; import { defaultRuntime } from "../runtime.js"; import { authorizeGatewayConnect, type ResolvedGatewayAuth } from "./auth.js"; +import { getBearerToken, resolveAgentIdForRequest, resolveSessionKey } from "./http-utils.js"; import { readJsonBody } from "./hooks.js"; import { CreateResponseBodySchema, @@ -27,12 +29,15 @@ import { type StreamingEvent, type Usage, } from "./open-responses.schema.js"; +import type { GatewayHttpResponsesConfig } from "../config/types.gateway.js"; import type { ClientToolDefinition } from "../agents/pi-embedded-runner/run/params.js"; import type { ImageContent } from "../commands/agent/types.js"; +import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs"; type OpenResponsesHttpOptions = { auth: ResolvedGatewayAuth; maxBodyBytes?: number; + config?: GatewayHttpResponsesConfig; }; function sendJson(res: ServerResponse, status: number, body: unknown) { @@ -41,19 +46,7 @@ function sendJson(res: ServerResponse, status: number, body: unknown) { res.end(JSON.stringify(body)); } -function getHeader(req: IncomingMessage, name: string): string | undefined { - const raw = req.headers[name.toLowerCase()]; - if (typeof raw === "string") return raw; - if (Array.isArray(raw)) return raw[0]; - return undefined; -} - -function getBearerToken(req: IncomingMessage): string | undefined { - const raw = getHeader(req, "authorization")?.trim() ?? ""; - if (!raw.toLowerCase().startsWith("bearer ")) return undefined; - const token = raw.slice(7).trim(); - return token || undefined; -} +const DEFAULT_BODY_BYTES = 20 * 1024 * 1024; function writeSseEvent(res: ServerResponse, event: StreamingEvent) { res.write(`event: ${event.type}\n`); @@ -76,88 +69,290 @@ function extractTextContent(content: string | ContentPart[]): string { .join("\n"); } -const PRIVATE_IP_PATTERNS = [ - /^127\./, // Loopback - /^192\.168\./, // Private network - /^10\./, // Private network - /^172\.(1[6-9]|2[0-9]|3[0-1])\./, // Private network - /^::1$/, // IPv6 loopback - /^fe80:/, // IPv6 link-local - /^fec0:/, // IPv6 site-local -]; +type ResolvedResponsesLimits = { + maxBodyBytes: number; + files: { + allowUrl: boolean; + allowedMimes: Set; + maxBytes: number; + maxChars: number; + maxRedirects: number; + timeoutMs: number; + pdf: { + maxPages: number; + maxPixels: number; + minTextChars: number; + }; + }; + images: { + allowUrl: boolean; + allowedMimes: Set; + maxBytes: number; + maxRedirects: number; + timeoutMs: number; + }; +}; -function isPrivateIp(hostname: string): boolean { - return PRIVATE_IP_PATTERNS.some((pattern) => pattern.test(hostname)); +const DEFAULT_IMAGE_MIMES = ["image/jpeg", "image/png", "image/gif", "image/webp"]; +const DEFAULT_FILE_MIMES = [ + "text/plain", + "text/markdown", + "text/html", + "text/csv", + "application/json", + "application/pdf", +]; +const DEFAULT_IMAGE_MAX_BYTES = 10 * 1024 * 1024; +const DEFAULT_FILE_MAX_BYTES = 5 * 1024 * 1024; +const DEFAULT_FILE_MAX_CHARS = 200_000; +const DEFAULT_MAX_REDIRECTS = 3; +const DEFAULT_TIMEOUT_MS = 10_000; +const DEFAULT_PDF_MAX_PAGES = 4; +const DEFAULT_PDF_MAX_PIXELS = 4_000_000; +const DEFAULT_PDF_MIN_TEXT_CHARS = 200; + +function normalizeMimeType(value: string | undefined): string | undefined { + if (!value) return undefined; + const [raw] = value.split(";"); + const normalized = raw?.trim().toLowerCase(); + return normalized || undefined; } -// Fetch with SSRF protection, timeout, and size limits -async function fetchWithGuard( - url: string, - maxBytes: number, - timeoutMs: number = 10000, -): Promise<{ data: string; mimeType: string }> { - const parsedUrl = new URL(url); +function parseContentType(value: string | undefined): { mimeType?: string; charset?: string } { + if (!value) return {}; + const parts = value.split(";").map((part) => part.trim()); + const mimeType = normalizeMimeType(parts[0]); + const charset = parts + .map((part) => part.match(/^charset=(.+)$/i)?.[1]?.trim()) + .find((part) => part && part.length > 0); + return { mimeType, charset }; +} - // Only allow HTTP/HTTPS - if (!["http:", "https:"].includes(parsedUrl.protocol)) { - throw new Error(`Invalid URL protocol: ${parsedUrl.protocol}. Only HTTP/HTTPS allowed.`); +function normalizeMimeList(values: string[] | undefined, fallback: string[]): Set { + const input = values && values.length > 0 ? values : fallback; + return new Set(input.map((value) => normalizeMimeType(value)).filter(Boolean) as string[]); +} + +function resolveResponsesLimits( + config: GatewayHttpResponsesConfig | undefined, +): ResolvedResponsesLimits { + const files = config?.files; + const images = config?.images; + return { + maxBodyBytes: config?.maxBodyBytes ?? DEFAULT_BODY_BYTES, + files: { + allowUrl: files?.allowUrl ?? true, + allowedMimes: normalizeMimeList(files?.allowedMimes, DEFAULT_FILE_MIMES), + maxBytes: files?.maxBytes ?? DEFAULT_FILE_MAX_BYTES, + maxChars: files?.maxChars ?? DEFAULT_FILE_MAX_CHARS, + maxRedirects: files?.maxRedirects ?? DEFAULT_MAX_REDIRECTS, + timeoutMs: files?.timeoutMs ?? DEFAULT_TIMEOUT_MS, + pdf: { + maxPages: files?.pdf?.maxPages ?? DEFAULT_PDF_MAX_PAGES, + maxPixels: files?.pdf?.maxPixels ?? DEFAULT_PDF_MAX_PIXELS, + minTextChars: files?.pdf?.minTextChars ?? DEFAULT_PDF_MIN_TEXT_CHARS, + }, + }, + images: { + allowUrl: images?.allowUrl ?? true, + allowedMimes: normalizeMimeList(images?.allowedMimes, DEFAULT_IMAGE_MIMES), + maxBytes: images?.maxBytes ?? DEFAULT_IMAGE_MAX_BYTES, + maxRedirects: images?.maxRedirects ?? DEFAULT_MAX_REDIRECTS, + timeoutMs: images?.timeoutMs ?? DEFAULT_TIMEOUT_MS, + }, + }; +} + +const PRIVATE_IPV4_PATTERNS = [ + /^127\./, + /^10\./, + /^192\.168\./, + /^172\.(1[6-9]|2[0-9]|3[0-1])\./, + /^0\./, +]; +const PRIVATE_IPV6_PREFIXES = ["::1", "fe80:", "fec0:", "fc", "fd"]; + +function isPrivateIpAddress(address: string): boolean { + if (address.includes(":")) { + const lower = address.toLowerCase(); + if (lower === "::1") return true; + return PRIVATE_IPV6_PREFIXES.some((prefix) => lower.startsWith(prefix)); + } + return PRIVATE_IPV4_PATTERNS.some((pattern) => pattern.test(address)); +} + +function isBlockedHostname(hostname: string): boolean { + const lower = hostname.toLowerCase(); + return ( + lower === "localhost" || + lower.endsWith(".localhost") || + lower.endsWith(".local") || + lower.endsWith(".internal") + ); +} + +async function assertPublicHostname(hostname: string): Promise { + if (isBlockedHostname(hostname)) { + throw new Error(`Blocked hostname: ${hostname}`); } - // Block private IPs (SSRF protection) - if (isPrivateIp(parsedUrl.hostname)) { - throw new Error(`Private IP addresses are not allowed: ${parsedUrl.hostname}`); + const results = await lookup(hostname, { all: true }); + if (results.length === 0) { + throw new Error(`Unable to resolve hostname: ${hostname}`); } + for (const entry of results) { + if (isPrivateIpAddress(entry.address)) { + throw new Error(`Private IP addresses are not allowed: ${entry.address}`); + } + } +} + +function isRedirectStatus(status: number): boolean { + return status === 301 || status === 302 || status === 303 || status === 307 || status === 308; +} + +// Fetch with SSRF protection, timeout, redirect limits, and size limits. +async function fetchWithGuard(params: { + url: string; + maxBytes: number; + timeoutMs: number; + maxRedirects: number; +}): Promise<{ data: string; mimeType: string; contentType?: string }> { + let currentUrl = params.url; + let redirectCount = 0; const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), timeoutMs); + const timeoutId = setTimeout(() => controller.abort(), params.timeoutMs); try { - const response = await fetch(url, { - signal: controller.signal, - headers: { "User-Agent": "Clawdbot-Gateway/1.0" }, - }); - - if (!response.ok) { - throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`); - } - - const contentLength = response.headers.get("content-length"); - if (contentLength) { - const size = parseInt(contentLength, 10); - if (size > maxBytes) { - throw new Error(`Content too large: ${size} bytes (limit: ${maxBytes} bytes)`); + while (true) { + const parsedUrl = new URL(currentUrl); + if (!["http:", "https:"].includes(parsedUrl.protocol)) { + throw new Error(`Invalid URL protocol: ${parsedUrl.protocol}. Only HTTP/HTTPS allowed.`); } + await assertPublicHostname(parsedUrl.hostname); + + const response = await fetch(parsedUrl, { + signal: controller.signal, + headers: { "User-Agent": "Clawdbot-Gateway/1.0" }, + redirect: "manual", + }); + + if (isRedirectStatus(response.status)) { + const location = response.headers.get("location"); + if (!location) { + throw new Error(`Redirect missing location header (${response.status})`); + } + redirectCount += 1; + if (redirectCount > params.maxRedirects) { + throw new Error(`Too many redirects (limit: ${params.maxRedirects})`); + } + currentUrl = new URL(location, parsedUrl).toString(); + continue; + } + + if (!response.ok) { + throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`); + } + + const contentLength = response.headers.get("content-length"); + if (contentLength) { + const size = parseInt(contentLength, 10); + if (size > params.maxBytes) { + throw new Error(`Content too large: ${size} bytes (limit: ${params.maxBytes} bytes)`); + } + } + + const buffer = await response.arrayBuffer(); + if (buffer.byteLength > params.maxBytes) { + throw new Error( + `Content too large: ${buffer.byteLength} bytes (limit: ${params.maxBytes} bytes)`, + ); + } + + const contentType = response.headers.get("content-type") || undefined; + const parsed = parseContentType(contentType); + const mimeType = parsed.mimeType ?? "application/octet-stream"; + return { data: Buffer.from(buffer).toString("base64"), mimeType, contentType }; } - - const buffer = await response.arrayBuffer(); - if (buffer.byteLength > maxBytes) { - throw new Error(`Content too large: ${buffer.byteLength} bytes (limit: ${maxBytes} bytes)`); - } - - const mimeType = response.headers.get("content-type") || "application/octet-stream"; - - return { - data: Buffer.from(buffer).toString("base64"), - mimeType, - }; } finally { clearTimeout(timeoutId); } } -const ALLOWED_IMAGE_MIMES = new Set(["image/jpeg", "image/png", "image/gif", "image/webp"]); -const MAX_IMAGE_BYTES = 10 * 1024 * 1024; // 10MB -const MAX_FILE_BYTES = 5 * 1024 * 1024; // 5MB -const ALLOWED_FILE_MIMES = new Set([ - "text/plain", - "text/markdown", - "text/html", - "text/csv", - "application/pdf", - "application/json", -]); +type FileExtractResult = { + filename: string; + text?: string; + images?: ImageContent[]; +}; -async function extractImageContent(part: ContentPart): Promise { +function decodeTextContent(buffer: Buffer, charset: string | undefined): string { + const encoding = charset?.trim().toLowerCase() || "utf-8"; + try { + return new TextDecoder(encoding).decode(buffer); + } catch { + return new TextDecoder("utf-8").decode(buffer); + } +} + +function clampText(text: string, maxChars: number): string { + if (text.length <= maxChars) return text; + return text.slice(0, maxChars); +} + +async function extractPdfContent(params: { + buffer: Buffer; + limits: ResolvedResponsesLimits; +}): Promise<{ text: string; images: ImageContent[] }> { + const { buffer, limits } = params; + const pdf = await getDocument({ + data: new Uint8Array(buffer), + // @ts-expect-error pdfjs-dist legacy option not in current type defs. + disableWorker: true, + }).promise; + const maxPages = Math.min(pdf.numPages, limits.files.pdf.maxPages); + const textParts: string[] = []; + + for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) { + const page = await pdf.getPage(pageNum); + const textContent = await page.getTextContent(); + const pageText = textContent.items + .map((item) => ("str" in item ? String(item.str) : "")) + .filter(Boolean) + .join(" "); + if (pageText) textParts.push(pageText); + } + + const text = textParts.join("\n\n"); + if (text.trim().length >= limits.files.pdf.minTextChars) { + return { text, images: [] }; + } + + const images: ImageContent[] = []; + for (let pageNum = 1; pageNum <= maxPages; pageNum += 1) { + const page = await pdf.getPage(pageNum); + const viewport = page.getViewport({ scale: 1 }); + const maxPixels = limits.files.pdf.maxPixels; + const pixelBudget = Math.max(1, maxPixels); + const pagePixels = viewport.width * viewport.height; + const scale = Math.min(1, Math.sqrt(pixelBudget / pagePixels)); + const scaled = page.getViewport({ scale: Math.max(0.1, scale) }); + const canvas = createCanvas(Math.ceil(scaled.width), Math.ceil(scaled.height)); + await page.render({ + canvas: canvas as unknown as HTMLCanvasElement, + viewport: scaled, + }).promise; + const png = canvas.toBuffer("image/png"); + images.push({ type: "image", data: png.toString("base64"), mimeType: "image/png" }); + } + + return { text, images }; +} + +async function extractImageContent( + part: ContentPart, + limits: ResolvedResponsesLimits, +): Promise { if (part.type !== "input_image") return null; const source = part.source as { type: string; url?: string; data?: string; media_type?: string }; @@ -166,16 +361,30 @@ async function extractImageContent(part: ContentPart): Promise limits.images.maxBytes) { + throw new Error( + `Image too large: ${buffer.byteLength} bytes (limit: ${limits.images.maxBytes} bytes)`, + ); + } return { type: "image", data: source.data, mimeType }; } if (source.type === "url" && source.url) { - const result = await fetchWithGuard(source.url, MAX_IMAGE_BYTES); - if (!ALLOWED_IMAGE_MIMES.has(result.mimeType)) { + if (!limits.images.allowUrl) { + throw new Error("input_image URL sources are disabled by config"); + } + const result = await fetchWithGuard({ + url: source.url, + maxBytes: limits.images.maxBytes, + timeoutMs: limits.images.timeoutMs, + maxRedirects: limits.images.maxRedirects, + }); + if (!limits.images.allowedMimes.has(result.mimeType)) { throw new Error(`Unsupported image MIME type from URL: ${result.mimeType}`); } return { type: "image", data: result.data, mimeType: result.mimeType }; @@ -184,7 +393,10 @@ async function extractImageContent(part: ContentPart): Promise { +async function extractFileContent( + part: ContentPart, + limits: ResolvedResponsesLimits, +): Promise { if (part.type !== "input_file") return null; const source = part.source as { @@ -196,36 +408,106 @@ async function extractFileContent(part: ContentPart): Promise { }; const filename = source.filename || "file"; - let content: string; + let buffer: Buffer; + let mimeType: string | undefined; + let charset: string | undefined; if (source.type === "base64") { if (!source.data) { throw new Error("input_file base64 source missing 'data' field"); } - const buffer = Buffer.from(source.data, "base64"); - if (buffer.byteLength > MAX_FILE_BYTES) { - throw new Error( - `File too large: ${buffer.byteLength} bytes (limit: ${MAX_FILE_BYTES} bytes)`, - ); - } - content = buffer.toString("utf-8"); + const parsed = parseContentType(source.media_type); + mimeType = parsed.mimeType; + charset = parsed.charset; + buffer = Buffer.from(source.data, "base64"); } else if (source.type === "url" && source.url) { - const result = await fetchWithGuard(source.url, MAX_FILE_BYTES); - if (!ALLOWED_FILE_MIMES.has(result.mimeType)) { - throw new Error(`Unsupported file MIME type: ${result.mimeType}`); + if (!limits.files.allowUrl) { + throw new Error("input_file URL sources are disabled by config"); } - content = Buffer.from(result.data, "base64").toString("utf-8"); + const result = await fetchWithGuard({ + url: source.url, + maxBytes: limits.files.maxBytes, + timeoutMs: limits.files.timeoutMs, + maxRedirects: limits.files.maxRedirects, + }); + const parsed = parseContentType(result.contentType); + mimeType = parsed.mimeType ?? normalizeMimeType(result.mimeType); + charset = parsed.charset; + buffer = Buffer.from(result.data, "base64"); } else { throw new Error("input_file must have 'source.url' or 'source.data'"); } - return `\n${content}\n`; + if (buffer.byteLength > limits.files.maxBytes) { + throw new Error( + `File too large: ${buffer.byteLength} bytes (limit: ${limits.files.maxBytes} bytes)`, + ); + } + + if (!mimeType) { + throw new Error("input_file missing media type"); + } + if (!limits.files.allowedMimes.has(mimeType)) { + throw new Error(`Unsupported file MIME type: ${mimeType}`); + } + + if (mimeType === "application/pdf") { + const extracted = await extractPdfContent({ buffer, limits }); + const text = extracted.text ? clampText(extracted.text, limits.files.maxChars) : ""; + return { + filename, + text, + images: extracted.images.length > 0 ? extracted.images : undefined, + }; + } + + const text = clampText(decodeTextContent(buffer, charset), limits.files.maxChars); + return { filename, text }; } function extractClientTools(body: CreateResponseBody): ClientToolDefinition[] { return (body.tools ?? []) as ClientToolDefinition[]; } +function applyToolChoice(params: { + tools: ClientToolDefinition[]; + toolChoice: CreateResponseBody["tool_choice"]; +}): { tools: ClientToolDefinition[]; extraSystemPrompt?: string } { + const { tools, toolChoice } = params; + if (!toolChoice) return { tools }; + + if (toolChoice === "none") { + return { tools: [] }; + } + + if (toolChoice === "required") { + if (tools.length === 0) { + throw new Error("tool_choice=required but no tools were provided"); + } + return { + tools, + extraSystemPrompt: "You must call one of the available tools before responding.", + }; + } + + if (typeof toolChoice === "object" && toolChoice.type === "function") { + const targetName = toolChoice.function?.name?.trim(); + if (!targetName) { + throw new Error("tool_choice.function.name is required"); + } + const matched = tools.filter((tool) => tool.function?.name === targetName); + if (matched.length === 0) { + throw new Error(`tool_choice requested unknown tool: ${targetName}`); + } + return { + tools: matched, + extraSystemPrompt: `You must call the ${targetName} tool before responding.`, + }; + } + + return { tools }; +} + export function buildAgentPrompt(input: string | ItemParam[]): { message: string; extraSystemPrompt?: string; @@ -299,56 +581,52 @@ export function buildAgentPrompt(input: string | ItemParam[]): { }; } -function resolveAgentIdFromHeader(req: IncomingMessage): string | undefined { - const raw = - getHeader(req, "x-clawdbot-agent-id")?.trim() || - getHeader(req, "x-clawdbot-agent")?.trim() || - ""; - if (!raw) return undefined; - return normalizeAgentId(raw); -} - -function resolveAgentIdFromModel(model: string | undefined): string | undefined { - const raw = model?.trim(); - if (!raw) return undefined; - - const m = - raw.match(/^clawdbot[:/](?[a-z0-9][a-z0-9_-]{0,63})$/i) ?? - raw.match(/^agent:(?[a-z0-9][a-z0-9_-]{0,63})$/i); - const agentId = m?.groups?.agentId; - if (!agentId) return undefined; - return normalizeAgentId(agentId); -} - -function resolveAgentIdForRequest(params: { - req: IncomingMessage; - model: string | undefined; -}): string { - const fromHeader = resolveAgentIdFromHeader(params.req); - if (fromHeader) return fromHeader; - - const fromModel = resolveAgentIdFromModel(params.model); - return fromModel ?? "main"; -} - -function resolveSessionKey(params: { +function resolveOpenResponsesSessionKey(params: { req: IncomingMessage; agentId: string; user?: string | undefined; }): string { - const explicit = getHeader(params.req, "x-clawdbot-session-key")?.trim(); - if (explicit) return explicit; - - // Default: stateless per-request session key, but stable if OpenResponses "user" is provided. - const user = params.user?.trim(); - const mainKey = user ? `openresponses-user:${user}` : `openresponses:${randomUUID()}`; - return buildAgentMainSessionKey({ agentId: params.agentId, mainKey }); + return resolveSessionKey({ ...params, prefix: "openresponses" }); } function createEmptyUsage(): Usage { return { input_tokens: 0, output_tokens: 0, total_tokens: 0 }; } +function toUsage( + value: + | { + input?: number; + output?: number; + cacheRead?: number; + cacheWrite?: number; + total?: number; + } + | undefined, +): Usage { + if (!value) return createEmptyUsage(); + const input = value.input ?? 0; + const output = value.output ?? 0; + const cacheRead = value.cacheRead ?? 0; + const cacheWrite = value.cacheWrite ?? 0; + const total = value.total ?? input + output + cacheRead + cacheWrite; + return { + input_tokens: Math.max(0, input), + output_tokens: Math.max(0, output), + total_tokens: Math.max(0, total), + }; +} + +function extractUsageFromResult(result: unknown): Usage { + const meta = (result as { meta?: { agentMeta?: { usage?: unknown } } } | null)?.meta; + const usage = meta && typeof meta === "object" ? meta.agentMeta?.usage : undefined; + return toUsage( + usage as + | { input?: number; output?: number; cacheRead?: number; cacheWrite?: number; total?: number } + | undefined, + ); +} + function createResponseResource(params: { id: string; model: string; @@ -412,7 +690,13 @@ export async function handleOpenResponsesHttpRequest( return true; } - const body = await readJsonBody(req, opts.maxBodyBytes ?? 1024 * 1024); + const limits = resolveResponsesLimits(opts.config); + const maxBodyBytes = + opts.maxBodyBytes ?? + (opts.config?.maxBodyBytes + ? limits.maxBodyBytes + : Math.max(limits.maxBodyBytes, limits.files.maxBytes * 2, limits.images.maxBytes * 2)); + const body = await readJsonBody(req, maxBodyBytes); if (!body.ok) { sendJson(res, 400, { error: { message: body.error, type: "invalid_request_error" }, @@ -436,44 +720,79 @@ export async function handleOpenResponsesHttpRequest( const model = payload.model; const user = payload.user; - // Extract images, files, and tools from input (Phase 2) + // Extract images + files from input (Phase 2) let images: ImageContent[] = []; - let fileContents: string[] = []; - if (Array.isArray(payload.input)) { - for (const item of payload.input) { - if (item.type === "message" && typeof item.content !== "string") { - for (const part of item.content) { - const image = await extractImageContent(part); - if (image) { - images.push(image); - continue; - } - const file = await extractFileContent(part); - if (file) { - fileContents.push(file); + let fileContexts: string[] = []; + try { + if (Array.isArray(payload.input)) { + for (const item of payload.input) { + if (item.type === "message" && typeof item.content !== "string") { + for (const part of item.content) { + const image = await extractImageContent(part, limits); + if (image) { + images.push(image); + continue; + } + const file = await extractFileContent(part, limits); + if (file) { + if (file.text?.trim()) { + fileContexts.push(`\n${file.text}\n`); + } else if (file.images && file.images.length > 0) { + fileContexts.push( + `[PDF content rendered to images]`, + ); + } + if (file.images && file.images.length > 0) { + images = images.concat(file.images); + } + } } } } } + } catch (err) { + sendJson(res, 400, { + error: { message: String(err), type: "invalid_request_error" }, + }); + return true; } const clientTools = extractClientTools(payload); + let toolChoicePrompt: string | undefined; + let resolvedClientTools = clientTools; + try { + const toolChoiceResult = applyToolChoice({ + tools: clientTools, + toolChoice: payload.tool_choice, + }); + resolvedClientTools = toolChoiceResult.tools; + toolChoicePrompt = toolChoiceResult.extraSystemPrompt; + } catch (err) { + sendJson(res, 400, { + error: { message: String(err), type: "invalid_request_error" }, + }); + return true; + } const agentId = resolveAgentIdForRequest({ req, model }); - const sessionKey = resolveSessionKey({ req, agentId, user }); + const sessionKey = resolveOpenResponsesSessionKey({ req, agentId, user }); // Build prompt from input const prompt = buildAgentPrompt(payload.input); - // Append file contents to the message - const fullMessage = - fileContents.length > 0 ? `${prompt.message}\n\n${fileContents.join("\n\n")}` : prompt.message; + const fileContext = fileContexts.length > 0 ? fileContexts.join("\n\n") : undefined; + const toolChoiceContext = toolChoicePrompt?.trim(); - // Handle instructions as extra system prompt - const extraSystemPrompt = [payload.instructions, prompt.extraSystemPrompt] + // Handle instructions + file context as extra system prompt + const extraSystemPrompt = [ + payload.instructions, + prompt.extraSystemPrompt, + toolChoiceContext, + fileContext, + ] .filter(Boolean) .join("\n\n"); - if (!fullMessage) { + if (!prompt.message) { sendJson(res, 400, { error: { message: "Missing user message in `input`.", @@ -486,15 +805,20 @@ export async function handleOpenResponsesHttpRequest( const responseId = `resp_${randomUUID()}`; const outputItemId = `msg_${randomUUID()}`; const deps = createDefaultDeps(); + const streamParams = + typeof payload.max_output_tokens === "number" + ? { maxTokens: payload.max_output_tokens } + : undefined; if (!stream) { try { const result = await agentCommand( { - message: fullMessage, + message: prompt.message, images: images.length > 0 ? images : undefined, - clientTools: clientTools.length > 0 ? clientTools : undefined, + clientTools: resolvedClientTools.length > 0 ? resolvedClientTools : undefined, extraSystemPrompt: extraSystemPrompt || undefined, + streamParams: streamParams ?? undefined, sessionKey, runId: responseId, deliver: false, @@ -506,6 +830,7 @@ export async function handleOpenResponsesHttpRequest( ); const payloads = (result as { payloads?: Array<{ text?: string }> } | null)?.payloads; + const usage = extractUsageFromResult(result); const meta = (result as { meta?: unknown } | null)?.meta; const stopReason = meta && typeof meta === "object" ? (meta as { stopReason?: string }).stopReason : undefined; @@ -518,6 +843,7 @@ export async function handleOpenResponsesHttpRequest( // If agent called a client tool, return function_call instead of text if (stopReason === "tool_calls" && pendingToolCalls && pendingToolCalls.length > 0) { const functionCall = pendingToolCalls[0]; + const functionCallItemId = `call_${randomUUID()}`; const response = createResponseResource({ id: responseId, model, @@ -525,12 +851,13 @@ export async function handleOpenResponsesHttpRequest( output: [ { type: "function_call", - id: functionCall.id, + id: functionCallItemId, call_id: functionCall.id, name: functionCall.name, arguments: functionCall.arguments, }, ], + usage, }); sendJson(res, 200, response); return true; @@ -551,6 +878,7 @@ export async function handleOpenResponsesHttpRequest( output: [ createAssistantOutputItem({ id: outputItemId, text: content, status: "completed" }), ], + usage, }); sendJson(res, 200, response); @@ -580,6 +908,65 @@ export async function handleOpenResponsesHttpRequest( let accumulatedText = ""; let sawAssistantDelta = false; let closed = false; + let unsubscribe = () => {}; + let finalUsage: Usage | undefined; + let finalizeRequested: { status: ResponseResource["status"]; text: string } | null = null; + + const maybeFinalize = () => { + if (closed) return; + if (!finalizeRequested) return; + if (!finalUsage) return; + const usage = finalUsage; + + closed = true; + unsubscribe(); + + writeSseEvent(res, { + type: "response.output_text.done", + item_id: outputItemId, + output_index: 0, + content_index: 0, + text: finalizeRequested.text, + }); + + writeSseEvent(res, { + type: "response.content_part.done", + item_id: outputItemId, + output_index: 0, + content_index: 0, + part: { type: "output_text", text: finalizeRequested.text }, + }); + + const completedItem = createAssistantOutputItem({ + id: outputItemId, + text: finalizeRequested.text, + status: "completed", + }); + + writeSseEvent(res, { + type: "response.output_item.done", + output_index: 0, + item: completedItem, + }); + + const finalResponse = createResponseResource({ + id: responseId, + model, + status: finalizeRequested.status, + output: [completedItem], + usage, + }); + + writeSseEvent(res, { type: "response.completed", response: finalResponse }); + writeDone(res); + res.end(); + }; + + const requestFinalize = (status: ResponseResource["status"], text: string) => { + if (finalizeRequested) return; + finalizeRequested = { status, text }; + maybeFinalize(); + }; // Send initial events const initialResponse = createResponseResource({ @@ -590,6 +977,7 @@ export async function handleOpenResponsesHttpRequest( }); writeSseEvent(res, { type: "response.created", response: initialResponse }); + writeSseEvent(res, { type: "response.in_progress", response: initialResponse }); // Add output item const outputItem = createAssistantOutputItem({ @@ -613,7 +1001,7 @@ export async function handleOpenResponsesHttpRequest( part: { type: "output_text", text: "" }, }); - const unsubscribe = onAgentEvent((evt) => { + unsubscribe = onAgentEvent((evt) => { if (evt.runId !== responseId) return; if (closed) return; @@ -639,51 +1027,9 @@ export async function handleOpenResponsesHttpRequest( if (evt.stream === "lifecycle") { const phase = evt.data?.phase; if (phase === "end" || phase === "error") { - closed = true; - unsubscribe(); - - // Complete the stream with final events const finalText = accumulatedText || "No response from Clawdbot."; const finalStatus = phase === "error" ? "failed" : "completed"; - - writeSseEvent(res, { - type: "response.output_text.done", - item_id: outputItemId, - output_index: 0, - content_index: 0, - text: finalText, - }); - - writeSseEvent(res, { - type: "response.content_part.done", - item_id: outputItemId, - output_index: 0, - content_index: 0, - part: { type: "output_text", text: finalText }, - }); - - const completedItem = createAssistantOutputItem({ - id: outputItemId, - text: finalText, - status: "completed", - }); - - writeSseEvent(res, { - type: "response.output_item.done", - output_index: 0, - item: completedItem, - }); - - const finalResponse = createResponseResource({ - id: responseId, - model, - status: finalStatus, - output: [completedItem], - }); - - writeSseEvent(res, { type: "response.completed", response: finalResponse }); - writeDone(res); - res.end(); + requestFinalize(finalStatus, finalText); } } }); @@ -697,10 +1043,11 @@ export async function handleOpenResponsesHttpRequest( try { const result = await agentCommand( { - message: fullMessage, + message: prompt.message, images: images.length > 0 ? images : undefined, - clientTools: clientTools.length > 0 ? clientTools : undefined, + clientTools: resolvedClientTools.length > 0 ? resolvedClientTools : undefined, extraSystemPrompt: extraSystemPrompt || undefined, + streamParams: streamParams ?? undefined, sessionKey, runId: responseId, deliver: false, @@ -711,6 +1058,9 @@ export async function handleOpenResponsesHttpRequest( deps, ); + finalUsage = extractUsageFromResult(result); + maybeFinalize(); + if (closed) return; // Fallback: if no streaming deltas were received, send the full response @@ -734,7 +1084,8 @@ export async function handleOpenResponsesHttpRequest( // If agent called a client tool, emit function_call instead of text if (stopReason === "tool_calls" && pendingToolCalls && pendingToolCalls.length > 0) { const functionCall = pendingToolCalls[0]; - // Complete the text content part + const usage = finalUsage ?? createEmptyUsage(); + writeSseEvent(res, { type: "response.output_text.done", item_id: outputItemId, @@ -750,7 +1101,6 @@ export async function handleOpenResponsesHttpRequest( part: { type: "output_text", text: "" }, }); - // Complete the message item const completedItem = createAssistantOutputItem({ id: outputItemId, text: "", @@ -762,7 +1112,6 @@ export async function handleOpenResponsesHttpRequest( item: completedItem, }); - // Send function_call item const functionCallItemId = `call_${randomUUID()}`; const functionCallItem = { type: "function_call" as const, @@ -781,18 +1130,16 @@ export async function handleOpenResponsesHttpRequest( output_index: 1, item: { ...functionCallItem, status: "completed" as const }, }); - writeSseEvent(res, { - type: "response.output_item.done", - output_index: 1, - item: { ...functionCallItem, status: "completed" as const }, - }); const incompleteResponse = createResponseResource({ id: responseId, model, status: "incomplete", output: [completedItem, functionCallItem], + usage, }); + closed = true; + unsubscribe(); writeSseEvent(res, { type: "response.completed", response: incompleteResponse }); writeDone(res); res.end(); @@ -821,12 +1168,14 @@ export async function handleOpenResponsesHttpRequest( } catch (err) { if (closed) return; + finalUsage = finalUsage ?? createEmptyUsage(); const errorResponse = createResponseResource({ id: responseId, model, status: "failed", output: [], error: { code: "api_error", message: String(err) }, + usage: finalUsage, }); writeSseEvent(res, { type: "response.failed", response: errorResponse }); diff --git a/src/gateway/server-http.ts b/src/gateway/server-http.ts index cdf54a00c..8638f0823 100644 --- a/src/gateway/server-http.ts +++ b/src/gateway/server-http.ts @@ -194,6 +194,7 @@ export function createGatewayHttpServer(opts: { controlUiBasePath: string; openAiChatCompletionsEnabled: boolean; openResponsesEnabled: boolean; + openResponsesConfig?: import("../config/types.gateway.js").GatewayHttpResponsesConfig; handleHooksRequest: HooksRequestHandler; handlePluginRequest?: HooksRequestHandler; resolvedAuth: import("./auth.js").ResolvedGatewayAuth; @@ -205,6 +206,7 @@ export function createGatewayHttpServer(opts: { controlUiBasePath, openAiChatCompletionsEnabled, openResponsesEnabled, + openResponsesConfig, handleHooksRequest, handlePluginRequest, resolvedAuth, @@ -226,7 +228,13 @@ export function createGatewayHttpServer(opts: { if (await handleSlackHttpRequest(req, res)) return; if (handlePluginRequest && (await handlePluginRequest(req, res))) return; if (openResponsesEnabled) { - if (await handleOpenResponsesHttpRequest(req, res, { auth: resolvedAuth })) return; + if ( + await handleOpenResponsesHttpRequest(req, res, { + auth: resolvedAuth, + config: openResponsesConfig, + }) + ) + return; } if (openAiChatCompletionsEnabled) { if (await handleOpenAiHttpRequest(req, res, { auth: resolvedAuth })) return; diff --git a/src/gateway/server-runtime-config.ts b/src/gateway/server-runtime-config.ts index 02d603803..c8b4a1721 100644 --- a/src/gateway/server-runtime-config.ts +++ b/src/gateway/server-runtime-config.ts @@ -18,6 +18,7 @@ export type GatewayRuntimeConfig = { controlUiEnabled: boolean; openAiChatCompletionsEnabled: boolean; openResponsesEnabled: boolean; + openResponsesConfig?: import("../config/types.gateway.js").GatewayHttpResponsesConfig; controlUiBasePath: string; resolvedAuth: ResolvedGatewayAuth; authMode: ResolvedGatewayAuth["mode"]; @@ -47,8 +48,8 @@ export async function resolveGatewayRuntimeConfig(params: { params.openAiChatCompletionsEnabled ?? params.cfg.gateway?.http?.endpoints?.chatCompletions?.enabled ?? false; - const openResponsesEnabled = - params.openResponsesEnabled ?? params.cfg.gateway?.http?.endpoints?.responses?.enabled ?? false; + const openResponsesConfig = params.cfg.gateway?.http?.endpoints?.responses; + const openResponsesEnabled = params.openResponsesEnabled ?? openResponsesConfig?.enabled ?? false; const controlUiBasePath = normalizeControlUiBasePath(params.cfg.gateway?.controlUi?.basePath); const authBase = params.cfg.gateway?.auth ?? {}; const authOverrides = params.auth ?? {}; @@ -93,6 +94,9 @@ export async function resolveGatewayRuntimeConfig(params: { controlUiEnabled, openAiChatCompletionsEnabled, openResponsesEnabled, + openResponsesConfig: openResponsesConfig + ? { ...openResponsesConfig, enabled: openResponsesEnabled } + : undefined, controlUiBasePath, resolvedAuth, authMode, diff --git a/src/gateway/server-runtime-state.ts b/src/gateway/server-runtime-state.ts index 3aeb1cbf1..788467518 100644 --- a/src/gateway/server-runtime-state.ts +++ b/src/gateway/server-runtime-state.ts @@ -28,6 +28,7 @@ export async function createGatewayRuntimeState(params: { controlUiBasePath: string; openAiChatCompletionsEnabled: boolean; openResponsesEnabled: boolean; + openResponsesConfig?: import("../config/types.gateway.js").GatewayHttpResponsesConfig; resolvedAuth: ResolvedGatewayAuth; gatewayTls?: GatewayTlsRuntime; hooksConfig: () => HooksConfigResolved | null; @@ -105,6 +106,7 @@ export async function createGatewayRuntimeState(params: { controlUiBasePath: params.controlUiBasePath, openAiChatCompletionsEnabled: params.openAiChatCompletionsEnabled, openResponsesEnabled: params.openResponsesEnabled, + openResponsesConfig: params.openResponsesConfig, handleHooksRequest, handlePluginRequest, resolvedAuth: params.resolvedAuth, diff --git a/src/gateway/server.impl.ts b/src/gateway/server.impl.ts index dd5d337ff..9572b5a32 100644 --- a/src/gateway/server.impl.ts +++ b/src/gateway/server.impl.ts @@ -219,6 +219,7 @@ export async function startGatewayServer( controlUiEnabled, openAiChatCompletionsEnabled, openResponsesEnabled, + openResponsesConfig, controlUiBasePath, resolvedAuth, tailscaleConfig, @@ -258,6 +259,7 @@ export async function startGatewayServer( controlUiBasePath, openAiChatCompletionsEnabled, openResponsesEnabled, + openResponsesConfig, resolvedAuth, gatewayTls, hooksConfig: () => hooksConfig,