fix: expand /v1/responses inputs (#1229) (thanks @RyanLisse)

Peter Steinberger
2026-01-20 07:35:29 +00:00
parent 4f02c74dca
commit bbc67f3754
24 changed files with 1350 additions and 275 deletions

@@ -12,6 +12,7 @@ Docs: https://docs.clawd.bot
### Changes
- Android: remove legacy bridge transport code now that nodes use the gateway protocol.
- Android: send structured payloads in node events/invokes and include user-agent metadata in gateway connects.
- Gateway: expand `/v1/responses` to support file/image inputs, tool_choice, usage, and output limits. (#1229) — thanks @RyanLisse.
- Docs: surface Amazon Bedrock in provider lists and clarify Bedrock auth env vars. (#1289) — thanks @steipete.
### Fixes

@@ -2669,6 +2669,7 @@ Notes:
- `clawdbot gateway` refuses to start unless `gateway.mode` is set to `local` (or you pass the override flag).
- `gateway.port` controls the single multiplexed port used for WebSocket + HTTP (control UI, hooks, A2UI).
- OpenAI Chat Completions endpoint: **disabled by default**; enable with `gateway.http.endpoints.chatCompletions.enabled: true`.
- OpenResponses endpoint: **disabled by default**; enable with `gateway.http.endpoints.responses.enabled: true`.
- Precedence: `--port` > `CLAWDBOT_GATEWAY_PORT` > `gateway.port` > default `18789`.
- Non-loopback binds (`lan`/`tailnet`/`auto`) require auth. Use `gateway.auth.token` (or `CLAWDBOT_GATEWAY_TOKEN`).
- The onboarding wizard generates a gateway token by default (even on loopback).

@@ -29,6 +29,7 @@ pnpm gateway:watch
- Binds WebSocket control plane to `127.0.0.1:<port>` (default 18789).
- The same port also serves HTTP (control UI, hooks, A2UI). Single-port multiplex.
- OpenAI Chat Completions (HTTP): [`/v1/chat/completions`](/gateway/openai-http-api).
- OpenResponses (HTTP): [`/v1/responses`](/gateway/openresponses-http-api).
- Starts a Canvas file server by default on `canvasHost.port` (default `18793`), serving `http://<gateway-host>:18793/__clawdbot__/canvas/` from `~/clawd/canvas`. Disable with `canvasHost.enabled=false` or `CLAWDBOT_SKIP_CANVAS_HOST=1`.
- Logs to stdout; use launchd/systemd to keep it alive and rotate logs.
- Pass `--verbose` to mirror debug logging (handshakes, req/res, events) from the log file into stdio when troubleshooting.

@@ -0,0 +1,277 @@
---
summary: "Expose an OpenResponses-compatible /v1/responses HTTP endpoint from the Gateway"
read_when:
- Integrating clients that speak the OpenResponses API
- You want item-based inputs, client tool calls, or SSE events
---
# OpenResponses API (HTTP)
Clawdbot's Gateway can serve an OpenResponses-compatible `POST /v1/responses` endpoint.
This endpoint is **disabled by default**. Enable it in config first.
- `POST /v1/responses`
- Same port as the Gateway (WS + HTTP multiplex): `http://<gateway-host>:<port>/v1/responses`
Under the hood, requests are executed as a normal Gateway agent run (same codepath as
`clawdbot agent`), so routing/permissions/config match your Gateway.
## Authentication
Uses the Gateway auth configuration. Send a bearer token:
- `Authorization: Bearer <token>`
Notes:
- When `gateway.auth.mode="token"`, use `gateway.auth.token` (or `CLAWDBOT_GATEWAY_TOKEN`).
- When `gateway.auth.mode="password"`, use `gateway.auth.password` (or `CLAWDBOT_GATEWAY_PASSWORD`).
## Choosing an agent
No custom headers required: encode the agent id in the OpenResponses `model` field:
- `model: "clawdbot:<agentId>"` (example: `"clawdbot:main"`, `"clawdbot:beta"`)
- `model: "agent:<agentId>"` (alias)
Or target a specific Clawdbot agent by header:
- `x-clawdbot-agent-id: <agentId>` (default: `main`)
Advanced:
- `x-clawdbot-session-key: <sessionKey>` to fully control session routing.
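As a sketch, selecting the `beta` agent needs no extra headers when the agent id is encoded in `model` (illustrative request body):

```json
{
  "model": "clawdbot:beta",
  "input": "hi"
}
```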
## Enabling the endpoint
Set `gateway.http.endpoints.responses.enabled` to `true`:
```json5
{
  gateway: {
    http: {
      endpoints: {
        responses: { enabled: true }
      }
    }
  }
}
```
## Disabling the endpoint
Set `gateway.http.endpoints.responses.enabled` to `false`:
```json5
{
  gateway: {
    http: {
      endpoints: {
        responses: { enabled: false }
      }
    }
  }
}
```
## Session behavior
By default the endpoint is **stateless per request** (a new session key is generated each call).
If the request includes an OpenResponses `user` string, the Gateway derives a stable session key
from it, so repeated calls can share an agent session.
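For instance, repeated requests carrying the same `user` value should land in the same agent session (illustrative body; the derived session key is an internal detail):

```json
{
  "model": "clawdbot:main",
  "user": "customer-42",
  "input": "What did I ask you earlier?"
}
```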
## Request shape (supported)
The request follows the OpenResponses API with item-based input. Current support:
- `input`: string or array of item objects.
- `instructions`: merged into the system prompt.
- `tools`: client tool definitions (function tools).
- `tool_choice`: filter or require client tools.
- `stream`: enables SSE streaming.
- `max_output_tokens`: best-effort output limit (provider dependent).
- `user`: stable session routing.
Accepted but **currently ignored**:
- `max_tool_calls`
- `reasoning`
- `metadata`
- `store`
- `previous_response_id`
- `truncation`
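Putting the supported fields together, a request might look like this (a sketch; field values are illustrative):

```json
{
  "model": "clawdbot:main",
  "instructions": "Answer briefly.",
  "input": "What's the weather in Berlin?",
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city",
        "parameters": {
          "type": "object",
          "properties": { "city": { "type": "string" } },
          "required": ["city"]
        }
      }
    }
  ],
  "tool_choice": { "type": "function", "function": { "name": "get_weather" } },
  "max_output_tokens": 512,
  "user": "customer-42"
}
```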
## Items (input)
### `message`
Roles: `system`, `developer`, `user`, `assistant`.
- `system` and `developer` are appended to the system prompt.
- The most recent `user` or `function_call_output` item becomes the “current message.”
- Earlier user/assistant messages are included as history for context.
### `function_call_output` (turn-based tools)
Send tool results back to the model:
```json
{
  "type": "function_call_output",
  "call_id": "call_123",
  "output": "{\"temperature\": \"72F\"}"
}
```
### `reasoning` and `item_reference`
Accepted for schema compatibility but ignored when building the prompt.
## Tools (client-side function tools)
Provide tools with `tools: [{ type: "function", function: { name, description?, parameters? } }]`.
If the agent decides to call a tool, the response returns a `function_call` output item.
You then send a follow-up request with `function_call_output` to continue the turn.
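For example, after receiving a `function_call` output item with `call_id: "call_123"`, the client runs the tool locally and continues the turn with a follow-up request like this (sketch; the `call_id` must match the one returned by the model):

```json
{
  "model": "clawdbot:main",
  "input": [
    {
      "type": "function_call_output",
      "call_id": "call_123",
      "output": "{\"temperature\": \"72F\"}"
    }
  ]
}
```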
## Images (`input_image`)
Supports base64 or URL sources:
```json
{
  "type": "input_image",
  "source": { "type": "url", "url": "https://example.com/image.png" }
}
```
Allowed MIME types (current): `image/jpeg`, `image/png`, `image/gif`, `image/webp`.
Max size (current): 10MB.
## Files (`input_file`)
Supports base64 or URL sources:
```json
{
  "type": "input_file",
  "source": {
    "type": "base64",
    "media_type": "text/plain",
    "data": "SGVsbG8gV29ybGQh",
    "filename": "hello.txt"
  }
}
```
Allowed MIME types (current): `text/plain`, `text/markdown`, `text/html`, `text/csv`,
`application/json`, `application/pdf`.
Max size (current): 5MB.
Current behavior:
- File content is decoded and added to the **system prompt**, not the user message,
so it stays ephemeral (not persisted in session history).
- PDFs are parsed for text. If little text is found, the first pages are rasterized
into images and passed to the model.
## File + image limits (config)
Defaults can be tuned under `gateway.http.endpoints.responses`:
```json5
{
  gateway: {
    http: {
      endpoints: {
        responses: {
          enabled: true,
          maxBodyBytes: 20000000,
          files: {
            allowUrl: true,
            allowedMimes: ["text/plain", "text/markdown", "text/html", "text/csv", "application/json", "application/pdf"],
            maxBytes: 5242880,
            maxChars: 200000,
            maxRedirects: 3,
            timeoutMs: 10000,
            pdf: {
              maxPages: 4,
              maxPixels: 4000000,
              minTextChars: 200
            }
          },
          images: {
            allowUrl: true,
            allowedMimes: ["image/jpeg", "image/png", "image/gif", "image/webp"],
            maxBytes: 10485760,
            maxRedirects: 3,
            timeoutMs: 10000
          }
        }
      }
    }
  }
}
```
## Streaming (SSE)
Set `stream: true` to receive Server-Sent Events (SSE):
- `Content-Type: text/event-stream`
- Each event is an `event: <type>` line followed by a `data: <json>` line
- Stream ends with `data: [DONE]`
Event types currently emitted:
- `response.created`
- `response.in_progress`
- `response.output_item.added`
- `response.content_part.added`
- `response.output_text.delta`
- `response.output_text.done`
- `response.content_part.done`
- `response.output_item.done`
- `response.completed`
- `response.failed` (on error)
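A trimmed text-only stream might look like this (illustrative; payloads are abbreviated and field names follow the usual OpenResponses conventions, not a verbatim capture):

```
event: response.created
data: {"type":"response.created","response":{"id":"resp_abc","status":"in_progress"}}

event: response.output_text.delta
data: {"type":"response.output_text.delta","delta":"Hel"}

event: response.output_text.delta
data: {"type":"response.output_text.delta","delta":"lo"}

event: response.completed
data: {"type":"response.completed","response":{"id":"resp_abc","status":"completed"}}

data: [DONE]
```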
## Usage
`usage` is populated when the underlying provider reports token counts.
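When reported, the counts use the OpenResponses token-field names, e.g.:

```json
{ "usage": { "input_tokens": 3, "output_tokens": 5, "total_tokens": 10 } }
```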
## Errors
Errors use a JSON object like:
```json
{ "error": { "message": "...", "type": "invalid_request_error" } }
```
Common cases:
- `401` missing/invalid auth
- `400` invalid request body
- `405` wrong method
## Examples
Non-streaming:
```bash
curl -sS http://127.0.0.1:18789/v1/responses \
  -H 'Authorization: Bearer YOUR_TOKEN' \
  -H 'Content-Type: application/json' \
  -H 'x-clawdbot-agent-id: main' \
  -d '{
    "model": "clawdbot",
    "input": "hi"
  }'
```
Streaming:
```bash
curl -N http://127.0.0.1:18789/v1/responses \
  -H 'Authorization: Bearer YOUR_TOKEN' \
  -H 'Content-Type: application/json' \
  -H 'x-clawdbot-agent-id: main' \
  -d '{
    "model": "clawdbot",
    "stream": true,
    "input": "hi"
  }'
```

@@ -155,6 +155,7 @@
"@mariozechner/pi-coding-agent": "^0.46.0",
"@mariozechner/pi-tui": "^0.46.0",
"@mozilla/readability": "^0.6.0",
"@napi-rs/canvas": "^0.1.88",
"@sinclair/typebox": "0.34.47",
"@slack/bolt": "^4.6.0",
"@slack/web-api": "^7.13.0",
@@ -181,6 +182,7 @@
"long": "5.3.2",
"markdown-it": "^14.1.0",
"osc-progress": "^0.2.0",
"pdfjs-dist": "^5.4.530",
"playwright-core": "1.57.0",
"proper-lockfile": "^4.1.2",
"qrcode-terminal": "^0.12.0",

pnpm-lock.yaml (generated)
@@ -49,6 +49,9 @@ importers:
'@mozilla/readability':
specifier: ^0.6.0
version: 0.6.0
'@napi-rs/canvas':
specifier: ^0.1.88
version: 0.1.88
'@sinclair/typebox':
specifier: 0.34.47
version: 0.34.47
@@ -127,6 +130,9 @@ importers:
osc-progress:
specifier: ^0.2.0
version: 0.2.0
pdfjs-dist:
specifier: ^5.4.530
version: 5.4.530
playwright-core:
specifier: 1.57.0
version: 1.57.0
@@ -1205,6 +1211,76 @@ packages:
resolution: {integrity: sha512-juG5VWh4qAivzTAeMzvY9xs9HY5rAcr2E4I7tiSSCokRFi7XIZCAu92ZkSTsIj1OPceCifL3cpfteP3pDT9/QQ==}
engines: {node: '>=14.0.0'}
'@napi-rs/canvas-android-arm64@0.1.88':
resolution: {integrity: sha512-KEaClPnZuVxJ8smUWjV1wWFkByBO/D+vy4lN+Dm5DFH514oqwukxKGeck9xcKJhaWJGjfruGmYGiwRe//+/zQQ==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [android]
'@napi-rs/canvas-darwin-arm64@0.1.88':
resolution: {integrity: sha512-Xgywz0dDxOKSgx3eZnK85WgGMmGrQEW7ZLA/E7raZdlEE+xXCozobgqz2ZvYigpB6DJFYkqnwHjqCOTSDGlFdg==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [darwin]
'@napi-rs/canvas-darwin-x64@0.1.88':
resolution: {integrity: sha512-Yz4wSCIQOUgNucgk+8NFtQxQxZV5NO8VKRl9ePKE6XoNyNVC8JDqtvhh3b3TPqKK8W5p2EQpAr1rjjm0mfBxdg==}
engines: {node: '>= 10'}
cpu: [x64]
os: [darwin]
'@napi-rs/canvas-linux-arm-gnueabihf@0.1.88':
resolution: {integrity: sha512-9gQM2SlTo76hYhxHi2XxWTAqpTOb+JtxMPEIr+H5nAhHhyEtNmTSDRtz93SP7mGd2G3Ojf2oF5tP9OdgtgXyKg==}
engines: {node: '>= 10'}
cpu: [arm]
os: [linux]
'@napi-rs/canvas-linux-arm64-gnu@0.1.88':
resolution: {integrity: sha512-7qgaOBMXuVRk9Fzztzr3BchQKXDxGbY+nwsovD3I/Sx81e+sX0ReEDYHTItNb0Je4NHbAl7D0MKyd4SvUc04sg==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [linux]
'@napi-rs/canvas-linux-arm64-musl@0.1.88':
resolution: {integrity: sha512-kYyNrUsHLkoGHBc77u4Unh067GrfiCUMbGHC2+OTxbeWfZkPt2o32UOQkhnSswKd9Fko/wSqqGkY956bIUzruA==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [linux]
'@napi-rs/canvas-linux-riscv64-gnu@0.1.88':
resolution: {integrity: sha512-HVuH7QgzB0yavYdNZDRyAsn/ejoXB0hn8twwFnOqUbCCdkV+REna7RXjSR7+PdfW0qMQ2YYWsLvVBT5iL/mGpw==}
engines: {node: '>= 10'}
cpu: [riscv64]
os: [linux]
'@napi-rs/canvas-linux-x64-gnu@0.1.88':
resolution: {integrity: sha512-hvcvKIcPEQrvvJtJnwD35B3qk6umFJ8dFIr8bSymfrSMem0EQsfn1ztys8ETIFndTwdNWJKWluvxztA41ivsEw==}
engines: {node: '>= 10'}
cpu: [x64]
os: [linux]
'@napi-rs/canvas-linux-x64-musl@0.1.88':
resolution: {integrity: sha512-eSMpGYY2xnZSQ6UxYJ6plDboxq4KeJ4zT5HaVkUnbObNN6DlbJe0Mclh3wifAmquXfrlgTZt6zhHsUgz++AK6g==}
engines: {node: '>= 10'}
cpu: [x64]
os: [linux]
'@napi-rs/canvas-win32-arm64-msvc@0.1.88':
resolution: {integrity: sha512-qcIFfEgHrchyYqRrxsCeTQgpJZ/GqHiqPcU/Fvw/ARVlQeDX1VyFH+X+0gCR2tca6UJrq96vnW+5o7buCq+erA==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [win32]
'@napi-rs/canvas-win32-x64-msvc@0.1.88':
resolution: {integrity: sha512-ROVqbfS4QyZxYkqmaIBBpbz/BQvAR+05FXM5PAtTYVc0uyY8Y4BHJSMdGAaMf6TdIVRsQsiq+FG/dH9XhvWCFQ==}
engines: {node: '>= 10'}
cpu: [x64]
os: [win32]
'@napi-rs/canvas@0.1.88':
resolution: {integrity: sha512-/p08f93LEbsL5mDZFQ3DBxcPv/I4QG9EDYRRq1WNlCOXVfAHBTHMSVMwxlqG/AtnSfUr9+vgfN7MKiyDo0+Weg==}
engines: {node: '>= 10'}
'@napi-rs/wasm-runtime@1.1.1':
resolution: {integrity: sha512-p64ah1M1ld8xjWv3qbvFwHiFVWrq1yFvV4f7w+mzaqiR4IlSgkqhcRdHwsGgomwzBH51sRY4NEowLxnaBjcW/A==}
@@ -3756,6 +3832,10 @@ packages:
pathe@2.0.3:
resolution: {integrity: sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==}
pdfjs-dist@5.4.530:
resolution: {integrity: sha512-r1hWsSIGGmyYUAHR26zSXkxYWLXLMd6AwqcaFYG9YUZ0GBf5GvcjJSeo512tabM4GYFhxhl5pMCmPr7Q72Rq2Q==}
engines: {node: '>=20.16.0 || >=22.3.0'}
picocolors@1.1.1:
resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==}
@@ -5671,6 +5751,53 @@ snapshots:
'@mozilla/readability@0.6.0': {}
'@napi-rs/canvas-android-arm64@0.1.88':
optional: true
'@napi-rs/canvas-darwin-arm64@0.1.88':
optional: true
'@napi-rs/canvas-darwin-x64@0.1.88':
optional: true
'@napi-rs/canvas-linux-arm-gnueabihf@0.1.88':
optional: true
'@napi-rs/canvas-linux-arm64-gnu@0.1.88':
optional: true
'@napi-rs/canvas-linux-arm64-musl@0.1.88':
optional: true
'@napi-rs/canvas-linux-riscv64-gnu@0.1.88':
optional: true
'@napi-rs/canvas-linux-x64-gnu@0.1.88':
optional: true
'@napi-rs/canvas-linux-x64-musl@0.1.88':
optional: true
'@napi-rs/canvas-win32-arm64-msvc@0.1.88':
optional: true
'@napi-rs/canvas-win32-x64-msvc@0.1.88':
optional: true
'@napi-rs/canvas@0.1.88':
optionalDependencies:
'@napi-rs/canvas-android-arm64': 0.1.88
'@napi-rs/canvas-darwin-arm64': 0.1.88
'@napi-rs/canvas-darwin-x64': 0.1.88
'@napi-rs/canvas-linux-arm-gnueabihf': 0.1.88
'@napi-rs/canvas-linux-arm64-gnu': 0.1.88
'@napi-rs/canvas-linux-arm64-musl': 0.1.88
'@napi-rs/canvas-linux-riscv64-gnu': 0.1.88
'@napi-rs/canvas-linux-x64-gnu': 0.1.88
'@napi-rs/canvas-linux-x64-musl': 0.1.88
'@napi-rs/canvas-win32-arm64-msvc': 0.1.88
'@napi-rs/canvas-win32-x64-msvc': 0.1.88
'@napi-rs/wasm-runtime@1.1.1':
dependencies:
'@emnapi/core': 1.8.1
@@ -8462,6 +8589,10 @@ snapshots:
pathe@2.0.3: {}
pdfjs-dist@5.4.530:
optionalDependencies:
'@napi-rs/canvas': 0.1.88
picocolors@1.1.1: {}
picomatch@2.3.1: {}

@@ -45,6 +45,7 @@ export async function runCliAgent(params: {
timeoutMs: number;
runId: string;
extraSystemPrompt?: string;
streamParams?: import("../commands/agent/types.js").AgentStreamParams;
ownerNumbers?: string[];
cliSessionId?: string;
images?: ImageContent[];

@@ -63,13 +63,21 @@ export function applyExtraParamsToAgent(
cfg: ClawdbotConfig | undefined,
provider: string,
modelId: string,
extraParamsOverride?: Record<string, unknown>,
): void {
const extraParams = resolveExtraParams({
cfg,
provider,
modelId,
});
const wrappedStreamFn = createStreamFnWithExtraParams(agent.streamFn, extraParams);
const override =
extraParamsOverride && Object.keys(extraParamsOverride).length > 0
? Object.fromEntries(
Object.entries(extraParamsOverride).filter(([, value]) => value !== undefined),
)
: undefined;
const merged = Object.assign({}, extraParams, override);
const wrappedStreamFn = createStreamFnWithExtraParams(agent.streamFn, merged);
if (wrappedStreamFn) {
log.debug(`applying extraParams to agent streamFn for ${provider}/${modelId}`);

@@ -239,6 +239,7 @@ export async function runEmbeddedPiAgent(
onToolResult: params.onToolResult,
onAgentEvent: params.onAgentEvent,
extraSystemPrompt: params.extraSystemPrompt,
streamParams: params.streamParams,
ownerNumbers: params.ownerNumbers,
enforceFinalTag: params.enforceFinalTag,
});

@@ -349,7 +349,13 @@ export async function runEmbeddedAttempt(
// Force a stable streamFn reference so vitest can reliably mock @mariozechner/pi-ai.
activeSession.agent.streamFn = streamSimple;
applyExtraParamsToAgent(activeSession.agent, params.config, params.provider, params.modelId);
applyExtraParamsToAgent(
activeSession.agent,
params.config,
params.provider,
params.modelId,
params.streamParams,
);
try {
const prior = await sanitizeSessionHistory({

@@ -1,6 +1,7 @@
import type { ImageContent } from "@mariozechner/pi-ai";
import type { ReasoningLevel, ThinkLevel, VerboseLevel } from "../../../auto-reply/thinking.js";
import type { ClawdbotConfig } from "../../../config/config.js";
import type { AgentStreamParams } from "../../../commands/agent/types.js";
import type { enqueueCommand } from "../../../process/command-queue.js";
import type { ExecElevatedDefaults, ExecToolDefaults } from "../../bash-tools.js";
import type { BlockReplyChunking, ToolResultFormat } from "../../pi-embedded-subscribe.js";
@@ -70,6 +71,7 @@ export type RunEmbeddedPiAgentParams = {
lane?: string;
enqueue?: typeof enqueueCommand;
extraSystemPrompt?: string;
streamParams?: AgentStreamParams;
ownerNumbers?: string[];
enforceFinalTag?: boolean;
};

@@ -4,6 +4,7 @@ import type { discoverAuthStorage, discoverModels } from "@mariozechner/pi-codin
import type { ReasoningLevel, ThinkLevel, VerboseLevel } from "../../../auto-reply/thinking.js";
import type { ClawdbotConfig } from "../../../config/config.js";
import type { AgentStreamParams } from "../../../commands/agent/types.js";
import type { ExecElevatedDefaults, ExecToolDefaults } from "../../bash-tools.js";
import type { MessagingToolSend } from "../../pi-embedded-messaging.js";
import type { BlockReplyChunking, ToolResultFormat } from "../../pi-embedded-subscribe.js";
@@ -63,6 +64,7 @@ export type EmbeddedRunAttemptParams = {
onToolResult?: (payload: { text?: string; mediaUrls?: string[] }) => void | Promise<void>;
onAgentEvent?: (evt: { stream: string; data: Record<string, unknown> }) => void;
extraSystemPrompt?: string;
streamParams?: AgentStreamParams;
ownerNumbers?: string[];
enforceFinalTag?: boolean;
};

@@ -396,6 +396,7 @@ export async function agentCommand(
extraSystemPrompt: opts.extraSystemPrompt,
cliSessionId,
images: opts.images,
streamParams: opts.streamParams,
});
}
const authProfileId =
@@ -429,6 +430,7 @@ export async function agentCommand(
lane: opts.lane,
abortSignal: opts.abortSignal,
extraSystemPrompt: opts.extraSystemPrompt,
streamParams: opts.streamParams,
agentDir,
onAgentEvent: (evt) => {
if (

@@ -8,6 +8,12 @@ export type ImageContent = {
mimeType: string;
};
export type AgentStreamParams = {
/** Provider stream params override (best-effort). */
temperature?: number;
maxTokens?: number;
};
export type AgentRunContext = {
messageChannel?: string;
accountId?: string;
@@ -53,4 +59,6 @@ export type AgentCommandOpts = {
lane?: string;
runId?: string;
extraSystemPrompt?: string;
/** Per-call stream param overrides (best-effort). */
streamParams?: AgentStreamParams;
};

@@ -111,6 +111,54 @@ export type GatewayHttpResponsesConfig = {
* Default: false when absent.
*/
enabled?: boolean;
/**
* Max request body size in bytes for `/v1/responses`.
* Default: 20MB.
*/
maxBodyBytes?: number;
/** File inputs (input_file). */
files?: GatewayHttpResponsesFilesConfig;
/** Image inputs (input_image). */
images?: GatewayHttpResponsesImagesConfig;
};
export type GatewayHttpResponsesFilesConfig = {
/** Allow URL fetches for input_file. Default: true. */
allowUrl?: boolean;
/** Allowed MIME types (case-insensitive). */
allowedMimes?: string[];
/** Max bytes per file. Default: 5MB. */
maxBytes?: number;
/** Max decoded characters per file. Default: 200k. */
maxChars?: number;
/** Max redirects when fetching a URL. Default: 3. */
maxRedirects?: number;
/** Fetch timeout in ms. Default: 10s. */
timeoutMs?: number;
/** PDF handling (application/pdf). */
pdf?: GatewayHttpResponsesPdfConfig;
};
export type GatewayHttpResponsesPdfConfig = {
/** Max pages to parse/render. Default: 4. */
maxPages?: number;
/** Max pixels per rendered page. Default: 4M. */
maxPixels?: number;
/** Minimum extracted text length to skip rasterization. Default: 200 chars. */
minTextChars?: number;
};
export type GatewayHttpResponsesImagesConfig = {
/** Allow URL fetches for input_image. Default: true. */
allowUrl?: boolean;
/** Allowed MIME types (case-insensitive). */
allowedMimes?: string[];
/** Max bytes per image. Default: 10MB. */
maxBytes?: number;
/** Max redirects when fetching a URL. Default: 3. */
maxRedirects?: number;
/** Fetch timeout in ms. Default: 10s. */
timeoutMs?: number;
};
export type GatewayHttpEndpointsConfig = {

@@ -302,6 +302,36 @@ export const ClawdbotSchema = z
responses: z
.object({
enabled: z.boolean().optional(),
maxBodyBytes: z.number().int().positive().optional(),
files: z
.object({
allowUrl: z.boolean().optional(),
allowedMimes: z.array(z.string()).optional(),
maxBytes: z.number().int().positive().optional(),
maxChars: z.number().int().positive().optional(),
maxRedirects: z.number().int().nonnegative().optional(),
timeoutMs: z.number().int().positive().optional(),
pdf: z
.object({
maxPages: z.number().int().positive().optional(),
maxPixels: z.number().int().positive().optional(),
minTextChars: z.number().int().nonnegative().optional(),
})
.strict()
.optional(),
})
.strict()
.optional(),
images: z
.object({
allowUrl: z.boolean().optional(),
allowedMimes: z.array(z.string()).optional(),
maxBytes: z.number().int().positive().optional(),
maxRedirects: z.number().int().nonnegative().optional(),
timeoutMs: z.number().int().positive().optional(),
})
.strict()
.optional(),
})
.strict()
.optional(),

src/gateway/http-utils.ts (new file)
@@ -0,0 +1,64 @@
import { randomUUID } from "node:crypto";
import type { IncomingMessage } from "node:http";
import { buildAgentMainSessionKey, normalizeAgentId } from "../routing/session-key.js";
export function getHeader(req: IncomingMessage, name: string): string | undefined {
const raw = req.headers[name.toLowerCase()];
if (typeof raw === "string") return raw;
if (Array.isArray(raw)) return raw[0];
return undefined;
}
export function getBearerToken(req: IncomingMessage): string | undefined {
const raw = getHeader(req, "authorization")?.trim() ?? "";
if (!raw.toLowerCase().startsWith("bearer ")) return undefined;
const token = raw.slice(7).trim();
return token || undefined;
}
export function resolveAgentIdFromHeader(req: IncomingMessage): string | undefined {
const raw =
getHeader(req, "x-clawdbot-agent-id")?.trim() ||
getHeader(req, "x-clawdbot-agent")?.trim() ||
"";
if (!raw) return undefined;
return normalizeAgentId(raw);
}
export function resolveAgentIdFromModel(model: string | undefined): string | undefined {
const raw = model?.trim();
if (!raw) return undefined;
const m =
raw.match(/^clawdbot[:/](?<agentId>[a-z0-9][a-z0-9_-]{0,63})$/i) ??
raw.match(/^agent:(?<agentId>[a-z0-9][a-z0-9_-]{0,63})$/i);
const agentId = m?.groups?.agentId;
if (!agentId) return undefined;
return normalizeAgentId(agentId);
}
export function resolveAgentIdForRequest(params: {
req: IncomingMessage;
model: string | undefined;
}): string {
const fromHeader = resolveAgentIdFromHeader(params.req);
if (fromHeader) return fromHeader;
const fromModel = resolveAgentIdFromModel(params.model);
return fromModel ?? "main";
}
export function resolveSessionKey(params: {
req: IncomingMessage;
agentId: string;
user?: string | undefined;
prefix: string;
}): string {
const explicit = getHeader(params.req, "x-clawdbot-session-key")?.trim();
if (explicit) return explicit;
const user = params.user?.trim();
const mainKey = user ? `${params.prefix}-user:${user}` : `${params.prefix}:${randomUUID()}`;
return buildAgentMainSessionKey({ agentId: params.agentId, mainKey });
}

@@ -5,9 +5,9 @@ import { buildHistoryContextFromEntries, type HistoryEntry } from "../auto-reply
import { createDefaultDeps } from "../cli/deps.js";
import { agentCommand } from "../commands/agent.js";
import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js";
import { buildAgentMainSessionKey, normalizeAgentId } from "../routing/session-key.js";
import { defaultRuntime } from "../runtime.js";
import { authorizeGatewayConnect, type ResolvedGatewayAuth } from "./auth.js";
import { getBearerToken, resolveAgentIdForRequest, resolveSessionKey } from "./http-utils.js";
import { readJsonBody } from "./hooks.js";
type OpenAiHttpOptions = {
@@ -34,20 +34,6 @@ function sendJson(res: ServerResponse, status: number, body: unknown) {
res.end(JSON.stringify(body));
}
function getHeader(req: IncomingMessage, name: string): string | undefined {
const raw = req.headers[name.toLowerCase()];
if (typeof raw === "string") return raw;
if (Array.isArray(raw)) return raw[0];
return undefined;
}
function getBearerToken(req: IncomingMessage): string | undefined {
const raw = getHeader(req, "authorization")?.trim() ?? "";
if (!raw.toLowerCase().startsWith("bearer ")) return undefined;
const token = raw.slice(7).trim();
return token || undefined;
}
function writeSse(res: ServerResponse, data: unknown) {
res.write(`data: ${JSON.stringify(data)}\n\n`);
}
@@ -154,50 +140,12 @@ function buildAgentPrompt(messagesUnknown: unknown): {
};
}
function resolveAgentIdFromHeader(req: IncomingMessage): string | undefined {
const raw =
getHeader(req, "x-clawdbot-agent-id")?.trim() ||
getHeader(req, "x-clawdbot-agent")?.trim() ||
"";
if (!raw) return undefined;
return normalizeAgentId(raw);
}
function resolveAgentIdFromModel(model: string | undefined): string | undefined {
const raw = model?.trim();
if (!raw) return undefined;
const m =
raw.match(/^clawdbot[:/](?<agentId>[a-z0-9][a-z0-9_-]{0,63})$/i) ??
raw.match(/^agent:(?<agentId>[a-z0-9][a-z0-9_-]{0,63})$/i);
const agentId = m?.groups?.agentId;
if (!agentId) return undefined;
return normalizeAgentId(agentId);
}
function resolveAgentIdForRequest(params: {
req: IncomingMessage;
model: string | undefined;
}): string {
const fromHeader = resolveAgentIdFromHeader(params.req);
if (fromHeader) return fromHeader;
const fromModel = resolveAgentIdFromModel(params.model);
return fromModel ?? "main";
}
function resolveSessionKey(params: {
function resolveOpenAiSessionKey(params: {
req: IncomingMessage;
agentId: string;
user?: string | undefined;
}): string {
const explicit = getHeader(params.req, "x-clawdbot-session-key")?.trim();
if (explicit) return explicit;
// Default: stateless per-request session key, but stable if OpenAI "user" is provided.
const user = params.user?.trim();
const mainKey = user ? `openai-user:${user}` : `openai:${randomUUID()}`;
return buildAgentMainSessionKey({ agentId: params.agentId, mainKey });
return resolveSessionKey({ ...params, prefix: "openai" });
}
function coerceRequest(val: unknown): OpenAiChatCompletionRequest {
@@ -248,7 +196,7 @@ export async function handleOpenAiHttpRequest(
const user = typeof payload.user === "string" ? payload.user : undefined;
const agentId = resolveAgentIdForRequest({ req, model });
const sessionKey = resolveSessionKey({ req, agentId, user });
const sessionKey = resolveOpenAiSessionKey({ req, agentId, user });
const prompt = buildAgentPrompt(payload.messages);
if (!prompt.message) {
sendJson(res, 400, {

@@ -358,6 +358,182 @@ describe("OpenResponses HTTP API (e2e)", () => {
}
});
it("moves input_file content into extraSystemPrompt", async () => {
agentCommand.mockResolvedValueOnce({
payloads: [{ text: "ok" }],
} as never);
const port = await getFreePort();
const server = await startServer(port);
try {
const res = await postResponses(port, {
model: "clawdbot",
input: [
{
type: "message",
role: "user",
content: [
{ type: "input_text", text: "read this" },
{
type: "input_file",
source: {
type: "base64",
media_type: "text/plain",
data: Buffer.from("hello").toString("base64"),
filename: "hello.txt",
},
},
],
},
],
});
expect(res.status).toBe(200);
const [opts] = agentCommand.mock.calls[0] ?? [];
const message = (opts as { message?: string } | undefined)?.message ?? "";
const extraSystemPrompt =
(opts as { extraSystemPrompt?: string } | undefined)?.extraSystemPrompt ?? "";
expect(message).toBe("read this");
expect(extraSystemPrompt).toContain('<file name="hello.txt">');
} finally {
await server.close({ reason: "test done" });
}
});
it("applies tool_choice=none by dropping tools", async () => {
agentCommand.mockResolvedValueOnce({
payloads: [{ text: "ok" }],
} as never);
const port = await getFreePort();
const server = await startServer(port);
try {
const res = await postResponses(port, {
model: "clawdbot",
input: "hi",
tools: [
{
type: "function",
function: { name: "get_weather", description: "Get weather" },
},
],
tool_choice: "none",
});
expect(res.status).toBe(200);
const [opts] = agentCommand.mock.calls[0] ?? [];
expect((opts as { clientTools?: unknown[] } | undefined)?.clientTools).toBeUndefined();
} finally {
await server.close({ reason: "test done" });
}
});
it("applies tool_choice to a specific tool", async () => {
agentCommand.mockResolvedValueOnce({
payloads: [{ text: "ok" }],
} as never);
const port = await getFreePort();
const server = await startServer(port);
try {
const res = await postResponses(port, {
model: "clawdbot",
input: "hi",
tools: [
{
type: "function",
function: { name: "get_weather", description: "Get weather" },
},
{
type: "function",
function: { name: "get_time", description: "Get time" },
},
],
tool_choice: { type: "function", function: { name: "get_time" } },
});
expect(res.status).toBe(200);
const [opts] = agentCommand.mock.calls[0] ?? [];
const clientTools =
(opts as { clientTools?: Array<{ function?: { name?: string } }> })?.clientTools ?? [];
expect(clientTools).toHaveLength(1);
expect(clientTools[0]?.function?.name).toBe("get_time");
} finally {
await server.close({ reason: "test done" });
}
});
it("rejects tool_choice that references an unknown tool", async () => {
const port = await getFreePort();
const server = await startServer(port);
try {
const res = await postResponses(port, {
model: "clawdbot",
input: "hi",
tools: [
{
type: "function",
function: { name: "get_weather", description: "Get weather" },
},
],
tool_choice: { type: "function", function: { name: "unknown_tool" } },
});
expect(res.status).toBe(400);
} finally {
await server.close({ reason: "test done" });
}
});
it("passes max_output_tokens through to the agent stream params", async () => {
agentCommand.mockResolvedValueOnce({
payloads: [{ text: "ok" }],
} as never);
const port = await getFreePort();
const server = await startServer(port);
try {
const res = await postResponses(port, {
model: "clawdbot",
input: "hi",
max_output_tokens: 123,
});
expect(res.status).toBe(200);
const [opts] = agentCommand.mock.calls[0] ?? [];
expect(
(opts as { streamParams?: { maxTokens?: number } } | undefined)?.streamParams?.maxTokens,
).toBe(123);
} finally {
await server.close({ reason: "test done" });
}
});
it("returns usage when available", async () => {
agentCommand.mockResolvedValueOnce({
payloads: [{ text: "ok" }],
meta: {
agentMeta: {
usage: { input: 3, output: 5, cacheRead: 1, cacheWrite: 1 },
},
},
} as never);
const port = await getFreePort();
const server = await startServer(port);
try {
const res = await postResponses(port, {
stream: false,
model: "clawdbot",
input: "hi",
});
expect(res.status).toBe(200);
const json = (await res.json()) as Record<string, unknown>;
expect(json.usage).toEqual({ input_tokens: 3, output_tokens: 5, total_tokens: 10 });
} finally {
await server.close({ reason: "test done" });
}
});
it("returns a non-streaming response with correct shape", async () => {
agentCommand.mockResolvedValueOnce({
payloads: [{ text: "hello" }],
@@ -436,6 +612,7 @@ describe("OpenResponses HTTP API (e2e)", () => {
const eventTypes = events.map((e) => e.event).filter(Boolean);
expect(eventTypes).toContain("response.created");
expect(eventTypes).toContain("response.output_item.added");
expect(eventTypes).toContain("response.in_progress");
expect(eventTypes).toContain("response.content_part.added");
expect(eventTypes).toContain("response.output_text.delta");
expect(eventTypes).toContain("response.output_text.done");

File diff suppressed because it is too large.
@@ -194,6 +194,7 @@ export function createGatewayHttpServer(opts: {
controlUiBasePath: string;
openAiChatCompletionsEnabled: boolean;
openResponsesEnabled: boolean;
openResponsesConfig?: import("../config/types.gateway.js").GatewayHttpResponsesConfig;
handleHooksRequest: HooksRequestHandler;
handlePluginRequest?: HooksRequestHandler;
resolvedAuth: import("./auth.js").ResolvedGatewayAuth;
@@ -205,6 +206,7 @@ export function createGatewayHttpServer(opts: {
controlUiBasePath,
openAiChatCompletionsEnabled,
openResponsesEnabled,
openResponsesConfig,
handleHooksRequest,
handlePluginRequest,
resolvedAuth,
@@ -226,7 +228,13 @@ export function createGatewayHttpServer(opts: {
if (await handleSlackHttpRequest(req, res)) return;
if (handlePluginRequest && (await handlePluginRequest(req, res))) return;
if (openResponsesEnabled) {
if (await handleOpenResponsesHttpRequest(req, res, { auth: resolvedAuth })) return;
if (
await handleOpenResponsesHttpRequest(req, res, {
auth: resolvedAuth,
config: openResponsesConfig,
})
)
return;
}
if (openAiChatCompletionsEnabled) {
if (await handleOpenAiHttpRequest(req, res, { auth: resolvedAuth })) return;

@@ -18,6 +18,7 @@ export type GatewayRuntimeConfig = {
controlUiEnabled: boolean;
openAiChatCompletionsEnabled: boolean;
openResponsesEnabled: boolean;
openResponsesConfig?: import("../config/types.gateway.js").GatewayHttpResponsesConfig;
controlUiBasePath: string;
resolvedAuth: ResolvedGatewayAuth;
authMode: ResolvedGatewayAuth["mode"];
@@ -47,8 +48,8 @@ export async function resolveGatewayRuntimeConfig(params: {
params.openAiChatCompletionsEnabled ??
params.cfg.gateway?.http?.endpoints?.chatCompletions?.enabled ??
false;
const openResponsesEnabled =
params.openResponsesEnabled ?? params.cfg.gateway?.http?.endpoints?.responses?.enabled ?? false;
const openResponsesConfig = params.cfg.gateway?.http?.endpoints?.responses;
const openResponsesEnabled = params.openResponsesEnabled ?? openResponsesConfig?.enabled ?? false;
const controlUiBasePath = normalizeControlUiBasePath(params.cfg.gateway?.controlUi?.basePath);
const authBase = params.cfg.gateway?.auth ?? {};
const authOverrides = params.auth ?? {};
@@ -93,6 +94,9 @@ export async function resolveGatewayRuntimeConfig(params: {
controlUiEnabled,
openAiChatCompletionsEnabled,
openResponsesEnabled,
openResponsesConfig: openResponsesConfig
? { ...openResponsesConfig, enabled: openResponsesEnabled }
: undefined,
controlUiBasePath,
resolvedAuth,
authMode,

@@ -28,6 +28,7 @@ export async function createGatewayRuntimeState(params: {
controlUiBasePath: string;
openAiChatCompletionsEnabled: boolean;
openResponsesEnabled: boolean;
openResponsesConfig?: import("../config/types.gateway.js").GatewayHttpResponsesConfig;
resolvedAuth: ResolvedGatewayAuth;
gatewayTls?: GatewayTlsRuntime;
hooksConfig: () => HooksConfigResolved | null;
@@ -105,6 +106,7 @@ export async function createGatewayRuntimeState(params: {
controlUiBasePath: params.controlUiBasePath,
openAiChatCompletionsEnabled: params.openAiChatCompletionsEnabled,
openResponsesEnabled: params.openResponsesEnabled,
openResponsesConfig: params.openResponsesConfig,
handleHooksRequest,
handlePluginRequest,
resolvedAuth: params.resolvedAuth,

@@ -219,6 +219,7 @@ export async function startGatewayServer(
controlUiEnabled,
openAiChatCompletionsEnabled,
openResponsesEnabled,
openResponsesConfig,
controlUiBasePath,
resolvedAuth,
tailscaleConfig,
@@ -258,6 +259,7 @@ export async function startGatewayServer(
controlUiBasePath,
openAiChatCompletionsEnabled,
openResponsesEnabled,
openResponsesConfig,
resolvedAuth,
gatewayTls,
hooksConfig: () => hooksConfig,