diff --git a/docs/experiments/plans/openresponses-gateway.md b/docs/experiments/plans/openresponses-gateway.md index 703b2aa19..96b9be671 100644 --- a/docs/experiments/plans/openresponses-gateway.md +++ b/docs/experiments/plans/openresponses-gateway.md @@ -8,6 +8,7 @@ last_updated: "2026-01-19" # OpenResponses Gateway Integration Plan ## Context + Clawdbot Gateway currently exposes a minimal OpenAI-compatible Chat Completions endpoint at `/v1/chat/completions` (see [OpenAI Chat Completions](/gateway/openai-http-api)). @@ -16,19 +17,23 @@ for agentic workflows and uses item-based inputs plus semantic streaming events. spec defines `/v1/responses`, not `/v1/chat/completions`. ## Goals + - Add a `/v1/responses` endpoint that adheres to OpenResponses semantics. - Keep Chat Completions as a compatibility layer that is easy to disable and eventually remove. - Standardize validation and parsing with isolated, reusable schemas. ## Non-goals + - Full OpenResponses feature parity in the first pass (images, files, hosted tools). - Replacing internal agent execution logic or tool orchestration. - Changing the existing `/v1/chat/completions` behavior during the first phase. ## Research Summary + Sources: OpenResponses OpenAPI, OpenResponses specification site, and the Hugging Face blog post. Key points extracted: + - `POST /v1/responses` accepts `CreateResponseBody` fields like `model`, `input` (string or `ItemParam[]`), `instructions`, `tools`, `tool_choice`, `stream`, `max_output_tokens`, and `max_tool_calls`. @@ -52,6 +57,7 @@ Key points extracted: - HF examples include `OpenResponses-Version: latest` in requests (optional header). ## Proposed Architecture + - Add `src/gateway/open-responses.schema.ts` containing Zod schemas only (no gateway imports). - Add `src/gateway/openresponses-http.ts` (or `open-responses-http.ts`) for `/v1/responses`. - Keep `src/gateway/openai-http.ts` intact as a legacy compatibility adapter. @@ -61,6 +67,7 @@ Key points extracted: - Emit a startup warning when Chat Completions is enabled to signal legacy status. ## Deprecation Path for Chat Completions + - Maintain strict module boundaries: no shared schema types between responses and chat completions. - Make Chat Completions opt-in by config so it can be disabled without code changes. - Update docs to label Chat Completions as legacy once `/v1/responses` is stable. @@ -68,6 +75,7 @@ Key points extracted: removal path. ## Phase 1 Support Subset + - Accept `input` as string or `ItemParam[]` with message roles and `function_call_output`. - Extract system and developer messages into `extraSystemPrompt`. - Use the most recent `user` or `function_call_output` as the current message for agent runs. @@ -76,6 +84,7 @@ Key points extracted: - Return `usage` with zeroed values until token accounting is wired. ## Validation Strategy (No SDK) + - Implement Zod schemas for the supported subset of: - `CreateResponseBody` - `ItemParam` + message content part unions @@ -84,6 +93,7 @@ Key points extracted: - Keep schemas in a single, isolated module to avoid drift and allow future codegen. ## Streaming Implementation (Phase 1) + - SSE lines with both `event:` and `data:`. - Required sequence (minimum viable): - `response.created` @@ -96,6 +106,7 @@ Key points extracted: - `[DONE]` ## Tests and Verification Plan + - Add e2e coverage for `/v1/responses`: - Auth required - Non-stream response shape @@ -106,5 +117,6 @@ Key points extracted: `[DONE]`. ## Doc Updates (Follow-up) + - Add a new docs page for `/v1/responses` usage and examples. - Update `/gateway/openai-http-api` with a legacy note and pointer to `/v1/responses`. diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index c0e17b103..67f769665 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -482,6 +482,17 @@ export async function runEmbeddedPiAgent( agentMeta, aborted, systemPromptReport: attempt.systemPromptReport, + // Handle client tool calls (OpenResponses hosted tools) + stopReason: attempt.clientToolCall ? "tool_calls" : undefined, + pendingToolCalls: attempt.clientToolCall + ? [ + { + id: `call_${Date.now()}`, + name: attempt.clientToolCall.name, + arguments: JSON.stringify(attempt.clientToolCall.params), + }, + ] + : undefined, }, didSendViaMessagingTool: attempt.didSendViaMessagingTool, messagingToolSentTexts: attempt.messagingToolSentTexts, diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 06272d7ce..eff5afa22 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -64,6 +64,7 @@ import { prewarmSessionFile, trackSessionManagerAccess } from "../session-manage import { prepareSessionManagerForRun } from "../session-manager-init.js"; import { buildEmbeddedSystemPrompt, createSystemPromptOverride } from "../system-prompt.js"; import { splitSdkTools } from "../tool-split.js"; +import { toClientToolDefinitions } from "../../pi-tool-definition-adapter.js"; import { buildSystemPromptParams } from "../../system-prompt-params.js"; import { describeUnknownError, mapThinkingLevel } from "../utils.js"; import { resolveSandboxRuntimeStatus } from "../../sandbox/runtime-status.js"; @@ -314,6 +315,16 @@ export async function runEmbeddedAttempt( sandboxEnabled: !!sandbox?.enabled, }); + // Add client tools (OpenResponses hosted tools) to customTools + let clientToolCallDetected: { name: string; params: Record } | null = null; + const clientToolDefs = params.clientTools + ? toClientToolDefinitions(params.clientTools, (toolName, toolParams) => { + clientToolCallDetected = { name: toolName, params: toolParams }; + }) + : []; + + const allCustomTools = [...customTools, ...clientToolDefs]; + ({ session } = await createAgentSession({ cwd: resolvedWorkspace, agentDir, @@ -323,7 +334,7 @@ export async function runEmbeddedAttempt( thinkingLevel: mapThinkingLevel(params.thinkLevel), systemPrompt, tools: builtInTools, - customTools, + customTools: allCustomTools, sessionManager, settingsManager, skills: [], @@ -681,6 +692,8 @@ export async function runEmbeddedAttempt( cloudCodeAssistFormatError: Boolean( lastAssistant?.errorMessage && isCloudCodeAssistFormatError(lastAssistant.errorMessage), ), + // Client tool call detected (OpenResponses hosted tools) + clientToolCall: clientToolCallDetected ?? undefined, }; } finally { // Always tear down the session (and release the lock) before we leave this attempt. diff --git a/src/agents/pi-embedded-runner/run/params.ts b/src/agents/pi-embedded-runner/run/params.ts index ea8e0f5d5..ada8656d6 100644 --- a/src/agents/pi-embedded-runner/run/params.ts +++ b/src/agents/pi-embedded-runner/run/params.ts @@ -6,6 +6,16 @@ import type { ExecElevatedDefaults, ExecToolDefaults } from "../../bash-tools.js import type { BlockReplyChunking, ToolResultFormat } from "../../pi-embedded-subscribe.js"; import type { SkillSnapshot } from "../../skills.js"; +// Simplified tool definition for client-provided tools (OpenResponses hosted tools) +export type ClientToolDefinition = { + type: "function"; + function: { + name: string; + description?: string; + parameters?: Record; + }; +}; + export type RunEmbeddedPiAgentParams = { sessionId: string; sessionKey?: string; @@ -27,6 +37,8 @@ export type RunEmbeddedPiAgentParams = { skillsSnapshot?: SkillSnapshot; prompt: string; images?: ImageContent[]; + /** Optional client-provided tools (OpenResponses hosted tools). */ + clientTools?: ClientToolDefinition[]; provider?: string; model?: string; authProfileId?: string; diff --git a/src/agents/pi-embedded-runner/run/types.ts b/src/agents/pi-embedded-runner/run/types.ts index b1ae7670f..005f118e0 100644 --- a/src/agents/pi-embedded-runner/run/types.ts +++ b/src/agents/pi-embedded-runner/run/types.ts @@ -9,6 +9,7 @@ import type { MessagingToolSend } from "../../pi-embedded-messaging.js"; import type { BlockReplyChunking, ToolResultFormat } from "../../pi-embedded-subscribe.js"; import type { SkillSnapshot } from "../../skills.js"; import type { SessionSystemPromptReport } from "../../../config/sessions/types.js"; +import type { ClientToolDefinition } from "./params.js"; type AuthStorage = ReturnType; type ModelRegistry = ReturnType; @@ -30,6 +31,8 @@ export type EmbeddedRunAttemptParams = { skillsSnapshot?: SkillSnapshot; prompt: string; images?: ImageContent[]; + /** Optional client-provided tools (OpenResponses hosted tools). */ + clientTools?: ClientToolDefinition[]; provider: string; modelId: string; model: Model; @@ -79,4 +82,6 @@ export type EmbeddedRunAttemptResult = { messagingToolSentTexts: string[]; messagingToolSentTargets: MessagingToolSend[]; cloudCodeAssistFormatError: boolean; + /** Client tool call detected (OpenResponses hosted tools). */ + clientToolCall?: { name: string; params: Record }; }; diff --git a/src/agents/pi-embedded-runner/types.ts b/src/agents/pi-embedded-runner/types.ts index e2d33047a..a8aa3c48c 100644 --- a/src/agents/pi-embedded-runner/types.ts +++ b/src/agents/pi-embedded-runner/types.ts @@ -23,6 +23,14 @@ export type EmbeddedPiRunMeta = { kind: "context_overflow" | "compaction_failure" | "role_ordering"; message: string; }; + /** Stop reason for the agent run (e.g., "completed", "tool_calls"). */ + stopReason?: string; + /** Pending tool calls when stopReason is "tool_calls". */ + pendingToolCalls?: Array<{ + id: string; + name: string; + arguments: string; + }>; }; export type EmbeddedPiRunResult = { diff --git a/src/agents/pi-tool-definition-adapter.ts b/src/agents/pi-tool-definition-adapter.ts index 0b9b5bbe1..963292822 100644 --- a/src/agents/pi-tool-definition-adapter.ts +++ b/src/agents/pi-tool-definition-adapter.ts @@ -4,6 +4,7 @@ import type { AgentToolUpdateCallback, } from "@mariozechner/pi-agent-core"; import type { ToolDefinition } from "@mariozechner/pi-coding-agent"; +import type { ClientToolDefinition } from "./pi-embedded-runner/run/params.js"; import { logDebug, logError } from "../logger.js"; import { normalizeToolName } from "./tool-policy.js"; import { jsonResult } from "./tools/common.js"; @@ -65,3 +66,38 @@ export function toToolDefinitions(tools: AnyAgentTool[]): ToolDefinition[] { } satisfies ToolDefinition; }); } + +// Convert client tools (OpenResponses hosted tools) to ToolDefinition format +// These tools are intercepted to return a "pending" result instead of executing +export function toClientToolDefinitions( + tools: ClientToolDefinition[], + onClientToolCall?: (toolName: string, params: Record) => void, +): ToolDefinition[] { + return tools.map((tool) => { + const func = tool.function; + return { + name: func.name, + label: func.name, + description: func.description ?? "", + parameters: func.parameters as any, + execute: async ( + toolCallId, + params, + _onUpdate: AgentToolUpdateCallback | undefined, + _ctx, + _signal, + ): Promise> => { + // Notify handler that a client tool was called + if (onClientToolCall) { + onClientToolCall(func.name, params as Record); + } + // Return a pending result - the client will execute this tool + return jsonResult({ + status: "pending", + tool: func.name, + message: "Tool execution delegated to client", + }); + }, + } satisfies ToolDefinition; + }); +} diff --git a/src/commands/agent.ts b/src/commands/agent.ts index 4cef10216..f0ef72638 100644 --- a/src/commands/agent.ts +++ b/src/commands/agent.ts @@ -415,6 +415,7 @@ export async function agentCommand( skillsSnapshot, prompt: body, images: opts.images, + clientTools: opts.clientTools, provider: providerOverride, model: modelOverride, authProfileId, diff --git a/src/commands/agent/types.ts b/src/commands/agent/types.ts index f02282a4d..900534911 100644 --- a/src/commands/agent/types.ts +++ b/src/commands/agent/types.ts @@ -1,4 +1,5 @@ import type { ChannelOutboundTargetMode } from "../../channels/plugins/types.js"; +import type { ClientToolDefinition } from "../../agents/pi-embedded-runner/run/params.js"; /** Image content block for Claude API multimodal messages. */ export type ImageContent = { @@ -20,6 +21,8 @@ export type AgentCommandOpts = { message: string; /** Optional image attachments for multimodal messages. */ images?: ImageContent[]; + /** Optional client-provided tools (OpenResponses hosted tools). */ + clientTools?: ClientToolDefinition[]; /** Agent id override (must exist in config). */ agentId?: string; to?: string; diff --git a/src/gateway/client.ts b/src/gateway/client.ts index 8a9922c4e..2c52526b2 100644 --- a/src/gateway/client.ts +++ b/src/gateway/client.ts @@ -90,7 +90,7 @@ export class GatewayClient { }; if (url.startsWith("wss://") && this.opts.tlsFingerprint) { wsOptions.rejectUnauthorized = false; - wsOptions.checkServerIdentity = (_host: string, cert: CertMeta) => { + wsOptions.checkServerIdentity = ((_host: string, cert: CertMeta) => { const fingerprintValue = typeof cert === "object" && cert && "fingerprint256" in cert ? ((cert as { fingerprint256?: string }).fingerprint256 ?? "") @@ -99,9 +99,17 @@ export class GatewayClient { typeof fingerprintValue === "string" ? fingerprintValue : "", ); const expected = normalizeFingerprint(this.opts.tlsFingerprint ?? ""); - if (!expected || !fingerprint) return false; - return fingerprint === expected; - }; + if (!expected) { + return new Error("gateway tls fingerprint missing"); + } + if (!fingerprint) { + return new Error("gateway tls fingerprint unavailable"); + } + if (fingerprint !== expected) { + return new Error("gateway tls fingerprint mismatch"); + } + return undefined; + }) as any; } this.ws = new WebSocket(url, wsOptions); diff --git a/src/gateway/open-responses.schema.ts b/src/gateway/open-responses.schema.ts index 9a486354e..e07288610 100644 --- a/src/gateway/open-responses.schema.ts +++ b/src/gateway/open-responses.schema.ts @@ -27,18 +27,46 @@ export const OutputTextContentPartSchema = z }) .strict(); -// For Phase 1, we reject image/file content with helpful errors +// OpenResponses Image Content: Supports URL or base64 sources +export const InputImageSourceSchema = z.discriminatedUnion("type", [ + z.object({ + type: z.literal("url"), + url: z.string().url(), + }), + z.object({ + type: z.literal("base64"), + media_type: z.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]), + data: z.string().min(1), // base64-encoded + }), +]); + export const InputImageContentPartSchema = z .object({ type: z.literal("input_image"), + source: InputImageSourceSchema, }) - .passthrough(); + .strict(); + +// OpenResponses File Content: Supports URL or base64 sources +export const InputFileSourceSchema = z.discriminatedUnion("type", [ + z.object({ + type: z.literal("url"), + url: z.string().url(), + }), + z.object({ + type: z.literal("base64"), + media_type: z.string().min(1), // MIME type + data: z.string().min(1), // base64-encoded + filename: z.string().optional(), + }), +]); export const InputFileContentPartSchema = z .object({ type: z.literal("input_file"), + source: InputFileSourceSchema, }) - .passthrough(); + .strict(); export const ContentPartSchema = z.discriminatedUnion("type", [ InputTextContentPartSchema, @@ -117,13 +145,14 @@ export const FunctionToolDefinitionSchema = z .object({ type: z.literal("function"), function: z.object({ - name: z.string(), + name: z.string().min(1, "Tool name cannot be empty"), description: z.string().optional(), parameters: z.record(z.string(), z.unknown()).optional(), }), }) .strict(); +// OpenResponses tool definitions match internal ToolDefinition structure export const ToolDefinitionSchema = FunctionToolDefinitionSchema; export type ToolDefinition = z.infer; diff --git a/src/gateway/openresponses-http.ts b/src/gateway/openresponses-http.ts index 0ddbeba66..9274eb47f 100644 --- a/src/gateway/openresponses-http.ts +++ b/src/gateway/openresponses-http.ts @@ -27,6 +27,8 @@ import { type StreamingEvent, type Usage, } from "./open-responses.schema.js"; +import type { ClientToolDefinition } from "../agents/pi-embedded-runner/run/params.js"; +import type { ImageContent } from "../commands/agent/types.js"; type OpenResponsesHttpOptions = { auth: ResolvedGatewayAuth; @@ -74,16 +76,157 @@ function extractTextContent(content: string | ContentPart[]): string { .join("\n"); } -function hasUnsupportedContent(content: string | ContentPart[]): string | null { - if (typeof content === "string") return null; - for (const part of content) { - if (part.type === "input_image") return "input_image content is not supported in Phase 1"; - if (part.type === "input_file") return "input_file content is not supported in Phase 1"; - } - return null; +const PRIVATE_IP_PATTERNS = [ + /^127\./, // Loopback + /^192\.168\./, // Private network + /^10\./, // Private network + /^172\.(1[6-9]|2[0-9]|3[0-1])\./, // Private network + /^::1$/, // IPv6 loopback + /^fe80:/, // IPv6 link-local + /^fec0:/, // IPv6 site-local +]; + +function isPrivateIp(hostname: string): boolean { + return PRIVATE_IP_PATTERNS.some((pattern) => pattern.test(hostname)); } -function buildAgentPrompt(input: string | ItemParam[]): { +// Fetch with SSRF protection, timeout, and size limits +async function fetchWithGuard( + url: string, + maxBytes: number, + timeoutMs: number = 10000, +): Promise<{ data: string; mimeType: string }> { + const parsedUrl = new URL(url); + + // Only allow HTTP/HTTPS + if (!["http:", "https:"].includes(parsedUrl.protocol)) { + throw new Error(`Invalid URL protocol: ${parsedUrl.protocol}. Only HTTP/HTTPS allowed.`); + } + + // Block private IPs (SSRF protection) + if (isPrivateIp(parsedUrl.hostname)) { + throw new Error(`Private IP addresses are not allowed: ${parsedUrl.hostname}`); + } + + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), timeoutMs); + + try { + const response = await fetch(url, { + signal: controller.signal, + headers: { "User-Agent": "Clawdbot-Gateway/1.0" }, + }); + + if (!response.ok) { + throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`); + } + + const contentLength = response.headers.get("content-length"); + if (contentLength) { + const size = parseInt(contentLength, 10); + if (size > maxBytes) { + throw new Error(`Content too large: ${size} bytes (limit: ${maxBytes} bytes)`); + } + } + + const buffer = await response.arrayBuffer(); + if (buffer.byteLength > maxBytes) { + throw new Error(`Content too large: ${buffer.byteLength} bytes (limit: ${maxBytes} bytes)`); + } + + const mimeType = response.headers.get("content-type") || "application/octet-stream"; + + return { + data: Buffer.from(buffer).toString("base64"), + mimeType, + }; + } finally { + clearTimeout(timeoutId); + } +} + +const ALLOWED_IMAGE_MIMES = new Set(["image/jpeg", "image/png", "image/gif", "image/webp"]); +const MAX_IMAGE_BYTES = 10 * 1024 * 1024; // 10MB +const MAX_FILE_BYTES = 5 * 1024 * 1024; // 5MB +const ALLOWED_FILE_MIMES = new Set([ + "text/plain", + "text/markdown", + "text/html", + "text/csv", + "application/pdf", + "application/json", +]); + +async function extractImageContent(part: ContentPart): Promise { + if (part.type !== "input_image") return null; + + const source = part.source as { type: string; url?: string; data?: string; media_type?: string }; + + if (source.type === "base64") { + if (!source.data) { + throw new Error("input_image base64 source missing 'data' field"); + } + const mimeType = source.media_type || "image/png"; + if (!ALLOWED_IMAGE_MIMES.has(mimeType)) { + throw new Error(`Unsupported image MIME type: ${mimeType}`); + } + return { type: "image", data: source.data, mimeType }; + } + + if (source.type === "url" && source.url) { + const result = await fetchWithGuard(source.url, MAX_IMAGE_BYTES); + if (!ALLOWED_IMAGE_MIMES.has(result.mimeType)) { + throw new Error(`Unsupported image MIME type from URL: ${result.mimeType}`); + } + return { type: "image", data: result.data, mimeType: result.mimeType }; + } + + throw new Error("input_image must have 'source.url' or 'source.data'"); +} + +async function extractFileContent(part: ContentPart): Promise { + if (part.type !== "input_file") return null; + + const source = part.source as { + type: string; + url?: string; + data?: string; + media_type?: string; + filename?: string; + }; + const filename = source.filename || "file"; + + let content: string; + + if (source.type === "base64") { + if (!source.data) { + throw new Error("input_file base64 source missing 'data' field"); + } + const buffer = Buffer.from(source.data, "base64"); + if (buffer.byteLength > MAX_FILE_BYTES) { + throw new Error( + `File too large: ${buffer.byteLength} bytes (limit: ${MAX_FILE_BYTES} bytes)`, + ); + } + content = buffer.toString("utf-8"); + } else if (source.type === "url" && source.url) { + const result = await fetchWithGuard(source.url, MAX_FILE_BYTES); + if (!ALLOWED_FILE_MIMES.has(result.mimeType)) { + throw new Error(`Unsupported file MIME type: ${result.mimeType}`); + } + content = Buffer.from(result.data, "base64").toString("utf-8"); + } else { + throw new Error("input_file must have 'source.url' or 'source.data'"); + } + + return `\n${content}\n`; +} + +function extractClientTools(body: CreateResponseBody): ClientToolDefinition[] { + return (body.tools ?? []) as ClientToolDefinition[]; +} + +export function buildAgentPrompt(input: string | ItemParam[]): { message: string; extraSystemPrompt?: string; } { @@ -293,33 +436,44 @@ export async function handleOpenResponsesHttpRequest( const model = payload.model; const user = payload.user; - // Check for unsupported content types (Phase 1) + // Extract images, files, and tools from input (Phase 2) + let images: ImageContent[] = []; + let fileContents: string[] = []; if (Array.isArray(payload.input)) { for (const item of payload.input) { if (item.type === "message" && typeof item.content !== "string") { - const unsupported = hasUnsupportedContent(item.content); - if (unsupported) { - sendJson(res, 400, { - error: { message: unsupported, type: "invalid_request_error" }, - }); - return true; + for (const part of item.content) { + const image = await extractImageContent(part); + if (image) { + images.push(image); + continue; + } + const file = await extractFileContent(part); + if (file) { + fileContents.push(file); + } } } } } + const clientTools = extractClientTools(payload); const agentId = resolveAgentIdForRequest({ req, model }); const sessionKey = resolveSessionKey({ req, agentId, user }); // Build prompt from input const prompt = buildAgentPrompt(payload.input); + // Append file contents to the message + const fullMessage = + fileContents.length > 0 ? `${prompt.message}\n\n${fileContents.join("\n\n")}` : prompt.message; + // Handle instructions as extra system prompt const extraSystemPrompt = [payload.instructions, prompt.extraSystemPrompt] .filter(Boolean) .join("\n\n"); - if (!prompt.message) { + if (!fullMessage) { sendJson(res, 400, { error: { message: "Missing user message in `input`.", @@ -337,7 +491,9 @@ export async function handleOpenResponsesHttpRequest( try { const result = await agentCommand( { - message: prompt.message, + message: fullMessage, + images: images.length > 0 ? images : undefined, + clientTools: clientTools.length > 0 ? clientTools : undefined, extraSystemPrompt: extraSystemPrompt || undefined, sessionKey, runId: responseId, @@ -350,6 +506,36 @@ export async function handleOpenResponsesHttpRequest( ); const payloads = (result as { payloads?: Array<{ text?: string }> } | null)?.payloads; + const meta = (result as { meta?: unknown } | null)?.meta; + const stopReason = + meta && typeof meta === "object" ? (meta as { stopReason?: string }).stopReason : undefined; + const pendingToolCalls = + meta && typeof meta === "object" + ? (meta as { pendingToolCalls?: Array<{ id: string; name: string; arguments: string }> }) + .pendingToolCalls + : undefined; + + // If agent called a client tool, return function_call instead of text + if (stopReason === "tool_calls" && pendingToolCalls && pendingToolCalls.length > 0) { + const functionCall = pendingToolCalls[0]; + const response = createResponseResource({ + id: responseId, + model, + status: "incomplete", + output: [ + { + type: "function_call", + id: functionCall.id, + call_id: functionCall.id, + name: functionCall.name, + arguments: functionCall.arguments, + }, + ], + }); + sendJson(res, 200, response); + return true; + } + const content = Array.isArray(payloads) && payloads.length > 0 ? payloads @@ -511,7 +697,9 @@ export async function handleOpenResponsesHttpRequest( try { const result = await agentCommand( { - message: prompt.message, + message: fullMessage, + images: images.length > 0 ? images : undefined, + clientTools: clientTools.length > 0 ? clientTools : undefined, extraSystemPrompt: extraSystemPrompt || undefined, sessionKey, runId: responseId, @@ -527,7 +715,90 @@ export async function handleOpenResponsesHttpRequest( // Fallback: if no streaming deltas were received, send the full response if (!sawAssistantDelta) { - const payloads = (result as { payloads?: Array<{ text?: string }> } | null)?.payloads; + const resultAny = result as { payloads?: Array<{ text?: string }>; meta?: unknown }; + const payloads = resultAny.payloads; + const meta = resultAny.meta; + const stopReason = + meta && typeof meta === "object" + ? (meta as { stopReason?: string }).stopReason + : undefined; + const pendingToolCalls = + meta && typeof meta === "object" + ? ( + meta as { + pendingToolCalls?: Array<{ id: string; name: string; arguments: string }>; + } + ).pendingToolCalls + : undefined; + + // If agent called a client tool, emit function_call instead of text + if (stopReason === "tool_calls" && pendingToolCalls && pendingToolCalls.length > 0) { + const functionCall = pendingToolCalls[0]; + // Complete the text content part + writeSseEvent(res, { + type: "response.output_text.done", + item_id: outputItemId, + output_index: 0, + content_index: 0, + text: "", + }); + writeSseEvent(res, { + type: "response.content_part.done", + item_id: outputItemId, + output_index: 0, + content_index: 0, + part: { type: "output_text", text: "" }, + }); + + // Complete the message item + const completedItem = createAssistantOutputItem({ + id: outputItemId, + text: "", + status: "completed", + }); + writeSseEvent(res, { + type: "response.output_item.done", + output_index: 0, + item: completedItem, + }); + + // Send function_call item + const functionCallItemId = `call_${randomUUID()}`; + const functionCallItem = { + type: "function_call" as const, + id: functionCallItemId, + call_id: functionCall.id, + name: functionCall.name, + arguments: functionCall.arguments, + }; + writeSseEvent(res, { + type: "response.output_item.added", + output_index: 1, + item: functionCallItem, + }); + writeSseEvent(res, { + type: "response.output_item.done", + output_index: 1, + item: { ...functionCallItem, status: "completed" as const }, + }); + writeSseEvent(res, { + type: "response.output_item.done", + output_index: 1, + item: { ...functionCallItem, status: "completed" as const }, + }); + + const incompleteResponse = createResponseResource({ + id: responseId, + model, + status: "incomplete", + output: [completedItem, functionCallItem], + }); + writeSseEvent(res, { type: "response.completed", response: incompleteResponse }); + writeDone(res); + res.end(); + return; + } + const content = Array.isArray(payloads) && payloads.length > 0 ? payloads