fix: expand /v1/responses inputs (#1229) (thanks @RyanLisse)

This commit is contained in:
Peter Steinberger
2026-01-20 07:35:29 +00:00
parent 4f02c74dca
commit bbc67f3754
24 changed files with 1350 additions and 275 deletions

View File

@@ -45,6 +45,7 @@ export async function runCliAgent(params: {
timeoutMs: number;
runId: string;
extraSystemPrompt?: string;
streamParams?: import("../commands/agent/types.js").AgentStreamParams;
ownerNumbers?: string[];
cliSessionId?: string;
images?: ImageContent[];

View File

@@ -63,13 +63,21 @@ export function applyExtraParamsToAgent(
cfg: ClawdbotConfig | undefined,
provider: string,
modelId: string,
extraParamsOverride?: Record<string, unknown>,
): void {
const extraParams = resolveExtraParams({
cfg,
provider,
modelId,
});
const wrappedStreamFn = createStreamFnWithExtraParams(agent.streamFn, extraParams);
const override =
extraParamsOverride && Object.keys(extraParamsOverride).length > 0
? Object.fromEntries(
Object.entries(extraParamsOverride).filter(([, value]) => value !== undefined),
)
: undefined;
const merged = Object.assign({}, extraParams, override);
const wrappedStreamFn = createStreamFnWithExtraParams(agent.streamFn, merged);
if (wrappedStreamFn) {
log.debug(`applying extraParams to agent streamFn for ${provider}/${modelId}`);

View File

@@ -239,6 +239,7 @@ export async function runEmbeddedPiAgent(
onToolResult: params.onToolResult,
onAgentEvent: params.onAgentEvent,
extraSystemPrompt: params.extraSystemPrompt,
streamParams: params.streamParams,
ownerNumbers: params.ownerNumbers,
enforceFinalTag: params.enforceFinalTag,
});

View File

@@ -349,7 +349,13 @@ export async function runEmbeddedAttempt(
// Force a stable streamFn reference so vitest can reliably mock @mariozechner/pi-ai.
activeSession.agent.streamFn = streamSimple;
applyExtraParamsToAgent(activeSession.agent, params.config, params.provider, params.modelId);
applyExtraParamsToAgent(
activeSession.agent,
params.config,
params.provider,
params.modelId,
params.streamParams,
);
try {
const prior = await sanitizeSessionHistory({

View File

@@ -1,6 +1,7 @@
import type { ImageContent } from "@mariozechner/pi-ai";
import type { ReasoningLevel, ThinkLevel, VerboseLevel } from "../../../auto-reply/thinking.js";
import type { ClawdbotConfig } from "../../../config/config.js";
import type { AgentStreamParams } from "../../../commands/agent/types.js";
import type { enqueueCommand } from "../../../process/command-queue.js";
import type { ExecElevatedDefaults, ExecToolDefaults } from "../../bash-tools.js";
import type { BlockReplyChunking, ToolResultFormat } from "../../pi-embedded-subscribe.js";
@@ -70,6 +71,7 @@ export type RunEmbeddedPiAgentParams = {
lane?: string;
enqueue?: typeof enqueueCommand;
extraSystemPrompt?: string;
streamParams?: AgentStreamParams;
ownerNumbers?: string[];
enforceFinalTag?: boolean;
};

View File

@@ -4,6 +4,7 @@ import type { discoverAuthStorage, discoverModels } from "@mariozechner/pi-codin
import type { ReasoningLevel, ThinkLevel, VerboseLevel } from "../../../auto-reply/thinking.js";
import type { ClawdbotConfig } from "../../../config/config.js";
import type { AgentStreamParams } from "../../../commands/agent/types.js";
import type { ExecElevatedDefaults, ExecToolDefaults } from "../../bash-tools.js";
import type { MessagingToolSend } from "../../pi-embedded-messaging.js";
import type { BlockReplyChunking, ToolResultFormat } from "../../pi-embedded-subscribe.js";
@@ -63,6 +64,7 @@ export type EmbeddedRunAttemptParams = {
onToolResult?: (payload: { text?: string; mediaUrls?: string[] }) => void | Promise<void>;
onAgentEvent?: (evt: { stream: string; data: Record<string, unknown> }) => void;
extraSystemPrompt?: string;
streamParams?: AgentStreamParams;
ownerNumbers?: string[];
enforceFinalTag?: boolean;
};

View File

@@ -396,6 +396,7 @@ export async function agentCommand(
extraSystemPrompt: opts.extraSystemPrompt,
cliSessionId,
images: opts.images,
streamParams: opts.streamParams,
});
}
const authProfileId =
@@ -429,6 +430,7 @@ export async function agentCommand(
lane: opts.lane,
abortSignal: opts.abortSignal,
extraSystemPrompt: opts.extraSystemPrompt,
streamParams: opts.streamParams,
agentDir,
onAgentEvent: (evt) => {
if (

View File

@@ -8,6 +8,12 @@ export type ImageContent = {
mimeType: string;
};
export type AgentStreamParams = {
/** Provider stream params override (best-effort). */
temperature?: number;
maxTokens?: number;
};
export type AgentRunContext = {
messageChannel?: string;
accountId?: string;
@@ -53,4 +59,6 @@ export type AgentCommandOpts = {
lane?: string;
runId?: string;
extraSystemPrompt?: string;
/** Per-call stream param overrides (best-effort). */
streamParams?: AgentStreamParams;
};

View File

@@ -111,6 +111,54 @@ export type GatewayHttpResponsesConfig = {
* Default: false when absent.
*/
enabled?: boolean;
/**
* Max request body size in bytes for `/v1/responses`.
* Default: 20MB.
*/
maxBodyBytes?: number;
/** File inputs (input_file). */
files?: GatewayHttpResponsesFilesConfig;
/** Image inputs (input_image). */
images?: GatewayHttpResponsesImagesConfig;
};
export type GatewayHttpResponsesFilesConfig = {
/** Allow URL fetches for input_file. Default: true. */
allowUrl?: boolean;
/** Allowed MIME types (case-insensitive). */
allowedMimes?: string[];
/** Max bytes per file. Default: 5MB. */
maxBytes?: number;
/** Max decoded characters per file. Default: 200k. */
maxChars?: number;
/** Max redirects when fetching a URL. Default: 3. */
maxRedirects?: number;
/** Fetch timeout in ms. Default: 10s. */
timeoutMs?: number;
/** PDF handling (application/pdf). */
pdf?: GatewayHttpResponsesPdfConfig;
};
export type GatewayHttpResponsesPdfConfig = {
/** Max pages to parse/render. Default: 4. */
maxPages?: number;
/** Max pixels per rendered page. Default: 4M. */
maxPixels?: number;
/** Minimum extracted text length to skip rasterization. Default: 200 chars. */
minTextChars?: number;
};
export type GatewayHttpResponsesImagesConfig = {
/** Allow URL fetches for input_image. Default: true. */
allowUrl?: boolean;
/** Allowed MIME types (case-insensitive). */
allowedMimes?: string[];
/** Max bytes per image. Default: 10MB. */
maxBytes?: number;
/** Max redirects when fetching a URL. Default: 3. */
maxRedirects?: number;
/** Fetch timeout in ms. Default: 10s. */
timeoutMs?: number;
};
export type GatewayHttpEndpointsConfig = {

View File

@@ -302,6 +302,36 @@ export const ClawdbotSchema = z
responses: z
.object({
enabled: z.boolean().optional(),
maxBodyBytes: z.number().int().positive().optional(),
files: z
.object({
allowUrl: z.boolean().optional(),
allowedMimes: z.array(z.string()).optional(),
maxBytes: z.number().int().positive().optional(),
maxChars: z.number().int().positive().optional(),
maxRedirects: z.number().int().nonnegative().optional(),
timeoutMs: z.number().int().positive().optional(),
pdf: z
.object({
maxPages: z.number().int().positive().optional(),
maxPixels: z.number().int().positive().optional(),
minTextChars: z.number().int().nonnegative().optional(),
})
.strict()
.optional(),
})
.strict()
.optional(),
images: z
.object({
allowUrl: z.boolean().optional(),
allowedMimes: z.array(z.string()).optional(),
maxBytes: z.number().int().positive().optional(),
maxRedirects: z.number().int().nonnegative().optional(),
timeoutMs: z.number().int().positive().optional(),
})
.strict()
.optional(),
})
.strict()
.optional(),

64
src/gateway/http-utils.ts Normal file
View File

@@ -0,0 +1,64 @@
import { randomUUID } from "node:crypto";
import type { IncomingMessage } from "node:http";
import { buildAgentMainSessionKey, normalizeAgentId } from "../routing/session-key.js";
export function getHeader(req: IncomingMessage, name: string): string | undefined {
const raw = req.headers[name.toLowerCase()];
if (typeof raw === "string") return raw;
if (Array.isArray(raw)) return raw[0];
return undefined;
}
export function getBearerToken(req: IncomingMessage): string | undefined {
const raw = getHeader(req, "authorization")?.trim() ?? "";
if (!raw.toLowerCase().startsWith("bearer ")) return undefined;
const token = raw.slice(7).trim();
return token || undefined;
}
export function resolveAgentIdFromHeader(req: IncomingMessage): string | undefined {
const raw =
getHeader(req, "x-clawdbot-agent-id")?.trim() ||
getHeader(req, "x-clawdbot-agent")?.trim() ||
"";
if (!raw) return undefined;
return normalizeAgentId(raw);
}
export function resolveAgentIdFromModel(model: string | undefined): string | undefined {
const raw = model?.trim();
if (!raw) return undefined;
const m =
raw.match(/^clawdbot[:/](?<agentId>[a-z0-9][a-z0-9_-]{0,63})$/i) ??
raw.match(/^agent:(?<agentId>[a-z0-9][a-z0-9_-]{0,63})$/i);
const agentId = m?.groups?.agentId;
if (!agentId) return undefined;
return normalizeAgentId(agentId);
}
export function resolveAgentIdForRequest(params: {
req: IncomingMessage;
model: string | undefined;
}): string {
const fromHeader = resolveAgentIdFromHeader(params.req);
if (fromHeader) return fromHeader;
const fromModel = resolveAgentIdFromModel(params.model);
return fromModel ?? "main";
}
export function resolveSessionKey(params: {
req: IncomingMessage;
agentId: string;
user?: string | undefined;
prefix: string;
}): string {
const explicit = getHeader(params.req, "x-clawdbot-session-key")?.trim();
if (explicit) return explicit;
const user = params.user?.trim();
const mainKey = user ? `${params.prefix}-user:${user}` : `${params.prefix}:${randomUUID()}`;
return buildAgentMainSessionKey({ agentId: params.agentId, mainKey });
}

View File

@@ -5,9 +5,9 @@ import { buildHistoryContextFromEntries, type HistoryEntry } from "../auto-reply
import { createDefaultDeps } from "../cli/deps.js";
import { agentCommand } from "../commands/agent.js";
import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js";
import { buildAgentMainSessionKey, normalizeAgentId } from "../routing/session-key.js";
import { defaultRuntime } from "../runtime.js";
import { authorizeGatewayConnect, type ResolvedGatewayAuth } from "./auth.js";
import { getBearerToken, resolveAgentIdForRequest, resolveSessionKey } from "./http-utils.js";
import { readJsonBody } from "./hooks.js";
type OpenAiHttpOptions = {
@@ -34,20 +34,6 @@ function sendJson(res: ServerResponse, status: number, body: unknown) {
res.end(JSON.stringify(body));
}
function getHeader(req: IncomingMessage, name: string): string | undefined {
const raw = req.headers[name.toLowerCase()];
if (typeof raw === "string") return raw;
if (Array.isArray(raw)) return raw[0];
return undefined;
}
function getBearerToken(req: IncomingMessage): string | undefined {
const raw = getHeader(req, "authorization")?.trim() ?? "";
if (!raw.toLowerCase().startsWith("bearer ")) return undefined;
const token = raw.slice(7).trim();
return token || undefined;
}
function writeSse(res: ServerResponse, data: unknown) {
res.write(`data: ${JSON.stringify(data)}\n\n`);
}
@@ -154,50 +140,12 @@ function buildAgentPrompt(messagesUnknown: unknown): {
};
}
function resolveAgentIdFromHeader(req: IncomingMessage): string | undefined {
const raw =
getHeader(req, "x-clawdbot-agent-id")?.trim() ||
getHeader(req, "x-clawdbot-agent")?.trim() ||
"";
if (!raw) return undefined;
return normalizeAgentId(raw);
}
function resolveAgentIdFromModel(model: string | undefined): string | undefined {
const raw = model?.trim();
if (!raw) return undefined;
const m =
raw.match(/^clawdbot[:/](?<agentId>[a-z0-9][a-z0-9_-]{0,63})$/i) ??
raw.match(/^agent:(?<agentId>[a-z0-9][a-z0-9_-]{0,63})$/i);
const agentId = m?.groups?.agentId;
if (!agentId) return undefined;
return normalizeAgentId(agentId);
}
function resolveAgentIdForRequest(params: {
req: IncomingMessage;
model: string | undefined;
}): string {
const fromHeader = resolveAgentIdFromHeader(params.req);
if (fromHeader) return fromHeader;
const fromModel = resolveAgentIdFromModel(params.model);
return fromModel ?? "main";
}
function resolveSessionKey(params: {
function resolveOpenAiSessionKey(params: {
req: IncomingMessage;
agentId: string;
user?: string | undefined;
}): string {
const explicit = getHeader(params.req, "x-clawdbot-session-key")?.trim();
if (explicit) return explicit;
// Default: stateless per-request session key, but stable if OpenAI "user" is provided.
const user = params.user?.trim();
const mainKey = user ? `openai-user:${user}` : `openai:${randomUUID()}`;
return buildAgentMainSessionKey({ agentId: params.agentId, mainKey });
return resolveSessionKey({ ...params, prefix: "openai" });
}
function coerceRequest(val: unknown): OpenAiChatCompletionRequest {
@@ -248,7 +196,7 @@ export async function handleOpenAiHttpRequest(
const user = typeof payload.user === "string" ? payload.user : undefined;
const agentId = resolveAgentIdForRequest({ req, model });
const sessionKey = resolveSessionKey({ req, agentId, user });
const sessionKey = resolveOpenAiSessionKey({ req, agentId, user });
const prompt = buildAgentPrompt(payload.messages);
if (!prompt.message) {
sendJson(res, 400, {

View File

@@ -358,6 +358,182 @@ describe("OpenResponses HTTP API (e2e)", () => {
}
});
it("moves input_file content into extraSystemPrompt", async () => {
agentCommand.mockResolvedValueOnce({
payloads: [{ text: "ok" }],
} as never);
const port = await getFreePort();
const server = await startServer(port);
try {
const res = await postResponses(port, {
model: "clawdbot",
input: [
{
type: "message",
role: "user",
content: [
{ type: "input_text", text: "read this" },
{
type: "input_file",
source: {
type: "base64",
media_type: "text/plain",
data: Buffer.from("hello").toString("base64"),
filename: "hello.txt",
},
},
],
},
],
});
expect(res.status).toBe(200);
const [opts] = agentCommand.mock.calls[0] ?? [];
const message = (opts as { message?: string } | undefined)?.message ?? "";
const extraSystemPrompt =
(opts as { extraSystemPrompt?: string } | undefined)?.extraSystemPrompt ?? "";
expect(message).toBe("read this");
expect(extraSystemPrompt).toContain('<file name="hello.txt">');
} finally {
await server.close({ reason: "test done" });
}
});
it("applies tool_choice=none by dropping tools", async () => {
agentCommand.mockResolvedValueOnce({
payloads: [{ text: "ok" }],
} as never);
const port = await getFreePort();
const server = await startServer(port);
try {
const res = await postResponses(port, {
model: "clawdbot",
input: "hi",
tools: [
{
type: "function",
function: { name: "get_weather", description: "Get weather" },
},
],
tool_choice: "none",
});
expect(res.status).toBe(200);
const [opts] = agentCommand.mock.calls[0] ?? [];
expect((opts as { clientTools?: unknown[] } | undefined)?.clientTools).toBeUndefined();
} finally {
await server.close({ reason: "test done" });
}
});
it("applies tool_choice to a specific tool", async () => {
agentCommand.mockResolvedValueOnce({
payloads: [{ text: "ok" }],
} as never);
const port = await getFreePort();
const server = await startServer(port);
try {
const res = await postResponses(port, {
model: "clawdbot",
input: "hi",
tools: [
{
type: "function",
function: { name: "get_weather", description: "Get weather" },
},
{
type: "function",
function: { name: "get_time", description: "Get time" },
},
],
tool_choice: { type: "function", function: { name: "get_time" } },
});
expect(res.status).toBe(200);
const [opts] = agentCommand.mock.calls[0] ?? [];
const clientTools =
(opts as { clientTools?: Array<{ function?: { name?: string } }> })?.clientTools ?? [];
expect(clientTools).toHaveLength(1);
expect(clientTools[0]?.function?.name).toBe("get_time");
} finally {
await server.close({ reason: "test done" });
}
});
it("rejects tool_choice that references an unknown tool", async () => {
const port = await getFreePort();
const server = await startServer(port);
try {
const res = await postResponses(port, {
model: "clawdbot",
input: "hi",
tools: [
{
type: "function",
function: { name: "get_weather", description: "Get weather" },
},
],
tool_choice: { type: "function", function: { name: "unknown_tool" } },
});
expect(res.status).toBe(400);
} finally {
await server.close({ reason: "test done" });
}
});
it("passes max_output_tokens through to the agent stream params", async () => {
agentCommand.mockResolvedValueOnce({
payloads: [{ text: "ok" }],
} as never);
const port = await getFreePort();
const server = await startServer(port);
try {
const res = await postResponses(port, {
model: "clawdbot",
input: "hi",
max_output_tokens: 123,
});
expect(res.status).toBe(200);
const [opts] = agentCommand.mock.calls[0] ?? [];
expect(
(opts as { streamParams?: { maxTokens?: number } } | undefined)?.streamParams?.maxTokens,
).toBe(123);
} finally {
await server.close({ reason: "test done" });
}
});
it("returns usage when available", async () => {
agentCommand.mockResolvedValueOnce({
payloads: [{ text: "ok" }],
meta: {
agentMeta: {
usage: { input: 3, output: 5, cacheRead: 1, cacheWrite: 1 },
},
},
} as never);
const port = await getFreePort();
const server = await startServer(port);
try {
const res = await postResponses(port, {
stream: false,
model: "clawdbot",
input: "hi",
});
expect(res.status).toBe(200);
const json = (await res.json()) as Record<string, unknown>;
expect(json.usage).toEqual({ input_tokens: 3, output_tokens: 5, total_tokens: 10 });
} finally {
await server.close({ reason: "test done" });
}
});
it("returns a non-streaming response with correct shape", async () => {
agentCommand.mockResolvedValueOnce({
payloads: [{ text: "hello" }],
@@ -436,6 +612,7 @@ describe("OpenResponses HTTP API (e2e)", () => {
const eventTypes = events.map((e) => e.event).filter(Boolean);
expect(eventTypes).toContain("response.created");
expect(eventTypes).toContain("response.output_item.added");
expect(eventTypes).toContain("response.in_progress");
expect(eventTypes).toContain("response.content_part.added");
expect(eventTypes).toContain("response.output_text.delta");
expect(eventTypes).toContain("response.output_text.done");

File diff suppressed because it is too large Load Diff

View File

@@ -194,6 +194,7 @@ export function createGatewayHttpServer(opts: {
controlUiBasePath: string;
openAiChatCompletionsEnabled: boolean;
openResponsesEnabled: boolean;
openResponsesConfig?: import("../config/types.gateway.js").GatewayHttpResponsesConfig;
handleHooksRequest: HooksRequestHandler;
handlePluginRequest?: HooksRequestHandler;
resolvedAuth: import("./auth.js").ResolvedGatewayAuth;
@@ -205,6 +206,7 @@ export function createGatewayHttpServer(opts: {
controlUiBasePath,
openAiChatCompletionsEnabled,
openResponsesEnabled,
openResponsesConfig,
handleHooksRequest,
handlePluginRequest,
resolvedAuth,
@@ -226,7 +228,13 @@ export function createGatewayHttpServer(opts: {
if (await handleSlackHttpRequest(req, res)) return;
if (handlePluginRequest && (await handlePluginRequest(req, res))) return;
if (openResponsesEnabled) {
if (await handleOpenResponsesHttpRequest(req, res, { auth: resolvedAuth })) return;
if (
await handleOpenResponsesHttpRequest(req, res, {
auth: resolvedAuth,
config: openResponsesConfig,
})
)
return;
}
if (openAiChatCompletionsEnabled) {
if (await handleOpenAiHttpRequest(req, res, { auth: resolvedAuth })) return;

View File

@@ -18,6 +18,7 @@ export type GatewayRuntimeConfig = {
controlUiEnabled: boolean;
openAiChatCompletionsEnabled: boolean;
openResponsesEnabled: boolean;
openResponsesConfig?: import("../config/types.gateway.js").GatewayHttpResponsesConfig;
controlUiBasePath: string;
resolvedAuth: ResolvedGatewayAuth;
authMode: ResolvedGatewayAuth["mode"];
@@ -47,8 +48,8 @@ export async function resolveGatewayRuntimeConfig(params: {
params.openAiChatCompletionsEnabled ??
params.cfg.gateway?.http?.endpoints?.chatCompletions?.enabled ??
false;
const openResponsesEnabled =
params.openResponsesEnabled ?? params.cfg.gateway?.http?.endpoints?.responses?.enabled ?? false;
const openResponsesConfig = params.cfg.gateway?.http?.endpoints?.responses;
const openResponsesEnabled = params.openResponsesEnabled ?? openResponsesConfig?.enabled ?? false;
const controlUiBasePath = normalizeControlUiBasePath(params.cfg.gateway?.controlUi?.basePath);
const authBase = params.cfg.gateway?.auth ?? {};
const authOverrides = params.auth ?? {};
@@ -93,6 +94,9 @@ export async function resolveGatewayRuntimeConfig(params: {
controlUiEnabled,
openAiChatCompletionsEnabled,
openResponsesEnabled,
openResponsesConfig: openResponsesConfig
? { ...openResponsesConfig, enabled: openResponsesEnabled }
: undefined,
controlUiBasePath,
resolvedAuth,
authMode,

View File

@@ -28,6 +28,7 @@ export async function createGatewayRuntimeState(params: {
controlUiBasePath: string;
openAiChatCompletionsEnabled: boolean;
openResponsesEnabled: boolean;
openResponsesConfig?: import("../config/types.gateway.js").GatewayHttpResponsesConfig;
resolvedAuth: ResolvedGatewayAuth;
gatewayTls?: GatewayTlsRuntime;
hooksConfig: () => HooksConfigResolved | null;
@@ -105,6 +106,7 @@ export async function createGatewayRuntimeState(params: {
controlUiBasePath: params.controlUiBasePath,
openAiChatCompletionsEnabled: params.openAiChatCompletionsEnabled,
openResponsesEnabled: params.openResponsesEnabled,
openResponsesConfig: params.openResponsesConfig,
handleHooksRequest,
handlePluginRequest,
resolvedAuth: params.resolvedAuth,

View File

@@ -219,6 +219,7 @@ export async function startGatewayServer(
controlUiEnabled,
openAiChatCompletionsEnabled,
openResponsesEnabled,
openResponsesConfig,
controlUiBasePath,
resolvedAuth,
tailscaleConfig,
@@ -258,6 +259,7 @@ export async function startGatewayServer(
controlUiBasePath,
openAiChatCompletionsEnabled,
openResponsesEnabled,
openResponsesConfig,
resolvedAuth,
gatewayTls,
hooksConfig: () => hooksConfig,