From 782863ea6c8b6f7ba4ad33805885f616543ab282 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Sat, 10 Jan 2026 03:06:02 +0000
Subject: [PATCH] test(gateway): add offline tool-calling regression

---
Review note: this patch was rebuilt from a copy whose line breaks,
indentation and angle-bracket type arguments had been stripped. The new test
file below was also revised in review (env restore, cleanup on setup failure,
doc comments), so its `index` blob id predates that revision. Context-line
indentation in the first diff is reconstructed; use
`git apply --ignore-whitespace` if it does not match the target file.

 .../gateway-models.profiles.live.test.ts      |   4 +-
 .../gateway.tool-calling.mock-openai.test.ts  | 433 ++++++++++++++++++
 2 files changed, 435 insertions(+), 2 deletions(-)
 create mode 100644 src/gateway/gateway.tool-calling.mock-openai.test.ts

diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts
index a6f58c31a..90a603944 100644
--- a/src/gateway/gateway-models.profiles.live.test.ts
+++ b/src/gateway/gateway-models.profiles.live.test.ts
@@ -243,7 +243,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
           sessionKey,
           idempotencyKey: `idem-${runIdTool}-tool`,
           message:
-            `Call the Read tool on "${toolProbePath}". ` +
+            `Call the tool named \`read\` (or \`Read\` if \`read\` is unavailable) on "${toolProbePath}". ` +
             `Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`,
           deliver: false,
         },
@@ -273,7 +273,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
           sessionKey,
           idempotencyKey: `idem-${runId2}-1`,
           message:
-            "Call the Read tool on package.json. Do not write any other text.",
+            "Call the tool named `read` (or `Read`) on package.json. Do not write any other text.",
           deliver: false,
         },
         { expectFinal: true },
diff --git a/src/gateway/gateway.tool-calling.mock-openai.test.ts b/src/gateway/gateway.tool-calling.mock-openai.test.ts
new file mode 100644
index 000000000..4f8420798
--- /dev/null
+++ b/src/gateway/gateway.tool-calling.mock-openai.test.ts
@@ -0,0 +1,433 @@
+import { randomUUID } from "node:crypto";
+import fs from "node:fs/promises";
+import { createServer } from "node:net";
+import os from "node:os";
+import path from "node:path";
+
+import { describe, expect, it, vi } from "vitest";
+
+/** Subset of the OpenAI Responses request that the fake stream inspects. */
+type OpenAIResponsesParams = {
+  input?: unknown[];
+};
+
+/** Stream events emitted by the fake OpenAI Responses implementation. */
+type OpenAIResponseStreamEvent =
+  | { type: "response.output_item.added"; item: Record<string, unknown> }
+  | { type: "response.function_call_arguments.delta"; delta: string }
+  | { type: "response.output_item.done"; item: Record<string, unknown> }
+  | {
+      type: "response.completed";
+      response: {
+        status: "completed";
+        usage: {
+          input_tokens: number;
+          output_tokens: number;
+          total_tokens: number;
+          input_tokens_details?: { cached_tokens?: number };
+        };
+      };
+    };
+
+/**
+ * Returns the text of the last `user` item in `input` that carries text.
+ *
+ * Only `input_text` content parts are considered; they are joined with
+ * newlines and trimmed. Returns "" when no user item yields any text.
+ */
+function extractLastUserText(input: unknown[]): string {
+  for (let i = input.length - 1; i >= 0; i -= 1) {
+    const item = input[i] as Record<string, unknown> | undefined;
+    if (!item || item.role !== "user") continue;
+    const content = item.content;
+    if (Array.isArray(content)) {
+      const text = content
+        .filter(
+          (c): c is { type: "input_text"; text: string } =>
+            !!c &&
+            typeof c === "object" &&
+            (c as { type?: unknown }).type === "input_text" &&
+            typeof (c as { text?: unknown }).text === "string",
+        )
+        .map((c) => c.text)
+        .join("\n")
+        .trim();
+      if (text) return text;
+    }
+  }
+  return "";
+}
+
+/**
+ * Returns the `output` of the first `function_call_output` item in `input`,
+ * or "" when there is none or that output is not a string.
+ */
+function extractToolOutput(input: unknown[]): string {
+  for (const itemRaw of input) {
+    const item = itemRaw as Record<string, unknown> | undefined;
+    if (!item || item.type !== "function_call_output") continue;
+    return typeof item.output === "string" ? item.output : "";
+  }
+  return "";
+}
+
+/**
+ * Scripted two-turn stand-in for the OpenAI Responses stream.
+ *
+ * Without a tool output in `params.input` it emits a `read` function call
+ * for the first double-quoted string in the last user message (falling back
+ * to "package.json"). Once a tool output is present it replies with the
+ * `nonceA=`/`nonceB=` values parsed from that output.
+ */
+async function* fakeOpenAIResponsesStream(
+  params: OpenAIResponsesParams,
+): AsyncGenerator<OpenAIResponseStreamEvent> {
+  const input = Array.isArray(params.input) ? params.input : [];
+  const toolOutput = extractToolOutput(input);
+
+  // Turn 1: return a tool call to `read`.
+  if (!toolOutput) {
+    const prompt = extractLastUserText(input);
+    const quoted = /"([^"]+)"/.exec(prompt)?.[1];
+    const toolPath = quoted ?? "package.json";
+    const argsJson = JSON.stringify({ path: toolPath });
+
+    yield {
+      type: "response.output_item.added",
+      item: {
+        type: "function_call",
+        id: "fc_test_1",
+        call_id: "call_test_1",
+        name: "read",
+        arguments: "",
+      },
+    };
+    yield { type: "response.function_call_arguments.delta", delta: argsJson };
+    yield {
+      type: "response.output_item.done",
+      item: {
+        type: "function_call",
+        id: "fc_test_1",
+        call_id: "call_test_1",
+        name: "read",
+        arguments: argsJson,
+      },
+    };
+    yield {
+      type: "response.completed",
+      response: {
+        status: "completed",
+        usage: { input_tokens: 10, output_tokens: 10, total_tokens: 20 },
+      },
+    };
+    return;
+  }
+
+  // Turn 2: echo the nonces extracted from the Read tool output.
+  const nonceA = /nonceA=([^\s]+)/.exec(toolOutput)?.[1] ?? "";
+  const nonceB = /nonceB=([^\s]+)/.exec(toolOutput)?.[1] ?? "";
+  const reply = `${nonceA} ${nonceB}`.trim();
+
+  yield {
+    type: "response.output_item.added",
+    item: {
+      type: "message",
+      id: "msg_test_1",
+      role: "assistant",
+      content: [],
+      status: "in_progress",
+    },
+  };
+  yield {
+    type: "response.output_item.done",
+    item: {
+      type: "message",
+      id: "msg_test_1",
+      role: "assistant",
+      status: "completed",
+      content: [{ type: "output_text", text: reply, annotations: [] }],
+    },
+  };
+  yield {
+    type: "response.completed",
+    response: {
+      status: "completed",
+      usage: { input_tokens: 10, output_tokens: 10, total_tokens: 20 },
+    },
+  };
+}
+
+// Mock `openai` so `responses.create` returns the scripted stream.
+vi.mock("openai", () => {
+  class OpenAI {
+    responses = {
+      create: async (params: OpenAIResponsesParams) =>
+        fakeOpenAIResponsesStream(params),
+    };
+  }
+
+  return { default: OpenAI };
+});
+
+/** Asks the OS for an ephemeral loopback port, then releases it. */
+async function getFreePort(): Promise<number> {
+  return await new Promise<number>((resolve, reject) => {
+    const srv = createServer();
+    srv.on("error", reject);
+    srv.listen(0, "127.0.0.1", () => {
+      const addr = srv.address();
+      if (!addr || typeof addr === "string") {
+        srv.close();
+        reject(new Error("failed to acquire free port"));
+        return;
+      }
+      const port = addr.port;
+      srv.close((err) => {
+        if (err) reject(err);
+        else resolve(port);
+      });
+    });
+  });
+}
+
+/** Resolves true when `port` is a valid port that can be bound on loopback. */
+async function isPortFree(port: number): Promise<boolean> {
+  if (!Number.isFinite(port) || port <= 0 || port > 65535) return false;
+  return await new Promise<boolean>((resolve) => {
+    const srv = createServer();
+    srv.once("error", () => resolve(false));
+    srv.listen(port, "127.0.0.1", () => {
+      srv.close(() => resolve(true));
+    });
+  });
+}
+
+/**
+ * Picks a free port whose +1, +2 and +4 neighbours are also free.
+ *
+ * @throws Error when no such block is found within 25 attempts.
+ */
+async function getFreeGatewayPort(): Promise<number> {
+  // Gateway uses derived ports (bridge/browser/canvas). Avoid flaky collisions by
+  // ensuring the common derived offsets are free too.
+  for (let attempt = 0; attempt < 25; attempt += 1) {
+    const port = await getFreePort();
+    const candidates = [port, port + 1, port + 2, port + 4];
+    const ok = (
+      await Promise.all(candidates.map((candidate) => isPortFree(candidate)))
+    ).every(Boolean);
+    if (ok) return port;
+  }
+  throw new Error("failed to acquire a free gateway port block");
+}
+
+/**
+ * Joins the non-blank string `text` fields of `result.payloads` with
+ * newlines; returns "" when `result` has no usable payloads.
+ */
+function extractPayloadText(result: unknown): string {
+  const record = (result ?? {}) as Record<string, unknown>;
+  const payloads = Array.isArray(record.payloads) ? record.payloads : [];
+  const texts = payloads
+    .map((p) =>
+      p && typeof p === "object"
+        ? (p as Record<string, unknown>).text
+        : undefined,
+    )
+    .filter((t): t is string => typeof t === "string" && t.trim().length > 0);
+  return texts.join("\n").trim();
+}
+
+/**
+ * Connects a `GatewayClient` to `params.url` and resolves with it once
+ * `onHelloOk` fires.
+ *
+ * Rejects on a connect error, on a close before that point, or after a
+ * 10 second timeout.
+ */
+async function connectClient(params: { url: string; token: string }) {
+  const { GatewayClient } = await import("./client.js");
+  return await new Promise<InstanceType<typeof GatewayClient>>(
+    (resolve, reject) => {
+      let settled = false;
+      const stop = (
+        err?: Error,
+        client?: InstanceType<typeof GatewayClient>,
+      ) => {
+        if (settled) return;
+        settled = true;
+        clearTimeout(timer);
+        if (err) reject(err);
+        else resolve(client as InstanceType<typeof GatewayClient>);
+      };
+      const client = new GatewayClient({
+        url: params.url,
+        token: params.token,
+        clientName: "vitest-mock-openai",
+        clientVersion: "dev",
+        mode: "test",
+        onHelloOk: () => stop(undefined, client),
+        onConnectError: (err) => stop(err),
+        onClose: (code, reason) =>
+          stop(new Error(`gateway closed during connect (${code}): ${reason}`)),
+      });
+      const timer = setTimeout(
+        () => stop(new Error("gateway connect timeout")),
+        10_000,
+      );
+      // Do not let the pending timeout keep the process alive.
+      timer.unref();
+      client.start();
+    },
+  );
+}
+
+/**
+ * Restores an environment variable to a previously captured value.
+ *
+ * Assigning `undefined` to a `process.env` key stores the string
+ * "undefined", so a variable that was originally unset is deleted instead.
+ */
+function restoreEnv(name: string, value: string | undefined): void {
+  if (value === undefined) delete process.env[name];
+  else process.env[name] = value;
+}
+
+describe("gateway (mock openai): tool calling", () => {
+  it("runs a Read tool call end-to-end via gateway agent loop", async () => {
+    // Snapshot the env vars this test mutates so they can be restored.
+    const prev = {
+      home: process.env.HOME,
+      configPath: process.env.CLAWDBOT_CONFIG_PATH,
+      token: process.env.CLAWDBOT_GATEWAY_TOKEN,
+      skipProviders: process.env.CLAWDBOT_SKIP_PROVIDERS,
+      skipGmail: process.env.CLAWDBOT_SKIP_GMAIL_WATCHER,
+      skipCron: process.env.CLAWDBOT_SKIP_CRON,
+      skipCanvas: process.env.CLAWDBOT_SKIP_CANVAS_HOST,
+    };
+
+    const tempHome = await fs.mkdtemp(
+      path.join(os.tmpdir(), "clawdbot-gw-mock-home-"),
+    );
+
+    // Setup runs inside try/finally so a failure before the assertions (config
+    // write, server start, client connect) still removes the temp home and
+    // restores the environment.
+    try {
+      process.env.HOME = tempHome;
+      process.env.CLAWDBOT_SKIP_PROVIDERS = "1";
+      process.env.CLAWDBOT_SKIP_GMAIL_WATCHER = "1";
+      process.env.CLAWDBOT_SKIP_CRON = "1";
+      process.env.CLAWDBOT_SKIP_CANVAS_HOST = "1";
+
+      const token = `test-${randomUUID()}`;
+      process.env.CLAWDBOT_GATEWAY_TOKEN = token;
+
+      const workspaceDir = path.join(tempHome, "clawd");
+      await fs.mkdir(workspaceDir, { recursive: true });
+
+      // The probe file holds both nonces; the fake model echoes them only from
+      // the tool output, so they come back only if `read` actually ran.
+      const nonceA = randomUUID();
+      const nonceB = randomUUID();
+      const toolProbePath = path.join(
+        workspaceDir,
+        `.clawdbot-tool-probe.${nonceA}.txt`,
+      );
+      await fs.writeFile(toolProbePath, `nonceA=${nonceA}\nnonceB=${nonceB}\n`);
+
+      const configDir = path.join(tempHome, ".clawdbot");
+      await fs.mkdir(configDir, { recursive: true });
+      const configPath = path.join(configDir, "clawdbot.json");
+
+      const cfg = {
+        agents: { defaults: { workspace: workspaceDir } },
+        models: {
+          mode: "replace",
+          providers: {
+            openai: {
+              baseUrl: "https://api.openai.com/v1",
+              apiKey: "test",
+              api: "openai-responses",
+              models: [
+                {
+                  id: "gpt-5.2",
+                  name: "gpt-5.2",
+                  api: "openai-responses",
+                  reasoning: false,
+                  input: ["text"],
+                  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+                  contextWindow: 128_000,
+                  maxTokens: 4096,
+                },
+              ],
+            },
+          },
+        },
+        gateway: { auth: { token } },
+      };
+
+      await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`);
+      process.env.CLAWDBOT_CONFIG_PATH = configPath;
+
+      const port = await getFreeGatewayPort();
+      const { startGatewayServer } = await import("./server.js");
+      const server = await startGatewayServer(port, {
+        bind: "loopback",
+        auth: { mode: "token", token },
+        controlUiEnabled: false,
+      });
+
+      try {
+        const client = await connectClient({
+          url: `ws://127.0.0.1:${port}`,
+          token,
+        });
+
+        try {
+          const sessionKey = "agent:dev:mock-openai";
+
+          await client.request<Record<string, unknown>>("sessions.patch", {
+            key: sessionKey,
+            model: "openai/gpt-5.2",
+          });
+
+          const runId = randomUUID();
+          const payload = await client.request<{
+            status?: unknown;
+            result?: unknown;
+          }>(
+            "agent",
+            {
+              sessionKey,
+              idempotencyKey: `idem-${runId}`,
+              message:
+                `Call the read tool on "${toolProbePath}". ` +
+                `Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`,
+              deliver: false,
+            },
+            { expectFinal: true },
+          );
+
+          expect(payload?.status).toBe("ok");
+          const text = extractPayloadText(payload?.result);
+          expect(text).toContain(nonceA);
+          expect(text).toContain(nonceB);
+        } finally {
+          client.stop();
+        }
+      } finally {
+        await server.close({ reason: "mock openai test complete" });
+      }
+    } finally {
+      await fs.rm(tempHome, { recursive: true, force: true });
+      restoreEnv("HOME", prev.home);
+      restoreEnv("CLAWDBOT_CONFIG_PATH", prev.configPath);
+      restoreEnv("CLAWDBOT_GATEWAY_TOKEN", prev.token);
+      restoreEnv("CLAWDBOT_SKIP_PROVIDERS", prev.skipProviders);
+      restoreEnv("CLAWDBOT_SKIP_GMAIL_WATCHER", prev.skipGmail);
+      restoreEnv("CLAWDBOT_SKIP_CRON", prev.skipCron);
+      restoreEnv("CLAWDBOT_SKIP_CANVAS_HOST", prev.skipCanvas);
+    }
+  }, 30_000);
+});