import { randomBytes, randomUUID } from "node:crypto";
import fs from "node:fs/promises";
import { createServer } from "node:net";
import os from "node:os";
import path from "node:path";

import type { Api, Model } from "@mariozechner/pi-ai";
import {
  discoverAuthStorage,
  discoverModels,
} from "@mariozechner/pi-coding-agent";
import { describe, expect, it } from "vitest";
import { resolveClawdbotAgentDir } from "../agents/agent-paths.js";
import { getApiKeyForModel } from "../agents/model-auth.js";
import { ensureClawdbotModelsJson } from "../agents/models-config.js";
import { loadConfig } from "../config/config.js";
import {
  GATEWAY_CLIENT_MODES,
  GATEWAY_CLIENT_NAMES,
} from "../utils/message-provider.js";
import { resolveUserPath } from "../utils.js";
import { GatewayClient } from "./client.js";
import { renderCatNoncePngBase64 } from "./live-image-probe.js";
import { startGatewayServer } from "./server.js";

// Opt-in switches: the suite only runs when both a general "live" flag and the
// gateway-specific flag are set to "1".
const LIVE = process.env.LIVE === "1" || process.env.CLAWDBOT_LIVE_TEST === "1";
const GATEWAY_LIVE = process.env.CLAWDBOT_LIVE_GATEWAY === "1";
const ALL_MODELS =
  process.env.CLAWDBOT_LIVE_GATEWAY_ALL_MODELS === "1" ||
  process.env.CLAWDBOT_LIVE_GATEWAY_MODELS === "all";
const EXTRA_TOOL_PROBES = process.env.CLAWDBOT_LIVE_GATEWAY_TOOL_PROBE === "1";
const EXTRA_IMAGE_PROBES =
  process.env.CLAWDBOT_LIVE_GATEWAY_IMAGE_PROBE === "1";
const ZAI_FALLBACK = process.env.CLAWDBOT_LIVE_GATEWAY_ZAI_FALLBACK === "1";
const PROVIDERS = parseFilter(process.env.CLAWDBOT_LIVE_GATEWAY_PROVIDERS);

const describeLive = LIVE && GATEWAY_LIVE ? describe : describe.skip;

/** Environment variables that the live tests override and must restore. */
const MANAGED_ENV_KEYS = [
  "CLAWDBOT_CONFIG_PATH",
  "CLAWDBOT_GATEWAY_TOKEN",
  "CLAWDBOT_SKIP_PROVIDERS",
  "CLAWDBOT_SKIP_GMAIL_WATCHER",
  "CLAWDBOT_SKIP_CRON",
  "CLAWDBOT_SKIP_CANVAS_HOST",
] as const;

/** Value resolved by `startGatewayServer`; the tests only call `close` on it. */
type GatewayServer = Awaited<ReturnType<typeof startGatewayServer>>;

/** Resources a live test may have acquired by the time it finishes or fails. */
type LiveResources = {
  client?: GatewayClient;
  server?: GatewayServer;
  toolProbePath?: string;
  tempDir?: string;
};

type AgentFinalPayload = {
  status?: unknown;
  result?: unknown;
};

/**
 * Parses a comma-separated filter string into a set of ids.
 *
 * @param raw - Raw environment value; may be undefined.
 * @returns The set of trimmed, non-empty ids, or `null` (meaning "no filter")
 *   when the value is missing, blank, the literal "all", or has no ids.
 */
function parseFilter(raw?: string): Set<string> | null {
  const trimmed = raw?.trim();
  if (!trimmed || trimmed === "all") return null;
  const ids = trimmed
    .split(",")
    .map((s) => s.trim())
    .filter(Boolean);
  return ids.length ? new Set(ids) : null;
}

/**
 * Snapshots the managed environment variables, then applies the overrides the
 * live tests need (skip flags set to "1" and the gateway token).
 *
 * @param token - Gateway auth token to export as CLAWDBOT_GATEWAY_TOKEN.
 * @returns A function that restores every managed variable to its snapshot.
 */
function applyLiveGatewayEnv(token: string): () => void {
  const snapshot = MANAGED_ENV_KEYS.map((name) => ({
    name,
    value: process.env[name],
  }));
  process.env.CLAWDBOT_SKIP_PROVIDERS = "1";
  process.env.CLAWDBOT_SKIP_GMAIL_WATCHER = "1";
  process.env.CLAWDBOT_SKIP_CRON = "1";
  process.env.CLAWDBOT_SKIP_CANVAS_HOST = "1";
  process.env.CLAWDBOT_GATEWAY_TOKEN = token;
  return () => {
    for (const { name, value } of snapshot) {
      // Assigning `undefined` to process.env stores the string "undefined",
      // so variables that were previously unset must be deleted instead.
      if (value === undefined) delete process.env[name];
      else process.env[name] = value;
    }
  };
}

/**
 * Releases whatever a live test managed to acquire, then restores the
 * environment. The environment is restored even if a cleanup step throws.
 *
 * @param resources - Handles and paths recorded by the test so far.
 * @param restoreEnv - Restore function returned by `applyLiveGatewayEnv`.
 */
async function releaseLiveResources(
  resources: LiveResources,
  restoreEnv: () => void,
): Promise<void> {
  try {
    resources.client?.stop();
    if (resources.server) {
      await resources.server.close({ reason: "live test complete" });
    }
    if (resources.toolProbePath) {
      await fs.rm(resources.toolProbePath, { force: true });
    }
    if (resources.tempDir) {
      await fs.rm(resources.tempDir, { recursive: true, force: true });
    }
  } finally {
    restoreEnv();
  }
}

/**
 * Joins the non-blank `text` fields of `result.payloads` with newlines.
 *
 * @param result - Agent result; expected to carry a `payloads` array.
 * @returns The joined, trimmed text, or "" when there is nothing usable
 *   (including when `result` is null, undefined, or not an object).
 */
function extractPayloadText(result: unknown): string {
  // A missing result should surface as the caller's probe error rather than
  // as a TypeError from a property access on null/undefined.
  if (!result || typeof result !== "object") return "";
  const record = result as Record<string, unknown>;
  const payloads = Array.isArray(record.payloads) ? record.payloads : [];
  const texts = payloads
    .map((p) =>
      p && typeof p === "object"
        ? (p as Record<string, unknown>).text
        : undefined,
    )
    .filter((t): t is string => typeof t === "string" && t.trim().length > 0);
  return texts.join("\n").trim();
}

/**
 * Heuristic for a substantive reply: not just "ok", at least 60 characters,
 * and at least 12 whitespace-separated words.
 */
function isMeaningful(text: string): boolean {
  if (!text) return false;
  const trimmed = text.trim();
  if (trimmed.toLowerCase() === "ok") return false;
  if (trimmed.length < 60) return false;
  const words = trimmed.split(/\s+/g).filter(Boolean);
  if (words.length < 12) return false;
  return true;
}

/**
 * Detects reply text that looks like a "model not found" API error: it must
 * contain "not found" and match one of the known error shapes below.
 */
function isGoogleModelNotFoundText(text: string): boolean {
  const trimmed = text.trim();
  if (!trimmed) return false;
  if (!/not found/i.test(trimmed)) return false;
  if (/models\/.+ is not found for api version/i.test(trimmed)) return true;
  if (/"status"\s*:\s*"NOT_FOUND"/.test(trimmed)) return true;
  if (/"code"\s*:\s*404/.test(trimmed)) return true;
  return false;
}

/** True when the error text mentions `refresh_token_reused` (any case). */
function isRefreshTokenReused(error: string): boolean {
  return /refresh_token_reused/i.test(error);
}

/**
 * Builds a random code of `len` characters for the image probe.
 *
 * @param len - Number of characters to generate (default 10).
 */
function randomImageProbeCode(len = 10): string {
  // NOTE(review): presumably restricted to glyphs that are hard to confuse
  // when read back from a rendered image — confirm.
  const alphabet = "2345689ABCEF";
  let out = "";
  for (const byte of randomBytes(len)) {
    out += alphabet.charAt(byte % alphabet.length);
  }
  return out;
}

/**
 * Edit distance between two strings counting single-code-unit insertions,
 * deletions and substitutions, computed with two rolling rows.
 */
function editDistance(a: string, b: string): number {
  if (a === b) return 0;
  const aLen = a.length;
  const bLen = b.length;
  if (aLen === 0) return bLen;
  if (bLen === 0) return aLen;

  let prev = Array.from({ length: bLen + 1 }, (_v, idx) => idx);
  let curr = Array.from({ length: bLen + 1 }, () => 0);

  for (let i = 1; i <= aLen; i += 1) {
    curr[0] = i;
    const aCh = a.charCodeAt(i - 1);
    for (let j = 1; j <= bLen; j += 1) {
      const cost = aCh === b.charCodeAt(j - 1) ? 0 : 1;
      curr[j] = Math.min(
        prev[j] + 1, // delete
        curr[j - 1] + 1, // insert
        prev[j - 1] + cost, // substitute
      );
    }
    // Swap rows: the row just filled becomes "previous" for the next i.
    [prev, curr] = [curr, prev];
  }
  return prev[bLen] ?? Number.POSITIVE_INFINITY;
}

/**
 * Asks the OS for an ephemeral port on 127.0.0.1 and returns it after closing
 * the temporary listener.
 */
async function getFreePort(): Promise<number> {
  return await new Promise<number>((resolve, reject) => {
    const srv = createServer();
    srv.on("error", reject);
    srv.listen(0, "127.0.0.1", () => {
      const addr = srv.address();
      if (!addr || typeof addr === "string") {
        srv.close();
        reject(new Error("failed to acquire free port"));
        return;
      }
      const port = addr.port;
      srv.close((err) => {
        if (err) reject(err);
        else resolve(port);
      });
    });
  });
}

/**
 * Reports whether `port` can currently be bound on 127.0.0.1.
 * Ports outside 1..65535 (or non-finite values) are reported as not free.
 */
async function isPortFree(port: number): Promise<boolean> {
  if (!Number.isFinite(port) || port <= 0 || port > 65535) return false;
  return await new Promise<boolean>((resolve) => {
    const srv = createServer();
    srv.once("error", () => resolve(false));
    srv.listen(port, "127.0.0.1", () => {
      srv.close(() => resolve(true));
    });
  });
}

/**
 * Finds a base port whose offsets +1, +2 and +4 are also free.
 *
 * @throws Error after 25 unsuccessful attempts.
 */
async function getFreeGatewayPort(): Promise<number> {
  // Gateway uses derived ports (bridge/browser/canvas). Avoid flaky collisions by
  // ensuring the common derived offsets are free too.
  for (let attempt = 0; attempt < 25; attempt += 1) {
    const port = await getFreePort();
    const candidates = [port, port + 1, port + 2, port + 4];
    const ok = (
      await Promise.all(candidates.map((candidate) => isPortFree(candidate)))
    ).every(Boolean);
    if (ok) return port;
  }
  throw new Error("failed to acquire a free gateway port block");
}

/**
 * Starts a `GatewayClient` and resolves with it once `onHelloOk` fires.
 * Rejects on a connect error, on close before hello, or after 10 seconds.
 *
 * @param params - Gateway websocket URL and auth token.
 */
async function connectClient(params: { url: string; token: string }) {
  return await new Promise<GatewayClient>((resolve, reject) => {
    let settled = false;
    // Settles the promise exactly once; later callbacks are ignored.
    const stop = (err?: Error, client?: GatewayClient) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      if (err) reject(err);
      else resolve(client as GatewayClient);
    };
    // NOTE(review): on the rejection paths the client is not stopped here;
    // confirm whether GatewayClient needs an explicit stop() after a failed
    // connect.
    const client = new GatewayClient({
      url: params.url,
      token: params.token,
      clientName: GATEWAY_CLIENT_NAMES.TEST,
      clientDisplayName: "vitest-live",
      clientVersion: "dev",
      mode: GATEWAY_CLIENT_MODES.TEST,
      onHelloOk: () => stop(undefined, client),
      onConnectError: (err) => stop(err),
      onClose: (code, reason) =>
        stop(new Error(`gateway closed during connect (${code}): ${reason}`)),
    });
    const timer = setTimeout(
      () => stop(new Error("gateway connect timeout")),
      10_000,
    );
    // Do not let the pending timeout keep the process alive on its own.
    timer.unref();
    client.start();
  });
}

describeLive("gateway live (dev agent, profile keys)", () => {
  it(
    "runs meaningful prompts across models with available keys",
    async () => {
      const token = `test-${randomUUID()}`;
      const restoreEnv = applyLiveGatewayEnv(token);
      // Filled in as resources are acquired so the `finally` below can release
      // exactly what exists, even when setup fails part-way through.
      const resources: LiveResources = {};

      try {
        const cfg = loadConfig();
        await ensureClawdbotModelsJson(cfg);

        const workspaceDir = resolveUserPath(
          cfg.agents?.defaults?.workspace ?? path.join(os.homedir(), "clawd"),
        );
        await fs.mkdir(workspaceDir, { recursive: true });
        const nonceA = randomUUID();
        const nonceB = randomUUID();
        const toolProbePath = path.join(
          workspaceDir,
          `.clawdbot-live-tool-probe.${nonceA}.txt`,
        );
        resources.toolProbePath = toolProbePath;
        await fs.writeFile(
          toolProbePath,
          `nonceA=${nonceA}\nnonceB=${nonceB}\n`,
        );

        const agentDir = resolveClawdbotAgentDir();
        const authStorage = discoverAuthStorage(agentDir);
        const modelRegistry = discoverModels(authStorage, agentDir);
        const all = modelRegistry.getAll() as Array<Model<Api>>;

        const filter = parseFilter(process.env.CLAWDBOT_LIVE_GATEWAY_MODELS);

        // Default: honor user allowlist. Opt-in: scan all models with keys.
        const allowlistKeys = Object.keys(cfg.agents?.defaults?.models ?? {});
        const wanted =
          ALL_MODELS || allowlistKeys.length === 0
            ? all
            : all.filter((m) =>
                allowlistKeys.includes(`${m.provider}/${m.id}`),
              );

        const candidates: Array<Model<Api>> = [];
        for (const model of wanted) {
          const id = `${model.provider}/${model.id}`;
          if (PROVIDERS && !PROVIDERS.has(model.provider)) continue;
          if (filter && !filter.has(id)) continue;
          try {
            // eslint-disable-next-line no-await-in-loop
            await getApiKeyForModel({ model, cfg });
            candidates.push(model);
          } catch {
            // no creds; skip
          }
        }

        expect(candidates.length).toBeGreaterThan(0);
        const imageCandidates = EXTRA_IMAGE_PROBES
          ? candidates.filter((m) => m.input?.includes("image"))
          : [];
        if (EXTRA_IMAGE_PROBES && imageCandidates.length === 0) {
          throw new Error(
            "image probe enabled but no selected models advertise image support; set CLAWDBOT_LIVE_GATEWAY_MODELS to include an image-capable model",
          );
        }

        // Build a temp config that allows all selected models, so session overrides stick.
        const lmstudioProvider = cfg.models?.providers?.lmstudio;
        const nextCfg = {
          ...cfg,
          agents: {
            ...cfg.agents,
            list: (cfg.agents?.list ?? []).map((entry) => ({
              ...entry,
              sandbox: { mode: "off" },
            })),
            defaults: {
              ...cfg.agents?.defaults,
              // Live tests should avoid Docker sandboxing so tool probes can
              // operate on the temporary probe files we create in the host workspace.
              sandbox: { mode: "off" },
              models: Object.fromEntries(
                candidates.map((m) => [`${m.provider}/${m.id}`, {}]),
              ),
            },
          },
          models: {
            ...cfg.models,
            providers: {
              ...cfg.models?.providers,
              // LM Studio is most reliable via Chat Completions; its Responses API
              // tool-calling behavior is inconsistent across releases.
              ...(lmstudioProvider
                ? {
                    lmstudio: {
                      ...lmstudioProvider,
                      api: "openai-completions",
                    },
                  }
                : {}),
            },
          },
        };
        const tempDir = await fs.mkdtemp(
          path.join(os.tmpdir(), "clawdbot-live-"),
        );
        resources.tempDir = tempDir;
        const tempConfigPath = path.join(tempDir, "clawdbot.json");
        await fs.writeFile(
          tempConfigPath,
          `${JSON.stringify(nextCfg, null, 2)}\n`,
        );
        process.env.CLAWDBOT_CONFIG_PATH = tempConfigPath;

        const port = await getFreeGatewayPort();
        const server = await startGatewayServer(port, {
          bind: "loopback",
          auth: { mode: "token", token },
          controlUiEnabled: false,
        });
        resources.server = server;

        const client = await connectClient({
          url: `ws://127.0.0.1:${port}`,
          token,
        });
        resources.client = client;

        const sessionKey = "agent:dev:live-gateway";

        const failures: Array<{ model: string; error: string }> = [];

        for (const model of candidates) {
          const modelKey = `${model.provider}/${model.id}`;

          try {
            // Ensure session exists + override model for this run.
            await client.request<Record<string, unknown>>("sessions.patch", {
              key: sessionKey,
              model: modelKey,
            });
            // Reset between models: avoids cross-provider transcript incompatibilities
            // (notably OpenAI Responses requiring reasoning replay for function_call items).
            await client.request<Record<string, unknown>>("sessions.reset", {
              key: sessionKey,
            });

            // "Meaningful" direct prompt (no tools).
            const runId = randomUUID();
            const payload = await client.request<AgentFinalPayload>(
              "agent",
              {
                sessionKey,
                idempotencyKey: `idem-${runId}`,
                message:
                  "Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
                deliver: false,
              },
              { expectFinal: true },
            );

            if (payload?.status !== "ok") {
              throw new Error(`agent status=${String(payload?.status)}`);
            }
            const text = extractPayloadText(payload?.result);
            if (
              model.provider === "google" &&
              isGoogleModelNotFoundText(text)
            ) {
              // Catalog drift: model IDs can disappear or become unavailable on the API.
              // Treat as skip when scanning "all models" for Google.
              continue;
            }
            if (!isMeaningful(text))
              throw new Error(`not meaningful: ${text}`);
            if (
              !/\bmicro\s*-?\s*tasks?\b/i.test(text) ||
              !/\bmacro\s*-?\s*tasks?\b/i.test(text)
            ) {
              throw new Error(`missing required keywords: ${text}`);
            }

            // Real tool invocation: force the agent to Read a local file and echo a nonce.
            const runIdTool = randomUUID();
            const toolProbe = await client.request<AgentFinalPayload>(
              "agent",
              {
                sessionKey,
                idempotencyKey: `idem-${runIdTool}-tool`,
                message:
                  "Clawdbot live tool probe (local, safe): " +
                  `use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
                  "Then reply with the two nonce values you read (include both).",
                deliver: false,
              },
              { expectFinal: true },
            );
            if (toolProbe?.status !== "ok") {
              throw new Error(
                `tool probe failed: status=${String(toolProbe?.status)}`,
              );
            }
            const toolText = extractPayloadText(toolProbe?.result);
            if (!toolText.includes(nonceA) || !toolText.includes(nonceB)) {
              throw new Error(`tool probe missing nonce: ${toolText}`);
            }

            if (EXTRA_TOOL_PROBES) {
              const nonceC = randomUUID();
              const toolWritePath = path.join(
                tempDir,
                `write-${runIdTool}.txt`,
              );

              const bashReadProbe = await client.request<AgentFinalPayload>(
                "agent",
                {
                  sessionKey,
                  idempotencyKey: `idem-${runIdTool}-bash-read`,
                  message:
                    "Clawdbot live tool probe (local, safe): " +
                    "use the tool named `bash` (or `Bash`) to run this command: " +
                    `mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
                    `Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
                    "Finally reply including the nonce text you read back.",
                  deliver: false,
                },
                { expectFinal: true },
              );
              if (bashReadProbe?.status !== "ok") {
                throw new Error(
                  `bash+read probe failed: status=${String(bashReadProbe?.status)}`,
                );
              }
              const bashReadText = extractPayloadText(bashReadProbe?.result);
              if (!bashReadText.includes(nonceC)) {
                throw new Error(
                  `bash+read probe missing nonce: ${bashReadText}`,
                );
              }

              await fs.rm(toolWritePath, { force: true });
            }

            if (EXTRA_IMAGE_PROBES && model.input?.includes("image")) {
              const imageCode = randomImageProbeCode(10);
              const imageBase64 = renderCatNoncePngBase64(imageCode);
              const runIdImage = randomUUID();

              const imageProbe = await client.request<AgentFinalPayload>(
                "agent",
                {
                  sessionKey,
                  idempotencyKey: `idem-${runIdImage}-image`,
                  message:
                    "Look at the attached image. Reply with exactly two tokens separated by a single space: " +
                    "(1) the animal shown or written in the image, lowercase; " +
                    "(2) the code printed in the image, uppercase. No extra text.",
                  attachments: [
                    {
                      mimeType: "image/png",
                      fileName: `probe-${runIdImage}.png`,
                      content: imageBase64,
                    },
                  ],
                  deliver: false,
                },
                { expectFinal: true },
              );
              if (imageProbe?.status !== "ok") {
                throw new Error(
                  `image probe failed: status=${String(imageProbe?.status)}`,
                );
              }
              const imageText = extractPayloadText(imageProbe?.result);
              if (!/\bcat\b/i.test(imageText)) {
                throw new Error(`image probe missing 'cat': ${imageText}`);
              }
              // Accept a near match: any 6-20 char alphanumeric token within
              // edit distance 2 of the expected code (and within 2 of its length).
              const codeCandidates =
                imageText.toUpperCase().match(/[A-Z0-9]{6,20}/g) ?? [];
              const bestDistance = codeCandidates.reduce((best, cand) => {
                if (Math.abs(cand.length - imageCode.length) > 2) return best;
                return Math.min(best, editDistance(cand, imageCode));
              }, Number.POSITIVE_INFINITY);
              if (!(bestDistance <= 2)) {
                throw new Error(
                  `image probe missing code (${imageCode}): ${imageText}`,
                );
              }
            }

            // Regression: tool-call-only turn followed by a user message (OpenAI responses bug class).
            if (
              (model.provider === "openai" &&
                model.api === "openai-responses") ||
              (model.provider === "openai-codex" &&
                model.api === "openai-codex-responses")
            ) {
              const runId2 = randomUUID();
              const first = await client.request<AgentFinalPayload>(
                "agent",
                {
                  sessionKey,
                  idempotencyKey: `idem-${runId2}-1`,
                  message: `Call the tool named \`read\` (or \`Read\`) on "${toolProbePath}". Do not write any other text.`,
                  deliver: false,
                },
                { expectFinal: true },
              );
              if (first?.status !== "ok") {
                throw new Error(
                  `tool-only turn failed: status=${String(first?.status)}`,
                );
              }

              const second = await client.request<AgentFinalPayload>(
                "agent",
                {
                  sessionKey,
                  idempotencyKey: `idem-${runId2}-2`,
                  message: `Now answer: what are the values of nonceA and nonceB in "${toolProbePath}"? Reply with exactly: ${nonceA} ${nonceB}.`,
                  deliver: false,
                },
                { expectFinal: true },
              );
              if (second?.status !== "ok") {
                throw new Error(
                  `post-tool message failed: status=${String(second?.status)}`,
                );
              }
              const reply = extractPayloadText(second?.result);
              if (!reply.includes(nonceA) || !reply.includes(nonceB)) {
                throw new Error(`unexpected reply: ${reply}`);
              }
            }
          } catch (err) {
            const message = String(err);
            // OpenAI Codex refresh tokens can become single-use; skip instead of failing all live tests.
            if (
              model.provider === "openai-codex" &&
              isRefreshTokenReused(message)
            ) {
              continue;
            }
            failures.push({ model: modelKey, error: message });
          }
        }

        if (failures.length > 0) {
          const preview = failures
            .slice(0, 20)
            .map((f) => `- ${f.model}: ${f.error}`)
            .join("\n");
          throw new Error(
            `gateway live model failures (${failures.length}):\n${preview}`,
          );
        }
      } finally {
        await releaseLiveResources(resources, restoreEnv);
      }
    },
    20 * 60 * 1000,
  );

  it("z.ai fallback handles anthropic tool history", async () => {
    if (!ZAI_FALLBACK) return;

    const token = `test-${randomUUID()}`;
    const restoreEnv = applyLiveGatewayEnv(token);
    const resources: LiveResources = {};

    try {
      const cfg = loadConfig();
      await ensureClawdbotModelsJson(cfg);

      const agentDir = resolveClawdbotAgentDir();
      const authStorage = discoverAuthStorage(agentDir);
      const modelRegistry = discoverModels(authStorage, agentDir);
      const anthropic = modelRegistry.find(
        "anthropic",
        "claude-opus-4-5",
      ) as Model<Api> | null;
      const zai = modelRegistry.find("zai", "glm-4.7") as Model<Api> | null;

      // Silently skip when either model or its credentials are unavailable.
      if (!anthropic || !zai) return;
      try {
        await getApiKeyForModel({ model: anthropic, cfg });
        await getApiKeyForModel({ model: zai, cfg });
      } catch {
        return;
      }

      const workspaceDir = resolveUserPath(
        cfg.agents?.defaults?.workspace ?? path.join(os.homedir(), "clawd"),
      );
      await fs.mkdir(workspaceDir, { recursive: true });
      const nonceA = randomUUID();
      const nonceB = randomUUID();
      const toolProbePath = path.join(
        workspaceDir,
        `.clawdbot-live-zai-fallback.${nonceA}.txt`,
      );
      resources.toolProbePath = toolProbePath;
      await fs.writeFile(toolProbePath, `nonceA=${nonceA}\nnonceB=${nonceB}\n`);

      const port = await getFreeGatewayPort();
      // Use the same (port, options) call shape as the primary live test in
      // this file. CLAWDBOT_CONFIG_PATH is left untouched by this test.
      const server = await startGatewayServer(port, {
        bind: "loopback",
        auth: { mode: "token", token },
        controlUiEnabled: false,
      });
      resources.server = server;

      const client = await connectClient({
        url: `ws://127.0.0.1:${port}`,
        token,
      });
      resources.client = client;

      const sessionKey = "agent:dev:live-zai-fallback";

      await client.request<Record<string, unknown>>("sessions.patch", {
        key: sessionKey,
        model: "anthropic/claude-opus-4-5",
      });
      await client.request<Record<string, unknown>>("sessions.reset", {
        key: sessionKey,
      });

      const runId = randomUUID();
      const toolProbe = await client.request<AgentFinalPayload>(
        "agent",
        {
          sessionKey,
          idempotencyKey: `idem-${runId}-tool`,
          message:
            `Call the tool named \`read\` (or \`Read\` if \`read\` is unavailable) with JSON arguments {"path":"${toolProbePath}"}. ` +
            `Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`,
          deliver: false,
        },
        { expectFinal: true },
      );
      if (toolProbe?.status !== "ok") {
        throw new Error(
          `anthropic tool probe failed: status=${String(toolProbe?.status)}`,
        );
      }
      const toolText = extractPayloadText(toolProbe?.result);
      if (!toolText.includes(nonceA) || !toolText.includes(nonceB)) {
        throw new Error(`anthropic tool probe missing nonce: ${toolText}`);
      }

      // Switch the same session to z.ai without resetting, so the follow-up
      // runs on top of the tool history recorded above.
      await client.request<Record<string, unknown>>("sessions.patch", {
        key: sessionKey,
        model: "zai/glm-4.7",
      });

      const followupId = randomUUID();
      const followup = await client.request<AgentFinalPayload>(
        "agent",
        {
          sessionKey,
          idempotencyKey: `idem-${followupId}-followup`,
          message:
            `What are the values of nonceA and nonceB in "${toolProbePath}"? ` +
            `Reply with exactly: ${nonceA} ${nonceB}.`,
          deliver: false,
        },
        { expectFinal: true },
      );
      if (followup?.status !== "ok") {
        throw new Error(
          `zai followup failed: status=${String(followup?.status)}`,
        );
      }
      const followupText = extractPayloadText(followup?.result);
      if (!followupText.includes(nonceA) || !followupText.includes(nonceB)) {
        throw new Error(`zai followup missing nonce: ${followupText}`);
      }
    } finally {
      await releaseLiveResources(resources, restoreEnv);
    }
  }, 180_000);
});