From a29f5dda2eb9b08874da916e3f9742eb6f2ea3df Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Sat, 10 Jan 2026 01:09:30 +0000
Subject: [PATCH] test(live): gateway smoke across profile-key models

---
 pnpm-workspace.yaml                           |   2 +-
 scripts/test-live-gateway-models-docker.sh    |  26 ++
 src/agents/pi-embedded-runner.ts              |   1 -
 src/auto-reply/reply/abort.ts                 |   1 -
 .../gateway-models.profiles.live.test.ts      | 352 ++++++++++++++++++
 5 files changed, 379 insertions(+), 3 deletions(-)
 create mode 100755 scripts/test-live-gateway-models-docker.sh
 create mode 100644 src/gateway/gateway-models.profiles.live.test.ts

diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml
index 869f33732..9cd3d93b7 100644
--- a/pnpm-workspace.yaml
+++ b/pnpm-workspace.yaml
@@ -10,4 +10,4 @@ onlyBuiltDependencies:
   - sharp
 
 patchedDependencies:
-  '@mariozechner/pi-ai': patches/@mariozechner__pi-ai.patch
+  '@mariozechner/pi-ai@0.42.1': patches/@mariozechner__pi-ai@0.42.1.patch
diff --git a/scripts/test-live-gateway-models-docker.sh b/scripts/test-live-gateway-models-docker.sh
new file mode 100755
index 000000000..39422cf76
--- /dev/null
+++ b/scripts/test-live-gateway-models-docker.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Builds the clawdbot Docker image and runs the live gateway model tests inside
+# it, mounting the host config and workspace directories into the container.
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+IMAGE_NAME="${CLAWDBOT_IMAGE:-clawdbot:local}"
+CONFIG_DIR="${CLAWDBOT_CONFIG_DIR:-$HOME/.clawdbot}"
+WORKSPACE_DIR="${CLAWDBOT_WORKSPACE_DIR:-$HOME/clawd}"
+
+echo "==> Build image: $IMAGE_NAME"
+docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/Dockerfile" "$ROOT_DIR"
+
+echo "==> Run gateway live model tests (profile keys)"
+docker run --rm -t \
+  --entrypoint bash \
+  -e HOME=/home/node \
+  -e LIVE=1 \
+  -e CLAWDBOT_LIVE_GATEWAY=1 \
+  -e CLAWDBOT_LIVE_GATEWAY_ALL_MODELS=1 \
+  -e CLAWDBOT_LIVE_GATEWAY_MODELS="${CLAWDBOT_LIVE_GATEWAY_MODELS:-all}" \
+  -v "$CONFIG_DIR":/home/node/.clawdbot \
+  -v "$WORKSPACE_DIR":/home/node/clawd \
+  "$IMAGE_NAME" \
+  -lc "cd /app && pnpm test:live"
+
diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts
index d3a73a002..7d923bf6a 100644
--- a/src/agents/pi-embedded-runner.ts
+++ b/src/agents/pi-embedded-runner.ts
@@ -759,7 +759,6 @@ export async function compactEmbeddedPiSession(params: {
   const enqueueGlobal =
     params.enqueue ??
     ((task, opts) => enqueueCommandInLane(globalLane, task, opts));
-  const runAbortController = new AbortController();
   return enqueueCommandInLane(sessionLane, () =>
     enqueueGlobal(async () => {
       const resolvedWorkspace = resolveUserPath(params.workspaceDir);
diff --git a/src/auto-reply/reply/abort.ts b/src/auto-reply/reply/abort.ts
index 3543cf739..eb31ae0f7 100644
--- a/src/auto-reply/reply/abort.ts
+++ b/src/auto-reply/reply/abort.ts
@@ -6,7 +6,6 @@ import {
   resolveStorePath,
   type SessionEntry,
   saveSessionStore,
-  type SessionEntry,
 } from "../../config/sessions.js";
 import { parseAgentSessionKey } from "../../routing/session-key.js";
 import { resolveCommandAuthorization } from "../command-auth.js";
diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts
new file mode 100644
index 000000000..00c3e6bb9
--- /dev/null
+++ b/src/gateway/gateway-models.profiles.live.test.ts
@@ -0,0 +1,352 @@
+import { randomUUID } from "node:crypto";
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+
+import type { Api, Model } from "@mariozechner/pi-ai";
+import {
+  discoverAuthStorage,
+  discoverModels,
+} from "@mariozechner/pi-coding-agent";
+import { describe, expect, it } from "vitest";
+import { resolveClawdbotAgentDir } from "../agents/agent-paths.js";
+import { getApiKeyForModel } from "../agents/model-auth.js";
+import { ensureClawdbotModelsJson } from "../agents/models-config.js";
+import { loadConfig } from "../config/config.js";
+import { GatewayClient } from "./client.js";
+import { startGatewayServer } from "./server.js";
+import { getFreePort } from "./test-helpers.js";
+
+const LIVE = process.env.LIVE === "1" || process.env.CLAWDBOT_LIVE_TEST === "1";
+const GATEWAY_LIVE = process.env.CLAWDBOT_LIVE_GATEWAY === "1";
+const ALL_MODELS =
+  process.env.CLAWDBOT_LIVE_GATEWAY_ALL_MODELS === "1" ||
+  process.env.CLAWDBOT_LIVE_GATEWAY_MODELS === "all";
+
+// Environment variables this test overrides and must put back afterwards.
+const MANAGED_ENV_KEYS = [
+  "CLAWDBOT_CONFIG_PATH",
+  "CLAWDBOT_GATEWAY_TOKEN",
+  "CLAWDBOT_SKIP_PROVIDERS",
+  "CLAWDBOT_SKIP_GMAIL_WATCHER",
+  "CLAWDBOT_SKIP_CRON",
+  "CLAWDBOT_SKIP_CANVAS_HOST",
+] as const;
+
+// The suite is skipped unless both the live and gateway-live switches are set.
+const describeLive = LIVE && GATEWAY_LIVE ? describe : describe.skip;
+
+/** Records the current value (or absence) of each listed environment variable. */
+function snapshotEnv(
+  keys: readonly string[],
+): Map<string, string | undefined> {
+  return new Map(
+    keys.map((key): [string, string | undefined] => [key, process.env[key]]),
+  );
+}
+
+/**
+ * Puts back a snapshot taken by snapshotEnv. Variables that were unset are
+ * deleted, because assigning undefined to process.env stores the string
+ * "undefined" instead of removing the variable.
+ */
+function restoreEnv(snapshot: Map<string, string | undefined>): void {
+  for (const [key, value] of snapshot) {
+    if (value === undefined) delete process.env[key];
+    else process.env[key] = value;
+  }
+}
+
+/**
+ * Parses a comma-separated list of model keys into a set.
+ * Returns null (no filtering) when the value is missing, blank, or "all".
+ */
+function parseFilter(raw?: string): Set<string> | null {
+  const trimmed = raw?.trim();
+  if (!trimmed || trimmed === "all") return null;
+  const ids = trimmed
+    .split(",")
+    .map((s) => s.trim())
+    .filter(Boolean);
+  return ids.length ? new Set(ids) : null;
+}
+
+/**
+ * Joins the non-blank string `text` fields found in `result.payloads` with
+ * newlines. Returns "" when `result` is not an object or has no such fields.
+ */
+function extractPayloadText(result: unknown): string {
+  const payloads =
+    result && typeof result === "object"
+      ? (result as Record<string, unknown>).payloads
+      : undefined;
+  const texts = (Array.isArray(payloads) ? payloads : [])
+    .map((p: unknown) =>
+      p && typeof p === "object"
+        ? (p as Record<string, unknown>).text
+        : undefined,
+    )
+    .filter((t): t is string => typeof t === "string" && t.trim().length > 0);
+  return texts.join("\n").trim();
+}
+
+/**
+ * Heuristic for a substantive reply: not a bare "ok", at least 60 characters,
+ * and at least 12 whitespace-separated words.
+ */
+function isMeaningful(text: string): boolean {
+  if (!text) return false;
+  const trimmed = text.trim();
+  if (trimmed.toLowerCase() === "ok") return false;
+  if (trimmed.length < 60) return false;
+  const words = trimmed.split(/\s+/g).filter(Boolean);
+  if (words.length < 12) return false;
+  return true;
+}
+
+// Shape of the final "agent" response as far as this test inspects it.
+type AgentFinalPayload = {
+  status?: unknown;
+  result?: unknown;
+};
+
+/**
+ * Starts a GatewayClient and resolves with it once the client reports onHelloOk.
+ * Rejects on a connect error, on a close during connect, or after 10 seconds.
+ */
+async function connectClient(params: { url: string; token: string }) {
+  return await new Promise<GatewayClient>((resolve, reject) => {
+    let settled = false;
+    const stop = (err?: Error, client?: GatewayClient) => {
+      if (settled) return;
+      settled = true;
+      clearTimeout(timer);
+      if (err) reject(err);
+      else resolve(client as GatewayClient);
+    };
+    const client = new GatewayClient({
+      url: params.url,
+      token: params.token,
+      clientName: "vitest-live",
+      clientVersion: "dev",
+      mode: "test",
+      onHelloOk: () => stop(undefined, client),
+      onConnectError: (err) => stop(err),
+      onClose: (code, reason) =>
+        stop(new Error(`gateway closed during connect (${code}): ${reason}`)),
+    });
+    const timer = setTimeout(
+      () => stop(new Error("gateway connect timeout")),
+      10_000,
+    );
+    // Do not let the pending timeout keep the process alive.
+    timer.unref();
+    client.start();
+  });
+}
+
+describeLive("gateway live (dev agent, profile keys)", () => {
+  it(
+    "runs meaningful prompts across models with available keys",
+    async () => {
+      const previousEnv = snapshotEnv(MANAGED_ENV_KEYS);
+      let tempDir: string | undefined;
+      let server: Awaited<ReturnType<typeof startGatewayServer>> | undefined;
+      let client: GatewayClient | undefined;
+
+      // Everything that mutates the environment or allocates a resource runs
+      // inside this try so the finally block always undoes it.
+      try {
+        process.env.CLAWDBOT_SKIP_PROVIDERS = "1";
+        process.env.CLAWDBOT_SKIP_GMAIL_WATCHER = "1";
+        process.env.CLAWDBOT_SKIP_CRON = "1";
+        process.env.CLAWDBOT_SKIP_CANVAS_HOST = "1";
+
+        const token = `test-${randomUUID()}`;
+        process.env.CLAWDBOT_GATEWAY_TOKEN = token;
+
+        const cfg = loadConfig();
+        await ensureClawdbotModelsJson(cfg);
+
+        const agentDir = resolveClawdbotAgentDir();
+        const authStorage = discoverAuthStorage(agentDir);
+        const modelRegistry = discoverModels(authStorage, agentDir);
+        const all = modelRegistry.getAll() as Array<Model<Api>>;
+
+        const filter = parseFilter(process.env.CLAWDBOT_LIVE_GATEWAY_MODELS);
+
+        // Default: honor user allowlist. Opt-in: scan all models with keys.
+        const allowlistKeys = Object.keys(cfg.agents?.defaults?.models ?? {});
+        const wanted =
+          ALL_MODELS || allowlistKeys.length === 0
+            ? all
+            : all.filter((m) =>
+                allowlistKeys.includes(`${m.provider}/${m.id}`),
+              );
+
+        const candidates: Array<Model<Api>> = [];
+        for (const model of wanted) {
+          const id = `${model.provider}/${model.id}`;
+          if (filter && !filter.has(id)) continue;
+          try {
+            // eslint-disable-next-line no-await-in-loop
+            await getApiKeyForModel({ model, cfg });
+            candidates.push(model);
+          } catch {
+            // no creds; skip
+          }
+        }
+
+        expect(candidates.length).toBeGreaterThan(0);
+
+        // Build a temp config that allows all selected models, so session
+        // overrides stick.
+        const nextCfg = {
+          ...cfg,
+          agents: {
+            ...(cfg.agents ?? {}),
+            defaults: {
+              ...(cfg.agents?.defaults ?? {}),
+              models: Object.fromEntries(
+                candidates.map((m) => [`${m.provider}/${m.id}`, {}]),
+              ),
+            },
+          },
+        };
+        tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-live-"));
+        const tempConfigPath = path.join(tempDir, "clawdbot.json");
+        await fs.writeFile(
+          tempConfigPath,
+          `${JSON.stringify(nextCfg, null, 2)}\n`,
+        );
+        process.env.CLAWDBOT_CONFIG_PATH = tempConfigPath;
+
+        const port = await getFreePort();
+        server = await startGatewayServer(port, {
+          bind: "loopback",
+          auth: { mode: "token", token },
+          controlUiEnabled: false,
+        });
+
+        const gateway = await connectClient({
+          url: `ws://127.0.0.1:${port}`,
+          token,
+        });
+        client = gateway;
+
+        const sessionKey = "agent:dev:live-gateway";
+
+        const failures: Array<{ model: string; error: string }> = [];
+
+        for (const model of candidates) {
+          const modelKey = `${model.provider}/${model.id}`;
+
+          try {
+            // Ensure session exists + override model for this run.
+            await gateway.request<Record<string, unknown>>("sessions.patch", {
+              key: sessionKey,
+              model: modelKey,
+            });
+
+            // “Meaningful” direct prompt (no tools).
+            const runId = randomUUID();
+            const payload = await gateway.request<AgentFinalPayload>(
+              "agent",
+              {
+                sessionKey,
+                idempotencyKey: `idem-${runId}`,
+                message:
+                  "Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
+                deliver: false,
+              },
+              { expectFinal: true },
+            );
+
+            if (payload?.status !== "ok") {
+              throw new Error(`agent status=${String(payload?.status)}`);
+            }
+            const text = extractPayloadText(payload?.result);
+            if (!isMeaningful(text)) throw new Error(`not meaningful: ${text}`);
+            // Word-boundary match; a doubled backslash here would look for a
+            // literal "\b" and never match.
+            if (!/\bmicrotask\b/i.test(text) || !/\bmacrotask\b/i.test(text)) {
+              throw new Error(`missing required keywords: ${text}`);
+            }
+
+            // Regression: tool-call-only turn followed by a user message (OpenAI responses bug class).
+            if (
+              (model.provider === "openai" &&
+                model.api === "openai-responses") ||
+              (model.provider === "openai-codex" &&
+                model.api === "openai-codex-responses")
+            ) {
+              const runId2 = randomUUID();
+              const first = await gateway.request<AgentFinalPayload>(
+                "agent",
+                {
+                  sessionKey,
+                  idempotencyKey: `idem-${runId2}-1`,
+                  message:
+                    "Call the read tool on package.json. Do not write any other text.",
+                  deliver: false,
+                },
+                { expectFinal: true },
+              );
+              if (first?.status !== "ok") {
+                throw new Error(
+                  `tool-only turn failed: status=${String(first?.status)}`,
+                );
+              }
+
+              const second = await gateway.request<AgentFinalPayload>(
+                "agent",
+                {
+                  sessionKey,
+                  idempotencyKey: `idem-${runId2}-2`,
+                  message:
+                    'Now answer: what is the "version" field in package.json? Reply with just the version string.',
+                  deliver: false,
+                },
+                { expectFinal: true },
+              );
+              if (second?.status !== "ok") {
+                throw new Error(
+                  `post-tool message failed: status=${String(second?.status)}`,
+                );
+              }
+              const version = extractPayloadText(second?.result);
+              // Expect a version that starts like YYYY.N.N.
+              if (!/^\d{4}\.\d+\.\d+/.test(version.trim())) {
+                throw new Error(`unexpected version: ${version}`);
+              }
+            }
+          } catch (err) {
+            // Collect per-model failures so one bad model does not hide the rest.
+            failures.push({ model: modelKey, error: String(err) });
+          }
+        }
+
+        if (failures.length > 0) {
+          const preview = failures
+            .slice(0, 20)
+            .map((f) => `- ${f.model}: ${f.error}`)
+            .join("\n");
+          throw new Error(
+            `gateway live model failures (${failures.length}):\n${preview}`,
+          );
+        }
+      } finally {
+        try {
+          client?.stop();
+          await server?.close({ reason: "live test complete" });
+          if (tempDir) {
+            await fs.rm(tempDir, { recursive: true, force: true });
+          }
+        } finally {
+          restoreEnv(previousEnv);
+        }
+      }
+    },
+    20 * 60 * 1000,
+  );
+});