Files
clawdbot/src/gateway/gateway-models.profiles.live.test.ts
Peter Steinberger 7acd26a2fc Move provider to a plugin-architecture (#661)
* refactor: introduce provider plugin registry

* refactor: move provider CLI to plugins

* docs: add provider plugin implementation notes

* refactor: shift provider runtime logic into plugins

* refactor: add plugin defaults and summaries

* docs: update provider plugin notes

* feat(commands): add /commands slash list

* Auto-reply: tidy help message

* Auto-reply: fix status command lint

* Tests: align google shared expectations

* Auto-reply: tidy help message

* Auto-reply: fix status command lint

* refactor: move provider routing into plugins

* test: align agent routing expectations

* docs: update provider plugin notes

* refactor: route replies via provider plugins

* docs: note route-reply plugin hooks

* refactor: extend provider plugin contract

* refactor: derive provider status from plugins

* refactor: unify gateway provider control

* refactor: use plugin metadata in auto-reply

* fix: parenthesize cron target selection

* refactor: derive gateway methods from plugins

* refactor: generalize provider logout

* refactor: route provider logout through plugins

* refactor: move WhatsApp web login methods into plugin

* refactor: generalize provider log prefixes

* refactor: centralize default chat provider

* refactor: derive provider lists from registry

* refactor: move provider reload noops into plugins

* refactor: resolve web login provider via alias

* refactor: derive CLI provider options from plugins

* refactor: derive prompt provider list from plugins

* style: apply biome lint fixes

* fix: resolve provider routing edge cases

* docs: update provider plugin refactor notes

* fix(gateway): harden agent provider routing

* refactor: move provider routing into plugins

* refactor: move provider CLI to plugins

* refactor: derive provider lists from registry

* fix: restore slash command parsing

* refactor: align provider ids for schema

* refactor: unify outbound target resolution

* fix: keep outbound labels stable

* feat: add msteams to cron surfaces

* fix: clean up lint build issues

* refactor: localize chat provider alias normalization

* refactor: drive gateway provider lists from plugins

* docs: update provider plugin notes

* style: format message-provider

* fix: avoid provider registry init cycles

* style: sort message-provider imports

* fix: relax provider alias map typing

* refactor: move provider routing into plugins

* refactor: add plugin pairing/config adapters

* refactor: route pairing and provider removal via plugins

* refactor: align auto-reply provider typing

* test: stabilize telegram media mocks

* docs: update provider plugin refactor notes

* refactor: pluginize outbound targets

* refactor: pluginize provider selection

* refactor: generalize text chunk limits

* docs: update provider plugin notes

* refactor: generalize group session/config

* fix: normalize provider id for room detection

* fix: avoid provider init in system prompt

* style: formatting cleanup

* refactor: normalize agent delivery targets

* test: update outbound delivery labels

* chore: fix lint regressions

* refactor: extend provider plugin adapters

* refactor: move elevated/block streaming defaults to plugins

* refactor: defer outbound send deps to plugins

* docs: note plugin-driven streaming/elevated defaults

* refactor: centralize webchat provider constant

* refactor: add provider setup adapters

* refactor: delegate provider add config to plugins

* docs: document plugin-driven provider add

* refactor: add plugin state/binding metadata

* refactor: build agent provider status from plugins

* docs: note plugin-driven agent bindings

* refactor: centralize internal provider constant usage

* fix: normalize WhatsApp targets for groups and E.164 (#631) (thanks @imfing)

* refactor: centralize default chat provider

* refactor: centralize WhatsApp target normalization

* refactor: move provider routing into plugins

* refactor: normalize agent delivery targets

* chore: fix lint regressions

* fix: normalize WhatsApp targets for groups and E.164 (#631) (thanks @imfing)

* feat: expand provider plugin adapters

* refactor: route auto-reply via provider plugins

* fix: align WhatsApp target normalization

* fix: normalize WhatsApp targets for groups and E.164 (#631) (thanks @imfing)

* refactor: centralize WhatsApp target normalization

* feat: add /config chat config updates

* docs: add /config get alias

* feat(commands): add /commands slash list

* refactor: centralize default chat provider

* style: apply biome lint fixes

* chore: fix lint regressions

* fix: clean up whatsapp allowlist typing

* style: format config command helpers

* refactor: pluginize tool threading context

* refactor: normalize session announce targets

* docs: note new plugin threading and announce hooks

* refactor: pluginize message actions

* docs: update provider plugin actions notes

* fix: align provider action adapters

* refactor: centralize webchat checks

* style: format message provider helpers

* refactor: move provider onboarding into adapters

* docs: note onboarding provider adapters

* feat: add msteams onboarding adapter

* style: organize onboarding imports

* fix: normalize msteams allowFrom types

* feat: add plugin text chunk limits

* refactor: use plugin chunk limit fallbacks

* feat: add provider mention stripping hooks

* style: organize provider plugin type imports

* refactor: generalize health snapshots

* refactor: update macOS health snapshot handling

* docs: refresh health snapshot notes

* style: format health snapshot updates

* refactor: drive security warnings via plugins

* docs: note provider security adapter

* style: format provider security adapters

* refactor: centralize provider account defaults

* refactor: type gateway client identity constants

* chore: regen gateway protocol swift

* fix: degrade health on failed provider probe

* refactor: centralize pairing approve hint

* docs: add plugin CLI command references

* refactor: route auth and tool sends through plugins

* docs: expand provider plugin hooks

* refactor: document provider docking touchpoints

* refactor: normalize internal provider defaults

* refactor: streamline outbound delivery wiring

* refactor: make provider onboarding plugin-owned

* refactor: support provider-owned agent tools

* refactor: move telegram draft chunking into telegram module

* refactor: infer provider tool sends via extractToolSend

* fix: repair plugin onboarding imports

* refactor: de-dup outbound target normalization

* style: tidy plugin and agent imports

* refactor: data-drive provider selection line

* fix: satisfy lint after provider plugin rebase

* test: deflake gateway-cli coverage

* style: format gateway-cli coverage test

* refactor(provider-plugins): simplify provider ids

* test(pairing-cli): avoid provider-specific ternary

* style(macos): swiftformat HealthStore

* refactor(sandbox): derive provider tool denylist

* fix(sandbox): avoid plugin init in defaults

* refactor(provider-plugins): centralize provider aliases

* style(test): satisfy biome

* refactor(protocol): v3 providers.status maps

* refactor(ui): adapt to protocol v3

* refactor(macos): adapt to protocol v3

* test: update providers.status v3 fixtures

* refactor(gateway): map provider runtime snapshot

* test(gateway): update reload runtime snapshot

* refactor(whatsapp): normalize heartbeat provider id

* docs(refactor): update provider plugin notes

* style: satisfy biome after rebase

* fix: describe sandboxed elevated in prompt

* feat(gateway): add agent image attachments + live probe

* refactor: derive CLI provider options from plugins

* fix(gateway): harden agent provider routing

* fix(gateway): harden agent provider routing

* refactor: align provider ids for schema

* fix(protocol): keep agent provider string

* fix(gateway): harden agent provider routing

* fix(protocol): keep agent provider string

* refactor: normalize agent delivery targets

* refactor: support provider-owned agent tools

* refactor(config): provider-keyed elevated allowFrom

* style: satisfy biome

* fix(gateway): appease provider narrowing

* style: satisfy biome

* refactor(reply): move group intro hints into plugin

* fix(reply): avoid plugin registry init cycle

* refactor(providers): add lightweight provider dock

* refactor(gateway): use typed client id in connect

* refactor(providers): document docks and avoid init cycles

* refactor(providers): make media limit helper generic

* fix(providers): break plugin registry import cycles

* style: satisfy biome

* refactor(status-all): build providers table from plugins

* refactor(gateway): delegate web login to provider plugin

* refactor(provider): drop web alias

* refactor(provider): lazy-load monitors

* style: satisfy lint/format

* style: format status-all providers table

* style: swiftformat gateway discovery model

* test: make reload plan plugin-driven

* fix: avoid token stringification in status-all

* refactor: make provider IDs explicit in status

* feat: warn on signal/imessage provider runtime errors

* test: cover gateway provider runtime warnings in status

* fix: add runtime kind to provider status issues

* test: cover health degradation on probe failure

* fix: keep routeReply lightweight

* style: organize routeReply imports

* refactor(web): extract auth-store helpers

* refactor(whatsapp): lazy login imports

* refactor(outbound): route replies via plugin outbound

* docs: update provider plugin notes

* style: format provider status issues

* fix: make sandbox scope warning wrap-safe

* refactor: load outbound adapters from provider plugins

* docs: update provider plugin outbound notes

* style(macos): fix swiftformat lint

* docs: changelog for provider plugins

* fix(macos): satisfy swiftformat

* fix(macos): open settings via menu action

* style: format after rebase

* fix(macos): open Settings via menu action

---------

Co-authored-by: LK <luke@kyohere.com>
Co-authored-by: Luke K (pr-0f3t) <2609441+lc0rp@users.noreply.github.com>
Co-authored-by: Xin <xin@imfing.com>
2026-01-11 11:45:25 +00:00

719 lines
26 KiB
TypeScript

import { randomBytes, randomUUID } from "node:crypto";
import fs from "node:fs/promises";
import { createServer } from "node:net";
import os from "node:os";
import path from "node:path";
import type { Api, Model } from "@mariozechner/pi-ai";
import {
discoverAuthStorage,
discoverModels,
} from "@mariozechner/pi-coding-agent";
import { describe, expect, it } from "vitest";
import { resolveClawdbotAgentDir } from "../agents/agent-paths.js";
import { getApiKeyForModel } from "../agents/model-auth.js";
import { ensureClawdbotModelsJson } from "../agents/models-config.js";
import { loadConfig } from "../config/config.js";
import {
GATEWAY_CLIENT_MODES,
GATEWAY_CLIENT_NAMES,
} from "../utils/message-provider.js";
import { resolveUserPath } from "../utils.js";
import { GatewayClient } from "./client.js";
import { renderCatNoncePngBase64 } from "./live-image-probe.js";
import { startGatewayServer } from "./server.js";
const LIVE = process.env.LIVE === "1" || process.env.CLAWDBOT_LIVE_TEST === "1";
const GATEWAY_LIVE = process.env.CLAWDBOT_LIVE_GATEWAY === "1";
const ALL_MODELS =
process.env.CLAWDBOT_LIVE_GATEWAY_ALL_MODELS === "1" ||
process.env.CLAWDBOT_LIVE_GATEWAY_MODELS === "all";
const EXTRA_TOOL_PROBES = process.env.CLAWDBOT_LIVE_GATEWAY_TOOL_PROBE === "1";
const EXTRA_IMAGE_PROBES =
process.env.CLAWDBOT_LIVE_GATEWAY_IMAGE_PROBE === "1";
const ZAI_FALLBACK = process.env.CLAWDBOT_LIVE_GATEWAY_ZAI_FALLBACK === "1";
const PROVIDERS = parseFilter(process.env.CLAWDBOT_LIVE_GATEWAY_PROVIDERS);
const describeLive = LIVE && GATEWAY_LIVE ? describe : describe.skip;
function parseFilter(raw?: string): Set<string> | null {
const trimmed = raw?.trim();
if (!trimmed || trimmed === "all") return null;
const ids = trimmed
.split(",")
.map((s) => s.trim())
.filter(Boolean);
return ids.length ? new Set(ids) : null;
}
function extractPayloadText(result: unknown): string {
const record = result as Record<string, unknown>;
const payloads = Array.isArray(record.payloads) ? record.payloads : [];
const texts = payloads
.map((p) =>
p && typeof p === "object"
? (p as Record<string, unknown>).text
: undefined,
)
.filter((t): t is string => typeof t === "string" && t.trim().length > 0);
return texts.join("\n").trim();
}
function isMeaningful(text: string): boolean {
if (!text) return false;
const trimmed = text.trim();
if (trimmed.toLowerCase() === "ok") return false;
if (trimmed.length < 60) return false;
const words = trimmed.split(/\s+/g).filter(Boolean);
if (words.length < 12) return false;
return true;
}
function isGoogleModelNotFoundText(text: string): boolean {
const trimmed = text.trim();
if (!trimmed) return false;
if (!/not found/i.test(trimmed)) return false;
if (/models\/.+ is not found for api version/i.test(trimmed)) return true;
if (/"status"\s*:\s*"NOT_FOUND"/.test(trimmed)) return true;
if (/"code"\s*:\s*404/.test(trimmed)) return true;
return false;
}
function isRefreshTokenReused(error: string): boolean {
return /refresh_token_reused/i.test(error);
}
function randomImageProbeCode(len = 10): string {
const alphabet = "2345689ABCEF";
const bytes = randomBytes(len);
let out = "";
for (let i = 0; i < len; i += 1) {
out += alphabet[bytes[i] % alphabet.length];
}
return out;
}
function editDistance(a: string, b: string): number {
if (a === b) return 0;
const aLen = a.length;
const bLen = b.length;
if (aLen === 0) return bLen;
if (bLen === 0) return aLen;
let prev = Array.from({ length: bLen + 1 }, (_v, idx) => idx);
let curr = Array.from({ length: bLen + 1 }, () => 0);
for (let i = 1; i <= aLen; i += 1) {
curr[0] = i;
const aCh = a.charCodeAt(i - 1);
for (let j = 1; j <= bLen; j += 1) {
const cost = aCh === b.charCodeAt(j - 1) ? 0 : 1;
curr[j] = Math.min(
prev[j] + 1, // delete
curr[j - 1] + 1, // insert
prev[j - 1] + cost, // substitute
);
}
[prev, curr] = [curr, prev];
}
return prev[bLen] ?? Number.POSITIVE_INFINITY;
}
async function getFreePort(): Promise<number> {
return await new Promise((resolve, reject) => {
const srv = createServer();
srv.on("error", reject);
srv.listen(0, "127.0.0.1", () => {
const addr = srv.address();
if (!addr || typeof addr === "string") {
srv.close();
reject(new Error("failed to acquire free port"));
return;
}
const port = addr.port;
srv.close((err) => {
if (err) reject(err);
else resolve(port);
});
});
});
}
async function isPortFree(port: number): Promise<boolean> {
if (!Number.isFinite(port) || port <= 0 || port > 65535) return false;
return await new Promise((resolve) => {
const srv = createServer();
srv.once("error", () => resolve(false));
srv.listen(port, "127.0.0.1", () => {
srv.close(() => resolve(true));
});
});
}
async function getFreeGatewayPort(): Promise<number> {
// Gateway uses derived ports (bridge/browser/canvas). Avoid flaky collisions by
// ensuring the common derived offsets are free too.
for (let attempt = 0; attempt < 25; attempt += 1) {
const port = await getFreePort();
const candidates = [port, port + 1, port + 2, port + 4];
const ok = (
await Promise.all(candidates.map((candidate) => isPortFree(candidate)))
).every(Boolean);
if (ok) return port;
}
throw new Error("failed to acquire a free gateway port block");
}
type AgentFinalPayload = {
status?: unknown;
result?: unknown;
};
async function connectClient(params: { url: string; token: string }) {
return await new Promise<GatewayClient>((resolve, reject) => {
let settled = false;
const stop = (err?: Error, client?: GatewayClient) => {
if (settled) return;
settled = true;
clearTimeout(timer);
if (err) reject(err);
else resolve(client as GatewayClient);
};
const client = new GatewayClient({
url: params.url,
token: params.token,
clientName: GATEWAY_CLIENT_NAMES.TEST,
clientDisplayName: "vitest-live",
clientVersion: "dev",
mode: GATEWAY_CLIENT_MODES.TEST,
onHelloOk: () => stop(undefined, client),
onConnectError: (err) => stop(err),
onClose: (code, reason) =>
stop(new Error(`gateway closed during connect (${code}): ${reason}`)),
});
const timer = setTimeout(
() => stop(new Error("gateway connect timeout")),
10_000,
);
timer.unref();
client.start();
});
}
describeLive("gateway live (dev agent, profile keys)", () => {
it(
"runs meaningful prompts across models with available keys",
async () => {
const previous = {
configPath: process.env.CLAWDBOT_CONFIG_PATH,
token: process.env.CLAWDBOT_GATEWAY_TOKEN,
skipProviders: process.env.CLAWDBOT_SKIP_PROVIDERS,
skipGmail: process.env.CLAWDBOT_SKIP_GMAIL_WATCHER,
skipCron: process.env.CLAWDBOT_SKIP_CRON,
skipCanvas: process.env.CLAWDBOT_SKIP_CANVAS_HOST,
};
process.env.CLAWDBOT_SKIP_PROVIDERS = "1";
process.env.CLAWDBOT_SKIP_GMAIL_WATCHER = "1";
process.env.CLAWDBOT_SKIP_CRON = "1";
process.env.CLAWDBOT_SKIP_CANVAS_HOST = "1";
const token = `test-${randomUUID()}`;
process.env.CLAWDBOT_GATEWAY_TOKEN = token;
const cfg = loadConfig();
await ensureClawdbotModelsJson(cfg);
const workspaceDir = resolveUserPath(
cfg.agents?.defaults?.workspace ?? path.join(os.homedir(), "clawd"),
);
await fs.mkdir(workspaceDir, { recursive: true });
const nonceA = randomUUID();
const nonceB = randomUUID();
const toolProbePath = path.join(
workspaceDir,
`.clawdbot-live-tool-probe.${nonceA}.txt`,
);
await fs.writeFile(toolProbePath, `nonceA=${nonceA}\nnonceB=${nonceB}\n`);
const agentDir = resolveClawdbotAgentDir();
const authStorage = discoverAuthStorage(agentDir);
const modelRegistry = discoverModels(authStorage, agentDir);
const all = modelRegistry.getAll() as Array<Model<Api>>;
const filter = parseFilter(process.env.CLAWDBOT_LIVE_GATEWAY_MODELS);
// Default: honor user allowlist. Opt-in: scan all models with keys.
const allowlistKeys = Object.keys(cfg.agents?.defaults?.models ?? {});
const wanted =
ALL_MODELS || allowlistKeys.length === 0
? all
: all.filter((m) => allowlistKeys.includes(`${m.provider}/${m.id}`));
const candidates: Array<Model<Api>> = [];
for (const model of wanted) {
const id = `${model.provider}/${model.id}`;
if (PROVIDERS && !PROVIDERS.has(model.provider)) continue;
if (filter && !filter.has(id)) continue;
try {
// eslint-disable-next-line no-await-in-loop
await getApiKeyForModel({ model, cfg });
candidates.push(model);
} catch {
// no creds; skip
}
}
expect(candidates.length).toBeGreaterThan(0);
const imageCandidates = EXTRA_IMAGE_PROBES
? candidates.filter((m) => m.input?.includes("image"))
: [];
if (EXTRA_IMAGE_PROBES && imageCandidates.length === 0) {
throw new Error(
"image probe enabled but no selected models advertise image support; set CLAWDBOT_LIVE_GATEWAY_MODELS to include an image-capable model",
);
}
// Build a temp config that allows all selected models, so session overrides stick.
const lmstudioProvider = cfg.models?.providers?.lmstudio;
const nextCfg = {
...cfg,
agents: {
...cfg.agents,
list: (cfg.agents?.list ?? []).map((entry) => ({
...entry,
sandbox: { mode: "off" },
})),
defaults: {
...cfg.agents?.defaults,
// Live tests should avoid Docker sandboxing so tool probes can
// operate on the temporary probe files we create in the host workspace.
sandbox: { mode: "off" },
models: Object.fromEntries(
candidates.map((m) => [`${m.provider}/${m.id}`, {}]),
),
},
},
models: {
...cfg.models,
providers: {
...cfg.models?.providers,
// LM Studio is most reliable via Chat Completions; its Responses API
// tool-calling behavior is inconsistent across releases.
...(lmstudioProvider
? {
lmstudio: {
...lmstudioProvider,
api: "openai-completions",
},
}
: {}),
},
},
};
const tempDir = await fs.mkdtemp(
path.join(os.tmpdir(), "clawdbot-live-"),
);
const tempConfigPath = path.join(tempDir, "clawdbot.json");
await fs.writeFile(
tempConfigPath,
`${JSON.stringify(nextCfg, null, 2)}\n`,
);
process.env.CLAWDBOT_CONFIG_PATH = tempConfigPath;
const port = await getFreeGatewayPort();
const server = await startGatewayServer(port, {
bind: "loopback",
auth: { mode: "token", token },
controlUiEnabled: false,
});
const client = await connectClient({
url: `ws://127.0.0.1:${port}`,
token,
});
try {
const sessionKey = "agent:dev:live-gateway";
const failures: Array<{ model: string; error: string }> = [];
for (const model of candidates) {
const modelKey = `${model.provider}/${model.id}`;
try {
// Ensure session exists + override model for this run.
await client.request<Record<string, unknown>>("sessions.patch", {
key: sessionKey,
model: modelKey,
});
// Reset between models: avoids cross-provider transcript incompatibilities
// (notably OpenAI Responses requiring reasoning replay for function_call items).
await client.request<Record<string, unknown>>("sessions.reset", {
key: sessionKey,
});
// “Meaningful” direct prompt (no tools).
const runId = randomUUID();
const payload = await client.request<AgentFinalPayload>(
"agent",
{
sessionKey,
idempotencyKey: `idem-${runId}`,
message:
"Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
deliver: false,
},
{ expectFinal: true },
);
if (payload?.status !== "ok") {
throw new Error(`agent status=${String(payload?.status)}`);
}
const text = extractPayloadText(payload?.result);
if (
model.provider === "google" &&
isGoogleModelNotFoundText(text)
) {
// Catalog drift: model IDs can disappear or become unavailable on the API.
// Treat as skip when scanning "all models" for Google.
continue;
}
if (!isMeaningful(text)) throw new Error(`not meaningful: ${text}`);
if (
!/\bmicro\s*-?\s*tasks?\b/i.test(text) ||
!/\bmacro\s*-?\s*tasks?\b/i.test(text)
) {
throw new Error(`missing required keywords: ${text}`);
}
// Real tool invocation: force the agent to Read a local file and echo a nonce.
const runIdTool = randomUUID();
const toolProbe = await client.request<AgentFinalPayload>(
"agent",
{
sessionKey,
idempotencyKey: `idem-${runIdTool}-tool`,
message:
"Clawdbot live tool probe (local, safe): " +
`use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
"Then reply with the two nonce values you read (include both).",
deliver: false,
},
{ expectFinal: true },
);
if (toolProbe?.status !== "ok") {
throw new Error(
`tool probe failed: status=${String(toolProbe?.status)}`,
);
}
const toolText = extractPayloadText(toolProbe?.result);
if (!toolText.includes(nonceA) || !toolText.includes(nonceB)) {
throw new Error(`tool probe missing nonce: ${toolText}`);
}
if (EXTRA_TOOL_PROBES) {
const nonceC = randomUUID();
const toolWritePath = path.join(
tempDir,
`write-${runIdTool}.txt`,
);
const bashReadProbe = await client.request<AgentFinalPayload>(
"agent",
{
sessionKey,
idempotencyKey: `idem-${runIdTool}-bash-read`,
message:
"Clawdbot live tool probe (local, safe): " +
"use the tool named `bash` (or `Bash`) to run this command: " +
`mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
`Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
"Finally reply including the nonce text you read back.",
deliver: false,
},
{ expectFinal: true },
);
if (bashReadProbe?.status !== "ok") {
throw new Error(
`bash+read probe failed: status=${String(bashReadProbe?.status)}`,
);
}
const bashReadText = extractPayloadText(bashReadProbe?.result);
if (!bashReadText.includes(nonceC)) {
throw new Error(
`bash+read probe missing nonce: ${bashReadText}`,
);
}
await fs.rm(toolWritePath, { force: true });
}
if (EXTRA_IMAGE_PROBES && model.input?.includes("image")) {
const imageCode = randomImageProbeCode(10);
const imageBase64 = renderCatNoncePngBase64(imageCode);
const runIdImage = randomUUID();
const imageProbe = await client.request<AgentFinalPayload>(
"agent",
{
sessionKey,
idempotencyKey: `idem-${runIdImage}-image`,
message:
"Look at the attached image. Reply with exactly two tokens separated by a single space: " +
"(1) the animal shown or written in the image, lowercase; " +
"(2) the code printed in the image, uppercase. No extra text.",
attachments: [
{
mimeType: "image/png",
fileName: `probe-${runIdImage}.png`,
content: imageBase64,
},
],
deliver: false,
},
{ expectFinal: true },
);
if (imageProbe?.status !== "ok") {
throw new Error(
`image probe failed: status=${String(imageProbe?.status)}`,
);
}
const imageText = extractPayloadText(imageProbe?.result);
if (!/\bcat\b/i.test(imageText)) {
throw new Error(`image probe missing 'cat': ${imageText}`);
}
const candidates =
imageText.toUpperCase().match(/[A-Z0-9]{6,20}/g) ?? [];
const bestDistance = candidates.reduce((best, cand) => {
if (Math.abs(cand.length - imageCode.length) > 2) return best;
return Math.min(best, editDistance(cand, imageCode));
}, Number.POSITIVE_INFINITY);
if (!(bestDistance <= 2)) {
throw new Error(
`image probe missing code (${imageCode}): ${imageText}`,
);
}
}
// Regression: tool-call-only turn followed by a user message (OpenAI responses bug class).
if (
(model.provider === "openai" &&
model.api === "openai-responses") ||
(model.provider === "openai-codex" &&
model.api === "openai-codex-responses")
) {
const runId2 = randomUUID();
const first = await client.request<AgentFinalPayload>(
"agent",
{
sessionKey,
idempotencyKey: `idem-${runId2}-1`,
message: `Call the tool named \`read\` (or \`Read\`) on "${toolProbePath}". Do not write any other text.`,
deliver: false,
},
{ expectFinal: true },
);
if (first?.status !== "ok") {
throw new Error(
`tool-only turn failed: status=${String(first?.status)}`,
);
}
const second = await client.request<AgentFinalPayload>(
"agent",
{
sessionKey,
idempotencyKey: `idem-${runId2}-2`,
message: `Now answer: what are the values of nonceA and nonceB in "${toolProbePath}"? Reply with exactly: ${nonceA} ${nonceB}.`,
deliver: false,
},
{ expectFinal: true },
);
if (second?.status !== "ok") {
throw new Error(
`post-tool message failed: status=${String(second?.status)}`,
);
}
const reply = extractPayloadText(second?.result);
if (!reply.includes(nonceA) || !reply.includes(nonceB)) {
throw new Error(`unexpected reply: ${reply}`);
}
}
} catch (err) {
const message = String(err);
// OpenAI Codex refresh tokens can become single-use; skip instead of failing all live tests.
if (
model.provider === "openai-codex" &&
isRefreshTokenReused(message)
) {
continue;
}
failures.push({ model: modelKey, error: message });
}
}
if (failures.length > 0) {
const preview = failures
.slice(0, 20)
.map((f) => `- ${f.model}: ${f.error}`)
.join("\n");
throw new Error(
`gateway live model failures (${failures.length}):\n${preview}`,
);
}
} finally {
client.stop();
await server.close({ reason: "live test complete" });
await fs.rm(toolProbePath, { force: true });
await fs.rm(tempDir, { recursive: true, force: true });
process.env.CLAWDBOT_CONFIG_PATH = previous.configPath;
process.env.CLAWDBOT_GATEWAY_TOKEN = previous.token;
process.env.CLAWDBOT_SKIP_PROVIDERS = previous.skipProviders;
process.env.CLAWDBOT_SKIP_GMAIL_WATCHER = previous.skipGmail;
process.env.CLAWDBOT_SKIP_CRON = previous.skipCron;
process.env.CLAWDBOT_SKIP_CANVAS_HOST = previous.skipCanvas;
}
},
20 * 60 * 1000,
);
it("z.ai fallback handles anthropic tool history", async () => {
if (!ZAI_FALLBACK) return;
const previous = {
configPath: process.env.CLAWDBOT_CONFIG_PATH,
token: process.env.CLAWDBOT_GATEWAY_TOKEN,
skipProviders: process.env.CLAWDBOT_SKIP_PROVIDERS,
skipGmail: process.env.CLAWDBOT_SKIP_GMAIL_WATCHER,
skipCron: process.env.CLAWDBOT_SKIP_CRON,
skipCanvas: process.env.CLAWDBOT_SKIP_CANVAS_HOST,
};
process.env.CLAWDBOT_SKIP_PROVIDERS = "1";
process.env.CLAWDBOT_SKIP_GMAIL_WATCHER = "1";
process.env.CLAWDBOT_SKIP_CRON = "1";
process.env.CLAWDBOT_SKIP_CANVAS_HOST = "1";
const token = `test-${randomUUID()}`;
process.env.CLAWDBOT_GATEWAY_TOKEN = token;
const cfg = loadConfig();
await ensureClawdbotModelsJson(cfg);
const agentDir = resolveClawdbotAgentDir();
const authStorage = discoverAuthStorage(agentDir);
const modelRegistry = discoverModels(authStorage, agentDir);
const anthropic = modelRegistry.find(
"anthropic",
"claude-opus-4-5",
) as Model<Api> | null;
const zai = modelRegistry.find("zai", "glm-4.7") as Model<Api> | null;
if (!anthropic || !zai) return;
try {
await getApiKeyForModel({ model: anthropic, cfg });
await getApiKeyForModel({ model: zai, cfg });
} catch {
return;
}
const workspaceDir = resolveUserPath(
cfg.agents?.defaults?.workspace ?? path.join(os.homedir(), "clawd"),
);
await fs.mkdir(workspaceDir, { recursive: true });
const nonceA = randomUUID();
const nonceB = randomUUID();
const toolProbePath = path.join(
workspaceDir,
`.clawdbot-live-zai-fallback.${nonceA}.txt`,
);
await fs.writeFile(toolProbePath, `nonceA=${nonceA}\nnonceB=${nonceB}\n`);
const port = await getFreeGatewayPort();
const server = await startGatewayServer({
configPath: cfg.__meta?.path,
port,
token,
});
const client = await connectClient({
url: `ws://127.0.0.1:${port}`,
token,
});
try {
const sessionKey = "agent:dev:live-zai-fallback";
await client.request<Record<string, unknown>>("sessions.patch", {
key: sessionKey,
model: "anthropic/claude-opus-4-5",
});
await client.request<Record<string, unknown>>("sessions.reset", {
key: sessionKey,
});
const runId = randomUUID();
const toolProbe = await client.request<AgentFinalPayload>(
"agent",
{
sessionKey,
idempotencyKey: `idem-${runId}-tool`,
message:
`Call the tool named \`read\` (or \`Read\` if \`read\` is unavailable) with JSON arguments {"path":"${toolProbePath}"}. ` +
`Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`,
deliver: false,
},
{ expectFinal: true },
);
if (toolProbe?.status !== "ok") {
throw new Error(
`anthropic tool probe failed: status=${String(toolProbe?.status)}`,
);
}
const toolText = extractPayloadText(toolProbe?.result);
if (!toolText.includes(nonceA) || !toolText.includes(nonceB)) {
throw new Error(`anthropic tool probe missing nonce: ${toolText}`);
}
await client.request<Record<string, unknown>>("sessions.patch", {
key: sessionKey,
model: "zai/glm-4.7",
});
const followupId = randomUUID();
const followup = await client.request<AgentFinalPayload>(
"agent",
{
sessionKey,
idempotencyKey: `idem-${followupId}-followup`,
message:
`What are the values of nonceA and nonceB in "${toolProbePath}"? ` +
`Reply with exactly: ${nonceA} ${nonceB}.`,
deliver: false,
},
{ expectFinal: true },
);
if (followup?.status !== "ok") {
throw new Error(
`zai followup failed: status=${String(followup?.status)}`,
);
}
const followupText = extractPayloadText(followup?.result);
if (!followupText.includes(nonceA) || !followupText.includes(nonceB)) {
throw new Error(`zai followup missing nonce: ${followupText}`);
}
} finally {
client.stop();
await server.close({ reason: "live test complete" });
await fs.rm(toolProbePath, { force: true });
process.env.CLAWDBOT_CONFIG_PATH = previous.configPath;
process.env.CLAWDBOT_GATEWAY_TOKEN = previous.token;
process.env.CLAWDBOT_SKIP_PROVIDERS = previous.skipProviders;
process.env.CLAWDBOT_SKIP_GMAIL_WATCHER = previous.skipGmail;
process.env.CLAWDBOT_SKIP_CRON = previous.skipCron;
process.env.CLAWDBOT_SKIP_CANVAS_HOST = previous.skipCanvas;
}
}, 180_000);
});