// Source: clawdbot/src/agents/models.profiles.live.test.ts
// Snapshot: 2026-01-12 07:07:27 +00:00 (383 lines, 12 KiB, TypeScript)

import { type Api, completeSimple, type Model } from "@mariozechner/pi-ai";
import {
discoverAuthStorage,
discoverModels,
} from "@mariozechner/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import { loadConfig } from "../config/config.js";
import { resolveClawdbotAgentDir } from "./agent-paths.js";
import {
collectAnthropicApiKeys,
isAnthropicRateLimitError,
} from "./live-auth-keys.js";
import { isModernModelRef } from "./live-model-filter.js";
import { getApiKeyForModel } from "./model-auth.js";
import { ensureClawdbotModelsJson } from "./models-config.js";
// Live tests are opt-in: either LIVE=1 or CLAWDBOT_LIVE_TEST=1 turns them on.
const LIVE = [process.env.LIVE, process.env.CLAWDBOT_LIVE_TEST].includes("1");
// True when CLAWDBOT_LIVE_MODELS contains anything other than whitespace.
const DIRECT_ENABLED =
  (process.env.CLAWDBOT_LIVE_MODELS ?? "").trim().length > 0;
// True only for the exact value "1"; when set, models whose credential source
// does not start with "profile:" are skipped by the live suite in this file.
const REQUIRE_PROFILE_KEYS =
  process.env.CLAWDBOT_LIVE_REQUIRE_PROFILE_KEYS === "1";
// Suite registrar: the real `describe` when LIVE is true, otherwise `describe.skip`
// so the whole suite is skipped.
const describeLive = LIVE ? describe : describe.skip;
/**
 * Shared parser for comma-separated allow-lists read from environment variables.
 *
 * Entries are trimmed and empty entries are dropped.
 *
 * @param raw - Raw variable value; may be undefined.
 * @returns The set of entries, or `null` (meaning "no filtering") when the value
 *   is missing, blank, exactly `"all"`, or contains only separators/whitespace.
 */
function parseCsvFilter(raw?: string): Set<string> | null {
  const trimmed = raw?.trim();
  if (!trimmed || trimmed === "all") return null;
  const ids = trimmed
    .split(",")
    .map((s) => s.trim())
    .filter(Boolean);
  return ids.length > 0 ? new Set(ids) : null;
}

/**
 * Parses a provider allow-list such as `"openai, anthropic"`.
 *
 * @param raw - Raw comma-separated provider ids.
 * @returns The set of provider ids, or `null` when no provider filtering applies.
 */
function parseProviderFilter(raw?: string): Set<string> | null {
  return parseCsvFilter(raw);
}

/**
 * Parses a model allow-list such as `"openai/gpt-5.2, google/gemini"`.
 *
 * @param raw - Raw comma-separated model refs.
 * @returns The set of model refs, or `null` when no model filtering applies.
 */
function parseModelFilter(raw?: string): Set<string> | null {
  return parseCsvFilter(raw);
}
/**
 * Writes one progress line to stdout, tagged with the `[live]` prefix.
 *
 * @param message - Text to print after the prefix.
 */
function logProgress(message: string): void {
  const line = "[live] " + message;
  console.log(line);
}
/**
 * Reports whether an error raised by a Google model call looks like a
 * "model not found" response.
 *
 * The error is stringified, must mention "not found" (case-insensitive), and
 * must additionally match one of:
 *   - `models/<id> is not found for api version ...`
 *   - a JSON fragment `"status": "NOT_FOUND"`
 *   - a JSON fragment `"code": 404`
 *
 * @param err - Any thrown value; it is converted with `String(err)`.
 * @returns `true` when the message matches the patterns above.
 */
function isGoogleModelNotFoundError(err: unknown): boolean {
  const msg = String(err);
  if (!/not found/i.test(msg)) return false;
  if (/models\/.+ is not found for api version/i.test(msg)) return true;
  // `\s*` (optional whitespace) is required here: in a regex literal `\\s*`
  // would demand a literal backslash and never match a real JSON payload.
  if (/"status"\s*:\s*"NOT_FOUND"/.test(msg)) return true;
  if (/"code"\s*:\s*404/.test(msg)) return true;
  return false;
}
/**
 * Decides whether a provider error message describes a missing model.
 *
 * A message qualifies when, after trimming, it is non-empty and either
 * contains `not_found_error`, or contains `notfound` / `not_found` / `not-found`
 * (case-insensitive) together with a standalone `404` or a `model: <id>` fragment.
 *
 * @param raw - Error message text.
 * @returns `true` when the message matches one of the patterns above.
 */
function isModelNotFoundErrorMessage(raw: string): boolean {
  const text = raw.trim();
  if (text.length === 0) return false;

  const hasNotFoundErrorType = /not_found_error/i.test(text);
  const mentionsNotFound = /not[_-]?found/i.test(text);
  const hasStandalone404 = /\b404\b/.test(text);
  const namesModel = /model:\s*[a-z0-9._-]+/i.test(text);

  return (
    hasNotFoundErrorType ||
    (mentionsNotFound && (hasStandalone404 || namesModel))
  );
}
/**
 * Reads a base-10 integer from an optional string.
 *
 * @param value - Text to parse; surrounding whitespace is ignored.
 * @param fallback - Returned when `value` is missing, blank, or does not parse
 *   to a finite number.
 * @returns The parsed integer (as produced by `Number.parseInt`) or `fallback`.
 */
function toInt(value: string | undefined, fallback: number): number {
  const text = (value ?? "").trim();
  if (text === "") return fallback;

  const candidate = Number.parseInt(text, 10);
  if (!Number.isFinite(candidate)) return fallback;
  return candidate;
}
/**
 * Calls `completeSimple` with an abort signal that fires after a deadline.
 *
 * The signal created here replaces any `signal` already present in `options`.
 * The timer is always cleared once the call settles, and is unref'd where the
 * runtime supports it.
 *
 * @param model - Model to call.
 * @param context - Conversation context forwarded unchanged.
 * @param options - Completion options; spread into the final options object.
 * @param timeoutMs - Deadline in milliseconds; values below 1 are raised to 1.
 * @returns Whatever `completeSimple` resolves to.
 */
async function completeSimpleWithTimeout<TApi extends Api>(
  model: Model<TApi>,
  context: Parameters<typeof completeSimple<TApi>>[1],
  options: Parameters<typeof completeSimple<TApi>>[2],
  timeoutMs: number,
) {
  const aborter = new AbortController();
  const deadlineMs = Math.max(1, timeoutMs);
  const deadline = setTimeout(() => {
    aborter.abort();
  }, deadlineMs);
  deadline.unref?.();

  try {
    const result = await completeSimple(model, context, {
      ...options,
      signal: aborter.signal,
    });
    return result;
  } finally {
    clearTimeout(deadline);
  }
}
/**
 * Sends the prompt "Reply with the word ok." to a model, asking a second time
 * if the first reply contains no text.
 *
 * @param params.model - Model to call; `reasoning` is set to "low" when the
 *   model's `reasoning` flag is truthy.
 * @param params.apiKey - API key passed through to the completion call.
 * @param params.timeoutMs - Per-call deadline in milliseconds.
 * @returns `{ res, text }` where `text` is the trimmed text blocks of `res`
 *   joined by single spaces. The second attempt's result is returned as-is,
 *   even when its text is also empty.
 */
async function completeOkWithRetry(params: {
  model: Model<Api>;
  apiKey: string;
  timeoutMs: number;
}) {
  const ask = async () => {
    const res = await completeSimpleWithTimeout(
      params.model,
      {
        messages: [
          {
            role: "user",
            content: "Reply with the word ok.",
            timestamp: Date.now(),
          },
        ],
      },
      {
        apiKey: params.apiKey,
        reasoning: params.model.reasoning ? "low" : undefined,
        maxTokens: 64,
      },
      params.timeoutMs,
    );

    // Collect only the text blocks; other block kinds are ignored.
    const pieces: string[] = [];
    for (const block of res.content) {
      if (block.type === "text") pieces.push(block.text.trim());
    }
    return { res, text: pieces.join(" ") };
  };

  let outcome = await ask();
  if (outcome.text.length === 0) {
    outcome = await ask();
  }
  return outcome;
}
// Live smoke suite. It is registered through `describeLive`, so it is skipped
// entirely unless live testing is enabled. The single test sends a short prompt
// to every selected model and fails at the end if any model produced an error.
describeLive("live models (profile keys)", () => {
it(
"completes across selected models",
async () => {
// Config is loaded first and handed to ensureClawdbotModelsJson before the
// model registry is discovered further down.
const cfg = loadConfig();
await ensureClawdbotModelsJson(cfg);
// Second opt-in: without CLAWDBOT_LIVE_MODELS the test returns early and passes.
if (!DIRECT_ENABLED) {
logProgress(
"[live-models] skipping (set CLAWDBOT_LIVE_MODELS=modern|all|<list>; all=modern)",
);
return;
}
// Seed ANTHROPIC_API_KEY with the first collected key; the attempt loop below
// overwrites it with each key it tries.
// NOTE(review): the env var is never restored after the test — confirm nothing
// else in the same process depends on its previous value.
const anthropicKeys = collectAnthropicApiKeys();
if (anthropicKeys.length > 0) {
process.env.ANTHROPIC_API_KEY = anthropicKeys[0];
logProgress(
`[live-models] anthropic keys loaded: ${anthropicKeys.length}`,
);
}
const agentDir = resolveClawdbotAgentDir();
const authStorage = discoverAuthStorage(agentDir);
const modelRegistry = discoverModels(authStorage, agentDir);
const models = modelRegistry.getAll() as Array<Model<Api>>;
// Selection mode: "modern" and "all" both mean the modern subset; any other
// non-empty value is parsed as an explicit comma-separated list of
// `provider/id` refs.
const rawModels = process.env.CLAWDBOT_LIVE_MODELS?.trim();
const useModern = rawModels === "modern" || rawModels === "all";
const useExplicit = Boolean(rawModels) && !useModern;
const filter = useExplicit ? parseModelFilter(rawModels) : null;
// "Model not found" errors are only tolerated (recorded as skips) in modern
// mode; with an explicit list they count as failures.
const allowNotFoundSkip = useModern;
const providers = parseProviderFilter(
process.env.CLAWDBOT_LIVE_PROVIDERS,
);
// Per-call deadline in milliseconds; defaults to 30 seconds.
const perModelTimeoutMs = toInt(
process.env.CLAWDBOT_LIVE_MODEL_TIMEOUT_MS,
30_000,
);
const failures: Array<{ model: string; error: string }> = [];
const skipped: Array<{ model: string; reason: string }> = [];
const candidates: Array<{
model: Model<Api>;
apiKeyInfo: Awaited<ReturnType<typeof getApiKeyForModel>>;
}> = [];
// Phase 1: apply the provider/model filters and resolve credentials. No model
// is called yet.
for (const model of models) {
if (providers && !providers.has(model.provider)) continue;
const id = `${model.provider}/${model.id}`;
if (filter && !filter.has(id)) continue;
if (!filter && useModern) {
if (!isModernModelRef({ provider: model.provider, id: model.id })) {
continue;
}
}
try {
const apiKeyInfo = await getApiKeyForModel({ model, cfg });
// With CLAWDBOT_LIVE_REQUIRE_PROFILE_KEYS=1, only credentials whose source
// starts with "profile:" are accepted.
if (
REQUIRE_PROFILE_KEYS &&
!apiKeyInfo.source.startsWith("profile:")
) {
skipped.push({
model: id,
reason: `non-profile credential source: ${apiKeyInfo.source}`,
});
continue;
}
candidates.push({ model, apiKeyInfo });
} catch (err) {
// A credential lookup that throws is recorded as a skip, not a failure.
skipped.push({ model: id, reason: String(err) });
}
}
// NOTE(review): this branch is also reached when the filters matched no model
// at all, in which case the "no API keys found" wording is misleading.
if (candidates.length === 0) {
logProgress("[live-models] no API keys found; skipping");
return;
}
logProgress(
`[live-models] selection=${useExplicit ? "explicit" : "modern"}`,
);
logProgress(`[live-models] running ${candidates.length} models`);
const total = candidates.length;
// Phase 2: call each candidate sequentially.
for (const [index, entry] of candidates.entries()) {
const { model, apiKeyInfo } = entry;
const id = `${model.provider}/${model.id}`;
const progressLabel = `[live-models] ${index + 1}/${total} ${id}`;
// Anthropic models get one attempt per collected key (used only for rate-limit
// rotation, see the catch below); every other model gets a single attempt.
const attemptMax =
model.provider === "anthropic" && anthropicKeys.length > 0
? anthropicKeys.length
: 1;
for (let attempt = 0; attempt < attemptMax; attempt += 1) {
if (model.provider === "anthropic" && anthropicKeys.length > 0) {
process.env.ANTHROPIC_API_KEY = anthropicKeys[attempt];
}
// For Anthropic the rotating key overrides the key resolved in phase 1.
const apiKey =
model.provider === "anthropic" && anthropicKeys.length > 0
? anthropicKeys[attempt]
: apiKeyInfo.apiKey;
// Everything thrown inside this try (including failed `expect` calls) is handled
// by the catch below, so one bad model does not abort the remaining ones.
try {
// Special regression: OpenAI requires replayed `reasoning` items for tool-only turns.
if (
model.provider === "openai" &&
model.api === "openai-responses" &&
model.id === "gpt-5.2"
) {
logProgress(`${progressLabel}: tool-only regression`);
const noopTool = {
name: "noop",
description: "Return ok.",
parameters: Type.Object({}, { additionalProperties: false }),
};
// Turn 1: ask for a tool call and nothing else.
const first = await completeSimpleWithTimeout(
model,
{
messages: [
{
role: "user",
content:
"Call the tool `noop` with {}. Do not write any other text.",
timestamp: Date.now(),
},
],
tools: [noopTool],
},
{
apiKey,
reasoning: model.reasoning ? "low" : undefined,
maxTokens: 128,
},
perModelTimeoutMs,
);
const toolCall = first.content.find((b) => b.type === "toolCall");
expect(toolCall).toBeTruthy();
// Repeats the check above as a runtime guard so `toolCall.id` is narrowed below.
if (!toolCall || toolCall.type !== "toolCall") {
throw new Error("expected tool call");
}
// Turn 2: replay the original prompt, the assistant's tool-call message and a
// tool result, then ask for plain text.
const second = await completeSimpleWithTimeout(
model,
{
messages: [
{
role: "user",
content:
"Call the tool `noop` with {}. Do not write any other text.",
timestamp: Date.now(),
},
first,
{
role: "toolResult",
toolCallId: toolCall.id,
toolName: "noop",
content: [{ type: "text", text: "ok" }],
isError: false,
timestamp: Date.now(),
},
{
role: "user",
content: "Reply with the word ok.",
timestamp: Date.now(),
},
],
},
{
apiKey,
reasoning: model.reasoning ? "low" : undefined,
maxTokens: 64,
},
perModelTimeoutMs,
);
const secondText = second.content
.filter((b) => b.type === "text")
.map((b) => b.text.trim())
.join(" ");
expect(secondText.length).toBeGreaterThan(0);
logProgress(`${progressLabel}: done`);
// Success: leave the attempt loop and move on to the next model.
break;
}
// Default path for every other model: a plain "say ok" prompt.
logProgress(`${progressLabel}: prompt`);
const ok = await completeOkWithRetry({
model,
apiKey,
timeoutMs: perModelTimeoutMs,
});
if (ok.res.stopReason === "error") {
const msg = ok.res.errorMessage ?? "";
if (allowNotFoundSkip && isModelNotFoundErrorMessage(msg)) {
skipped.push({ model: id, reason: msg });
logProgress(`${progressLabel}: skip (model not found)`);
break;
}
// Rethrown so the catch below can apply rate-limit rotation or record a failure.
throw new Error(msg || "model returned error with no message");
}
// An empty reply from a Google model is recorded as a skip rather than a failure.
if (ok.text.length === 0 && model.provider === "google") {
skipped.push({
model: id,
reason: "no text returned (likely unavailable model id)",
});
logProgress(`${progressLabel}: skip (google model not found)`);
break;
}
expect(ok.text.length).toBeGreaterThan(0);
logProgress(`${progressLabel}: done`);
break;
} catch (err) {
const message = String(err);
// Anthropic rate limit with keys left: retry the same model with the next key.
if (
model.provider === "anthropic" &&
isAnthropicRateLimitError(message) &&
attempt + 1 < attemptMax
) {
logProgress(
`${progressLabel}: rate limit, retrying with next key`,
);
continue;
}
// Google "model not found" is a skip regardless of the selection mode.
if (
model.provider === "google" &&
isGoogleModelNotFoundError(err)
) {
skipped.push({ model: id, reason: message });
logProgress(`${progressLabel}: skip (google model not found)`);
break;
}
// Anything else is a failure for this model; testing continues with the next one.
logProgress(`${progressLabel}: failed`);
failures.push({ model: id, error: message });
break;
}
}
}
// Fail once at the end, listing at most the first 10 failures.
if (failures.length > 0) {
const preview = failures
.slice(0, 10)
.map((f) => `- ${f.model}: ${f.error}`)
.join("\n");
throw new Error(
`live model failures (${failures.length}):\n${preview}`,
);
}
// `skipped` is collected but never reported.
// NOTE(review): presumably `void` is here to mark it as intentionally unused —
// consider logging a summary instead.
void skipped;
},
// Third argument to `it`: 15 minutes expressed in milliseconds.
15 * 60 * 1000,
);
});