test(live): add provider filters + google skip rules
This commit is contained in:
@@ -102,6 +102,8 @@ Live tests are split into two layers so we can isolate failures:
|
||||
- How to select models:
|
||||
- `CLAWDBOT_LIVE_MODELS=all` to run everything with keys
|
||||
- or `CLAWDBOT_LIVE_MODELS="openai/gpt-5.2,anthropic/claude-opus-4-5,..."` (comma allowlist)
|
||||
- How to select providers:
|
||||
- `CLAWDBOT_LIVE_PROVIDERS="google,google-antigravity,google-gemini-cli"` (comma allowlist)
|
||||
- Where keys come from:
|
||||
- By default: profile store and env fallbacks
|
||||
- Set `CLAWDBOT_LIVE_REQUIRE_PROFILE_KEYS=1` to enforce **profile store** only
|
||||
@@ -126,11 +128,19 @@ Live tests are split into two layers so we can isolate failures:
|
||||
- How to select models:
|
||||
- `CLAWDBOT_LIVE_GATEWAY_ALL_MODELS=1` to scan all discovered models with keys
|
||||
- or set `CLAWDBOT_LIVE_GATEWAY_MODELS="provider/model,provider/model,..."` to narrow quickly
|
||||
- How to select providers (avoid “OpenRouter everything”):
|
||||
- `CLAWDBOT_LIVE_GATEWAY_PROVIDERS="google,google-antigravity,google-gemini-cli,openai,anthropic,zai,minimax"` (comma allowlist)
|
||||
- Optional tool-calling stress:
|
||||
- `CLAWDBOT_LIVE_GATEWAY_TOOL_PROBE=1` enables an extra “bash writes file → read reads it back → echo nonce” check.
|
||||
- This is specifically meant to catch tool-calling compatibility issues across providers (formatting, history replay, tool_result pairing, etc.).
|
||||
- Optional image send smoke:
|
||||
- `CLAWDBOT_LIVE_GATEWAY_IMAGE_PROBE=1` sends a real image attachment through the gateway agent pipeline (multimodal message) and asserts the model can read back a per-run code from the image.
|
||||
- Flow (high level):
|
||||
- Test generates a tiny PNG with “CAT” + random code (`src/gateway/live-image-probe.ts`)
|
||||
- Sends it via `agent` `attachments: [{ mimeType: "image/png", content: "<base64>" }]`
|
||||
- Gateway parses attachments into `images[]` (`src/gateway/server-methods/agent.ts` + `src/gateway/chat-attachments.ts`)
|
||||
- Embedded agent forwards a multimodal user message to the model
|
||||
- Assertion: reply contains `cat` + the code (OCR tolerance: minor mistakes allowed)
|
||||
|
||||
### Recommended live recipes
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ index 93aa26c395e9bd0df64376408a13d15ee9e7cce7..beb585e2f2c13eec3bca98acade76110
|
||||
}
|
||||
const errorText = await response.text();
|
||||
+ // Fail immediately on 429 for Antigravity to let callers rotate accounts.
|
||||
+ // Antigravity rate limits can have very long retry delays (10+ minutes).
|
||||
+ // Antigravity rate limits can have very long retry delays (10+ minutes). Repro: LIVE=1 CLAWDBOT_LIVE_GATEWAY=1 CLAWDBOT_LIVE_GATEWAY_ALL_MODELS=1 CLAWDBOT_LIVE_GATEWAY_PROVIDERS=\"google-antigravity\" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts
|
||||
+ if (isAntigravity && response.status === 429) {
|
||||
+ throw new Error(`Cloud Code Assist API error (${response.status}): ${errorText}`);
|
||||
+ }
|
||||
@@ -56,7 +56,7 @@ index 188a8294f26fe1bfe3fb298a7f58e4d8eaf2a529..a3aeb6a7ff53bc4f7f44362adb950b2c
|
||||
description: tool.description,
|
||||
parameters: tool.parameters,
|
||||
- strict: null,
|
||||
+ strict: false,
|
||||
+ strict: false, // Repro: LIVE=1 CLAWDBOT_LIVE_GATEWAY=1 CLAWDBOT_LIVE_GATEWAY_ALL_MODELS=1 CLAWDBOT_LIVE_GATEWAY_MODELS=\"openai-codex/gpt-5.2\" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts
|
||||
}));
|
||||
}
|
||||
function mapStopReason(status) {
|
||||
|
||||
@@ -19,6 +19,16 @@ const REQUIRE_PROFILE_KEYS =
|
||||
|
||||
const describeLive = LIVE && ALL_MODELS ? describe : describe.skip;
|
||||
|
||||
/**
 * Parses a comma-separated provider allowlist (e.g. the value of
 * `CLAWDBOT_LIVE_PROVIDERS`) into a set of provider ids.
 *
 * Surrounding whitespace is trimmed from the whole value and from each entry,
 * and empty entries are dropped.
 *
 * @param raw - Raw filter string; may be `undefined` when the variable is unset.
 * @returns The set of provider ids, or `null` (meaning "no filtering") when
 *   the value is missing, blank, exactly `"all"`, or contains no non-empty
 *   entries.
 */
function parseProviderFilter(raw?: string): Set<string> | null {
  const trimmed = raw?.trim();
  // Unset, blank, or the literal "all" all mean: do not filter by provider.
  if (!trimmed || trimmed === "all") return null;

  const ids = trimmed
    .split(",")
    .map((entry) => entry.trim())
    .filter(Boolean);

  // A value made up only of separators (e.g. ",,") also disables filtering.
  return ids.length > 0 ? new Set(ids) : null;
}
|
||||
|
||||
function parseModelFilter(raw?: string): Set<string> | null {
|
||||
const trimmed = raw?.trim();
|
||||
if (!trimmed || trimmed === "all") return null;
|
||||
@@ -29,6 +39,15 @@ function parseModelFilter(raw?: string): Set<string> | null {
|
||||
return ids.length ? new Set(ids) : null;
|
||||
}
|
||||
|
||||
/**
 * Reports whether a thrown value looks like a "model not found" error.
 *
 * The value is inspected only through `String(err)`. It must contain the
 * phrase "not found" (case-insensitive) and at least one of:
 *  - `models/<id> is not found for api version` (case-insensitive),
 *  - a `"status": "NOT_FOUND"` JSON field,
 *  - a `"code": 404` JSON field.
 *
 * @param err - Any thrown value.
 * @returns `true` when the string form matches the pattern described above.
 */
function isGoogleModelNotFoundError(err: unknown): boolean {
  const msg = String(err);
  // Cheap pre-check: every accepted shape must mention "not found" somewhere.
  if (!/not found/i.test(msg)) return false;
  return (
    /models\/.+ is not found for api version/i.test(msg) ||
    // Use `\s` (single backslash) so optional JSON whitespace around the colon
    // is tolerated; `\\s` would require a literal backslash and never match.
    /"status"\s*:\s*"NOT_FOUND"/.test(msg) ||
    /"code"\s*:\s*404/.test(msg)
  );
}
|
||||
|
||||
describeLive("live models (profile keys)", () => {
|
||||
it(
|
||||
"completes across configured models",
|
||||
@@ -42,11 +61,15 @@ describeLive("live models (profile keys)", () => {
|
||||
const models = modelRegistry.getAll() as Array<Model<Api>>;
|
||||
|
||||
const filter = parseModelFilter(process.env.CLAWDBOT_LIVE_MODELS);
|
||||
const providers = parseProviderFilter(
|
||||
process.env.CLAWDBOT_LIVE_PROVIDERS,
|
||||
);
|
||||
|
||||
const failures: Array<{ model: string; error: string }> = [];
|
||||
const skipped: Array<{ model: string; reason: string }> = [];
|
||||
|
||||
for (const model of models) {
|
||||
if (providers && !providers.has(model.provider)) continue;
|
||||
const id = `${model.provider}/${model.id}`;
|
||||
if (filter && !filter.has(id)) continue;
|
||||
|
||||
@@ -168,8 +191,19 @@ describeLive("live models (profile keys)", () => {
|
||||
.filter((block) => block.type === "text")
|
||||
.map((block) => block.text.trim())
|
||||
.join(" ");
|
||||
if (text.length === 0 && model.provider === "google") {
|
||||
skipped.push({
|
||||
model: id,
|
||||
reason: "no text returned (likely unavailable model id)",
|
||||
});
|
||||
continue;
|
||||
}
|
||||
expect(text.length).toBeGreaterThan(0);
|
||||
} catch (err) {
|
||||
if (model.provider === "google" && isGoogleModelNotFoundError(err)) {
|
||||
skipped.push({ model: id, reason: String(err) });
|
||||
continue;
|
||||
}
|
||||
failures.push({ model: id, error: String(err) });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ const ALL_MODELS =
|
||||
const EXTRA_TOOL_PROBES = process.env.CLAWDBOT_LIVE_GATEWAY_TOOL_PROBE === "1";
|
||||
const EXTRA_IMAGE_PROBES =
|
||||
process.env.CLAWDBOT_LIVE_GATEWAY_IMAGE_PROBE === "1";
|
||||
const PROVIDERS = parseFilter(process.env.CLAWDBOT_LIVE_GATEWAY_PROVIDERS);
|
||||
|
||||
const describeLive = LIVE && GATEWAY_LIVE ? describe : describe.skip;
|
||||
|
||||
@@ -63,6 +64,16 @@ function isMeaningful(text: string): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Reports whether reply text looks like a "model not found" error message.
 *
 * After trimming, the text must be non-empty, contain the phrase "not found"
 * (case-insensitive), and match at least one of:
 *  - `models/<id> is not found for api version` (case-insensitive),
 *  - a `"status": "NOT_FOUND"` JSON field,
 *  - a `"code": 404` JSON field.
 *
 * @param text - Text to inspect.
 * @returns `true` when the trimmed text matches the pattern described above.
 */
function isGoogleModelNotFoundText(text: string): boolean {
  const trimmed = text.trim();
  if (!trimmed) return false;
  // Cheap pre-check: every accepted shape must mention "not found" somewhere.
  if (!/not found/i.test(trimmed)) return false;
  return (
    /models\/.+ is not found for api version/i.test(trimmed) ||
    /"status"\s*:\s*"NOT_FOUND"/.test(trimmed) ||
    /"code"\s*:\s*404/.test(trimmed)
  );
}
|
||||
|
||||
function randomImageProbeCode(len = 10): string {
|
||||
const alphabet = "2345689ABCEF";
|
||||
const bytes = randomBytes(len);
|
||||
@@ -233,6 +244,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
||||
const candidates: Array<Model<Api>> = [];
|
||||
for (const model of wanted) {
|
||||
const id = `${model.provider}/${model.id}`;
|
||||
if (PROVIDERS && !PROVIDERS.has(model.provider)) continue;
|
||||
if (filter && !filter.has(id)) continue;
|
||||
try {
|
||||
// eslint-disable-next-line no-await-in-loop
|
||||
@@ -345,6 +357,14 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
||||
throw new Error(`agent status=${String(payload?.status)}`);
|
||||
}
|
||||
const text = extractPayloadText(payload?.result);
|
||||
if (
|
||||
model.provider === "google" &&
|
||||
isGoogleModelNotFoundText(text)
|
||||
) {
|
||||
// Catalog drift: model IDs can disappear or become unavailable on the API.
|
||||
// Treat as skip when scanning "all models" for Google.
|
||||
continue;
|
||||
}
|
||||
if (!isMeaningful(text)) throw new Error(`not meaningful: ${text}`);
|
||||
if (
|
||||
!/\bmicro\s*-?\s*tasks?\b/i.test(text) ||
|
||||
@@ -453,7 +473,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
||||
if (Math.abs(cand.length - imageCode.length) > 2) return best;
|
||||
return Math.min(best, editDistance(cand, imageCode));
|
||||
}, Number.POSITIVE_INFINITY);
|
||||
if (!(bestDistance <= 1)) {
|
||||
if (!(bestDistance <= 2)) {
|
||||
throw new Error(
|
||||
`image probe missing code (${imageCode}): ${imageText}`,
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user