diff --git a/patches/@mariozechner__pi-ai@0.42.2.patch b/patches/@mariozechner__pi-ai@0.42.2.patch index 16edc1104..0dee9b488 100644 --- a/patches/@mariozechner__pi-ai@0.42.2.patch +++ b/patches/@mariozechner__pi-ai@0.42.2.patch @@ -27,19 +27,3 @@ index 188a8294f26fe1bfe3fb298a7f58e4d8eaf2a529..a3aeb6a7ff53bc4f7f44362adb950b2c })); } function mapStopReason(status) { -diff --git a/dist/providers/openai-responses.js b/dist/providers/openai-responses.js -index 7b58a79c989abc76bb8fc9e99fb49126e5fd7de4..a1a7f35ad47975dc1268d1a0c2078b0b651e97b4 100644 ---- a/dist/providers/openai-responses.js -+++ b/dist/providers/openai-responses.js -@@ -396,9 +396,10 @@ function convertMessages(model, context) { - } - else if (msg.role === "assistant") { - const output = []; -+ const hasAssistantText = msg.content.some((block) => block.type === "text"); - for (const block of msg.content) { - // Do not submit thinking blocks if the completion had an error (i.e. abort) -- if (block.type === "thinking" && msg.stopReason !== "error") { -+ if (block.type === "thinking" && msg.stopReason !== "error" && hasAssistantText) { - if (block.thinkingSignature) { - const reasoningItem = JSON.parse(block.thinkingSignature); - output.push(reasoningItem); diff --git a/src/agents/models.profiles.live.test.ts b/src/agents/models.profiles.live.test.ts index 4ce3d7d54..d593773d0 100644 --- a/src/agents/models.profiles.live.test.ts +++ b/src/agents/models.profiles.live.test.ts @@ -7,7 +7,14 @@ import { Type } from "@sinclair/typebox"; import { describe, expect, it } from "vitest"; import { loadConfig } from "../config/config.js"; import { resolveClawdbotAgentDir } from "./agent-paths.js"; +import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js"; import { getApiKeyForModel } from "./model-auth.js"; +import { + buildModelAliasIndex, + parseModelRef, + resolveConfiguredModelRef, + resolveModelRefFromString, +} from "./model-selection.js"; import { ensureClawdbotModelsJson } from "./models-config.js"; const LIVE = process.env.LIVE === "1" || process.env.CLAWDBOT_LIVE_TEST === "1"; @@ -58,6 +65,131 @@ function isModelNotFoundErrorMessage(raw: string): boolean { return false; } +function toInt(value: string | undefined, fallback: number): number { + const trimmed = value?.trim(); + if (!trimmed) return fallback; + const parsed = Number.parseInt(trimmed, 10); + return Number.isFinite(parsed) ? parsed : fallback; +} + +async function completeSimpleWithTimeout( + model: Model, + context: Parameters>[1], + options: Parameters>[2], + timeoutMs: number, +) { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), Math.max(1, timeoutMs)); + timer.unref?.(); + try { + return await completeSimple(model, context, { + ...options, + signal: controller.signal, + }); + } finally { + clearTimeout(timer); + } +} + +async function completeOkWithRetry(params: { + model: Model; + apiKey: string; + timeoutMs: number; +}) { + const runOnce = async () => { + const res = await completeSimpleWithTimeout( + params.model, + { + messages: [ + { + role: "user", + content: "Reply with the word ok.", + timestamp: Date.now(), + }, + ], + }, + { + apiKey: params.apiKey, + reasoning: params.model.reasoning ? "low" : undefined, + maxTokens: 64, + }, + params.timeoutMs, + ); + const text = res.content + .filter((block) => block.type === "text") + .map((block) => block.text.trim()) + .join(" "); + return { res, text }; + }; + + const first = await runOnce(); + if (first.text.length > 0) return first; + return await runOnce(); +} + +function resolveConfiguredModelKeys( + cfg: ReturnType, +): string[] { + const aliasIndex = buildModelAliasIndex({ + cfg, + defaultProvider: DEFAULT_PROVIDER, + }); + const order: string[] = []; + const seen = new Set(); + + const addKey = (key: string) => { + const normalized = key.trim(); + if (!normalized || seen.has(normalized)) return; + seen.add(normalized); + order.push(normalized); + }; + + const addRef = (ref: { provider: string; model: string }) => { + addKey(`${ref.provider}/${ref.model}`); + }; + + addRef( + resolveConfiguredModelRef({ + cfg, + defaultProvider: DEFAULT_PROVIDER, + defaultModel: DEFAULT_MODEL, + }), + ); + + const modelConfig = cfg.agents?.defaults?.model as + | { primary?: string; fallbacks?: string[] } + | undefined; + const imageModelConfig = cfg.agents?.defaults?.imageModel as + | { primary?: string; fallbacks?: string[] } + | undefined; + + const primary = modelConfig?.primary?.trim() ?? ""; + const fallbacks = modelConfig?.fallbacks ?? []; + const imagePrimary = imageModelConfig?.primary?.trim() ?? ""; + const imageFallbacks = imageModelConfig?.fallbacks ?? []; + + const addRaw = (raw: string) => { + const resolved = resolveModelRefFromString({ + raw, + defaultProvider: DEFAULT_PROVIDER, + aliasIndex, + }); + if (resolved) addRef(resolved.ref); + }; + + if (primary) addRaw(primary); + for (const raw of fallbacks) addRaw(String(raw ?? "")); + if (imagePrimary) addRaw(imagePrimary); + for (const raw of imageFallbacks) addRaw(String(raw ?? "")); + + for (const key of Object.keys(cfg.agents?.defaults?.models ?? {})) { + const parsed = parseModelRef(String(key ?? ""), DEFAULT_PROVIDER); + if (parsed) addRef(parsed); + } + + return order; +} + describeLive("live models (profile keys)", () => { it( "completes across configured models", @@ -69,16 +201,33 @@ describeLive("live models (profile keys)", () => { const authStorage = discoverAuthStorage(agentDir); const modelRegistry = discoverModels(authStorage, agentDir); const models = modelRegistry.getAll() as Array>; + const modelByKey = new Map( + models.map((model) => [`${model.provider}/${model.id}`, model]), + ); const filter = parseModelFilter(process.env.CLAWDBOT_LIVE_MODELS); const providers = parseProviderFilter( process.env.CLAWDBOT_LIVE_PROVIDERS, ); + const perModelTimeoutMs = toInt( + process.env.CLAWDBOT_LIVE_MODEL_TIMEOUT_MS, + 30_000, + ); const failures: Array<{ model: string; error: string }> = []; const skipped: Array<{ model: string; reason: string }> = []; - for (const model of models) { + const configuredKeys = resolveConfiguredModelKeys(cfg); + + for (const key of configuredKeys) { + const model = modelByKey.get(key); + if (!model) { + skipped.push({ + model: key, + reason: "configured model missing in registry", + }); + continue; + } if (providers && !providers.has(model.provider)) continue; const id = `${model.provider}/${model.id}`; if (filter && !filter.has(id)) continue; @@ -100,7 +249,7 @@ describeLive("live models (profile keys)", () => { } try { - // Special regression: OpenAI rejects replayed `reasoning` items for tool-only turns. + // Special regression: OpenAI requires replayed `reasoning` items for tool-only turns. if ( model.provider === "openai" && model.api === "openai-responses" && @@ -112,7 +261,7 @@ describeLive("live models (profile keys)", () => { parameters: Type.Object({}, { additionalProperties: false }), }; - const first = await completeSimple( + const first = await completeSimpleWithTimeout( model, { messages: [ @@ -130,6 +279,7 @@ describeLive("live models (profile keys)", () => { reasoning: model.reasoning ? "low" : undefined, maxTokens: 128, }, + perModelTimeoutMs, ); const toolCall = first.content.find((b) => b.type === "toolCall"); @@ -138,7 +288,7 @@ describeLive("live models (profile keys)", () => { throw new Error("expected tool call"); } - const second = await completeSimple( + const second = await completeSimpleWithTimeout( model, { messages: [ @@ -169,6 +319,7 @@ describeLive("live models (profile keys)", () => { reasoning: model.reasoning ? "low" : undefined, maxTokens: 64, }, + perModelTimeoutMs, ); const secondText = second.content @@ -179,26 +330,14 @@ describeLive("live models (profile keys)", () => { continue; } - const res = await completeSimple( + const ok = await completeOkWithRetry({ model, - { - messages: [ - { - role: "user", - content: "Reply with the word ok.", - timestamp: Date.now(), - }, - ], - }, - { - apiKey: apiKeyInfo.apiKey, - reasoning: model.reasoning ? "low" : undefined, - maxTokens: 64, - }, - ); + apiKey: apiKeyInfo.apiKey, + timeoutMs: perModelTimeoutMs, + }); - if (res.stopReason === "error") { - const msg = res.errorMessage ?? ""; + if (ok.res.stopReason === "error") { + const msg = ok.res.errorMessage ?? ""; if (ALL_MODELS && isModelNotFoundErrorMessage(msg)) { skipped.push({ model: id, reason: msg }); continue; @@ -206,18 +345,14 @@ describeLive("live models (profile keys)", () => { throw new Error(msg || "model returned error with no message"); } - const text = res.content - .filter((block) => block.type === "text") - .map((block) => block.text.trim()) - .join(" "); - if (text.length === 0 && model.provider === "google") { + if (ok.text.length === 0 && model.provider === "google") { skipped.push({ model: id, reason: "no text returned (likely unavailable model id)", }); continue; } - expect(text.length).toBeGreaterThan(0); + expect(ok.text.length).toBeGreaterThan(0); } catch (err) { if (model.provider === "google" && isGoogleModelNotFoundError(err)) { skipped.push({ model: id, reason: String(err) }); diff --git a/src/agents/openai-responses.reasoning-replay.test.ts b/src/agents/openai-responses.reasoning-replay.test.ts index 9cca7551a..acadf499c 100644 --- a/src/agents/openai-responses.reasoning-replay.test.ts +++ b/src/agents/openai-responses.reasoning-replay.test.ts @@ -52,7 +52,7 @@ function installFailingFetchCapture() { } describe("openai-responses reasoning replay", () => { - it("skips reasoning for tool-call-only turns (OpenAI rejects standalone reasoning)", async () => { + it("replays reasoning for tool-call-only turns (OpenAI requires it)", async () => { const cap = installFailingFetchCapture(); try { const model = buildModel(); @@ -141,8 +141,11 @@ describe("openai-responses reasoning replay", () => { ) .filter((t): t is string => typeof t === "string"); + expect(types).toContain("reasoning"); expect(types).toContain("function_call"); - expect(types).not.toContain("reasoning"); + expect(types.indexOf("reasoning")).toBeLessThan( + types.indexOf("function_call"), + ); } finally { cap.restore(); } diff --git a/src/commands/doctor-sandbox.ts b/src/commands/doctor-sandbox.ts index 823f0c7d5..703a5af2e 100644 --- a/src/commands/doctor-sandbox.ts +++ b/src/commands/doctor-sandbox.ts @@ -290,7 +290,7 @@ export function noteSandboxScopeWarnings(cfg: ClawdbotConfig) { warnings.push( `- agents.list (id "${agentId}") sandbox ${overrides.join( "/", - )} overrides ignored (scope resolves to "shared").`, + )} overrides ignored\n scope resolves to "shared".`, ); }