diff --git a/src/agents/tools/image-tool.test.ts b/src/agents/tools/image-tool.test.ts index 20e212996..c0d194542 100644 --- a/src/agents/tools/image-tool.test.ts +++ b/src/agents/tools/image-tool.test.ts @@ -150,3 +150,101 @@ describe("image tool data URL support", () => { ).toThrow(/Unsupported data URL type/i); }); }); + +describe("image tool response validation", () => { + it("rejects image-model responses with no final text", () => { + expect(() => + __testing.coerceImageAssistantText({ + provider: "openai", + model: "gpt-5-mini", + message: { + role: "assistant", + api: "openai-responses", + provider: "openai", + model: "gpt-5-mini", + stopReason: "stop", + timestamp: Date.now(), + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + total: 0, + }, + }, + content: [{ type: "thinking", thinking: "hmm" }], + }, + }), + ).toThrow(/returned no text/i); + }); + + it("surfaces provider errors from image-model responses", () => { + expect(() => + __testing.coerceImageAssistantText({ + provider: "openai", + model: "gpt-5-mini", + message: { + role: "assistant", + api: "openai-responses", + provider: "openai", + model: "gpt-5-mini", + stopReason: "error", + errorMessage: "boom", + timestamp: Date.now(), + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + total: 0, + }, + }, + content: [], + }, + }), + ).toThrow(/boom/i); + }); + + it("returns trimmed text from image-model responses", () => { + const text = __testing.coerceImageAssistantText({ + provider: "anthropic", + model: "claude-opus-4-5", + message: { + role: "assistant", + api: "anthropic-messages", + provider: "anthropic", + model: "claude-opus-4-5", + stopReason: "stop", + timestamp: Date.now(), + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + total: 0, + }, + }, + content: [{ type: "text", text: " hello " }], + }, + }); + expect(text).toBe("hello"); + }); +}); diff --git a/src/agents/tools/image-tool.ts b/src/agents/tools/image-tool.ts index e932a2bc5..64d48d15b 100644 --- a/src/agents/tools/image-tool.ts +++ b/src/agents/tools/image-tool.ts @@ -53,8 +53,35 @@ function decodeDataUrl(dataUrl: string): { export const __testing = { decodeDataUrl, + coerceImageAssistantText, } as const; +function coerceImageAssistantText(params: { + message: AssistantMessage; + provider: string; + model: string; +}): string { + const stop = params.message.stopReason; + const errorMessage = params.message.errorMessage?.trim(); + if (stop === "error" || stop === "aborted") { + throw new Error( + errorMessage + ? `Image model failed (${params.provider}/${params.model}): ${errorMessage}` + : `Image model failed (${params.provider}/${params.model})`, + ); + } + if (errorMessage) { + throw new Error( + `Image model failed (${params.provider}/${params.model}): ${errorMessage}`, + ); + } + const text = extractAssistantText(params.message); + if (text.trim()) return text.trim(); + throw new Error( + `Image model returned no text (${params.provider}/${params.model}).`, + ); +} + function coerceImageModelConfig(cfg?: ClawdbotConfig): ImageModelConfig { const imageModel = cfg?.agents?.defaults?.imageModel as | { primary?: string; fallbacks?: string[] } @@ -259,7 +286,12 @@ async function runImagePrompt(params: { prompt: string; base64: string; mimeType: string; -}): Promise<{ text: string; provider: string; model: string }> { +}): Promise<{ + text: string; + provider: string; + model: string; + attempts: Array<{ provider: string; model: string; error: string }>; +}> { const effectiveCfg: ClawdbotConfig | undefined = params.cfg ? { ...params.cfg, @@ -306,15 +338,28 @@ async function runImagePrompt(params: { maxTokens: 512, temperature: 0, })) as AssistantMessage; - return message; + return { + message, + provider: model.provider, + model: model.id, + }; }, }); - const text = extractAssistantText(result.result); + const text = coerceImageAssistantText({ + message: result.result.message, + provider: result.result.provider, + model: result.result.model, + }); return { - text: text || "(no text returned)", + text, provider: result.provider, model: result.model, + attempts: result.attempts.map((attempt) => ({ + provider: attempt.provider, + model: attempt.model, + error: attempt.error, + })), }; } @@ -421,6 +466,7 @@ export function createImageTool(options?: { details: { model: `${result.provider}/${result.model}`, image: resolvedImage, + attempts: result.attempts, }, }; },