From 78998dba9e2a9abb65672964ad12602d9cde66ed Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 4 Jan 2026 19:35:00 +0100 Subject: [PATCH] feat: add image model config + tool --- CHANGELOG.md | 4 + docs/configuration.md | 6 + docs/models.md | 12 +- docs/tools.md | 13 ++ src/agents/clawdbot-tools.ts | 5 + src/agents/model-auth.ts | 143 ++++++++++++++++++++++ src/agents/model-fallback.ts | 123 +++++++++++++++++++ src/agents/model-scan.ts | 2 +- src/agents/pi-embedded-runner.ts | 129 +------------------- src/agents/pi-tools.ts | 3 + src/agents/tools/image-tool.ts | 157 +++++++++++++++++++++++++ src/cli/models-cli.ts | 75 ++++++++++++ src/commands/models.ts | 7 ++ src/commands/models/image-fallbacks.ts | 135 +++++++++++++++++++++ src/commands/models/list.ts | 30 +++++ src/commands/models/scan.ts | 110 ++++++++++++++--- src/commands/models/set-image.ts | 34 ++++++ src/config/schema.ts | 6 + src/config/types.ts | 4 + src/config/zod-schema.ts | 2 + 20 files changed, 856 insertions(+), 144 deletions(-) create mode 100644 src/agents/model-auth.ts create mode 100644 src/agents/tools/image-tool.ts create mode 100644 src/commands/models/image-fallbacks.ts create mode 100644 src/commands/models/set-image.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cda9e460..644ec5a17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ ## Unreleased +### Highlights +- Models: add image-specific model config (`agent.imageModel` + fallbacks) and scan support. +- Agent tools: new `image` tool routed to the image model (when configured). + ### Fixes - Android: tapping the foreground service notification brings the app to the front. (#179) — thanks @Syhids - Cron tool passes `id` to the gateway for update/remove/run/runs (keeps `jobId` input). (#180) — thanks @adamgall diff --git a/docs/configuration.md b/docs/configuration.md index b99f43af9..337516c03 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -431,6 +431,8 @@ Controls the embedded agent runtime (model/thinking/verbose/timeouts). (omit to show the full catalog). `modelAliases` adds short names for `/model` (alias -> provider/model). `modelFallbacks` lists ordered fallback models to try when the default fails. +`imageModel` selects an image-capable model for the `image` tool. +`imageModelFallbacks` lists ordered fallback image models for the `image` tool. ```json5 { @@ -448,6 +450,10 @@ Controls the embedded agent runtime (model/thinking/verbose/timeouts). "openrouter/deepseek/deepseek-r1:free", "openrouter/meta-llama/llama-3.3-70b-instruct:free" ], + imageModel: "openrouter/qwen/qwen-2.5-vl-72b-instruct:free", + imageModelFallbacks: [ + "openrouter/google/gemini-2.0-flash-vision:free" + ], thinkingDefault: "low", verboseDefault: "off", elevatedDefault: "on", diff --git a/docs/models.md b/docs/models.md index 939ffc6bf..32f6d0863 100644 --- a/docs/models.md +++ b/docs/models.md @@ -19,16 +19,22 @@ that prefers tool-call + image-capable models and maintains ordered fallbacks. - show default model + aliases + fallbacks + allowlist - `clawdbot models set ` - writes `agent.model` in config +- `clawdbot models set-image ` + - writes `agent.imageModel` in config - `clawdbot models aliases list|add|remove` - writes `agent.modelAliases` - `clawdbot models fallbacks list|add|remove|clear` - writes `agent.modelFallbacks` +- `clawdbot models image-fallbacks list|add|remove|clear` + - writes `agent.imageModelFallbacks` - `clawdbot models scan` - OpenRouter :free scan; probe tool-call + image; interactive selection ## Config changes - Add `agent.modelFallbacks: string[]` (ordered list of provider/model IDs). +- Add `agent.imageModel?: string` (optional image-capable model for image tool). +- Add `agent.imageModelFallbacks?: string[]` (ordered list for image tool). - Keep existing: - `agent.model` (default) - `agent.allowedModels` (list filter) @@ -49,8 +55,8 @@ Probes (direct pi-ai complete) - Prompt includes 1x1 PNG; success if no "unsupported image" error. Scoring/selection -- Prefer models passing tool + image. -- Fallback to tool-only if no tool+image pass. +- Prefer models passing tool + image for text/tool fallbacks. +- Prefer image-only models for image tool fallback (even if tool probe fails). - Rank by: image ok, then lower tool latency, then larger context, then params. Interactive selection (TTY) @@ -61,7 +67,9 @@ Interactive selection (TTY) Output - Writes `agent.modelFallbacks` ordered. +- Writes `agent.imageModelFallbacks` ordered (image-capable models). - Optional `--set-default` to set `agent.model`. +- Optional `--set-image` to set `agent.imageModel`. ## Runtime fallback diff --git a/docs/tools.md b/docs/tools.md index abac76f60..292be960e 100644 --- a/docs/tools.md +++ b/docs/tools.md @@ -101,6 +101,19 @@ Notes: - Videos return `FILE:` (mp4). - Location returns a JSON payload (lat/lon/accuracy/timestamp). +### `image` +Analyze an image with the configured image model. + +Core parameters: +- `image` (required path or URL) +- `prompt` (optional; defaults to "Describe the image.") +- `model` (optional override) +- `maxBytesMb` (optional size cap) + +Notes: +- Only available when `agent.imageModel` or `agent.imageModelFallbacks` is set. +- Uses the image model directly (independent of the main chat model). + ### `cron` Manage Gateway cron jobs and wakeups. diff --git a/src/agents/clawdbot-tools.ts b/src/agents/clawdbot-tools.ts index a31beb497..2884e657f 100644 --- a/src/agents/clawdbot-tools.ts +++ b/src/agents/clawdbot-tools.ts @@ -9,12 +9,16 @@ import { createSessionsHistoryTool } from "./tools/sessions-history-tool.js"; import { createSessionsListTool } from "./tools/sessions-list-tool.js"; import { createSessionsSendTool } from "./tools/sessions-send-tool.js"; import { createSlackTool } from "./tools/slack-tool.js"; +import { createImageTool } from "./tools/image-tool.js"; +import type { ClawdbotConfig } from "../config/config.js"; export function createClawdbotTools(options?: { browserControlUrl?: string; agentSessionKey?: string; agentSurface?: string; + config?: ClawdbotConfig; }): AnyAgentTool[] { + const imageTool = createImageTool({ config: options?.config }); return [ createBrowserTool({ defaultControlUrl: options?.browserControlUrl }), createCanvasTool(), @@ -29,5 +33,6 @@ export function createClawdbotTools(options?: { agentSessionKey: options?.agentSessionKey, agentSurface: options?.agentSurface, }), + ...(imageTool ? [imageTool] : []), ]; } diff --git a/src/agents/model-auth.ts b/src/agents/model-auth.ts new file mode 100644 index 000000000..9bb1d3ad5 --- /dev/null +++ b/src/agents/model-auth.ts @@ -0,0 +1,143 @@ +import fsSync from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { + type Api, + getEnvApiKey, + getOAuthApiKey, + type Model, + type OAuthCredentials, + type OAuthProvider, +} from "@mariozechner/pi-ai"; +import { discoverAuthStorage } from "@mariozechner/pi-coding-agent"; + +import { CONFIG_DIR, resolveUserPath } from "../utils.js"; + +const OAUTH_FILENAME = "oauth.json"; +const DEFAULT_OAUTH_DIR = path.join(CONFIG_DIR, "credentials"); +let oauthStorageConfigured = false; + +type OAuthStorage = Record; + +function resolveClawdbotOAuthPath(): string { + const overrideDir = + process.env.CLAWDBOT_OAUTH_DIR?.trim() || DEFAULT_OAUTH_DIR; + return path.join(resolveUserPath(overrideDir), OAUTH_FILENAME); +} + +function loadOAuthStorageAt(pathname: string): OAuthStorage | null { + if (!fsSync.existsSync(pathname)) return null; + try { + const content = fsSync.readFileSync(pathname, "utf8"); + const json = JSON.parse(content) as OAuthStorage; + if (!json || typeof json !== "object") return null; + return json; + } catch { + return null; + } +} + +function hasAnthropicOAuth(storage: OAuthStorage): boolean { + const entry = storage.anthropic as + | { + refresh?: string; + refresh_token?: string; + refreshToken?: string; + access?: string; + access_token?: string; + accessToken?: string; + } + | undefined; + if (!entry) return false; + const refresh = + entry.refresh ?? entry.refresh_token ?? entry.refreshToken ?? ""; + const access = entry.access ?? entry.access_token ?? entry.accessToken ?? ""; + return Boolean(refresh.trim() && access.trim()); +} + +function saveOAuthStorageAt(pathname: string, storage: OAuthStorage): void { + const dir = path.dirname(pathname); + fsSync.mkdirSync(dir, { recursive: true, mode: 0o700 }); + fsSync.writeFileSync( + pathname, + `${JSON.stringify(storage, null, 2)}\n`, + "utf8", + ); + fsSync.chmodSync(pathname, 0o600); +} + +function legacyOAuthPaths(): string[] { + const paths: string[] = []; + const piOverride = process.env.PI_CODING_AGENT_DIR?.trim(); + if (piOverride) { + paths.push(path.join(resolveUserPath(piOverride), OAUTH_FILENAME)); + } + paths.push(path.join(os.homedir(), ".pi", "agent", OAUTH_FILENAME)); + paths.push(path.join(os.homedir(), ".claude", OAUTH_FILENAME)); + paths.push(path.join(os.homedir(), ".config", "claude", OAUTH_FILENAME)); + paths.push(path.join(os.homedir(), ".config", "anthropic", OAUTH_FILENAME)); + return Array.from(new Set(paths)); +} + +function importLegacyOAuthIfNeeded(destPath: string): void { + if (fsSync.existsSync(destPath)) return; + for (const legacyPath of legacyOAuthPaths()) { + const storage = loadOAuthStorageAt(legacyPath); + if (!storage || !hasAnthropicOAuth(storage)) continue; + saveOAuthStorageAt(destPath, storage); + return; + } +} + +export function ensureOAuthStorage(): void { + if (oauthStorageConfigured) return; + oauthStorageConfigured = true; + const oauthPath = resolveClawdbotOAuthPath(); + importLegacyOAuthIfNeeded(oauthPath); +} + +function isOAuthProvider(provider: string): provider is OAuthProvider { + return ( + provider === "anthropic" || + provider === "anthropic-oauth" || + provider === "google" || + provider === "openai" || + provider === "openai-compatible" || + provider === "github-copilot" || + provider === "google-gemini-cli" || + provider === "google-antigravity" + ); +} + +export async function getApiKeyForModel( + model: Model, + authStorage: ReturnType, +): Promise { + const storedKey = await authStorage.getApiKey(model.provider); + if (storedKey) return storedKey; + ensureOAuthStorage(); + if (model.provider === "anthropic") { + const oauthEnv = process.env.ANTHROPIC_OAUTH_TOKEN; + if (oauthEnv?.trim()) return oauthEnv.trim(); + } + const envKey = getEnvApiKey(model.provider); + if (envKey) return envKey; + if (isOAuthProvider(model.provider)) { + const oauthPath = resolveClawdbotOAuthPath(); + const storage = loadOAuthStorageAt(oauthPath); + if (storage) { + try { + const result = await getOAuthApiKey(model.provider, storage); + if (result?.apiKey) { + storage[model.provider] = result.newCredentials; + saveOAuthStorageAt(oauthPath, storage); + return result.apiKey; + } + } catch { + // fall through to error below + } + } + } + throw new Error(`No API key found for provider "${model.provider}"`); +} diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 7fba489d3..a8c63d870 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -44,6 +44,54 @@ function buildAllowedModelKeys( return keys.size > 0 ? keys : null; } +function resolveImageFallbackCandidates(params: { + cfg: ClawdbotConfig | undefined; + defaultProvider: string; + modelOverride?: string; +}): ModelCandidate[] { + const aliasIndex = buildModelAliasIndex({ + cfg: params.cfg ?? {}, + defaultProvider: params.defaultProvider, + }); + const allowlist = buildAllowedModelKeys(params.cfg, params.defaultProvider); + const seen = new Set(); + const candidates: ModelCandidate[] = []; + + const addCandidate = ( + candidate: ModelCandidate, + enforceAllowlist: boolean, + ) => { + if (!candidate.provider || !candidate.model) return; + const key = modelKey(candidate.provider, candidate.model); + if (seen.has(key)) return; + if (enforceAllowlist && allowlist && !allowlist.has(key)) return; + seen.add(key); + candidates.push(candidate); + }; + + const addRaw = (raw: string, enforceAllowlist: boolean) => { + const resolved = resolveModelRefFromString({ + raw: String(raw ?? ""), + defaultProvider: params.defaultProvider, + aliasIndex, + }); + if (!resolved) return; + addCandidate(resolved.ref, enforceAllowlist); + }; + + if (params.modelOverride?.trim()) { + addRaw(params.modelOverride, false); + } else if (params.cfg?.agent?.imageModel?.trim()) { + addRaw(params.cfg.agent.imageModel, false); + } + + for (const raw of params.cfg?.agent?.imageModelFallbacks ?? []) { + addRaw(raw, true); + } + + return candidates; +} + function resolveFallbackCandidates(params: { cfg: ClawdbotConfig | undefined; provider: string; @@ -151,3 +199,78 @@ export async function runWithModelFallback(params: { { cause: lastError instanceof Error ? lastError : undefined }, ); } + +export async function runWithImageModelFallback(params: { + cfg: ClawdbotConfig | undefined; + modelOverride?: string; + run: (provider: string, model: string) => Promise; + onError?: (attempt: { + provider: string; + model: string; + error: unknown; + attempt: number; + total: number; + }) => void | Promise; +}): Promise<{ + result: T; + provider: string; + model: string; + attempts: FallbackAttempt[]; +}> { + const candidates = resolveImageFallbackCandidates({ + cfg: params.cfg, + defaultProvider: DEFAULT_PROVIDER, + modelOverride: params.modelOverride, + }); + if (candidates.length === 0) { + throw new Error( + "No image model configured. Set agent.imageModel or agent.imageModelFallbacks.", + ); + } + + const attempts: FallbackAttempt[] = []; + let lastError: unknown; + + for (let i = 0; i < candidates.length; i += 1) { + const candidate = candidates[i] as ModelCandidate; + try { + const result = await params.run(candidate.provider, candidate.model); + return { + result, + provider: candidate.provider, + model: candidate.model, + attempts, + }; + } catch (err) { + if (isAbortError(err)) throw err; + lastError = err; + attempts.push({ + provider: candidate.provider, + model: candidate.model, + error: err instanceof Error ? err.message : String(err), + }); + await params.onError?.({ + provider: candidate.provider, + model: candidate.model, + error: err, + attempt: i + 1, + total: candidates.length, + }); + } + } + + if (attempts.length <= 1 && lastError) throw lastError; + const summary = + attempts.length > 0 + ? attempts + .map( + (attempt) => + `${attempt.provider}/${attempt.model}: ${attempt.error}`, + ) + .join(" | ") + : "unknown"; + throw new Error( + `All image models failed (${attempts.length || candidates.length}): ${summary}`, + { cause: lastError instanceof Error ? lastError : undefined }, + ); +} diff --git a/src/agents/model-scan.ts b/src/agents/model-scan.ts index a235073e2..ff6825659 100644 --- a/src/agents/model-scan.ts +++ b/src/agents/model-scan.ts @@ -348,7 +348,7 @@ export async function scanOpenRouterModels( }; const toolResult = await probeTool(model, apiKey, timeoutMs); - const imageResult = toolResult.ok + const imageResult = model.input.includes("image") ? await probeImage(ensureImageInput(model), apiKey, timeoutMs) : { ok: false, latencyMs: null, skipped: true }; diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts index 618bfe85a..73adafc50 100644 --- a/src/agents/pi-embedded-runner.ts +++ b/src/agents/pi-embedded-runner.ts @@ -1,17 +1,11 @@ -import fsSync from "node:fs"; import fs from "node:fs/promises"; import os from "node:os"; -import path from "node:path"; import type { AgentMessage, ThinkingLevel } from "@mariozechner/pi-agent-core"; import { type Api, type AssistantMessage, - getEnvApiKey, - getOAuthApiKey, type Model, - type OAuthCredentials, - type OAuthProvider, } from "@mariozechner/pi-ai"; import { buildSystemPrompt, @@ -25,7 +19,6 @@ import { import type { ThinkLevel, VerboseLevel } from "../auto-reply/thinking.js"; import { formatToolAggregate } from "../auto-reply/tool-meta.js"; import type { ClawdbotConfig } from "../config/config.js"; -import { resolveOAuthPath } from "../config/paths.js"; import { getMachineDisplayName } from "../infra/machine-name.js"; import { createSubsystemLogger } from "../logging.js"; import { splitMediaFromOutput } from "../media/parse.js"; @@ -37,6 +30,7 @@ import { resolveUserPath } from "../utils.js"; import { resolveClawdbotAgentDir } from "./agent-paths.js"; import type { BashElevatedDefaults } from "./bash-tools.js"; import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js"; +import { getApiKeyForModel } from "./model-auth.js"; import { ensureClawdbotModelsJson } from "./models-config.js"; import { buildBootstrapContextFiles, @@ -106,10 +100,6 @@ type EmbeddedRunWaiter = { }; const EMBEDDED_RUN_WAITERS = new Map>(); -const OAUTH_FILENAME = "oauth.json"; -let oauthStorageConfigured = false; - -type OAuthStorage = Record; type EmbeddedSandboxInfo = { enabled: boolean; workspaceDir?: string; @@ -139,90 +129,6 @@ export function buildEmbeddedSandboxInfo( }; } -function resolveClawdbotOAuthPath(): string { - return resolveOAuthPath(); -} - -function loadOAuthStorageAt(pathname: string): OAuthStorage | null { - if (!fsSync.existsSync(pathname)) return null; - try { - const content = fsSync.readFileSync(pathname, "utf8"); - const json = JSON.parse(content) as OAuthStorage; - if (!json || typeof json !== "object") return null; - return json; - } catch { - return null; - } -} - -function hasAnthropicOAuth(storage: OAuthStorage): boolean { - const entry = storage.anthropic as - | { - refresh?: string; - refresh_token?: string; - refreshToken?: string; - access?: string; - access_token?: string; - accessToken?: string; - } - | undefined; - if (!entry) return false; - const refresh = - entry.refresh ?? entry.refresh_token ?? entry.refreshToken ?? ""; - const access = entry.access ?? entry.access_token ?? entry.accessToken ?? ""; - return Boolean(refresh.trim() && access.trim()); -} - -function saveOAuthStorageAt(pathname: string, storage: OAuthStorage): void { - const dir = path.dirname(pathname); - fsSync.mkdirSync(dir, { recursive: true, mode: 0o700 }); - fsSync.writeFileSync( - pathname, - `${JSON.stringify(storage, null, 2)}\n`, - "utf8", - ); - fsSync.chmodSync(pathname, 0o600); -} - -function legacyOAuthPaths(): string[] { - const paths: string[] = []; - const piOverride = process.env.PI_CODING_AGENT_DIR?.trim(); - if (piOverride) { - paths.push(path.join(resolveUserPath(piOverride), OAUTH_FILENAME)); - } - paths.push(path.join(os.homedir(), ".pi", "agent", OAUTH_FILENAME)); - paths.push(path.join(os.homedir(), ".claude", OAUTH_FILENAME)); - paths.push(path.join(os.homedir(), ".config", "claude", OAUTH_FILENAME)); - paths.push(path.join(os.homedir(), ".config", "anthropic", OAUTH_FILENAME)); - return Array.from(new Set(paths)); -} - -function importLegacyOAuthIfNeeded(destPath: string): void { - if (fsSync.existsSync(destPath)) return; - for (const legacyPath of legacyOAuthPaths()) { - const storage = loadOAuthStorageAt(legacyPath); - if (!storage || !hasAnthropicOAuth(storage)) continue; - saveOAuthStorageAt(destPath, storage); - return; - } -} - -function ensureOAuthStorage(): void { - if (oauthStorageConfigured) return; - oauthStorageConfigured = true; - const oauthPath = resolveClawdbotOAuthPath(); - importLegacyOAuthIfNeeded(oauthPath); -} - -function isOAuthProvider(provider: string): provider is OAuthProvider { - return ( - provider === "anthropic" || - provider === "github-copilot" || - provider === "google-gemini-cli" || - provider === "google-antigravity" - ); -} - export function queueEmbeddedPiMessage( sessionId: string, text: string, @@ -325,38 +231,6 @@ function resolveModel( return { model, authStorage, modelRegistry }; } -async function getApiKeyForModel( - model: Model, - authStorage: ReturnType, -): Promise { - const storedKey = await authStorage.getApiKey(model.provider); - if (storedKey) return storedKey; - ensureOAuthStorage(); - if (model.provider === "anthropic") { - const oauthEnv = process.env.ANTHROPIC_OAUTH_TOKEN; - if (oauthEnv?.trim()) return oauthEnv.trim(); - } - const envKey = getEnvApiKey(model.provider); - if (envKey) return envKey; - if (isOAuthProvider(model.provider)) { - const oauthPath = resolveClawdbotOAuthPath(); - const storage = loadOAuthStorageAt(oauthPath); - if (storage) { - try { - const result = await getOAuthApiKey(model.provider, storage); - if (result?.apiKey) { - storage[model.provider] = result.newCredentials; - saveOAuthStorageAt(oauthPath, storage); - return result.apiKey; - } - } catch { - // fall through to error below - } - } - } - throw new Error(`No API key found for provider "${model.provider}"`); -} - function resolvePromptSkills( snapshot: SkillSnapshot, entries: SkillEntry[], @@ -502,6 +376,7 @@ export async function runEmbeddedPiAgent(params: { sandbox, surface: params.surface, sessionKey: params.sessionKey ?? params.sessionId, + config: params.config, }); const machineName = await getMachineDisplayName(); const runtimeInfo = { diff --git a/src/agents/pi-tools.ts b/src/agents/pi-tools.ts index 6c2ff334a..847a101bf 100644 --- a/src/agents/pi-tools.ts +++ b/src/agents/pi-tools.ts @@ -17,6 +17,7 @@ import { type ProcessToolDefaults, } from "./bash-tools.js"; import { createClawdbotTools } from "./clawdbot-tools.js"; +import type { ClawdbotConfig } from "../config/config.js"; import type { SandboxContext, SandboxToolPolicy } from "./sandbox.js"; import { assertSandboxPath } from "./sandbox-paths.js"; import { sanitizeToolResultImages } from "./tool-images.js"; @@ -452,6 +453,7 @@ export function createClawdbotCodingTools(options?: { surface?: string; sandbox?: SandboxContext | null; sessionKey?: string; + config?: ClawdbotConfig; }): AnyAgentTool[] { const bashToolName = "bash"; const sandbox = options?.sandbox?.enabled ? options.sandbox : undefined; @@ -497,6 +499,7 @@ export function createClawdbotCodingTools(options?: { browserControlUrl: sandbox?.browser?.controlUrl, agentSessionKey: options?.sessionKey, agentSurface: options?.surface, + config: options?.config, }), ]; const allowDiscord = shouldIncludeDiscordTool(options?.surface); diff --git a/src/agents/tools/image-tool.ts b/src/agents/tools/image-tool.ts new file mode 100644 index 000000000..7b79ee125 --- /dev/null +++ b/src/agents/tools/image-tool.ts @@ -0,0 +1,157 @@ +import { type Api, type AssistantMessage, complete, type Context, type Model } from "@mariozechner/pi-ai"; +import { discoverAuthStorage, discoverModels } from "@mariozechner/pi-coding-agent"; +import { Type } from "@sinclair/typebox"; + +import type { ClawdbotConfig } from "../../config/config.js"; +import { loadWebMedia } from "../../web/media.js"; +import { resolveClawdbotAgentDir } from "../agent-paths.js"; +import { getApiKeyForModel } from "../model-auth.js"; +import { runWithImageModelFallback } from "../model-fallback.js"; +import { ensureClawdbotModelsJson } from "../models-config.js"; +import { extractAssistantText } from "../pi-embedded-utils.js"; +import { resolveUserPath } from "../../utils.js"; +import type { AnyAgentTool } from "./common.js"; + +const DEFAULT_PROMPT = "Describe the image."; + +function ensureImageToolConfigured(cfg?: ClawdbotConfig): boolean { + const primary = cfg?.agent?.imageModel?.trim(); + const fallbacks = cfg?.agent?.imageModelFallbacks ?? []; + return Boolean(primary || fallbacks.length > 0); +} + +function pickMaxBytes(cfg?: ClawdbotConfig, maxBytesMb?: number): number | undefined { + if (typeof maxBytesMb === "number" && Number.isFinite(maxBytesMb) && maxBytesMb > 0) { + return Math.floor(maxBytesMb * 1024 * 1024); + } + const configured = cfg?.agent?.mediaMaxMb; + if (typeof configured === "number" && Number.isFinite(configured) && configured > 0) { + return Math.floor(configured * 1024 * 1024); + } + return undefined; +} + +function buildImageContext(prompt: string, base64: string, mimeType: string): Context { + return { + messages: [ + { + role: "user", + content: [ + { type: "text", text: prompt }, + { type: "image", data: base64, mimeType }, + ], + timestamp: Date.now(), + }, + ], + }; +} + +async function runImagePrompt(params: { + cfg?: ClawdbotConfig; + modelOverride?: string; + prompt: string; + base64: string; + mimeType: string; +}): Promise<{ text: string; provider: string; model: string }> { + const agentDir = resolveClawdbotAgentDir(); + await ensureClawdbotModelsJson(params.cfg); + const authStorage = discoverAuthStorage(agentDir); + const modelRegistry = discoverModels(authStorage, agentDir); + + const result = await runWithImageModelFallback({ + cfg: params.cfg, + modelOverride: params.modelOverride, + run: async (provider, modelId) => { + const model = modelRegistry.find(provider, modelId) as Model | null; + if (!model) { + throw new Error(`Unknown model: ${provider}/${modelId}`); + } + if (!model.input?.includes("image")) { + throw new Error(`Model does not support images: ${provider}/${modelId}`); + } + const apiKey = await getApiKeyForModel(model, authStorage); + authStorage.setRuntimeApiKey(model.provider, apiKey); + const context = buildImageContext( + params.prompt, + params.base64, + params.mimeType, + ); + const message = (await complete(model, context, { + apiKey, + maxTokens: 512, + temperature: 0, + })) as AssistantMessage; + return message; + }, + }); + + const text = extractAssistantText(result.result); + return { + text: text || "(no text returned)", + provider: result.provider, + model: result.model, + }; +} + +export function createImageTool(options?: { + config?: ClawdbotConfig; +}): AnyAgentTool | null { + if (!ensureImageToolConfigured(options?.config)) return null; + return { + label: "Image", + name: "image", + description: + "Analyze an image with the configured image model (agent.imageModel). Provide a prompt and image path or URL.", + parameters: Type.Object({ + prompt: Type.Optional(Type.String()), + image: Type.String(), + model: Type.Optional(Type.String()), + maxBytesMb: Type.Optional(Type.Number()), + }), + execute: async (_toolCallId, args) => { + const record = + args && typeof args === "object" + ? (args as Record) + : {}; + const imageRaw = + typeof record.image === "string" ? record.image.trim() : ""; + if (!imageRaw) throw new Error("image required"); + const promptRaw = + typeof record.prompt === "string" && record.prompt.trim() + ? record.prompt.trim() + : DEFAULT_PROMPT; + const modelOverride = + typeof record.model === "string" && record.model.trim() + ? record.model.trim() + : undefined; + const maxBytesMb = + typeof record.maxBytesMb === "number" ? record.maxBytesMb : undefined; + const maxBytes = pickMaxBytes(options?.config, maxBytesMb); + + const resolvedImage = imageRaw.startsWith("~") + ? resolveUserPath(imageRaw) + : imageRaw; + const media = await loadWebMedia(resolvedImage, maxBytes); + if (media.kind !== "image") { + throw new Error(`Unsupported media type: ${media.kind}`); + } + + const mimeType = media.contentType ?? "image/png"; + const base64 = media.buffer.toString("base64"); + const result = await runImagePrompt({ + cfg: options?.config, + modelOverride, + prompt: promptRaw, + base64, + mimeType, + }); + return { + content: [{ type: "text", text: result.text }], + details: { + model: `${result.provider}/${result.model}`, + image: resolvedImage, + }, + }; + }, + }; +} diff --git a/src/cli/models-cli.ts b/src/cli/models-cli.ts index 7637e235b..4d696d159 100644 --- a/src/cli/models-cli.ts +++ b/src/cli/models-cli.ts @@ -8,9 +8,14 @@ import { modelsFallbacksClearCommand, modelsFallbacksListCommand, modelsFallbacksRemoveCommand, + modelsImageFallbacksAddCommand, + modelsImageFallbacksClearCommand, + modelsImageFallbacksListCommand, + modelsImageFallbacksRemoveCommand, modelsListCommand, modelsScanCommand, modelsSetCommand, + modelsSetImageCommand, modelsStatusCommand, } from "../commands/models.js"; import { defaultRuntime } from "../runtime.js"; @@ -64,6 +69,19 @@ export function registerModelsCli(program: Command) { } }); + models + .command("set-image") + .description("Set the image model") + .argument("", "Model id or alias") + .action(async (model: string) => { + try { + await modelsSetImageCommand(model, defaultRuntime); + } catch (err) { + defaultRuntime.error(String(err)); + defaultRuntime.exit(1); + } + }); + const aliases = models.command("aliases").description("Manage model aliases"); aliases @@ -163,6 +181,62 @@ export function registerModelsCli(program: Command) { } }); + const imageFallbacks = models + .command("image-fallbacks") + .description("Manage image model fallback list"); + + imageFallbacks + .command("list") + .description("List image fallback models") + .option("--json", "Output JSON", false) + .option("--plain", "Plain output", false) + .action(async (opts) => { + try { + await modelsImageFallbacksListCommand(opts, defaultRuntime); + } catch (err) { + defaultRuntime.error(String(err)); + defaultRuntime.exit(1); + } + }); + + imageFallbacks + .command("add") + .description("Add an image fallback model") + .argument("", "Model id or alias") + .action(async (model: string) => { + try { + await modelsImageFallbacksAddCommand(model, defaultRuntime); + } catch (err) { + defaultRuntime.error(String(err)); + defaultRuntime.exit(1); + } + }); + + imageFallbacks + .command("remove") + .description("Remove an image fallback model") + .argument("", "Model id or alias") + .action(async (model: string) => { + try { + await modelsImageFallbacksRemoveCommand(model, defaultRuntime); + } catch (err) { + defaultRuntime.error(String(err)); + defaultRuntime.exit(1); + } + }); + + imageFallbacks + .command("clear") + .description("Clear all image fallback models") + .action(async () => { + try { + await modelsImageFallbacksClearCommand(defaultRuntime); + } catch (err) { + defaultRuntime.error(String(err)); + defaultRuntime.exit(1); + } + }); + models .command("scan") .description("Scan OpenRouter free models for tools + images") @@ -175,6 +249,7 @@ export function registerModelsCli(program: Command) { .option("--yes", "Accept defaults without prompting", false) .option("--no-input", "Disable prompts (use defaults)") .option("--set-default", "Set agent.model to the first selection", false) + .option("--set-image", "Set agent.imageModel to the first image selection", false) .option("--json", "Output JSON", false) .action(async (opts) => { try { diff --git a/src/commands/models.ts b/src/commands/models.ts index 06c52b70d..4622a4479 100644 --- a/src/commands/models.ts +++ b/src/commands/models.ts @@ -9,6 +9,13 @@ export { modelsFallbacksListCommand, modelsFallbacksRemoveCommand, } from "./models/fallbacks.js"; +export { + modelsImageFallbacksAddCommand, + modelsImageFallbacksClearCommand, + modelsImageFallbacksListCommand, + modelsImageFallbacksRemoveCommand, +} from "./models/image-fallbacks.js"; export { modelsListCommand, modelsStatusCommand } from "./models/list.js"; export { modelsScanCommand } from "./models/scan.js"; export { modelsSetCommand } from "./models/set.js"; +export { modelsSetImageCommand } from "./models/set-image.js"; diff --git a/src/commands/models/image-fallbacks.ts b/src/commands/models/image-fallbacks.ts new file mode 100644 index 000000000..f4a941b8a --- /dev/null +++ b/src/commands/models/image-fallbacks.ts @@ -0,0 +1,135 @@ +import { + buildModelAliasIndex, + resolveModelRefFromString, +} from "../../agents/model-selection.js"; +import { CONFIG_PATH_CLAWDBOT, loadConfig } from "../../config/config.js"; +import type { RuntimeEnv } from "../../runtime.js"; +import { + DEFAULT_PROVIDER, + ensureFlagCompatibility, + modelKey, + resolveModelTarget, + updateConfig, +} from "./shared.js"; + +export async function modelsImageFallbacksListCommand( + opts: { json?: boolean; plain?: boolean }, + runtime: RuntimeEnv, +) { + ensureFlagCompatibility(opts); + const cfg = loadConfig(); + const fallbacks = cfg.agent?.imageModelFallbacks ?? []; + + if (opts.json) { + runtime.log(JSON.stringify({ fallbacks }, null, 2)); + return; + } + if (opts.plain) { + for (const entry of fallbacks) runtime.log(entry); + return; + } + + runtime.log(`Image fallbacks (${fallbacks.length}):`); + if (fallbacks.length === 0) { + runtime.log("- none"); + return; + } + for (const entry of fallbacks) runtime.log(`- ${entry}`); +} + +export async function modelsImageFallbacksAddCommand( + modelRaw: string, + runtime: RuntimeEnv, +) { + const updated = await updateConfig((cfg) => { + const resolved = resolveModelTarget({ raw: modelRaw, cfg }); + const targetKey = modelKey(resolved.provider, resolved.model); + const aliasIndex = buildModelAliasIndex({ + cfg, + defaultProvider: DEFAULT_PROVIDER, + }); + const existing = cfg.agent?.imageModelFallbacks ?? []; + const existingKeys = existing + .map((entry) => + resolveModelRefFromString({ + raw: String(entry ?? ""), + defaultProvider: DEFAULT_PROVIDER, + aliasIndex, + }), + ) + .filter((entry): entry is NonNullable => Boolean(entry)) + .map((entry) => modelKey(entry.ref.provider, entry.ref.model)); + + if (existingKeys.includes(targetKey)) return cfg; + + return { + ...cfg, + agent: { + ...cfg.agent, + imageModelFallbacks: [...existing, targetKey], + }, + }; + }); + + runtime.log(`Updated ${CONFIG_PATH_CLAWDBOT}`); + runtime.log( + `Image fallbacks: ${(updated.agent?.imageModelFallbacks ?? []).join(", ")}`, + ); +} + +export async function modelsImageFallbacksRemoveCommand( + modelRaw: string, + runtime: RuntimeEnv, +) { + const updated = await updateConfig((cfg) => { + const resolved = resolveModelTarget({ raw: modelRaw, cfg }); + const targetKey = modelKey(resolved.provider, resolved.model); + const aliasIndex = buildModelAliasIndex({ + cfg, + defaultProvider: DEFAULT_PROVIDER, + }); + const existing = cfg.agent?.imageModelFallbacks ?? []; + const filtered = existing.filter((entry) => { + const resolvedEntry = resolveModelRefFromString({ + raw: String(entry ?? ""), + defaultProvider: DEFAULT_PROVIDER, + aliasIndex, + }); + if (!resolvedEntry) return true; + return ( + modelKey(resolvedEntry.ref.provider, resolvedEntry.ref.model) !== + targetKey + ); + }); + + if (filtered.length === existing.length) { + throw new Error(`Image fallback not found: ${targetKey}`); + } + + return { + ...cfg, + agent: { + ...cfg.agent, + imageModelFallbacks: filtered, + }, + }; + }); + + runtime.log(`Updated ${CONFIG_PATH_CLAWDBOT}`); + runtime.log( + `Image fallbacks: ${(updated.agent?.imageModelFallbacks ?? []).join(", ")}`, + ); +} + +export async function modelsImageFallbacksClearCommand(runtime: RuntimeEnv) { + await updateConfig((cfg) => ({ + ...cfg, + agent: { + ...cfg.agent, + imageModelFallbacks: [], + }, + })); + + runtime.log(`Updated ${CONFIG_PATH_CLAWDBOT}`); + runtime.log("Image fallback list cleared."); +} diff --git a/src/commands/models/list.ts b/src/commands/models/list.ts index dd236d247..e061cf5d3 100644 --- a/src/commands/models/list.ts +++ b/src/commands/models/list.ts @@ -120,6 +120,26 @@ const resolveConfiguredEntries = (cfg: ClawdbotConfig) => { addEntry(resolved.ref, `fallback#${idx + 1}`); }); + const imageModelRaw = cfg.agent?.imageModel?.trim(); + if (imageModelRaw) { + const resolved = resolveModelRefFromString({ + raw: imageModelRaw, + defaultProvider: DEFAULT_PROVIDER, + aliasIndex, + }); + if (resolved) addEntry(resolved.ref, "image"); + } + + (cfg.agent?.imageModelFallbacks ?? []).forEach((raw, idx) => { + const resolved = resolveModelRefFromString({ + raw: String(raw ?? ""), + defaultProvider: DEFAULT_PROVIDER, + aliasIndex, + }); + if (!resolved) return; + addEntry(resolved.ref, `img-fallback#${idx + 1}`); + }); + (cfg.agent?.allowedModels ?? []).forEach((raw) => { const parsed = parseModelRef(String(raw ?? ""), DEFAULT_PROVIDER); if (!parsed) return; @@ -375,6 +395,8 @@ export async function modelsStatusCommand( const rawModel = cfg.agent?.model?.trim() ?? ""; const defaultLabel = rawModel || `${resolved.provider}/${resolved.model}`; const fallbacks = cfg.agent?.modelFallbacks ?? []; + const imageModel = cfg.agent?.imageModel?.trim() ?? ""; + const imageFallbacks = cfg.agent?.imageModelFallbacks ?? []; const aliases = cfg.agent?.modelAliases ?? {}; const allowed = cfg.agent?.allowedModels ?? []; @@ -386,6 +408,8 @@ export async function modelsStatusCommand( defaultModel: defaultLabel, resolvedDefault: `${resolved.provider}/${resolved.model}`, fallbacks, + imageModel: imageModel || null, + imageFallbacks, aliases, allowed, }, @@ -406,6 +430,12 @@ export async function modelsStatusCommand( runtime.log( `Fallbacks (${fallbacks.length || 0}): ${fallbacks.join(", ") || "-"}`, ); + runtime.log(`Image model: ${imageModel || "-"}`); + runtime.log( + `Image fallbacks (${imageFallbacks.length || 0}): ${ + imageFallbacks.length ? imageFallbacks.join(", ") : "-" + }`, + ); runtime.log( `Aliases (${Object.keys(aliases).length || 0}): ${ Object.keys(aliases).length diff --git a/src/commands/models/scan.ts b/src/commands/models/scan.ts index 13ce6c6f5..4cb3b858f 100644 --- a/src/commands/models/scan.ts +++ b/src/commands/models/scan.ts @@ -49,6 +49,24 @@ function sortScanResults(results: ModelScanResult[]): ModelScanResult[] { }); } +function sortImageResults(results: ModelScanResult[]): ModelScanResult[] { + return results.slice().sort((a, b) => { + const aLatency = a.image.latencyMs ?? Number.POSITIVE_INFINITY; + const bLatency = b.image.latencyMs ?? Number.POSITIVE_INFINITY; + if (aLatency !== bLatency) return aLatency - bLatency; + + const aCtx = a.contextLength ?? 0; + const bCtx = b.contextLength ?? 0; + if (aCtx !== bCtx) return bCtx - aCtx; + + const aParams = a.inferredParamB ?? 0; + const bParams = b.inferredParamB ?? 0; + if (aParams !== bParams) return bParams - aParams; + + return a.modelRef.localeCompare(b.modelRef); + }); +} + function buildScanHint(result: ModelScanResult): string { const toolLabel = result.tool.ok ? `tool ${formatMs(result.tool.latencyMs)}` @@ -71,8 +89,9 @@ function printScanSummary(results: ModelScanResult[], runtime: RuntimeEnv) { const toolOk = results.filter((r) => r.tool.ok); const imageOk = results.filter((r) => r.image.ok); const toolImageOk = results.filter((r) => r.tool.ok && r.image.ok); + const imageOnly = imageOk.filter((r) => !r.tool.ok); runtime.log( - `Scan results: tested ${results.length}, tool ok ${toolOk.length}, image ok ${imageOk.length}, tool+image ok ${toolImageOk.length}`, + `Scan results: tested ${results.length}, tool ok ${toolOk.length}, image ok ${imageOk.length}, tool+image ok ${toolImageOk.length}, image only ${imageOnly.length}`, ); } @@ -127,6 +146,7 @@ export async function modelsScanCommand( yes?: boolean; input?: boolean; setDefault?: boolean; + setImage?: boolean; json?: boolean; }, runtime: RuntimeEnv, @@ -177,12 +197,18 @@ export async function modelsScanCommand( throw new Error("No tool-capable OpenRouter free models found."); } - const sorted = sortScanResults(toolOk); - const imagePreferred = sorted.filter((entry) => entry.image.ok); - const preselectPool = imagePreferred.length > 0 ? imagePreferred : sorted; + const sorted = sortScanResults(results); + const toolSorted = sortScanResults(toolOk); + const imageOk = results.filter((entry) => entry.image.ok); + const imageSorted = sortImageResults(imageOk); + const imagePreferred = toolSorted.filter((entry) => entry.image.ok); + const preselectPool = imagePreferred.length > 0 ? imagePreferred : toolSorted; const preselected = preselectPool .slice(0, Math.floor(maxCandidates)) .map((entry) => entry.modelRef); + const imagePreselected = imageSorted + .slice(0, Math.floor(maxCandidates)) + .map((entry) => entry.modelRef); if (!opts.json) { printScanSummary(results, runtime); @@ -192,11 +218,12 @@ export async function modelsScanCommand( const noInput = opts.input === false; const canPrompt = process.stdin.isTTY && !opts.yes && !noInput && !opts.json; let selected: string[] = preselected; + let selectedImages: string[] = imagePreselected; if (canPrompt) { const selection = await multiselect({ message: "Select fallback models (ordered)", - options: sorted.map((entry) => ({ + options: toolSorted.map((entry) => ({ value: entry.modelRef, label: entry.modelRef, hint: buildScanHint(entry), @@ -210,6 +237,24 @@ export async function modelsScanCommand( } selected = selection as string[]; + if (imageSorted.length > 0) { + const imageSelection = await multiselect({ + message: "Select image fallback models (ordered)", + options: imageSorted.map((entry) => ({ + value: entry.modelRef, + label: entry.modelRef, + hint: buildScanHint(entry), + })), + initialValues: imagePreselected, + }); + + if (isCancel(imageSelection)) { + cancel("Model scan cancelled."); + runtime.exit(0); + } + + selectedImages = imageSelection as string[]; + } } else if (!process.stdin.isTTY && !opts.yes && !noInput && !opts.json) { throw new Error("Non-interactive scan: pass --yes to apply defaults."); } @@ -217,34 +262,58 @@ export async function modelsScanCommand( if (selected.length === 0) { throw new Error("No models selected for fallbacks."); } + if (opts.setImage && selectedImages.length === 0) { + throw new Error("No image-capable models selected for image model."); + } const updated = await updateConfig((cfg) => { - const next = { + const agent = { + ...cfg.agent, + modelFallbacks: selected, + ...(opts.setDefault ? { model: selected[0] } : {}), + ...(opts.setImage && selectedImages.length > 0 + ? { imageModel: selectedImages[0] } + : {}), + } satisfies NonNullable; + if (imageSorted.length > 0) { + agent.imageModelFallbacks = selectedImages; + } + return { ...cfg, - agent: { - ...cfg.agent, - modelFallbacks: selected, - ...(opts.setDefault ? { model: selected[0] } : {}), - }, + agent, }; - return next; }); const allowlist = buildAllowlistSet(updated); const allowlistMissing = allowlist.size > 0 ? selected.filter((entry) => !allowlist.has(entry)) : []; + const allowlistMissingImages = + allowlist.size > 0 + ? selectedImages.filter((entry) => !allowlist.has(entry)) + : []; if (opts.json) { runtime.log( JSON.stringify( { selected, + selectedImages, setDefault: Boolean(opts.setDefault), + setImage: Boolean(opts.setImage), results, warnings: - allowlistMissing.length > 0 + allowlistMissing.length > 0 || allowlistMissingImages.length > 0 ? [ - `Selected models not in agent.allowedModels: ${allowlistMissing.join(", ")}`, + ...(allowlistMissing.length > 0 + ? [ + `Selected models not in agent.allowedModels: ${allowlistMissing.join(", ")}`, + ] + : []), + ...(allowlistMissingImages.length > 0 + ? [ + `Selected image models not in agent.allowedModels: ${allowlistMissingImages.join(", ")}`, + ] + : []), ] : [], }, @@ -262,10 +331,23 @@ export async function modelsScanCommand( ), ); } + if (allowlistMissingImages.length > 0) { + runtime.log( + warn( + `Warning: ${allowlistMissingImages.length} selected image models are not in agent.allowedModels and will be ignored by fallback: ${allowlistMissingImages.join(", ")}`, + ), + ); + } runtime.log(`Updated ${CONFIG_PATH_CLAWDBOT}`); runtime.log(`Fallbacks: ${selected.join(", ")}`); + if (selectedImages.length > 0) { + runtime.log(`Image fallbacks: ${selectedImages.join(", ")}`); + } if (opts.setDefault) { runtime.log(`Default model: ${selected[0]}`); } + if (opts.setImage && selectedImages.length > 0) { + runtime.log(`Image model: ${selectedImages[0]}`); + } } diff --git a/src/commands/models/set-image.ts b/src/commands/models/set-image.ts new file mode 100644 index 000000000..6613b2e98 --- /dev/null +++ b/src/commands/models/set-image.ts @@ -0,0 +1,34 @@ +import { CONFIG_PATH_CLAWDBOT } from "../../config/config.js"; +import type { RuntimeEnv } from "../../runtime.js"; +import { + buildAllowlistSet, + modelKey, + resolveModelTarget, + updateConfig, +} from "./shared.js"; + +export async function modelsSetImageCommand( + modelRaw: string, + runtime: RuntimeEnv, +) { + const updated = await updateConfig((cfg) => { + const resolved = resolveModelTarget({ raw: modelRaw, cfg }); + const allowlist = buildAllowlistSet(cfg); + if (allowlist.size > 0) { + const key = modelKey(resolved.provider, resolved.model); + if (!allowlist.has(key)) { + throw new Error(`Model ${key} is not in agent.allowedModels.`); + } + } + return { + ...cfg, + agent: { + ...cfg.agent, + imageModel: `${resolved.provider}/${resolved.model}`, + }, + }; + }); + + runtime.log(`Updated ${CONFIG_PATH_CLAWDBOT}`); + runtime.log(`Image model: ${updated.agent?.imageModel ?? modelRaw}`); +} diff --git a/src/config/schema.ts b/src/config/schema.ts index 2573eb736..b09b26a4a 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -88,7 +88,9 @@ const FIELD_LABELS: Record = { "gateway.reload.debounceMs": "Config Reload Debounce (ms)", "agent.workspace": "Workspace", "agent.model": "Default Model", + "agent.imageModel": "Image Model", "agent.modelFallbacks": "Model Fallbacks", + "agent.imageModelFallbacks": "Image Model Fallbacks", "ui.seamColor": "Accent Color", "browser.controlUrl": "Browser Control URL", "session.agentToAgent.maxPingPongTurns": "Agent-to-Agent Ping-Pong Turns", @@ -114,6 +116,10 @@ const FIELD_HELP: Record = { "Debounce window (ms) before applying config changes.", "agent.modelFallbacks": "Ordered fallback models (provider/model). Used when the primary model fails.", + "agent.imageModel": + "Optional image-capable model (provider/model) used by the image tool.", + "agent.imageModelFallbacks": + "Ordered fallback image models (provider/model) used by the image tool.", "session.agentToAgent.maxPingPongTurns": "Max reply-back turns between requester and target (0–5).", }; diff --git a/src/config/types.ts b/src/config/types.ts index 26c465f34..cbd174710 100644 --- a/src/config/types.ts +++ b/src/config/types.ts @@ -660,6 +660,8 @@ export type ClawdbotConfig = { agent?: { /** Model id (provider/model), e.g. "anthropic/claude-opus-4-5". */ model?: string; + /** Optional image-capable model (provider/model) used by the image tool. */ + imageModel?: string; /** Agent working directory (preferred). Used as the default cwd for agent runs. */ workspace?: string; /** Optional allowlist for /model (provider/model or model-only). */ @@ -668,6 +670,8 @@ export type ClawdbotConfig = { modelAliases?: Record; /** Ordered fallback models (provider/model). */ modelFallbacks?: string[]; + /** Ordered fallback image models (provider/model) for the image tool. */ + imageModelFallbacks?: string[]; /** Optional display-only context window override (used for % in status UIs). */ contextTokens?: number; /** Default thinking level when no /think directive is present. */ diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index 4e1f4f17f..6c607b081 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -363,10 +363,12 @@ export const ClawdbotSchema = z.object({ agent: z .object({ model: z.string().optional(), + imageModel: z.string().optional(), workspace: z.string().optional(), allowedModels: z.array(z.string()).optional(), modelAliases: z.record(z.string(), z.string()).optional(), modelFallbacks: z.array(z.string()).optional(), + imageModelFallbacks: z.array(z.string()).optional(), contextTokens: z.number().int().positive().optional(), thinkingDefault: z .union([