feat: add image model config + tool

This commit is contained in:
Peter Steinberger
2026-01-04 19:35:00 +01:00
parent 0716a624a8
commit 78998dba9e
20 changed files with 856 additions and 144 deletions

View File

@@ -0,0 +1,157 @@
import { type Api, type AssistantMessage, complete, type Context, type Model } from "@mariozechner/pi-ai";
import { discoverAuthStorage, discoverModels } from "@mariozechner/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import type { ClawdbotConfig } from "../../config/config.js";
import { loadWebMedia } from "../../web/media.js";
import { resolveClawdbotAgentDir } from "../agent-paths.js";
import { getApiKeyForModel } from "../model-auth.js";
import { runWithImageModelFallback } from "../model-fallback.js";
import { ensureClawdbotModelsJson } from "../models-config.js";
import { extractAssistantText } from "../pi-embedded-utils.js";
import { resolveUserPath } from "../../utils.js";
import type { AnyAgentTool } from "./common.js";
// Prompt sent to the image model when the tool call supplies no prompt (or only whitespace).
const DEFAULT_PROMPT = "Describe the image.";
/**
 * Reports whether the config names at least one usable image model.
 *
 * A model counts as configured when `agent.imageModel` is non-blank, or when
 * `agent.imageModelFallbacks` contains at least one non-blank string entry.
 * Blank fallback entries are ignored so that they are treated the same way as
 * a blank primary model.
 *
 * @param cfg - Optional bot configuration; `undefined` is treated as "not configured".
 * @returns `true` when a primary or fallback image model is present.
 */
function ensureImageToolConfigured(cfg?: ClawdbotConfig): boolean {
  if (cfg?.agent?.imageModel?.trim()) return true;

  // Treat the value as unknown: a malformed (non-array) setting must not count as configured.
  const fallbacks: unknown = cfg?.agent?.imageModelFallbacks;
  if (!Array.isArray(fallbacks)) return false;

  return fallbacks.some(
    (entry: unknown) => typeof entry === "string" && entry.trim().length > 0,
  );
}
/**
 * Resolves the byte limit to apply when loading an image.
 *
 * The per-call `maxBytesMb` wins when it is a positive finite number;
 * otherwise `agent.mediaMaxMb` from the config is used under the same rule.
 *
 * @param cfg - Optional bot configuration supplying `agent.mediaMaxMb`.
 * @param maxBytesMb - Optional per-call limit in megabytes.
 * @returns The limit in bytes (rounded down), or `undefined` when neither source is usable.
 */
function pickMaxBytes(cfg?: ClawdbotConfig, maxBytesMb?: number): number | undefined {
  // Converts a megabyte value to bytes; yields undefined for anything that is not a positive finite number.
  const toBytes = (megabytes: unknown): number | undefined => {
    const usable =
      typeof megabytes === "number" && Number.isFinite(megabytes) && megabytes > 0;
    return usable ? Math.floor(megabytes * 1024 * 1024) : undefined;
  };

  // `??` (not `||`) so a valid-but-tiny value that floors to 0 is still returned as-is.
  return toBytes(maxBytesMb) ?? toBytes(cfg?.agent?.mediaMaxMb);
}
/**
 * Builds a single-turn context holding one user message with the text prompt
 * followed by the image payload.
 *
 * @param prompt - Text placed in the message's text part.
 * @param base64 - Image data placed in the image part's `data` field.
 * @param mimeType - MIME type recorded on the image part.
 * @returns A context with exactly one user message, timestamped with `Date.now()`.
 */
function buildImageContext(prompt: string, base64: string, mimeType: string): Context {
  const textPart = { type: "text" as const, text: prompt };
  const imagePart = { type: "image" as const, data: base64, mimeType };

  const context: Context = {
    messages: [
      {
        role: "user",
        content: [textPart, imagePart],
        timestamp: Date.now(),
      },
    ],
  };
  return context;
}
/**
 * Sends a prompt plus one image to an image-capable model and returns the reply text.
 *
 * Model selection is delegated to `runWithImageModelFallback`; for each
 * provider/model pair it hands to the callback, the model is looked up in the
 * discovered registry, checked for image input support, and invoked via `complete`.
 *
 * @param params.cfg - Optional bot configuration forwarded to model setup and fallback selection.
 * @param params.modelOverride - Optional model reference forwarded to the fallback runner.
 * @param params.prompt - Text prompt to send with the image.
 * @param params.base64 - Image data, base64-encoded.
 * @param params.mimeType - MIME type of the image.
 * @returns The extracted assistant text (or a placeholder when empty) and the provider/model reported by the fallback runner.
 * @throws Error from the callback when the model is unknown or does not list "image" as an input.
 */
async function runImagePrompt(params: {
  cfg?: ClawdbotConfig;
  modelOverride?: string;
  prompt: string;
  base64: string;
  mimeType: string;
}): Promise<{ text: string; provider: string; model: string }> {
  const { cfg, modelOverride, prompt, base64, mimeType } = params;

  const agentDir = resolveClawdbotAgentDir();
  await ensureClawdbotModelsJson(cfg);
  const auth = discoverAuthStorage(agentDir);
  const registry = discoverModels(auth, agentDir);

  const outcome = await runWithImageModelFallback({
    cfg,
    modelOverride,
    run: async (provider, modelId) => {
      const candidate = registry.find(provider, modelId) as Model<Api> | null;
      if (!candidate) {
        throw new Error(`Unknown model: ${provider}/${modelId}`);
      }
      if (!candidate.input?.includes("image")) {
        throw new Error(`Model does not support images: ${provider}/${modelId}`);
      }

      const apiKey = await getApiKeyForModel(candidate, auth);
      auth.setRuntimeApiKey(candidate.provider, apiKey);

      // Built per attempt so each call carries its own message timestamp.
      const imageContext = buildImageContext(prompt, base64, mimeType);
      return (await complete(candidate, imageContext, {
        apiKey,
        maxTokens: 512,
        temperature: 0,
      })) as AssistantMessage;
    },
  });

  const replyText = extractAssistantText(outcome.result);
  return {
    text: replyText || "(no text returned)",
    provider: outcome.provider,
    model: outcome.model,
  };
}
/** Returns `value` trimmed when it is a string, otherwise the empty string. */
function readTrimmedString(value: unknown): string {
  return typeof value === "string" ? value.trim() : "";
}

/**
 * Creates the `image` agent tool, which loads an image from a path or URL and
 * asks the configured image model about it.
 *
 * @param options.config - Optional bot configuration used to decide whether the
 *   tool is available and to supply the default size limit.
 * @returns The tool definition, or `null` when no image model is configured.
 */
export function createImageTool(options?: {
  config?: ClawdbotConfig;
}): AnyAgentTool | null {
  if (!ensureImageToolConfigured(options?.config)) return null;

  const parameters = Type.Object({
    prompt: Type.Optional(Type.String()),
    image: Type.String(),
    model: Type.Optional(Type.String()),
    maxBytesMb: Type.Optional(Type.Number()),
  });

  return {
    label: "Image",
    name: "image",
    description:
      "Analyze an image with the configured image model (agent.imageModel). Provide a prompt and image path or URL.",
    parameters,
    execute: async (_toolCallId, args) => {
      // Arguments arrive untyped; fall back to an empty record for non-object input.
      const input: Record<string, unknown> =
        typeof args === "object" && args !== null
          ? (args as Record<string, unknown>)
          : {};

      const imageRef = readTrimmedString(input.image);
      if (!imageRef) throw new Error("image required");

      const prompt = readTrimmedString(input.prompt) || DEFAULT_PROMPT;
      const modelOverride = readTrimmedString(input.model) || undefined;
      const requestedMb =
        typeof input.maxBytesMb === "number" ? input.maxBytesMb : undefined;
      const byteLimit = pickMaxBytes(options?.config, requestedMb);

      // Only "~"-prefixed references are expanded; everything else is passed through unchanged.
      let resolvedImage = imageRef;
      if (imageRef.startsWith("~")) {
        resolvedImage = resolveUserPath(imageRef);
      }

      const media = await loadWebMedia(resolvedImage, byteLimit);
      if (media.kind !== "image") {
        throw new Error(`Unsupported media type: ${media.kind}`);
      }

      const answer = await runImagePrompt({
        cfg: options?.config,
        modelOverride,
        prompt,
        base64: media.buffer.toString("base64"),
        mimeType: media.contentType ?? "image/png",
      });

      return {
        content: [{ type: "text", text: answer.text }],
        details: {
          model: `${answer.provider}/${answer.model}`,
          image: resolvedImage,
        },
      };
    },
  };
}