fix: gate image tool and deepgram audio payload
This commit is contained in:
@@ -102,6 +102,27 @@ describe("image tool implicit imageModel config", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("disables image tool when primary model already supports images", async () => {
|
||||
const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-image-"));
|
||||
const cfg: ClawdbotConfig = {
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "acme/vision-1" },
|
||||
imageModel: { primary: "openai/gpt-5-mini" },
|
||||
},
|
||||
},
|
||||
models: {
|
||||
providers: {
|
||||
acme: {
|
||||
models: [{ id: "vision-1", input: ["text", "image"] }],
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
expect(resolveImageModelConfigForTool({ cfg, agentDir })).toBeNull();
|
||||
expect(createImageTool({ config: cfg, agentDir })).toBeNull();
|
||||
});
|
||||
|
||||
it("sandboxes image paths like the read tool", async () => {
|
||||
const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-image-sandbox-"));
|
||||
const agentDir = path.join(stateDir, "agent");
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import fsSync from "node:fs";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
@@ -19,7 +20,7 @@ import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
|
||||
import { minimaxUnderstandImage } from "../minimax-vlm.js";
|
||||
import { getApiKeyForModel, resolveEnvApiKey } from "../model-auth.js";
|
||||
import { runWithImageModelFallback } from "../model-fallback.js";
|
||||
import { parseModelRef } from "../model-selection.js";
|
||||
import { normalizeProviderId, resolveConfiguredModelRef } from "../model-selection.js";
|
||||
import { ensureClawdbotModelsJson } from "../models-config.js";
|
||||
import { assertSandboxPath } from "../sandbox-paths.js";
|
||||
import type { AnyAgentTool } from "./common.js";
|
||||
@@ -42,12 +43,15 @@ function resolveDefaultModelRef(cfg?: ClawdbotConfig): {
|
||||
provider: string;
|
||||
model: string;
|
||||
} {
|
||||
const modelConfig = cfg?.agents?.defaults?.model as { primary?: string } | string | undefined;
|
||||
const raw = typeof modelConfig === "string" ? modelConfig.trim() : modelConfig?.primary?.trim();
|
||||
const parsed =
|
||||
parseModelRef(raw ?? "", DEFAULT_PROVIDER) ??
|
||||
({ provider: DEFAULT_PROVIDER, model: DEFAULT_MODEL } as const);
|
||||
return { provider: parsed.provider, model: parsed.model };
|
||||
if (cfg) {
|
||||
const resolved = resolveConfiguredModelRef({
|
||||
cfg,
|
||||
defaultProvider: DEFAULT_PROVIDER,
|
||||
defaultModel: DEFAULT_MODEL,
|
||||
});
|
||||
return { provider: resolved.provider, model: resolved.model };
|
||||
}
|
||||
return { provider: DEFAULT_PROVIDER, model: DEFAULT_MODEL };
|
||||
}
|
||||
|
||||
function hasAuthForProvider(params: { provider: string; agentDir: string }): boolean {
|
||||
@@ -58,6 +62,77 @@ function hasAuthForProvider(params: { provider: string; agentDir: string }): boo
|
||||
return listProfilesForProvider(store, params.provider).length > 0;
|
||||
}
|
||||
|
||||
type ProviderModelEntry = {
|
||||
id?: string;
|
||||
input?: string[];
|
||||
};
|
||||
|
||||
type ProviderConfigLike = {
|
||||
models?: ProviderModelEntry[];
|
||||
};
|
||||
|
||||
function resolveProviderConfig(
|
||||
providers: Record<string, ProviderConfigLike> | undefined,
|
||||
provider: string,
|
||||
): ProviderConfigLike | null {
|
||||
if (!providers) return null;
|
||||
const normalized = normalizeProviderId(provider);
|
||||
for (const [key, value] of Object.entries(providers)) {
|
||||
if (normalizeProviderId(key) === normalized) return value;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function resolveModelSupportsImages(params: {
|
||||
providerConfig: ProviderConfigLike | null;
|
||||
modelId: string;
|
||||
}): boolean | null {
|
||||
const models = params.providerConfig?.models;
|
||||
if (!Array.isArray(models) || models.length === 0) return null;
|
||||
const trimmedId = params.modelId.trim();
|
||||
if (!trimmedId) return null;
|
||||
const match =
|
||||
models.find((model) => String(model?.id ?? "").trim() === trimmedId) ??
|
||||
models.find(
|
||||
(model) =>
|
||||
String(model?.id ?? "")
|
||||
.trim()
|
||||
.toLowerCase() === trimmedId.toLowerCase(),
|
||||
);
|
||||
if (!match) return null;
|
||||
const input = Array.isArray(match.input) ? match.input : [];
|
||||
return input.includes("image");
|
||||
}
|
||||
|
||||
function resolvePrimaryModelSupportsImages(params: {
|
||||
cfg?: ClawdbotConfig;
|
||||
agentDir: string;
|
||||
}): boolean | null {
|
||||
if (!params.cfg) return null;
|
||||
const primary = resolveDefaultModelRef(params.cfg);
|
||||
const providerConfig = resolveProviderConfig(
|
||||
params.cfg.models?.providers as Record<string, ProviderConfigLike> | undefined,
|
||||
primary.provider,
|
||||
);
|
||||
const fromConfig = resolveModelSupportsImages({
|
||||
providerConfig,
|
||||
modelId: primary.model,
|
||||
});
|
||||
if (fromConfig !== null) return fromConfig;
|
||||
try {
|
||||
const modelsPath = path.join(params.agentDir, "models.json");
|
||||
const raw = fsSync.readFileSync(modelsPath, "utf8");
|
||||
const parsed = JSON.parse(raw) as { providers?: Record<string, ProviderConfigLike> };
|
||||
const provider = resolveProviderConfig(parsed.providers, primary.provider);
|
||||
return resolveModelSupportsImages({
|
||||
providerConfig: provider,
|
||||
modelId: primary.model,
|
||||
});
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the effective image model config for the `image` tool.
|
||||
*
|
||||
@@ -70,6 +145,11 @@ export function resolveImageModelConfigForTool(params: {
|
||||
cfg?: ClawdbotConfig;
|
||||
agentDir: string;
|
||||
}): ImageModelConfig | null {
|
||||
const primarySupportsImages = resolvePrimaryModelSupportsImages({
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
});
|
||||
if (primarySupportsImages === true) return null;
|
||||
const explicit = coerceImageModelConfig(params.cfg);
|
||||
if (explicit.primary?.trim() || (explicit.fallbacks?.length ?? 0) > 0) {
|
||||
return explicit;
|
||||
|
||||
Reference in New Issue
Block a user