fix: gate image tool and deepgram audio payload

This commit is contained in:
Peter Steinberger
2026-01-17 09:33:53 +00:00
parent d66bc65ca6
commit b6ea5895b6
9 changed files with 139 additions and 31 deletions

View File

@@ -81,6 +81,7 @@ Docs: https://docs.clawd.bot
- WhatsApp: scope self-chat response prefix; inject pending-only group history and clear after any processed message.
- WhatsApp: include `linked` field in `describeAccount`.
- Agents: drop unsigned Gemini tool calls and avoid JSON Schema `format` keyword collisions.
- Agents: hide the image tool when the primary model already supports images.
- Agents: avoid duplicate sends by replying with `NO_REPLY` after `message` tool sends.
- Auth: inherit/merge sub-agent auth profiles from the main agent.
- Gateway: resolve local auth for security probe and validate gateway token/password file modes. (#1011, #1022) — thanks @ivanrvpereira, @kkarimi.

View File

@@ -102,6 +102,27 @@ describe("image tool implicit imageModel config", () => {
});
});
// Regression test: when the primary model's provider entry lists "image" among
// its inputs, no image tool config is resolved and no image tool is created,
// even though an explicit `imageModel` is configured.
it("disables image tool when primary model already supports images", async () => {
// Fresh temp directory passed as the agent dir for both calls below.
const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-image-"));
const cfg: ClawdbotConfig = {
agents: {
defaults: {
// Primary model points at the custom provider declared under `models.providers`.
model: { primary: "acme/vision-1" },
// An explicit image model is set on purpose; the assertions expect it to be ignored.
imageModel: { primary: "openai/gpt-5-mini" },
},
},
models: {
providers: {
acme: {
// "image" in `input` is what marks the primary model as image-capable.
models: [{ id: "vision-1", input: ["text", "image"] }],
},
},
},
};
// Both the config resolver and the tool factory must report "no image tool".
expect(resolveImageModelConfigForTool({ cfg, agentDir })).toBeNull();
expect(createImageTool({ config: cfg, agentDir })).toBeNull();
});
it("sandboxes image paths like the read tool", async () => {
const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-image-sandbox-"));
const agentDir = path.join(stateDir, "agent");

View File

@@ -1,3 +1,4 @@
import fsSync from "node:fs";
import fs from "node:fs/promises";
import path from "node:path";
@@ -19,7 +20,7 @@ import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
import { minimaxUnderstandImage } from "../minimax-vlm.js";
import { getApiKeyForModel, resolveEnvApiKey } from "../model-auth.js";
import { runWithImageModelFallback } from "../model-fallback.js";
import { parseModelRef } from "../model-selection.js";
import { normalizeProviderId, resolveConfiguredModelRef } from "../model-selection.js";
import { ensureClawdbotModelsJson } from "../models-config.js";
import { assertSandboxPath } from "../sandbox-paths.js";
import type { AnyAgentTool } from "./common.js";
@@ -42,12 +43,15 @@ function resolveDefaultModelRef(cfg?: ClawdbotConfig): {
provider: string;
model: string;
} {
const modelConfig = cfg?.agents?.defaults?.model as { primary?: string } | string | undefined;
const raw = typeof modelConfig === "string" ? modelConfig.trim() : modelConfig?.primary?.trim();
const parsed =
parseModelRef(raw ?? "", DEFAULT_PROVIDER) ??
({ provider: DEFAULT_PROVIDER, model: DEFAULT_MODEL } as const);
return { provider: parsed.provider, model: parsed.model };
if (cfg) {
const resolved = resolveConfiguredModelRef({
cfg,
defaultProvider: DEFAULT_PROVIDER,
defaultModel: DEFAULT_MODEL,
});
return { provider: resolved.provider, model: resolved.model };
}
return { provider: DEFAULT_PROVIDER, model: DEFAULT_MODEL };
}
function hasAuthForProvider(params: { provider: string; agentDir: string }): boolean {
@@ -58,6 +62,77 @@ function hasAuthForProvider(params: { provider: string; agentDir: string }): boo
return listProfilesForProvider(store, params.provider).length > 0;
}
/** One model entry as found in a provider's `models` list. */
type ProviderModelEntry = {
// Model identifier; lookups compare it trimmed, exactly first and then case-insensitively.
id?: string;
// Input kinds listed for the model; an "image" entry marks it as image-capable.
input?: string[];
};
/** Minimal view of a provider config: only the `models` list is read here. */
type ProviderConfigLike = {
models?: ProviderModelEntry[];
};
/**
 * Find a provider's config entry by id.
 *
 * Keys are compared after passing both sides through `normalizeProviderId`,
 * so a differently-spelled key for the same provider still matches.
 *
 * @param providers - Map of provider key to config; may be absent.
 * @param provider - Provider id to look up.
 * @returns The value of the first key (in `Object.entries` order) whose
 *   normalized form equals the normalized `provider`, or `null` when the map
 *   is missing or nothing matches.
 */
function resolveProviderConfig(
  providers: Record<string, ProviderConfigLike> | undefined,
  provider: string,
): ProviderConfigLike | null {
  if (!providers) {
    return null;
  }
  const wanted = normalizeProviderId(provider);
  const hit = Object.entries(providers).find(
    ([candidateKey]) => normalizeProviderId(candidateKey) === wanted,
  );
  return hit === undefined ? null : hit[1];
}
/**
 * Decide whether a model listed in a provider config accepts image input.
 *
 * The model is located by trimmed id: an exact match is preferred, and a
 * case-insensitive match is used only when no exact one exists.
 *
 * @param params.providerConfig - Provider config to search, or `null`.
 * @param params.modelId - Model id to look for (surrounding whitespace ignored).
 * @returns `true`/`false` when the model is found (whether its `input` array
 *   contains `"image"`), or `null` when the config has no models, the id is
 *   blank, or no entry matches.
 */
function resolveModelSupportsImages(params: {
  providerConfig: ProviderConfigLike | null;
  modelId: string;
}): boolean | null {
  const entries = params.providerConfig?.models;
  if (!Array.isArray(entries) || entries.length === 0) {
    return null;
  }

  const wanted = params.modelId.trim();
  if (wanted === "") {
    return null;
  }

  // Normalise an entry's id the same way for both lookup passes.
  const idOf = (entry: { id?: string } | undefined): string => String(entry?.id ?? "").trim();

  let found = entries.find((entry) => idOf(entry) === wanted);
  if (!found) {
    // Second pass: tolerate case differences only when no exact id matched.
    const wantedLower = wanted.toLowerCase();
    found = entries.find((entry) => idOf(entry).toLowerCase() === wantedLower);
  }
  if (!found) {
    return null;
  }

  return Array.isArray(found.input) && found.input.includes("image");
}
/**
 * Determine whether the configured primary model accepts image input.
 *
 * The in-memory config's `models.providers` is consulted first. Only when it
 * gives no answer is `<agentDir>/models.json` read synchronously and searched
 * the same way.
 *
 * @param params.cfg - Loaded config; when absent the answer is `null`.
 * @param params.agentDir - Directory expected to contain `models.json`.
 * @returns `true`/`false` when either source lists the primary model, or
 *   `null` when neither does or the file cannot be read or parsed.
 */
function resolvePrimaryModelSupportsImages(params: {
  cfg?: ClawdbotConfig;
  agentDir: string;
}): boolean | null {
  const { cfg, agentDir } = params;
  if (!cfg) {
    return null;
  }

  const { provider: providerId, model: modelId } = resolveDefaultModelRef(cfg);

  const configuredProviders = cfg.models?.providers as
    | Record<string, ProviderConfigLike>
    | undefined;
  const configVerdict = resolveModelSupportsImages({
    providerConfig: resolveProviderConfig(configuredProviders, providerId),
    modelId,
  });
  if (configVerdict !== null) {
    return configVerdict;
  }

  // Best-effort fallback: any read or parse failure means "unknown".
  try {
    const contents = fsSync.readFileSync(path.join(agentDir, "models.json"), "utf8");
    const onDisk = JSON.parse(contents) as { providers?: Record<string, ProviderConfigLike> };
    return resolveModelSupportsImages({
      providerConfig: resolveProviderConfig(onDisk.providers, providerId),
      modelId,
    });
  } catch {
    return null;
  }
}
/**
* Resolve the effective image model config for the `image` tool.
*
@@ -70,6 +145,11 @@ export function resolveImageModelConfigForTool(params: {
cfg?: ClawdbotConfig;
agentDir: string;
}): ImageModelConfig | null {
const primarySupportsImages = resolvePrimaryModelSupportsImages({
cfg: params.cfg,
agentDir: params.agentDir,
});
if (primarySupportsImages === true) return null;
const explicit = coerceImageModelConfig(params.cfg);
if (explicit.primary?.trim() || (explicit.fallbacks?.length ?? 0) > 0) {
return explicit;

View File

@@ -107,7 +107,12 @@ describe("directive behavior", () => {
const storePath = path.join(home, "sessions.json");
await getReplyFromConfig(
{ Body: "/model kimi-k2-0905-preview", From: "+1222", To: "+1222", CommandAuthorized: true },
{
Body: "/model kimi-k2-0905-preview",
From: "+1222",
To: "+1222",
CommandAuthorized: true,
},
{},
{
agents: {

View File

@@ -67,7 +67,9 @@ export function formatForLog(value: unknown): string {
: JSON.stringify(value);
if (!str) return "";
const redacted = redactSensitiveText(str, WS_LOG_REDACT_OPTIONS);
return redacted.length > LOG_VALUE_LIMIT ? `${redacted.slice(0, LOG_VALUE_LIMIT)}...` : redacted;
return redacted.length > LOG_VALUE_LIMIT
? `${redacted.slice(0, LOG_VALUE_LIMIT)}...`
: redacted;
} catch {
return String(value);
}

View File

@@ -31,21 +31,17 @@ async function fetchSampleBuffer(url: string, timeoutMs: number): Promise<Buffer
}
describeLive("deepgram live", () => {
it(
"transcribes sample audio",
async () => {
const buffer = await fetchSampleBuffer(SAMPLE_URL, 15000);
const result = await transcribeDeepgramAudio({
buffer,
fileName: "sample.wav",
mime: "audio/wav",
apiKey: DEEPGRAM_KEY,
model: DEEPGRAM_MODEL,
baseUrl: DEEPGRAM_BASE_URL,
timeoutMs: 20000,
});
expect(result.text.trim().length).toBeGreaterThan(0);
},
30000,
);
it("transcribes sample audio", async () => {
const buffer = await fetchSampleBuffer(SAMPLE_URL, 15000);
const result = await transcribeDeepgramAudio({
buffer,
fileName: "sample.wav",
mime: "audio/wav",
apiKey: DEEPGRAM_KEY,
model: DEEPGRAM_MODEL,
baseUrl: DEEPGRAM_BASE_URL,
timeoutMs: 20000,
});
expect(result.text.trim().length).toBeGreaterThan(0);
}, 30000);
});

View File

@@ -84,6 +84,6 @@ describe("transcribeDeepgramAudio", () => {
expect(headers.get("authorization")).toBe("Token test-key");
expect(headers.get("x-custom")).toBe("1");
expect(headers.get("content-type")).toBe("audio/wav");
expect(Buffer.isBuffer(seenInit?.body)).toBe(true);
expect(seenInit?.body).toBeInstanceOf(Uint8Array);
});
});

View File

@@ -293,9 +293,9 @@ async function runProviderEntry(params: {
const providerConfig = cfg.models?.providers?.[providerId];
const baseUrl = entry.baseUrl ?? params.config?.baseUrl ?? providerConfig?.baseUrl;
const mergedHeaders = {
...(providerConfig?.headers ?? {}),
...(params.config?.headers ?? {}),
...(entry.headers ?? {}),
...providerConfig?.headers,
...params.config?.headers,
...entry.headers,
};
const headers = Object.keys(mergedHeaders).length > 0 ? mergedHeaders : undefined;
const providerQuery = resolveProviderQuery({

View File

@@ -16,7 +16,10 @@ import {
import { dispatchReplyWithBufferedBlockDispatcher } from "../../../auto-reply/reply/provider-dispatcher.js";
import type { getReplyFromConfig } from "../../../auto-reply/reply.js";
import type { ReplyPayload } from "../../../auto-reply/types.js";
import { hasInlineCommandTokens, isControlCommandMessage } from "../../../auto-reply/command-detection.js";
import {
hasInlineCommandTokens,
isControlCommandMessage,
} from "../../../auto-reply/command-detection.js";
import { finalizeInboundContext } from "../../../auto-reply/reply/inbound-context.js";
import { toLocationContext } from "../../../channels/location.js";
import type { loadConfig } from "../../../config/config.js";