fix: gate image tool and deepgram audio payload
This commit is contained in:
@@ -81,6 +81,7 @@ Docs: https://docs.clawd.bot
|
||||
- WhatsApp: scope self-chat response prefix; inject pending-only group history and clear after any processed message.
|
||||
- WhatsApp: include `linked` field in `describeAccount`.
|
||||
- Agents: drop unsigned Gemini tool calls and avoid JSON Schema `format` keyword collisions.
|
||||
- Agents: hide the image tool when the primary model already supports images.
|
||||
- Agents: avoid duplicate sends by replying with `NO_REPLY` after `message` tool sends.
|
||||
- Auth: inherit/merge sub-agent auth profiles from the main agent.
|
||||
- Gateway: resolve local auth for security probe and validate gateway token/password file modes. (#1011, #1022) — thanks @ivanrvpereira, @kkarimi.
|
||||
|
||||
@@ -102,6 +102,27 @@ describe("image tool implicit imageModel config", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("disables image tool when primary model already supports images", async () => {
  // Scratch agent directory; removed in `finally` so repeated runs do not
  // accumulate `clawdbot-image-*` directories under the OS temp dir.
  const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-image-"));
  try {
    // The primary model is declared with "image" among its inputs, while an
    // explicit imageModel is also configured. Both assertions below expect
    // null for this config.
    const cfg: ClawdbotConfig = {
      agents: {
        defaults: {
          model: { primary: "acme/vision-1" },
          imageModel: { primary: "openai/gpt-5-mini" },
        },
      },
      models: {
        providers: {
          acme: {
            models: [{ id: "vision-1", input: ["text", "image"] }],
          },
        },
      },
    };

    expect(resolveImageModelConfigForTool({ cfg, agentDir })).toBeNull();
    expect(createImageTool({ config: cfg, agentDir })).toBeNull();
  } finally {
    await fs.rm(agentDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
it("sandboxes image paths like the read tool", async () => {
|
||||
const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-image-sandbox-"));
|
||||
const agentDir = path.join(stateDir, "agent");
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import fsSync from "node:fs";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
@@ -19,7 +20,7 @@ import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
|
||||
import { minimaxUnderstandImage } from "../minimax-vlm.js";
|
||||
import { getApiKeyForModel, resolveEnvApiKey } from "../model-auth.js";
|
||||
import { runWithImageModelFallback } from "../model-fallback.js";
|
||||
import { parseModelRef } from "../model-selection.js";
|
||||
import { normalizeProviderId, resolveConfiguredModelRef } from "../model-selection.js";
|
||||
import { ensureClawdbotModelsJson } from "../models-config.js";
|
||||
import { assertSandboxPath } from "../sandbox-paths.js";
|
||||
import type { AnyAgentTool } from "./common.js";
|
||||
@@ -42,12 +43,15 @@ function resolveDefaultModelRef(cfg?: ClawdbotConfig): {
|
||||
provider: string;
|
||||
model: string;
|
||||
} {
|
||||
const modelConfig = cfg?.agents?.defaults?.model as { primary?: string } | string | undefined;
|
||||
const raw = typeof modelConfig === "string" ? modelConfig.trim() : modelConfig?.primary?.trim();
|
||||
const parsed =
|
||||
parseModelRef(raw ?? "", DEFAULT_PROVIDER) ??
|
||||
({ provider: DEFAULT_PROVIDER, model: DEFAULT_MODEL } as const);
|
||||
return { provider: parsed.provider, model: parsed.model };
|
||||
if (cfg) {
|
||||
const resolved = resolveConfiguredModelRef({
|
||||
cfg,
|
||||
defaultProvider: DEFAULT_PROVIDER,
|
||||
defaultModel: DEFAULT_MODEL,
|
||||
});
|
||||
return { provider: resolved.provider, model: resolved.model };
|
||||
}
|
||||
return { provider: DEFAULT_PROVIDER, model: DEFAULT_MODEL };
|
||||
}
|
||||
|
||||
function hasAuthForProvider(params: { provider: string; agentDir: string }): boolean {
|
||||
@@ -58,6 +62,77 @@ function hasAuthForProvider(params: { provider: string; agentDir: string }): boo
|
||||
return listProfilesForProvider(store, params.provider).length > 0;
|
||||
}
|
||||
|
||||
/** One entry of a provider's model list, limited to the fields read in this module. */
type ProviderModelEntry = {
  /** Model identifier; lookups compare it after trimming. */
  id?: string;
  /** Input kinds listed for the model; an "image" entry marks image support. */
  input?: Array<string>;
};

/** The slice of a provider configuration that is inspected in this module. */
type ProviderConfigLike = {
  /** Models listed for the provider, if any. */
  models?: Array<ProviderModelEntry>;
};
|
||||
|
||||
/**
 * Look up a provider's config entry by provider id.
 *
 * Both the map keys and the requested id are passed through
 * `normalizeProviderId` before comparison. The first key (in `Object.entries`
 * order) whose normalized form equals the normalized request wins.
 *
 * @param providers - Provider map to search; may be absent.
 * @param provider - Provider id to look for.
 * @returns The matching entry, or `null` when the map is absent or no key matches.
 */
function resolveProviderConfig(
  providers: Record<string, ProviderConfigLike> | undefined,
  provider: string,
): ProviderConfigLike | null {
  if (!providers) return null;

  const wanted = normalizeProviderId(provider);
  const hit = Object.entries(providers).find(([key]) => normalizeProviderId(key) === wanted);
  return hit ? hit[1] : null;
}
|
||||
|
||||
/**
 * Decide whether a provider's model list marks a given model as accepting images.
 *
 * Ids are compared after trimming. An exact match is preferred; a
 * case-insensitive match is used only when no exact one exists.
 *
 * @param params.providerConfig - Provider entry whose `models` list is searched.
 * @param params.modelId - Model id to look up.
 * @returns `true` or `false` when the model is listed, depending on whether its
 *   `input` list contains "image"; `null` when that cannot be determined (no
 *   models listed, blank model id, or no matching entry).
 */
function resolveModelSupportsImages(params: {
  providerConfig: ProviderConfigLike | null;
  modelId: string;
}): boolean | null {
  const models = params.providerConfig?.models;
  if (!Array.isArray(models) || models.length === 0) return null;

  const wantedId = params.modelId.trim();
  if (!wantedId) return null;

  // Single place that turns an entry into a comparable id. Entries may come
  // from parsed JSON, so a missing entry or id is treated as an empty string.
  const idOf = (model: (typeof models)[number]): string => String(model?.id ?? "").trim();

  // Lowercase the requested id once instead of once per candidate entry.
  const wantedIdLower = wantedId.toLowerCase();

  const match =
    models.find((model) => idOf(model) === wantedId) ??
    models.find((model) => idOf(model).toLowerCase() === wantedIdLower);
  if (!match) return null;

  // A listed model without a usable `input` array counts as "no image
  // support" (false), not as unknown (null).
  return Array.isArray(match.input) && match.input.includes("image");
}
|
||||
|
||||
/**
 * Work out whether the configured primary model accepts image input.
 *
 * The provider list in `cfg.models.providers` is consulted first. Only when
 * that yields no answer is `<agentDir>/models.json` read synchronously and
 * searched the same way.
 *
 * @param params.cfg - Configuration to inspect; without it the answer is `null`.
 * @param params.agentDir - Directory expected to contain `models.json`.
 * @returns `true` or `false` when either source lists the primary model,
 *   otherwise `null` (including when `models.json` cannot be read or parsed).
 */
function resolvePrimaryModelSupportsImages(params: {
  cfg?: ClawdbotConfig;
  agentDir: string;
}): boolean | null {
  const { cfg, agentDir } = params;
  if (!cfg) return null;

  const { provider: providerId, model: modelId } = resolveDefaultModelRef(cfg);

  const configured = resolveModelSupportsImages({
    providerConfig: resolveProviderConfig(
      cfg.models?.providers as Record<string, ProviderConfigLike> | undefined,
      providerId,
    ),
    modelId,
  });
  if (configured !== null) return configured;

  // Best-effort fallback: any failure while reading or parsing the file is
  // reported as "unknown" rather than propagated.
  try {
    const fileText = fsSync.readFileSync(path.join(agentDir, "models.json"), "utf8");
    const onDisk = JSON.parse(fileText) as { providers?: Record<string, ProviderConfigLike> };
    return resolveModelSupportsImages({
      providerConfig: resolveProviderConfig(onDisk.providers, providerId),
      modelId,
    });
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
|
||||
* Resolve the effective image model config for the `image` tool.
|
||||
*
|
||||
@@ -70,6 +145,11 @@ export function resolveImageModelConfigForTool(params: {
|
||||
cfg?: ClawdbotConfig;
|
||||
agentDir: string;
|
||||
}): ImageModelConfig | null {
|
||||
const primarySupportsImages = resolvePrimaryModelSupportsImages({
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
});
|
||||
if (primarySupportsImages === true) return null;
|
||||
const explicit = coerceImageModelConfig(params.cfg);
|
||||
if (explicit.primary?.trim() || (explicit.fallbacks?.length ?? 0) > 0) {
|
||||
return explicit;
|
||||
|
||||
@@ -107,7 +107,12 @@ describe("directive behavior", () => {
|
||||
const storePath = path.join(home, "sessions.json");
|
||||
|
||||
await getReplyFromConfig(
|
||||
{ Body: "/model kimi-k2-0905-preview", From: "+1222", To: "+1222", CommandAuthorized: true },
|
||||
{
|
||||
Body: "/model kimi-k2-0905-preview",
|
||||
From: "+1222",
|
||||
To: "+1222",
|
||||
CommandAuthorized: true,
|
||||
},
|
||||
{},
|
||||
{
|
||||
agents: {
|
||||
|
||||
@@ -67,7 +67,9 @@ export function formatForLog(value: unknown): string {
|
||||
: JSON.stringify(value);
|
||||
if (!str) return "";
|
||||
const redacted = redactSensitiveText(str, WS_LOG_REDACT_OPTIONS);
|
||||
return redacted.length > LOG_VALUE_LIMIT ? `${redacted.slice(0, LOG_VALUE_LIMIT)}...` : redacted;
|
||||
return redacted.length > LOG_VALUE_LIMIT
|
||||
? `${redacted.slice(0, LOG_VALUE_LIMIT)}...`
|
||||
: redacted;
|
||||
} catch {
|
||||
return String(value);
|
||||
}
|
||||
|
||||
@@ -31,21 +31,17 @@ async function fetchSampleBuffer(url: string, timeoutMs: number): Promise<Buffer
|
||||
}
|
||||
|
||||
describeLive("deepgram live", () => {
|
||||
it(
|
||||
"transcribes sample audio",
|
||||
async () => {
|
||||
const buffer = await fetchSampleBuffer(SAMPLE_URL, 15000);
|
||||
const result = await transcribeDeepgramAudio({
|
||||
buffer,
|
||||
fileName: "sample.wav",
|
||||
mime: "audio/wav",
|
||||
apiKey: DEEPGRAM_KEY,
|
||||
model: DEEPGRAM_MODEL,
|
||||
baseUrl: DEEPGRAM_BASE_URL,
|
||||
timeoutMs: 20000,
|
||||
});
|
||||
expect(result.text.trim().length).toBeGreaterThan(0);
|
||||
},
|
||||
30000,
|
||||
);
|
||||
it("transcribes sample audio", async () => {
|
||||
const buffer = await fetchSampleBuffer(SAMPLE_URL, 15000);
|
||||
const result = await transcribeDeepgramAudio({
|
||||
buffer,
|
||||
fileName: "sample.wav",
|
||||
mime: "audio/wav",
|
||||
apiKey: DEEPGRAM_KEY,
|
||||
model: DEEPGRAM_MODEL,
|
||||
baseUrl: DEEPGRAM_BASE_URL,
|
||||
timeoutMs: 20000,
|
||||
});
|
||||
expect(result.text.trim().length).toBeGreaterThan(0);
|
||||
}, 30000);
|
||||
});
|
||||
|
||||
@@ -84,6 +84,6 @@ describe("transcribeDeepgramAudio", () => {
|
||||
expect(headers.get("authorization")).toBe("Token test-key");
|
||||
expect(headers.get("x-custom")).toBe("1");
|
||||
expect(headers.get("content-type")).toBe("audio/wav");
|
||||
expect(Buffer.isBuffer(seenInit?.body)).toBe(true);
|
||||
expect(seenInit?.body).toBeInstanceOf(Uint8Array);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -293,9 +293,9 @@ async function runProviderEntry(params: {
|
||||
const providerConfig = cfg.models?.providers?.[providerId];
|
||||
const baseUrl = entry.baseUrl ?? params.config?.baseUrl ?? providerConfig?.baseUrl;
|
||||
const mergedHeaders = {
|
||||
...(providerConfig?.headers ?? {}),
|
||||
...(params.config?.headers ?? {}),
|
||||
...(entry.headers ?? {}),
|
||||
...providerConfig?.headers,
|
||||
...params.config?.headers,
|
||||
...entry.headers,
|
||||
};
|
||||
const headers = Object.keys(mergedHeaders).length > 0 ? mergedHeaders : undefined;
|
||||
const providerQuery = resolveProviderQuery({
|
||||
|
||||
@@ -16,7 +16,10 @@ import {
|
||||
import { dispatchReplyWithBufferedBlockDispatcher } from "../../../auto-reply/reply/provider-dispatcher.js";
|
||||
import type { getReplyFromConfig } from "../../../auto-reply/reply.js";
|
||||
import type { ReplyPayload } from "../../../auto-reply/types.js";
|
||||
import { hasInlineCommandTokens, isControlCommandMessage } from "../../../auto-reply/command-detection.js";
|
||||
import {
|
||||
hasInlineCommandTokens,
|
||||
isControlCommandMessage,
|
||||
} from "../../../auto-reply/command-detection.js";
|
||||
import { finalizeInboundContext } from "../../../auto-reply/reply/inbound-context.js";
|
||||
import { toLocationContext } from "../../../channels/location.js";
|
||||
import type { loadConfig } from "../../../config/config.js";
|
||||
|
||||
Reference in New Issue
Block a user