fix: gate image tool and deepgram audio payload
This commit is contained in:
@@ -81,6 +81,7 @@ Docs: https://docs.clawd.bot
|
|||||||
- WhatsApp: scope self-chat response prefix; inject pending-only group history and clear after any processed message.
|
- WhatsApp: scope self-chat response prefix; inject pending-only group history and clear after any processed message.
|
||||||
- WhatsApp: include `linked` field in `describeAccount`.
|
- WhatsApp: include `linked` field in `describeAccount`.
|
||||||
- Agents: drop unsigned Gemini tool calls and avoid JSON Schema `format` keyword collisions.
|
- Agents: drop unsigned Gemini tool calls and avoid JSON Schema `format` keyword collisions.
|
||||||
|
- Agents: hide the image tool when the primary model already supports images.
|
||||||
- Agents: avoid duplicate sends by replying with `NO_REPLY` after `message` tool sends.
|
- Agents: avoid duplicate sends by replying with `NO_REPLY` after `message` tool sends.
|
||||||
- Auth: inherit/merge sub-agent auth profiles from the main agent.
|
- Auth: inherit/merge sub-agent auth profiles from the main agent.
|
||||||
- Gateway: resolve local auth for security probe and validate gateway token/password file modes. (#1011, #1022) — thanks @ivanrvpereira, @kkarimi.
|
- Gateway: resolve local auth for security probe and validate gateway token/password file modes. (#1011, #1022) — thanks @ivanrvpereira, @kkarimi.
|
||||||
|
|||||||
@@ -102,6 +102,27 @@ describe("image tool implicit imageModel config", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("disables image tool when primary model already supports images", async () => {
|
||||||
|
const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-image-"));
|
||||||
|
const cfg: ClawdbotConfig = {
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
model: { primary: "acme/vision-1" },
|
||||||
|
imageModel: { primary: "openai/gpt-5-mini" },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
models: {
|
||||||
|
providers: {
|
||||||
|
acme: {
|
||||||
|
models: [{ id: "vision-1", input: ["text", "image"] }],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
expect(resolveImageModelConfigForTool({ cfg, agentDir })).toBeNull();
|
||||||
|
expect(createImageTool({ config: cfg, agentDir })).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
it("sandboxes image paths like the read tool", async () => {
|
it("sandboxes image paths like the read tool", async () => {
|
||||||
const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-image-sandbox-"));
|
const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-image-sandbox-"));
|
||||||
const agentDir = path.join(stateDir, "agent");
|
const agentDir = path.join(stateDir, "agent");
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import fsSync from "node:fs";
|
||||||
import fs from "node:fs/promises";
|
import fs from "node:fs/promises";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
|
|
||||||
@@ -19,7 +20,7 @@ import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
|
|||||||
import { minimaxUnderstandImage } from "../minimax-vlm.js";
|
import { minimaxUnderstandImage } from "../minimax-vlm.js";
|
||||||
import { getApiKeyForModel, resolveEnvApiKey } from "../model-auth.js";
|
import { getApiKeyForModel, resolveEnvApiKey } from "../model-auth.js";
|
||||||
import { runWithImageModelFallback } from "../model-fallback.js";
|
import { runWithImageModelFallback } from "../model-fallback.js";
|
||||||
import { parseModelRef } from "../model-selection.js";
|
import { normalizeProviderId, resolveConfiguredModelRef } from "../model-selection.js";
|
||||||
import { ensureClawdbotModelsJson } from "../models-config.js";
|
import { ensureClawdbotModelsJson } from "../models-config.js";
|
||||||
import { assertSandboxPath } from "../sandbox-paths.js";
|
import { assertSandboxPath } from "../sandbox-paths.js";
|
||||||
import type { AnyAgentTool } from "./common.js";
|
import type { AnyAgentTool } from "./common.js";
|
||||||
@@ -42,12 +43,15 @@ function resolveDefaultModelRef(cfg?: ClawdbotConfig): {
|
|||||||
provider: string;
|
provider: string;
|
||||||
model: string;
|
model: string;
|
||||||
} {
|
} {
|
||||||
const modelConfig = cfg?.agents?.defaults?.model as { primary?: string } | string | undefined;
|
if (cfg) {
|
||||||
const raw = typeof modelConfig === "string" ? modelConfig.trim() : modelConfig?.primary?.trim();
|
const resolved = resolveConfiguredModelRef({
|
||||||
const parsed =
|
cfg,
|
||||||
parseModelRef(raw ?? "", DEFAULT_PROVIDER) ??
|
defaultProvider: DEFAULT_PROVIDER,
|
||||||
({ provider: DEFAULT_PROVIDER, model: DEFAULT_MODEL } as const);
|
defaultModel: DEFAULT_MODEL,
|
||||||
return { provider: parsed.provider, model: parsed.model };
|
});
|
||||||
|
return { provider: resolved.provider, model: resolved.model };
|
||||||
|
}
|
||||||
|
return { provider: DEFAULT_PROVIDER, model: DEFAULT_MODEL };
|
||||||
}
|
}
|
||||||
|
|
||||||
function hasAuthForProvider(params: { provider: string; agentDir: string }): boolean {
|
function hasAuthForProvider(params: { provider: string; agentDir: string }): boolean {
|
||||||
@@ -58,6 +62,77 @@ function hasAuthForProvider(params: { provider: string; agentDir: string }): boo
|
|||||||
return listProfilesForProvider(store, params.provider).length > 0;
|
return listProfilesForProvider(store, params.provider).length > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ProviderModelEntry = {
|
||||||
|
id?: string;
|
||||||
|
input?: string[];
|
||||||
|
};
|
||||||
|
|
||||||
|
type ProviderConfigLike = {
|
||||||
|
models?: ProviderModelEntry[];
|
||||||
|
};
|
||||||
|
|
||||||
|
function resolveProviderConfig(
|
||||||
|
providers: Record<string, ProviderConfigLike> | undefined,
|
||||||
|
provider: string,
|
||||||
|
): ProviderConfigLike | null {
|
||||||
|
if (!providers) return null;
|
||||||
|
const normalized = normalizeProviderId(provider);
|
||||||
|
for (const [key, value] of Object.entries(providers)) {
|
||||||
|
if (normalizeProviderId(key) === normalized) return value;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
function resolveModelSupportsImages(params: {
|
||||||
|
providerConfig: ProviderConfigLike | null;
|
||||||
|
modelId: string;
|
||||||
|
}): boolean | null {
|
||||||
|
const models = params.providerConfig?.models;
|
||||||
|
if (!Array.isArray(models) || models.length === 0) return null;
|
||||||
|
const trimmedId = params.modelId.trim();
|
||||||
|
if (!trimmedId) return null;
|
||||||
|
const match =
|
||||||
|
models.find((model) => String(model?.id ?? "").trim() === trimmedId) ??
|
||||||
|
models.find(
|
||||||
|
(model) =>
|
||||||
|
String(model?.id ?? "")
|
||||||
|
.trim()
|
||||||
|
.toLowerCase() === trimmedId.toLowerCase(),
|
||||||
|
);
|
||||||
|
if (!match) return null;
|
||||||
|
const input = Array.isArray(match.input) ? match.input : [];
|
||||||
|
return input.includes("image");
|
||||||
|
}
|
||||||
|
|
||||||
|
function resolvePrimaryModelSupportsImages(params: {
|
||||||
|
cfg?: ClawdbotConfig;
|
||||||
|
agentDir: string;
|
||||||
|
}): boolean | null {
|
||||||
|
if (!params.cfg) return null;
|
||||||
|
const primary = resolveDefaultModelRef(params.cfg);
|
||||||
|
const providerConfig = resolveProviderConfig(
|
||||||
|
params.cfg.models?.providers as Record<string, ProviderConfigLike> | undefined,
|
||||||
|
primary.provider,
|
||||||
|
);
|
||||||
|
const fromConfig = resolveModelSupportsImages({
|
||||||
|
providerConfig,
|
||||||
|
modelId: primary.model,
|
||||||
|
});
|
||||||
|
if (fromConfig !== null) return fromConfig;
|
||||||
|
try {
|
||||||
|
const modelsPath = path.join(params.agentDir, "models.json");
|
||||||
|
const raw = fsSync.readFileSync(modelsPath, "utf8");
|
||||||
|
const parsed = JSON.parse(raw) as { providers?: Record<string, ProviderConfigLike> };
|
||||||
|
const provider = resolveProviderConfig(parsed.providers, primary.provider);
|
||||||
|
return resolveModelSupportsImages({
|
||||||
|
providerConfig: provider,
|
||||||
|
modelId: primary.model,
|
||||||
|
});
|
||||||
|
} catch {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Resolve the effective image model config for the `image` tool.
|
* Resolve the effective image model config for the `image` tool.
|
||||||
*
|
*
|
||||||
@@ -70,6 +145,11 @@ export function resolveImageModelConfigForTool(params: {
|
|||||||
cfg?: ClawdbotConfig;
|
cfg?: ClawdbotConfig;
|
||||||
agentDir: string;
|
agentDir: string;
|
||||||
}): ImageModelConfig | null {
|
}): ImageModelConfig | null {
|
||||||
|
const primarySupportsImages = resolvePrimaryModelSupportsImages({
|
||||||
|
cfg: params.cfg,
|
||||||
|
agentDir: params.agentDir,
|
||||||
|
});
|
||||||
|
if (primarySupportsImages === true) return null;
|
||||||
const explicit = coerceImageModelConfig(params.cfg);
|
const explicit = coerceImageModelConfig(params.cfg);
|
||||||
if (explicit.primary?.trim() || (explicit.fallbacks?.length ?? 0) > 0) {
|
if (explicit.primary?.trim() || (explicit.fallbacks?.length ?? 0) > 0) {
|
||||||
return explicit;
|
return explicit;
|
||||||
|
|||||||
@@ -107,7 +107,12 @@ describe("directive behavior", () => {
|
|||||||
const storePath = path.join(home, "sessions.json");
|
const storePath = path.join(home, "sessions.json");
|
||||||
|
|
||||||
await getReplyFromConfig(
|
await getReplyFromConfig(
|
||||||
{ Body: "/model kimi-k2-0905-preview", From: "+1222", To: "+1222", CommandAuthorized: true },
|
{
|
||||||
|
Body: "/model kimi-k2-0905-preview",
|
||||||
|
From: "+1222",
|
||||||
|
To: "+1222",
|
||||||
|
CommandAuthorized: true,
|
||||||
|
},
|
||||||
{},
|
{},
|
||||||
{
|
{
|
||||||
agents: {
|
agents: {
|
||||||
|
|||||||
@@ -67,7 +67,9 @@ export function formatForLog(value: unknown): string {
|
|||||||
: JSON.stringify(value);
|
: JSON.stringify(value);
|
||||||
if (!str) return "";
|
if (!str) return "";
|
||||||
const redacted = redactSensitiveText(str, WS_LOG_REDACT_OPTIONS);
|
const redacted = redactSensitiveText(str, WS_LOG_REDACT_OPTIONS);
|
||||||
return redacted.length > LOG_VALUE_LIMIT ? `${redacted.slice(0, LOG_VALUE_LIMIT)}...` : redacted;
|
return redacted.length > LOG_VALUE_LIMIT
|
||||||
|
? `${redacted.slice(0, LOG_VALUE_LIMIT)}...`
|
||||||
|
: redacted;
|
||||||
} catch {
|
} catch {
|
||||||
return String(value);
|
return String(value);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -31,9 +31,7 @@ async function fetchSampleBuffer(url: string, timeoutMs: number): Promise<Buffer
|
|||||||
}
|
}
|
||||||
|
|
||||||
describeLive("deepgram live", () => {
|
describeLive("deepgram live", () => {
|
||||||
it(
|
it("transcribes sample audio", async () => {
|
||||||
"transcribes sample audio",
|
|
||||||
async () => {
|
|
||||||
const buffer = await fetchSampleBuffer(SAMPLE_URL, 15000);
|
const buffer = await fetchSampleBuffer(SAMPLE_URL, 15000);
|
||||||
const result = await transcribeDeepgramAudio({
|
const result = await transcribeDeepgramAudio({
|
||||||
buffer,
|
buffer,
|
||||||
@@ -45,7 +43,5 @@ describeLive("deepgram live", () => {
|
|||||||
timeoutMs: 20000,
|
timeoutMs: 20000,
|
||||||
});
|
});
|
||||||
expect(result.text.trim().length).toBeGreaterThan(0);
|
expect(result.text.trim().length).toBeGreaterThan(0);
|
||||||
},
|
}, 30000);
|
||||||
30000,
|
|
||||||
);
|
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -84,6 +84,6 @@ describe("transcribeDeepgramAudio", () => {
|
|||||||
expect(headers.get("authorization")).toBe("Token test-key");
|
expect(headers.get("authorization")).toBe("Token test-key");
|
||||||
expect(headers.get("x-custom")).toBe("1");
|
expect(headers.get("x-custom")).toBe("1");
|
||||||
expect(headers.get("content-type")).toBe("audio/wav");
|
expect(headers.get("content-type")).toBe("audio/wav");
|
||||||
expect(Buffer.isBuffer(seenInit?.body)).toBe(true);
|
expect(seenInit?.body).toBeInstanceOf(Uint8Array);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -293,9 +293,9 @@ async function runProviderEntry(params: {
|
|||||||
const providerConfig = cfg.models?.providers?.[providerId];
|
const providerConfig = cfg.models?.providers?.[providerId];
|
||||||
const baseUrl = entry.baseUrl ?? params.config?.baseUrl ?? providerConfig?.baseUrl;
|
const baseUrl = entry.baseUrl ?? params.config?.baseUrl ?? providerConfig?.baseUrl;
|
||||||
const mergedHeaders = {
|
const mergedHeaders = {
|
||||||
...(providerConfig?.headers ?? {}),
|
...providerConfig?.headers,
|
||||||
...(params.config?.headers ?? {}),
|
...params.config?.headers,
|
||||||
...(entry.headers ?? {}),
|
...entry.headers,
|
||||||
};
|
};
|
||||||
const headers = Object.keys(mergedHeaders).length > 0 ? mergedHeaders : undefined;
|
const headers = Object.keys(mergedHeaders).length > 0 ? mergedHeaders : undefined;
|
||||||
const providerQuery = resolveProviderQuery({
|
const providerQuery = resolveProviderQuery({
|
||||||
|
|||||||
@@ -16,7 +16,10 @@ import {
|
|||||||
import { dispatchReplyWithBufferedBlockDispatcher } from "../../../auto-reply/reply/provider-dispatcher.js";
|
import { dispatchReplyWithBufferedBlockDispatcher } from "../../../auto-reply/reply/provider-dispatcher.js";
|
||||||
import type { getReplyFromConfig } from "../../../auto-reply/reply.js";
|
import type { getReplyFromConfig } from "../../../auto-reply/reply.js";
|
||||||
import type { ReplyPayload } from "../../../auto-reply/types.js";
|
import type { ReplyPayload } from "../../../auto-reply/types.js";
|
||||||
import { hasInlineCommandTokens, isControlCommandMessage } from "../../../auto-reply/command-detection.js";
|
import {
|
||||||
|
hasInlineCommandTokens,
|
||||||
|
isControlCommandMessage,
|
||||||
|
} from "../../../auto-reply/command-detection.js";
|
||||||
import { finalizeInboundContext } from "../../../auto-reply/reply/inbound-context.js";
|
import { finalizeInboundContext } from "../../../auto-reply/reply/inbound-context.js";
|
||||||
import { toLocationContext } from "../../../channels/location.js";
|
import { toLocationContext } from "../../../channels/location.js";
|
||||||
import type { loadConfig } from "../../../config/config.js";
|
import type { loadConfig } from "../../../config/config.js";
|
||||||
|
|||||||
Reference in New Issue
Block a user