fix: allow media-only sends

This commit is contained in:
Peter Steinberger
2026-01-16 03:15:07 +00:00
parent f449115ec5
commit a0d2a7232e
15 changed files with 200 additions and 9 deletions

View File

@@ -20,6 +20,7 @@
- Tools: normalize Slack/Discord message timestamps with `timestampMs`/`timestampUtc` while keeping raw provider fields.
- Docs: add Date & Time guide and update prompt/timezone configuration docs.
- Messages: debounce rapid inbound messages across channels with per-connector overrides. (#971) — thanks @juanpablodlc.
- Messages: allow media-only sends (CLI/tool) and show Telegram voice recording status for voice notes. (#957) — thanks @rdev.
- Auth/Status: keep auth profiles sticky per session (rotate on compaction/new), surface provider usage headers in `/status` and `clawdbot models status`, and update docs.
- Fix: guard model fallback against undefined provider/model values. (#954) — thanks @roshanasingh4.
- Fix: refactor session store updates, add chat.inject, and harden subagent cleanup flow. (#944) — thanks @tyler6204.

View File

@@ -44,7 +44,7 @@ Target formats (`--to`):
- `send`
- Channels: WhatsApp/Telegram/Discord/Slack/Signal/iMessage/MS Teams
- Required: `--to`, `--message`
- Required: `--to`, plus `--message` or `--media`
- Optional: `--media`, `--reply-to`, `--thread-id`, `--gif-playback`
- Telegram only: `--buttons` (requires `"inlineButtons"` in `channels.telegram.capabilities` or `channels.telegram.accounts.<id>.capabilities`)
- Telegram only: `--thread-id` (forum topic id)

View File

@@ -221,6 +221,43 @@ describe("handleTelegramAction", () => {
);
});
it("allows media-only messages without content", async () => {
  // `content` is deliberately absent: the media URL alone must be enough to send.
  const mediaUrl = "https://example.com/note.ogg";
  const cfg = { channels: { telegram: { botToken: "tok" } } } as ClawdbotConfig;

  await handleTelegramAction({ action: "sendMessage", to: "123456", mediaUrl }, cfg);

  // The text argument is expected to be an empty string when only media is supplied.
  const expectedOptions = expect.objectContaining({ token: "tok", mediaUrl });
  expect(sendMessageTelegram).toHaveBeenCalledWith("123456", "", expectedOptions);
});
it("requires content when no mediaUrl is provided", async () => {
  const cfg = { channels: { telegram: { botToken: "tok" } } } as ClawdbotConfig;

  // Neither `content` nor `mediaUrl` is present, so the handler must reject.
  const pending = handleTelegramAction({ action: "sendMessage", to: "123456" }, cfg);

  await expect(pending).rejects.toThrow(/content required/i);
});
it("respects sendMessage gating", async () => {
const cfg = {
channels: {

View File

@@ -130,8 +130,13 @@ export async function handleTelegramAction(
throw new Error("Telegram sendMessage is disabled.");
}
const to = readStringParam(params, "to", { required: true });
const content = readStringParam(params, "content", { required: true });
const mediaUrl = readStringParam(params, "mediaUrl");
// Allow content to be omitted when sending media-only (e.g., voice notes)
const content =
readStringParam(params, "content", {
required: !mediaUrl,
allowEmpty: true,
}) ?? "";
const buttons = readTelegramButtons(params);
if (buttons && !hasInlineButtonsCapability({ cfg, accountId: accountId ?? undefined })) {
throw new Error(

View File

@@ -19,6 +19,7 @@ const usageMocks = vi.hoisted(() => ({
providers: [],
}),
formatUsageSummaryLine: vi.fn().mockReturnValue("📊 Usage: Claude 80% left"),
formatUsageWindowSummary: vi.fn().mockReturnValue("Claude 80% left"),
resolveUsageProviderId: vi.fn((provider: string) => provider.split("/")[0]),
}));
@@ -97,6 +98,16 @@ describe("trigger handling", () => {
it("filters usage summary to the current model provider", async () => {
await withTempHome(async (home) => {
usageMocks.loadProviderUsageSummary.mockClear();
usageMocks.loadProviderUsageSummary.mockResolvedValue({
updatedAt: 0,
providers: [
{
provider: "anthropic",
displayName: "Anthropic",
windows: [],
},
],
});
const res = await getReplyFromConfig(
{

View File

@@ -42,7 +42,10 @@ export function applyReplyTagsToPayload(
/**
 * Reports whether a reply payload carries anything worth handing to a channel.
 *
 * A payload counts as renderable when at least one of the following is truthy:
 * non-empty `text`, a single `mediaUrl`, a non-empty `mediaUrls` list, or the
 * `audioAsVoice` flag.
 *
 * @param payload - The reply payload to inspect.
 * @returns `true` when the payload has text, media, or the voice flag; otherwise `false`.
 */
export function isRenderablePayload(payload: ReplyPayload): boolean {
  // A single condition is passed to Boolean(): it only looks at its first argument,
  // so every alternative (including audioAsVoice) has to live in one `||` chain.
  return Boolean(
    payload.text ||
      payload.mediaUrl ||
      (payload.mediaUrls && payload.mediaUrls.length > 0) ||
      payload.audioAsVoice,
  );
}

View File

@@ -9,7 +9,7 @@ export function registerMessageSendCommand(message: Command, helpers: MessageCli
message
.command("send")
.description("Send a message")
.requiredOption("-m, --message <text>", "Message body"),
.option("-m, --message <text>", "Message body (required unless --media is set)"),
)
.option(
"--media <path-or-url>",

View File

@@ -4,8 +4,8 @@ export const createTestRegistry = (overrides: Partial<PluginRegistry> = {}): Plu
const base: PluginRegistry = {
plugins: [],
tools: [],
providers: [],
channels: [],
providers: [],
gatewayHandlers: {},
httpHandlers: [],
cliRegistrars: [],

View File

@@ -37,6 +37,37 @@ describe("runMessageAction context isolation", () => {
expect(result.kind).toBe("send");
});
it("allows media-only send when target matches current channel", async () => {
  // `message` is omitted on purpose: the `media` hint alone should pass validation.
  const { kind } = await runMessageAction({
    cfg: slackConfig,
    action: "send",
    params: { channel: "slack", to: "#C123", media: "https://example.com/note.ogg" },
    toolContext: { currentChannelId: "C123" },
    dryRun: true,
  });

  expect(kind).toBe("send");
});
it("requires message when no media hint is provided", async () => {
  // With neither `message` nor `media` in params, the send action must reject.
  const attempt = runMessageAction({
    cfg: slackConfig,
    action: "send",
    params: { channel: "slack", to: "#C123" },
    toolContext: { currentChannelId: "C123" },
    dryRun: true,
  });

  await expect(attempt).rejects.toThrow(/message required/i);
});
it("blocks send when target differs from current channel", async () => {
await expect(
runMessageAction({

View File

@@ -208,10 +208,12 @@ export async function runMessageAction(
if (action === "send") {
const to = readStringParam(params, "to", { required: true });
// Allow message to be omitted when sending media-only (e.g., voice notes)
const mediaHint = readStringParam(params, "media", { trim: false });
let message = readStringParam(params, "message", {
required: true,
required: !mediaHint, // Only require message if no media hint
allowEmpty: true,
});
}) ?? "";
const parsed = parseReplyDirectives(message);
message = parsed.text;

View File

@@ -189,8 +189,8 @@ function createPluginRecord(params: {
enabled: params.enabled,
status: params.enabled ? "loaded" : "disabled",
toolNames: [],
providerIds: [],
channelIds: [],
providerIds: [],
gatewayMethods: [],
cliCommands: [],
services: [],

View File

@@ -98,6 +98,18 @@ export const buildTelegramMessageContext = async ({
}
};
// Best-effort "record_voice" chat action for the current chat/thread.
// Any failure is logged verbosely and swallowed so it never propagates to the caller.
const sendRecordVoice = async () => {
  try {
    const threadParams = buildTypingThreadParams(resolvedThreadId);
    await bot.api.sendChatAction(chatId, "record_voice", threadParams);
  } catch (cueError) {
    logVerbose(`telegram record_voice cue failed for chat ${chatId}: ${String(cueError)}`);
  }
};
// DM access control (secure defaults): "pairing" (default) / "allowlist" / "open" / "disabled"
if (!isGroup) {
if (dmPolicy === "disabled") return null;
@@ -408,6 +420,7 @@ export const buildTelegramMessageContext = async ({
route,
skillFilter,
sendTyping,
sendRecordVoice,
ackReactionPromise,
reactionApi,
removeAckAfterReply,

View File

@@ -37,6 +37,7 @@ export const dispatchTelegramMessage = async ({
route,
skillFilter,
sendTyping,
sendRecordVoice,
ackReactionPromise,
reactionApi,
removeAckAfterReply,
@@ -144,6 +145,7 @@ export const dispatchTelegramMessage = async ({
replyToMode,
textLimit,
messageThreadId: resolvedThreadId,
onVoiceRecording: sendRecordVoice,
});
didSendReply = true;
},

View File

@@ -0,0 +1,77 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { Bot } from "grammy";
import { deliverReplies } from "./delivery.js";
// Spy standing in for the real media loader; each test decides what it resolves to.
const loadWebMedia = vi.fn();
// Replace the media module with a thin wrapper that forwards every call to the spy.
// The wrapper looks the spy up at call time rather than when the factory runs.
vi.mock("../../web/media.js", () => ({
loadWebMedia: (...args: unknown[]) => loadWebMedia(...args),
}));
// Replace grammy with a stub that only provides InputFile, implemented as a plain
// holder for the buffer and optional file name it is constructed with.
vi.mock("grammy", () => ({
InputFile: class {
constructor(
public buffer: Buffer,
public fileName?: string,
) {}
},
}));
describe("deliverReplies", () => {
  // Start every case with a clean media-loader spy (no queued results, no call history).
  beforeEach(() => {
    loadWebMedia.mockReset();
  });

  it("skips audioAsVoice-only payloads without logging an error", async () => {
    const runtime = { error: vi.fn() };
    // The bot exposes no API methods in this case.
    const bot = { api: {} } as unknown as Bot;

    await deliverReplies({
      replies: [{ audioAsVoice: true }],
      chatId: "123",
      token: "tok",
      runtime,
      bot,
      replyToMode: "off",
      textLimit: 4000,
    });

    expect(runtime.error).not.toHaveBeenCalled();
  });

  it("invokes onVoiceRecording before sending a voice note", async () => {
    // Records the order in which the two callbacks fire.
    const callOrder: string[] = [];
    const runtime = { error: vi.fn() };
    const sendVoice = vi.fn(async () => {
      callOrder.push("sendVoice");
      return { message_id: 1, chat: { id: "123" } };
    });
    const bot = { api: { sendVoice } } as unknown as Bot;
    const onVoiceRecording = vi.fn(async () => {
      callOrder.push("recordVoice");
    });
    const voiceMedia = {
      buffer: Buffer.from("voice"),
      contentType: "audio/ogg",
      fileName: "note.ogg",
    };
    loadWebMedia.mockResolvedValueOnce(voiceMedia);

    await deliverReplies({
      replies: [{ mediaUrl: "https://example.com/note.ogg", audioAsVoice: true }],
      chatId: "123",
      token: "tok",
      runtime,
      bot,
      replyToMode: "off",
      textLimit: 4000,
      onVoiceRecording,
    });

    expect(onVoiceRecording).toHaveBeenCalledTimes(1);
    expect(sendVoice).toHaveBeenCalledTimes(1);
    // The recording cue must come first, then the actual voice upload.
    expect(callOrder).toEqual(["recordVoice", "sendVoice"]);
  });
});

View File

@@ -25,12 +25,19 @@ export async function deliverReplies(params: {
replyToMode: ReplyToMode;
textLimit: number;
messageThreadId?: number;
/** Callback invoked before sending a voice message to switch typing indicator. */
onVoiceRecording?: () => Promise<void> | void;
}) {
const { replies, chatId, runtime, bot, replyToMode, textLimit, messageThreadId } = params;
const threadParams = buildTelegramThreadParams(messageThreadId);
let hasReplied = false;
for (const reply of replies) {
if (!reply?.text && !reply?.mediaUrl && !(reply?.mediaUrls?.length ?? 0)) {
const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 0) > 0;
if (!reply?.text && !hasMedia) {
if (reply?.audioAsVoice) {
logVerbose("telegram reply has audioAsVoice without media/text; skipping");
continue;
}
runtime.error?.(danger("reply missing text/media"));
continue;
}
@@ -99,6 +106,8 @@ export async function deliverReplies(params: {
});
if (useVoice) {
// Voice message - displays as round playable bubble (opt-in via [[audio_as_voice]])
// Switch typing indicator to record_voice before sending.
await params.onVoiceRecording?.();
await bot.api.sendVoice(chatId, file, {
...mediaParams,
});