fix: allow media-only sends
This commit is contained in:
@@ -20,6 +20,7 @@
|
||||
- Tools: normalize Slack/Discord message timestamps with `timestampMs`/`timestampUtc` while keeping raw provider fields.
|
||||
- Docs: add Date & Time guide and update prompt/timezone configuration docs.
|
||||
- Messages: debounce rapid inbound messages across channels with per-connector overrides. (#971) — thanks @juanpablodlc.
|
||||
- Messages: allow media-only sends (CLI/tool) and show Telegram voice recording status for voice notes. (#957) — thanks @rdev.
|
||||
- Auth/Status: keep auth profiles sticky per session (rotate on compaction/new), surface provider usage headers in `/status` and `clawdbot models status`, and update docs.
|
||||
- Fix: guard model fallback against undefined provider/model values. (#954) — thanks @roshanasingh4.
|
||||
- Fix: refactor session store updates, add chat.inject, and harden subagent cleanup flow. (#944) — thanks @tyler6204.
|
||||
|
||||
@@ -44,7 +44,7 @@ Target formats (`--to`):
|
||||
|
||||
- `send`
|
||||
- Channels: WhatsApp/Telegram/Discord/Slack/Signal/iMessage/MS Teams
|
||||
- Required: `--to`, `--message`
|
||||
- Required: `--to`, plus `--message` or `--media`
|
||||
- Optional: `--media`, `--reply-to`, `--thread-id`, `--gif-playback`
|
||||
- Telegram only: `--buttons` (requires `"inlineButtons"` in `channels.telegram.capabilities` or `channels.telegram.accounts.<id>.capabilities`)
|
||||
- Telegram only: `--thread-id` (forum topic id)
|
||||
|
||||
@@ -221,6 +221,43 @@ describe("handleTelegramAction", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("allows media-only messages without content", async () => {
|
||||
const cfg = {
|
||||
channels: { telegram: { botToken: "tok" } },
|
||||
} as ClawdbotConfig;
|
||||
await handleTelegramAction(
|
||||
{
|
||||
action: "sendMessage",
|
||||
to: "123456",
|
||||
mediaUrl: "https://example.com/note.ogg",
|
||||
},
|
||||
cfg,
|
||||
);
|
||||
expect(sendMessageTelegram).toHaveBeenCalledWith(
|
||||
"123456",
|
||||
"",
|
||||
expect.objectContaining({
|
||||
token: "tok",
|
||||
mediaUrl: "https://example.com/note.ogg",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("requires content when no mediaUrl is provided", async () => {
|
||||
const cfg = {
|
||||
channels: { telegram: { botToken: "tok" } },
|
||||
} as ClawdbotConfig;
|
||||
await expect(
|
||||
handleTelegramAction(
|
||||
{
|
||||
action: "sendMessage",
|
||||
to: "123456",
|
||||
},
|
||||
cfg,
|
||||
),
|
||||
).rejects.toThrow(/content required/i);
|
||||
});
|
||||
|
||||
it("respects sendMessage gating", async () => {
|
||||
const cfg = {
|
||||
channels: {
|
||||
|
||||
@@ -130,8 +130,13 @@ export async function handleTelegramAction(
|
||||
throw new Error("Telegram sendMessage is disabled.");
|
||||
}
|
||||
const to = readStringParam(params, "to", { required: true });
|
||||
const content = readStringParam(params, "content", { required: true });
|
||||
const mediaUrl = readStringParam(params, "mediaUrl");
|
||||
// Allow content to be omitted when sending media-only (e.g., voice notes)
|
||||
const content =
|
||||
readStringParam(params, "content", {
|
||||
required: !mediaUrl,
|
||||
allowEmpty: true,
|
||||
}) ?? "";
|
||||
const buttons = readTelegramButtons(params);
|
||||
if (buttons && !hasInlineButtonsCapability({ cfg, accountId: accountId ?? undefined })) {
|
||||
throw new Error(
|
||||
|
||||
@@ -19,6 +19,7 @@ const usageMocks = vi.hoisted(() => ({
|
||||
providers: [],
|
||||
}),
|
||||
formatUsageSummaryLine: vi.fn().mockReturnValue("📊 Usage: Claude 80% left"),
|
||||
formatUsageWindowSummary: vi.fn().mockReturnValue("Claude 80% left"),
|
||||
resolveUsageProviderId: vi.fn((provider: string) => provider.split("/")[0]),
|
||||
}));
|
||||
|
||||
@@ -97,6 +98,16 @@ describe("trigger handling", () => {
|
||||
it("filters usage summary to the current model provider", async () => {
|
||||
await withTempHome(async (home) => {
|
||||
usageMocks.loadProviderUsageSummary.mockClear();
|
||||
usageMocks.loadProviderUsageSummary.mockResolvedValue({
|
||||
updatedAt: 0,
|
||||
providers: [
|
||||
{
|
||||
provider: "anthropic",
|
||||
displayName: "Anthropic",
|
||||
windows: [],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const res = await getReplyFromConfig(
|
||||
{
|
||||
|
||||
@@ -42,7 +42,10 @@ export function applyReplyTagsToPayload(
|
||||
|
||||
export function isRenderablePayload(payload: ReplyPayload): boolean {
|
||||
return Boolean(
|
||||
payload.text || payload.mediaUrl || (payload.mediaUrls && payload.mediaUrls.length > 0),
|
||||
payload.text ||
|
||||
payload.mediaUrl ||
|
||||
(payload.mediaUrls && payload.mediaUrls.length > 0) ||
|
||||
payload.audioAsVoice,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ export function registerMessageSendCommand(message: Command, helpers: MessageCli
|
||||
message
|
||||
.command("send")
|
||||
.description("Send a message")
|
||||
.requiredOption("-m, --message <text>", "Message body"),
|
||||
.option("-m, --message <text>", "Message body (required unless --media is set)"),
|
||||
)
|
||||
.option(
|
||||
"--media <path-or-url>",
|
||||
|
||||
@@ -4,8 +4,8 @@ export const createTestRegistry = (overrides: Partial<PluginRegistry> = {}): Plu
|
||||
const base: PluginRegistry = {
|
||||
plugins: [],
|
||||
tools: [],
|
||||
providers: [],
|
||||
channels: [],
|
||||
providers: [],
|
||||
gatewayHandlers: {},
|
||||
httpHandlers: [],
|
||||
cliRegistrars: [],
|
||||
|
||||
@@ -37,6 +37,37 @@ describe("runMessageAction context isolation", () => {
|
||||
expect(result.kind).toBe("send");
|
||||
});
|
||||
|
||||
it("allows media-only send when target matches current channel", async () => {
|
||||
const result = await runMessageAction({
|
||||
cfg: slackConfig,
|
||||
action: "send",
|
||||
params: {
|
||||
channel: "slack",
|
||||
to: "#C123",
|
||||
media: "https://example.com/note.ogg",
|
||||
},
|
||||
toolContext: { currentChannelId: "C123" },
|
||||
dryRun: true,
|
||||
});
|
||||
|
||||
expect(result.kind).toBe("send");
|
||||
});
|
||||
|
||||
it("requires message when no media hint is provided", async () => {
|
||||
await expect(
|
||||
runMessageAction({
|
||||
cfg: slackConfig,
|
||||
action: "send",
|
||||
params: {
|
||||
channel: "slack",
|
||||
to: "#C123",
|
||||
},
|
||||
toolContext: { currentChannelId: "C123" },
|
||||
dryRun: true,
|
||||
}),
|
||||
).rejects.toThrow(/message required/i);
|
||||
});
|
||||
|
||||
it("blocks send when target differs from current channel", async () => {
|
||||
await expect(
|
||||
runMessageAction({
|
||||
|
||||
@@ -208,10 +208,12 @@ export async function runMessageAction(
|
||||
|
||||
if (action === "send") {
|
||||
const to = readStringParam(params, "to", { required: true });
|
||||
// Allow message to be omitted when sending media-only (e.g., voice notes)
|
||||
const mediaHint = readStringParam(params, "media", { trim: false });
|
||||
let message = readStringParam(params, "message", {
|
||||
required: true,
|
||||
required: !mediaHint, // Only require message if no media hint
|
||||
allowEmpty: true,
|
||||
});
|
||||
}) ?? "";
|
||||
|
||||
const parsed = parseReplyDirectives(message);
|
||||
message = parsed.text;
|
||||
|
||||
@@ -189,8 +189,8 @@ function createPluginRecord(params: {
|
||||
enabled: params.enabled,
|
||||
status: params.enabled ? "loaded" : "disabled",
|
||||
toolNames: [],
|
||||
providerIds: [],
|
||||
channelIds: [],
|
||||
providerIds: [],
|
||||
gatewayMethods: [],
|
||||
cliCommands: [],
|
||||
services: [],
|
||||
|
||||
@@ -98,6 +98,18 @@ export const buildTelegramMessageContext = async ({
|
||||
}
|
||||
};
|
||||
|
||||
const sendRecordVoice = async () => {
|
||||
try {
|
||||
await bot.api.sendChatAction(
|
||||
chatId,
|
||||
"record_voice",
|
||||
buildTypingThreadParams(resolvedThreadId),
|
||||
);
|
||||
} catch (err) {
|
||||
logVerbose(`telegram record_voice cue failed for chat ${chatId}: ${String(err)}`);
|
||||
}
|
||||
};
|
||||
|
||||
// DM access control (secure defaults): "pairing" (default) / "allowlist" / "open" / "disabled"
|
||||
if (!isGroup) {
|
||||
if (dmPolicy === "disabled") return null;
|
||||
@@ -408,6 +420,7 @@ export const buildTelegramMessageContext = async ({
|
||||
route,
|
||||
skillFilter,
|
||||
sendTyping,
|
||||
sendRecordVoice,
|
||||
ackReactionPromise,
|
||||
reactionApi,
|
||||
removeAckAfterReply,
|
||||
|
||||
@@ -37,6 +37,7 @@ export const dispatchTelegramMessage = async ({
|
||||
route,
|
||||
skillFilter,
|
||||
sendTyping,
|
||||
sendRecordVoice,
|
||||
ackReactionPromise,
|
||||
reactionApi,
|
||||
removeAckAfterReply,
|
||||
@@ -144,6 +145,7 @@ export const dispatchTelegramMessage = async ({
|
||||
replyToMode,
|
||||
textLimit,
|
||||
messageThreadId: resolvedThreadId,
|
||||
onVoiceRecording: sendRecordVoice,
|
||||
});
|
||||
didSendReply = true;
|
||||
},
|
||||
|
||||
77
src/telegram/bot/delivery.test.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
import type { Bot } from "grammy";
|
||||
|
||||
import { deliverReplies } from "./delivery.js";
|
||||
|
||||
const loadWebMedia = vi.fn();
|
||||
|
||||
vi.mock("../../web/media.js", () => ({
|
||||
loadWebMedia: (...args: unknown[]) => loadWebMedia(...args),
|
||||
}));
|
||||
|
||||
vi.mock("grammy", () => ({
|
||||
InputFile: class {
|
||||
constructor(
|
||||
public buffer: Buffer,
|
||||
public fileName?: string,
|
||||
) {}
|
||||
},
|
||||
}));
|
||||
|
||||
describe("deliverReplies", () => {
|
||||
beforeEach(() => {
|
||||
loadWebMedia.mockReset();
|
||||
});
|
||||
|
||||
it("skips audioAsVoice-only payloads without logging an error", async () => {
|
||||
const runtime = { error: vi.fn() };
|
||||
const bot = { api: {} } as unknown as Bot;
|
||||
|
||||
await deliverReplies({
|
||||
replies: [{ audioAsVoice: true }],
|
||||
chatId: "123",
|
||||
token: "tok",
|
||||
runtime,
|
||||
bot,
|
||||
replyToMode: "off",
|
||||
textLimit: 4000,
|
||||
});
|
||||
|
||||
expect(runtime.error).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("invokes onVoiceRecording before sending a voice note", async () => {
|
||||
const events: string[] = [];
|
||||
const runtime = { error: vi.fn() };
|
||||
const sendVoice = vi.fn(async () => {
|
||||
events.push("sendVoice");
|
||||
return { message_id: 1, chat: { id: "123" } };
|
||||
});
|
||||
const bot = { api: { sendVoice } } as unknown as Bot;
|
||||
const onVoiceRecording = vi.fn(async () => {
|
||||
events.push("recordVoice");
|
||||
});
|
||||
|
||||
loadWebMedia.mockResolvedValueOnce({
|
||||
buffer: Buffer.from("voice"),
|
||||
contentType: "audio/ogg",
|
||||
fileName: "note.ogg",
|
||||
});
|
||||
|
||||
await deliverReplies({
|
||||
replies: [{ mediaUrl: "https://example.com/note.ogg", audioAsVoice: true }],
|
||||
chatId: "123",
|
||||
token: "tok",
|
||||
runtime,
|
||||
bot,
|
||||
replyToMode: "off",
|
||||
textLimit: 4000,
|
||||
onVoiceRecording,
|
||||
});
|
||||
|
||||
expect(onVoiceRecording).toHaveBeenCalledTimes(1);
|
||||
expect(sendVoice).toHaveBeenCalledTimes(1);
|
||||
expect(events).toEqual(["recordVoice", "sendVoice"]);
|
||||
});
|
||||
});
|
||||
@@ -25,12 +25,19 @@ export async function deliverReplies(params: {
|
||||
replyToMode: ReplyToMode;
|
||||
textLimit: number;
|
||||
messageThreadId?: number;
|
||||
/** Callback invoked before sending a voice message to switch typing indicator. */
|
||||
onVoiceRecording?: () => Promise<void> | void;
|
||||
}) {
|
||||
const { replies, chatId, runtime, bot, replyToMode, textLimit, messageThreadId } = params;
|
||||
const threadParams = buildTelegramThreadParams(messageThreadId);
|
||||
let hasReplied = false;
|
||||
for (const reply of replies) {
|
||||
if (!reply?.text && !reply?.mediaUrl && !(reply?.mediaUrls?.length ?? 0)) {
|
||||
const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 0) > 0;
|
||||
if (!reply?.text && !hasMedia) {
|
||||
if (reply?.audioAsVoice) {
|
||||
logVerbose("telegram reply has audioAsVoice without media/text; skipping");
|
||||
continue;
|
||||
}
|
||||
runtime.error?.(danger("reply missing text/media"));
|
||||
continue;
|
||||
}
|
||||
@@ -99,6 +106,8 @@ export async function deliverReplies(params: {
|
||||
});
|
||||
if (useVoice) {
|
||||
// Voice message - displays as round playable bubble (opt-in via [[audio_as_voice]])
|
||||
// Switch typing indicator to record_voice before sending.
|
||||
await params.onVoiceRecording?.();
|
||||
await bot.api.sendVoice(chatId, file, {
|
||||
...mediaParams,
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user