Merge pull request #1099 from mukhtharcm/feat/message-tool-voice-support

feat(telegram): support sending audio as native voice notes via asVoice param in message tool
This commit is contained in:
Peter Steinberger
2026-01-17 17:33:20 +00:00
committed by GitHub
6 changed files with 62 additions and 5 deletions

View File

@@ -9,7 +9,7 @@ Docs: https://docs.clawd.bot
### Fixes
- Matrix: send voice/image-specific media payloads and keep legacy poll parsing. (#1088) — thanks @sibbl.
- Telegram: allow media-only message tool sends to request voice notes via `asVoice`. (#1099) — thanks @mukhtharcm.
## 2026.1.16-2
### Changes

View File

@@ -360,6 +360,19 @@ To force a voice note bubble in agent replies, include this tag anywhere in the
The tag is stripped from the delivered text. Other channels ignore this tag.
For message tool sends, set `asVoice: true` with a voice-compatible audio `media` URL
(`message` is optional when media is present):
```json5
{
"action": "send",
"channel": "telegram",
"to": "123456789",
"media": "https://example.com/voice.ogg",
"asVoice": true
}
```
## Streaming (drafts)
Telegram can stream **draft bubbles** while the agent is generating a response.
Clawdbot uses Bot API `sendMessageDraft` (not real messages) and then sends the

View File

@@ -39,6 +39,7 @@ function buildSendSchema(options: { includeButtons: boolean }) {
media: Type.Optional(Type.String()), media: Type.Optional(Type.String()),
replyTo: Type.Optional(Type.String()), replyTo: Type.Optional(Type.String()),
threadId: Type.Optional(Type.String()), threadId: Type.Optional(Type.String()),
asVoice: Type.Optional(Type.Boolean()),
bestEffort: Type.Optional(Type.Boolean()), bestEffort: Type.Optional(Type.Boolean()),
gifPlayback: Type.Optional(Type.Boolean()), gifPlayback: Type.Optional(Type.Boolean()),
buttons: Type.Optional( buttons: Type.Optional(

View File

@@ -175,6 +175,7 @@ export async function handleTelegramAction(
buttons, buttons,
replyToMessageId: replyToMessageId ?? undefined, replyToMessageId: replyToMessageId ?? undefined,
messageThreadId: messageThreadId ?? undefined, messageThreadId: messageThreadId ?? undefined,
asVoice: typeof params.asVoice === "boolean" ? params.asVoice : undefined,
}); });
return jsonResult({ return jsonResult({
ok: true, ok: true,

View File

@@ -0,0 +1,39 @@
import { describe, expect, it, vi } from "vitest";
import type { ClawdbotConfig } from "../../../config/config.js";
import { telegramMessageActions } from "./telegram.js";
// Shared spy standing in for the real Telegram action handler; it always
// resolves to `{ ok: true }`. The implementation declares a rest parameter so
// that the spread call in the mock factory below type-checks: with a
// zero-parameter implementation the inferred mock signature accepts no
// arguments and `handleTelegramAction(...args)` is rejected (TS2556).
const handleTelegramAction = vi.fn(async (..._args: unknown[]) => ({ ok: true }));

// Swap the telegram-actions module for a thin wrapper around the spy. The
// wrapper arrow looks up `handleTelegramAction` only when it is called, so the
// factory does not touch the spy at module-mock time (vitest hoists `vi.mock`
// calls to the top of the file, ahead of the `const` above).
vi.mock("../../../agents/tools/telegram-actions.js", () => ({
  handleTelegramAction: (...args: unknown[]) => handleTelegramAction(...args),
}));
// Exercises the "send" action of the Telegram message adapter when the caller
// supplies media and `asVoice` but no `message`.
describe("telegramMessageActions", () => {
  it("allows media-only sends and passes asVoice", async () => {
    const recipient = "123";
    const voiceUrl = "https://example.com/voice.ogg";
    const cfg = { channels: { telegram: { botToken: "tok" } } } as ClawdbotConfig;

    // Forget any calls recorded earlier so the assertion only sees this send.
    handleTelegramAction.mockClear();

    await telegramMessageActions.handleAction({
      action: "send",
      params: { to: recipient, media: voiceUrl, asVoice: true },
      cfg,
      accountId: undefined,
    });

    // No `message` was provided, so the forwarded content is expected to be
    // the empty string while the media URL and voice flag pass through.
    const forwardedPayload = expect.objectContaining({
      action: "sendMessage",
      to: recipient,
      content: "",
      mediaUrl: voiceUrl,
      asVoice: true,
    });
    expect(handleTelegramAction).toHaveBeenCalledWith(forwardedPayload, cfg);
  });
});

View File

@@ -42,14 +42,16 @@ export const telegramMessageActions: ChannelMessageActionAdapter = {
handleAction: async ({ action, params, cfg, accountId }) => { handleAction: async ({ action, params, cfg, accountId }) => {
if (action === "send") { if (action === "send") {
const to = readStringParam(params, "to", { required: true }); const to = readStringParam(params, "to", { required: true });
const content = readStringParam(params, "message", {
required: true,
allowEmpty: true,
});
const mediaUrl = readStringParam(params, "media", { trim: false }); const mediaUrl = readStringParam(params, "media", { trim: false });
const content =
readStringParam(params, "message", {
required: !mediaUrl,
allowEmpty: true,
}) ?? "";
const replyTo = readStringParam(params, "replyTo"); const replyTo = readStringParam(params, "replyTo");
const threadId = readStringParam(params, "threadId"); const threadId = readStringParam(params, "threadId");
const buttons = params.buttons; const buttons = params.buttons;
const asVoice = typeof params.asVoice === "boolean" ? params.asVoice : undefined;
return await handleTelegramAction( return await handleTelegramAction(
{ {
action: "sendMessage", action: "sendMessage",
@@ -60,6 +62,7 @@ export const telegramMessageActions: ChannelMessageActionAdapter = {
messageThreadId: threadId ?? undefined, messageThreadId: threadId ?? undefined,
accountId: accountId ?? undefined, accountId: accountId ?? undefined,
buttons, buttons,
asVoice,
}, },
cfg, cfg,
); );