Merge pull request #1099 from mukhtharcm/feat/message-tool-voice-support

feat(telegram): support sending audio as native voice notes via asVoice param in message tool
This commit is contained in:
Peter Steinberger
2026-01-17 17:33:20 +00:00
committed by GitHub
6 changed files with 62 additions and 5 deletions

View File

@@ -9,7 +9,7 @@ Docs: https://docs.clawd.bot
### Fixes
- Matrix: send voice/image-specific media payloads and keep legacy poll parsing. (#1088) — thanks @sibbl.
- Telegram: allow media-only message tool sends to request voice notes via `asVoice`. (#1099) — thanks @mukhtharcm.
## 2026.1.16-2
### Changes

View File

@@ -360,6 +360,19 @@ To force a voice note bubble in agent replies, include this tag anywhere in the
The tag is stripped from the delivered text. Other channels ignore this tag.
For message tool sends, set `asVoice: true` with a voice-compatible audio `media` URL
(`message` is optional when media is present):
```json5
{
"action": "send",
"channel": "telegram",
"to": "123456789",
"media": "https://example.com/voice.ogg",
"asVoice": true
}
```
## Streaming (drafts)
Telegram can stream **draft bubbles** while the agent is generating a response.
Clawdbot uses Bot API `sendMessageDraft` (not real messages) and then sends the

View File

@@ -39,6 +39,7 @@ function buildSendSchema(options: { includeButtons: boolean }) {
media: Type.Optional(Type.String()),
replyTo: Type.Optional(Type.String()),
threadId: Type.Optional(Type.String()),
asVoice: Type.Optional(Type.Boolean()),
bestEffort: Type.Optional(Type.Boolean()),
gifPlayback: Type.Optional(Type.Boolean()),
buttons: Type.Optional(

View File

@@ -175,6 +175,7 @@ export async function handleTelegramAction(
buttons,
replyToMessageId: replyToMessageId ?? undefined,
messageThreadId: messageThreadId ?? undefined,
asVoice: typeof params.asVoice === "boolean" ? params.asVoice : undefined,
});
return jsonResult({
ok: true,

View File

@@ -0,0 +1,39 @@
import { describe, expect, it, vi } from "vitest";
import type { ClawdbotConfig } from "../../../config/config.js";
import { telegramMessageActions } from "./telegram.js";
// Spy standing in for the real Telegram action handler; it resolves to a minimal
// `{ ok: true }` payload and records the arguments it is called with.
const handleTelegramAction = vi.fn(async () => ({ ok: true }));
// Replace the telegram-actions module with one whose `handleTelegramAction` forwards
// every argument to the spy above. The arrow wrapper looks the spy up at call time
// rather than when the mock factory runs.
// NOTE(review): presumably required because vitest hoists `vi.mock` above the other
// statements in this file — confirm against the vitest documentation.
vi.mock("../../../agents/tools/telegram-actions.js", () => ({
handleTelegramAction: (...args: unknown[]) => handleTelegramAction(...args),
}));
describe("telegramMessageActions", () => {
// A "send" with `media` and `asVoice` but no `message` param must still reach the
// action handler, with empty content and the voice flag intact.
it("allows media-only sends and passes asVoice", async () => {
// Forget calls recorded earlier so the assertion below only sees this send.
handleTelegramAction.mockClear();
// Minimal config: only a Telegram bot token, cast to the full config type.
const cfg = { channels: { telegram: { botToken: "tok" } } } as ClawdbotConfig;
await telegramMessageActions.handleAction({
action: "send",
params: {
// Deliberately no `message` entry: this is a media-only send.
to: "123",
media: "https://example.com/voice.ogg",
asVoice: true,
},
cfg,
accountId: undefined,
});
// The first argument only has to contain these fields (extra keys are allowed);
// the second argument is the cfg object passed to handleAction.
expect(handleTelegramAction).toHaveBeenCalledWith(
expect.objectContaining({
action: "sendMessage",
to: "123",
content: "",
mediaUrl: "https://example.com/voice.ogg",
asVoice: true,
}),
cfg,
);
});
});

View File

@@ -42,14 +42,16 @@ export const telegramMessageActions: ChannelMessageActionAdapter = {
handleAction: async ({ action, params, cfg, accountId }) => {
if (action === "send") {
const to = readStringParam(params, "to", { required: true });
const content = readStringParam(params, "message", {
required: true,
allowEmpty: true,
});
const mediaUrl = readStringParam(params, "media", { trim: false });
const content =
readStringParam(params, "message", {
required: !mediaUrl,
allowEmpty: true,
}) ?? "";
const replyTo = readStringParam(params, "replyTo");
const threadId = readStringParam(params, "threadId");
const buttons = params.buttons;
const asVoice = typeof params.asVoice === "boolean" ? params.asVoice : undefined;
return await handleTelegramAction(
{
action: "sendMessage",
@@ -60,6 +62,7 @@ export const telegramMessageActions: ChannelMessageActionAdapter = {
messageThreadId: threadId ?? undefined,
accountId: accountId ?? undefined,
buttons,
asVoice,
},
cfg,
);