fix(telegram): voice-note tag defaults (#188, thanks @manmal)
This commit is contained in:
@@ -95,6 +95,7 @@
|
|||||||
- Telegram: honor `/activation` session mode for group mention gating and clarify group activation docs. Thanks @julianengel for PR #377.
|
- Telegram: honor `/activation` session mode for group mention gating and clarify group activation docs. Thanks @julianengel for PR #377.
|
||||||
- Telegram: isolate forum topic transcripts per thread and validate Gemini turn ordering in multi-topic sessions. Thanks @hsrvc for PR #407.
|
- Telegram: isolate forum topic transcripts per thread and validate Gemini turn ordering in multi-topic sessions. Thanks @hsrvc for PR #407.
|
||||||
- Telegram: render Telegram-safe HTML for outbound formatting and fall back to plain text on parse errors. Thanks @RandyVentures for PR #435.
|
- Telegram: render Telegram-safe HTML for outbound formatting and fall back to plain text on parse errors. Thanks @RandyVentures for PR #435.
|
||||||
|
- Telegram: add `[[audio_as_voice]]` tag to send audio as voice notes (audio files remain default); docs updated. Thanks @manmal for PR #188.
|
||||||
- iMessage: ignore disconnect errors during shutdown (avoid unhandled promise rejections). Thanks @antons for PR #359.
|
- iMessage: ignore disconnect errors during shutdown (avoid unhandled promise rejections). Thanks @antons for PR #359.
|
||||||
- Messages: stop defaulting ack reactions to 👀 when identity emoji is missing.
|
- Messages: stop defaulting ack reactions to 👀 when identity emoji is missing.
|
||||||
- Auto-reply: require slash for control commands to avoid false triggers in normal text.
|
- Auto-reply: require slash for control commands to avoid false triggers in normal text.
|
||||||
|
|||||||
@@ -153,6 +153,15 @@ Telegram supports optional threaded replies via tags:
|
|||||||
Controlled by `telegram.replyToMode`:
|
Controlled by `telegram.replyToMode`:
|
||||||
- `first` (default), `all`, `off`.
|
- `first` (default), `all`, `off`.
|
||||||
|
|
||||||
|
## Audio messages (voice vs file)
|
||||||
|
Telegram distinguishes **voice notes** (round bubble) from **audio files** (metadata card).
|
||||||
|
Clawdbot defaults to audio files for backward compatibility.
|
||||||
|
|
||||||
|
To force a voice note bubble in agent replies, include this tag anywhere in the reply:
|
||||||
|
- `[[audio_as_voice]]` — send audio as a voice note instead of a file.
|
||||||
|
|
||||||
|
The tag is stripped from the delivered text. Other providers ignore this tag.
|
||||||
|
|
||||||
## Streaming (drafts)
|
## Streaming (drafts)
|
||||||
Telegram can stream **draft bubbles** while the agent is generating a response.
|
Telegram can stream **draft bubbles** while the agent is generating a response.
|
||||||
Clawdbot uses Bot API `sendMessageDraft` (not real messages) and then sends the
|
Clawdbot uses Bot API `sendMessageDraft` (not real messages) and then sends the
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ import type { OriginatingChannelType, TemplateContext } from "../templating.js";
|
|||||||
import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js";
|
import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js";
|
||||||
import { SILENT_REPLY_TOKEN } from "../tokens.js";
|
import { SILENT_REPLY_TOKEN } from "../tokens.js";
|
||||||
import type { GetReplyOptions, ReplyPayload } from "../types.js";
|
import type { GetReplyOptions, ReplyPayload } from "../types.js";
|
||||||
|
import { extractAudioTag } from "./audio-tags.js";
|
||||||
import { createFollowupRunner } from "./followup-runner.js";
|
import { createFollowupRunner } from "./followup-runner.js";
|
||||||
import {
|
import {
|
||||||
enqueueFollowupRun,
|
enqueueFollowupRun,
|
||||||
@@ -30,14 +31,12 @@ import {
|
|||||||
type QueueSettings,
|
type QueueSettings,
|
||||||
scheduleFollowupDrain,
|
scheduleFollowupDrain,
|
||||||
} from "./queue.js";
|
} from "./queue.js";
|
||||||
import { extractAudioTag } from "./audio-tags.js";
|
|
||||||
import {
|
import {
|
||||||
applyReplyTagsToPayload,
|
applyReplyTagsToPayload,
|
||||||
applyReplyThreading,
|
applyReplyThreading,
|
||||||
filterMessagingToolDuplicates,
|
filterMessagingToolDuplicates,
|
||||||
isRenderablePayload,
|
isRenderablePayload,
|
||||||
} from "./reply-payloads.js";
|
} from "./reply-payloads.js";
|
||||||
import { extractReplyToTag } from "./reply-tags.js";
|
|
||||||
import {
|
import {
|
||||||
createReplyToModeFilter,
|
createReplyToModeFilter,
|
||||||
resolveReplyToMode,
|
resolveReplyToMode,
|
||||||
@@ -341,6 +340,7 @@ export async function runReplyAgent(params: {
|
|||||||
const hasMedia =
|
const hasMedia =
|
||||||
Boolean(taggedPayload.mediaUrl) ||
|
Boolean(taggedPayload.mediaUrl) ||
|
||||||
(taggedPayload.mediaUrls?.length ?? 0) > 0;
|
(taggedPayload.mediaUrls?.length ?? 0) > 0;
|
||||||
|
if (!cleaned && !hasMedia) return;
|
||||||
if (cleaned?.trim() === SILENT_REPLY_TOKEN && !hasMedia)
|
if (cleaned?.trim() === SILENT_REPLY_TOKEN && !hasMedia)
|
||||||
return;
|
return;
|
||||||
const blockPayload: ReplyPayload = applyReplyToMode({
|
const blockPayload: ReplyPayload = applyReplyToMode({
|
||||||
|
|||||||
25
src/auto-reply/reply/audio-tags.test.ts
Normal file
25
src/auto-reply/reply/audio-tags.test.ts
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
|
import { extractAudioTag } from "./audio-tags.js";
|
||||||
|
|
||||||
|
describe("extractAudioTag", () => {
|
||||||
|
it("detects audio_as_voice and strips the tag", () => {
|
||||||
|
const result = extractAudioTag("Hello [[audio_as_voice]] world");
|
||||||
|
expect(result.audioAsVoice).toBe(true);
|
||||||
|
expect(result.hasTag).toBe(true);
|
||||||
|
expect(result.cleaned).toBe("Hello world");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns empty output for missing text", () => {
|
||||||
|
const result = extractAudioTag(undefined);
|
||||||
|
expect(result.audioAsVoice).toBe(false);
|
||||||
|
expect(result.hasTag).toBe(false);
|
||||||
|
expect(result.cleaned).toBe("");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("removes tag-only messages", () => {
|
||||||
|
const result = extractAudioTag("[[audio_as_voice]]");
|
||||||
|
expect(result.audioAsVoice).toBe(true);
|
||||||
|
expect(result.cleaned).toBe("");
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -158,6 +158,77 @@ describe("sendMessageTelegram", () => {
|
|||||||
expect(res.messageId).toBe("9");
|
expect(res.messageId).toBe("9");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("sends audio media as files by default", async () => {
|
||||||
|
const chatId = "123";
|
||||||
|
const sendAudio = vi.fn().mockResolvedValue({
|
||||||
|
message_id: 10,
|
||||||
|
chat: { id: chatId },
|
||||||
|
});
|
||||||
|
const sendVoice = vi.fn().mockResolvedValue({
|
||||||
|
message_id: 11,
|
||||||
|
chat: { id: chatId },
|
||||||
|
});
|
||||||
|
const api = { sendAudio, sendVoice } as unknown as {
|
||||||
|
sendAudio: typeof sendAudio;
|
||||||
|
sendVoice: typeof sendVoice;
|
||||||
|
};
|
||||||
|
|
||||||
|
loadWebMedia.mockResolvedValueOnce({
|
||||||
|
buffer: Buffer.from("audio"),
|
||||||
|
contentType: "audio/mpeg",
|
||||||
|
fileName: "clip.mp3",
|
||||||
|
});
|
||||||
|
|
||||||
|
await sendMessageTelegram(chatId, "caption", {
|
||||||
|
token: "tok",
|
||||||
|
api,
|
||||||
|
mediaUrl: "https://example.com/clip.mp3",
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(sendAudio).toHaveBeenCalledWith(chatId, expect.anything(), {
|
||||||
|
caption: "caption",
|
||||||
|
});
|
||||||
|
expect(sendVoice).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("sends voice messages when asVoice is true and preserves thread params", async () => {
|
||||||
|
const chatId = "-1001234567890";
|
||||||
|
const sendAudio = vi.fn().mockResolvedValue({
|
||||||
|
message_id: 12,
|
||||||
|
chat: { id: chatId },
|
||||||
|
});
|
||||||
|
const sendVoice = vi.fn().mockResolvedValue({
|
||||||
|
message_id: 13,
|
||||||
|
chat: { id: chatId },
|
||||||
|
});
|
||||||
|
const api = { sendAudio, sendVoice } as unknown as {
|
||||||
|
sendAudio: typeof sendAudio;
|
||||||
|
sendVoice: typeof sendVoice;
|
||||||
|
};
|
||||||
|
|
||||||
|
loadWebMedia.mockResolvedValueOnce({
|
||||||
|
buffer: Buffer.from("voice"),
|
||||||
|
contentType: "audio/ogg",
|
||||||
|
fileName: "note.ogg",
|
||||||
|
});
|
||||||
|
|
||||||
|
await sendMessageTelegram(chatId, "voice note", {
|
||||||
|
token: "tok",
|
||||||
|
api,
|
||||||
|
mediaUrl: "https://example.com/note.ogg",
|
||||||
|
asVoice: true,
|
||||||
|
messageThreadId: 271,
|
||||||
|
replyToMessageId: 500,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(sendVoice).toHaveBeenCalledWith(chatId, expect.anything(), {
|
||||||
|
caption: "voice note",
|
||||||
|
message_thread_id: 271,
|
||||||
|
reply_to_message_id: 500,
|
||||||
|
});
|
||||||
|
expect(sendAudio).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
it("includes message_thread_id for forum topic messages", async () => {
|
it("includes message_thread_id for forum topic messages", async () => {
|
||||||
const chatId = "-1001234567890";
|
const chatId = "-1001234567890";
|
||||||
const sendMessage = vi.fn().mockResolvedValue({
|
const sendMessage = vi.fn().mockResolvedValue({
|
||||||
|
|||||||
@@ -317,7 +317,7 @@ describe("partial reply gating", () => {
|
|||||||
undefined,
|
undefined,
|
||||||
{},
|
{},
|
||||||
);
|
);
|
||||||
expect(allowed).toEqual({ text: "ok" });
|
expect(allowed).toMatchObject({ text: "ok", audioAsVoice: false });
|
||||||
expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
|
expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user