diff --git a/CHANGELOG.md b/CHANGELOG.md
index c69779cac..658392067 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -95,6 +95,7 @@
 - Telegram: honor `/activation` session mode for group mention gating and clarify group activation docs. Thanks @julianengel for PR #377.
 - Telegram: isolate forum topic transcripts per thread and validate Gemini turn ordering in multi-topic sessions. Thanks @hsrvc for PR #407.
 - Telegram: render Telegram-safe HTML for outbound formatting and fall back to plain text on parse errors. Thanks @RandyVentures for PR #435.
+- Telegram: add `[[audio_as_voice]]` tag to send audio as voice notes (audio files remain the default); docs updated. Thanks @manmal for PR #188.
 - iMessage: ignore disconnect errors during shutdown (avoid unhandled promise rejections). Thanks @antons for PR #359.
 - Messages: stop defaulting ack reactions to 👀 when identity emoji is missing.
 - Auto-reply: require slash for control commands to avoid false triggers in normal text.
diff --git a/docs/providers/telegram.md b/docs/providers/telegram.md
index 77705b6c2..bcb59733b 100644
--- a/docs/providers/telegram.md
+++ b/docs/providers/telegram.md
@@ -153,6 +153,15 @@ Telegram supports optional threaded replies via tags:
 Controlled by `telegram.replyToMode`:
 - `first` (default), `all`, `off`.
 
+## Audio messages (voice vs file)
+Telegram distinguishes **voice notes** (round bubble) from **audio files** (metadata card).
+Clawdbot defaults to audio files for backward compatibility.
+
+To force a voice note bubble in agent replies, include this tag anywhere in the reply:
+- `[[audio_as_voice]]` — send audio as a voice note instead of a file.
+
+The tag is stripped from the delivered text. Other providers ignore this tag.
+
 ## Streaming (drafts)
 Telegram can stream **draft bubbles** while the agent is generating a response.
 Clawdbot uses Bot API `sendMessageDraft` (not real messages) and then sends the
diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts
index 97ff55243..fb711ccdf 100644
--- a/src/auto-reply/reply/agent-runner.ts
+++ b/src/auto-reply/reply/agent-runner.ts
@@ -23,6 +23,7 @@ import type { OriginatingChannelType, TemplateContext } from "../templating.js";
 import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js";
 import { SILENT_REPLY_TOKEN } from "../tokens.js";
 import type { GetReplyOptions, ReplyPayload } from "../types.js";
+import { extractAudioTag } from "./audio-tags.js";
 import { createFollowupRunner } from "./followup-runner.js";
 import {
   enqueueFollowupRun,
@@ -30,14 +31,12 @@ import {
   type QueueSettings,
   scheduleFollowupDrain,
 } from "./queue.js";
-import { extractAudioTag } from "./audio-tags.js";
 import {
   applyReplyTagsToPayload,
   applyReplyThreading,
   filterMessagingToolDuplicates,
   isRenderablePayload,
 } from "./reply-payloads.js";
-import { extractReplyToTag } from "./reply-tags.js";
 import {
   createReplyToModeFilter,
   resolveReplyToMode,
@@ -341,6 +340,7 @@ export async function runReplyAgent(params: {
                     const hasMedia =
                       Boolean(taggedPayload.mediaUrl) ||
                       (taggedPayload.mediaUrls?.length ?? 0) > 0;
+                    if (!cleaned && !hasMedia) return;
                     if (cleaned?.trim() === SILENT_REPLY_TOKEN && !hasMedia)
                       return;
                     const blockPayload: ReplyPayload = applyReplyToMode({
diff --git a/src/auto-reply/reply/audio-tags.test.ts b/src/auto-reply/reply/audio-tags.test.ts
new file mode 100644
index 000000000..4a1b8d16b
--- /dev/null
+++ b/src/auto-reply/reply/audio-tags.test.ts
@@ -0,0 +1,25 @@
+import { describe, expect, it } from "vitest";
+
+import { extractAudioTag } from "./audio-tags.js";
+
+describe("extractAudioTag", () => { // pins the result fields asserted below: audioAsVoice, hasTag, cleaned
+  it("detects audio_as_voice and strips the tag", () => { // tag embedded mid-sentence
+    const result = extractAudioTag("Hello [[audio_as_voice]] world");
+    expect(result.audioAsVoice).toBe(true);
+    expect(result.hasTag).toBe(true);
+    expect(result.cleaned).toBe("Hello world"); // a single space remains where the tag was
+  });
+
+  it("returns empty output for missing text", () => { // undefined input
+    const result = extractAudioTag(undefined);
+    expect(result.audioAsVoice).toBe(false);
+    expect(result.hasTag).toBe(false);
+    expect(result.cleaned).toBe(""); // cleaned is an empty string rather than undefined
+  });
+
+  it("removes tag-only messages", () => { // input consisting solely of the tag
+    const result = extractAudioTag("[[audio_as_voice]]");
+    expect(result.audioAsVoice).toBe(true);
+    expect(result.cleaned).toBe(""); // no text remains once the tag is removed
+  });
+});
diff --git a/src/telegram/send.test.ts b/src/telegram/send.test.ts
index 823d641eb..fc50cd669 100644
--- a/src/telegram/send.test.ts
+++ b/src/telegram/send.test.ts
@@ -158,6 +158,77 @@ describe("sendMessageTelegram", () => {
     expect(res.messageId).toBe("9");
   });
 
+  it("sends audio media as files by default", async () => { // no asVoice option is passed in this case
+    const chatId = "123";
+    const sendAudio = vi.fn().mockResolvedValue({
+      message_id: 10,
+      chat: { id: chatId },
+    });
+    const sendVoice = vi.fn().mockResolvedValue({
+      message_id: 11,
+      chat: { id: chatId },
+    });
+    const api = { sendAudio, sendVoice } as unknown as { // stub exposing only the two send methods under test
+      sendAudio: typeof sendAudio;
+      sendVoice: typeof sendVoice;
+    };
+
+    loadWebMedia.mockResolvedValueOnce({ // mocked media load resolving to an audio/mpeg payload
+      buffer: Buffer.from("audio"),
+      contentType: "audio/mpeg",
+      fileName: "clip.mp3",
+    });
+
+    await sendMessageTelegram(chatId, "caption", {
+      token: "tok",
+      api,
+      mediaUrl: "https://example.com/clip.mp3",
+    });
+
+    expect(sendAudio).toHaveBeenCalledWith(chatId, expect.anything(), { // the message text is expected as the caption
+      caption: "caption",
+    });
+    expect(sendVoice).not.toHaveBeenCalled(); // sendVoice must stay unused when asVoice is not set
+  });
+
+  it("sends voice messages when asVoice is true and preserves thread params", async () => { // opt-in counterpart of the default audio-file case
+    const chatId = "-1001234567890";
+    const sendAudio = vi.fn().mockResolvedValue({
+      message_id: 12,
+      chat: { id: chatId },
+    });
+    const sendVoice = vi.fn().mockResolvedValue({
+      message_id: 13,
+      chat: { id: chatId },
+    });
+    const api = { sendAudio, sendVoice } as unknown as { // stub exposing only the two send methods under test
+      sendAudio: typeof sendAudio;
+      sendVoice: typeof sendVoice;
+    };
+
+    loadWebMedia.mockResolvedValueOnce({ // mocked media load resolving to an audio/ogg payload
+      buffer: Buffer.from("voice"),
+      contentType: "audio/ogg",
+      fileName: "note.ogg",
+    });
+
+    await sendMessageTelegram(chatId, "voice note", {
+      token: "tok",
+      api,
+      mediaUrl: "https://example.com/note.ogg",
+      asVoice: true, // the option under test
+      messageThreadId: 271,
+      replyToMessageId: 500,
+    });
+
+    expect(sendVoice).toHaveBeenCalledWith(chatId, expect.anything(), { // camelCase options are expected as the snake_case keys below
+      caption: "voice note",
+      message_thread_id: 271,
+      reply_to_message_id: 500,
+    });
+    expect(sendAudio).not.toHaveBeenCalled(); // sendAudio must not also be called on the voice path
+  });
+
   it("includes message_thread_id for forum topic messages", async () => {
     const chatId = "-1001234567890";
     const sendMessage = vi.fn().mockResolvedValue({
diff --git a/src/web/auto-reply.test.ts b/src/web/auto-reply.test.ts
index 4cf7ec565..c5cf7544c 100644
--- a/src/web/auto-reply.test.ts
+++ b/src/web/auto-reply.test.ts
@@ -317,7 +317,7 @@ describe("partial reply gating", () => {
       undefined,
       {},
     );
-    expect(allowed).toEqual({ text: "ok" });
+    expect(allowed).toMatchObject({ text: "ok", audioAsVoice: false });
     expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
   });
 });