feat(telegram): add [[audio_as_file]] tag support

Allow agents to specify the audio mode via an inline tag:
- Default: voice bubble (sendVoice)
- [[audio_as_file]]: audio file with metadata (sendAudio)

The tag is stripped from the final message text.

Example agent response:
  Here's a podcast episode! [[audio_as_file]]
  MEDIA:https://example.com/episode.mp3
This commit is contained in:
Manuel Maly
2026-01-04 22:15:22 +01:00
committed by Peter Steinberger
parent 5e1b91b32c
commit 262f8a8d45
2 changed files with 50 additions and 7 deletions

View File

@@ -30,12 +30,14 @@ import {
type QueueSettings,
scheduleFollowupDrain,
} from "./queue.js";
import { extractAudioTag } from "./audio-tags.js";
import {
applyReplyTagsToPayload,
applyReplyThreading,
filterMessagingToolDuplicates,
isRenderablePayload,
} from "./reply-payloads.js";
import { extractReplyToTag } from "./reply-tags.js";
import {
createReplyToModeFilter,
resolveReplyToMode,
@@ -334,16 +336,18 @@ export async function runReplyAgent(params: {
sessionCtx.MessageSid,
);
if (!isRenderablePayload(taggedPayload)) return;
const audioTagResult = extractAudioTag(taggedPayload.text);
const cleaned = audioTagResult.cleaned || undefined;
const hasMedia =
Boolean(taggedPayload.mediaUrl) ||
(taggedPayload.mediaUrls?.length ?? 0) > 0;
if (
taggedPayload.text?.trim() === SILENT_REPLY_TOKEN &&
!hasMedia
)
if (cleaned?.trim() === SILENT_REPLY_TOKEN && !hasMedia)
return;
const blockPayload: ReplyPayload =
applyReplyToMode(taggedPayload);
const blockPayload: ReplyPayload = applyReplyToMode({
...taggedPayload,
text: cleaned,
audioAsVoice: audioTagResult.audioAsVoice,
});
const payloadKey = buildPayloadKey(blockPayload);
if (
streamedPayloadKeys.has(payloadKey) ||
@@ -519,7 +523,16 @@ export async function runReplyAgent(params: {
payloads: sanitizedPayloads,
applyReplyToMode,
currentMessageId: sessionCtx.MessageSid,
});
})
.map((payload) => {
const audioTagResult = extractAudioTag(payload.text);
return {
...payload,
text: audioTagResult.cleaned ? audioTagResult.cleaned : undefined,
audioAsVoice: audioTagResult.audioAsVoice,
};
})
.filter(isRenderablePayload);
// Drop final payloads if block streaming is enabled and we already streamed
// block replies. Tool-sent duplicates are filtered below.

View File

@@ -0,0 +1,30 @@
/**
 * Extract the audio-mode tag from reply text.
 *
 * Supports `[[audio_as_file]]` (matched case-insensitively, any number of
 * occurrences) to request that audio be sent as a file instead of a voice
 * bubble. Without the tag, the voice bubble remains the default.
 *
 * Whitespace handling:
 * - Text without a tag is only trimmed at both ends; interior whitespace
 *   (indentation, aligned columns, code blocks) is preserved verbatim.
 * - Text with a tag has every tag occurrence removed, after which the gaps
 *   left behind are tidied: runs of spaces/tabs collapse to a single space
 *   and spaces/tabs around line breaks are dropped.
 *
 * @param text - Raw reply text; may be undefined or empty.
 * @returns `cleaned` — the text with all tags removed (`""` when nothing
 *   but whitespace remains); `audioAsVoice` — `false` only when the tag was
 *   present; `hasTag` — whether at least one tag was found.
 */
export function extractAudioTag(text?: string): {
  cleaned: string;
  audioAsVoice: boolean;
  hasTag: boolean;
} {
  if (!text) return { cleaned: "", audioAsVoice: true, hasTag: false };

  // Replace each tag with a space so the words on either side do not fuse.
  const withoutTag = text.replace(/\[\[audio_as_file\]\]/gi, " ");
  // A replacement always changes the string (the tag is longer than " "),
  // so inequality is an exact "tag was present" check.
  const hasTag = withoutTag !== text;

  if (!hasTag) {
    // Every reply passes through here; untagged text must keep its interior
    // whitespace. Trimming the ends keeps whitespace-only input falsy for
    // callers that test `cleaned` for truthiness.
    return { cleaned: text.trim(), audioAsVoice: true, hasTag: false };
  }

  // Tidy the gaps the removed tag(s) left behind.
  const cleaned = withoutTag
    .replace(/[ \t]+/g, " ")
    .replace(/[ \t]*\n[ \t]*/g, "\n")
    .trim();

  return { cleaned, audioAsVoice: false, hasTag: true };
}