diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index e297a7cd0..97ff55243 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -30,12 +30,14 @@ import { type QueueSettings, scheduleFollowupDrain, } from "./queue.js"; +import { extractAudioTag } from "./audio-tags.js"; import { applyReplyTagsToPayload, applyReplyThreading, filterMessagingToolDuplicates, isRenderablePayload, } from "./reply-payloads.js"; +import { extractReplyToTag } from "./reply-tags.js"; import { createReplyToModeFilter, resolveReplyToMode, @@ -334,16 +336,18 @@ export async function runReplyAgent(params: { sessionCtx.MessageSid, ); if (!isRenderablePayload(taggedPayload)) return; + const audioTagResult = extractAudioTag(taggedPayload.text); + const cleaned = audioTagResult.cleaned || undefined; const hasMedia = Boolean(taggedPayload.mediaUrl) || (taggedPayload.mediaUrls?.length ?? 0) > 0; - if ( - taggedPayload.text?.trim() === SILENT_REPLY_TOKEN && - !hasMedia - ) + if (cleaned?.trim() === SILENT_REPLY_TOKEN && !hasMedia) return; - const blockPayload: ReplyPayload = - applyReplyToMode(taggedPayload); + const blockPayload: ReplyPayload = applyReplyToMode({ + ...taggedPayload, + text: cleaned, + audioAsVoice: audioTagResult.audioAsVoice, + }); const payloadKey = buildPayloadKey(blockPayload); if ( streamedPayloadKeys.has(payloadKey) || @@ -519,7 +523,16 @@ export async function runReplyAgent(params: { payloads: sanitizedPayloads, applyReplyToMode, currentMessageId: sessionCtx.MessageSid, - }); + }) + .map((payload) => { + const audioTagResult = extractAudioTag(payload.text); + return { + ...payload, + text: audioTagResult.cleaned ? audioTagResult.cleaned : undefined, + audioAsVoice: audioTagResult.audioAsVoice, + }; + }) + .filter(isRenderablePayload); // Drop final payloads if block streaming is enabled and we already streamed // block replies. Tool-sent duplicates are filtered below. 
/** Detects a single `[[audio_as_file]]` tag (case-insensitive, non-global so `test` is stateless). */
const AUDIO_AS_FILE_TAG = /\[\[audio_as_file\]\]/i;

/**
 * Matches a run of one or more consecutive `[[audio_as_file]]` tags together
 * with the spaces/tabs directly around and between them.
 */
const AUDIO_AS_FILE_TAG_RUN = /[ \t]*(?:\[\[audio_as_file\]\][ \t]*)+/gi;

/**
 * Extract the audio mode tag from text.
 *
 * Supports `[[audio_as_file]]` (case-insensitive) to send audio as a file
 * instead of a voice bubble. Every occurrence of the tag is removed from the
 * returned text.
 *
 * Whitespace is only tidied where a tag was removed; the rest of the text is
 * left untouched apart from trimming both ends, so indentation and spacing
 * inside the message (for example in code blocks) survive.
 *
 * @param text - Raw text that may contain the tag; may be undefined or empty.
 * @returns `cleaned` is the text without tags, trimmed at both ends (`""` for
 *   missing, empty, or whitespace-only input); `audioAsVoice` is `false` when
 *   the tag was present and `true` otherwise; `hasTag` reports whether the tag
 *   was found.
 */
export function extractAudioTag(text?: string): {
  cleaned: string;
  audioAsVoice: boolean;
  hasTag: boolean;
} {
  // No text means no tag: fall back to the default (voice bubble).
  if (!text) return { cleaned: "", audioAsVoice: true, hasTag: false };

  const hasTag = AUDIO_AS_FILE_TAG.test(text);

  const cleaned = text
    .replace(
      AUDIO_AS_FILE_TAG_RUN,
      (run: string, offset: number, whole: string): string => {
        const end = offset + run.length;
        const before = offset === 0 ? "\n" : whole.charAt(offset - 1);
        const after = end === whole.length ? "\n" : whole.charAt(end);
        const atLineStart = before === "\n" || before === "\r";
        const atLineEnd = after === "\n" || after === "\r";
        // A tag at the edge of a line leaves nothing behind; a tag between
        // two words leaves exactly one space so the words do not fuse.
        return atLineStart || atLineEnd ? "" : " ";
      },
    )
    .trim();

  // [[audio_as_file]] -> send as file with metadata, not voice bubble.
  return { cleaned, audioAsVoice: !hasTag, hasTag };
}