feat(telegram): buffer audio blocks for [[audio_as_voice]] tag support
- Add [[audio_as_voice]] detection to splitMediaFromOutput()
- Pass audioAsVoice through the onBlockReply callback chain
- Buffer audio blocks during streaming, then flush them at the end with the correct flag
- Non-audio media still streams immediately
- Fix: emit payloads with the audioAsVoice flag even if text is empty

Co-authored-by: Manuel Hettich <17690367+ManuelHettich@users.noreply.github.com>
This commit is contained in:
committed by
Peter Steinberger
parent
60bd65dfac
commit
05a99aa49b
@@ -1044,6 +1044,7 @@ export async function runEmbeddedPiAgent(params: {
|
||||
onBlockReply?: (payload: {
|
||||
text?: string;
|
||||
mediaUrls?: string[];
|
||||
audioAsVoice?: boolean;
|
||||
}) => void | Promise<void>;
|
||||
blockReplyBreak?: "text_end" | "message_end";
|
||||
blockReplyChunking?: BlockReplyChunking;
|
||||
@@ -1640,6 +1641,7 @@ export async function runEmbeddedPiAgent(params: {
|
||||
text: string;
|
||||
media?: string[];
|
||||
isError?: boolean;
|
||||
audioAsVoice?: boolean;
|
||||
}> = [];
|
||||
|
||||
const errorText = lastAssistant
|
||||
@@ -1656,10 +1658,10 @@ export async function runEmbeddedPiAgent(params: {
|
||||
if (inlineToolResults) {
|
||||
for (const { toolName, meta } of toolMetas) {
|
||||
const agg = formatToolAggregate(toolName, meta ? [meta] : []);
|
||||
const { text: cleanedText, mediaUrls } =
|
||||
const { text: cleanedText, mediaUrls, audioAsVoice } =
|
||||
splitMediaFromOutput(agg);
|
||||
if (cleanedText)
|
||||
replyItems.push({ text: cleanedText, media: mediaUrls });
|
||||
replyItems.push({ text: cleanedText, media: mediaUrls, audioAsVoice });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1678,18 +1680,31 @@ export async function runEmbeddedPiAgent(params: {
|
||||
? [fallbackAnswerText]
|
||||
: [];
|
||||
for (const text of answerTexts) {
|
||||
const { text: cleanedText, mediaUrls } = splitMediaFromOutput(text);
|
||||
if (!cleanedText && (!mediaUrls || mediaUrls.length === 0))
|
||||
const { text: cleanedText, mediaUrls, audioAsVoice } =
|
||||
splitMediaFromOutput(text);
|
||||
if (
|
||||
!cleanedText &&
|
||||
(!mediaUrls || mediaUrls.length === 0) &&
|
||||
!audioAsVoice
|
||||
)
|
||||
continue;
|
||||
replyItems.push({ text: cleanedText, media: mediaUrls });
|
||||
replyItems.push({ text: cleanedText, media: mediaUrls, audioAsVoice });
|
||||
}
|
||||
|
||||
// Check if any replyItem has audioAsVoice tag - if so, apply to all media payloads
|
||||
const hasAudioAsVoiceTag = replyItems.some(
|
||||
(item) => item.audioAsVoice,
|
||||
);
|
||||
|
||||
const payloads = replyItems
|
||||
.map((item) => ({
|
||||
text: item.text?.trim() ? item.text.trim() : undefined,
|
||||
mediaUrls: item.media?.length ? item.media : undefined,
|
||||
mediaUrl: item.media?.[0],
|
||||
isError: item.isError,
|
||||
// Apply audioAsVoice to media payloads if tag was found anywhere in response
|
||||
audioAsVoice:
|
||||
item.audioAsVoice || (hasAudioAsVoiceTag && item.media?.length),
|
||||
}))
|
||||
.filter(
|
||||
(p) =>
|
||||
|
||||
@@ -262,6 +262,7 @@ export function subscribeEmbeddedPiSession(params: {
|
||||
onBlockReply?: (payload: {
|
||||
text?: string;
|
||||
mediaUrls?: string[];
|
||||
audioAsVoice?: boolean;
|
||||
}) => void | Promise<void>;
|
||||
blockReplyBreak?: "text_end" | "message_end";
|
||||
blockReplyChunking?: BlockReplyChunking;
|
||||
@@ -436,11 +437,13 @@ export function subscribeEmbeddedPiSession(params: {
|
||||
lastBlockReplyText = chunk;
|
||||
assistantTexts.push(chunk);
|
||||
if (!params.onBlockReply) return;
|
||||
const { text: cleanedText, mediaUrls } = splitMediaFromOutput(chunk);
|
||||
if (!cleanedText && (!mediaUrls || mediaUrls.length === 0)) return;
|
||||
const { text: cleanedText, mediaUrls, audioAsVoice } = splitMediaFromOutput(chunk);
|
||||
// Skip empty payloads, but always emit if audioAsVoice is set (to propagate the flag)
|
||||
if (!cleanedText && (!mediaUrls || mediaUrls.length === 0) && !audioAsVoice) return;
|
||||
void params.onBlockReply({
|
||||
text: cleanedText,
|
||||
mediaUrls: mediaUrls?.length ? mediaUrls : undefined,
|
||||
audioAsVoice,
|
||||
});
|
||||
};
|
||||
|
||||
@@ -859,12 +862,18 @@ export function subscribeEmbeddedPiSession(params: {
|
||||
);
|
||||
} else {
|
||||
lastBlockReplyText = text;
|
||||
const { text: cleanedText, mediaUrls } =
|
||||
const { text: cleanedText, mediaUrls, audioAsVoice } =
|
||||
splitMediaFromOutput(text);
|
||||
if (cleanedText || (mediaUrls && mediaUrls.length > 0)) {
|
||||
// Emit if there's content OR audioAsVoice flag (to propagate the flag)
|
||||
if (
|
||||
cleanedText ||
|
||||
(mediaUrls && mediaUrls.length > 0) ||
|
||||
audioAsVoice
|
||||
) {
|
||||
void onBlockReply({
|
||||
text: cleanedText,
|
||||
mediaUrls: mediaUrls?.length ? mediaUrls : undefined,
|
||||
audioAsVoice,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user