Merge pull request #490 from jarvis-medmatic/feat/audio-as-voice-tag

feat(telegram): `[[audio_as_voice]]` tag support
This commit is contained in:
Peter Steinberger
2026-01-10 00:52:02 +00:00
committed by GitHub
11 changed files with 259 additions and 20 deletions

View File

@@ -776,6 +776,7 @@ export async function compactEmbeddedPiSession(params: {
const enqueueGlobal =
params.enqueue ??
((task, opts) => enqueueCommandInLane(globalLane, task, opts));
const runAbortController = new AbortController();
return enqueueCommandInLane(sessionLane, () =>
enqueueGlobal(async () => {
const resolvedWorkspace = resolveUserPath(params.workspaceDir);
@@ -1045,6 +1046,7 @@ export async function runEmbeddedPiAgent(params: {
onBlockReply?: (payload: {
text?: string;
mediaUrls?: string[];
audioAsVoice?: boolean;
}) => void | Promise<void>;
blockReplyBreak?: "text_end" | "message_end";
blockReplyChunking?: BlockReplyChunking;
@@ -1641,6 +1643,7 @@ export async function runEmbeddedPiAgent(params: {
text: string;
media?: string[];
isError?: boolean;
audioAsVoice?: boolean;
}> = [];
const errorText = lastAssistant
@@ -1657,10 +1660,17 @@ export async function runEmbeddedPiAgent(params: {
if (inlineToolResults) {
for (const { toolName, meta } of toolMetas) {
const agg = formatToolAggregate(toolName, meta ? [meta] : []);
const { text: cleanedText, mediaUrls } =
splitMediaFromOutput(agg);
const {
text: cleanedText,
mediaUrls,
audioAsVoice,
} = splitMediaFromOutput(agg);
if (cleanedText)
replyItems.push({ text: cleanedText, media: mediaUrls });
replyItems.push({
text: cleanedText,
media: mediaUrls,
audioAsVoice,
});
}
}
@@ -1679,18 +1689,37 @@ export async function runEmbeddedPiAgent(params: {
? [fallbackAnswerText]
: [];
for (const text of answerTexts) {
const { text: cleanedText, mediaUrls } = splitMediaFromOutput(text);
if (!cleanedText && (!mediaUrls || mediaUrls.length === 0))
const {
text: cleanedText,
mediaUrls,
audioAsVoice,
} = splitMediaFromOutput(text);
if (
!cleanedText &&
(!mediaUrls || mediaUrls.length === 0) &&
!audioAsVoice
)
continue;
replyItems.push({ text: cleanedText, media: mediaUrls });
replyItems.push({
text: cleanedText,
media: mediaUrls,
audioAsVoice,
});
}
// Check if any replyItem has audioAsVoice tag - if so, apply to all media payloads
const hasAudioAsVoiceTag = replyItems.some(
(item) => item.audioAsVoice,
);
const payloads = replyItems
.map((item) => ({
text: item.text?.trim() ? item.text.trim() : undefined,
mediaUrls: item.media?.length ? item.media : undefined,
mediaUrl: item.media?.[0],
isError: item.isError,
// Apply audioAsVoice to media payloads if tag was found anywhere in response
audioAsVoice:
item.audioAsVoice || (hasAudioAsVoiceTag && item.media?.length),
}))
.filter(
(p) =>

View File

@@ -262,6 +262,7 @@ export function subscribeEmbeddedPiSession(params: {
onBlockReply?: (payload: {
text?: string;
mediaUrls?: string[];
audioAsVoice?: boolean;
}) => void | Promise<void>;
blockReplyBreak?: "text_end" | "message_end";
blockReplyChunking?: BlockReplyChunking;
@@ -436,11 +437,15 @@ export function subscribeEmbeddedPiSession(params: {
lastBlockReplyText = chunk;
assistantTexts.push(chunk);
if (!params.onBlockReply) return;
const { text: cleanedText, mediaUrls } = splitMediaFromOutput(chunk);
if (!cleanedText && (!mediaUrls || mediaUrls.length === 0)) return;
const splitResult = splitMediaFromOutput(chunk);
const { text: cleanedText, mediaUrls, audioAsVoice } = splitResult;
// Skip empty payloads, but always emit if audioAsVoice is set (to propagate the flag)
if (!cleanedText && (!mediaUrls || mediaUrls.length === 0) && !audioAsVoice)
return;
void params.onBlockReply({
text: cleanedText,
mediaUrls: mediaUrls?.length ? mediaUrls : undefined,
audioAsVoice,
});
};
@@ -859,12 +864,21 @@ export function subscribeEmbeddedPiSession(params: {
);
} else {
lastBlockReplyText = text;
const { text: cleanedText, mediaUrls } =
splitMediaFromOutput(text);
if (cleanedText || (mediaUrls && mediaUrls.length > 0)) {
const {
text: cleanedText,
mediaUrls,
audioAsVoice,
} = splitMediaFromOutput(text);
// Emit if there's content OR audioAsVoice flag (to propagate the flag)
if (
cleanedText ||
(mediaUrls && mediaUrls.length > 0) ||
audioAsVoice
) {
void onBlockReply({
text: cleanedText,
mediaUrls: mediaUrls?.length ? mediaUrls : undefined,
audioAsVoice,
});
}
}