From 5fedfd8d159a0ff5a114204ccd7639090ebe051c Mon Sep 17 00:00:00 2001 From: Jarvis Date: Thu, 8 Jan 2026 14:26:54 +0000 Subject: [PATCH] chore: format audioAsVoice updates Co-authored-by: Manuel Hettich <17690367+ManuelHettich@users.noreply.github.com> --- src/agents/pi-embedded-runner.ts | 26 ++++++++++++++++++++------ src/agents/pi-embedded-subscribe.ts | 10 +++++++--- src/auto-reply/reply/agent-runner.ts | 5 ++++- src/media/parse.ts | 2 +- src/telegram/bot.ts | 2 +- src/telegram/send.ts | 2 +- 6 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts index 1860c9b6d..e36b58e63 100644 --- a/src/agents/pi-embedded-runner.ts +++ b/src/agents/pi-embedded-runner.ts @@ -1658,10 +1658,17 @@ export async function runEmbeddedPiAgent(params: { if (inlineToolResults) { for (const { toolName, meta } of toolMetas) { const agg = formatToolAggregate(toolName, meta ? [meta] : []); - const { text: cleanedText, mediaUrls, audioAsVoice } = - splitMediaFromOutput(agg); + const { + text: cleanedText, + mediaUrls, + audioAsVoice, + } = splitMediaFromOutput(agg); if (cleanedText) - replyItems.push({ text: cleanedText, media: mediaUrls, audioAsVoice }); + replyItems.push({ + text: cleanedText, + media: mediaUrls, + audioAsVoice, + }); } } @@ -1680,15 +1687,22 @@ export async function runEmbeddedPiAgent(params: { ? [fallbackAnswerText] : []; for (const text of answerTexts) { - const { text: cleanedText, mediaUrls, audioAsVoice } = - splitMediaFromOutput(text); + const { + text: cleanedText, + mediaUrls, + audioAsVoice, + } = splitMediaFromOutput(text); if ( !cleanedText && (!mediaUrls || mediaUrls.length === 0) && !audioAsVoice ) continue; - replyItems.push({ text: cleanedText, media: mediaUrls, audioAsVoice }); + replyItems.push({ + text: cleanedText, + media: mediaUrls, + audioAsVoice, + }); } // Check if any replyItem has audioAsVoice tag - if so, apply to all media payloads diff --git a/src/agents/pi-embedded-subscribe.ts b/src/agents/pi-embedded-subscribe.ts index 8511911d8..001d579f1 100644 --- a/src/agents/pi-embedded-subscribe.ts +++ b/src/agents/pi-embedded-subscribe.ts @@ -440,7 +440,8 @@ export function subscribeEmbeddedPiSession(params: { const splitResult = splitMediaFromOutput(chunk); const { text: cleanedText, mediaUrls, audioAsVoice } = splitResult; // Skip empty payloads, but always emit if audioAsVoice is set (to propagate the flag) - if (!cleanedText && (!mediaUrls || mediaUrls.length === 0) && !audioAsVoice) return; + if (!cleanedText && (!mediaUrls || mediaUrls.length === 0) && !audioAsVoice) + return; void params.onBlockReply({ text: cleanedText, mediaUrls: mediaUrls?.length ? mediaUrls : undefined, @@ -863,8 +864,11 @@ export function subscribeEmbeddedPiSession(params: { ); } else { lastBlockReplyText = text; - const { text: cleanedText, mediaUrls, audioAsVoice } = - splitMediaFromOutput(text); + const { + text: cleanedText, + mediaUrls, + audioAsVoice, + } = splitMediaFromOutput(text); // Emit if there's content OR audioAsVoice flag (to propagate the flag) if ( cleanedText || diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 1063dcdd7..c4878690d 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -541,7 +541,10 @@ export async function runReplyAgent(params: { sessionCtx.MessageSid, ); // Let through payloads with audioAsVoice flag even if empty (need to track it) - if (!isRenderablePayload(taggedPayload) && !payload.audioAsVoice) + if ( + !isRenderablePayload(taggedPayload) && + !payload.audioAsVoice + ) return; const audioTagResult = extractAudioTag(taggedPayload.text); const cleaned = audioTagResult.cleaned || undefined; diff --git a/src/media/parse.ts b/src/media/parse.ts index 08f6d492e..77b4bd9f9 100644 --- a/src/media/parse.ts +++ b/src/media/parse.ts @@ -135,7 +135,7 @@ export function splitMediaFromOutput(raw: string): { if (media.length === 0) { const result: ReturnType = { // Return cleaned text if we found a media token OR audio tag, otherwise original - text: (foundMediaToken || hasAudioAsVoice) ? cleanedText : trimmedRaw, + text: foundMediaToken || hasAudioAsVoice ? cleanedText : trimmedRaw, }; if (hasAudioAsVoice) result.audioAsVoice = true; return result; diff --git a/src/telegram/bot.ts b/src/telegram/bot.ts index 75d339c41..8ec6756b4 100644 --- a/src/telegram/bot.ts +++ b/src/telegram/bot.ts @@ -60,11 +60,11 @@ import { resolveTelegramAccount } from "./accounts.js"; import { createTelegramDraftStream } from "./draft-stream.js"; import { resolveTelegramFetch } from "./fetch.js"; import { markdownToTelegramHtml } from "./format.js"; -import { resolveTelegramVoiceDecision } from "./voice.js"; import { readTelegramAllowFromStore, upsertTelegramPairingRequest, } from "./pairing-store.js"; +import { resolveTelegramVoiceDecision } from "./voice.js"; const PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i; diff --git a/src/telegram/send.ts b/src/telegram/send.ts index b293f7844..7aed6ee31 100644 --- a/src/telegram/send.ts +++ b/src/telegram/send.ts @@ -6,11 +6,11 @@ import type { } from "@grammyjs/types"; import { type ApiClientOptions, Bot, InputFile } from "grammy"; import { loadConfig } from "../config/config.js"; +import { logVerbose } from "../globals.js"; import { formatErrorMessage } from "../infra/errors.js"; import { recordProviderActivity } from "../infra/provider-activity.js"; import type { RetryConfig } from "../infra/retry.js"; import { createTelegramRetryRunner } from "../infra/retry-policy.js"; -import { logVerbose } from "../globals.js"; import { mediaKindFromMime } from "../media/constants.js"; import { isGifMedia } from "../media/mime.js"; import { loadWebMedia } from "../web/media.js";