diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts
index 7d923bf6a..04b21cc43 100644
--- a/src/agents/pi-embedded-runner.ts
+++ b/src/agents/pi-embedded-runner.ts
@@ -17,6 +17,7 @@ import {
   SettingsManager,
 } from "@mariozechner/pi-coding-agent";
 import { resolveHeartbeatPrompt } from "../auto-reply/heartbeat.js";
+import { parseReplyDirectives } from "../auto-reply/reply/reply-directives.js";
 import type {
   ReasoningLevel,
   ThinkLevel,
@@ -28,7 +29,6 @@ import type { ClawdbotConfig } from "../config/config.js";
 import { resolveProviderCapabilities } from "../config/provider-capabilities.js";
 import { getMachineDisplayName } from "../infra/machine-name.js";
 import { createSubsystemLogger } from "../logging.js";
-import { splitMediaFromOutput } from "../media/parse.js";
 import {
   type enqueueCommand,
   enqueueCommandInLane,
@@ -1626,6 +1626,9 @@ export async function runEmbeddedPiAgent(params: {
     media?: string[];
     isError?: boolean;
     audioAsVoice?: boolean;
+    replyToId?: string;
+    replyToTag?: boolean;
+    replyToCurrent?: boolean;
   }> = [];
 
   const errorText = lastAssistant
@@ -1646,12 +1649,18 @@ export async function runEmbeddedPiAgent(params: {
           text: cleanedText,
           mediaUrls,
           audioAsVoice,
-        } = splitMediaFromOutput(agg);
+          replyToId,
+          replyToTag,
+          replyToCurrent,
+        } = parseReplyDirectives(agg);
         if (cleanedText)
           replyItems.push({
             text: cleanedText,
             media: mediaUrls,
             audioAsVoice,
+            replyToId,
+            replyToTag,
+            replyToCurrent,
           });
       }
     }
@@ -1675,7 +1684,10 @@ export async function runEmbeddedPiAgent(params: {
       text: cleanedText,
       mediaUrls,
       audioAsVoice,
-    } = splitMediaFromOutput(text);
+      replyToId,
+      replyToTag,
+      replyToCurrent,
+    } = parseReplyDirectives(text);
     if (
       !cleanedText &&
       (!mediaUrls || mediaUrls.length === 0) &&
@@ -1686,6 +1698,9 @@ export async function runEmbeddedPiAgent(params: {
       text: cleanedText,
       media: mediaUrls,
       audioAsVoice,
+      replyToId,
+      replyToTag,
+      replyToCurrent,
     });
   }
 
@@ -1699,6 +1714,9 @@ export async function runEmbeddedPiAgent(params: {
       mediaUrls: item.media?.length ? item.media : undefined,
       mediaUrl: item.media?.[0],
       isError: item.isError,
+      replyToId: item.replyToId,
+      replyToTag: item.replyToTag,
+      replyToCurrent: item.replyToCurrent,
       // Apply audioAsVoice to media payloads if tag was found anywhere in response
       audioAsVoice:
         item.audioAsVoice || (hasAudioAsVoiceTag && item.media?.length),
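Note (reviewer sketch, not part of the patch): with this change the embedded runner forwards reply-threading directives alongside media and the audio flag. A minimal sketch of the mapping, based on the `ReplyDirectiveParseResult` type introduced in `src/auto-reply/reply/reply-directives.ts` further down in this diff; the sample string is hypothetical and assumes `splitMediaFromOutput` passes plain text through unchanged when no media markers are present.

```ts
import { parseReplyDirectives } from "../auto-reply/reply/reply-directives.js";

// Hypothetical aggregate produced by a tool summary or assistant turn.
const agg = "On it - see the thread above. [[reply_to: 12345]]";

const { text, mediaUrls, audioAsVoice, replyToId, replyToTag, replyToCurrent } =
  parseReplyDirectives(agg);

// Mirrors the runner's replyItems entries: threading fields ride along with the
// cleaned text and any media extracted internally by splitMediaFromOutput.
const item = {
  text,             // "On it - see the thread above." (tag stripped, whitespace normalized)
  media: mediaUrls, // no media markers in this sample
  audioAsVoice,     // only set when [[audio_as_voice]] appeared
  replyToId,        // "12345" from [[reply_to: ...]]
  replyToTag,       // true: a reply tag was present
  replyToCurrent,   // false: [[reply_to_current]] was not used
};
```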
diff --git a/src/agents/pi-embedded-subscribe.ts b/src/agents/pi-embedded-subscribe.ts
index 001d579f1..1f2350e11 100644
--- a/src/agents/pi-embedded-subscribe.ts
+++ b/src/agents/pi-embedded-subscribe.ts
@@ -3,12 +3,12 @@ import path from "node:path";
 import type { AgentEvent, AgentMessage } from "@mariozechner/pi-agent-core";
 import type { AssistantMessage } from "@mariozechner/pi-ai";
 import type { AgentSession } from "@mariozechner/pi-coding-agent";
+import { parseReplyDirectives } from "../auto-reply/reply/reply-directives.js";
 import type { ReasoningLevel } from "../auto-reply/thinking.js";
 import { formatToolAggregate } from "../auto-reply/tool-meta.js";
 import { resolveStateDir } from "../config/paths.js";
 import { emitAgentEvent } from "../infra/agent-events.js";
 import { createSubsystemLogger } from "../logging.js";
-import { splitMediaFromOutput } from "../media/parse.js";
 import { truncateUtf16Safe } from "../utils.js";
 import type { BlockReplyChunking } from "./pi-embedded-block-chunker.js";
 import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";
@@ -383,7 +383,7 @@ export function subscribeEmbeddedPiSession(params: {
   const emitToolSummary = (toolName?: string, meta?: string) => {
     if (!params.onToolResult) return;
     const agg = formatToolAggregate(toolName, meta ? [meta] : undefined);
-    const { text: cleanedText, mediaUrls } = splitMediaFromOutput(agg);
+    const { text: cleanedText, mediaUrls } = parseReplyDirectives(agg);
     if (!cleanedText && (!mediaUrls || mediaUrls.length === 0)) return;
     try {
       void params.onToolResult({
@@ -437,7 +437,7 @@ export function subscribeEmbeddedPiSession(params: {
       lastBlockReplyText = chunk;
       assistantTexts.push(chunk);
       if (!params.onBlockReply) return;
-      const splitResult = splitMediaFromOutput(chunk);
+      const splitResult = parseReplyDirectives(chunk);
       const { text: cleanedText, mediaUrls, audioAsVoice } = splitResult;
       // Skip empty payloads, but always emit if audioAsVoice is set (to propagate the flag)
       if (!cleanedText && (!mediaUrls || mediaUrls.length === 0) && !audioAsVoice)
@@ -739,7 +739,7 @@ export function subscribeEmbeddedPiSession(params: {
         if (next && next !== lastStreamedAssistant) {
           lastStreamedAssistant = next;
           const { text: cleanedText, mediaUrls } =
-            splitMediaFromOutput(next);
+            parseReplyDirectives(next);
           emitAgentEvent({
             runId: params.runId,
             stream: "assistant",
@@ -868,7 +868,7 @@ export function subscribeEmbeddedPiSession(params: {
             text: cleanedText,
             mediaUrls,
             audioAsVoice,
-          } = splitMediaFromOutput(text);
+          } = parseReplyDirectives(text);
           // Emit if there's content OR audioAsVoice flag (to propagate the flag)
           if (
             cleanedText ||
diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts
index 3391666da..011492db6 100644
--- a/src/auto-reply/reply/agent-runner.ts
+++ b/src/auto-reply/reply/agent-runner.ts
@@ -33,9 +33,8 @@
 import { stripHeartbeatToken } from "../heartbeat.js";
 import type { OriginatingChannelType, TemplateContext } from "../templating.js";
 import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js";
-import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../tokens.js";
+import { SILENT_REPLY_TOKEN } from "../tokens.js";
 import type { GetReplyOptions, ReplyPayload } from "../types.js";
-import { parseAudioTag } from "./audio-tags.js";
 import {
   createAudioAsVoiceBuffer,
   createBlockReplyPipeline,
@@ -48,6 +47,7 @@ import {
   type QueueSettings,
   scheduleFollowupDrain,
 } from "./queue.js";
+import { parseReplyDirectives } from "./reply-directives.js";
 import {
   applyReplyTagsToPayload,
   applyReplyThreading,
@@ -550,28 +550,39 @@ export async function runReplyAgent(params: {
           !payload.audioAsVoice
         )
           return;
-        const audioTagResult = parseAudioTag(taggedPayload.text);
-        const cleaned = audioTagResult.text || undefined;
+        const parsed = parseReplyDirectives(
+          taggedPayload.text ?? "",
+          {
+            currentMessageId: sessionCtx.MessageSid,
+            silentToken: SILENT_REPLY_TOKEN,
+          },
+        );
+        const cleaned = parsed.text || undefined;
         const hasMedia =
           Boolean(taggedPayload.mediaUrl) ||
           (taggedPayload.mediaUrls?.length ?? 0) > 0;
         // Skip empty payloads unless they have audioAsVoice flag (need to track it)
-        if (!cleaned && !hasMedia && !payload.audioAsVoice) return;
         if (
-          isSilentReplyText(cleaned, SILENT_REPLY_TOKEN) &&
-          !hasMedia
+          !cleaned &&
+          !hasMedia &&
+          !payload.audioAsVoice &&
+          !parsed.audioAsVoice
         )
           return;
+        if (parsed.isSilent && !hasMedia) return;
 
         const blockPayload: ReplyPayload = applyReplyToMode({
           ...taggedPayload,
           text: cleaned,
-          audioAsVoice:
-            audioTagResult.audioAsVoice || payload.audioAsVoice,
+          audioAsVoice: parsed.audioAsVoice || payload.audioAsVoice,
+          replyToId: taggedPayload.replyToId ?? parsed.replyToId,
+          replyToTag: taggedPayload.replyToTag || parsed.replyToTag,
+          replyToCurrent:
+            taggedPayload.replyToCurrent || parsed.replyToCurrent,
         });
 
         void typingSignals
-          .signalTextDelta(taggedPayload.text)
+          .signalTextDelta(cleaned ?? taggedPayload.text)
           .catch((err) => {
             logVerbose(
               `block reply typing signal failed: ${String(err)}`,
@@ -735,11 +746,21 @@
       currentMessageId: sessionCtx.MessageSid,
     })
     .map((payload) => {
-      const audioTagResult = parseAudioTag(payload.text);
+      const parsed = parseReplyDirectives(payload.text ?? "", {
+        currentMessageId: sessionCtx.MessageSid,
+        silentToken: SILENT_REPLY_TOKEN,
+      });
+      const mediaUrls = payload.mediaUrls ?? parsed.mediaUrls;
+      const mediaUrl = payload.mediaUrl ?? parsed.mediaUrl ?? mediaUrls?.[0];
       return {
         ...payload,
-        text: audioTagResult.text ? audioTagResult.text : undefined,
-        audioAsVoice: audioTagResult.audioAsVoice,
+        text: parsed.text ? parsed.text : undefined,
+        mediaUrls,
+        mediaUrl,
+        replyToId: payload.replyToId ?? parsed.replyToId,
+        replyToTag: payload.replyToTag || parsed.replyToTag,
+        replyToCurrent: payload.replyToCurrent || parsed.replyToCurrent,
+        audioAsVoice: payload.audioAsVoice || parsed.audioAsVoice,
       };
     })
     .filter(isRenderablePayload);
@@ -775,8 +796,7 @@ export async function runReplyAgent(params: {
   const shouldSignalTyping = replyPayloads.some((payload) => {
     const trimmed = payload.text?.trim();
-    if (trimmed && !isSilentReplyText(trimmed, SILENT_REPLY_TOKEN))
-      return true;
+    if (trimmed) return true;
     if (payload.mediaUrl) return true;
     if (payload.mediaUrls && payload.mediaUrls.length > 0) return true;
     return false;
diff --git a/src/auto-reply/reply/reply-directives.ts b/src/auto-reply/reply/reply-directives.ts
new file mode 100644
index 000000000..d1768d4ad
--- /dev/null
+++ b/src/auto-reply/reply/reply-directives.ts
@@ -0,0 +1,49 @@
+import { splitMediaFromOutput } from "../../media/parse.js";
+import { parseInlineDirectives } from "../../utils/directive-tags.js";
+import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../tokens.js";
+
+export type ReplyDirectiveParseResult = {
+  text: string;
+  mediaUrls?: string[];
+  mediaUrl?: string;
+  replyToId?: string;
+  replyToCurrent: boolean;
+  replyToTag: boolean;
+  audioAsVoice?: boolean;
+  isSilent: boolean;
+};
+
+export function parseReplyDirectives(
+  raw: string,
+  options: { currentMessageId?: string; silentToken?: string } = {},
+): ReplyDirectiveParseResult {
+  const split = splitMediaFromOutput(raw);
+  let text = split.text ?? "";
+
+  const replyParsed = parseInlineDirectives(text, {
+    currentMessageId: options.currentMessageId,
+    stripAudioTag: false,
+    stripReplyTags: true,
+  });
+
+  if (replyParsed.hasReplyTag) {
+    text = replyParsed.text;
+  }
+
+  const silentToken = options.silentToken ?? SILENT_REPLY_TOKEN;
+  const isSilent = isSilentReplyText(text, silentToken);
+  if (isSilent) {
+    text = "";
+  }
+
+  return {
+    text,
+    mediaUrls: split.mediaUrls,
+    mediaUrl: split.mediaUrl,
+    replyToId: replyParsed.replyToId,
+    replyToCurrent: replyParsed.replyToCurrent,
+    replyToTag: replyParsed.hasReplyTag,
+    audioAsVoice: split.audioAsVoice,
+    isSilent,
+  };
+}
diff --git a/src/auto-reply/reply/reply-payloads.ts b/src/auto-reply/reply/reply-payloads.ts
index be5c94698..e1f1707dd 100644
--- a/src/auto-reply/reply/reply-payloads.ts
+++ b/src/auto-reply/reply/reply-payloads.ts
@@ -10,8 +10,23 @@ export function applyReplyTagsToPayload(
   payload: ReplyPayload,
   currentMessageId?: string,
 ): ReplyPayload {
-  if (typeof payload.text !== "string") return payload;
-  const { cleaned, replyToId, hasTag } = extractReplyToTag(
+  if (typeof payload.text !== "string") {
+    if (!payload.replyToCurrent || payload.replyToId) return payload;
+    return {
+      ...payload,
+      replyToId: currentMessageId?.trim() || undefined,
+    };
+  }
+  const shouldParseTags = payload.text.includes("[[");
+  if (!shouldParseTags) {
+    if (!payload.replyToCurrent || payload.replyToId) return payload;
+    return {
+      ...payload,
+      replyToId: currentMessageId?.trim() || undefined,
+      replyToTag: payload.replyToTag ?? true,
+    };
+  }
+  const { cleaned, replyToId, replyToCurrent, hasTag } = extractReplyToTag(
     payload.text,
     currentMessageId,
   );
@@ -20,6 +35,7 @@
     text: cleaned ? cleaned : undefined,
     replyToId: replyToId ?? payload.replyToId,
     replyToTag: hasTag || payload.replyToTag,
+    replyToCurrent: replyToCurrent || payload.replyToCurrent,
   };
 }
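Note (reviewer sketch, not part of the patch): `extractReplyToTag` now delegates to the shared parser, and `applyReplyTagsToPayload` only invokes it when the text contains "[[". Expected behaviour of the extraction path, derived from `parseInlineDirectives` in `src/utils/directive-tags.ts` below; the ids and strings are made up.

```ts
import { extractReplyToTag } from "./reply-tags.js";

// Explicit ids win over [[reply_to_current]]; tags are stripped from the text.
const explicit = extractReplyToTag("Sure thing [[reply_to: 9876]]", "111");
// => { cleaned: "Sure thing", replyToId: "9876", replyToCurrent: false, hasTag: true }

// [[reply_to_current]] resolves to the current message id when one is known...
const current = extractReplyToTag("[[reply_to_current]] done", "111");
// => { cleaned: "done", replyToId: "111", replyToCurrent: true, hasTag: true }

// ...and stays unresolved otherwise, which is exactly what the new
// replyToCurrent flag on ReplyPayload preserves for later stages.
const unresolved = extractReplyToTag("[[reply_to_current]] done");
// => { cleaned: "done", replyToId: undefined, replyToCurrent: true, hasTag: true }
```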
diff --git a/src/auto-reply/reply/reply-tags.ts b/src/auto-reply/reply/reply-tags.ts
index 9e81affcb..f5e416260 100644
--- a/src/auto-reply/reply/reply-tags.ts
+++ b/src/auto-reply/reply/reply-tags.ts
@@ -1,12 +1,4 @@
-const REPLY_TAG_RE =
-  /\[\[\s*(?:reply_to_current|reply_to\s*:\s*([^\]\n]+))\s*\]\]/gi;
-
-function normalizeReplyText(text: string) {
-  return text
-    .replace(/[ \t]+/g, " ")
-    .replace(/[ \t]*\n[ \t]*/g, "\n")
-    .trim();
-}
+import { parseInlineDirectives } from "../../utils/directive-tags.js";
 
 export function extractReplyToTag(
   text?: string,
@@ -14,31 +6,17 @@
 ): {
   cleaned: string;
   replyToId?: string;
+  replyToCurrent: boolean;
   hasTag: boolean;
 } {
-  if (!text) return { cleaned: "", hasTag: false };
-
-  let sawCurrent = false;
-  let lastExplicitId: string | undefined;
-  let hasTag = false;
-
-  const cleaned = normalizeReplyText(
-    text.replace(REPLY_TAG_RE, (_full, idRaw: string | undefined) => {
-      hasTag = true;
-      if (idRaw === undefined) {
-        sawCurrent = true;
-        return " ";
-      }
-
-      const id = idRaw.trim();
-      if (id) lastExplicitId = id;
-      return " ";
-    }),
-  );
-
-  const replyToId =
-    lastExplicitId ??
-    (sawCurrent ? currentMessageId?.trim() || undefined : undefined);
-
-  return { cleaned, replyToId, hasTag };
+  const result = parseInlineDirectives(text, {
+    currentMessageId,
+    stripAudioTag: false,
+  });
+  return {
+    cleaned: result.text,
+    replyToId: result.replyToId,
+    replyToCurrent: result.replyToCurrent,
+    hasTag: result.hasReplyTag,
+  };
 }
diff --git a/src/auto-reply/types.ts b/src/auto-reply/types.ts
index fb7b8321a..9344276cc 100644
--- a/src/auto-reply/types.ts
+++ b/src/auto-reply/types.ts
@@ -31,6 +31,8 @@ export type ReplyPayload = {
   mediaUrls?: string[];
   replyToId?: string;
   replyToTag?: boolean;
+  /** True when [[reply_to_current]] was present but not yet mapped to a message id. */
+  replyToCurrent?: boolean;
   /** Send audio as voice message (bubble) instead of audio file. Defaults to false. */
   audioAsVoice?: boolean;
   isError?: boolean;
diff --git a/src/discord/monitor.ts b/src/discord/monitor.ts
index 6dffa3443..b58495768 100644
--- a/src/discord/monitor.ts
+++ b/src/discord/monitor.ts
@@ -51,7 +51,7 @@ import { formatDurationSeconds } from "../infra/format-duration.js";
 import { recordProviderActivity } from "../infra/provider-activity.js";
 import { enqueueSystemEvent } from "../infra/system-events.js";
 import { getChildLogger } from "../logging.js";
-import { detectMime } from "../media/mime.js";
+import { fetchRemoteMedia } from "../media/fetch.js";
 import { saveMediaBuffer } from "../media/store.js";
 import { buildPairingReply } from "../pairing/pairing-messages.js";
 import {
@@ -1879,19 +1879,16 @@ async function resolveMediaList(
   const out: DiscordMediaInfo[] = [];
   for (const attachment of attachments) {
     try {
-      const res = await fetch(attachment.url);
-      if (!res.ok) {
-        throw new Error(
-          `Failed to download discord attachment: HTTP ${res.status}`,
-        );
-      }
-      const buffer = Buffer.from(await res.arrayBuffer());
-      const mime = await detectMime({
-        buffer,
-        headerMime: attachment.content_type ?? res.headers.get("content-type"),
-        filePath: attachment.filename ?? attachment.url,
+      const fetched = await fetchRemoteMedia({
+        url: attachment.url,
+        filePathHint: attachment.filename ?? attachment.url,
       });
-      const saved = await saveMediaBuffer(buffer, mime, "inbound", maxBytes);
+      const saved = await saveMediaBuffer(
+        fetched.buffer,
+        fetched.contentType ?? attachment.content_type,
+        "inbound",
+        maxBytes,
+      );
       out.push({
         path: saved.path,
         contentType: saved.contentType,
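Note (reviewer sketch, not part of the patch): the Discord, Slack and MS Teams monitors now share `fetchRemoteMedia` from `src/media/fetch.ts`, which is not part of this diff. The contract below is inferred from the call sites only; treat the field names and their optionality as assumptions.

```ts
// Inferred contract only - the real definition lives in src/media/fetch.ts.
// Field names are taken from the call sites in this diff, nothing else.
type FetchRemoteMediaParams = {
  url: string;
  filePathHint?: string;
  fetchImpl?: typeof fetch; // presumably defaults to the global fetch
};

type FetchedRemoteMedia = {
  buffer: Buffer;
  contentType?: string;
  fileName?: string;
};

declare function fetchRemoteMedia(
  params: FetchRemoteMediaParams,
): Promise<FetchedRemoteMedia>;

// Discord-style call site: download first, then let saveMediaBuffer
// enforce the size cap and persist the inbound file.
const fetched = await fetchRemoteMedia({
  url: "https://cdn.example.com/attachment.png", // placeholder URL
  filePathHint: "attachment.png",
});
```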
diff --git a/src/media/audio-tags.ts b/src/media/audio-tags.ts
index 4ebb0b6fb..51591539a 100644
--- a/src/media/audio-tags.ts
+++ b/src/media/audio-tags.ts
@@ -1,3 +1,5 @@
+import { parseInlineDirectives } from "../utils/directive-tags.js";
+
 /**
  * Extract audio mode tag from text.
  * Supports [[audio_as_voice]] to send audio as voice bubble instead of file.
@@ -8,24 +10,10 @@ export function parseAudioTag(text?: string): {
   audioAsVoice: boolean;
   hadTag: boolean;
 } {
-  if (!text) return { text: "", audioAsVoice: false, hadTag: false };
-  let cleaned = text;
-  let audioAsVoice = false; // default: audio file (backward compatible)
-  let hadTag = false;
-
-  // [[audio_as_voice]] -> send as voice bubble (opt-in)
-  const voiceMatch = cleaned.match(/\[\[audio_as_voice\]\]/i);
-  if (voiceMatch) {
-    cleaned = cleaned.replace(/\[\[audio_as_voice\]\]/gi, " ");
-    audioAsVoice = true;
-    hadTag = true;
-  }
-
-  // Clean up whitespace
-  cleaned = cleaned
-    .replace(/[ \t]+/g, " ")
-    .replace(/[ \t]*\n[ \t]*/g, "\n")
-    .trim();
-
-  return { text: cleaned, audioAsVoice, hadTag };
+  const result = parseInlineDirectives(text, { stripReplyTags: false });
+  return {
+    text: result.text,
+    audioAsVoice: result.audioAsVoice,
+    hadTag: result.hasAudioTag,
+  };
 }
diff --git a/src/media/audio.ts b/src/media/audio.ts
new file mode 100644
index 000000000..f1341740e
--- /dev/null
+++ b/src/media/audio.ts
@@ -0,0 +1,18 @@
+import { getFileExtension } from "./mime.js";
+
+const VOICE_AUDIO_EXTENSIONS = new Set([".oga", ".ogg", ".opus"]);
+
+export function isVoiceCompatibleAudio(opts: {
+  contentType?: string | null;
+  fileName?: string | null;
+}): boolean {
+  const mime = opts.contentType?.toLowerCase();
+  if (mime && (mime.includes("ogg") || mime.includes("opus"))) {
+    return true;
+  }
+  const fileName = opts.fileName?.trim();
+  if (!fileName) return false;
+  const ext = getFileExtension(fileName);
+  if (!ext) return false;
+  return VOICE_AUDIO_EXTENSIONS.has(ext);
+}
diff --git a/src/msteams/attachments.ts b/src/msteams/attachments.ts
index bb1b63f77..dab2addc6 100644
--- a/src/msteams/attachments.ts
+++ b/src/msteams/attachments.ts
@@ -1,3 +1,4 @@
+import { fetchRemoteMedia } from "../media/fetch.js";
 import { detectMime } from "../media/mime.js";
 import { saveMediaBuffer } from "../media/store.js";
 
@@ -740,23 +741,28 @@ export async function downloadMSTeamsImageAttachments(params: {
   for (const candidate of candidates) {
     if (!isUrlAllowed(candidate.url, allowHosts)) continue;
     try {
-      const res = await fetchWithAuthFallback({
+      const fetchImpl: typeof fetch = (input) => {
+        const url =
+          typeof input === "string"
+            ? input
+            : input instanceof URL
+              ? input.toString()
+              : input.url;
+        return fetchWithAuthFallback({
+          url,
+          tokenProvider: params.tokenProvider,
+          fetchFn: params.fetchFn,
+        });
+      };
+      const fetched = await fetchRemoteMedia({
         url: candidate.url,
-        tokenProvider: params.tokenProvider,
-        fetchFn: params.fetchFn,
-      });
-      if (!res.ok) continue;
-      const buffer = Buffer.from(await res.arrayBuffer());
-      if (buffer.byteLength > params.maxBytes) continue;
-      const mime = await detectMime({
-        buffer,
-        headerMime:
-          candidate.contentTypeHint ?? res.headers.get("content-type"),
-        filePath: candidate.fileHint ?? candidate.url,
+        fetchImpl,
+        filePathHint: candidate.fileHint,
       });
+      if (fetched.buffer.byteLength > params.maxBytes) continue;
       const saved = await saveMediaBuffer(
-        buffer,
-        mime,
+        fetched.buffer,
+        fetched.contentType ?? candidate.contentTypeHint,
         "inbound",
         params.maxBytes,
       );
diff --git a/src/slack/monitor.ts b/src/slack/monitor.ts
index d95443b3c..62278e899 100644
--- a/src/slack/monitor.ts
+++ b/src/slack/monitor.ts
@@ -45,7 +45,7 @@ import {
 import { danger, logVerbose, shouldLogVerbose } from "../globals.js";
 import { enqueueSystemEvent } from "../infra/system-events.js";
 import { getChildLogger } from "../logging.js";
-import { detectMime } from "../media/mime.js";
+import { fetchRemoteMedia } from "../media/fetch.js";
 import { saveMediaBuffer } from "../media/store.js";
 import { buildPairingReply } from "../pairing/pairing-messages.js";
 import {
@@ -355,27 +355,28 @@ async function resolveSlackMedia(params: {
       const url = file.url_private_download ?? file.url_private;
       if (!url) continue;
       try {
-        const res = await fetch(url, {
-          headers: { Authorization: `Bearer ${params.token}` },
-        });
-        if (!res.ok) continue;
-        const buffer = Buffer.from(await res.arrayBuffer());
-        if (buffer.byteLength > params.maxBytes) continue;
-        const contentType = await detectMime({
-          buffer,
-          headerMime: res.headers.get("content-type"),
-          filePath: file.name,
+        const fetchImpl: typeof fetch = (input, init) => {
+          const headers = new Headers(init?.headers);
+          headers.set("Authorization", `Bearer ${params.token}`);
+          return fetch(input, { ...init, headers });
+        };
+        const fetched = await fetchRemoteMedia({
+          url,
+          fetchImpl,
+          filePathHint: file.name,
         });
+        if (fetched.buffer.byteLength > params.maxBytes) continue;
         const saved = await saveMediaBuffer(
-          buffer,
-          contentType ?? file.mimetype,
+          fetched.buffer,
+          fetched.contentType ?? file.mimetype,
           "inbound",
           params.maxBytes,
         );
+        const label = fetched.fileName ?? file.name;
         return {
           path: saved.path,
           contentType: saved.contentType,
-          placeholder: file.name ? `[Slack file: ${file.name}]` : "[Slack file]",
+          placeholder: label ? `[Slack file: ${label}]` : "[Slack file]",
         };
       } catch {
         // Ignore download failures and fall through to the next file.
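Note (reviewer sketch, not part of the patch): both the Slack and MS Teams paths now inject authentication by handing `fetchRemoteMedia` a fetch-compatible wrapper instead of issuing the request themselves. A generalized bearer-token wrapper in the same style as the Slack code above; the URL and token are placeholders.

```ts
import { fetchRemoteMedia } from "../media/fetch.js";

// Keep the caller's init intact, but force the Authorization header,
// exactly like the Slack monitor's inline fetchImpl does.
function withBearerToken(token: string): typeof fetch {
  return (input, init) => {
    const headers = new Headers(init?.headers);
    headers.set("Authorization", `Bearer ${token}`);
    return fetch(input, { ...init, headers });
  };
}

// Hypothetical usage; fetchRemoteMedia then handles download and MIME sniffing.
const fetched = await fetchRemoteMedia({
  url: "https://files.example.com/private/file.pdf",
  fetchImpl: withBearerToken("xoxb-example-token"),
  filePathHint: "file.pdf",
});
```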
diff --git a/src/telegram/voice.ts b/src/telegram/voice.ts
index c78ca4fee..bf296c8f1 100644
--- a/src/telegram/voice.ts
+++ b/src/telegram/voice.ts
@@ -1,18 +1,10 @@
-import { getFileExtension } from "../media/mime.js";
+import { isVoiceCompatibleAudio } from "../media/audio.js";
 
 export function isTelegramVoiceCompatible(opts: {
   contentType?: string | null;
   fileName?: string | null;
 }): boolean {
-  const mime = opts.contentType?.toLowerCase();
-  if (mime && (mime.includes("ogg") || mime.includes("opus"))) {
-    return true;
-  }
-  const fileName = opts.fileName?.trim();
-  if (!fileName) return false;
-  const ext = getFileExtension(fileName);
-  if (!ext) return false;
-  return ext === ".ogg" || ext === ".opus" || ext === ".oga";
+  return isVoiceCompatibleAudio(opts);
 }
 
 export function resolveTelegramVoiceDecision(opts: {
diff --git a/src/utils/directive-tags.ts b/src/utils/directive-tags.ts
new file mode 100644
index 000000000..4f858c4ba
--- /dev/null
+++ b/src/utils/directive-tags.ts
@@ -0,0 +1,87 @@
+export type InlineDirectiveParseResult = {
+  text: string;
+  audioAsVoice: boolean;
+  replyToId?: string;
+  replyToCurrent: boolean;
+  hasAudioTag: boolean;
+  hasReplyTag: boolean;
+};
+
+type InlineDirectiveParseOptions = {
+  currentMessageId?: string;
+  stripAudioTag?: boolean;
+  stripReplyTags?: boolean;
+};
+
+const AUDIO_TAG_RE = /\[\[\s*audio_as_voice\s*\]\]/gi;
+const REPLY_TAG_RE =
+  /\[\[\s*(?:reply_to_current|reply_to\s*:\s*([^\]\n]+))\s*\]\]/gi;
+
+function normalizeDirectiveWhitespace(text: string): string {
+  return text
+    .replace(/[ \t]+/g, " ")
+    .replace(/[ \t]*\n[ \t]*/g, "\n")
+    .trim();
+}
+
+export function parseInlineDirectives(
+  text?: string,
+  options: InlineDirectiveParseOptions = {},
+): InlineDirectiveParseResult {
+  const {
+    currentMessageId,
+    stripAudioTag = true,
+    stripReplyTags = true,
+  } = options;
+  if (!text) {
+    return {
+      text: "",
+      audioAsVoice: false,
+      replyToCurrent: false,
+      hasAudioTag: false,
+      hasReplyTag: false,
+    };
+  }
+
+  let cleaned = text;
+  let audioAsVoice = false;
+  let hasAudioTag = false;
+  let hasReplyTag = false;
+  let sawCurrent = false;
+  let lastExplicitId: string | undefined;
+
+  cleaned = cleaned.replace(AUDIO_TAG_RE, (match) => {
+    audioAsVoice = true;
+    hasAudioTag = true;
+    return stripAudioTag ? " " : match;
+  });
+
+  cleaned = cleaned.replace(
+    REPLY_TAG_RE,
+    (match, idRaw: string | undefined) => {
+      hasReplyTag = true;
+      if (idRaw === undefined) {
+        sawCurrent = true;
+      } else {
+        const id = idRaw.trim();
+        if (id) lastExplicitId = id;
+      }
+      return stripReplyTags ? " " : match;
+    },
+  );
+
+  cleaned = normalizeDirectiveWhitespace(cleaned);
+
+  const replyToId =
+    lastExplicitId ??
+    (sawCurrent ? currentMessageId?.trim() || undefined : undefined);
+
+  return {
+    text: cleaned,
+    audioAsVoice,
+    replyToId,
+    replyToCurrent: sawCurrent,
+    hasAudioTag,
+    hasReplyTag,
+  };
+}
diff --git a/src/web/media.ts b/src/web/media.ts
index 59bc52e0a..ed8860128 100644
--- a/src/web/media.ts
+++ b/src/web/media.ts
@@ -54,43 +54,60 @@ async function loadWebMediaInternal(
     };
   };
 
-  if (/^https?:\/\//i.test(mediaUrl)) {
-    const fetched = await fetchRemoteMedia({ url: mediaUrl });
-    const { buffer, contentType, fileName } = fetched;
-    const kind = mediaKindFromMime(contentType);
+  const clampAndFinalize = async (params: {
+    buffer: Buffer;
+    contentType?: string;
+    kind: MediaKind;
+    fileName?: string;
+  }): Promise => {
     const cap = Math.min(
-      maxBytes ?? maxBytesForKind(kind),
-      maxBytesForKind(kind),
+      maxBytes ?? maxBytesForKind(params.kind),
+      maxBytesForKind(params.kind),
     );
-    if (kind === "image") {
-      // Skip optimization for GIFs to preserve animation.
-      if (contentType === "image/gif" || !optimizeImages) {
-        if (buffer.length > cap) {
+    if (params.kind === "image") {
+      const isGif = params.contentType === "image/gif";
+      if (isGif || !optimizeImages) {
+        if (params.buffer.length > cap) {
          throw new Error(
            `${
-              contentType === "image/gif" ? "GIF" : "Media"
+              isGif ? "GIF" : "Media"
            } exceeds ${(cap / (1024 * 1024)).toFixed(0)}MB limit (got ${(
-              buffer.length / (1024 * 1024)
+              params.buffer.length / (1024 * 1024)
            ).toFixed(2)}MB)`,
          );
        }
-        return { buffer, contentType, kind, fileName };
+        return {
+          buffer: params.buffer,
+          contentType: params.contentType,
+          kind: params.kind,
+          fileName: params.fileName,
+        };
      }
-      return { ...(await optimizeAndClampImage(buffer, cap)), fileName };
+      return {
+        ...(await optimizeAndClampImage(params.buffer, cap)),
+        fileName: params.fileName,
+      };
    }
-    if (buffer.length > cap) {
+    if (params.buffer.length > cap) {
      throw new Error(
        `Media exceeds ${(cap / (1024 * 1024)).toFixed(0)}MB limit (got ${(
-          buffer.length / (1024 * 1024)
+          params.buffer.length / (1024 * 1024)
        ).toFixed(2)}MB)`,
      );
    }
    return {
-      buffer,
-      contentType: contentType ?? undefined,
-      kind,
-      fileName,
+      buffer: params.buffer,
+      contentType: params.contentType ?? undefined,
+      kind: params.kind,
+      fileName: params.fileName,
    };
+  };
+
+  if (/^https?:\/\//i.test(mediaUrl)) {
+    const fetched = await fetchRemoteMedia({ url: mediaUrl });
+    const { buffer, contentType, fileName } = fetched;
+    const kind = mediaKindFromMime(contentType);
+    return await clampAndFinalize({ buffer, contentType, kind, fileName });
   }
 
   // Local path
@@ -102,34 +119,12 @@
     const ext = extensionForMime(mime);
     if (ext) fileName = `${fileName}${ext}`;
   }
-  const cap = Math.min(
-    maxBytes ?? maxBytesForKind(kind),
-    maxBytesForKind(kind),
-  );
-  if (kind === "image") {
-    // Skip optimization for GIFs to preserve animation.
-    if (mime === "image/gif" || !optimizeImages) {
-      if (data.length > cap) {
-        throw new Error(
-          `${
-            mime === "image/gif" ? "GIF" : "Media"
-          } exceeds ${(cap / (1024 * 1024)).toFixed(0)}MB limit (got ${(
-            data.length / (1024 * 1024)
-          ).toFixed(2)}MB)`,
-        );
-      }
-      return { buffer: data, contentType: mime, kind, fileName };
-    }
-    return { ...(await optimizeAndClampImage(data, cap)), fileName };
-  }
-  if (data.length > cap) {
-    throw new Error(
-      `Media exceeds ${(cap / (1024 * 1024)).toFixed(0)}MB limit (got ${(
-        data.length / (1024 * 1024)
-      ).toFixed(2)}MB)`,
-    );
-  }
-  return { buffer: data, contentType: mime, kind, fileName };
+  return await clampAndFinalize({
+    buffer: data,
+    contentType: mime,
+    kind,
+    fileName,
+  });
 }
 
 export async function loadWebMedia(
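Note (reviewer sketch, not part of the patch): a quick sanity check of the shared directive parser, written with node:assert so it stays test-runner agnostic. The expectations follow directly from the implementation of `parseInlineDirectives` above; the relative import assumes the snippet sits next to `src/utils/directive-tags.ts`.

```ts
import assert from "node:assert/strict";
import { parseInlineDirectives } from "./directive-tags.js";

// Both directive families in one string; audio and reply tags are stripped by default.
const both = parseInlineDirectives(
  "[[audio_as_voice]] here you go [[reply_to: 42]]",
  { currentMessageId: "100" },
);
assert.equal(both.text, "here you go");
assert.equal(both.audioAsVoice, true);
assert.equal(both.replyToId, "42"); // explicit id wins over the current message id
assert.equal(both.replyToCurrent, false);

// parseAudioTag keeps reply tags intact (stripReplyTags: false) so later stages
// can still see them; parseReplyDirectives does the inverse with stripAudioTag: false.
const replyKept = parseInlineDirectives("[[reply_to_current]] ok", {
  stripReplyTags: false,
});
assert.equal(replyKept.text, "[[reply_to_current]] ok");
assert.equal(replyKept.hasReplyTag, true);
assert.equal(replyKept.replyToCurrent, true);
```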