refactor: unify inline directives and media fetch

2026-01-10 03:01:04 +01:00
parent 4075895c4c
commit f28a4a34ad
15 changed files with 345 additions and 178 deletions
--- a/src/agents/pi-embedded-runner.ts
+++ b/src/agents/pi-embedded-runner.ts
@@ -17,6 +17,7 @@ import {
  SettingsManager,
 } from "@mariozechner/pi-coding-agent";
 import { resolveHeartbeatPrompt } from "../auto-reply/heartbeat.js";
 import { parseReplyDirectives } from "../auto-reply/reply/reply-directives.js";
 import type {
  ReasoningLevel,
  ThinkLevel,
@@ -28,7 +29,6 @@ import type { ClawdbotConfig } from "../config/config.js";
 import { resolveProviderCapabilities } from "../config/provider-capabilities.js";
 import { getMachineDisplayName } from "../infra/machine-name.js";
 import { createSubsystemLogger } from "../logging.js";
 import { splitMediaFromOutput } from "../media/parse.js";
 import {
  type enqueueCommand,
  enqueueCommandInLane,
@@ -1626,6 +1626,9 @@ export async function runEmbeddedPiAgent(params: {
            media?: string[];
            isError?: boolean;
            audioAsVoice?: boolean;
            replyToId?: string;
            replyToTag?: boolean;
            replyToCurrent?: boolean;
          }> = [];
          const errorText = lastAssistant
@@ -1646,12 +1649,18 @@ export async function runEmbeddedPiAgent(params: {
                text: cleanedText,
                mediaUrls,
                audioAsVoice,
-              } = splitMediaFromOutput(agg);
+                replyToId,
                replyToTag,
                replyToCurrent,
              } = parseReplyDirectives(agg);
              if (cleanedText)
                replyItems.push({
                  text: cleanedText,
                  media: mediaUrls,
                  audioAsVoice,
                  replyToId,
                  replyToTag,
                  replyToCurrent,
                });
            }
          }
@@ -1675,7 +1684,10 @@ export async function runEmbeddedPiAgent(params: {
              text: cleanedText,
              mediaUrls,
              audioAsVoice,
-            } = splitMediaFromOutput(text);
+              replyToId,
              replyToTag,
              replyToCurrent,
            } = parseReplyDirectives(text);
            if (
              !cleanedText &&
              (!mediaUrls || mediaUrls.length === 0) &&
@@ -1686,6 +1698,9 @@ export async function runEmbeddedPiAgent(params: {
              text: cleanedText,
              media: mediaUrls,
              audioAsVoice,
              replyToId,
              replyToTag,
              replyToCurrent,
            });
          }
@@ -1699,6 +1714,9 @@ export async function runEmbeddedPiAgent(params: {
              mediaUrls: item.media?.length ? item.media : undefined,
              mediaUrl: item.media?.[0],
              isError: item.isError,
              replyToId: item.replyToId,
              replyToTag: item.replyToTag,
              replyToCurrent: item.replyToCurrent,
              // Apply audioAsVoice to media payloads if tag was found anywhere in response
              audioAsVoice:
                item.audioAsVoice || (hasAudioAsVoiceTag && item.media?.length),
--- a/src/agents/pi-embedded-subscribe.ts
+++ b/src/agents/pi-embedded-subscribe.ts
@@ -3,12 +3,12 @@ import path from "node:path";
 import type { AgentEvent, AgentMessage } from "@mariozechner/pi-agent-core";
 import type { AssistantMessage } from "@mariozechner/pi-ai";
 import type { AgentSession } from "@mariozechner/pi-coding-agent";
 import { parseReplyDirectives } from "../auto-reply/reply/reply-directives.js";
 import type { ReasoningLevel } from "../auto-reply/thinking.js";
 import { formatToolAggregate } from "../auto-reply/tool-meta.js";
 import { resolveStateDir } from "../config/paths.js";
 import { emitAgentEvent } from "../infra/agent-events.js";
 import { createSubsystemLogger } from "../logging.js";
 import { splitMediaFromOutput } from "../media/parse.js";
 import { truncateUtf16Safe } from "../utils.js";
 import type { BlockReplyChunking } from "./pi-embedded-block-chunker.js";
 import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";
@@ -383,7 +383,7 @@ export function subscribeEmbeddedPiSession(params: {
  const emitToolSummary = (toolName?: string, meta?: string) => {
    if (!params.onToolResult) return;
    const agg = formatToolAggregate(toolName, meta ? [meta] : undefined);
-    const { text: cleanedText, mediaUrls } = splitMediaFromOutput(agg);
+    const { text: cleanedText, mediaUrls } = parseReplyDirectives(agg);
    if (!cleanedText && (!mediaUrls || mediaUrls.length === 0)) return;
    try {
      void params.onToolResult({
@@ -437,7 +437,7 @@ export function subscribeEmbeddedPiSession(params: {
    lastBlockReplyText = chunk;
    assistantTexts.push(chunk);
    if (!params.onBlockReply) return;
-    const splitResult = splitMediaFromOutput(chunk);
+    const splitResult = parseReplyDirectives(chunk);
    const { text: cleanedText, mediaUrls, audioAsVoice } = splitResult;
    // Skip empty payloads, but always emit if audioAsVoice is set (to propagate the flag)
    if (!cleanedText && (!mediaUrls || mediaUrls.length === 0) && !audioAsVoice)
@@ -739,7 +739,7 @@ export function subscribeEmbeddedPiSession(params: {
            if (next && next !== lastStreamedAssistant) {
              lastStreamedAssistant = next;
              const { text: cleanedText, mediaUrls } =
-                splitMediaFromOutput(next);
+                parseReplyDirectives(next);
              emitAgentEvent({
                runId: params.runId,
                stream: "assistant",
@@ -868,7 +868,7 @@ export function subscribeEmbeddedPiSession(params: {
                  text: cleanedText,
                  mediaUrls,
                  audioAsVoice,
-                } = splitMediaFromOutput(text);
+                } = parseReplyDirectives(text);
                // Emit if there's content OR audioAsVoice flag (to propagate the flag)
                if (
                  cleanedText ||
--- a/src/auto-reply/reply/agent-runner.ts
+++ b/src/auto-reply/reply/agent-runner.ts
@@ -33,9 +33,8 @@ import {
 import { stripHeartbeatToken } from "../heartbeat.js";
 import type { OriginatingChannelType, TemplateContext } from "../templating.js";
 import { normalizeVerboseLevel, type VerboseLevel } from "../thinking.js";
-import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../tokens.js";
+import { SILENT_REPLY_TOKEN } from "../tokens.js";
 import type { GetReplyOptions, ReplyPayload } from "../types.js";
 import { parseAudioTag } from "./audio-tags.js";
 import {
  createAudioAsVoiceBuffer,
  createBlockReplyPipeline,
@@ -48,6 +47,7 @@ import {
  type QueueSettings,
  scheduleFollowupDrain,
 } from "./queue.js";
 import { parseReplyDirectives } from "./reply-directives.js";
 import {
  applyReplyTagsToPayload,
  applyReplyThreading,
@@ -550,28 +550,39 @@ export async function runReplyAgent(params: {
                      !payload.audioAsVoice
                    )
                      return;
-                    const audioTagResult = parseAudioTag(taggedPayload.text);
+                    const parsed = parseReplyDirectives(
-                    const cleaned = audioTagResult.text || undefined;
+                      taggedPayload.text ?? "",
                      {
                        currentMessageId: sessionCtx.MessageSid,
                        silentToken: SILENT_REPLY_TOKEN,
                      },
                    );
                    const cleaned = parsed.text || undefined;
                    const hasMedia =
                      Boolean(taggedPayload.mediaUrl) ||
                      (taggedPayload.mediaUrls?.length ?? 0) > 0;
                    // Skip empty payloads unless they have audioAsVoice flag (need to track it)
                    if (!cleaned && !hasMedia && !payload.audioAsVoice) return;
                    if (
-                      isSilentReplyText(cleaned, SILENT_REPLY_TOKEN) &&
+                      !cleaned &&
-                      !hasMedia
+                      !hasMedia &&
                      !payload.audioAsVoice &&
                      !parsed.audioAsVoice
                    )
                      return;
                    if (parsed.isSilent && !hasMedia) return;
                    const blockPayload: ReplyPayload = applyReplyToMode({
                      ...taggedPayload,
                      text: cleaned,
-                      audioAsVoice:
+                      audioAsVoice: parsed.audioAsVoice || payload.audioAsVoice,
-                        audioTagResult.audioAsVoice || payload.audioAsVoice,
+                      replyToId: taggedPayload.replyToId ?? parsed.replyToId,
                      replyToTag: taggedPayload.replyToTag || parsed.replyToTag,
                      replyToCurrent:
                        taggedPayload.replyToCurrent || parsed.replyToCurrent,
                    });
                    void typingSignals
-                      .signalTextDelta(taggedPayload.text)
+                      .signalTextDelta(cleaned ?? taggedPayload.text)
                      .catch((err) => {
                        logVerbose(
                          `block reply typing signal failed: ${String(err)}`,
@@ -735,11 +746,21 @@ export async function runReplyAgent(params: {
      currentMessageId: sessionCtx.MessageSid,
    })
      .map((payload) => {
-        const audioTagResult = parseAudioTag(payload.text);
+        const parsed = parseReplyDirectives(payload.text ?? "", {
          currentMessageId: sessionCtx.MessageSid,
          silentToken: SILENT_REPLY_TOKEN,
        });
        const mediaUrls = payload.mediaUrls ?? parsed.mediaUrls;
        const mediaUrl = payload.mediaUrl ?? parsed.mediaUrl ?? mediaUrls?.[0];
        return {
          ...payload,
-          text: audioTagResult.text ? audioTagResult.text : undefined,
+          text: parsed.text ? parsed.text : undefined,
-          audioAsVoice: audioTagResult.audioAsVoice,
+          mediaUrls,
          mediaUrl,
          replyToId: payload.replyToId ?? parsed.replyToId,
          replyToTag: payload.replyToTag || parsed.replyToTag,
          replyToCurrent: payload.replyToCurrent || parsed.replyToCurrent,
          audioAsVoice: payload.audioAsVoice || parsed.audioAsVoice,
        };
      })
      .filter(isRenderablePayload);
@@ -775,8 +796,7 @@ export async function runReplyAgent(params: {
    const shouldSignalTyping = replyPayloads.some((payload) => {
      const trimmed = payload.text?.trim();
-      if (trimmed && !isSilentReplyText(trimmed, SILENT_REPLY_TOKEN))
+      if (trimmed) return true;
        return true;
      if (payload.mediaUrl) return true;
      if (payload.mediaUrls && payload.mediaUrls.length > 0) return true;
      return false;
--- a/src/auto-reply/reply/reply-directives.ts
+++ b/src/auto-reply/reply/reply-directives.ts
@@ -0,0 +1,49 @@
 import { splitMediaFromOutput } from "../../media/parse.js";
 import { parseInlineDirectives } from "../../utils/directive-tags.js";
 import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../tokens.js";
 export type ReplyDirectiveParseResult = {
  text: string;
  mediaUrls?: string[];
  mediaUrl?: string;
  replyToId?: string;
  replyToCurrent: boolean;
  replyToTag: boolean;
  audioAsVoice?: boolean;
  isSilent: boolean;
 };
 export function parseReplyDirectives(
  raw: string,
  options: { currentMessageId?: string; silentToken?: string } = {},
 ): ReplyDirectiveParseResult {
  const split = splitMediaFromOutput(raw);
  let text = split.text ?? "";
  const replyParsed = parseInlineDirectives(text, {
    currentMessageId: options.currentMessageId,
    stripAudioTag: false,
    stripReplyTags: true,
  });
  if (replyParsed.hasReplyTag) {
    text = replyParsed.text;
  }
  const silentToken = options.silentToken ?? SILENT_REPLY_TOKEN;
  const isSilent = isSilentReplyText(text, silentToken);
  if (isSilent) {
    text = "";
  }
  return {
    text,
    mediaUrls: split.mediaUrls,
    mediaUrl: split.mediaUrl,
    replyToId: replyParsed.replyToId,
    replyToCurrent: replyParsed.replyToCurrent,
    replyToTag: replyParsed.hasReplyTag,
    audioAsVoice: split.audioAsVoice,
    isSilent,
  };
 }
--- a/src/auto-reply/reply/reply-payloads.ts
+++ b/src/auto-reply/reply/reply-payloads.ts
@@ -10,8 +10,23 @@ export function applyReplyTagsToPayload(
  payload: ReplyPayload,
  currentMessageId?: string,
 ): ReplyPayload {
-  if (typeof payload.text !== "string") return payload;
+  if (typeof payload.text !== "string") {
-  const { cleaned, replyToId, hasTag } = extractReplyToTag(
+    if (!payload.replyToCurrent || payload.replyToId) return payload;
    return {
      ...payload,
      replyToId: currentMessageId?.trim() || undefined,
    };
  }
  const shouldParseTags = payload.text.includes("[[");
  if (!shouldParseTags) {
    if (!payload.replyToCurrent || payload.replyToId) return payload;
    return {
      ...payload,
      replyToId: currentMessageId?.trim() || undefined,
      replyToTag: payload.replyToTag ?? true,
    };
  }
  const { cleaned, replyToId, replyToCurrent, hasTag } = extractReplyToTag(
    payload.text,
    currentMessageId,
  );
@@ -20,6 +35,7 @@ export function applyReplyTagsToPayload(
    text: cleaned ? cleaned : undefined,
    replyToId: replyToId ?? payload.replyToId,
    replyToTag: hasTag || payload.replyToTag,
    replyToCurrent: replyToCurrent || payload.replyToCurrent,
  };
 }
--- a/src/auto-reply/reply/reply-tags.ts
+++ b/src/auto-reply/reply/reply-tags.ts
@@ -1,12 +1,4 @@
-const REPLY_TAG_RE =
+import { parseInlineDirectives } from "../../utils/directive-tags.js";
  /\[\[\s*(?:reply_to_current|reply_to\s*:\s*([^\]\n]+))\s*\]\]/gi;
 function normalizeReplyText(text: string) {
  return text
    .replace(/[ \t]+/g, " ")
    .replace(/[ \t]*\n[ \t]*/g, "\n")
    .trim();
 }
 export function extractReplyToTag(
  text?: string,
@@ -14,31 +6,17 @@ export function extractReplyToTag(
 ): {
  cleaned: string;
  replyToId?: string;
  replyToCurrent: boolean;
  hasTag: boolean;
 } {
-  if (!text) return { cleaned: "", hasTag: false };
+  const result = parseInlineDirectives(text, {
-
+    currentMessageId,
-  let sawCurrent = false;
+    stripAudioTag: false,
-  let lastExplicitId: string | undefined;
+  });
-  let hasTag = false;
+  return {
-
+    cleaned: result.text,
-  const cleaned = normalizeReplyText(
+    replyToId: result.replyToId,
-    text.replace(REPLY_TAG_RE, (_full, idRaw: string | undefined) => {
+    replyToCurrent: result.replyToCurrent,
-      hasTag = true;
+    hasTag: result.hasReplyTag,
-      if (idRaw === undefined) {
+  };
        sawCurrent = true;
        return " ";
      }
      const id = idRaw.trim();
      if (id) lastExplicitId = id;
      return " ";
    }),
  );
  const replyToId =
    lastExplicitId ??
    (sawCurrent ? currentMessageId?.trim() || undefined : undefined);
  return { cleaned, replyToId, hasTag };
 }
--- a/src/auto-reply/types.ts
+++ b/src/auto-reply/types.ts
@@ -31,6 +31,8 @@ export type ReplyPayload = {
  mediaUrls?: string[];
  replyToId?: string;
  replyToTag?: boolean;
  /** True when [[reply_to_current]] was present but not yet mapped to a message id. */
  replyToCurrent?: boolean;
  /** Send audio as voice message (bubble) instead of audio file. Defaults to false. */
  audioAsVoice?: boolean;
  isError?: boolean;
--- a/src/discord/monitor.ts
+++ b/src/discord/monitor.ts
@@ -51,7 +51,7 @@ import { formatDurationSeconds } from "../infra/format-duration.js";
 import { recordProviderActivity } from "../infra/provider-activity.js";
 import { enqueueSystemEvent } from "../infra/system-events.js";
 import { getChildLogger } from "../logging.js";
-import { detectMime } from "../media/mime.js";
+import { fetchRemoteMedia } from "../media/fetch.js";
 import { saveMediaBuffer } from "../media/store.js";
 import { buildPairingReply } from "../pairing/pairing-messages.js";
 import {
@@ -1879,19 +1879,16 @@ async function resolveMediaList(
  const out: DiscordMediaInfo[] = [];
  for (const attachment of attachments) {
    try {
-      const res = await fetch(attachment.url);
+      const fetched = await fetchRemoteMedia({
-      if (!res.ok) {
+        url: attachment.url,
-        throw new Error(
+        filePathHint: attachment.filename ?? attachment.url,
          `Failed to download discord attachment: HTTP ${res.status}`,
        );
      }
      const buffer = Buffer.from(await res.arrayBuffer());
      const mime = await detectMime({
        buffer,
        headerMime: attachment.content_type ?? res.headers.get("content-type"),
        filePath: attachment.filename ?? attachment.url,
      });
-      const saved = await saveMediaBuffer(buffer, mime, "inbound", maxBytes);
+      const saved = await saveMediaBuffer(
        fetched.buffer,
        fetched.contentType ?? attachment.content_type,
        "inbound",
        maxBytes,
      );
      out.push({
        path: saved.path,
        contentType: saved.contentType,
--- a/src/media/audio-tags.ts
+++ b/src/media/audio-tags.ts
@@ -1,3 +1,5 @@
 import { parseInlineDirectives } from "../utils/directive-tags.js";
 /**
 * Extract audio mode tag from text.
 * Supports [[audio_as_voice]] to send audio as voice bubble instead of file.
@@ -8,24 +10,10 @@ export function parseAudioTag(text?: string): {
  audioAsVoice: boolean;
  hadTag: boolean;
 } {
-  if (!text) return { text: "", audioAsVoice: false, hadTag: false };
+  const result = parseInlineDirectives(text, { stripReplyTags: false });
-  let cleaned = text;
+  return {
-  let audioAsVoice = false; // default: audio file (backward compatible)
+    text: result.text,
-  let hadTag = false;
+    audioAsVoice: result.audioAsVoice,
-
+    hadTag: result.hasAudioTag,
-  // [[audio_as_voice]] -> send as voice bubble (opt-in)
+  };
  const voiceMatch = cleaned.match(/\[\[audio_as_voice\]\]/i);
  if (voiceMatch) {
    cleaned = cleaned.replace(/\[\[audio_as_voice\]\]/gi, " ");
    audioAsVoice = true;
    hadTag = true;
  }
  // Clean up whitespace
  cleaned = cleaned
    .replace(/[ \t]+/g, " ")
    .replace(/[ \t]*\n[ \t]*/g, "\n")
    .trim();
  return { text: cleaned, audioAsVoice, hadTag };
 }
--- a/src/media/audio.ts
+++ b/src/media/audio.ts
@@ -0,0 +1,18 @@
 import { getFileExtension } from "./mime.js";
 const VOICE_AUDIO_EXTENSIONS = new Set([".oga", ".ogg", ".opus"]);
 export function isVoiceCompatibleAudio(opts: {
  contentType?: string | null;
  fileName?: string | null;
 }): boolean {
  const mime = opts.contentType?.toLowerCase();
  if (mime && (mime.includes("ogg") || mime.includes("opus"))) {
    return true;
  }
  const fileName = opts.fileName?.trim();
  if (!fileName) return false;
  const ext = getFileExtension(fileName);
  if (!ext) return false;
  return VOICE_AUDIO_EXTENSIONS.has(ext);
 }
--- a/src/msteams/attachments.ts
+++ b/src/msteams/attachments.ts
@@ -1,3 +1,4 @@
 import { fetchRemoteMedia } from "../media/fetch.js";
 import { detectMime } from "../media/mime.js";
 import { saveMediaBuffer } from "../media/store.js";
@@ -740,23 +741,28 @@ export async function downloadMSTeamsImageAttachments(params: {
  for (const candidate of candidates) {
    if (!isUrlAllowed(candidate.url, allowHosts)) continue;
    try {
-      const res = await fetchWithAuthFallback({
+      const fetchImpl: typeof fetch = (input) => {
        const url =
          typeof input === "string"
            ? input
            : input instanceof URL
              ? input.toString()
              : input.url;
        return fetchWithAuthFallback({
          url,
          tokenProvider: params.tokenProvider,
          fetchFn: params.fetchFn,
        });
      };
      const fetched = await fetchRemoteMedia({
        url: candidate.url,
-        tokenProvider: params.tokenProvider,
+        fetchImpl,
-        fetchFn: params.fetchFn,
+        filePathHint: candidate.fileHint,
      });
      if (!res.ok) continue;
      const buffer = Buffer.from(await res.arrayBuffer());
      if (buffer.byteLength > params.maxBytes) continue;
      const mime = await detectMime({
        buffer,
        headerMime:
          candidate.contentTypeHint ?? res.headers.get("content-type"),
        filePath: candidate.fileHint ?? candidate.url,
      });
      if (fetched.buffer.byteLength > params.maxBytes) continue;
      const saved = await saveMediaBuffer(
-        buffer,
+        fetched.buffer,
-        mime,
+        fetched.contentType ?? candidate.contentTypeHint,
        "inbound",
        params.maxBytes,
      );
--- a/src/slack/monitor.ts
+++ b/src/slack/monitor.ts
@@ -45,7 +45,7 @@ import {
 import { danger, logVerbose, shouldLogVerbose } from "../globals.js";
 import { enqueueSystemEvent } from "../infra/system-events.js";
 import { getChildLogger } from "../logging.js";
-import { detectMime } from "../media/mime.js";
+import { fetchRemoteMedia } from "../media/fetch.js";
 import { saveMediaBuffer } from "../media/store.js";
 import { buildPairingReply } from "../pairing/pairing-messages.js";
 import {
@@ -355,27 +355,28 @@ async function resolveSlackMedia(params: {
    const url = file.url_private_download ?? file.url_private;
    if (!url) continue;
    try {
-      const res = await fetch(url, {
+      const fetchImpl: typeof fetch = (input, init) => {
-        headers: { Authorization: `Bearer ${params.token}` },
+        const headers = new Headers(init?.headers);
-      });
+        headers.set("Authorization", `Bearer ${params.token}`);
-      if (!res.ok) continue;
+        return fetch(input, { ...init, headers });
-      const buffer = Buffer.from(await res.arrayBuffer());
+      };
-      if (buffer.byteLength > params.maxBytes) continue;
+      const fetched = await fetchRemoteMedia({
-      const contentType = await detectMime({
+        url,
-        buffer,
+        fetchImpl,
-        headerMime: res.headers.get("content-type"),
+        filePathHint: file.name,
        filePath: file.name,
      });
      if (fetched.buffer.byteLength > params.maxBytes) continue;
      const saved = await saveMediaBuffer(
-        buffer,
+        fetched.buffer,
-        contentType ?? file.mimetype,
+        fetched.contentType ?? file.mimetype,
        "inbound",
        params.maxBytes,
      );
      const label = fetched.fileName ?? file.name;
      return {
        path: saved.path,
        contentType: saved.contentType,
-        placeholder: file.name ? `[Slack file: ${file.name}]` : "[Slack file]",
+        placeholder: label ? `[Slack file: ${label}]` : "[Slack file]",
      };
    } catch {
      // Ignore download failures and fall through to the next file.
--- a/src/telegram/voice.ts
+++ b/src/telegram/voice.ts
@@ -1,18 +1,10 @@
-import { getFileExtension } from "../media/mime.js";
+import { isVoiceCompatibleAudio } from "../media/audio.js";
 export function isTelegramVoiceCompatible(opts: {
  contentType?: string | null;
  fileName?: string | null;
 }): boolean {
-  const mime = opts.contentType?.toLowerCase();
+  return isVoiceCompatibleAudio(opts);
  if (mime && (mime.includes("ogg") || mime.includes("opus"))) {
    return true;
  }
  const fileName = opts.fileName?.trim();
  if (!fileName) return false;
  const ext = getFileExtension(fileName);
  if (!ext) return false;
  return ext === ".ogg" || ext === ".opus" || ext === ".oga";
 }
 export function resolveTelegramVoiceDecision(opts: {
--- a/src/utils/directive-tags.ts
+++ b/src/utils/directive-tags.ts
@@ -0,0 +1,87 @@
 export type InlineDirectiveParseResult = {
  text: string;
  audioAsVoice: boolean;
  replyToId?: string;
  replyToCurrent: boolean;
  hasAudioTag: boolean;
  hasReplyTag: boolean;
 };
 type InlineDirectiveParseOptions = {
  currentMessageId?: string;
  stripAudioTag?: boolean;
  stripReplyTags?: boolean;
 };
 const AUDIO_TAG_RE = /\[\[\s*audio_as_voice\s*\]\]/gi;
 const REPLY_TAG_RE =
  /\[\[\s*(?:reply_to_current|reply_to\s*:\s*([^\]\n]+))\s*\]\]/gi;
 function normalizeDirectiveWhitespace(text: string): string {
  return text
    .replace(/[ \t]+/g, " ")
    .replace(/[ \t]*\n[ \t]*/g, "\n")
    .trim();
 }
 export function parseInlineDirectives(
  text?: string,
  options: InlineDirectiveParseOptions = {},
 ): InlineDirectiveParseResult {
  const {
    currentMessageId,
    stripAudioTag = true,
    stripReplyTags = true,
  } = options;
  if (!text) {
    return {
      text: "",
      audioAsVoice: false,
      replyToCurrent: false,
      hasAudioTag: false,
      hasReplyTag: false,
    };
  }
  let cleaned = text;
  let audioAsVoice = false;
  let hasAudioTag = false;
  let hasReplyTag = false;
  let sawCurrent = false;
  let lastExplicitId: string | undefined;
  cleaned = cleaned.replace(AUDIO_TAG_RE, (match) => {
    audioAsVoice = true;
    hasAudioTag = true;
    return stripAudioTag ? " " : match;
  });
  cleaned = cleaned.replace(
    REPLY_TAG_RE,
    (match, idRaw: string | undefined) => {
      hasReplyTag = true;
      if (idRaw === undefined) {
        sawCurrent = true;
      } else {
        const id = idRaw.trim();
        if (id) lastExplicitId = id;
      }
      return stripReplyTags ? " " : match;
    },
  );
  cleaned = normalizeDirectiveWhitespace(cleaned);
  const replyToId =
    lastExplicitId ??
    (sawCurrent ? currentMessageId?.trim() || undefined : undefined);
  return {
    text: cleaned,
    audioAsVoice,
    replyToId,
    replyToCurrent: sawCurrent,
    hasAudioTag,
    hasReplyTag,
  };
 }
--- a/src/web/media.ts
+++ b/src/web/media.ts
@@ -54,43 +54,60 @@ async function loadWebMediaInternal(
    };
  };
-  if (/^https?:\/\//i.test(mediaUrl)) {
+  const clampAndFinalize = async (params: {
-    const fetched = await fetchRemoteMedia({ url: mediaUrl });
+    buffer: Buffer;
-    const { buffer, contentType, fileName } = fetched;
+    contentType?: string;
-    const kind = mediaKindFromMime(contentType);
+    kind: MediaKind;
    fileName?: string;
  }): Promise<WebMediaResult> => {
    const cap = Math.min(
-      maxBytes ?? maxBytesForKind(kind),
+      maxBytes ?? maxBytesForKind(params.kind),
-      maxBytesForKind(kind),
+      maxBytesForKind(params.kind),
    );
-    if (kind === "image") {
+    if (params.kind === "image") {
-      // Skip optimization for GIFs to preserve animation.
+      const isGif = params.contentType === "image/gif";
-      if (contentType === "image/gif" || !optimizeImages) {
+      if (isGif || !optimizeImages) {
-        if (buffer.length > cap) {
+        if (params.buffer.length > cap) {
          throw new Error(
            `${
-              contentType === "image/gif" ? "GIF" : "Media"
+              isGif ? "GIF" : "Media"
            } exceeds ${(cap / (1024 * 1024)).toFixed(0)}MB limit (got ${(
-              buffer.length / (1024 * 1024)
+              params.buffer.length / (1024 * 1024)
            ).toFixed(2)}MB)`,
          );
        }
-        return { buffer, contentType, kind, fileName };
+        return {
          buffer: params.buffer,
          contentType: params.contentType,
          kind: params.kind,
          fileName: params.fileName,
        };
      }
-      return { ...(await optimizeAndClampImage(buffer, cap)), fileName };
+      return {
        ...(await optimizeAndClampImage(params.buffer, cap)),
        fileName: params.fileName,
      };
    }
-    if (buffer.length > cap) {
+    if (params.buffer.length > cap) {
      throw new Error(
        `Media exceeds ${(cap / (1024 * 1024)).toFixed(0)}MB limit (got ${(
-          buffer.length / (1024 * 1024)
+          params.buffer.length / (1024 * 1024)
        ).toFixed(2)}MB)`,
      );
    }
    return {
-      buffer,
+      buffer: params.buffer,
-      contentType: contentType ?? undefined,
+      contentType: params.contentType ?? undefined,
-      kind,
+      kind: params.kind,
-      fileName,
+      fileName: params.fileName,
    };
  };
  if (/^https?:\/\//i.test(mediaUrl)) {
    const fetched = await fetchRemoteMedia({ url: mediaUrl });
    const { buffer, contentType, fileName } = fetched;
    const kind = mediaKindFromMime(contentType);
    return await clampAndFinalize({ buffer, contentType, kind, fileName });
  }
  // Local path
@@ -102,34 +119,12 @@ async function loadWebMediaInternal(
    const ext = extensionForMime(mime);
    if (ext) fileName = `${fileName}${ext}`;
  }
-  const cap = Math.min(
+  return await clampAndFinalize({
-    maxBytes ?? maxBytesForKind(kind),
+    buffer: data,
-    maxBytesForKind(kind),
+    contentType: mime,
-  );
+    kind,
-  if (kind === "image") {
+    fileName,
-    // Skip optimization for GIFs to preserve animation.
+  });
    if (mime === "image/gif" || !optimizeImages) {
      if (data.length > cap) {
        throw new Error(
          `${
            mime === "image/gif" ? "GIF" : "Media"
          } exceeds ${(cap / (1024 * 1024)).toFixed(0)}MB limit (got ${(
            data.length / (1024 * 1024)
          ).toFixed(2)}MB)`,
        );
      }
      return { buffer: data, contentType: mime, kind, fileName };
    }
    return { ...(await optimizeAndClampImage(data, cap)), fileName };
  }
  if (data.length > cap) {
    throw new Error(
      `Media exceeds ${(cap / (1024 * 1024)).toFixed(0)}MB limit (got ${(
        data.length / (1024 * 1024)
      ).toFixed(2)}MB)`,
    );
  }
  return { buffer: data, contentType: mime, kind, fileName };
 }
 export async function loadWebMedia(