refactor: unify inline directives and media fetch

2026-01-10 03:01:04 +01:00
parent 4075895c4c
commit f28a4a34ad
15 changed files with 345 additions and 178 deletions
--- a/src/agents/pi-embedded-runner.ts
+++ b/src/agents/pi-embedded-runner.ts
@@ -17,6 +17,7 @@ import {
  SettingsManager,
 } from "@mariozechner/pi-coding-agent";
 import { resolveHeartbeatPrompt } from "../auto-reply/heartbeat.js";
+import { parseReplyDirectives } from "../auto-reply/reply/reply-directives.js";
 import type {
  ReasoningLevel,
  ThinkLevel,
@@ -28,7 +29,6 @@ import type { ClawdbotConfig } from "../config/config.js";
 import { resolveProviderCapabilities } from "../config/provider-capabilities.js";
 import { getMachineDisplayName } from "../infra/machine-name.js";
 import { createSubsystemLogger } from "../logging.js";
-import { splitMediaFromOutput } from "../media/parse.js";
 import {
  type enqueueCommand,
  enqueueCommandInLane,
@@ -1626,6 +1626,9 @@ export async function runEmbeddedPiAgent(params: {
            media?: string[];
            isError?: boolean;
            audioAsVoice?: boolean;
+            replyToId?: string;
+            replyToTag?: boolean;
+            replyToCurrent?: boolean;
          }> = [];

          const errorText = lastAssistant
@@ -1646,12 +1649,18 @@ export async function runEmbeddedPiAgent(params: {
                text: cleanedText,
                mediaUrls,
                audioAsVoice,
-              } = splitMediaFromOutput(agg);
+                replyToId,
+                replyToTag,
+                replyToCurrent,
+              } = parseReplyDirectives(agg);
              if (cleanedText)
                replyItems.push({
                  text: cleanedText,
                  media: mediaUrls,
                  audioAsVoice,
+                  replyToId,
+                  replyToTag,
+                  replyToCurrent,
                });
            }
          }
@@ -1675,7 +1684,10 @@ export async function runEmbeddedPiAgent(params: {
              text: cleanedText,
              mediaUrls,
              audioAsVoice,
-            } = splitMediaFromOutput(text);
+              replyToId,
+              replyToTag,
+              replyToCurrent,
+            } = parseReplyDirectives(text);
            if (
              !cleanedText &&
              (!mediaUrls || mediaUrls.length === 0) &&
@@ -1686,6 +1698,9 @@ export async function runEmbeddedPiAgent(params: {
              text: cleanedText,
              media: mediaUrls,
              audioAsVoice,
+              replyToId,
+              replyToTag,
+              replyToCurrent,
            });
          }

@@ -1699,6 +1714,9 @@ export async function runEmbeddedPiAgent(params: {
              mediaUrls: item.media?.length ? item.media : undefined,
              mediaUrl: item.media?.[0],
              isError: item.isError,
+              replyToId: item.replyToId,
+              replyToTag: item.replyToTag,
+              replyToCurrent: item.replyToCurrent,
              // Apply audioAsVoice to media payloads if tag was found anywhere in response
              audioAsVoice:
                item.audioAsVoice || (hasAudioAsVoiceTag && item.media?.length),
--- a/src/agents/pi-embedded-subscribe.ts
+++ b/src/agents/pi-embedded-subscribe.ts
@@ -3,12 +3,12 @@ import path from "node:path";
 import type { AgentEvent, AgentMessage } from "@mariozechner/pi-agent-core";
 import type { AssistantMessage } from "@mariozechner/pi-ai";
 import type { AgentSession } from "@mariozechner/pi-coding-agent";
+import { parseReplyDirectives } from "../auto-reply/reply/reply-directives.js";
 import type { ReasoningLevel } from "../auto-reply/thinking.js";
 import { formatToolAggregate } from "../auto-reply/tool-meta.js";
 import { resolveStateDir } from "../config/paths.js";
 import { emitAgentEvent } from "../infra/agent-events.js";
 import { createSubsystemLogger } from "../logging.js";
-import { splitMediaFromOutput } from "../media/parse.js";
 import { truncateUtf16Safe } from "../utils.js";
 import type { BlockReplyChunking } from "./pi-embedded-block-chunker.js";
 import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";
@@ -383,7 +383,7 @@ export function subscribeEmbeddedPiSession(params: {
  const emitToolSummary = (toolName?: string, meta?: string) => {
    if (!params.onToolResult) return;
    const agg = formatToolAggregate(toolName, meta ? [meta] : undefined);
-    const { text: cleanedText, mediaUrls } = splitMediaFromOutput(agg);
+    const { text: cleanedText, mediaUrls } = parseReplyDirectives(agg);
    if (!cleanedText && (!mediaUrls || mediaUrls.length === 0)) return;
    try {
      void params.onToolResult({
@@ -437,7 +437,7 @@ export function subscribeEmbeddedPiSession(params: {
    lastBlockReplyText = chunk;
    assistantTexts.push(chunk);
    if (!params.onBlockReply) return;
-    const splitResult = splitMediaFromOutput(chunk);
+    const splitResult = parseReplyDirectives(chunk);
    const { text: cleanedText, mediaUrls, audioAsVoice } = splitResult;
    // Skip empty payloads, but always emit if audioAsVoice is set (to propagate the flag)
    if (!cleanedText && (!mediaUrls || mediaUrls.length === 0) && !audioAsVoice)
@@ -739,7 +739,7 @@ export function subscribeEmbeddedPiSession(params: {
            if (next && next !== lastStreamedAssistant) {
              lastStreamedAssistant = next;
              const { text: cleanedText, mediaUrls } =
-                splitMediaFromOutput(next);
+                parseReplyDirectives(next);
              emitAgentEvent({
                runId: params.runId,
                stream: "assistant",
@@ -868,7 +868,7 @@ export function subscribeEmbeddedPiSession(params: {
                  text: cleanedText,
                  mediaUrls,
                  audioAsVoice,
-                } = splitMediaFromOutput(text);
+                } = parseReplyDirectives(text);
                // Emit if there's content OR audioAsVoice flag (to propagate the flag)
                if (
                  cleanedText ||