refactor: unify inline directives and media fetch

This commit is contained in:
Peter Steinberger
2026-01-10 03:01:04 +01:00
parent 4075895c4c
commit f28a4a34ad
15 changed files with 345 additions and 178 deletions

View File

@@ -17,6 +17,7 @@ import {
SettingsManager,
} from "@mariozechner/pi-coding-agent";
import { resolveHeartbeatPrompt } from "../auto-reply/heartbeat.js";
import { parseReplyDirectives } from "../auto-reply/reply/reply-directives.js";
import type {
ReasoningLevel,
ThinkLevel,
@@ -28,7 +29,6 @@ import type { ClawdbotConfig } from "../config/config.js";
import { resolveProviderCapabilities } from "../config/provider-capabilities.js";
import { getMachineDisplayName } from "../infra/machine-name.js";
import { createSubsystemLogger } from "../logging.js";
import { splitMediaFromOutput } from "../media/parse.js";
import {
type enqueueCommand,
enqueueCommandInLane,
@@ -1626,6 +1626,9 @@ export async function runEmbeddedPiAgent(params: {
media?: string[];
isError?: boolean;
audioAsVoice?: boolean;
replyToId?: string;
replyToTag?: boolean;
replyToCurrent?: boolean;
}> = [];
const errorText = lastAssistant
@@ -1646,12 +1649,18 @@ export async function runEmbeddedPiAgent(params: {
text: cleanedText,
mediaUrls,
audioAsVoice,
} = splitMediaFromOutput(agg);
replyToId,
replyToTag,
replyToCurrent,
} = parseReplyDirectives(agg);
if (cleanedText)
replyItems.push({
text: cleanedText,
media: mediaUrls,
audioAsVoice,
replyToId,
replyToTag,
replyToCurrent,
});
}
}
@@ -1675,7 +1684,10 @@ export async function runEmbeddedPiAgent(params: {
text: cleanedText,
mediaUrls,
audioAsVoice,
} = splitMediaFromOutput(text);
replyToId,
replyToTag,
replyToCurrent,
} = parseReplyDirectives(text);
if (
!cleanedText &&
(!mediaUrls || mediaUrls.length === 0) &&
@@ -1686,6 +1698,9 @@ export async function runEmbeddedPiAgent(params: {
text: cleanedText,
media: mediaUrls,
audioAsVoice,
replyToId,
replyToTag,
replyToCurrent,
});
}
@@ -1699,6 +1714,9 @@ export async function runEmbeddedPiAgent(params: {
mediaUrls: item.media?.length ? item.media : undefined,
mediaUrl: item.media?.[0],
isError: item.isError,
replyToId: item.replyToId,
replyToTag: item.replyToTag,
replyToCurrent: item.replyToCurrent,
// Apply audioAsVoice to media payloads if tag was found anywhere in response
audioAsVoice:
item.audioAsVoice || (hasAudioAsVoiceTag && item.media?.length),

View File

@@ -3,12 +3,12 @@ import path from "node:path";
import type { AgentEvent, AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage } from "@mariozechner/pi-ai";
import type { AgentSession } from "@mariozechner/pi-coding-agent";
import { parseReplyDirectives } from "../auto-reply/reply/reply-directives.js";
import type { ReasoningLevel } from "../auto-reply/thinking.js";
import { formatToolAggregate } from "../auto-reply/tool-meta.js";
import { resolveStateDir } from "../config/paths.js";
import { emitAgentEvent } from "../infra/agent-events.js";
import { createSubsystemLogger } from "../logging.js";
import { splitMediaFromOutput } from "../media/parse.js";
import { truncateUtf16Safe } from "../utils.js";
import type { BlockReplyChunking } from "./pi-embedded-block-chunker.js";
import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";
@@ -383,7 +383,7 @@ export function subscribeEmbeddedPiSession(params: {
const emitToolSummary = (toolName?: string, meta?: string) => {
if (!params.onToolResult) return;
const agg = formatToolAggregate(toolName, meta ? [meta] : undefined);
const { text: cleanedText, mediaUrls } = splitMediaFromOutput(agg);
const { text: cleanedText, mediaUrls } = parseReplyDirectives(agg);
if (!cleanedText && (!mediaUrls || mediaUrls.length === 0)) return;
try {
void params.onToolResult({
@@ -437,7 +437,7 @@ export function subscribeEmbeddedPiSession(params: {
lastBlockReplyText = chunk;
assistantTexts.push(chunk);
if (!params.onBlockReply) return;
const splitResult = splitMediaFromOutput(chunk);
const splitResult = parseReplyDirectives(chunk);
const { text: cleanedText, mediaUrls, audioAsVoice } = splitResult;
// Skip empty payloads, but always emit if audioAsVoice is set (to propagate the flag)
if (!cleanedText && (!mediaUrls || mediaUrls.length === 0) && !audioAsVoice)
@@ -739,7 +739,7 @@ export function subscribeEmbeddedPiSession(params: {
if (next && next !== lastStreamedAssistant) {
lastStreamedAssistant = next;
const { text: cleanedText, mediaUrls } =
splitMediaFromOutput(next);
parseReplyDirectives(next);
emitAgentEvent({
runId: params.runId,
stream: "assistant",
@@ -868,7 +868,7 @@ export function subscribeEmbeddedPiSession(params: {
text: cleanedText,
mediaUrls,
audioAsVoice,
} = splitMediaFromOutput(text);
} = parseReplyDirectives(text);
// Emit if there's content OR audioAsVoice flag (to propagate the flag)
if (
cleanedText ||