diff --git a/src/gateway/protocol/schema/agent.ts b/src/gateway/protocol/schema/agent.ts index 54da9d23c..0d4b2e802 100644 --- a/src/gateway/protocol/schema/agent.ts +++ b/src/gateway/protocol/schema/agent.ts @@ -18,6 +18,7 @@ export const SendParamsSchema = Type.Object( to: NonEmptyString, message: NonEmptyString, mediaUrl: Type.Optional(Type.String()), + mediaUrls: Type.Optional(Type.Array(Type.String())), gifPlayback: Type.Optional(Type.Boolean()), channel: Type.Optional(Type.String()), accountId: Type.Optional(Type.String()), diff --git a/src/gateway/server-methods/send.test.ts b/src/gateway/server-methods/send.test.ts index 0f437160c..2d30d0593 100644 --- a/src/gateway/server-methods/send.test.ts +++ b/src/gateway/server-methods/send.test.ts @@ -104,4 +104,34 @@ describe("gateway send mirroring", () => { }), ); }); + + it("mirrors MEDIA tags as attachments", async () => { + mocks.deliverOutboundPayloads.mockResolvedValue([{ messageId: "m2", channel: "slack" }]); + + const respond = vi.fn(); + await sendHandlers.send({ + params: { + to: "channel:C1", + message: "Here\nMEDIA:https://example.com/image.png", + channel: "slack", + idempotencyKey: "idem-3", + sessionKey: "agent:main:main", + }, + respond, + context: makeContext(), + req: { type: "req", id: "1", method: "send" }, + client: null, + isWebchatConnect: () => false, + }); + + expect(mocks.deliverOutboundPayloads).toHaveBeenCalledWith( + expect.objectContaining({ + mirror: expect.objectContaining({ + sessionKey: "agent:main:main", + text: "Here", + mediaUrls: ["https://example.com/image.png"], + }), + }), + ); + }); }); diff --git a/src/gateway/server-methods/send.ts b/src/gateway/server-methods/send.ts index 31ff60caa..971219de1 100644 --- a/src/gateway/server-methods/send.ts +++ b/src/gateway/server-methods/send.ts @@ -4,6 +4,7 @@ import { DEFAULT_CHAT_CHANNEL } from "../../channels/registry.js"; import { loadConfig } from "../../config/config.js"; import { createOutboundSendDeps } from "../../cli/deps.js"; import { deliverOutboundPayloads } from "../../infra/outbound/deliver.js"; +import { normalizeReplyPayloadsForDelivery } from "../../infra/outbound/payloads.js"; import { resolveSessionAgentId } from "../../agents/agent-scope.js"; import type { OutboundChannel } from "../../infra/outbound/targets.js"; import { resolveOutboundTarget } from "../../infra/outbound/targets.js"; @@ -57,6 +58,7 @@ export const sendHandlers: GatewayRequestHandlers = { to: string; message: string; mediaUrl?: string; + mediaUrls?: string[]; gifPlayback?: boolean; channel?: string; accountId?: string; @@ -82,6 +84,7 @@ export const sendHandlers: GatewayRequestHandlers = { } const to = request.to.trim(); const message = request.message.trim(); + const mediaUrls = Array.isArray(request.mediaUrls) ? request.mediaUrls : undefined; const channelInput = typeof request.channel === "string" ? request.channel : undefined; const normalizedChannel = channelInput ? normalizeChannelId(channelInput) : null; if (channelInput && !normalizedChannel) { @@ -126,12 +129,22 @@ export const sendHandlers: GatewayRequestHandlers = { }; } const outboundDeps = context.deps ? createOutboundSendDeps(context.deps) : undefined; + const mirrorPayloads = normalizeReplyPayloadsForDelivery([ + { text: message, mediaUrl: request.mediaUrl, mediaUrls }, + ]); + const mirrorText = mirrorPayloads + .map((payload) => payload.text) + .filter(Boolean) + .join("\n"); + const mirrorMediaUrls = mirrorPayloads.flatMap( + (payload) => payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []), + ); const results = await deliverOutboundPayloads({ cfg, channel: outboundChannel, to: resolved.to, accountId, - payloads: [{ text: message, mediaUrl: request.mediaUrl }], + payloads: [{ text: message, mediaUrl: request.mediaUrl, mediaUrls }], gifPlayback: request.gifPlayback, deps: outboundDeps, mirror: @@ -142,8 +155,8 @@ export const sendHandlers: GatewayRequestHandlers = { sessionKey: request.sessionKey.trim(), config: cfg, }), - text: message, - mediaUrls: request.mediaUrl ? [request.mediaUrl] : undefined, + text: mirrorText || message, + mediaUrls: mirrorMediaUrls.length > 0 ? mirrorMediaUrls : undefined, } : undefined, }); diff --git a/src/infra/outbound/deliver.test.ts b/src/infra/outbound/deliver.test.ts index 1aea21a6c..5c38a242b 100644 --- a/src/infra/outbound/deliver.test.ts +++ b/src/infra/outbound/deliver.test.ts @@ -201,13 +201,12 @@ describe("deliverOutboundPayloads", () => { it("normalizes payloads and drops empty entries", () => { const normalized = normalizeOutboundPayloads([ { text: "hi" }, - { mediaUrl: "https://x.test/a.jpg" }, + { text: "MEDIA:https://x.test/a.jpg" }, { text: " ", mediaUrls: [] }, ]); expect(normalized).toEqual([ { text: "hi", mediaUrls: [] }, { text: "", mediaUrls: ["https://x.test/a.jpg"] }, - { text: " ", mediaUrls: [] }, ]); }); diff --git a/src/infra/outbound/message-action-runner.ts b/src/infra/outbound/message-action-runner.ts index 051098f34..b873fa264 100644 --- a/src/infra/outbound/message-action-runner.ts +++ b/src/infra/outbound/message-action-runner.ts @@ -586,12 +586,24 @@ async function handleSendAction(ctx: ResolvedActionContext): Promise(); + const pushMedia = (value?: string | null) => { + const trimmed = value?.trim(); + if (!trimmed) return; + if (seenMedia.has(trimmed)) return; + seenMedia.add(trimmed); + mergedMediaUrls.push(trimmed); + }; + pushMedia(mediaHint); + for (const url of parsed.mediaUrls ?? []) pushMedia(url); + pushMedia(parsed.mediaUrl); message = parsed.text; params.message = message; if (!params.replyTo && parsed.replyToId) params.replyTo = parsed.replyToId; if (!params.media) { // Use path/filePath if media not set, then fall back to parsed directives - params.media = mediaHint || parsed.mediaUrls?.[0] || parsed.mediaUrl || undefined; + params.media = mergedMediaUrls[0] || undefined; } message = await maybeApplyCrossContextMarker({ @@ -630,6 +642,7 @@ async function handleSendAction(ctx: ResolvedActionContext): Promise payload.text) + .filter(Boolean) + .join("\n"); + const mirrorMediaUrls = normalizedPayloads.flatMap( + (payload) => payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []), + ); + const primaryMediaUrl = mirrorMediaUrls[0] ?? params.mediaUrl ?? null; if (params.dryRun) { return { channel, to: params.to, via: deliveryMode === "gateway" ? "gateway" : "direct", - mediaUrl: params.mediaUrl ?? null, + mediaUrl: primaryMediaUrl, + mediaUrls: mirrorMediaUrls.length ? mirrorMediaUrls : undefined, dryRun: true, }; } @@ -142,15 +161,15 @@ export async function sendMessage(params: MessageSendParams): Promise { }, ]); }); + + it("keeps mediaUrl null for multi MEDIA tags", () => { + expect( + normalizeOutboundPayloadsForJson([ + { + text: "MEDIA:https://x.test/a.png\nMEDIA:https://x.test/b.png", + }, + ]), + ).toEqual([ + { + text: "", + mediaUrl: null, + mediaUrls: ["https://x.test/a.png", "https://x.test/b.png"], + }, + ]); + }); }); describe("formatOutboundPayloadLog", () => { diff --git a/src/infra/outbound/payloads.ts b/src/infra/outbound/payloads.ts index 42f9d6f6c..b3558b356 100644 --- a/src/infra/outbound/payloads.ts +++ b/src/infra/outbound/payloads.ts @@ -1,3 +1,5 @@ +import { parseReplyDirectives } from "../../auto-reply/reply/reply-directives.js"; +import { isRenderablePayload } from "../../auto-reply/reply/reply-payloads.js"; import type { ReplyPayload } from "../../auto-reply/types.js"; export type NormalizedOutboundPayload = { @@ -11,8 +13,51 @@ export type OutboundPayloadJson = { mediaUrls?: string[]; }; +function mergeMediaUrls(...lists: Array | undefined>): string[] { + const seen = new Set(); + const merged: string[] = []; + for (const list of lists) { + if (!list) continue; + for (const entry of list) { + const trimmed = entry?.trim(); + if (!trimmed) continue; + if (seen.has(trimmed)) continue; + seen.add(trimmed); + merged.push(trimmed); + } + } + return merged; +} + +export function normalizeReplyPayloadsForDelivery(payloads: ReplyPayload[]): ReplyPayload[] { + return payloads.flatMap((payload) => { + const parsed = parseReplyDirectives(payload.text ?? ""); + const explicitMediaUrls = payload.mediaUrls ?? parsed.mediaUrls; + const explicitMediaUrl = payload.mediaUrl ?? parsed.mediaUrl; + const mergedMedia = mergeMediaUrls( + explicitMediaUrls, + explicitMediaUrl ? [explicitMediaUrl] : undefined, + ); + const hasMultipleMedia = (explicitMediaUrls?.length ?? 0) > 1; + const resolvedMediaUrl = hasMultipleMedia ? undefined : explicitMediaUrl; + const next: ReplyPayload = { + ...payload, + text: parsed.text ?? "", + mediaUrls: mergedMedia.length ? mergedMedia : undefined, + mediaUrl: resolvedMediaUrl, + replyToId: payload.replyToId ?? parsed.replyToId, + replyToTag: payload.replyToTag || parsed.replyToTag, + replyToCurrent: payload.replyToCurrent || parsed.replyToCurrent, + audioAsVoice: Boolean(payload.audioAsVoice || parsed.audioAsVoice), + }; + if (parsed.isSilent && mergedMedia.length === 0) return []; + if (!isRenderablePayload(next)) return []; + return [next]; + }); +} + export function normalizeOutboundPayloads(payloads: ReplyPayload[]): NormalizedOutboundPayload[] { - return payloads + return normalizeReplyPayloadsForDelivery(payloads) .map((payload) => ({ text: payload.text ?? "", mediaUrls: payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []), @@ -21,7 +66,7 @@ export function normalizeOutboundPayloads(payloads: ReplyPayload[]): NormalizedO } export function normalizeOutboundPayloadsForJson(payloads: ReplyPayload[]): OutboundPayloadJson[] { - return payloads.map((payload) => ({ + return normalizeReplyPayloadsForDelivery(payloads).map((payload) => ({ text: payload.text ?? "", mediaUrl: payload.mediaUrl ?? null, mediaUrls: payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : undefined),