fix: handle inline MEDIA tokens and host webhook media

This commit is contained in:
Peter Steinberger
2025-11-25 05:47:12 +01:00
parent 729ae64822
commit 6883c3ae4a
4 changed files with 119 additions and 47 deletions

View File

@@ -297,38 +297,38 @@ const mediaNote =
let mediaFromCommand: string | undefined; let mediaFromCommand: string | undefined;
const mediaLine = rawStdout const mediaLine = rawStdout
.split("\n") .split("\n")
.find((line) => /^MEDIA:/i.test(line)); .find((line) => /\bMEDIA:/i.test(line));
if (mediaLine) { if (mediaLine) {
const after = mediaLine.replace(/^MEDIA:\s*/i, ""); let isValidMedia = false;
const parts = after.trim().split(/\s+/); const mediaMatch = mediaLine.match(/\bMEDIA:\s*([^\s]+)/i);
if (parts[0]) { if (mediaMatch?.[1]) {
mediaFromCommand = normalizeMediaSource(parts[0]); const candidate = normalizeMediaSource(mediaMatch[1]);
const looksLikeUrl = /^https?:\/\//i.test(candidate);
const looksLikePath =
candidate.startsWith("/") || candidate.startsWith("./");
const hasWhitespace = /\s/.test(candidate);
isValidMedia =
!hasWhitespace &&
candidate.length <= 1024 &&
(looksLikeUrl || looksLikePath);
if (isValidMedia) mediaFromCommand = candidate;
} }
trimmed = rawStdout if (isValidMedia && mediaMatch?.[0]) {
.split("\n") trimmed = rawStdout
.filter((line) => !/^MEDIA:/i.test(line)) .replace(mediaMatch[0], "")
.join("\n") .replace(/\s{2,}/g, " ")
.trim(); .replace(/\s+\n/g, "\n")
// Basic sanity: accept only URLs or existing file paths without whitespace. .replace(/\n{3,}/g, "\n\n")
const hasWhitespace = mediaFromCommand .trim();
? /\s/.test(mediaFromCommand) } else {
: false; trimmed = rawStdout
const looksLikeUrl = mediaFromCommand .split("\n")
? /^https?:\/\//i.test(mediaFromCommand) .filter((line) => line !== mediaLine)
: false; .join("\n")
const looksLikePath = mediaFromCommand .replace(/\n\s+/g, "\n")
? mediaFromCommand.startsWith("/") || mediaFromCommand.startsWith("./") .replace(/\n{3,}/g, "\n\n")
: false; .trim();
if (
!mediaFromCommand ||
hasWhitespace ||
(!looksLikeUrl && !looksLikePath) ||
mediaFromCommand.length > 1024
) {
mediaFromCommand = undefined;
} }
} else {
trimmed = rawStdout;
} }
if (stderr?.trim()) { if (stderr?.trim()) {
logVerbose(`Command auto-reply stderr: ${stderr.trim()}`); logVerbose(`Command auto-reply stderr: ${stderr.trim()}`);

View File

@@ -171,6 +171,32 @@ describe("config and templating", () => {
expect(result?.mediaUrl).toBe("/tmp/pic.png"); expect(result?.mediaUrl).toBe("/tmp/pic.png");
}); });
// Checks that a MEDIA: token sitting in the middle of a line of command
// stdout is returned as the reply's mediaUrl and stripped from the reply text.
it("extracts MEDIA token inline within a sentence", async () => {
  // Stubbed result of the reply command: the token is surrounded by caption text.
  const commandOutput = {
    stdout: "caption before MEDIA:/tmp/pic.png caption after",
    stderr: "",
    code: 0,
    signal: null,
    killed: false,
  };
  const runSpy = vi
    .spyOn(index, "runCommandWithTimeout")
    .mockResolvedValue(commandOutput);

  const config = {
    inbound: {
      reply: {
        mode: "command" as const,
        command: ["echo", "{{Body}}"],
      },
    },
  };
  const inboundMessage = { Body: "hi", From: "+1", To: "+2" };

  const reply = await index.getReplyFromConfig(
    inboundMessage,
    undefined,
    config,
    runSpy,
  );

  // The path is surfaced as media and the remaining text is collapsed to single spaces.
  expect(reply?.mediaUrl).toBe("/tmp/pic.png");
  expect(reply?.text).toBe("caption before caption after");
});
it("ignores invalid MEDIA lines with whitespace", async () => { it("ignores invalid MEDIA lines with whitespace", async () => {
const runSpy = vi.spyOn(index, "runCommandWithTimeout").mockResolvedValue({ const runSpy = vi.spyOn(index, "runCommandWithTimeout").mockResolvedValue({
stdout: "hello\nMEDIA: not a url with spaces\nrest\n", stdout: "hello\nMEDIA: not a url with spaces\nrest\n",
@@ -483,11 +509,11 @@ describe("twilio interactions", () => {
}); });
describe("webhook and messaging", () => { describe("webhook and messaging", () => {
it("startWebhook responds and auto-replies", async () => { it("startWebhook responds and auto-replies", async () => {
const client = twilioFactory._createClient(); const client = twilioFactory._createClient();
client.messages.create.mockResolvedValue({}); client.messages.create.mockResolvedValue({});
twilioFactory.mockReturnValue(client); twilioFactory.mockReturnValue(client);
vi.spyOn(index, "getReplyFromConfig").mockResolvedValue({ text: "Auto" }); vi.spyOn(index, "getReplyFromConfig").mockResolvedValue({ text: "Auto" });
const server = await index.startWebhook(0, "/hook", undefined, false); const server = await index.startWebhook(0, "/hook", undefined, false);
const address = server.address() as net.AddressInfo; const address = server.address() as net.AddressInfo;
@@ -501,6 +527,39 @@ describe("webhook and messaging", () => {
await new Promise((resolve) => server.close(resolve)); await new Promise((resolve) => server.close(resolve));
}); });
// Checks that when the auto-reply yields a local media path, the webhook
// passes it to ensureMediaHosted and sends the returned URL to Twilio.
it("hosts local media before replying via webhook", async () => {
  const client = twilioFactory._createClient();
  client.messages.create.mockResolvedValue({});
  twilioFactory.mockReturnValue(client);
  const replies = await import("./auto-reply/reply.js");
  const hostModule = await import("./media/host.js");
  const hostSpy = vi
    .spyOn(hostModule, "ensureMediaHosted")
    .mockResolvedValue({ url: "https://ts.net/media/abc", id: "abc", size: 123 });
  vi.spyOn(replies, "getReplyFromConfig").mockResolvedValue({
    text: "Auto",
    mediaUrl: "/tmp/pic.png",
  });

  // Cleanup lives in finally blocks so a failing assertion cannot leak the
  // listening server or the ensureMediaHosted spy into later tests.
  try {
    const server = await index.startWebhook(0, "/hook", undefined, false);
    try {
      const address = server.address() as net.AddressInfo;
      const url = `http://127.0.0.1:${address.port}/hook`;
      await fetch(url, {
        method: "POST",
        headers: { "content-type": "application/x-www-form-urlencoded" },
        body: "From=whatsapp%3A%2B1555&To=whatsapp%3A%2B1666&Body=Hello&MessageSid=SM2",
      });

      expect(hostSpy).toHaveBeenCalledWith("/tmp/pic.png");
      expect(client.messages.create).toHaveBeenCalledWith(
        expect.objectContaining({
          mediaUrl: ["https://ts.net/media/abc"],
        }),
      );
    } finally {
      await new Promise((resolve) => server.close(resolve));
    }
  } finally {
    hostSpy.mockRestore();
  }
});
it("listRecentMessages merges and sorts", async () => { it("listRecentMessages merges and sorts", async () => {
const inbound = [ const inbound = [
{ {

View File

@@ -23,6 +23,10 @@ import { defaultRuntime, type RuntimeEnv } from "./runtime.js";
import { logInfo, logWarn } from "./logger.js"; import { logInfo, logWarn } from "./logger.js";
import { saveMediaBuffer } from "./media/store.js"; import { saveMediaBuffer } from "./media/store.js";
/**
 * Format a millisecond duration for log output.
 *
 * Durations of 1000 ms or more are rendered in seconds with two decimals
 * (e.g. "1.50s"); anything else is rendered unchanged with an "ms" suffix.
 */
function formatDuration(ms: number) {
  if (ms >= 1000) {
    const seconds = ms / 1000;
    return seconds.toFixed(2) + "s";
  }
  return String(ms) + "ms";
}
const WA_WEB_AUTH_DIR = path.join(os.homedir(), ".warelay", "credentials"); const WA_WEB_AUTH_DIR = path.join(os.homedir(), ".warelay", "credentials");
export async function createWaSocket(printQr: boolean, verbose: boolean) { export async function createWaSocket(printQr: boolean, verbose: boolean) {
@@ -117,15 +121,12 @@ export async function sendMessageWeb(
logVerbose(`Presence update skipped: ${String(err)}`); logVerbose(`Presence update skipped: ${String(err)}`);
} }
let payload: AnyMessageContent = { text: body }; let payload: AnyMessageContent = { text: body };
if (options.mediaUrl) { if (options.mediaUrl) {
const normalized = options.mediaUrl.startsWith("file://") const media = await loadWebMedia(options.mediaUrl);
? options.mediaUrl.replace("file://", "") payload = {
: options.mediaUrl; image: media.buffer,
const media = await loadWebMedia(options.mediaUrl); caption: body || undefined,
payload = { mimetype: media.contentType,
image: media.buffer,
caption: body || undefined,
mimetype: media.contentType,
}; };
} }
logInfo( logInfo(
@@ -369,6 +370,9 @@ export async function monitorWebProvider(
if (!replyResult || (!replyResult.text && !replyResult.mediaUrl)) return; if (!replyResult || (!replyResult.text && !replyResult.mediaUrl)) return;
try { try {
if (replyResult.mediaUrl) { if (replyResult.mediaUrl) {
logVerbose(
`Web auto-reply media detected: ${replyResult.mediaUrl}`,
);
const media = await loadWebMedia(replyResult.mediaUrl); const media = await loadWebMedia(replyResult.mediaUrl);
await msg.sendMedia({ await msg.sendMedia({
image: media.buffer, image: media.buffer,
@@ -382,7 +386,7 @@ export async function monitorWebProvider(
if (isVerbose()) { if (isVerbose()) {
console.log( console.log(
success( success(
`↩️ Auto-replied to ${msg.from} (web, ${replyResult.text?.length ?? 0} chars${replyResult.mediaUrl ? ", media" : ""}, ${durationMs}ms)`, `↩️ Auto-replied to ${msg.from} (web, ${replyResult.text?.length ?? 0} chars${replyResult.mediaUrl ? ", media" : ""}, ${formatDuration(durationMs)})`,
), ),
); );
} else { } else {
@@ -493,7 +497,8 @@ async function downloadInboundMedia(
message.videoMessage?.mimetype ?? message.videoMessage?.mimetype ??
message.documentMessage?.mimetype ?? message.documentMessage?.mimetype ??
message.audioMessage?.mimetype ?? message.audioMessage?.mimetype ??
message.stickerMessage?.mimetype; message.stickerMessage?.mimetype ??
undefined;
if ( if (
!message.imageMessage && !message.imageMessage &&
!message.videoMessage && !message.videoMessage &&
@@ -506,6 +511,7 @@ async function downloadInboundMedia(
try { try {
const buffer = (await downloadMediaMessage(msg as any, "buffer", {}, { const buffer = (await downloadMediaMessage(msg as any, "buffer", {}, {
reuploadRequest: sock.updateMediaMessage, reuploadRequest: sock.updateMediaMessage,
logger: (sock as { logger?: unknown })?.logger as any,
})) as Buffer; })) as Buffer;
return { buffer, mimetype }; return { buffer, mimetype };
} catch (err) { } catch (err) {
@@ -531,6 +537,7 @@ async function loadWebMedia(
} }
return { buffer: array, contentType: res.headers.get("content-type") ?? undefined }; return { buffer: array, contentType: res.headers.get("content-type") ?? undefined };
} }
// Local path
const data = await fs.readFile(mediaUrl); const data = await fs.readFile(mediaUrl);
if (data.length > 5 * 1024 * 1024) { if (data.length > 5 * 1024 * 1024) {
throw new Error("Media exceeds 5MB limit"); throw new Error("Media exceeds 5MB limit");

View File

@@ -13,6 +13,7 @@ import { logTwilioSendError } from "./utils.js";
import { defaultRuntime, type RuntimeEnv } from "../runtime.js"; import { defaultRuntime, type RuntimeEnv } from "../runtime.js";
import { attachMediaRoutes } from "../media/server.js"; import { attachMediaRoutes } from "../media/server.js";
import { saveMediaSource } from "../media/store.js"; import { saveMediaSource } from "../media/store.js";
import { ensureMediaHosted } from "../media/host.js";
/** Start the inbound webhook HTTP server and wire optional auto-replies. */ /** Start the inbound webhook HTTP server and wire optional auto-replies. */
export async function startWebhook( export async function startWebhook(
@@ -75,16 +76,21 @@ export async function startWebhook(
if (replyResult && (replyResult.text || replyResult.mediaUrl)) { if (replyResult && (replyResult.text || replyResult.mediaUrl)) {
try { try {
let mediaUrl = replyResult.mediaUrl;
if (mediaUrl && !/^https?:\/\//i.test(mediaUrl)) {
const hosted = await ensureMediaHosted(mediaUrl);
mediaUrl = hosted.url;
}
await client.messages.create({ await client.messages.create({
from: To, from: To,
to: From, to: From,
body: replyResult.text ?? "", body: replyResult.text ?? "",
...(replyResult.mediaUrl ? { mediaUrl: [replyResult.mediaUrl] } : {}), ...(mediaUrl ? { mediaUrl: [mediaUrl] } : {}),
}); });
if (verbose) if (verbose)
runtime.log( runtime.log(
success( success(
`↩️ Auto-replied to ${From}${replyResult.mediaUrl ? " (media)" : ""}`, `↩️ Auto-replied to ${From}${mediaUrl ? " (media)" : ""}`,
), ),
); );
} catch (err) { } catch (err) {