From 2045487d5e63ac9eaeb677cedcc62d7324100550 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 26 Dec 2025 10:50:41 +0100 Subject: [PATCH] fix: extract quoted WhatsApp reply text --- CHANGELOG.md | 1 + src/web/inbound.test.ts | 16 ++++++ src/web/inbound.ts | 102 +++++++++++++++++++++++++--------- src/web/monitor-inbox.test.ts | 49 ++++++++++++++++ 4 files changed, 141 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a366e9828..efc4f9d78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ - Heartbeat replies now strip repeated `HEARTBEAT_OK` tails to avoid accidental “OK OK” spam. - Heartbeat failure logs now include the error reason instead of `[object Object]`. - Duration strings now accept `h` (hours) where durations are parsed (e.g., heartbeat intervals). +- WhatsApp inbound now normalizes more wrapper types so quoted reply bodies are extracted reliably. - WhatsApp send now preserves existing JIDs (including group `@g.us`) instead of coercing to `@s.whatsapp.net`. (Thanks @arun-8687.) - Telegram/WhatsApp: reply context stays in `Body`/`ReplyTo*`, but outbound replies no longer thread to the original message. (Thanks @joshp123 for the PR and follow-up question.) - WhatsApp web creds persistence hardened; credentials are restored before auth checks and QR login auto-restarts if it stalls. 
diff --git a/src/web/inbound.test.ts b/src/web/inbound.test.ts index 262ff5ae9..161b0d62b 100644 --- a/src/web/inbound.test.ts +++ b/src/web/inbound.test.ts @@ -17,6 +17,22 @@ describe("web inbound helpers", () => { expect(body).toBe("caption"); }); + it("handles document captions", () => { + const body = extractText({ + documentMessage: { caption: " doc " }, + } as unknown as import("@whiskeysockets/baileys").proto.IMessage); + expect(body).toBe("doc"); + }); + + it("unwraps view-once v2 extension messages", () => { + const body = extractText({ + viewOnceMessageV2Extension: { + message: { conversation: " hello " }, + }, + } as unknown as import("@whiskeysockets/baileys").proto.IMessage); + expect(body).toBe("hello"); + }); + it("returns placeholders for media-only payloads", () => { expect( extractMediaPlaceholder({ diff --git a/src/web/inbound.ts b/src/web/inbound.ts index 7dbe9bfe8..62222c1aa 100644 --- a/src/web/inbound.ts +++ b/src/web/inbound.ts @@ -6,7 +6,10 @@ import type { import { DisconnectReason, downloadMediaMessage, + extractMessageContent, + getContentType, isJidGroup, + normalizeMessageContent, } from "@whiskeysockets/baileys"; import { loadConfig } from "../config/config.js"; @@ -405,17 +408,47 @@ export async function monitorWebInbox(options: { function unwrapMessage( message: proto.IMessage | undefined, ): proto.IMessage | undefined { + const normalized = normalizeMessageContent( + message as proto.IMessage | undefined, + ); + return normalized as proto.IMessage | undefined; +} + +function extractContextInfo( + message: proto.IMessage | undefined, +): proto.IContextInfo | undefined { if (!message) return undefined; - if (message.ephemeralMessage?.message) { - return unwrapMessage(message.ephemeralMessage.message as proto.IMessage); + const contentType = getContentType(message); + const candidate = contentType + ? 
(message as Record<string, unknown>)[contentType] + : undefined; + const contextInfo = + candidate && typeof candidate === "object" && "contextInfo" in candidate + ? (candidate as { contextInfo?: proto.IContextInfo }).contextInfo + : undefined; + if (contextInfo) return contextInfo; + const fallback = + message.extendedTextMessage?.contextInfo ?? + message.imageMessage?.contextInfo ?? + message.videoMessage?.contextInfo ?? + message.documentMessage?.contextInfo ?? + message.audioMessage?.contextInfo ?? + message.stickerMessage?.contextInfo ?? + message.buttonsResponseMessage?.contextInfo ?? + message.listResponseMessage?.contextInfo ?? + message.templateButtonReplyMessage?.contextInfo ?? + message.interactiveResponseMessage?.contextInfo ?? + message.buttonsMessage?.contextInfo ?? + message.listMessage?.contextInfo; + if (fallback) return fallback; + for (const value of Object.values(message)) { + if (!value || typeof value !== "object") continue; + if (!("contextInfo" in value)) continue; + const candidateContext = (value as { contextInfo?: proto.IContextInfo }) + .contextInfo; + if (candidateContext) return candidateContext; } - if (message.viewOnceMessage?.message) { - return unwrapMessage(message.viewOnceMessage.message as proto.IMessage); - } - if (message.viewOnceMessageV2?.message) { - return unwrapMessage(message.viewOnceMessageV2.message as proto.IMessage); - } - return message; + return undefined; } function extractMentionedJids( @@ -448,14 +481,27 @@ export function extractText( ): string | undefined { const message = unwrapMessage(rawMessage); if (!message) return undefined; - if (typeof message.conversation === "string" && message.conversation.trim()) { - return message.conversation.trim(); + const extracted = extractMessageContent(message); + const candidates = [ + message, + extracted && extracted !== message ? 
extracted : undefined, + ]; + for (const candidate of candidates) { + if (!candidate) continue; + if ( + typeof candidate.conversation === "string" && + candidate.conversation.trim() + ) { + return candidate.conversation.trim(); + } + const extended = candidate.extendedTextMessage?.text; + if (extended?.trim()) return extended.trim(); + const caption = + candidate.imageMessage?.caption ?? + candidate.videoMessage?.caption ?? + candidate.documentMessage?.caption; + if (caption?.trim()) return caption.trim(); } - const extended = message.extendedTextMessage?.text; - if (extended?.trim()) return extended.trim(); - const caption = - message.imageMessage?.caption ?? message.videoMessage?.caption; - if (caption?.trim()) return caption.trim(); return undefined; } @@ -479,19 +525,21 @@ function describeReplyContext(rawMessage: proto.IMessage | undefined): { } | null { const message = unwrapMessage(rawMessage); if (!message) return null; - const contextInfo = - message.extendedTextMessage?.contextInfo ?? - message.imageMessage?.contextInfo ?? - message.videoMessage?.contextInfo ?? - message.documentMessage?.contextInfo ?? - message.audioMessage?.contextInfo ?? - message.stickerMessage?.contextInfo ?? - message.buttonsResponseMessage?.contextInfo ?? - message.listResponseMessage?.contextInfo; - const quoted = contextInfo?.quotedMessage as proto.IMessage | undefined; + const contextInfo = extractContextInfo(message); + const quoted = normalizeMessageContent( + contextInfo?.quotedMessage as proto.IMessage | undefined, + ) as proto.IMessage | undefined; if (!quoted) return null; const body = extractText(quoted) ?? extractMediaPlaceholder(quoted); - if (!body) return null; + if (!body) { + const quotedType = quoted ? getContentType(quoted) : undefined; + logVerbose( + `Quoted message missing extractable body${ + quotedType ? ` (type ${quotedType})` : "" + }`, + ); + return null; + } const senderJid = contextInfo?.participant ?? undefined; const senderE164 = senderJid ? 
(jidToE164(senderJid) ?? senderJid) diff --git a/src/web/monitor-inbox.test.ts b/src/web/monitor-inbox.test.ts index 7f59e103a..b4ee36cb2 100644 --- a/src/web/monitor-inbox.test.ts +++ b/src/web/monitor-inbox.test.ts @@ -198,6 +198,55 @@ describe("web monitor inbox", () => { await listener.close(); }); + it("captures reply context from wrapped quoted messages", async () => { + const onMessage = vi.fn(async (msg) => { + await msg.reply("pong"); + }); + + const listener = await monitorWebInbox({ verbose: false, onMessage }); + const sock = await createWaSocket(); + const upsert = { + type: "notify", + messages: [ + { + key: { id: "abc", fromMe: false, remoteJid: "999@s.whatsapp.net" }, + message: { + extendedTextMessage: { + text: "reply", + contextInfo: { + stanzaId: "q1", + participant: "111@s.whatsapp.net", + quotedMessage: { + viewOnceMessageV2Extension: { + message: { conversation: "original" }, + }, + }, + }, + }, + }, + messageTimestamp: 1_700_000_000, + pushName: "Tester", + }, + ], + }; + + sock.ev.emit("messages.upsert", upsert); + await new Promise((resolve) => setImmediate(resolve)); + + expect(onMessage).toHaveBeenCalledWith( + expect.objectContaining({ + replyToId: "q1", + replyToBody: "original", + replyToSender: "+111", + }), + ); + expect(sock.sendMessage).toHaveBeenCalledWith("999@s.whatsapp.net", { + text: "pong", + }); + + await listener.close(); + }); + it("captures media path for image messages", async () => { const onMessage = vi.fn(); const listener = await monitorWebInbox({ verbose: false, onMessage });