diff --git a/extensions/matrix/src/matrix/monitor/replies.ts b/extensions/matrix/src/matrix/monitor/replies.ts index e347df1c9..bb8c205ea 100644 --- a/extensions/matrix/src/matrix/monitor/replies.ts +++ b/extensions/matrix/src/matrix/monitor/replies.ts @@ -2,7 +2,7 @@ import type { MatrixClient } from "matrix-js-sdk"; import { chunkMarkdownText } from "../../../../../src/auto-reply/chunk.js"; import type { ReplyPayload } from "../../../../../src/auto-reply/types.js"; -import { danger } from "../../../../../src/globals.js"; +import { danger, logVerbose } from "../../../../../src/globals.js"; import type { RuntimeEnv } from "../../../../../src/runtime.js"; import { sendMessageMatrix } from "../send.js"; @@ -18,7 +18,12 @@ export async function deliverMatrixReplies(params: { const chunkLimit = Math.min(params.textLimit, 4000); let hasReplied = false; for (const reply of params.replies) { - if (!reply?.text && !reply?.mediaUrl && !(reply?.mediaUrls?.length ?? 0)) { + const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 0) > 0; + if (!reply?.text && !hasMedia) { + if (reply?.audioAsVoice) { + logVerbose("matrix reply has audioAsVoice without media/text; skipping"); + continue; + } params.runtime.error?.(danger("matrix reply missing text/media")); continue; } @@ -57,6 +62,7 @@ export async function deliverMatrixReplies(params: { mediaUrl, replyToId: shouldIncludeReply(replyToId) ? replyToId : undefined, threadId: params.threadId, + audioAsVoice: reply.audioAsVoice, }); if (shouldIncludeReply(replyToId)) { hasReplied = true; diff --git a/extensions/matrix/src/matrix/send.ts b/extensions/matrix/src/matrix/send.ts index 3ac6d9d32..7e1e2b2fe 100644 --- a/extensions/matrix/src/matrix/send.ts +++ b/extensions/matrix/src/matrix/send.ts @@ -1,12 +1,15 @@ import type { AccountDataEvents, MatrixClient } from "matrix-js-sdk"; import { EventType, MsgType, RelationType } from "matrix-js-sdk"; import type { - ReactionEventContent, RoomMessageEventContent, + ReactionEventContent, } from "matrix-js-sdk/lib/@types/events.js"; import { chunkMarkdownText, resolveTextChunkLimit } from "../../../../src/auto-reply/chunk.js"; import { loadConfig } from "../../../../src/config/config.js"; +import { isVoiceCompatibleAudio } from "../../../../src/media/audio.js"; +import { mediaKindFromMime } from "../../../../src/media/constants.js"; +import { getImageMetadata, resizeToJpeg } from "../../../../src/media/image-ops.js"; import type { PollInput } from "../../../../src/polls.js"; import { loadWebMedia } from "../../../../src/web/media.js"; import { getActiveMatrixClient } from "./active-client.js"; @@ -47,6 +50,8 @@ export type MatrixSendOpts = { replyToId?: string; threadId?: string | number | null; timeoutMs?: number; + /** Send audio as voice message (voice bubble) instead of audio file. Defaults to false. */ + audioAsVoice?: boolean; }; function ensureNodeRuntime() { @@ -125,6 +130,18 @@ export async function resolveMatrixRoomId( return target; } +type MatrixImageInfo = { + w?: number; + h?: number; + thumbnail_url?: string; + thumbnail_info?: { + w: number; + h: number; + mimetype: string; + size: number; + }; +}; + function buildMediaContent(params: { msgtype: MsgType.Image | MsgType.Audio | MsgType.Video | MsgType.File; body: string; @@ -133,8 +150,24 @@ function buildMediaContent(params: { mimetype?: string; size: number; relation?: MatrixReplyRelation; + isVoice?: boolean; + durationMs?: number; + imageInfo?: MatrixImageInfo; }): RoomMessageEventContent { - const info = { mimetype: params.mimetype, size: params.size }; + const info: Record = { mimetype: params.mimetype, size: params.size }; + if (params.durationMs !== undefined) { + info.duration = params.durationMs; + } + if (params.imageInfo) { + if (params.imageInfo.w) info.w = params.imageInfo.w; + if (params.imageInfo.h) info.h = params.imageInfo.h; + if (params.imageInfo.thumbnail_url) { + info.thumbnail_url = params.imageInfo.thumbnail_url; + if (params.imageInfo.thumbnail_info) { + info.thumbnail_info = params.imageInfo.thumbnail_info; + } + } + } const base: MatrixMessageContent = { msgtype: params.msgtype, body: params.body, @@ -142,6 +175,12 @@ function buildMediaContent(params: { info, url: params.url, }; + if (params.isVoice) { + base["org.matrix.msc3245.voice"] = {}; + base["org.matrix.msc1767.audio"] = { + duration: params.durationMs, + }; + } if (params.relation) { base["m.relates_to"] = params.relation; } @@ -177,6 +216,75 @@ function buildReplyRelation(replyToId?: string): MatrixReplyRelation | undefined return { "m.in_reply_to": { event_id: trimmed } }; } +function resolveMatrixMsgType( + contentType?: string, + fileName?: string, +): MsgType.Image | MsgType.Audio | MsgType.Video | MsgType.File { + const kind = mediaKindFromMime(contentType ?? ""); + switch (kind) { + case "image": + return MsgType.Image; + case "audio": + return MsgType.Audio; + case "video": + return MsgType.Video; + default: + return MsgType.File; + } +} + +function resolveMatrixVoiceDecision(opts: { + wantsVoice: boolean; + contentType?: string; + fileName?: string; +}): { useVoice: boolean } { + if (!opts.wantsVoice) return { useVoice: false }; + if (isVoiceCompatibleAudio({ contentType: opts.contentType, fileName: opts.fileName })) { + return { useVoice: true }; + } + return { useVoice: false }; +} + +const THUMBNAIL_MAX_SIDE = 800; +const THUMBNAIL_QUALITY = 80; + +async function prepareImageInfo(params: { + buffer: Buffer; + client: MatrixClient; +}): Promise { + const meta = await getImageMetadata(params.buffer).catch(() => null); + if (!meta) return undefined; + const imageInfo: MatrixImageInfo = { w: meta.width, h: meta.height }; + const maxDim = Math.max(meta.width, meta.height); + if (maxDim > THUMBNAIL_MAX_SIDE) { + try { + const thumbBuffer = await resizeToJpeg({ + buffer: params.buffer, + maxSide: THUMBNAIL_MAX_SIDE, + quality: THUMBNAIL_QUALITY, + withoutEnlargement: true, + }); + const thumbMeta = await getImageMetadata(thumbBuffer).catch(() => null); + const thumbUri = await params.client.uploadContent(thumbBuffer as MatrixUploadContent, { + type: "image/jpeg", + name: "thumbnail.jpg", + }); + imageInfo.thumbnail_url = thumbUri.content_uri; + if (thumbMeta) { + imageInfo.thumbnail_info = { + w: thumbMeta.width, + h: thumbMeta.height, + mimetype: "image/jpeg", + size: thumbBuffer.byteLength, + }; + } + } catch { + // Thumbnail generation failed, continue without it + } + } + return imageInfo; +} + async function uploadFile( client: MatrixClient, file: MatrixUploadContent | Buffer, @@ -257,9 +365,17 @@ export async function sendMessageMatrix( contentType: media.contentType, filename: media.fileName, }); - const msgtype = MsgType.File; + const baseMsgType = resolveMatrixMsgType(media.contentType, media.fileName); + const { useVoice } = resolveMatrixVoiceDecision({ + wantsVoice: opts.audioAsVoice === true, + contentType: media.contentType, + fileName: media.fileName, + }); + const msgtype = useVoice ? MsgType.Audio : baseMsgType; + const isImage = msgtype === MsgType.Image; + const imageInfo = isImage ? await prepareImageInfo({ buffer: media.buffer, client }) : undefined; const [firstChunk, ...rest] = chunks; - const body = firstChunk ?? media.fileName ?? "(file)"; + const body = useVoice ? "Voice message" : (firstChunk ?? media.fileName ?? "(file)"); const content = buildMediaContent({ msgtype, body, @@ -268,10 +384,13 @@ export async function sendMessageMatrix( mimetype: media.contentType, size: media.buffer.byteLength, relation, + isVoice: useVoice, + imageInfo, }); const response = await sendContent(content); lastMessageId = response.event_id ?? lastMessageId; - for (const chunk of rest) { + const textChunks = useVoice ? chunks : rest; + for (const chunk of textChunks) { const text = chunk.trim(); if (!text) continue; const followup = buildTextContent(text);