Merge pull request #1088 from sibbl/fix-matrix

feat(matrix): fix sending bug, add specific support for voice messages and images
This commit is contained in:
Peter Steinberger
2026-01-17 17:27:44 +00:00
committed by GitHub
6 changed files with 220 additions and 54 deletions

View File

@@ -7,6 +7,9 @@ Docs: https://docs.clawd.bot
### Changes
- macOS: strip prerelease/build suffixes when parsing gateway semver patches. (#1110) — thanks @zerone0x.
### Fixes
- Matrix: send voice/image-specific media payloads and keep legacy poll parsing. (#1088) — thanks @sibbl.
## 2026.1.16-2
### Changes

View File

@@ -2,7 +2,7 @@ import type { MatrixClient } from "matrix-js-sdk";
import { chunkMarkdownText } from "../../../../../src/auto-reply/chunk.js";
import type { ReplyPayload } from "../../../../../src/auto-reply/types.js";
import { danger } from "../../../../../src/globals.js";
import { danger, logVerbose } from "../../../../../src/globals.js";
import type { RuntimeEnv } from "../../../../../src/runtime.js";
import { sendMessageMatrix } from "../send.js";
@@ -18,7 +18,12 @@ export async function deliverMatrixReplies(params: {
const chunkLimit = Math.min(params.textLimit, 4000);
let hasReplied = false;
for (const reply of params.replies) {
if (!reply?.text && !reply?.mediaUrl && !(reply?.mediaUrls?.length ?? 0)) {
const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 0) > 0;
if (!reply?.text && !hasMedia) {
if (reply?.audioAsVoice) {
logVerbose("matrix reply has audioAsVoice without media/text; skipping");
continue;
}
params.runtime.error?.(danger("matrix reply missing text/media"));
continue;
}
@@ -57,6 +62,7 @@ export async function deliverMatrixReplies(params: {
mediaUrl,
replyToId: shouldIncludeReply(replyToId) ? replyToId : undefined,
threadId: params.threadId,
audioAsVoice: reply.audioAsVoice,
});
if (shouldIncludeReply(replyToId)) {
hasReplied = true;

View File

@@ -0,0 +1,22 @@
import { describe, expect, it } from "vitest";
import { parsePollStartContent } from "./poll-types.js";
// Regression coverage for poll-start parsing of content that nests the poll
// under the legacy "m.poll" key.
describe("parsePollStartContent", () => {
it("parses legacy m.poll payloads", () => {
// The payload is passed inline so the literal values are checked against
// the parser's parameter type directly.
const summary = parsePollStartContent({
"m.poll": {
question: { "m.text": "Lunch?" },
kind: "m.poll.disclosed",
max_selections: 1,
answers: [
{ id: "answer1", "m.text": "Yes" },
{ id: "answer2", "m.text": "No" },
],
},
});
// The parser may return null, hence the optional chaining on the result.
expect(summary?.question).toBe("Lunch?");
// Answers are expected as plain strings, in the order they were supplied.
expect(summary?.answers).toEqual(["Yes", "No"]);
});
});

View File

@@ -7,15 +7,17 @@
* - m.poll.end - Closes a poll
*/
import type { TimelineEvents } from "matrix-js-sdk/lib/@types/event.js";
import type { ExtensibleAnyMessageEventContent } from "matrix-js-sdk/lib/@types/extensible_events.js";
import type { PollInput } from "../../../../src/polls.js";
export const M_POLL_START = "m.poll.start";
export const M_POLL_RESPONSE = "m.poll.response";
export const M_POLL_END = "m.poll.end";
export const M_POLL_START = "m.poll.start" as const;
export const M_POLL_RESPONSE = "m.poll.response" as const;
export const M_POLL_END = "m.poll.end" as const;
export const ORG_POLL_START = "org.matrix.msc3381.poll.start";
export const ORG_POLL_RESPONSE = "org.matrix.msc3381.poll.response";
export const ORG_POLL_END = "org.matrix.msc3381.poll.end";
export const ORG_POLL_START = "org.matrix.msc3381.poll.start" as const;
export const ORG_POLL_RESPONSE = "org.matrix.msc3381.poll.response" as const;
export const ORG_POLL_END = "org.matrix.msc3381.poll.end" as const;
export const POLL_EVENT_TYPES = [
M_POLL_START,
@@ -32,9 +34,7 @@ export const POLL_END_TYPES = [M_POLL_END, ORG_POLL_END];
export type PollKind = "m.poll.disclosed" | "m.poll.undisclosed";
export type TextContent = {
"m.text"?: string;
"org.matrix.msc1767.text"?: string;
export type TextContent = ExtensibleAnyMessageEventContent & {
body?: string;
};
@@ -42,25 +42,19 @@ export type PollAnswer = {
id: string;
} & TextContent;
export type PollStartContent = {
"m.poll"?: {
question: TextContent;
kind?: PollKind;
max_selections?: number;
answers: PollAnswer[];
};
"org.matrix.msc3381.poll.start"?: {
question: TextContent;
kind?: PollKind;
max_selections?: number;
answers: PollAnswer[];
};
"m.relates_to"?: {
rel_type: "m.reference";
event_id: string;
};
/**
 * Poll definition object: the value stored under a poll-start key of the
 * event content (for example the legacy "m.poll" key).
 */
export type PollStartSubtype = {
// Question text of the poll.
question: TextContent;
// Poll kind; optional.
kind?: PollKind;
// Upper bound on how many answers may be selected; optional.
max_selections?: number;
// Selectable answers, each an id combined with text content.
answers: PollAnswer[];
};
/**
 * Poll-start content in the legacy layout, where the poll definition sits
 * under the "m.poll" key. The key is optional.
 */
export type LegacyPollStartContent = {
"m.poll"?: PollStartSubtype;
};
export type PollStartContent = TimelineEvents[typeof M_POLL_START] | LegacyPollStartContent;
export type PollSummary = {
eventId: string;
roomId: string;
@@ -82,7 +76,9 @@ export function getTextContent(text?: TextContent): string {
}
export function parsePollStartContent(content: PollStartContent): PollSummary | null {
const poll = content["m.poll"] ?? content["org.matrix.msc3381.poll.start"];
const poll = (content as Record<string, PollStartSubtype | undefined>)[M_POLL_START]
?? (content as Record<string, PollStartSubtype | undefined>)[ORG_POLL_START]
?? (content as Record<string, PollStartSubtype | undefined>)["m.poll"];
if (!poll) return null;
const question = getTextContent(poll.question);
@@ -121,6 +117,11 @@ function buildTextContent(body: string): TextContent {
};
}
/**
 * Renders a poll as plain text: the question on the first line, followed by
 * one numbered line ("1. ...", "2. ...") per answer.
 *
 * @param question - Poll question, emitted verbatim as the first line.
 * @param answers - Answer labels in display order.
 * @returns The question alone when there are no answers; otherwise the
 *   question and the numbered answers joined with newlines.
 */
function buildPollFallbackText(question: string, answers: string[]): string {
  const lines = [question];
  answers.forEach((answer, position) => {
    // Numbering is 1-based for display.
    lines.push(`${position + 1}. ${answer}`);
  });
  return lines.join("\n");
}
export function buildPollStartContent(poll: PollInput): PollStartContent {
const question = poll.question.trim();
const answers = poll.options
@@ -132,13 +133,19 @@ export function buildPollStartContent(poll: PollInput): PollStartContent {
}));
const maxSelections = poll.multiple ? Math.max(1, answers.length) : 1;
const fallbackText = buildPollFallbackText(
question,
answers.map((answer) => getTextContent(answer)),
);
return {
"m.poll": {
[M_POLL_START]: {
question: buildTextContent(question),
kind: poll.multiple ? "m.poll.undisclosed" : "m.poll.disclosed",
max_selections: maxSelections,
answers,
},
"m.text": fallbackText,
"org.matrix.msc1767.text": fallbackText,
};
}

View File

@@ -31,6 +31,11 @@ vi.mock("../../../../src/web/media.js", () => ({
}),
}));
vi.mock("../../../../src/media/image-ops.js", () => ({
getImageMetadata: vi.fn().mockResolvedValue(null),
resizeToJpeg: vi.fn(),
}));
let sendMessageMatrix: typeof import("./send.js").sendMessageMatrix;
const makeClient = () => {
@@ -65,13 +70,13 @@ describe("sendMessageMatrix media", () => {
const uploadArg = uploadContent.mock.calls[0]?.[0];
expect(Buffer.isBuffer(uploadArg)).toBe(true);
const content = sendMessage.mock.calls[0]?.[2] as {
const content = sendMessage.mock.calls[0]?.[1] as {
url?: string;
msgtype?: string;
format?: string;
formatted_body?: string;
};
expect(content.msgtype).toBe("m.file");
expect(content.msgtype).toBe("m.image");
expect(content.format).toBe("org.matrix.custom.html");
expect(content.formatted_body).toContain("caption");
expect(content.url).toBe("mxc://example/file");

View File

@@ -1,12 +1,15 @@
import type { AccountDataEvents, MatrixClient } from "matrix-js-sdk";
import { EventType, MsgType, RelationType } from "matrix-js-sdk";
import type {
ReactionEventContent,
RoomMessageEventContent,
ReactionEventContent,
} from "matrix-js-sdk/lib/@types/events.js";
import { chunkMarkdownText, resolveTextChunkLimit } from "../../../../src/auto-reply/chunk.js";
import { loadConfig } from "../../../../src/config/config.js";
import { isVoiceCompatibleAudio } from "../../../../src/media/audio.js";
import { mediaKindFromMime } from "../../../../src/media/constants.js";
import { getImageMetadata, resizeToJpeg } from "../../../../src/media/image-ops.js";
import type { PollInput } from "../../../../src/polls.js";
import { loadWebMedia } from "../../../../src/web/media.js";
import { getActiveMatrixClient } from "./active-client.js";
@@ -47,6 +50,8 @@ export type MatrixSendOpts = {
replyToId?: string;
threadId?: string | number | null;
timeoutMs?: number;
/** Send audio as voice message (voice bubble) instead of audio file. Defaults to false. */
audioAsVoice?: boolean;
};
function ensureNodeRuntime() {
@@ -71,6 +76,12 @@ function normalizeTarget(raw: string): string {
return trimmed;
}
/**
 * Converts an optional thread identifier into a trimmed string.
 *
 * @param raw - Thread id as a string or number; may be absent.
 * @returns The trimmed string form, or `null` when the input is `undefined`,
 *   `null`, or blank after trimming.
 */
function normalizeThreadId(raw?: string | number | null): string | null {
  // `== null` matches both undefined and null.
  if (raw == null) return null;
  const text = String(raw).trim();
  return text.length > 0 ? text : null;
}
async function resolveDirectRoomId(client: MatrixClient, userId: string): Promise<string> {
const trimmed = userId.trim();
if (!trimmed.startsWith("@")) {
@@ -119,6 +130,18 @@ export async function resolveMatrixRoomId(
return target;
}
/**
 * Optional image details that are merged into the `info` object of an
 * outgoing media message.
 */
type MatrixImageInfo = {
// Image width (filled from the image metadata's `width`).
w?: number;
// Image height (filled from the image metadata's `height`).
h?: number;
// Content URI of the uploaded thumbnail, when one was produced.
thumbnail_url?: string;
// Details of the thumbnail; only forwarded together with `thumbnail_url`.
thumbnail_info?: {
w: number;
h: number;
mimetype: string;
// Thumbnail size in bytes (taken from the thumbnail buffer's byteLength).
size: number;
};
};
function buildMediaContent(params: {
msgtype: MsgType.Image | MsgType.Audio | MsgType.Video | MsgType.File;
body: string;
@@ -127,8 +150,24 @@ function buildMediaContent(params: {
mimetype?: string;
size: number;
relation?: MatrixReplyRelation;
isVoice?: boolean;
durationMs?: number;
imageInfo?: MatrixImageInfo;
}): RoomMessageEventContent {
const info = { mimetype: params.mimetype, size: params.size };
const info: Record<string, unknown> = { mimetype: params.mimetype, size: params.size };
if (params.durationMs !== undefined) {
info.duration = params.durationMs;
}
if (params.imageInfo) {
if (params.imageInfo.w) info.w = params.imageInfo.w;
if (params.imageInfo.h) info.h = params.imageInfo.h;
if (params.imageInfo.thumbnail_url) {
info.thumbnail_url = params.imageInfo.thumbnail_url;
if (params.imageInfo.thumbnail_info) {
info.thumbnail_info = params.imageInfo.thumbnail_info;
}
}
}
const base: MatrixMessageContent = {
msgtype: params.msgtype,
body: params.body,
@@ -136,6 +175,12 @@ function buildMediaContent(params: {
info,
url: params.url,
};
if (params.isVoice) {
base["org.matrix.msc3245.voice"] = {};
base["org.matrix.msc1767.audio"] = {
duration: params.durationMs,
};
}
if (params.relation) {
base["m.relates_to"] = params.relation;
}
@@ -171,6 +216,75 @@ function buildReplyRelation(replyToId?: string): MatrixReplyRelation | undefined
return { "m.in_reply_to": { event_id: trimmed } };
}
/**
 * Picks the message type for a media attachment from its MIME type.
 *
 * @param contentType - MIME type of the media; an empty string is classified
 *   when it is absent.
 * @param fileName - Accepted for signature compatibility; not consulted here.
 * @returns `MsgType.Image`, `MsgType.Audio` or `MsgType.Video` when the MIME
 *   type is classified as such, otherwise `MsgType.File`.
 */
function resolveMatrixMsgType(
  contentType?: string,
  fileName?: string,
): MsgType.Image | MsgType.Audio | MsgType.Video | MsgType.File {
  const mediaKind = mediaKindFromMime(contentType ?? "");
  if (mediaKind === "image") return MsgType.Image;
  if (mediaKind === "audio") return MsgType.Audio;
  if (mediaKind === "video") return MsgType.Video;
  // Anything not recognised as image/audio/video is sent as a generic file.
  return MsgType.File;
}
/**
 * Decides whether a media attachment should be sent as a voice message.
 *
 * @param opts.wantsVoice - Whether the caller asked for a voice message.
 * @param opts.contentType - MIME type forwarded to the compatibility check.
 * @param opts.fileName - File name forwarded to the compatibility check.
 * @returns `{ useVoice: true }` only when voice was requested and
 *   `isVoiceCompatibleAudio` accepts the media; `{ useVoice: false }` otherwise.
 */
function resolveMatrixVoiceDecision(opts: {
  wantsVoice: boolean;
  contentType?: string;
  fileName?: string;
}): { useVoice: boolean } {
  const { wantsVoice, contentType, fileName } = opts;
  // The compatibility check is only evaluated when voice was requested.
  const useVoice = wantsVoice
    ? Boolean(isVoiceCompatibleAudio({ contentType, fileName }))
    : false;
  return { useVoice };
}
// Images whose longest side exceeds this value get a thumbnail; the same value
// is passed as `maxSide` when resizing.
const THUMBNAIL_MAX_SIDE = 800;
// Value passed as `quality` when resizing the thumbnail to JPEG.
const THUMBNAIL_QUALITY = 80;
/**
 * Collects the image details attached to an outgoing image message.
 *
 * Reads the dimensions from the buffer and, when the longest side exceeds
 * THUMBNAIL_MAX_SIDE, also resizes the image to a JPEG thumbnail and uploads
 * it through the client.
 *
 * @param params.buffer - Bytes of the image being sent.
 * @param params.client - Client used to upload the generated thumbnail.
 * @returns The dimensions (plus thumbnail fields when a thumbnail was
 *   uploaded), or `undefined` when `getImageMetadata` rejects or yields a
 *   falsy value.
 */
async function prepareImageInfo(params: {
  buffer: Buffer;
  client: MatrixClient;
}): Promise<MatrixImageInfo | undefined> {
  const { buffer, client } = params;
  const sourceMeta = await getImageMetadata(buffer).catch(() => null);
  if (!sourceMeta) return undefined;

  const { width, height } = sourceMeta;
  const info: MatrixImageInfo = { w: width, h: height };
  // Not larger than the thumbnail bound: report dimensions only.
  if (!(Math.max(width, height) > THUMBNAIL_MAX_SIDE)) return info;

  try {
    const thumbnail = await resizeToJpeg({
      buffer,
      maxSide: THUMBNAIL_MAX_SIDE,
      quality: THUMBNAIL_QUALITY,
      withoutEnlargement: true,
    });
    // Thumbnail metadata is optional; a failure here still allows the upload.
    const thumbnailMeta = await getImageMetadata(thumbnail).catch(() => null);
    const uploaded = await client.uploadContent(thumbnail as MatrixUploadContent, {
      type: "image/jpeg",
      name: "thumbnail.jpg",
    });
    info.thumbnail_url = uploaded.content_uri;
    if (thumbnailMeta) {
      info.thumbnail_info = {
        w: thumbnailMeta.width,
        h: thumbnailMeta.height,
        mimetype: "image/jpeg",
        size: thumbnail.byteLength,
      };
    }
  } catch {
    // Thumbnail generation failed, continue without it
  }
  return info;
}
async function uploadFile(
client: MatrixClient,
file: MatrixUploadContent | Buffer,
@@ -238,14 +352,10 @@ export async function sendMessageMatrix(
const textLimit = resolveTextChunkLimit(cfg, "matrix");
const chunkLimit = Math.min(textLimit, MATRIX_TEXT_LIMIT);
const chunks = chunkMarkdownText(trimmedMessage, chunkLimit);
const rawThreadId = opts.threadId;
const threadId =
rawThreadId !== undefined && rawThreadId !== null
? String(rawThreadId).trim()
: null;
const threadId = normalizeThreadId(opts.threadId);
const relation = threadId ? undefined : buildReplyRelation(opts.replyToId);
const sendContent = (content: RoomMessageEventContent) =>
client.sendMessage(roomId, threadId ?? undefined, content);
threadId ? client.sendMessage(roomId, threadId, content) : client.sendMessage(roomId, content);
let lastMessageId = "";
if (opts.mediaUrl) {
@@ -255,9 +365,17 @@ export async function sendMessageMatrix(
contentType: media.contentType,
filename: media.fileName,
});
const msgtype = MsgType.File;
const baseMsgType = resolveMatrixMsgType(media.contentType, media.fileName);
const { useVoice } = resolveMatrixVoiceDecision({
wantsVoice: opts.audioAsVoice === true,
contentType: media.contentType,
fileName: media.fileName,
});
const msgtype = useVoice ? MsgType.Audio : baseMsgType;
const isImage = msgtype === MsgType.Image;
const imageInfo = isImage ? await prepareImageInfo({ buffer: media.buffer, client }) : undefined;
const [firstChunk, ...rest] = chunks;
const body = firstChunk ?? media.fileName ?? "(file)";
const body = useVoice ? "Voice message" : (firstChunk ?? media.fileName ?? "(file)");
const content = buildMediaContent({
msgtype,
body,
@@ -266,10 +384,13 @@ export async function sendMessageMatrix(
mimetype: media.contentType,
size: media.buffer.byteLength,
relation,
isVoice: useVoice,
imageInfo,
});
const response = await sendContent(content);
lastMessageId = response.event_id ?? lastMessageId;
for (const chunk of rest) {
const textChunks = useVoice ? chunks : rest;
for (const chunk of textChunks) {
const text = chunk.trim();
if (!text) continue;
const followup = buildTextContent(text);
@@ -316,17 +437,19 @@ export async function sendPollMatrix(
try {
const roomId = await resolveMatrixRoomId(client, to);
const pollContent = buildPollStartContent(poll);
const rawThreadId = opts.threadId;
const threadId =
rawThreadId !== undefined && rawThreadId !== null
? String(rawThreadId).trim()
: null;
const response = await client.sendEvent(
roomId,
threadId ?? undefined,
M_POLL_START as EventType.RoomMessage,
pollContent as unknown as RoomMessageEventContent,
);
const threadId = normalizeThreadId(opts.threadId);
const response = threadId
? await client.sendEvent(
roomId,
threadId,
M_POLL_START,
pollContent,
)
: await client.sendEvent(
roomId,
M_POLL_START,
pollContent,
);
return {
eventId: response.event_id ?? "unknown",