Merge pull request #1088 from sibbl/fix-matrix

feat(matrix): fix sending bug, add specific support for voice messages and images
This commit is contained in:
Peter Steinberger
2026-01-17 17:27:44 +00:00
committed by GitHub
6 changed files with 220 additions and 54 deletions

View File

@@ -7,6 +7,9 @@ Docs: https://docs.clawd.bot
### Changes ### Changes
- macOS: strip prerelease/build suffixes when parsing gateway semver patches. (#1110) — thanks @zerone0x. - macOS: strip prerelease/build suffixes when parsing gateway semver patches. (#1110) — thanks @zerone0x.
### Fixes
- Matrix: send voice/image-specific media payloads and keep legacy poll parsing. (#1088) — thanks @sibbl.
## 2026.1.16-2 ## 2026.1.16-2
### Changes ### Changes

View File

@@ -2,7 +2,7 @@ import type { MatrixClient } from "matrix-js-sdk";
import { chunkMarkdownText } from "../../../../../src/auto-reply/chunk.js"; import { chunkMarkdownText } from "../../../../../src/auto-reply/chunk.js";
import type { ReplyPayload } from "../../../../../src/auto-reply/types.js"; import type { ReplyPayload } from "../../../../../src/auto-reply/types.js";
import { danger } from "../../../../../src/globals.js"; import { danger, logVerbose } from "../../../../../src/globals.js";
import type { RuntimeEnv } from "../../../../../src/runtime.js"; import type { RuntimeEnv } from "../../../../../src/runtime.js";
import { sendMessageMatrix } from "../send.js"; import { sendMessageMatrix } from "../send.js";
@@ -18,7 +18,12 @@ export async function deliverMatrixReplies(params: {
const chunkLimit = Math.min(params.textLimit, 4000); const chunkLimit = Math.min(params.textLimit, 4000);
let hasReplied = false; let hasReplied = false;
for (const reply of params.replies) { for (const reply of params.replies) {
if (!reply?.text && !reply?.mediaUrl && !(reply?.mediaUrls?.length ?? 0)) { const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 0) > 0;
if (!reply?.text && !hasMedia) {
if (reply?.audioAsVoice) {
logVerbose("matrix reply has audioAsVoice without media/text; skipping");
continue;
}
params.runtime.error?.(danger("matrix reply missing text/media")); params.runtime.error?.(danger("matrix reply missing text/media"));
continue; continue;
} }
@@ -57,6 +62,7 @@ export async function deliverMatrixReplies(params: {
mediaUrl, mediaUrl,
replyToId: shouldIncludeReply(replyToId) ? replyToId : undefined, replyToId: shouldIncludeReply(replyToId) ? replyToId : undefined,
threadId: params.threadId, threadId: params.threadId,
audioAsVoice: reply.audioAsVoice,
}); });
if (shouldIncludeReply(replyToId)) { if (shouldIncludeReply(replyToId)) {
hasReplied = true; hasReplied = true;

View File

@@ -0,0 +1,22 @@
import { describe, expect, it } from "vitest";
import { parsePollStartContent } from "./poll-types.js";
// Coverage for parsePollStartContent: content that carries the poll only under
// the "m.poll" key must still produce a summary.
describe("parsePollStartContent", () => {
it("parses legacy m.poll payloads", () => {
// The poll definition is nested under "m.poll"; no other poll key is present.
const summary = parsePollStartContent({
"m.poll": {
question: { "m.text": "Lunch?" },
kind: "m.poll.disclosed",
max_selections: 1,
answers: [
{ id: "answer1", "m.text": "Yes" },
{ id: "answer2", "m.text": "No" },
],
},
});
// The summary is expected to expose the "m.text" values given above, with
// answers in their original order.
expect(summary?.question).toBe("Lunch?");
expect(summary?.answers).toEqual(["Yes", "No"]);
});
});

View File

@@ -7,15 +7,17 @@
* - m.poll.end - Closes a poll * - m.poll.end - Closes a poll
*/ */
import type { TimelineEvents } from "matrix-js-sdk/lib/@types/event.js";
import type { ExtensibleAnyMessageEventContent } from "matrix-js-sdk/lib/@types/extensible_events.js";
import type { PollInput } from "../../../../src/polls.js"; import type { PollInput } from "../../../../src/polls.js";
export const M_POLL_START = "m.poll.start"; export const M_POLL_START = "m.poll.start" as const;
export const M_POLL_RESPONSE = "m.poll.response"; export const M_POLL_RESPONSE = "m.poll.response" as const;
export const M_POLL_END = "m.poll.end"; export const M_POLL_END = "m.poll.end" as const;
export const ORG_POLL_START = "org.matrix.msc3381.poll.start"; export const ORG_POLL_START = "org.matrix.msc3381.poll.start" as const;
export const ORG_POLL_RESPONSE = "org.matrix.msc3381.poll.response"; export const ORG_POLL_RESPONSE = "org.matrix.msc3381.poll.response" as const;
export const ORG_POLL_END = "org.matrix.msc3381.poll.end"; export const ORG_POLL_END = "org.matrix.msc3381.poll.end" as const;
export const POLL_EVENT_TYPES = [ export const POLL_EVENT_TYPES = [
M_POLL_START, M_POLL_START,
@@ -32,9 +34,7 @@ export const POLL_END_TYPES = [M_POLL_END, ORG_POLL_END];
export type PollKind = "m.poll.disclosed" | "m.poll.undisclosed"; export type PollKind = "m.poll.disclosed" | "m.poll.undisclosed";
export type TextContent = { export type TextContent = ExtensibleAnyMessageEventContent & {
"m.text"?: string;
"org.matrix.msc1767.text"?: string;
body?: string; body?: string;
}; };
@@ -42,25 +42,19 @@ export type PollAnswer = {
id: string; id: string;
} & TextContent; } & TextContent;
export type PollStartContent = { export type PollStartSubtype = {
"m.poll"?: {
question: TextContent; question: TextContent;
kind?: PollKind; kind?: PollKind;
max_selections?: number; max_selections?: number;
answers: PollAnswer[]; answers: PollAnswer[];
};
"org.matrix.msc3381.poll.start"?: {
question: TextContent;
kind?: PollKind;
max_selections?: number;
answers: PollAnswer[];
};
"m.relates_to"?: {
rel_type: "m.reference";
event_id: string;
};
}; };
/**
 * Poll-start content that nests the poll definition under the bare "m.poll"
 * key. parsePollStartContent falls back to this key when neither of the
 * poll-start keys is present on the content.
 */
export type LegacyPollStartContent = {
"m.poll"?: PollStartSubtype;
};
/**
 * Content accepted as a poll start: either the matrix-js-sdk timeline content
 * type registered for M_POLL_START, or the legacy "m.poll" shape.
 */
export type PollStartContent = TimelineEvents[typeof M_POLL_START] | LegacyPollStartContent;
export type PollSummary = { export type PollSummary = {
eventId: string; eventId: string;
roomId: string; roomId: string;
@@ -82,7 +76,9 @@ export function getTextContent(text?: TextContent): string {
} }
export function parsePollStartContent(content: PollStartContent): PollSummary | null { export function parsePollStartContent(content: PollStartContent): PollSummary | null {
const poll = content["m.poll"] ?? content["org.matrix.msc3381.poll.start"]; const poll = (content as Record<string, PollStartSubtype | undefined>)[M_POLL_START]
?? (content as Record<string, PollStartSubtype | undefined>)[ORG_POLL_START]
?? (content as Record<string, PollStartSubtype | undefined>)["m.poll"];
if (!poll) return null; if (!poll) return null;
const question = getTextContent(poll.question); const question = getTextContent(poll.question);
@@ -121,6 +117,11 @@ function buildTextContent(body: string): TextContent {
}; };
} }
/**
 * Renders a poll as plain text: the question on the first line, followed by
 * one numbered line ("1. ...", "2. ...") per answer.
 *
 * @param question - Poll question, used verbatim as the first line.
 * @param answers - Answer labels in display order.
 * @returns The question alone when there are no answers; otherwise the
 *   question and the numbered answers joined with newlines.
 */
function buildPollFallbackText(question: string, answers: string[]): string {
  if (answers.length === 0) {
    return question;
  }
  // Numbering is 1-based to match how a reader would count the options.
  const numbered = answers.map((label, position) => `${position + 1}. ${label}`);
  return [question, ...numbered].join("\n");
}
export function buildPollStartContent(poll: PollInput): PollStartContent { export function buildPollStartContent(poll: PollInput): PollStartContent {
const question = poll.question.trim(); const question = poll.question.trim();
const answers = poll.options const answers = poll.options
@@ -132,13 +133,19 @@ export function buildPollStartContent(poll: PollInput): PollStartContent {
})); }));
const maxSelections = poll.multiple ? Math.max(1, answers.length) : 1; const maxSelections = poll.multiple ? Math.max(1, answers.length) : 1;
const fallbackText = buildPollFallbackText(
question,
answers.map((answer) => getTextContent(answer)),
);
return { return {
"m.poll": { [M_POLL_START]: {
question: buildTextContent(question), question: buildTextContent(question),
kind: poll.multiple ? "m.poll.undisclosed" : "m.poll.disclosed", kind: poll.multiple ? "m.poll.undisclosed" : "m.poll.disclosed",
max_selections: maxSelections, max_selections: maxSelections,
answers, answers,
}, },
"m.text": fallbackText,
"org.matrix.msc1767.text": fallbackText,
}; };
} }

View File

@@ -31,6 +31,11 @@ vi.mock("../../../../src/web/media.js", () => ({
}), }),
})); }));
// Replace the image-ops module with mocks for this suite: getImageMetadata
// resolves to null and resizeToJpeg is a bare vi.fn() with no implementation.
vi.mock("../../../../src/media/image-ops.js", () => ({
getImageMetadata: vi.fn().mockResolvedValue(null),
resizeToJpeg: vi.fn(),
}));
let sendMessageMatrix: typeof import("./send.js").sendMessageMatrix; let sendMessageMatrix: typeof import("./send.js").sendMessageMatrix;
const makeClient = () => { const makeClient = () => {
@@ -65,13 +70,13 @@ describe("sendMessageMatrix media", () => {
const uploadArg = uploadContent.mock.calls[0]?.[0]; const uploadArg = uploadContent.mock.calls[0]?.[0];
expect(Buffer.isBuffer(uploadArg)).toBe(true); expect(Buffer.isBuffer(uploadArg)).toBe(true);
const content = sendMessage.mock.calls[0]?.[2] as { const content = sendMessage.mock.calls[0]?.[1] as {
url?: string; url?: string;
msgtype?: string; msgtype?: string;
format?: string; format?: string;
formatted_body?: string; formatted_body?: string;
}; };
expect(content.msgtype).toBe("m.file"); expect(content.msgtype).toBe("m.image");
expect(content.format).toBe("org.matrix.custom.html"); expect(content.format).toBe("org.matrix.custom.html");
expect(content.formatted_body).toContain("caption"); expect(content.formatted_body).toContain("caption");
expect(content.url).toBe("mxc://example/file"); expect(content.url).toBe("mxc://example/file");

View File

@@ -1,12 +1,15 @@
import type { AccountDataEvents, MatrixClient } from "matrix-js-sdk"; import type { AccountDataEvents, MatrixClient } from "matrix-js-sdk";
import { EventType, MsgType, RelationType } from "matrix-js-sdk"; import { EventType, MsgType, RelationType } from "matrix-js-sdk";
import type { import type {
ReactionEventContent,
RoomMessageEventContent, RoomMessageEventContent,
ReactionEventContent,
} from "matrix-js-sdk/lib/@types/events.js"; } from "matrix-js-sdk/lib/@types/events.js";
import { chunkMarkdownText, resolveTextChunkLimit } from "../../../../src/auto-reply/chunk.js"; import { chunkMarkdownText, resolveTextChunkLimit } from "../../../../src/auto-reply/chunk.js";
import { loadConfig } from "../../../../src/config/config.js"; import { loadConfig } from "../../../../src/config/config.js";
import { isVoiceCompatibleAudio } from "../../../../src/media/audio.js";
import { mediaKindFromMime } from "../../../../src/media/constants.js";
import { getImageMetadata, resizeToJpeg } from "../../../../src/media/image-ops.js";
import type { PollInput } from "../../../../src/polls.js"; import type { PollInput } from "../../../../src/polls.js";
import { loadWebMedia } from "../../../../src/web/media.js"; import { loadWebMedia } from "../../../../src/web/media.js";
import { getActiveMatrixClient } from "./active-client.js"; import { getActiveMatrixClient } from "./active-client.js";
@@ -47,6 +50,8 @@ export type MatrixSendOpts = {
replyToId?: string; replyToId?: string;
threadId?: string | number | null; threadId?: string | number | null;
timeoutMs?: number; timeoutMs?: number;
/** Send audio as voice message (voice bubble) instead of audio file. Defaults to false. */
audioAsVoice?: boolean;
}; };
function ensureNodeRuntime() { function ensureNodeRuntime() {
@@ -71,6 +76,12 @@ function normalizeTarget(raw: string): string {
return trimmed; return trimmed;
} }
/**
 * Converts an optional thread id into its trimmed string form.
 *
 * @param raw - Thread id as a string or number; may be omitted or null.
 * @returns The stringified id with surrounding whitespace removed, or null
 *   when the id is absent or blank after trimming.
 */
function normalizeThreadId(raw?: string | number | null): string | null {
  // `== null` matches both undefined and null.
  if (raw == null) {
    return null;
  }
  const text = String(raw).trim();
  return text.length > 0 ? text : null;
}
async function resolveDirectRoomId(client: MatrixClient, userId: string): Promise<string> { async function resolveDirectRoomId(client: MatrixClient, userId: string): Promise<string> {
const trimmed = userId.trim(); const trimmed = userId.trim();
if (!trimmed.startsWith("@")) { if (!trimmed.startsWith("@")) {
@@ -119,6 +130,18 @@ export async function resolveMatrixRoomId(
return target; return target;
} }
/**
 * Optional image details produced by prepareImageInfo and copied by
 * buildMediaContent into the `info` object of a media event.
 */
type MatrixImageInfo = {
// Width of the image as reported by its metadata; copied into `info` only when truthy.
w?: number;
// Height of the image as reported by its metadata; copied into `info` only when truthy.
h?: number;
// Content URI returned by the upload of the generated thumbnail.
thumbnail_url?: string;
// Details of the thumbnail; only forwarded when thumbnail_url is also set.
thumbnail_info?: {
w: number;
h: number;
mimetype: string;
// Byte length of the thumbnail buffer.
size: number;
};
};
function buildMediaContent(params: { function buildMediaContent(params: {
msgtype: MsgType.Image | MsgType.Audio | MsgType.Video | MsgType.File; msgtype: MsgType.Image | MsgType.Audio | MsgType.Video | MsgType.File;
body: string; body: string;
@@ -127,8 +150,24 @@ function buildMediaContent(params: {
mimetype?: string; mimetype?: string;
size: number; size: number;
relation?: MatrixReplyRelation; relation?: MatrixReplyRelation;
isVoice?: boolean;
durationMs?: number;
imageInfo?: MatrixImageInfo;
}): RoomMessageEventContent { }): RoomMessageEventContent {
const info = { mimetype: params.mimetype, size: params.size }; const info: Record<string, unknown> = { mimetype: params.mimetype, size: params.size };
if (params.durationMs !== undefined) {
info.duration = params.durationMs;
}
if (params.imageInfo) {
if (params.imageInfo.w) info.w = params.imageInfo.w;
if (params.imageInfo.h) info.h = params.imageInfo.h;
if (params.imageInfo.thumbnail_url) {
info.thumbnail_url = params.imageInfo.thumbnail_url;
if (params.imageInfo.thumbnail_info) {
info.thumbnail_info = params.imageInfo.thumbnail_info;
}
}
}
const base: MatrixMessageContent = { const base: MatrixMessageContent = {
msgtype: params.msgtype, msgtype: params.msgtype,
body: params.body, body: params.body,
@@ -136,6 +175,12 @@ function buildMediaContent(params: {
info, info,
url: params.url, url: params.url,
}; };
if (params.isVoice) {
base["org.matrix.msc3245.voice"] = {};
base["org.matrix.msc1767.audio"] = {
duration: params.durationMs,
};
}
if (params.relation) { if (params.relation) {
base["m.relates_to"] = params.relation; base["m.relates_to"] = params.relation;
} }
@@ -171,6 +216,75 @@ function buildReplyRelation(replyToId?: string): MatrixReplyRelation | undefined
return { "m.in_reply_to": { event_id: trimmed } }; return { "m.in_reply_to": { event_id: trimmed } };
} }
/**
 * Picks the Matrix message type for an attachment from its MIME type.
 *
 * @param contentType - MIME type of the media; an empty string is used when omitted.
 * @param fileName - Accepted for signature compatibility; not consulted here.
 * @returns MsgType.Image, MsgType.Audio or MsgType.Video when the media kind
 *   is "image", "audio" or "video" respectively; MsgType.File otherwise.
 */
function resolveMatrixMsgType(
  contentType?: string,
  fileName?: string,
): MsgType.Image | MsgType.Audio | MsgType.Video | MsgType.File {
  const mediaKind = mediaKindFromMime(contentType ?? "");
  if (mediaKind === "image") {
    return MsgType.Image;
  }
  if (mediaKind === "audio") {
    return MsgType.Audio;
  }
  if (mediaKind === "video") {
    return MsgType.Video;
  }
  // Anything else (including an unrecognised kind) is sent as a generic file.
  return MsgType.File;
}
/**
 * Decides whether a media item should be sent as a voice message.
 *
 * Voice is used only when the caller asked for it AND isVoiceCompatibleAudio
 * accepts the media's content type / file name. The compatibility check is
 * not invoked at all when voice was not requested.
 *
 * @param opts.wantsVoice - Whether the caller requested a voice message.
 * @param opts.contentType - MIME type of the media, if known.
 * @param opts.fileName - File name of the media, if known.
 * @returns An object whose `useVoice` flag carries the decision.
 */
function resolveMatrixVoiceDecision(opts: {
  wantsVoice: boolean;
  contentType?: string;
  fileName?: string;
}): { useVoice: boolean } {
  const { wantsVoice, contentType, fileName } = opts;
  // `&&` short-circuits, so the compatibility check only runs on request.
  const voiceAllowed = wantsVoice && isVoiceCompatibleAudio({ contentType, fileName });
  return { useVoice: voiceAllowed ? true : false };
}
// Images whose larger dimension exceeds this value get a thumbnail; the same
// value is passed to resizeToJpeg as `maxSide`.
const THUMBNAIL_MAX_SIDE = 800;
// Passed to resizeToJpeg as `quality` when generating the thumbnail.
const THUMBNAIL_QUALITY = 80;
/**
 * Collects image dimensions and, for large images, uploads a JPEG thumbnail.
 *
 * @param params.buffer - Raw image bytes.
 * @param params.client - Matrix client used to upload the thumbnail.
 * @returns `undefined` when image metadata cannot be read; otherwise the
 *   image's width/height, plus `thumbnail_url` (and `thumbnail_info` when the
 *   thumbnail's own metadata is readable) if a thumbnail was generated and
 *   uploaded successfully.
 *
 * Thumbnail creation is best-effort: any error from resizing or uploading is
 * swallowed and the dimensions are still returned.
 */
async function prepareImageInfo(params: {
  buffer: Buffer;
  client: MatrixClient;
}): Promise<MatrixImageInfo | undefined> {
  const { buffer, client } = params;

  const sourceMeta = await getImageMetadata(buffer).catch(() => null);
  if (!sourceMeta) {
    return undefined;
  }

  const { width, height } = sourceMeta;
  const result: MatrixImageInfo = { w: width, h: height };

  // Only images whose larger side exceeds the limit get a thumbnail.
  const needsThumbnail = Math.max(width, height) > THUMBNAIL_MAX_SIDE;
  if (!needsThumbnail) {
    return result;
  }

  try {
    const thumbnail = await resizeToJpeg({
      buffer,
      maxSide: THUMBNAIL_MAX_SIDE,
      quality: THUMBNAIL_QUALITY,
      withoutEnlargement: true,
    });
    // Metadata is read before the upload; a failure here only omits thumbnail_info.
    const thumbnailMeta = await getImageMetadata(thumbnail).catch(() => null);
    const uploaded = await client.uploadContent(thumbnail as MatrixUploadContent, {
      type: "image/jpeg",
      name: "thumbnail.jpg",
    });
    result.thumbnail_url = uploaded.content_uri;
    if (thumbnailMeta) {
      result.thumbnail_info = {
        w: thumbnailMeta.width,
        h: thumbnailMeta.height,
        mimetype: "image/jpeg",
        size: thumbnail.byteLength,
      };
    }
  } catch {
    // Thumbnail generation failed, continue without it
  }

  return result;
}
async function uploadFile( async function uploadFile(
client: MatrixClient, client: MatrixClient,
file: MatrixUploadContent | Buffer, file: MatrixUploadContent | Buffer,
@@ -238,14 +352,10 @@ export async function sendMessageMatrix(
const textLimit = resolveTextChunkLimit(cfg, "matrix"); const textLimit = resolveTextChunkLimit(cfg, "matrix");
const chunkLimit = Math.min(textLimit, MATRIX_TEXT_LIMIT); const chunkLimit = Math.min(textLimit, MATRIX_TEXT_LIMIT);
const chunks = chunkMarkdownText(trimmedMessage, chunkLimit); const chunks = chunkMarkdownText(trimmedMessage, chunkLimit);
const rawThreadId = opts.threadId; const threadId = normalizeThreadId(opts.threadId);
const threadId =
rawThreadId !== undefined && rawThreadId !== null
? String(rawThreadId).trim()
: null;
const relation = threadId ? undefined : buildReplyRelation(opts.replyToId); const relation = threadId ? undefined : buildReplyRelation(opts.replyToId);
const sendContent = (content: RoomMessageEventContent) => const sendContent = (content: RoomMessageEventContent) =>
client.sendMessage(roomId, threadId ?? undefined, content); threadId ? client.sendMessage(roomId, threadId, content) : client.sendMessage(roomId, content);
let lastMessageId = ""; let lastMessageId = "";
if (opts.mediaUrl) { if (opts.mediaUrl) {
@@ -255,9 +365,17 @@ export async function sendMessageMatrix(
contentType: media.contentType, contentType: media.contentType,
filename: media.fileName, filename: media.fileName,
}); });
const msgtype = MsgType.File; const baseMsgType = resolveMatrixMsgType(media.contentType, media.fileName);
const { useVoice } = resolveMatrixVoiceDecision({
wantsVoice: opts.audioAsVoice === true,
contentType: media.contentType,
fileName: media.fileName,
});
const msgtype = useVoice ? MsgType.Audio : baseMsgType;
const isImage = msgtype === MsgType.Image;
const imageInfo = isImage ? await prepareImageInfo({ buffer: media.buffer, client }) : undefined;
const [firstChunk, ...rest] = chunks; const [firstChunk, ...rest] = chunks;
const body = firstChunk ?? media.fileName ?? "(file)"; const body = useVoice ? "Voice message" : (firstChunk ?? media.fileName ?? "(file)");
const content = buildMediaContent({ const content = buildMediaContent({
msgtype, msgtype,
body, body,
@@ -266,10 +384,13 @@ export async function sendMessageMatrix(
mimetype: media.contentType, mimetype: media.contentType,
size: media.buffer.byteLength, size: media.buffer.byteLength,
relation, relation,
isVoice: useVoice,
imageInfo,
}); });
const response = await sendContent(content); const response = await sendContent(content);
lastMessageId = response.event_id ?? lastMessageId; lastMessageId = response.event_id ?? lastMessageId;
for (const chunk of rest) { const textChunks = useVoice ? chunks : rest;
for (const chunk of textChunks) {
const text = chunk.trim(); const text = chunk.trim();
if (!text) continue; if (!text) continue;
const followup = buildTextContent(text); const followup = buildTextContent(text);
@@ -316,16 +437,18 @@ export async function sendPollMatrix(
try { try {
const roomId = await resolveMatrixRoomId(client, to); const roomId = await resolveMatrixRoomId(client, to);
const pollContent = buildPollStartContent(poll); const pollContent = buildPollStartContent(poll);
const rawThreadId = opts.threadId; const threadId = normalizeThreadId(opts.threadId);
const threadId = const response = threadId
rawThreadId !== undefined && rawThreadId !== null ? await client.sendEvent(
? String(rawThreadId).trim()
: null;
const response = await client.sendEvent(
roomId, roomId,
threadId ?? undefined, threadId,
M_POLL_START as EventType.RoomMessage, M_POLL_START,
pollContent as unknown as RoomMessageEventContent, pollContent,
)
: await client.sendEvent(
roomId,
M_POLL_START,
pollContent,
); );
return { return {