refactor: unify markdown formatting pipeline
This commit is contained in:
@@ -33,9 +33,9 @@ describe("markdownToSlackMrkdwn", () => {
|
||||
expect(res).toBe("```\nconst x = 1;\n```");
|
||||
});
|
||||
|
||||
it("renders links with URL in parentheses", () => {
|
||||
it("renders links with Slack mrkdwn syntax", () => {
|
||||
const res = markdownToSlackMrkdwn("see [docs](https://example.com)");
|
||||
expect(res).toBe("see docs (https://example.com)");
|
||||
expect(res).toBe("see <https://example.com|docs>");
|
||||
});
|
||||
|
||||
it("does not duplicate bare URLs", () => {
|
||||
@@ -94,7 +94,7 @@ describe("markdownToSlackMrkdwn", () => {
|
||||
"**Important:** Check the _docs_ at [link](https://example.com)\n\n- first\n- second",
|
||||
);
|
||||
expect(res).toBe(
|
||||
"*Important:* Check the _docs_ at link (https://example.com)\n\n• first\n• second",
|
||||
"*Important:* Check the _docs_ at <https://example.com|link>\n\n• first\n• second",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,33 +1,8 @@
|
||||
import MarkdownIt from "markdown-it";
|
||||
import { chunkMarkdownIR, markdownToIR, type MarkdownLinkSpan } from "../markdown/ir.js";
|
||||
import { renderMarkdownWithMarkers } from "../markdown/render.js";
|
||||
|
||||
type ListState = {
|
||||
type: "bullet" | "ordered";
|
||||
index: number;
|
||||
};
|
||||
|
||||
type RenderEnv = {
|
||||
slackListStack?: ListState[];
|
||||
slackLinkStack?: { href: string }[];
|
||||
};
|
||||
|
||||
const md = new MarkdownIt({
|
||||
html: false,
|
||||
// Slack will auto-link plain URLs; keeping linkify off avoids double-rendering
|
||||
// (e.g. "https://x.com" becoming "https://x.com (https://x.com)").
|
||||
linkify: false,
|
||||
breaks: false,
|
||||
typographer: false,
|
||||
});
|
||||
|
||||
md.enable("strikethrough");
|
||||
|
||||
/**
 * Escape one plain-text segment for Slack mrkdwn.
 *
 * Slack treats "&", "<" and ">" as control characters, so each one is
 * replaced with its HTML entity. "&" is handled first so that the ampersands
 * introduced by the "<" and ">" replacements are not escaped a second time.
 *
 * This helper escapes unconditionally. Angle-bracket tokens that must survive
 * (e.g. "<@U123>", "<https://…|text>") have to be kept out of the text that
 * is passed in here — presumably escapeSlackMrkdwnText takes care of that;
 * confirm there.
 *
 * @param text - Raw text to escape.
 * @returns The text with "&", "<", ">" replaced by "&amp;", "&lt;", "&gt;".
 */
function escapeSlackMrkdwnSegment(text: string): string {
  return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
}
|
||||
@@ -74,165 +49,63 @@ function escapeSlackMrkdwnText(text: string): string {
|
||||
return out.join("");
|
||||
}
|
||||
|
||||
function getListStack(env: RenderEnv): ListState[] {
|
||||
if (!env.slackListStack) env.slackListStack = [];
|
||||
return env.slackListStack;
|
||||
/**
 * Build the Slack `<href|` … `>` markers that wrap one link span.
 *
 * Returns `null` when no explicit markup should be emitted:
 * - the href is empty after trimming,
 * - the label (the slice of `text` covered by the span) is blank, or
 * - the label already equals the href, or the href without its "mailto:"
 *   prefix, so wrapping it would only repeat the same text.
 *
 * NOTE(review): how the renderer treats a `null` result is decided by the
 * caller of this callback — presumably the label is emitted as-is; confirm
 * in the render module.
 *
 * @param link - Link span (href plus start/end offsets into `text`).
 * @param text - The text the span offsets refer to.
 * @returns Marker descriptor `{ start, end, open, close }`, or `null`.
 */
function buildSlackLink(link: MarkdownLinkSpan, text: string) {
  const target = link.href.trim();
  if (target.length === 0) return null;

  const visibleLabel = text.slice(link.start, link.end).trim();
  if (visibleLabel.length === 0) return null;

  // A label that merely repeats the target gains nothing from <url|label>.
  const mailtoPrefix = "mailto:";
  const bareTarget = target.startsWith(mailtoPrefix) ? target.slice(mailtoPrefix.length) : target;
  if (visibleLabel === target || visibleLabel === bareTarget) return null;

  return {
    start: link.start,
    end: link.end,
    open: `<${escapeSlackMrkdwnSegment(target)}|`,
    close: ">",
  };
}
|
||||
|
||||
/**
 * Return the link stack stored on the render environment, creating an empty
 * one on first access so that every caller shares the same array.
 *
 * @param env - Render environment that carries the stack.
 * @returns The (possibly freshly created) `slackLinkStack` array.
 */
function getLinkStack(env: RenderEnv): { href: string }[] {
  return (env.slackLinkStack ??= []);
}
|
||||
|
||||
md.renderer.rules.text = (tokens, idx) => escapeSlackMrkdwnText(tokens[idx]?.content ?? "");
|
||||
|
||||
md.renderer.rules.softbreak = () => "\n";
|
||||
md.renderer.rules.hardbreak = () => "\n";
|
||||
|
||||
md.renderer.rules.paragraph_open = () => "";
|
||||
md.renderer.rules.paragraph_close = (_tokens, _idx, _opts, env) => {
|
||||
const stack = getListStack(env as RenderEnv);
|
||||
return stack.length ? "" : "\n\n";
|
||||
};
|
||||
|
||||
md.renderer.rules.heading_open = () => "*";
|
||||
md.renderer.rules.heading_close = () => "*\n\n";
|
||||
|
||||
md.renderer.rules.blockquote_open = () => "> ";
|
||||
md.renderer.rules.blockquote_close = () => "\n";
|
||||
|
||||
md.renderer.rules.bullet_list_open = (_tokens, _idx, _opts, env) => {
|
||||
getListStack(env as RenderEnv).push({ type: "bullet", index: 0 });
|
||||
return "";
|
||||
};
|
||||
md.renderer.rules.bullet_list_close = (_tokens, _idx, _opts, env) => {
|
||||
getListStack(env as RenderEnv).pop();
|
||||
return "";
|
||||
};
|
||||
md.renderer.rules.ordered_list_open = (tokens, idx, _opts, env) => {
|
||||
const start = Number(tokens[idx]?.attrGet("start") ?? "1");
|
||||
getListStack(env as RenderEnv).push({ type: "ordered", index: start - 1 });
|
||||
return "";
|
||||
};
|
||||
md.renderer.rules.ordered_list_close = (_tokens, _idx, _opts, env) => {
|
||||
getListStack(env as RenderEnv).pop();
|
||||
return "";
|
||||
};
|
||||
md.renderer.rules.list_item_open = (_tokens, _idx, _opts, env) => {
|
||||
const stack = getListStack(env as RenderEnv);
|
||||
const top = stack[stack.length - 1];
|
||||
if (!top) return "";
|
||||
top.index += 1;
|
||||
const indent = " ".repeat(Math.max(0, stack.length - 1));
|
||||
const prefix = top.type === "ordered" ? `${top.index}. ` : "• ";
|
||||
return `${indent}${prefix}`;
|
||||
};
|
||||
md.renderer.rules.list_item_close = () => "\n";
|
||||
|
||||
// Slack mrkdwn uses _text_ for italic (same as markdown)
|
||||
md.renderer.rules.em_open = () => "_";
|
||||
md.renderer.rules.em_close = () => "_";
|
||||
|
||||
// Slack mrkdwn uses *text* for bold (single asterisk, not double)
|
||||
md.renderer.rules.strong_open = () => "*";
|
||||
md.renderer.rules.strong_close = () => "*";
|
||||
|
||||
// Slack mrkdwn uses ~text~ for strikethrough (single tilde)
|
||||
md.renderer.rules.s_open = () => "~";
|
||||
md.renderer.rules.s_close = () => "~";
|
||||
|
||||
md.renderer.rules.code_inline = (tokens, idx) =>
|
||||
`\`${escapeSlackMrkdwnSegment(tokens[idx]?.content ?? "")}\``;
|
||||
|
||||
md.renderer.rules.code_block = (tokens, idx) =>
|
||||
`\`\`\`\n${escapeSlackMrkdwnSegment(tokens[idx]?.content ?? "")}\`\`\`\n`;
|
||||
|
||||
md.renderer.rules.fence = (tokens, idx) =>
|
||||
`\`\`\`\n${escapeSlackMrkdwnSegment(tokens[idx]?.content ?? "")}\`\`\`\n`;
|
||||
|
||||
md.renderer.rules.link_open = (tokens, idx, _opts, env) => {
|
||||
const href = tokens[idx]?.attrGet("href") ?? "";
|
||||
const stack = getLinkStack(env as RenderEnv);
|
||||
stack.push({ href });
|
||||
return "";
|
||||
};
|
||||
md.renderer.rules.link_close = (_tokens, _idx, _opts, env) => {
|
||||
const stack = getLinkStack(env as RenderEnv);
|
||||
const link = stack.pop();
|
||||
if (link?.href) {
|
||||
return ` (${escapeSlackMrkdwnSegment(link.href)})`;
|
||||
}
|
||||
return "";
|
||||
};
|
||||
|
||||
md.renderer.rules.image = (tokens, idx) => {
|
||||
const alt = tokens[idx]?.content ?? "";
|
||||
return escapeSlackMrkdwnSegment(alt);
|
||||
};
|
||||
|
||||
md.renderer.rules.html_block = (tokens, idx) =>
|
||||
escapeSlackMrkdwnSegment(tokens[idx]?.content ?? "");
|
||||
md.renderer.rules.html_inline = (tokens, idx) =>
|
||||
escapeSlackMrkdwnSegment(tokens[idx]?.content ?? "");
|
||||
|
||||
md.renderer.rules.table_open = () => "";
|
||||
md.renderer.rules.table_close = () => "";
|
||||
md.renderer.rules.thead_open = () => "";
|
||||
md.renderer.rules.thead_close = () => "";
|
||||
md.renderer.rules.tbody_open = () => "";
|
||||
md.renderer.rules.tbody_close = () => "";
|
||||
md.renderer.rules.tr_open = () => "";
|
||||
md.renderer.rules.tr_close = () => "\n";
|
||||
md.renderer.rules.th_open = () => "";
|
||||
md.renderer.rules.th_close = () => "\t";
|
||||
md.renderer.rules.td_open = () => "";
|
||||
md.renderer.rules.td_close = () => "\t";
|
||||
|
||||
md.renderer.rules.hr = () => "\n";
|
||||
|
||||
/**
 * Swap Slack angle-bracket links (`<https://…>`, `<mailto:…>`, `<tel:…>`,
 * `<slack://…>`) for numbered placeholders.
 *
 * Each matched link is appended to `tokens`; the placeholder carries the
 * index of its entry, so the original text can be put back later.
 *
 * @param markdown - Input text; a nullish value is treated as "".
 * @returns The text with placeholders, plus the stashed links in match order.
 */
function protectSlackAngleLinks(markdown: string): { markdown: string; tokens: string[] } {
  const stash: string[] = [];
  const source = markdown ?? "";
  const angleLink = /<(?:https?:\/\/|mailto:|tel:|slack:\/\/)[^>\n]+>/g;

  // push() returns the new length, so the entry just stored sits at length - 1.
  const swapped = source.replace(
    angleLink,
    (found) => `⟦clawdbot-slacktok:${stash.push(found) - 1}⟧`,
  );

  return { markdown: swapped, tokens: stash };
}
|
||||
|
||||
/**
 * Put stashed Slack angle-bracket links back in place of their placeholders.
 *
 * Every occurrence of `⟦clawdbot-slacktok:<i>⟧` is replaced with `tokens[i]`.
 * Placeholders whose index has no entry in `tokens` are left untouched.
 *
 * @param text - Text that contains placeholders.
 * @param tokens - Original links, indexed by placeholder id.
 * @returns The text with every known placeholder restored.
 */
function restoreSlackAngleLinks(text: string, tokens: string[]): string {
  let out = text;
  for (let i = 0; i < tokens.length; i++) {
    // split/join inserts the token literally. A string replacement passed to
    // replace()/replaceAll() would expand "$&", "$$", "$`" and "$'" inside the
    // restored link and corrupt URLs that contain "$".
    out = out.split(`⟦clawdbot-slacktok:${i}⟧`).join(tokens[i] ?? "");
  }
  return out;
}
|
||||
|
||||
/**
 * Convert standard Markdown to Slack mrkdwn format.
 *
 * The input is parsed with markdownToIR and rendered with
 * renderMarkdownWithMarkers, using the same options as
 * markdownToSlackMrkdwnChunks so that chunked and unchunked output agree.
 * linkify/autolink are disabled because Slack auto-links plain URLs itself.
 *
 * Slack mrkdwn differences from standard Markdown:
 * - Bold: *text* (single asterisk, not double)
 * - Italic: _text_ (same)
 * - Strikethrough: ~text~ (single tilde)
 * - Code: `code` (same)
 * - Links: <url|text> or plain URL
 * - Escape &, <, > as &amp;, &lt;, &gt;
 *
 * @param markdown - Markdown source; a nullish value is treated as "".
 * @returns The Slack mrkdwn rendering of the input.
 */
export function markdownToSlackMrkdwn(markdown: string): string {
  const ir = markdownToIR(markdown ?? "", {
    linkify: false,
    autolink: false,
    headingStyle: "bold",
    blockquotePrefix: "> ",
  });
  return renderMarkdownWithMarkers(ir, {
    styleMarkers: {
      bold: { open: "*", close: "*" },
      italic: { open: "_", close: "_" },
      strikethrough: { open: "~", close: "~" },
      code: { open: "`", close: "`" },
      code_block: { open: "```\n", close: "```" },
    },
    escapeText: escapeSlackMrkdwnText,
    buildLink: buildSlackLink,
  });
}
|
||||
|
||||
/**
 * Convert Markdown to Slack mrkdwn, split into pieces.
 *
 * The Markdown is parsed once with markdownToIR, the result is split with
 * chunkMarkdownIR, and each chunk is rendered on its own with Slack's
 * markers.
 *
 * @param markdown - Markdown source; a nullish value is treated as "".
 * @param limit - Chunk limit forwarded to chunkMarkdownIR (presumably a
 *   maximum length per chunk — confirm in the IR module).
 * @returns One rendered mrkdwn string per chunk, in order.
 */
export function markdownToSlackMrkdwnChunks(markdown: string, limit: number): string[] {
  const parsed = markdownToIR(markdown ?? "", {
    linkify: false,
    autolink: false,
    headingStyle: "bold",
    blockquotePrefix: "> ",
  });

  const rendered: string[] = [];
  for (const piece of chunkMarkdownIR(parsed, limit)) {
    // A fresh options object per chunk, exactly as the renderer is called elsewhere.
    const slackText = renderMarkdownWithMarkers(piece, {
      styleMarkers: {
        bold: { open: "*", close: "*" },
        italic: { open: "_", close: "_" },
        strikethrough: { open: "~", close: "~" },
        code: { open: "`", close: "`" },
        code_block: { open: "```\n", close: "```" },
      },
      escapeText: escapeSlackMrkdwnText,
      buildLink: buildSlackLink,
    });
    rendered.push(slackText);
  }
  return rendered;
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { chunkMarkdownText } from "../../auto-reply/chunk.js";
|
||||
import { createReplyReferencePlanner } from "../../auto-reply/reply/reply-reference.js";
|
||||
import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../../auto-reply/tokens.js";
|
||||
import type { ReplyPayload } from "../../auto-reply/types.js";
|
||||
import type { RuntimeEnv } from "../../runtime.js";
|
||||
import { markdownToSlackMrkdwnChunks } from "../format.js";
|
||||
import { sendMessageSlack } from "../send.js";
|
||||
|
||||
export async function deliverReplies(params: {
|
||||
@@ -14,7 +14,6 @@ export async function deliverReplies(params: {
|
||||
textLimit: number;
|
||||
replyThreadTs?: string;
|
||||
}) {
|
||||
const chunkLimit = Math.min(params.textLimit, 4000);
|
||||
for (const payload of params.replies) {
|
||||
const threadTs = payload.replyToId ?? params.replyThreadTs;
|
||||
const mediaList = payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []);
|
||||
@@ -22,15 +21,13 @@ export async function deliverReplies(params: {
|
||||
if (!text && mediaList.length === 0) continue;
|
||||
|
||||
if (mediaList.length === 0) {
|
||||
for (const chunk of chunkMarkdownText(text, chunkLimit)) {
|
||||
const trimmed = chunk.trim();
|
||||
if (!trimmed || isSilentReplyText(trimmed, SILENT_REPLY_TOKEN)) continue;
|
||||
await sendMessageSlack(params.target, trimmed, {
|
||||
token: params.token,
|
||||
threadTs,
|
||||
accountId: params.accountId,
|
||||
});
|
||||
}
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed || isSilentReplyText(trimmed, SILENT_REPLY_TOKEN)) continue;
|
||||
await sendMessageSlack(params.target, trimmed, {
|
||||
token: params.token,
|
||||
threadTs,
|
||||
accountId: params.accountId,
|
||||
});
|
||||
} else {
|
||||
let first = true;
|
||||
for (const mediaUrl of mediaList) {
|
||||
@@ -130,7 +127,7 @@ export async function deliverSlackSlashReplies(params: {
|
||||
.filter(Boolean)
|
||||
.join("\n");
|
||||
if (!combined) continue;
|
||||
for (const chunk of chunkMarkdownText(combined, chunkLimit)) {
|
||||
for (const chunk of markdownToSlackMrkdwnChunks(combined, chunkLimit)) {
|
||||
messages.push(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import { type FilesUploadV2Arguments, WebClient } from "@slack/web-api";
|
||||
|
||||
import { chunkMarkdownText, resolveTextChunkLimit } from "../auto-reply/chunk.js";
|
||||
import { resolveTextChunkLimit } from "../auto-reply/chunk.js";
|
||||
import { loadConfig } from "../config/config.js";
|
||||
import { logVerbose } from "../globals.js";
|
||||
import { loadWebMedia } from "../web/media.js";
|
||||
import type { SlackTokenSource } from "./accounts.js";
|
||||
import { resolveSlackAccount } from "./accounts.js";
|
||||
import { markdownToSlackMrkdwn } from "./format.js";
|
||||
import { markdownToSlackMrkdwnChunks } from "./format.js";
|
||||
import { resolveSlackBotToken } from "./token.js";
|
||||
|
||||
const SLACK_TEXT_LIMIT = 4000;
|
||||
@@ -164,8 +164,7 @@ export async function sendMessageSlack(
|
||||
const { channelId } = await resolveChannelId(client, recipient);
|
||||
const textLimit = resolveTextChunkLimit(cfg, "slack", account.accountId);
|
||||
const chunkLimit = Math.min(textLimit, SLACK_TEXT_LIMIT);
|
||||
const slackFormatted = markdownToSlackMrkdwn(trimmedMessage);
|
||||
const chunks = chunkMarkdownText(slackFormatted, chunkLimit);
|
||||
const chunks = markdownToSlackMrkdwnChunks(trimmedMessage, chunkLimit);
|
||||
const mediaMaxBytes =
|
||||
typeof account.config.mediaMaxMb === "number"
|
||||
? account.config.mediaMaxMb * 1024 * 1024
|
||||
|
||||
Reference in New Issue
Block a user