fix: preserve markdown fences when chunking
This commit is contained in:
@@ -101,6 +101,7 @@
|
||||
- Telegram: notify users when inbound media exceeds size limits. Thanks @jarvis-medmatic for PR #283.
|
||||
- Telegram: send GIF media as animations (auto-play) and improve filename sniffing.
|
||||
- Bash tool: inherit gateway PATH so Nix-provided tools resolve during commands. Thanks @joshp123 for PR #202.
|
||||
- Delivery chunking: keep Markdown fenced code blocks valid when splitting long replies (close + reopen fences).
|
||||
|
||||
### Maintenance
|
||||
- Agent: add `skipBootstrap` config option. Thanks @onutc for PR #292.
|
||||
|
||||
@@ -1,17 +1,15 @@
|
||||
import {
|
||||
findFenceSpanAt,
|
||||
isSafeFenceBreak,
|
||||
parseFenceSpans,
|
||||
} from "../markdown/fences.js";
|
||||
|
||||
/**
 * Size limits and break preference used when cutting a reply into blocks.
 */
export type BlockReplyChunking = {
  /** Break candidates located before this offset are not accepted. */
  minChars: number;
  /** Buffer length at which a break is forced. */
  maxChars: number;
  /** Kind of boundary the chunker should try first. */
  breakPreference?: "paragraph" | "newline" | "sentence";
};
|
||||
|
||||
/**
 * Location and opening details of one fenced code block inside a buffer.
 */
type FenceSpan = {
  /** Offset of the first character of the opening fence line. */
  start: number;
  /** Offset just past the closing fence line, or the buffer length if unclosed. */
  end: number;
  /** Complete opening fence line, without its trailing newline. */
  openLine: string;
  /** The run of backticks or tildes that opened the fence. */
  marker: string;
  /** Leading spaces found in front of the opening marker. */
  indent: string;
};
|
||||
|
||||
type FenceSplit = {
|
||||
closeFenceLine: string;
|
||||
reopenFenceLine: string;
|
||||
@@ -123,7 +121,10 @@ export class EmbeddedBlockChunker {
|
||||
if (preference === "paragraph") {
|
||||
let paragraphIdx = buffer.indexOf("\n\n");
|
||||
while (paragraphIdx !== -1) {
|
||||
if (paragraphIdx >= minChars && isSafeBreak(fenceSpans, paragraphIdx)) {
|
||||
if (
|
||||
paragraphIdx >= minChars &&
|
||||
isSafeFenceBreak(fenceSpans, paragraphIdx)
|
||||
) {
|
||||
return { index: paragraphIdx };
|
||||
}
|
||||
paragraphIdx = buffer.indexOf("\n\n", paragraphIdx + 2);
|
||||
@@ -133,7 +134,10 @@ export class EmbeddedBlockChunker {
|
||||
if (preference === "paragraph" || preference === "newline") {
|
||||
let newlineIdx = buffer.indexOf("\n");
|
||||
while (newlineIdx !== -1) {
|
||||
if (newlineIdx >= minChars && isSafeBreak(fenceSpans, newlineIdx)) {
|
||||
if (
|
||||
newlineIdx >= minChars &&
|
||||
isSafeFenceBreak(fenceSpans, newlineIdx)
|
||||
) {
|
||||
return { index: newlineIdx };
|
||||
}
|
||||
newlineIdx = buffer.indexOf("\n", newlineIdx + 1);
|
||||
@@ -147,7 +151,7 @@ export class EmbeddedBlockChunker {
|
||||
const at = match.index ?? -1;
|
||||
if (at < minChars) continue;
|
||||
const candidate = at + 1;
|
||||
if (isSafeBreak(fenceSpans, candidate)) {
|
||||
if (isSafeFenceBreak(fenceSpans, candidate)) {
|
||||
sentenceIdx = candidate;
|
||||
}
|
||||
}
|
||||
@@ -168,7 +172,7 @@ export class EmbeddedBlockChunker {
|
||||
if (preference === "paragraph") {
|
||||
let paragraphIdx = window.lastIndexOf("\n\n");
|
||||
while (paragraphIdx >= minChars) {
|
||||
if (isSafeBreak(fenceSpans, paragraphIdx)) {
|
||||
if (isSafeFenceBreak(fenceSpans, paragraphIdx)) {
|
||||
return { index: paragraphIdx };
|
||||
}
|
||||
paragraphIdx = window.lastIndexOf("\n\n", paragraphIdx - 1);
|
||||
@@ -178,7 +182,7 @@ export class EmbeddedBlockChunker {
|
||||
if (preference === "paragraph" || preference === "newline") {
|
||||
let newlineIdx = window.lastIndexOf("\n");
|
||||
while (newlineIdx >= minChars) {
|
||||
if (isSafeBreak(fenceSpans, newlineIdx)) {
|
||||
if (isSafeFenceBreak(fenceSpans, newlineIdx)) {
|
||||
return { index: newlineIdx };
|
||||
}
|
||||
newlineIdx = window.lastIndexOf("\n", newlineIdx - 1);
|
||||
@@ -192,7 +196,7 @@ export class EmbeddedBlockChunker {
|
||||
const at = match.index ?? -1;
|
||||
if (at < minChars) continue;
|
||||
const candidate = at + 1;
|
||||
if (isSafeBreak(fenceSpans, candidate)) {
|
||||
if (isSafeFenceBreak(fenceSpans, candidate)) {
|
||||
sentenceIdx = candidate;
|
||||
}
|
||||
}
|
||||
@@ -200,13 +204,13 @@ export class EmbeddedBlockChunker {
|
||||
}
|
||||
|
||||
for (let i = window.length - 1; i >= minChars; i--) {
|
||||
if (/\s/.test(window[i]) && isSafeBreak(fenceSpans, i)) {
|
||||
if (/\s/.test(window[i]) && isSafeFenceBreak(fenceSpans, i)) {
|
||||
return { index: i };
|
||||
}
|
||||
}
|
||||
|
||||
if (buffer.length >= maxChars) {
|
||||
if (isSafeBreak(fenceSpans, maxChars)) return { index: maxChars };
|
||||
if (isSafeFenceBreak(fenceSpans, maxChars)) return { index: maxChars };
|
||||
const fence = findFenceSpanAt(fenceSpans, maxChars);
|
||||
if (fence) {
|
||||
return {
|
||||
@@ -229,76 +233,3 @@ function stripLeadingNewlines(value: string): string {
|
||||
while (i < value.length && value[i] === "\n") i++;
|
||||
return i > 0 ? value.slice(i) : value;
|
||||
}
|
||||
|
||||
/**
 * Scans `buffer` line by line and returns one span per fenced code block.
 *
 * Fence recognition follows the CommonMark rules that matter for chunking:
 * - an opening fence is up to three spaces of indent followed by three or
 *   more backticks or tildes; for backtick fences the info string must not
 *   contain a backtick (so a one-line "```code```" is not a fence);
 * - a closing fence uses the same character, is at least as long as the
 *   opening marker and is followed only by spaces or tabs.
 *
 * A fence that is still open at the end of the buffer yields a span that runs
 * to `buffer.length`.
 *
 * @param buffer Text to scan.
 * @returns Spans in document order; `end` is exclusive and includes the
 *   newline that terminates the closing fence line when there is one.
 */
function parseFenceSpans(buffer: string): FenceSpan[] {
  // Hoisted so the pattern is not rebuilt for every line.
  const fenceLine = /^( {0,3})(`{3,}|~{3,})(.*)$/;
  const blankTail = /^[ \t]*$/;

  const spans: FenceSpan[] = [];
  let open:
    | {
        start: number;
        markerChar: string;
        markerLen: number;
        openLine: string;
        marker: string;
        indent: string;
      }
    | undefined;

  let offset = 0;
  while (offset <= buffer.length) {
    const nextNewline = buffer.indexOf("\n", offset);
    const lineEnd = nextNewline === -1 ? buffer.length : nextNewline;
    const line = buffer.slice(offset, lineEnd);

    const match = fenceLine.exec(line);
    if (match) {
      const indent = match[1] ?? "";
      const marker = match[2] ?? "";
      const tail = match[3] ?? "";
      const markerChar = marker.charAt(0);
      const markerLen = marker.length;

      if (!open) {
        // A backtick in the info string means this is inline code, not a fence.
        const validOpener = markerChar !== "`" || !tail.includes("`");
        if (validOpener) {
          open = { start: offset, markerChar, markerLen, openLine: line, marker, indent };
        }
      } else if (
        open.markerChar === markerChar &&
        markerLen >= open.markerLen &&
        // A closing fence may not carry an info string.
        blankTail.test(tail)
      ) {
        const end = nextNewline === -1 ? buffer.length : nextNewline + 1;
        spans.push({
          start: open.start,
          end,
          openLine: open.openLine,
          marker: open.marker,
          indent: open.indent,
        });
        open = undefined;
      }
    }

    if (nextNewline === -1) break;
    offset = nextNewline + 1;
  }

  // An unterminated fence covers everything up to the end of the buffer.
  if (open) {
    spans.push({
      start: open.start,
      end: buffer.length,
      openLine: open.openLine,
      marker: open.marker,
      indent: open.indent,
    });
  }

  return spans;
}
|
||||
|
||||
/**
 * Returns the first span that strictly contains `index`, if any.
 *
 * Both boundaries are exclusive: `index === span.start` and
 * `index === span.end` count as outside the fence.
 */
function findFenceSpanAt(
  spans: FenceSpan[],
  index: number,
): FenceSpan | undefined {
  for (const candidate of spans) {
    const afterStart = candidate.start < index;
    const beforeEnd = index < candidate.end;
    if (afterStart && beforeEnd) {
      return candidate;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Reports whether `index` lies outside every fence span, i.e. whether text can
 * be cut there without splitting a fenced code block.
 */
function isSafeBreak(spans: FenceSpan[], index: number): boolean {
  const enclosingFence = findFenceSpanAt(spans, index);
  return enclosingFence === undefined;
}
|
||||
|
||||
@@ -1,6 +1,29 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import { chunkText, resolveTextChunkLimit } from "./chunk.js";
|
||||
import {
|
||||
chunkMarkdownText,
|
||||
chunkText,
|
||||
resolveTextChunkLimit,
|
||||
} from "./chunk.js";
|
||||
|
||||
/**
 * Test helper: asserts that every chunk closes each fence it opens.
 *
 * Each chunk is scanned on its own; a fence line either opens a block (when
 * none is open) or closes the open one when it uses the same marker character
 * and is at least as long as the opener.
 */
function expectFencesBalanced(chunks: string[]) {
  const fencePattern = /^( {0,3})(`{3,}|~{3,})(.*)$/;

  for (const chunk of chunks) {
    let pending: { markerChar: string; markerLen: number } | null = null;

    for (const line of chunk.split("\n")) {
      const marker = fencePattern.exec(line)?.[2];
      if (marker === undefined) continue;

      if (pending === null) {
        pending = { markerChar: marker.charAt(0), markerLen: marker.length };
      } else if (
        pending.markerChar === marker.charAt(0) &&
        marker.length >= pending.markerLen
      ) {
        pending = null;
      }
    }

    // Anything still pending here is a fence the chunk never closed.
    expect(pending).toBe(null);
  }
}
|
||||
|
||||
describe("chunkText", () => {
|
||||
it("keeps multi-line text in one chunk when under limit", () => {
|
||||
@@ -72,3 +95,79 @@ describe("resolveTextChunkLimit", () => {
|
||||
expect(resolveTextChunkLimit(cfg, "telegram")).toBe(4000);
|
||||
});
|
||||
});
|
||||
|
||||
describe("chunkMarkdownText", () => {
  // Shared checks for the "forced split inside a fence" cases: more than one
  // chunk, each within the limit, each opening and closing its own fence.
  const expectEveryChunkFenced = (
    chunks: string[],
    limit: number,
    openPrefix: string,
    closeSuffix: string,
  ) => {
    expect(chunks.length).toBeGreaterThan(1);
    for (const chunk of chunks) {
      expect(chunk.length).toBeLessThanOrEqual(limit);
      expect(chunk.startsWith(openPrefix)).toBe(true);
      expect(chunk.trimEnd().endsWith(closeSuffix)).toBe(true);
    }
    expectFencesBalanced(chunks);
  };

  it("keeps fenced blocks intact when a safe break exists", () => {
    const fence = "```bash\nline1\nline2\n```";
    const text = ["p".repeat(60), fence, "s".repeat(60)].join("\n\n");

    const chunks = chunkMarkdownText(text, 40);

    expect(chunks.some((chunk) => chunk.trimEnd() === fence)).toBe(true);
    expectFencesBalanced(chunks);
  });

  it("reopens fenced blocks when forced to split inside them", () => {
    const limit = 120;
    const text = "```txt\n" + "a".repeat(500) + "\n```";
    expectEveryChunkFenced(chunkMarkdownText(text, limit), limit, "```txt\n", "```");
  });

  it("supports tilde fences", () => {
    const limit = 140;
    const text = "~~~sh\n" + "x".repeat(600) + "\n~~~";
    expectEveryChunkFenced(chunkMarkdownText(text, limit), limit, "~~~sh\n", "~~~");
  });

  it("supports longer fence markers for close", () => {
    const limit = 140;
    const text = "````md\n" + "y".repeat(600) + "\n````";
    expectEveryChunkFenced(chunkMarkdownText(text, limit), limit, "````md\n", "````");
  });

  it("preserves indentation for indented fences", () => {
    const limit = 160;
    const text = " ```js\n " + "z".repeat(600) + "\n ```";
    expectEveryChunkFenced(chunkMarkdownText(text, limit), limit, " ```js\n", " ```");
  });

  it("never produces an empty fenced chunk when splitting", () => {
    const fenceLine = /^( {0,3})(`{3,}|~{3,})(.*)$/;
    const text = "```txt\n" + "a".repeat(300) + "\n```";

    for (const chunk of chunkMarkdownText(text, 60)) {
      // Drop the fence lines; what is left must contain real content.
      const body = chunk
        .split("\n")
        .filter((line) => !fenceLine.test(line))
        .join("\n");
      expect(body.trim()).not.toBe("");
    }
  });
});
|
||||
|
||||
@@ -3,6 +3,11 @@
|
||||
// the chunk so messages are only split when they truly exceed the limit.
|
||||
|
||||
import type { ClawdbotConfig } from "../config/config.js";
|
||||
import {
|
||||
findFenceSpanAt,
|
||||
isSafeFenceBreak,
|
||||
parseFenceSpans,
|
||||
} from "../markdown/fences.js";
|
||||
|
||||
export type TextChunkProvider =
|
||||
| "whatsapp"
|
||||
@@ -91,3 +96,123 @@ export function chunkText(text: string, limit: number): string[] {
|
||||
|
||||
return chunks;
|
||||
}
|
||||
|
||||
/**
 * Splits Markdown `text` into chunks of roughly `limit` characters while
 * keeping fenced code blocks valid.
 *
 * Break selection, in order of preference:
 * 1. the last newline (or, failing that, the last whitespace) inside the first
 *    `limit` characters that is not inside a fenced block;
 * 2. otherwise a cut inside the fence, preferably right after a newline, with
 *    the fence closed at the end of the chunk and reopened (using the original
 *    opening line) at the start of the next one;
 * 3. when `limit` leaves no room for the closing fence line, a plain hard cut
 *    at `limit` with no fence repair.
 *
 * @param text  Markdown to split. Empty input yields `[]`.
 * @param limit Maximum chunk length. A non-positive limit disables chunking
 *   and returns the text as a single chunk.
 * @returns The chunks in order. Fence lines added in case 2 count toward the
 *   chunk length.
 */
export function chunkMarkdownText(text: string, limit: number): string[] {
  if (!text) return [];
  if (limit <= 0) return [text];
  if (text.length <= limit) return [text];

  const chunks: string[] = [];
  let remaining = text;

  while (remaining.length > limit) {
    // Spans are recomputed each round because `remaining` is rewritten
    // (a reopened fence line may be prepended below).
    const spans = parseFenceSpans(remaining);
    const head = remaining.slice(0, limit);

    const softBreak = pickSafeBreakIndex(head, spans);
    let breakIdx = softBreak > 0 ? softBreak : limit;

    const initialFence = isSafeFenceBreak(spans, breakIdx)
      ? undefined
      : findFenceSpanAt(spans, breakIdx);

    let fenceToSplit = initialFence;
    if (initialFence) {
      const closeLine = `${initialFence.indent}${initialFence.marker}`;
      // Furthest cut that still leaves room for "\n" + closeLine.
      const maxIdxIfNeedNewline = limit - (closeLine.length + 1);

      if (maxIdxIfNeedNewline <= 0) {
        // No room for a closing fence at all: fall back to a hard cut.
        fenceToSplit = undefined;
        breakIdx = limit;
      } else {
        // The cut must land past the opening line plus at least one character
        // of content, otherwise the chunk would be an empty fence.
        const minProgressIdx = Math.min(
          remaining.length,
          initialFence.start + initialFence.openLine.length + 2,
        );
        // Furthest cut when the chunk already ends with a newline.
        const maxIdxIfAlreadyNewline = limit - closeLine.length;

        let pickedNewline = false;
        let lastNewline = remaining.lastIndexOf(
          "\n",
          Math.max(0, maxIdxIfAlreadyNewline - 1),
        );
        while (lastNewline !== -1) {
          const candidateBreak = lastNewline + 1;
          if (candidateBreak < minProgressIdx) break;
          const candidateFence = findFenceSpanAt(spans, candidateBreak);
          if (candidateFence && candidateFence.start === initialFence.start) {
            breakIdx = Math.max(1, candidateBreak);
            pickedNewline = true;
            break;
          }
          lastNewline = remaining.lastIndexOf("\n", lastNewline - 1);
        }

        if (!pickedNewline) {
          if (minProgressIdx > maxIdxIfAlreadyNewline) {
            // The opening line alone nearly fills the limit: hard cut.
            fenceToSplit = undefined;
            breakIdx = limit;
          } else {
            breakIdx = Math.max(minProgressIdx, maxIdxIfNeedNewline);
          }
        }
      }

      // Confirm the final cut is still inside the same fence, but only when a
      // fence split is still planned. Re-enabling it after a hard-cut decision
      // would produce over-limit chunks and, for very small limits, a
      // `remaining` that never shrinks (the loop would not terminate).
      if (fenceToSplit) {
        const fenceAtBreak = findFenceSpanAt(spans, breakIdx);
        fenceToSplit =
          fenceAtBreak && fenceAtBreak.start === initialFence.start
            ? fenceAtBreak
            : undefined;
      }
    }

    let rawChunk = remaining.slice(0, breakIdx);
    if (!rawChunk) break;

    // When the cut lands on a whitespace character, drop that one separator.
    const brokeOnSeparator =
      breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
    const nextStart = Math.min(
      remaining.length,
      breakIdx + (brokeOnSeparator ? 1 : 0),
    );
    let next = remaining.slice(nextStart);

    if (fenceToSplit) {
      // Close the fence in this chunk and reopen it in the next one.
      const closeLine = `${fenceToSplit.indent}${fenceToSplit.marker}`;
      rawChunk = rawChunk.endsWith("\n")
        ? `${rawChunk}${closeLine}`
        : `${rawChunk}\n${closeLine}`;
      next = `${fenceToSplit.openLine}\n${next}`;
    } else {
      next = stripLeadingNewlines(next);
    }

    chunks.push(rawChunk);
    remaining = next;
  }

  if (remaining.length) chunks.push(remaining);
  return chunks;
}
|
||||
|
||||
/**
 * Removes every "\n" at the very start of `value`.
 *
 * Only line-feed characters are removed; other leading whitespace (spaces,
 * tabs, "\r") is kept.
 */
function stripLeadingNewlines(value: string): string {
  return value.replace(/^\n+/, "");
}
|
||||
|
||||
/**
 * Finds the right-most position in `window` where the text may be cut without
 * landing inside a fenced block.
 *
 * Newlines are tried first, scanning from the end; if none qualifies, any
 * whitespace character is accepted. Index 0 is never returned.
 *
 * @param window Leading slice of the text that must contain the break.
 * @param spans  Fence spans of the full text, in the same coordinate space.
 * @returns The break index, or -1 when no safe position exists.
 */
function pickSafeBreakIndex(
  window: string,
  spans: ReturnType<typeof parseFenceSpans>,
): number {
  // Pass 1: newlines, right to left.
  for (
    let at = window.lastIndexOf("\n");
    at > 0;
    at = window.lastIndexOf("\n", at - 1)
  ) {
    if (isSafeFenceBreak(spans, at)) return at;
  }

  // Pass 2: any whitespace, right to left.
  const whitespace = /\s/;
  let cursor = window.length;
  while (--cursor > 0) {
    if (whitespace.test(window.charAt(cursor)) && isSafeFenceBreak(spans, cursor)) {
      return cursor;
    }
  }

  return -1;
}
|
||||
|
||||
@@ -22,7 +22,11 @@ import {
|
||||
DEFAULT_AGENT_WORKSPACE_DIR,
|
||||
ensureAgentWorkspace,
|
||||
} from "../agents/workspace.js";
|
||||
import { chunkText, resolveTextChunkLimit } from "../auto-reply/chunk.js";
|
||||
import {
|
||||
chunkMarkdownText,
|
||||
chunkText,
|
||||
resolveTextChunkLimit,
|
||||
} from "../auto-reply/chunk.js";
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import {
|
||||
normalizeThinkLevel,
|
||||
@@ -667,7 +671,7 @@ export async function agentCommand(
|
||||
if (deliveryProvider === "telegram" && telegramTarget) {
|
||||
try {
|
||||
if (media.length === 0) {
|
||||
for (const chunk of chunkText(text, deliveryTextLimit)) {
|
||||
for (const chunk of chunkMarkdownText(text, deliveryTextLimit)) {
|
||||
await deps.sendMessageTelegram(telegramTarget, chunk, {
|
||||
verbose: false,
|
||||
token: telegramToken || undefined,
|
||||
|
||||
@@ -19,7 +19,11 @@ import {
|
||||
DEFAULT_AGENT_WORKSPACE_DIR,
|
||||
ensureAgentWorkspace,
|
||||
} from "../agents/workspace.js";
|
||||
import { chunkText, resolveTextChunkLimit } from "../auto-reply/chunk.js";
|
||||
import {
|
||||
chunkMarkdownText,
|
||||
chunkText,
|
||||
resolveTextChunkLimit,
|
||||
} from "../auto-reply/chunk.js";
|
||||
import {
|
||||
DEFAULT_HEARTBEAT_ACK_MAX_CHARS,
|
||||
stripHeartbeatToken,
|
||||
@@ -439,7 +443,10 @@ export async function runCronIsolatedAgentTurn(params: {
|
||||
const mediaList =
|
||||
payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []);
|
||||
if (mediaList.length === 0) {
|
||||
for (const chunk of chunkText(payload.text ?? "", textLimit)) {
|
||||
for (const chunk of chunkMarkdownText(
|
||||
payload.text ?? "",
|
||||
textLimit,
|
||||
)) {
|
||||
await params.deps.sendMessageTelegram(chatId, chunk, {
|
||||
verbose: false,
|
||||
token: telegramToken || undefined,
|
||||
@@ -528,7 +535,10 @@ export async function runCronIsolatedAgentTurn(params: {
|
||||
const mediaList =
|
||||
payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []);
|
||||
if (mediaList.length === 0) {
|
||||
for (const chunk of chunkText(payload.text ?? "", textLimit)) {
|
||||
for (const chunk of chunkMarkdownText(
|
||||
payload.text ?? "",
|
||||
textLimit,
|
||||
)) {
|
||||
await params.deps.sendMessageSlack(slackTarget, chunk);
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -15,7 +15,10 @@ import {
|
||||
type PartialUser,
|
||||
type User,
|
||||
} from "discord.js";
|
||||
import { chunkText, resolveTextChunkLimit } from "../auto-reply/chunk.js";
|
||||
import {
|
||||
chunkMarkdownText,
|
||||
resolveTextChunkLimit,
|
||||
} from "../auto-reply/chunk.js";
|
||||
import { hasControlCommand } from "../auto-reply/command-detection.js";
|
||||
import { formatAgentEnvelope } from "../auto-reply/envelope.js";
|
||||
import { dispatchReplyFromConfig } from "../auto-reply/reply/dispatch-from-config.js";
|
||||
@@ -1295,7 +1298,7 @@ async function deliverReplies({
|
||||
const replyToId = payload.replyToId;
|
||||
if (!text && mediaList.length === 0) continue;
|
||||
if (mediaList.length === 0) {
|
||||
for (const chunk of chunkText(text, chunkLimit)) {
|
||||
for (const chunk of chunkMarkdownText(text, chunkLimit)) {
|
||||
const replyTo = resolveDiscordReplyTarget({
|
||||
replyToMode,
|
||||
replyToId,
|
||||
|
||||
@@ -12,7 +12,7 @@ import type {
|
||||
RESTPostAPIGuildScheduledEventJSONBody,
|
||||
} from "discord-api-types/v10";
|
||||
|
||||
import { chunkText } from "../auto-reply/chunk.js";
|
||||
import { chunkMarkdownText } from "../auto-reply/chunk.js";
|
||||
import { loadConfig } from "../config/config.js";
|
||||
import {
|
||||
normalizePollDurationHours,
|
||||
@@ -360,7 +360,7 @@ async function sendDiscordText(
|
||||
})) as { id: string; channel_id: string };
|
||||
return res;
|
||||
}
|
||||
const chunks = chunkText(text, DISCORD_TEXT_LIMIT);
|
||||
const chunks = chunkMarkdownText(text, DISCORD_TEXT_LIMIT);
|
||||
let last: { id: string; channel_id: string } | null = null;
|
||||
let isFirst = true;
|
||||
for (const chunk of chunks) {
|
||||
|
||||
85
src/markdown/fences.ts
Normal file
85
src/markdown/fences.ts
Normal file
@@ -0,0 +1,85 @@
|
||||
/**
 * Location and opening details of one fenced code block inside a buffer.
 */
export type FenceSpan = {
  /** Offset of the first character of the opening fence line. */
  start: number;
  /**
   * Exclusive end offset: just past the closing fence line (including its
   * newline when present), or the buffer length when the fence is unclosed.
   */
  end: number;
  /** Complete opening fence line, without its trailing newline. */
  openLine: string;
  /** The run of backticks or tildes that opened the fence. */
  marker: string;
  /** Leading spaces (at most three) in front of the opening marker. */
  indent: string;
};

/**
 * Scans `buffer` line by line and returns one span per fenced code block.
 *
 * Fence recognition follows the CommonMark rules that matter for chunking:
 * - an opening fence is up to three spaces of indent followed by three or
 *   more backticks or tildes; for backtick fences the info string must not
 *   contain a backtick (so a one-line "```code```" is not a fence);
 * - a closing fence uses the same character, is at least as long as the
 *   opening marker and is followed only by spaces or tabs.
 *
 * A fence that is still open at the end of the buffer yields a span that runs
 * to `buffer.length`.
 *
 * @param buffer Text to scan.
 * @returns Spans in document order.
 */
export function parseFenceSpans(buffer: string): FenceSpan[] {
  // Hoisted so the patterns are not rebuilt for every line.
  const fenceLine = /^( {0,3})(`{3,}|~{3,})(.*)$/;
  const blankTail = /^[ \t]*$/;

  const spans: FenceSpan[] = [];
  let open:
    | {
        start: number;
        markerChar: string;
        markerLen: number;
        openLine: string;
        marker: string;
        indent: string;
      }
    | undefined;

  let offset = 0;
  while (offset <= buffer.length) {
    const nextNewline = buffer.indexOf("\n", offset);
    const lineEnd = nextNewline === -1 ? buffer.length : nextNewline;
    const line = buffer.slice(offset, lineEnd);

    const match = fenceLine.exec(line);
    if (match) {
      const indent = match[1] ?? "";
      const marker = match[2] ?? "";
      const tail = match[3] ?? "";
      const markerChar = marker.charAt(0);
      const markerLen = marker.length;

      if (!open) {
        // A backtick in the info string means this is inline code, not a fence.
        const validOpener = markerChar !== "`" || !tail.includes("`");
        if (validOpener) {
          open = { start: offset, markerChar, markerLen, openLine: line, marker, indent };
        }
      } else if (
        open.markerChar === markerChar &&
        markerLen >= open.markerLen &&
        // A closing fence may not carry an info string.
        blankTail.test(tail)
      ) {
        const end = nextNewline === -1 ? buffer.length : nextNewline + 1;
        spans.push({
          start: open.start,
          end,
          openLine: open.openLine,
          marker: open.marker,
          indent: open.indent,
        });
        open = undefined;
      }
    }

    if (nextNewline === -1) break;
    offset = nextNewline + 1;
  }

  // An unterminated fence covers everything up to the end of the buffer.
  if (open) {
    spans.push({
      start: open.start,
      end: buffer.length,
      openLine: open.openLine,
      marker: open.marker,
      indent: open.indent,
    });
  }

  return spans;
}
|
||||
|
||||
/**
 * Returns the first span that strictly contains `index`, if any.
 *
 * Both boundaries are exclusive: an index equal to `span.start` or `span.end`
 * is treated as outside the fence.
 */
export function findFenceSpanAt(
  spans: FenceSpan[],
  index: number,
): FenceSpan | undefined {
  for (const candidate of spans) {
    const afterStart = candidate.start < index;
    const beforeEnd = index < candidate.end;
    if (afterStart && beforeEnd) {
      return candidate;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Reports whether text can be cut at `index` without splitting a fenced code
 * block, i.e. whether no span strictly contains that index.
 */
export function isSafeFenceBreak(spans: FenceSpan[], index: number): boolean {
  const enclosingFence = findFenceSpanAt(spans, index);
  return enclosingFence === undefined;
}
|
||||
@@ -3,7 +3,10 @@ import {
|
||||
type SlackCommandMiddlewareArgs,
|
||||
type SlackEventMiddlewareArgs,
|
||||
} from "@slack/bolt";
|
||||
import { chunkText, resolveTextChunkLimit } from "../auto-reply/chunk.js";
|
||||
import {
|
||||
chunkMarkdownText,
|
||||
resolveTextChunkLimit,
|
||||
} from "../auto-reply/chunk.js";
|
||||
import { hasControlCommand } from "../auto-reply/command-detection.js";
|
||||
import { formatAgentEnvelope } from "../auto-reply/envelope.js";
|
||||
import { dispatchReplyFromConfig } from "../auto-reply/reply/dispatch-from-config.js";
|
||||
@@ -1525,7 +1528,7 @@ async function deliverReplies(params: {
|
||||
if (!text && mediaList.length === 0) continue;
|
||||
|
||||
if (mediaList.length === 0) {
|
||||
for (const chunk of chunkText(text, chunkLimit)) {
|
||||
for (const chunk of chunkMarkdownText(text, chunkLimit)) {
|
||||
const trimmed = chunk.trim();
|
||||
if (!trimmed || trimmed === SILENT_REPLY_TOKEN) continue;
|
||||
await sendMessageSlack(params.target, trimmed, {
|
||||
@@ -1587,7 +1590,7 @@ async function deliverSlackSlashReplies(params: {
|
||||
.filter(Boolean)
|
||||
.join("\n");
|
||||
if (!combined) continue;
|
||||
for (const chunk of chunkText(combined, chunkLimit)) {
|
||||
for (const chunk of chunkMarkdownText(combined, chunkLimit)) {
|
||||
messages.push(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import { type FilesUploadV2Arguments, WebClient } from "@slack/web-api";
|
||||
|
||||
import { chunkText, resolveTextChunkLimit } from "../auto-reply/chunk.js";
|
||||
import {
|
||||
chunkMarkdownText,
|
||||
resolveTextChunkLimit,
|
||||
} from "../auto-reply/chunk.js";
|
||||
import { loadConfig } from "../config/config.js";
|
||||
import { loadWebMedia } from "../web/media.js";
|
||||
import { resolveSlackBotToken } from "./token.js";
|
||||
@@ -144,7 +147,7 @@ export async function sendMessageSlack(
|
||||
const cfg = loadConfig();
|
||||
const textLimit = resolveTextChunkLimit(cfg, "slack");
|
||||
const chunkLimit = Math.min(textLimit, SLACK_TEXT_LIMIT);
|
||||
const chunks = chunkText(trimmedMessage, chunkLimit);
|
||||
const chunks = chunkMarkdownText(trimmedMessage, chunkLimit);
|
||||
const mediaMaxBytes =
|
||||
typeof cfg.slack?.mediaMaxMb === "number"
|
||||
? cfg.slack.mediaMaxMb * 1024 * 1024
|
||||
|
||||
@@ -4,7 +4,10 @@ import { Buffer } from "node:buffer";
|
||||
import { apiThrottler } from "@grammyjs/transformer-throttler";
|
||||
import type { ApiClientOptions, Message } from "grammy";
|
||||
import { Bot, InputFile, webhookCallback } from "grammy";
|
||||
import { chunkText, resolveTextChunkLimit } from "../auto-reply/chunk.js";
|
||||
import {
|
||||
chunkMarkdownText,
|
||||
resolveTextChunkLimit,
|
||||
} from "../auto-reply/chunk.js";
|
||||
import { hasControlCommand } from "../auto-reply/command-detection.js";
|
||||
import { formatAgentEnvelope } from "../auto-reply/envelope.js";
|
||||
import { dispatchReplyFromConfig } from "../auto-reply/reply/dispatch-from-config.js";
|
||||
@@ -667,7 +670,7 @@ async function deliverReplies(params: {
|
||||
? [reply.mediaUrl]
|
||||
: [];
|
||||
if (mediaList.length === 0) {
|
||||
for (const chunk of chunkText(reply.text || "", textLimit)) {
|
||||
for (const chunk of chunkMarkdownText(reply.text || "", textLimit)) {
|
||||
await sendTelegramText(bot, chatId, chunk, runtime, {
|
||||
replyToMessageId:
|
||||
replyToId && (replyToMode === "all" || !hasReplied)
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
import { chunkText, resolveTextChunkLimit } from "../auto-reply/chunk.js";
|
||||
import {
|
||||
chunkMarkdownText,
|
||||
resolveTextChunkLimit,
|
||||
} from "../auto-reply/chunk.js";
|
||||
import { formatAgentEnvelope } from "../auto-reply/envelope.js";
|
||||
import {
|
||||
normalizeGroupActivation,
|
||||
@@ -556,7 +559,7 @@ async function deliverWebReply(params: {
|
||||
skipLog,
|
||||
} = params;
|
||||
const replyStarted = Date.now();
|
||||
const textChunks = chunkText(replyResult.text || "", textLimit);
|
||||
const textChunks = chunkMarkdownText(replyResult.text || "", textLimit);
|
||||
const mediaList = replyResult.mediaUrls?.length
|
||||
? replyResult.mediaUrls
|
||||
: replyResult.mediaUrl
|
||||
|
||||
Reference in New Issue
Block a user