import { describe, expect, it } from "vitest"; import { chunkMarkdownText, chunkText, resolveTextChunkLimit } from "./chunk.js"; function expectFencesBalanced(chunks: string[]) { for (const chunk of chunks) { let open: { markerChar: string; markerLen: number } | null = null; for (const line of chunk.split("\n")) { const match = line.match(/^( {0,3})(`{3,}|~{3,})(.*)$/); if (!match) continue; const marker = match[2]; if (!open) { open = { markerChar: marker[0], markerLen: marker.length }; continue; } if (open.markerChar === marker[0] && marker.length >= open.markerLen) { open = null; } } expect(open).toBe(null); } } type ChunkCase = { name: string; text: string; limit: number; expected: string[]; }; function runChunkCases(chunker: (text: string, limit: number) => string[], cases: ChunkCase[]) { for (const { name, text, limit, expected } of cases) { it(name, () => { expect(chunker(text, limit)).toEqual(expected); }); } } const parentheticalCases: ChunkCase[] = [ { name: "keeps parenthetical phrases together", text: "Heads up now (Though now I'm curious)ok", limit: 35, expected: ["Heads up now", "(Though now I'm curious)ok"], }, { name: "handles nested parentheses", text: "Hello (outer (inner) end) world", limit: 26, expected: ["Hello (outer (inner) end)", "world"], }, { name: "ignores unmatched closing parentheses", text: "Hello) world (ok)", limit: 12, expected: ["Hello)", "world (ok)"], }, ]; describe("chunkText", () => { it("keeps multi-line text in one chunk when under limit", () => { const text = "Line one\n\nLine two\n\nLine three"; const chunks = chunkText(text, 1600); expect(chunks).toEqual([text]); }); it("splits only when text exceeds the limit", () => { const part = "a".repeat(20); const text = part.repeat(5); // 100 chars const chunks = chunkText(text, 60); expect(chunks.length).toBe(2); expect(chunks[0].length).toBe(60); expect(chunks[1].length).toBe(40); expect(chunks.join("")).toBe(text); }); it("prefers breaking at a newline before the limit", () => { const text = `paragraph one line\n\nparagraph two starts here and continues`; const chunks = chunkText(text, 40); expect(chunks).toEqual(["paragraph one line", "paragraph two starts here and continues"]); }); it("otherwise breaks at the last whitespace under the limit", () => { const text = "This is a message that should break nicely near a word boundary."; const chunks = chunkText(text, 30); expect(chunks[0].length).toBeLessThanOrEqual(30); expect(chunks[1].length).toBeLessThanOrEqual(30); expect(chunks.join(" ").replace(/\s+/g, " ").trim()).toBe(text.replace(/\s+/g, " ").trim()); }); it("falls back to a hard break when no whitespace is present", () => { const text = "Supercalifragilisticexpialidocious"; // 34 chars const chunks = chunkText(text, 10); expect(chunks).toEqual(["Supercalif", "ragilistic", "expialidoc", "ious"]); }); runChunkCases(chunkText, [parentheticalCases[0]]); }); describe("resolveTextChunkLimit", () => { it("uses per-provider defaults", () => { expect(resolveTextChunkLimit(undefined, "whatsapp")).toBe(4000); expect(resolveTextChunkLimit(undefined, "telegram")).toBe(4000); expect(resolveTextChunkLimit(undefined, "slack")).toBe(4000); expect(resolveTextChunkLimit(undefined, "signal")).toBe(4000); expect(resolveTextChunkLimit(undefined, "imessage")).toBe(4000); expect(resolveTextChunkLimit(undefined, "discord")).toBe(4000); expect( resolveTextChunkLimit(undefined, "discord", undefined, { fallbackLimit: 2000, }), ).toBe(2000); }); it("supports provider overrides", () => { const cfg = { channels: { telegram: { textChunkLimit: 1234 } } }; expect(resolveTextChunkLimit(cfg, "whatsapp")).toBe(4000); expect(resolveTextChunkLimit(cfg, "telegram")).toBe(1234); }); it("prefers account overrides when provided", () => { const cfg = { channels: { telegram: { textChunkLimit: 2000, accounts: { default: { textChunkLimit: 1234 }, primary: { textChunkLimit: 777 }, }, }, }, }; expect(resolveTextChunkLimit(cfg, "telegram", "primary")).toBe(777); expect(resolveTextChunkLimit(cfg, "telegram", "default")).toBe(1234); }); it("uses the matching provider override", () => { const cfg = { channels: { discord: { textChunkLimit: 111 }, slack: { textChunkLimit: 222 }, }, }; expect(resolveTextChunkLimit(cfg, "discord")).toBe(111); expect(resolveTextChunkLimit(cfg, "slack")).toBe(222); expect(resolveTextChunkLimit(cfg, "telegram")).toBe(4000); }); }); describe("chunkMarkdownText", () => { it("keeps fenced blocks intact when a safe break exists", () => { const prefix = "p".repeat(60); const fence = "```bash\nline1\nline2\n```"; const suffix = "s".repeat(60); const text = `${prefix}\n\n${fence}\n\n${suffix}`; const chunks = chunkMarkdownText(text, 40); expect(chunks.some((chunk) => chunk.trimEnd() === fence)).toBe(true); expectFencesBalanced(chunks); }); it("reopens fenced blocks when forced to split inside them", () => { const text = `\`\`\`txt\n${"a".repeat(500)}\n\`\`\``; const limit = 120; const chunks = chunkMarkdownText(text, limit); expect(chunks.length).toBeGreaterThan(1); for (const chunk of chunks) { expect(chunk.length).toBeLessThanOrEqual(limit); expect(chunk.startsWith("```txt\n")).toBe(true); expect(chunk.trimEnd().endsWith("```")).toBe(true); } expectFencesBalanced(chunks); }); it("supports tilde fences", () => { const text = `~~~sh\n${"x".repeat(600)}\n~~~`; const limit = 140; const chunks = chunkMarkdownText(text, limit); expect(chunks.length).toBeGreaterThan(1); for (const chunk of chunks) { expect(chunk.length).toBeLessThanOrEqual(limit); expect(chunk.startsWith("~~~sh\n")).toBe(true); expect(chunk.trimEnd().endsWith("~~~")).toBe(true); } expectFencesBalanced(chunks); }); it("supports longer fence markers for close", () => { const text = `\`\`\`\`md\n${"y".repeat(600)}\n\`\`\`\``; const limit = 140; const chunks = chunkMarkdownText(text, limit); expect(chunks.length).toBeGreaterThan(1); for (const chunk of chunks) { expect(chunk.length).toBeLessThanOrEqual(limit); expect(chunk.startsWith("````md\n")).toBe(true); expect(chunk.trimEnd().endsWith("````")).toBe(true); } expectFencesBalanced(chunks); }); it("preserves indentation for indented fences", () => { const text = ` \`\`\`js\n ${"z".repeat(600)}\n \`\`\``; const limit = 160; const chunks = chunkMarkdownText(text, limit); expect(chunks.length).toBeGreaterThan(1); for (const chunk of chunks) { expect(chunk.length).toBeLessThanOrEqual(limit); expect(chunk.startsWith(" ```js\n")).toBe(true); expect(chunk.trimEnd().endsWith(" ```")).toBe(true); } expectFencesBalanced(chunks); }); it("never produces an empty fenced chunk when splitting", () => { const text = `\`\`\`txt\n${"a".repeat(300)}\n\`\`\``; const chunks = chunkMarkdownText(text, 60); for (const chunk of chunks) { const nonFenceLines = chunk .split("\n") .filter((line) => !/^( {0,3})(`{3,}|~{3,})(.*)$/.test(line)); expect(nonFenceLines.join("\n").trim()).not.toBe(""); } }); runChunkCases(chunkMarkdownText, parentheticalCases); it("hard-breaks when a parenthetical exceeds the limit", () => { const text = `(${"a".repeat(80)})`; const chunks = chunkMarkdownText(text, 20); expect(chunks[0]?.length).toBe(20); expect(chunks.join("")).toBe(text); }); });