// Vitest suite for chunkText, chunkMarkdownText and resolveTextChunkLimit from "./chunk.js".
import { describe, expect, it } from "vitest";

import {
  chunkMarkdownText,
  chunkText,
  resolveTextChunkLimit,
} from "./chunk.js";

/**
 * Asserts that every chunk is self-contained with respect to Markdown code
 * fences: any fence opened inside a chunk must also be closed inside it.
 *
 * A fence line is zero to three spaces of indentation followed by a run of
 * three or more backticks or three or more tildes (anything may follow the
 * run). While a fence is open, it is closed by the next fence line that uses
 * the same marker character with a run at least as long as the opener; fence
 * lines that do not meet that condition are treated as content.
 *
 * @param chunks - Chunks to inspect; each one is checked independently.
 */
function expectFencesBalanced(chunks: string[]): void {
  for (const chunk of chunks) {
    // Fence state is reset per chunk: balance must hold within each chunk.
    let open: { markerChar: string; markerLen: number } | null = null;
    for (const line of chunk.split("\n")) {
      const match = line.match(/^( {0,3})(`{3,}|~{3,})(.*)$/);
      if (!match) continue;
      const marker = match[2];
      if (!open) {
        // First fence line seen while nothing is open starts a block.
        open = { markerChar: marker[0], markerLen: marker.length };
        continue;
      }
      // Only a same-character run that is at least as long closes the block.
      if (open.markerChar === marker[0] && marker.length >= open.markerLen) {
        open = null;
      }
    }
    expect(open).toBe(null);
  }
}

// chunkText(text, limit): plain-text splitting. The cases below pin that text
// under the limit is returned whole, and that over-limit text is split at a
// newline when one is available, else at whitespace, else with a hard break.
describe("chunkText", () => {
  it("keeps multi-line text in one chunk when under limit", () => {
    const text = "Line one\n\nLine two\n\nLine three";
    const chunks = chunkText(text, 1600);
    expect(chunks).toEqual([text]);
  });

  it("splits only when text exceeds the limit", () => {
    const part = "a".repeat(20);
    const text = part.repeat(5); // 100 chars
    const chunks = chunkText(text, 60);
    expect(chunks.length).toBe(2);
    expect(chunks[0].length).toBe(60);
    expect(chunks[1].length).toBe(40);
    // No characters may be lost or duplicated by the split.
    expect(chunks.join("")).toBe(text);
  });

  it("prefers breaking at a newline before the limit", () => {
    const text = "paragraph one line\n\nparagraph two starts here and continues";
    const chunks = chunkText(text, 40);
    expect(chunks).toEqual([
      "paragraph one line",
      "paragraph two starts here and continues",
    ]);
  });

  it("otherwise breaks at the last whitespace under the limit", () => {
    const limit = 30;
    const text =
      "This is a message that should break nicely near a word boundary.";
    const chunks = chunkText(text, limit);
    // The text is 64 characters long, so a limit of 30 yields three or more
    // chunks; every one of them must respect the limit, not just the first two.
    expect(chunks.length).toBeGreaterThan(1);
    for (const chunk of chunks) {
      expect(chunk.length).toBeLessThanOrEqual(limit);
    }
    // Compare with whitespace normalised: only the break positions may differ.
    expect(chunks.join(" ").replace(/\s+/g, " ").trim()).toBe(
      text.replace(/\s+/g, " ").trim(),
    );
  });

  it("falls back to a hard break when no whitespace is present", () => {
    const text = "Supercalifragilisticexpialidocious"; // 34 chars
    const chunks = chunkText(text, 10);
    expect(chunks).toEqual(["Supercalif", "ragilistic", "expialidoc", "ious"]);
  });
});

// resolveTextChunkLimit(cfg, provider, accountId?): the cases below pin the
// built-in per-provider defaults and the precedence of configured overrides
// (account-level value over provider-level value over default).
describe("resolveTextChunkLimit", () => {
  it("uses per-provider defaults", () => {
    expect(resolveTextChunkLimit(undefined, "whatsapp")).toBe(4000);
    expect(resolveTextChunkLimit(undefined, "telegram")).toBe(4000);
    expect(resolveTextChunkLimit(undefined, "slack")).toBe(4000);
    expect(resolveTextChunkLimit(undefined, "signal")).toBe(4000);
    expect(resolveTextChunkLimit(undefined, "imessage")).toBe(4000);
    expect(resolveTextChunkLimit(undefined, "discord")).toBe(2000);
  });

  it("supports provider overrides", () => {
    const cfg = { telegram: { textChunkLimit: 1234 } };
    // A provider without an override keeps its default.
    expect(resolveTextChunkLimit(cfg, "whatsapp")).toBe(4000);
    expect(resolveTextChunkLimit(cfg, "telegram")).toBe(1234);
  });

  it("prefers account overrides when provided", () => {
    const cfg = {
      telegram: {
        textChunkLimit: 2000,
        accounts: {
          default: { textChunkLimit: 1234 },
          primary: { textChunkLimit: 777 },
        },
      },
    };
    // Each account's own value wins over the provider-level 2000.
    expect(resolveTextChunkLimit(cfg, "telegram", "primary")).toBe(777);
    expect(resolveTextChunkLimit(cfg, "telegram", "default")).toBe(1234);
  });

  it("uses the matching provider override", () => {
    const cfg = {
      discord: { textChunkLimit: 111 },
      slack: { textChunkLimit: 222 },
    };
    expect(resolveTextChunkLimit(cfg, "discord")).toBe(111);
    expect(resolveTextChunkLimit(cfg, "slack")).toBe(222);
    // Overrides for other providers must not leak into telegram.
    expect(resolveTextChunkLimit(cfg, "telegram")).toBe(4000);
  });
});

// chunkMarkdownText(text, limit): Markdown-aware splitting. The cases below
// pin that fenced code blocks are kept whole when possible and that, when a
// fence has to be split, every chunk re-opens and closes it with the original
// marker (character, length, info string and indentation).
describe("chunkMarkdownText", () => {
  it("keeps fenced blocks intact when a safe break exists", () => {
    const prefix = "p".repeat(60);
    const fence = "```bash\nline1\nline2\n```";
    const suffix = "s".repeat(60);
    const text = `${prefix}\n\n${fence}\n\n${suffix}`;

    const chunks = chunkMarkdownText(text, 40);
    // The whole fence fits under the limit, so one chunk must be exactly it.
    expect(chunks.some((chunk) => chunk.trimEnd() === fence)).toBe(true);
    expectFencesBalanced(chunks);
  });

  it("reopens fenced blocks when forced to split inside them", () => {
    const text = `\`\`\`txt\n${"a".repeat(500)}\n\`\`\``;
    const limit = 120;
    const chunks = chunkMarkdownText(text, limit);
    expect(chunks.length).toBeGreaterThan(1);
    for (const chunk of chunks) {
      expect(chunk.length).toBeLessThanOrEqual(limit);
      expect(chunk.startsWith("```txt\n")).toBe(true);
      expect(chunk.trimEnd().endsWith("```")).toBe(true);
    }
    expectFencesBalanced(chunks);
  });

  it("supports tilde fences", () => {
    const text = `~~~sh\n${"x".repeat(600)}\n~~~`;
    const limit = 140;
    const chunks = chunkMarkdownText(text, limit);
    expect(chunks.length).toBeGreaterThan(1);
    for (const chunk of chunks) {
      expect(chunk.length).toBeLessThanOrEqual(limit);
      expect(chunk.startsWith("~~~sh\n")).toBe(true);
      expect(chunk.trimEnd().endsWith("~~~")).toBe(true);
    }
    expectFencesBalanced(chunks);
  });

  it("supports longer fence markers for close", () => {
    const text = `\`\`\`\`md\n${"y".repeat(600)}\n\`\`\`\``;
    const limit = 140;
    const chunks = chunkMarkdownText(text, limit);
    expect(chunks.length).toBeGreaterThan(1);
    for (const chunk of chunks) {
      expect(chunk.length).toBeLessThanOrEqual(limit);
      // A four-backtick opener must be re-emitted and closed at full length.
      expect(chunk.startsWith("````md\n")).toBe(true);
      expect(chunk.trimEnd().endsWith("````")).toBe(true);
    }
    expectFencesBalanced(chunks);
  });

  it("preserves indentation for indented fences", () => {
    const text = ` \`\`\`js\n ${"z".repeat(600)}\n \`\`\``;
    const limit = 160;
    const chunks = chunkMarkdownText(text, limit);
    expect(chunks.length).toBeGreaterThan(1);
    for (const chunk of chunks) {
      expect(chunk.length).toBeLessThanOrEqual(limit);
      // Both the re-opened and the closing fence keep the leading indent.
      expect(chunk.startsWith(" ```js\n")).toBe(true);
      expect(chunk.trimEnd().endsWith(" ```")).toBe(true);
    }
    expectFencesBalanced(chunks);
  });

  it("never produces an empty fenced chunk when splitting", () => {
    // Same fence-line pattern as expectFencesBalanced; hoisted so it is not
    // rebuilt for every line.
    const fenceLine = /^( {0,3})(`{3,}|~{3,})(.*)$/;
    const text = `\`\`\`txt\n${"a".repeat(300)}\n\`\`\``;
    const chunks = chunkMarkdownText(text, 60);
    // Guard against a vacuous pass: the input is far over the limit, so a
    // split must have happened for the loop below to mean anything.
    expect(chunks.length).toBeGreaterThan(1);
    for (const chunk of chunks) {
      const nonFenceLines = chunk
        .split("\n")
        .filter((line) => !fenceLine.test(line));
      // Each chunk must carry real content besides its fence lines.
      expect(nonFenceLines.join("\n").trim()).not.toBe("");
    }
  });
});