fix: split long memory lines
This commit is contained in:
@@ -7,6 +7,7 @@ Docs: https://docs.clawd.bot
|
|||||||
### Changes
|
### Changes
|
||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
- Memory: split overly long lines to keep embeddings under token limits.
|
||||||
|
|
||||||
## 2026.1.17-1
|
## 2026.1.17-1
|
||||||
|
|
||||||
|
|||||||
16
src/memory/internal.test.ts
Normal file
16
src/memory/internal.test.ts
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
|
import { chunkMarkdown } from "./internal.js";
|
||||||
|
|
||||||
|
// Regression coverage: a single line longer than the chunk budget must be
// broken up rather than emitted as one oversized chunk.
describe("chunkMarkdown", () => {
  it("splits overly long lines into max-sized chunks", () => {
    // This test budgets four characters per token, so 400 tokens -> 1600 chars.
    const tokenBudget = 400;
    const charLimit = tokenBudget * 4;
    // One unbroken line slightly longer than three full chunks.
    const oversizedLine = "a".repeat(charLimit * 3 + 25);

    const pieces = chunkMarkdown(oversizedLine, { tokens: tokenBudget, overlap: 0 });

    // The input cannot fit in a single chunk, so more than one is expected.
    expect(pieces.length).toBeGreaterThan(1);
    // No chunk may exceed the character limit derived above.
    for (const { text } of pieces) {
      expect(text.length).toBeLessThanOrEqual(charLimit);
    }
  });
});
|
||||||
@@ -144,13 +144,23 @@ export function chunkMarkdown(
|
|||||||
for (let i = 0; i < lines.length; i += 1) {
|
for (let i = 0; i < lines.length; i += 1) {
|
||||||
const line = lines[i] ?? "";
|
const line = lines[i] ?? "";
|
||||||
const lineNo = i + 1;
|
const lineNo = i + 1;
|
||||||
const lineSize = line.length + 1;
|
const segments: string[] = [];
|
||||||
if (currentChars + lineSize > maxChars && current.length > 0) {
|
if (line.length === 0) {
|
||||||
flush();
|
segments.push("");
|
||||||
carryOverlap();
|
} else {
|
||||||
|
for (let start = 0; start < line.length; start += maxChars) {
|
||||||
|
segments.push(line.slice(start, start + maxChars));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (const segment of segments) {
|
||||||
|
const lineSize = segment.length + 1;
|
||||||
|
if (currentChars + lineSize > maxChars && current.length > 0) {
|
||||||
|
flush();
|
||||||
|
carryOverlap();
|
||||||
|
}
|
||||||
|
current.push({ line: segment, lineNo });
|
||||||
|
currentChars += lineSize;
|
||||||
}
|
}
|
||||||
current.push({ line, lineNo });
|
|
||||||
currentChars += lineSize;
|
|
||||||
}
|
}
|
||||||
flush();
|
flush();
|
||||||
return chunks;
|
return chunks;
|
||||||
|
|||||||
Reference in New Issue
Block a user