fix: skip empty memory chunks

This commit is contained in:
Peter Steinberger
2026-01-17 21:58:59 +00:00
parent f6d359932a
commit 030ed5d592
3 changed files with 33 additions and 1 deletions

View File

@@ -10,6 +10,7 @@ Docs: https://docs.clawd.bot
- Tools: show exec elevated flag before the command and keep it outside markdown in tool summaries.
- Memory: parallelize embedding indexing with rate-limit retries.
- Memory: split overly long lines to keep embeddings under token limits.
- Memory: skip empty chunks to avoid invalid embedding inputs.
## 2026.1.17-1

View File

@@ -191,4 +191,33 @@ describe("memory embedding batches", () => {
expect(calls).toBe(3);
}, 10000);
// Regression test: syncing a memory file that contains nothing but newlines
// must never pass an empty string to `embedBatch`.
it("skips empty chunks so embeddings input stays valid", async () => {
  // Seed the workspace with a memory file made up solely of blank lines.
  const blankMemoryFile = path.join(workspaceDir, "memory", "2026-01-07.md");
  await fs.writeFile(blankMemoryFile, "\n\n\n");

  // Memory-search settings: file watching and the session-start / on-search
  // sync triggers are all switched off; the test calls sync() itself below.
  const memorySearch = {
    provider: "openai",
    model: "mock-embed",
    store: { path: indexPath },
    sync: { watch: false, onSessionStart: false, onSearch: false },
    query: { minScore: 0 },
  };
  const config = {
    agents: {
      defaults: { workspace: workspaceDir, memorySearch },
      list: [{ id: "main", default: true }],
    },
  };

  const { manager: created } = await getMemorySearchManager({ cfg: config, agentId: "main" });
  expect(created).not.toBeNull();
  if (!created) throw new Error("manager missing");
  // Stored in the variable declared outside this test
  // (presumably so shared teardown can reach it — confirm against the suite's hooks).
  manager = created;

  await manager.sync({ force: true });

  // Flatten the first argument of every recorded embedBatch call into one list.
  const embeddedTexts = embedBatch.mock.calls.flatMap((args) => args[0] ?? []);
  expect(embeddedTexts).not.toContain("");
});
});

View File

@@ -1091,7 +1091,9 @@ export class MemoryIndexManager {
options: { source: MemorySource; content?: string },
) {
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
const chunks = chunkMarkdown(content, this.settings.chunking);
const chunks = chunkMarkdown(content, this.settings.chunking).filter(
(chunk) => chunk.text.trim().length > 0,
);
const embeddings = await this.embedChunksInBatches(chunks);
const sample = embeddings.find((embedding) => embedding.length > 0);
const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false;