From 030ed5d5926c038b0e54c3afb0e7c92e47f7c695 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 17 Jan 2026 21:58:59 +0000 Subject: [PATCH] fix: skip empty memory chunks --- CHANGELOG.md | 1 + src/memory/manager.embedding-batches.test.ts | 29 ++++++++++++++++++++ src/memory/manager.ts | 4 ++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f5ecd4b6..c1466b408 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Docs: https://docs.clawd.bot - Tools: show exec elevated flag before the command and keep it outside markdown in tool summaries. - Memory: parallelize embedding indexing with rate-limit retries. - Memory: split overly long lines to keep embeddings under token limits. +- Memory: skip empty chunks to avoid invalid embedding inputs. ## 2026.1.17-1 diff --git a/src/memory/manager.embedding-batches.test.ts b/src/memory/manager.embedding-batches.test.ts index a11839db2..6a0b7e505 100644 --- a/src/memory/manager.embedding-batches.test.ts +++ b/src/memory/manager.embedding-batches.test.ts @@ -191,4 +191,33 @@ describe("memory embedding batches", () => { expect(calls).toBe(3); }, 10000); + + it("skips empty chunks so embeddings input stays valid", async () => { + await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-07.md"), "\n\n\n"); + + const cfg = { + agents: { + defaults: { + workspace: workspaceDir, + memorySearch: { + provider: "openai", + model: "mock-embed", + store: { path: indexPath }, + sync: { watch: false, onSessionStart: false, onSearch: false }, + query: { minScore: 0 }, + }, + }, + list: [{ id: "main", default: true }], + }, + }; + + const result = await getMemorySearchManager({ cfg, agentId: "main" }); + expect(result.manager).not.toBeNull(); + if (!result.manager) throw new Error("manager missing"); + manager = result.manager; + await manager.sync({ force: true }); + + const inputs = embedBatch.mock.calls.flatMap((call) => call[0] ?? []); + expect(inputs).not.toContain(""); + }); }); diff --git a/src/memory/manager.ts b/src/memory/manager.ts index b5d638fa0..58afa0250 100644 --- a/src/memory/manager.ts +++ b/src/memory/manager.ts @@ -1091,7 +1091,9 @@ export class MemoryIndexManager { options: { source: MemorySource; content?: string }, ) { const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8")); - const chunks = chunkMarkdown(content, this.settings.chunking); + const chunks = chunkMarkdown(content, this.settings.chunking).filter( + (chunk) => chunk.text.trim().length > 0, + ); const embeddings = await this.embedChunksInBatches(chunks); const sample = embeddings.find((embedding) => embedding.length > 0); const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false;