fix: skip empty memory chunks
This commit is contained in:
@@ -10,6 +10,7 @@ Docs: https://docs.clawd.bot
|
||||
- Tools: show exec elevated flag before the command and keep it outside markdown in tool summaries.
|
||||
- Memory: parallelize embedding indexing with rate-limit retries.
|
||||
- Memory: split overly long lines to keep embeddings under token limits.
|
||||
- Memory: skip empty chunks to avoid invalid embedding inputs.
|
||||
|
||||
## 2026.1.17-1
|
||||
|
||||
|
||||
@@ -191,4 +191,33 @@ describe("memory embedding batches", () => {
|
||||
|
||||
expect(calls).toBe(3);
|
||||
}, 10000);
|
||||
|
||||
// Regression test: a memory file containing nothing but newlines must not
// result in an empty string being passed to the embedding batch call.
it("skips empty chunks so embeddings input stays valid", async () => {
|
||||
// The file body is only three newline characters, i.e. it has no non-whitespace text.
await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-07.md"), "\n\n\n");
|
||||
|
||||
// Config for a single default agent ("main") whose memory search uses the
// "openai" provider with the "mock-embed" model and stores its index at indexPath.
const cfg = {
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "mock-embed",
|
||||
store: { path: indexPath },
|
||||
// All sync triggers are switched off; the test calls sync() explicitly below.
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||
query: { minScore: 0 },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
};
|
||||
|
||||
const result = await getMemorySearchManager({ cfg, agentId: "main" });
|
||||
expect(result.manager).not.toBeNull();
|
||||
// Throwing here narrows result.manager to non-null for the statements that follow.
if (!result.manager) throw new Error("manager missing");
|
||||
// Assigned to the outer `manager` variable (declared outside this test case).
manager = result.manager;
|
||||
await manager.sync({ force: true });
|
||||
|
||||
// Gather the first argument of every recorded embedBatch call into one flat list;
// `?? []` covers a call whose first argument is null or undefined.
const inputs = embedBatch.mock.calls.flatMap((call) => call[0] ?? []);
|
||||
// No embedding input may be the empty string.
expect(inputs).not.toContain("");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1091,7 +1091,9 @@ export class MemoryIndexManager {
|
||||
options: { source: MemorySource; content?: string },
|
||||
) {
|
||||
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
|
||||
- const chunks = chunkMarkdown(content, this.settings.chunking);
|
||||
+ const chunks = chunkMarkdown(content, this.settings.chunking).filter(
|
||||
+ (chunk) => chunk.text.trim().length > 0,
|
||||
+ );
|
||||
const embeddings = await this.embedChunksInBatches(chunks);
|
||||
const sample = embeddings.find((embedding) => embedding.length > 0);
|
||||
const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false;
|
||||
|
||||
Reference in New Issue
Block a user