fix: skip empty memory chunks
This commit is contained in:
@@ -10,6 +10,7 @@ Docs: https://docs.clawd.bot
|
|||||||
- Tools: show exec elevated flag before the command and keep it outside markdown in tool summaries.
|
- Tools: show exec elevated flag before the command and keep it outside markdown in tool summaries.
|
||||||
- Memory: parallelize embedding indexing with rate-limit retries.
|
- Memory: parallelize embedding indexing with rate-limit retries.
|
||||||
- Memory: split overly long lines to keep embeddings under token limits.
|
- Memory: split overly long lines to keep embeddings under token limits.
|
||||||
|
- Memory: skip empty chunks to avoid invalid embedding inputs.
|
||||||
|
|
||||||
## 2026.1.17-1
|
## 2026.1.17-1
|
||||||
|
|
||||||
|
|||||||
@@ -191,4 +191,33 @@ describe("memory embedding batches", () => {
|
|||||||
|
|
||||||
expect(calls).toBe(3);
|
expect(calls).toBe(3);
|
||||||
}, 10000);
|
}, 10000);
|
||||||
|
|
||||||
|
it("skips empty chunks so embeddings input stays valid", async () => {
|
||||||
|
await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-07.md"), "\n\n\n");
|
||||||
|
|
||||||
|
const cfg = {
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
workspace: workspaceDir,
|
||||||
|
memorySearch: {
|
||||||
|
provider: "openai",
|
||||||
|
model: "mock-embed",
|
||||||
|
store: { path: indexPath },
|
||||||
|
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||||
|
query: { minScore: 0 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
list: [{ id: "main", default: true }],
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const result = await getMemorySearchManager({ cfg, agentId: "main" });
|
||||||
|
expect(result.manager).not.toBeNull();
|
||||||
|
if (!result.manager) throw new Error("manager missing");
|
||||||
|
manager = result.manager;
|
||||||
|
await manager.sync({ force: true });
|
||||||
|
|
||||||
|
const inputs = embedBatch.mock.calls.flatMap((call) => call[0] ?? []);
|
||||||
|
expect(inputs).not.toContain("");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1091,7 +1091,9 @@ export class MemoryIndexManager {
|
|||||||
options: { source: MemorySource; content?: string },
|
options: { source: MemorySource; content?: string },
|
||||||
) {
|
) {
|
||||||
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
|
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
|
||||||
const chunks = chunkMarkdown(content, this.settings.chunking);
|
const chunks = chunkMarkdown(content, this.settings.chunking).filter(
|
||||||
|
(chunk) => chunk.text.trim().length > 0,
|
||||||
|
);
|
||||||
const embeddings = await this.embedChunksInBatches(chunks);
|
const embeddings = await this.embedChunksInBatches(chunks);
|
||||||
const sample = embeddings.find((embedding) => embedding.length > 0);
|
const sample = embeddings.find((embedding) => embedding.length > 0);
|
||||||
const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false;
|
const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false;
|
||||||
|
|||||||
Reference in New Issue
Block a user