From 676d41d4156c5089e84a47da5024cc0071a1a250 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 18 Jan 2026 09:28:42 +0000 Subject: [PATCH] fix: seed embedding cache for atomic reindex --- ...onboard-non-interactive.ai-gateway.test.ts | 7 ++- .../onboard-non-interactive.token.test.ts | 7 ++- src/memory/manager.atomic-reindex.test.ts | 1 + src/memory/manager.ts | 57 +++++++++++++++++++ 4 files changed, 66 insertions(+), 6 deletions(-) diff --git a/src/commands/onboard-non-interactive.ai-gateway.test.ts b/src/commands/onboard-non-interactive.ai-gateway.test.ts index 14afec665..fbd9b08dc 100644 --- a/src/commands/onboard-non-interactive.ai-gateway.test.ts +++ b/src/commands/onboard-non-interactive.ai-gateway.test.ts @@ -2,7 +2,7 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; describe("onboard (non-interactive): Vercel AI Gateway", () => { it("stores the API key and configures the default model", async () => { @@ -27,8 +27,9 @@ describe("onboard (non-interactive): Vercel AI Gateway", () => { const tempHome = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-onboard-gateway-")); process.env.HOME = tempHome; - delete process.env.CLAWDBOT_STATE_DIR; - delete process.env.CLAWDBOT_CONFIG_PATH; + process.env.CLAWDBOT_STATE_DIR = tempHome; + process.env.CLAWDBOT_CONFIG_PATH = path.join(tempHome, "clawdbot.json"); + vi.resetModules(); const runtime = { log: () => {}, diff --git a/src/commands/onboard-non-interactive.token.test.ts b/src/commands/onboard-non-interactive.token.test.ts index 28b5a3aa3..f2455ff24 100644 --- a/src/commands/onboard-non-interactive.token.test.ts +++ b/src/commands/onboard-non-interactive.token.test.ts @@ -2,7 +2,7 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; describe("onboard (non-interactive): token auth", () => { it("writes token profile config and stores the token", async () => { @@ -27,8 +27,9 @@ describe("onboard (non-interactive): token auth", () => { const tempHome = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-onboard-token-")); process.env.HOME = tempHome; - delete process.env.CLAWDBOT_STATE_DIR; - delete process.env.CLAWDBOT_CONFIG_PATH; + process.env.CLAWDBOT_STATE_DIR = tempHome; + process.env.CLAWDBOT_CONFIG_PATH = path.join(tempHome, "clawdbot.json"); + vi.resetModules(); const token = `sk-ant-oat01-${"a".repeat(80)}`; diff --git a/src/memory/manager.atomic-reindex.test.ts b/src/memory/manager.atomic-reindex.test.ts index 801bdba4b..a26b63a63 100644 --- a/src/memory/manager.atomic-reindex.test.ts +++ b/src/memory/manager.atomic-reindex.test.ts @@ -66,6 +66,7 @@ describe("memory manager atomic reindex", () => { provider: "openai", model: "mock-embed", store: { path: indexPath }, + cache: { enabled: false }, sync: { watch: false, onSessionStart: false, onSearch: false }, }, }, diff --git a/src/memory/manager.ts b/src/memory/manager.ts index e5cb9c1ca..0e463d4d4 100644 --- a/src/memory/manager.ts +++ b/src/memory/manager.ts @@ -897,6 +897,62 @@ export class MemoryIndexManager { return path.join(dir, `${path.basename(basePath)}.tmp-${stamp}`); } + private seedEmbeddingCacheFrom(source: DatabaseSync): void { + if (!this.cache.enabled) return; + try { + const insert = this.db.prepare( + `INSERT INTO ${EMBEDDING_CACHE_TABLE} (provider, model, provider_key, hash, embedding, dims, updated_at)\n` + + ` VALUES (?, ?, ?, ?, ?, ?, ?)\n` + + ` ON CONFLICT(provider, model, provider_key, hash) DO UPDATE SET\n` + + ` embedding=excluded.embedding,\n` + + ` dims=excluded.dims,\n` + + ` updated_at=excluded.updated_at`, + ); + const select = source.prepare( + `SELECT rowid, provider, model, provider_key, hash, embedding, dims, updated_at\n` + + ` FROM ${EMBEDDING_CACHE_TABLE}\n` + + ` WHERE provider = ? AND model = ? AND provider_key = ? AND rowid > ?\n` + + ` ORDER BY rowid\n` + + ` LIMIT ?`, + ); + const batchSize = 500; + let lastRowId = 0; + while (true) { + const rows = select.all( + this.provider.id, + this.provider.model, + this.providerKey, + lastRowId, + batchSize, + ) as Array<{ + rowid: number; + provider: string; + model: string; + provider_key: string; + hash: string; + embedding: string; + dims: number | null; + updated_at: number; + }>; + if (rows.length === 0) break; + for (const row of rows) { + insert.run( + row.provider, + row.model, + row.provider_key, + row.hash, + row.embedding, + row.dims, + row.updated_at, + ); + lastRowId = row.rowid; + } + } + } catch { + // Swallow cache seed errors to avoid blocking indexing. + } + } + private reopenDatabase() { this.db = this.openDatabase(); this.fts.available = false; @@ -962,6 +1018,7 @@ export class MemoryIndexManager { const tempPath = this.buildTempIndexPath(); const scratch = this.createScratchManager(tempPath); try { + scratch.seedEmbeddingCacheFrom(this.db); await scratch.sync({ reason: params.reason, force: true, progress: params.progress }); } catch (err) { await fs.rm(tempPath, { force: true });