fix: seed embedding cache for atomic reindex

This commit is contained in:
Peter Steinberger
2026-01-18 09:28:42 +00:00
parent a3a4996adb
commit 676d41d415
4 changed files with 66 additions and 6 deletions

View File

@@ -2,7 +2,7 @@ import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { describe, expect, it, vi } from "vitest";
describe("onboard (non-interactive): Vercel AI Gateway", () => {
it("stores the API key and configures the default model", async () => {
@@ -27,8 +27,9 @@ describe("onboard (non-interactive): Vercel AI Gateway", () => {
const tempHome = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-onboard-gateway-"));
process.env.HOME = tempHome;
delete process.env.CLAWDBOT_STATE_DIR;
delete process.env.CLAWDBOT_CONFIG_PATH;
process.env.CLAWDBOT_STATE_DIR = tempHome;
process.env.CLAWDBOT_CONFIG_PATH = path.join(tempHome, "clawdbot.json");
vi.resetModules();
const runtime = {
log: () => {},

View File

@@ -2,7 +2,7 @@ import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { describe, expect, it, vi } from "vitest";
describe("onboard (non-interactive): token auth", () => {
it("writes token profile config and stores the token", async () => {
@@ -27,8 +27,9 @@ describe("onboard (non-interactive): token auth", () => {
const tempHome = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-onboard-token-"));
process.env.HOME = tempHome;
delete process.env.CLAWDBOT_STATE_DIR;
delete process.env.CLAWDBOT_CONFIG_PATH;
process.env.CLAWDBOT_STATE_DIR = tempHome;
process.env.CLAWDBOT_CONFIG_PATH = path.join(tempHome, "clawdbot.json");
vi.resetModules();
const token = `sk-ant-oat01-${"a".repeat(80)}`;

View File

@@ -66,6 +66,7 @@ describe("memory manager atomic reindex", () => {
provider: "openai",
model: "mock-embed",
store: { path: indexPath },
cache: { enabled: false },
sync: { watch: false, onSessionStart: false, onSearch: false },
},
},

View File

@@ -897,6 +897,62 @@ export class MemoryIndexManager {
return path.join(dir, `${path.basename(basePath)}.tmp-${stamp}`);
}
/**
 * Copies embedding-cache rows from `source` into this manager's own database.
 *
 * Only rows whose provider, model and provider key match this manager's
 * current provider are copied. Rows are read in pages of 500, ordered by
 * `rowid`, and written with an upsert, so an existing entry with the same
 * (provider, model, provider_key, hash) key has its embedding, dims and
 * updated_at replaced.
 *
 * Does nothing when the cache is disabled. Any error raised while preparing
 * or running the statements is discarded: seeding is best-effort.
 *
 * @param source Database handle to read existing cache rows from.
 */
private seedEmbeddingCacheFrom(source: DatabaseSync): void {
  if (!this.cache.enabled) return;

  type CachedEmbeddingRow = {
    rowid: number;
    provider: string;
    model: string;
    provider_key: string;
    hash: string;
    embedding: string;
    dims: number | null;
    updated_at: number;
  };

  try {
    // The write statement is prepared first (against our own db), then the read.
    const upsertSql = [
      `INSERT INTO ${EMBEDDING_CACHE_TABLE} (provider, model, provider_key, hash, embedding, dims, updated_at)`,
      ` VALUES (?, ?, ?, ?, ?, ?, ?)`,
      ` ON CONFLICT(provider, model, provider_key, hash) DO UPDATE SET`,
      ` embedding=excluded.embedding,`,
      ` dims=excluded.dims,`,
      ` updated_at=excluded.updated_at`,
    ].join("\n");
    const upsert = this.db.prepare(upsertSql);

    const pageSql = [
      `SELECT rowid, provider, model, provider_key, hash, embedding, dims, updated_at`,
      ` FROM ${EMBEDDING_CACHE_TABLE}`,
      ` WHERE provider = ? AND model = ? AND provider_key = ? AND rowid > ?`,
      ` ORDER BY rowid`,
      ` LIMIT ?`,
    ].join("\n");
    const readPage = source.prepare(pageSql);

    const pageSize = 500;
    // Keyset cursor: the highest rowid copied so far; the next page starts after it.
    let cursor = 0;
    let page: CachedEmbeddingRow[];
    do {
      page = readPage.all(
        this.provider.id,
        this.provider.model,
        this.providerKey,
        cursor,
        pageSize,
      ) as CachedEmbeddingRow[];
      for (const entry of page) {
        const { rowid, provider, model, provider_key, hash, embedding, dims, updated_at } = entry;
        upsert.run(provider, model, provider_key, hash, embedding, dims, updated_at);
        cursor = rowid;
      }
      // An empty page means the source has no more matching rows.
    } while (page.length > 0);
  } catch {
    // Swallow cache seed errors to avoid blocking indexing.
  }
}
private reopenDatabase() {
this.db = this.openDatabase();
this.fts.available = false;
@@ -962,6 +1018,7 @@ export class MemoryIndexManager {
const tempPath = this.buildTempIndexPath();
const scratch = this.createScratchManager(tempPath);
try {
scratch.seedEmbeddingCacheFrom(this.db);
await scratch.sync({ reason: params.reason, force: true, progress: params.progress });
} catch (err) {
await fs.rm(tempPath, { force: true });