fix: seed embedding cache for atomic reindex
This commit is contained in:
@@ -2,7 +2,7 @@ import fs from "node:fs/promises";
|
|||||||
import os from "node:os";
|
import os from "node:os";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
|
|
||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it, vi } from "vitest";
|
||||||
|
|
||||||
describe("onboard (non-interactive): Vercel AI Gateway", () => {
|
describe("onboard (non-interactive): Vercel AI Gateway", () => {
|
||||||
it("stores the API key and configures the default model", async () => {
|
it("stores the API key and configures the default model", async () => {
|
||||||
@@ -27,8 +27,9 @@ describe("onboard (non-interactive): Vercel AI Gateway", () => {
|
|||||||
|
|
||||||
const tempHome = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-onboard-gateway-"));
|
const tempHome = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-onboard-gateway-"));
|
||||||
process.env.HOME = tempHome;
|
process.env.HOME = tempHome;
|
||||||
delete process.env.CLAWDBOT_STATE_DIR;
|
process.env.CLAWDBOT_STATE_DIR = tempHome;
|
||||||
delete process.env.CLAWDBOT_CONFIG_PATH;
|
process.env.CLAWDBOT_CONFIG_PATH = path.join(tempHome, "clawdbot.json");
|
||||||
|
vi.resetModules();
|
||||||
|
|
||||||
const runtime = {
|
const runtime = {
|
||||||
log: () => {},
|
log: () => {},
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import fs from "node:fs/promises";
|
|||||||
import os from "node:os";
|
import os from "node:os";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
|
|
||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it, vi } from "vitest";
|
||||||
|
|
||||||
describe("onboard (non-interactive): token auth", () => {
|
describe("onboard (non-interactive): token auth", () => {
|
||||||
it("writes token profile config and stores the token", async () => {
|
it("writes token profile config and stores the token", async () => {
|
||||||
@@ -27,8 +27,9 @@ describe("onboard (non-interactive): token auth", () => {
|
|||||||
|
|
||||||
const tempHome = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-onboard-token-"));
|
const tempHome = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-onboard-token-"));
|
||||||
process.env.HOME = tempHome;
|
process.env.HOME = tempHome;
|
||||||
delete process.env.CLAWDBOT_STATE_DIR;
|
process.env.CLAWDBOT_STATE_DIR = tempHome;
|
||||||
delete process.env.CLAWDBOT_CONFIG_PATH;
|
process.env.CLAWDBOT_CONFIG_PATH = path.join(tempHome, "clawdbot.json");
|
||||||
|
vi.resetModules();
|
||||||
|
|
||||||
const token = `sk-ant-oat01-${"a".repeat(80)}`;
|
const token = `sk-ant-oat01-${"a".repeat(80)}`;
|
||||||
|
|
||||||
|
|||||||
@@ -66,6 +66,7 @@ describe("memory manager atomic reindex", () => {
|
|||||||
provider: "openai",
|
provider: "openai",
|
||||||
model: "mock-embed",
|
model: "mock-embed",
|
||||||
store: { path: indexPath },
|
store: { path: indexPath },
|
||||||
|
cache: { enabled: false },
|
||||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -897,6 +897,62 @@ export class MemoryIndexManager {
|
|||||||
return path.join(dir, `${path.basename(basePath)}.tmp-${stamp}`);
|
return path.join(dir, `${path.basename(basePath)}.tmp-${stamp}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Copies cached embeddings from `source` into this manager's database so a
 * freshly created index can reuse them instead of re-embedding content.
 *
 * Only rows matching the current provider id, model and provider key are
 * copied. Rows are read in rowid-ordered pages of 500 and upserted, so a
 * row already present in the destination is overwritten with the source's
 * embedding, dims and updated_at.
 *
 * Each page is written inside a SAVEPOINT. Without an enclosing transaction
 * SQLite commits every statement on its own, which makes bulk upserts slow;
 * a savepoint batches the page into one commit and, unlike BEGIN, is also
 * valid when the connection is already inside a transaction.
 *
 * This is best-effort: it does nothing when the cache is disabled, and any
 * error (for example a missing cache table in `source`) is swallowed so that
 * seeding can never block indexing. Pages committed before a failure are
 * kept; the failing page is rolled back.
 *
 * @param source Database to read existing cache rows from.
 */
private seedEmbeddingCacheFrom(source: DatabaseSync): void {
  if (!this.cache.enabled) return;
  const savepoint = "seed_embedding_cache";
  try {
    const insert = this.db.prepare(
      `INSERT INTO ${EMBEDDING_CACHE_TABLE} (provider, model, provider_key, hash, embedding, dims, updated_at)\n` +
        ` VALUES (?, ?, ?, ?, ?, ?, ?)\n` +
        ` ON CONFLICT(provider, model, provider_key, hash) DO UPDATE SET\n` +
        ` embedding=excluded.embedding,\n` +
        ` dims=excluded.dims,\n` +
        ` updated_at=excluded.updated_at`,
    );
    const select = source.prepare(
      `SELECT rowid, provider, model, provider_key, hash, embedding, dims, updated_at\n` +
        ` FROM ${EMBEDDING_CACHE_TABLE}\n` +
        ` WHERE provider = ? AND model = ? AND provider_key = ? AND rowid > ?\n` +
        ` ORDER BY rowid\n` +
        ` LIMIT ?`,
    );
    const batchSize = 500;
    // Keyset pagination cursor: the highest rowid copied so far.
    let lastRowId = 0;
    while (true) {
      const rows = select.all(
        this.provider.id,
        this.provider.model,
        this.providerKey,
        lastRowId,
        batchSize,
      ) as Array<{
        rowid: number;
        provider: string;
        model: string;
        provider_key: string;
        hash: string;
        embedding: string;
        dims: number | null;
        updated_at: number;
      }>;
      if (rows.length === 0) break;
      // One savepoint per page: a single commit instead of one per row.
      this.db.exec(`SAVEPOINT ${savepoint}`);
      try {
        for (const row of rows) {
          insert.run(
            row.provider,
            row.model,
            row.provider_key,
            row.hash,
            row.embedding,
            row.dims,
            row.updated_at,
          );
          lastRowId = row.rowid;
        }
        this.db.exec(`RELEASE ${savepoint}`);
      } catch (err) {
        try {
          // ROLLBACK TO leaves the savepoint on the stack; RELEASE removes it
          // so the connection is not left inside an open transaction.
          this.db.exec(`ROLLBACK TO ${savepoint}`);
          this.db.exec(`RELEASE ${savepoint}`);
        } catch {
          // Cleanup is best-effort too; the original error is what matters.
        }
        throw err;
      }
    }
  } catch {
    // Swallow cache seed errors to avoid blocking indexing.
  }
}
|
||||||
|
|
||||||
private reopenDatabase() {
|
private reopenDatabase() {
|
||||||
this.db = this.openDatabase();
|
this.db = this.openDatabase();
|
||||||
this.fts.available = false;
|
this.fts.available = false;
|
||||||
@@ -962,6 +1018,7 @@ export class MemoryIndexManager {
|
|||||||
const tempPath = this.buildTempIndexPath();
|
const tempPath = this.buildTempIndexPath();
|
||||||
const scratch = this.createScratchManager(tempPath);
|
const scratch = this.createScratchManager(tempPath);
|
||||||
try {
|
try {
|
||||||
|
scratch.seedEmbeddingCacheFrom(this.db);
|
||||||
await scratch.sync({ reason: params.reason, force: true, progress: params.progress });
|
await scratch.sync({ reason: params.reason, force: true, progress: params.progress });
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
await fs.rm(tempPath, { force: true });
|
await fs.rm(tempPath, { force: true });
|
||||||
|
|||||||
Reference in New Issue
Block a user