From 0fb2777c6de919673b62dad9208dcb47784895b9 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Sun, 18 Jan 2026 01:35:58 +0000
Subject: [PATCH] feat: add memory embedding cache

---
 CHANGELOG.md | 6 +
 docs/concepts/memory.md | 25 ++-
 src/agents/memory-search.ts | 16 ++
 src/cli/memory-cli.ts | 16 +-
 src/config/schema.ts | 8 +-
 src/config/types.tools.ts | 17 +++
 src/config/zod-schema.agent-runtime.ts | 6 +
 src/memory/index.test.ts | 41 +++-
 src/memory/manager.ts | 258 +++++++++++++++++++++++--
 9 files changed, 366 insertions(+), 27 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fbc4fe595..c8242d551 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,12 @@
 
 Docs: https://docs.clawd.bot
 
+## 2026.1.18-2
+
+### Changes
+- Memory: add SQLite embedding cache to speed up reindexing and frequent updates.
+- CLI: surface embedding cache state in `clawdbot memory status`.
+
 ## 2026.1.18-1
 
 ### Changes
diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md
index 11b87ba60..384c1aed5 100644
--- a/docs/concepts/memory.md
+++ b/docs/concepts/memory.md
@@ -157,9 +157,28 @@ Local mode:
 
 ### What gets indexed (and when)
 
 - File type: Markdown only (`MEMORY.md`, `memory/**/*.md`).
-- Index storage: per-agent SQLite at `~/.clawdbot/state/memory/.sqlite` (configurable via `agents.defaults.memorySearch.store.path`, supports `{agentId}` token).
-- Freshness: watcher on `MEMORY.md` + `memory/` marks the index dirty (debounce 1.5s). Sync runs on session start, on first search when dirty, and optionally on an interval. Reindex triggers when embedding model/provider or chunk sizes change.
-- Model changes: the index stores the embedding **model + provider + chunking params**. If any of those change, Clawdbot automatically resets and reindexes the entire store.
+- Index storage: per-agent SQLite at `~/.clawdbot/memory/{agentId}.sqlite` (configurable via `agents.defaults.memorySearch.store.path`, supports `{agentId}` token).
+- Freshness: watcher on `MEMORY.md` + `memory/` marks the index dirty (debounce 1.5s). Sync runs on session start, on first search when dirty, and optionally on an interval.
+- Reindex triggers: the index stores the embedding **provider/model + endpoint fingerprint + chunking params**. If any of those change, Clawdbot automatically resets and reindexes the entire store.
+
+### Embedding cache
+
+Clawdbot can cache **chunk embeddings** in SQLite so reindexing and frequent updates (especially session transcripts) don't re-embed unchanged text.
+
+Config:
+
+```json5
+agents: {
+  defaults: {
+    memorySearch: {
+      cache: {
+        enabled: true,
+        maxEntries: 50000
+      }
+    }
+  }
+}
+```
 
 ### Session memory search (experimental)
diff --git a/src/agents/memory-search.ts b/src/agents/memory-search.ts
index 8599240ec..673441dea 100644
--- a/src/agents/memory-search.ts
+++ b/src/agents/memory-search.ts
@@ -54,6 +54,10 @@ export type ResolvedMemorySearchConfig = {
     maxResults: number;
     minScore: number;
   };
+  cache: {
+    enabled: boolean;
+    maxEntries?: number;
+  };
 };
 
 const DEFAULT_MODEL = "text-embedding-3-small";
@@ -62,6 +66,7 @@ const DEFAULT_CHUNK_OVERLAP = 80;
 const DEFAULT_WATCH_DEBOUNCE_MS = 1500;
 const DEFAULT_MAX_RESULTS = 6;
 const DEFAULT_MIN_SCORE = 0.35;
+const DEFAULT_CACHE_ENABLED = true;
 const DEFAULT_SOURCES: Array<"memory" | "sessions"> = ["memory"];
 
 function normalizeSources(
@@ -152,6 +157,10 @@ function mergeConfig(
     maxResults: overrides?.query?.maxResults ?? defaults?.query?.maxResults ?? DEFAULT_MAX_RESULTS,
     minScore: overrides?.query?.minScore ?? defaults?.query?.minScore ?? 
DEFAULT_MIN_SCORE, }; + const cache = { + enabled: overrides?.cache?.enabled ?? defaults?.cache?.enabled ?? DEFAULT_CACHE_ENABLED, + maxEntries: overrides?.cache?.maxEntries ?? defaults?.cache?.maxEntries, + }; const overlap = Math.max(0, Math.min(chunking.overlap, chunking.tokens - 1)); const minScore = Math.max(0, Math.min(1, query.minScore)); @@ -170,6 +179,13 @@ function mergeConfig( chunking: { tokens: Math.max(1, chunking.tokens), overlap }, sync, query: { ...query, minScore }, + cache: { + enabled: Boolean(cache.enabled), + maxEntries: + typeof cache.maxEntries === "number" && Number.isFinite(cache.maxEntries) + ? Math.max(1, Math.floor(cache.maxEntries)) + : undefined, + }, }; } diff --git a/src/cli/memory-cli.ts b/src/cli/memory-cli.ts index 79a8275f2..26c7a901a 100644 --- a/src/cli/memory-cli.ts +++ b/src/cli/memory-cli.ts @@ -178,8 +178,20 @@ export function registerMemoryCli(program: Command) { if (status.vector.extensionPath) { lines.push(`${label("Vector path")} ${info(status.vector.extensionPath)}`); } - if (status.vector.loadError) { - lines.push(`${label("Vector error")} ${warn(status.vector.loadError)}`); + if (status.vector.loadError) { + lines.push(`${label("Vector error")} ${warn(status.vector.loadError)}`); + } + } + if (status.cache) { + const cacheState = status.cache.enabled ? "enabled" : "disabled"; + const cacheColor = status.cache.enabled ? theme.success : theme.muted; + const suffix = + status.cache.enabled && typeof status.cache.entries === "number" + ? ` (${status.cache.entries} entries)` + : ""; + lines.push(`${label("Embedding cache")} ${colorize(rich, cacheColor, cacheState)}${suffix}`); + if (status.cache.enabled && typeof status.cache.maxEntries === "number") { + lines.push(`${label("Cache cap")} ${info(String(status.cache.maxEntries))}`); } } if (status.fallback?.reason) { diff --git a/src/config/schema.ts b/src/config/schema.ts index c950fee29..999dcbbb2 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -190,6 +190,8 @@ const FIELD_LABELS: Record = { "agents.defaults.memorySearch.sync.watchDebounceMs": "Memory Watch Debounce (ms)", "agents.defaults.memorySearch.query.maxResults": "Memory Search Max Results", "agents.defaults.memorySearch.query.minScore": "Memory Search Min Score", + "agents.defaults.memorySearch.cache.enabled": "Memory Search Embedding Cache", + "agents.defaults.memorySearch.cache.maxEntries": "Memory Search Embedding Cache Max Entries", "auth.profiles": "Auth Profiles", "auth.order": "Auth Profile Order", "auth.cooldowns.billingBackoffHours": "Billing Backoff (hours)", @@ -382,11 +384,15 @@ const FIELD_HELP: Record = { "agents.defaults.memorySearch.fallback": 'Fallback to OpenAI when local embeddings fail ("openai" or "none").', "agents.defaults.memorySearch.store.path": - "SQLite index path (default: ~/.clawdbot/state/memory/{agentId}.sqlite).", + "SQLite index path (default: ~/.clawdbot/memory/{agentId}.sqlite).", "agents.defaults.memorySearch.store.vector.enabled": "Enable sqlite-vec extension for vector search (default: true).", "agents.defaults.memorySearch.store.vector.extensionPath": "Optional override path to sqlite-vec extension library (.dylib/.so/.dll).", + "agents.defaults.memorySearch.cache.enabled": + "Cache chunk embeddings in SQLite to speed up reindexing and frequent updates (default: true).", + "agents.defaults.memorySearch.cache.maxEntries": + "Optional cap on cached embeddings (best-effort).", "agents.defaults.memorySearch.sync.onSearch": "Lazy sync: reindex on first search after a change.", 
"agents.defaults.memorySearch.sync.watch": "Watch memory files for changes (chokidar).", diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index 2cff62919..55bb26dd7 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -192,6 +192,12 @@ export type MemorySearchConfig = { /** Optional override path to sqlite-vec extension (.dylib/.so/.dll). */ extensionPath?: string; }; + cache?: { + /** Enable embedding cache (default: true). */ + enabled?: boolean; + /** Optional max cache entries per provider/model. */ + maxEntries?: number; + }; }; /** Chunking configuration. */ chunking?: { @@ -210,6 +216,23 @@ export type MemorySearchConfig = { query?: { maxResults?: number; minScore?: number; + hybrid?: { + /** Enable hybrid BM25 + vector search (default: true). */ + enabled?: boolean; + /** Weight for vector similarity when merging results (0-1). */ + vectorWeight?: number; + /** Weight for BM25 text relevance when merging results (0-1). */ + textWeight?: number; + /** Multiplier for candidate pool size (default: 4). */ + candidateMultiplier?: number; + }; + }; + /** Index cache behavior. */ + cache?: { + /** Cache chunk embeddings in SQLite (default: true). */ + enabled?: boolean; + /** Optional cap on cached embeddings (best-effort). */ + maxEntries?: number; }; }; diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index 2e65a12d9..d028adff1 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -258,6 +258,12 @@ export const MemorySearchSchema = z minScore: z.number().min(0).max(1).optional(), }) .optional(), + cache: z + .object({ + enabled: z.boolean().optional(), + maxEntries: z.number().int().positive().optional(), + }) + .optional(), }) .optional(); export const AgentModelSchema = z.union([ diff --git a/src/memory/index.test.ts b/src/memory/index.test.ts index 9cea20808..38ed7225d 100644 --- a/src/memory/index.test.ts +++ b/src/memory/index.test.ts @@ -6,12 +6,14 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { getMemorySearchManager, type MemoryIndexManager } from "./index.js"; +let embedBatchCalls = 0; + vi.mock("./embeddings.js", () => { const embedText = (text: string) => { const lower = text.toLowerCase(); const alpha = lower.split("alpha").length - 1; const beta = lower.split("beta").length - 1; - return [alpha, beta, 1]; + return [alpha, beta]; }; return { createEmbeddingProvider: async (options: { model?: string }) => ({ @@ -20,7 +22,10 @@ vi.mock("./embeddings.js", () => { id: "mock", model: options.model ?? 
"mock-embed", embedQuery: async (text: string) => embedText(text), - embedBatch: async (texts: string[]) => texts.map(embedText), + embedBatch: async (texts: string[]) => { + embedBatchCalls += 1; + return texts.map(embedText); + }, }, }), }; @@ -32,12 +37,13 @@ describe("memory index", () => { let manager: MemoryIndexManager | null = null; beforeEach(async () => { + embedBatchCalls = 0; workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-mem-")); indexPath = path.join(workspaceDir, "index.sqlite"); await fs.mkdir(path.join(workspaceDir, "memory")); await fs.writeFile( path.join(workspaceDir, "memory", "2026-01-12.md"), - "# Log\nAlpha memory line.\nAnother line.", + "# Log\nAlpha memory line.\nZebra memory line.\nAnother line.", ); await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "Beta knowledge base entry."); }); @@ -146,6 +152,35 @@ describe("memory index", () => { expect(results.length).toBeGreaterThan(0); }); + it("reuses cached embeddings on forced reindex", async () => { + const cfg = { + agents: { + defaults: { + workspace: workspaceDir, + memorySearch: { + provider: "openai", + model: "mock-embed", + store: { path: indexPath, vector: { enabled: false } }, + sync: { watch: false, onSessionStart: false, onSearch: false }, + query: { minScore: 0 }, + cache: { enabled: true }, + }, + }, + list: [{ id: "main", default: true }], + }, + }; + const result = await getMemorySearchManager({ cfg, agentId: "main" }); + expect(result.manager).not.toBeNull(); + if (!result.manager) throw new Error("manager missing"); + manager = result.manager; + await manager.sync({ force: true }); + const afterFirst = embedBatchCalls; + expect(afterFirst).toBeGreaterThan(0); + + await manager.sync({ force: true }); + expect(embedBatchCalls).toBe(afterFirst); + }); + it("reports vector availability after probe", async () => { const cfg = { agents: { diff --git a/src/memory/manager.ts b/src/memory/manager.ts index b14137c52..91a3096d5 100644 --- a/src/memory/manager.ts +++ b/src/memory/manager.ts @@ -47,6 +47,7 @@ export type MemorySearchResult = { type MemoryIndexMeta = { model: string; provider: string; + providerKey?: string; chunkTokens: number; chunkOverlap: number; vectorDims?: number; @@ -106,6 +107,7 @@ type OpenAiBatchOutputLine = { const META_KEY = "memory_index_meta_v1"; const SNIPPET_MAX_CHARS = 700; const VECTOR_TABLE = "chunks_vec"; +const EMBEDDING_CACHE_TABLE = "embedding_cache"; const SESSION_DIRTY_DEBOUNCE_MS = 5000; const EMBEDDING_BATCH_MAX_TOKENS = 8000; const EMBEDDING_APPROX_CHARS_PER_TOKEN = 1; @@ -143,6 +145,8 @@ export class MemoryIndexManager { }; private readonly db: DatabaseSync; private readonly sources: Set; + private readonly providerKey: string; + private readonly cache: { enabled: boolean; maxEntries?: number }; private readonly vector: { enabled: boolean; available: boolean | null; @@ -214,6 +218,11 @@ export class MemoryIndexManager { this.openAi = params.providerResult.openAi; this.sources = new Set(params.settings.sources); this.db = this.openDatabase(); + this.providerKey = this.computeProviderKey(); + this.cache = { + enabled: params.settings.cache.enabled, + maxEntries: params.settings.cache.maxEntries, + }; this.ensureSchema(); this.vector = { enabled: params.settings.store.vector.enabled, @@ -266,19 +275,19 @@ export class MemoryIndexManager { const minScore = opts?.minScore ?? this.settings.query.minScore; const maxResults = opts?.maxResults ?? 
this.settings.query.maxResults; const queryVec = await this.provider.embedQuery(cleaned); - if (queryVec.length === 0) return []; + if (!queryVec.some((v) => v !== 0)) return []; if (await this.ensureVectorReady(queryVec.length)) { const sourceFilter = this.buildSourceFilter("c"); const rows = this.db .prepare( - `SELECT c.path, c.start_line, c.end_line, c.text, - c.source, - vec_distance_cosine(v.embedding, ?) AS dist - FROM ${VECTOR_TABLE} v - JOIN chunks c ON c.id = v.id - WHERE c.model = ?${sourceFilter.sql} - ORDER BY dist ASC - LIMIT ?`, + `SELECT c.path, c.start_line, c.end_line, c.text,\n` + + ` c.source,\n` + + ` vec_distance_cosine(v.embedding, ?) AS dist\n` + + ` FROM ${VECTOR_TABLE} v\n` + + ` JOIN chunks c ON c.id = v.id\n` + + ` WHERE c.model = ?${sourceFilter.sql}\n` + + ` ORDER BY dist ASC\n` + + ` LIMIT ?`, ) .all( vectorToBlob(queryVec), @@ -372,6 +381,7 @@ export class MemoryIndexManager { requestedProvider: string; sources: MemorySource[]; sourceCounts: Array<{ source: MemorySource; files: number; chunks: number }>; + cache?: { enabled: boolean; entries?: number; maxEntries?: number }; fallback?: { from: string; reason?: string }; vector?: { enabled: boolean; @@ -432,6 +442,16 @@ export class MemoryIndexManager { requestedProvider: this.requestedProvider, sources: Array.from(this.sources), sourceCounts, + cache: this.cache.enabled + ? { + enabled: true, + entries: + (this.db + .prepare(`SELECT COUNT(*) as c FROM ${EMBEDDING_CACHE_TABLE}`) + .get() as { c: number } | undefined)?.c ?? 0, + maxEntries: this.cache.maxEntries, + } + : { enabled: false, maxEntries: this.cache.maxEntries }, fallback: this.fallbackReason ? { from: "local", reason: this.fallbackReason } : undefined, vector: { enabled: this.vector.enabled, @@ -603,6 +623,21 @@ export class MemoryIndexManager { updated_at INTEGER NOT NULL ); `); + this.db.exec(` + CREATE TABLE IF NOT EXISTS ${EMBEDDING_CACHE_TABLE} ( + provider TEXT NOT NULL, + model TEXT NOT NULL, + provider_key TEXT NOT NULL, + hash TEXT NOT NULL, + embedding TEXT NOT NULL, + dims INTEGER, + updated_at INTEGER NOT NULL, + PRIMARY KEY (provider, model, provider_key, hash) + ); + `); + this.db.exec( + `CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at ON ${EMBEDDING_CACHE_TABLE}(updated_at);`, + ); this.ensureColumn("files", "source", "TEXT NOT NULL DEFAULT 'memory'"); this.ensureColumn("chunks", "source", "TEXT NOT NULL DEFAULT 'memory'"); this.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`); @@ -681,6 +716,7 @@ export class MemoryIndexManager { } private listChunks(): Array<{ + id: string; path: string; startLine: number; endLine: number; @@ -691,11 +727,12 @@ export class MemoryIndexManager { const sourceFilter = this.buildSourceFilter(); const rows = this.db .prepare( - `SELECT path, start_line, end_line, text, embedding, source + `SELECT id, path, start_line, end_line, text, embedding, source FROM chunks WHERE model = ?${sourceFilter.sql}`, ) .all(this.provider.model, ...sourceFilter.params) as Array<{ + id: string; path: string; start_line: number; end_line: number; @@ -704,6 +741,7 @@ export class MemoryIndexManager { source: MemorySource; }>; return rows.map((row) => ({ + id: row.id, path: row.path, startLine: row.start_line, endLine: row.end_line, @@ -779,6 +817,13 @@ export class MemoryIndexManager { for (const stale of staleRows) { if (activePaths.has(stale.path)) continue; this.db.prepare(`DELETE FROM files WHERE path = ? 
AND source = ?`).run(stale.path, "memory"); + try { + this.db + .prepare( + `DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`, + ) + .run(stale.path, "memory"); + } catch {} this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(stale.path, "memory"); } } @@ -860,6 +905,13 @@ export class MemoryIndexManager { this.db .prepare(`DELETE FROM files WHERE path = ? AND source = ?`) .run(stale.path, "sessions"); + try { + this.db + .prepare( + `DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`, + ) + .run(stale.path, "sessions"); + } catch {} this.db .prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`) .run(stale.path, "sessions"); @@ -902,6 +954,7 @@ export class MemoryIndexManager { !meta || meta.model !== this.provider.model || meta.provider !== this.provider.id || + meta.providerKey !== this.providerKey || meta.chunkTokens !== this.settings.chunking.tokens || meta.chunkOverlap !== this.settings.chunking.overlap || (vectorReady && !meta?.vectorDims); @@ -929,6 +982,7 @@ export class MemoryIndexManager { const nextMeta: MemoryIndexMeta = { model: this.provider.model, provider: this.provider.id, + providerKey: this.providerKey, chunkTokens: this.settings.chunking.tokens, chunkOverlap: this.settings.chunking.overlap, }; @@ -938,6 +992,9 @@ export class MemoryIndexManager { if (shouldSyncMemory || shouldSyncSessions || needsFullReindex) { this.writeMeta(nextMeta); } + if (shouldSyncMemory || shouldSyncSessions || needsFullReindex) { + this.pruneEmbeddingCacheIfNeeded(); + } } private resetIndex() { @@ -1091,16 +1148,121 @@ export class MemoryIndexManager { return batches; } + private loadEmbeddingCache(hashes: string[]): Map { + if (!this.cache.enabled) return new Map(); + if (hashes.length === 0) return new Map(); + const unique: string[] = []; + const seen = new Set(); + for (const hash of hashes) { + if (!hash) continue; + if (seen.has(hash)) continue; + seen.add(hash); + unique.push(hash); + } + if (unique.length === 0) return new Map(); + + const out = new Map(); + const baseParams = [this.provider.id, this.provider.model, this.providerKey]; + const batchSize = 400; + for (let start = 0; start < unique.length; start += batchSize) { + const batch = unique.slice(start, start + batchSize); + const placeholders = batch.map(() => "?").join(", "); + const rows = this.db + .prepare( + `SELECT hash, embedding FROM ${EMBEDDING_CACHE_TABLE}\n` + + ` WHERE provider = ? AND model = ? AND provider_key = ? AND hash IN (${placeholders})`, + ) + .all(...baseParams, ...batch) as Array<{ hash: string; embedding: string }>; + for (const row of rows) { + out.set(row.hash, parseEmbedding(row.embedding)); + } + } + return out; + } + + private upsertEmbeddingCache(entries: Array<{ hash: string; embedding: number[] }>): void { + if (!this.cache.enabled) return; + if (entries.length === 0) return; + const now = Date.now(); + const stmt = this.db.prepare( + `INSERT INTO ${EMBEDDING_CACHE_TABLE} (provider, model, provider_key, hash, embedding, dims, updated_at)\n` + + ` VALUES (?, ?, ?, ?, ?, ?, ?)\n` + + ` ON CONFLICT(provider, model, provider_key, hash) DO UPDATE SET\n` + + ` embedding=excluded.embedding,\n` + + ` dims=excluded.dims,\n` + + ` updated_at=excluded.updated_at`, + ); + for (const entry of entries) { + const embedding = entry.embedding ?? 
[]; + stmt.run( + this.provider.id, + this.provider.model, + this.providerKey, + entry.hash, + JSON.stringify(embedding), + embedding.length, + now, + ); + } + } + + private pruneEmbeddingCacheIfNeeded(): void { + if (!this.cache.enabled) return; + const max = this.cache.maxEntries; + if (!max || max <= 0) return; + const row = this.db + .prepare(`SELECT COUNT(*) as c FROM ${EMBEDDING_CACHE_TABLE}`) + .get() as { c: number } | undefined; + const count = row?.c ?? 0; + if (count <= max) return; + const excess = count - max; + this.db + .prepare( + `DELETE FROM ${EMBEDDING_CACHE_TABLE}\n` + + ` WHERE rowid IN (\n` + + ` SELECT rowid FROM ${EMBEDDING_CACHE_TABLE}\n` + + ` ORDER BY updated_at ASC\n` + + ` LIMIT ?\n` + + ` )`, + ) + .run(excess); + } + private async embedChunksInBatches(chunks: MemoryChunk[]): Promise { if (chunks.length === 0) return []; - const batches = this.buildEmbeddingBatches(chunks); - const embeddings: number[][] = []; + const cached = this.loadEmbeddingCache(chunks.map((chunk) => chunk.hash)); + const embeddings: number[][] = Array.from({ length: chunks.length }, () => []); + const missing: Array<{ index: number; chunk: MemoryChunk }> = []; + + for (let i = 0; i < chunks.length; i += 1) { + const chunk = chunks[i]; + const hit = chunk?.hash ? cached.get(chunk.hash) : undefined; + if (hit && hit.length > 0) { + embeddings[i] = hit; + } else if (chunk) { + missing.push({ index: i, chunk }); + } + } + + if (missing.length === 0) return embeddings; + + const missingChunks = missing.map((m) => m.chunk); + const batches = this.buildEmbeddingBatches(missingChunks); + const toCache: Array<{ hash: string; embedding: number[] }> = []; + let cursor = 0; for (const batch of batches) { const batchEmbeddings = await this.embedBatchWithRetry(batch.map((chunk) => chunk.text)); for (let i = 0; i < batch.length; i += 1) { - embeddings.push(batchEmbeddings[i] ?? []); + const item = missing[cursor + i]; + const embedding = batchEmbeddings[i] ?? []; + if (item) { + embeddings[item.index] = embedding; + toCache.push({ hash: item.chunk.hash, embedding }); + } } + cursor += batch.length; } + this.upsertEmbeddingCache(toCache); return embeddings; } @@ -1121,6 +1283,24 @@ export class MemoryIndexManager { return headers; } + private computeProviderKey(): string { + if (this.provider.id === "openai" && this.openAi) { + const entries = Object.entries(this.openAi.headers) + .filter(([key]) => key.toLowerCase() !== "authorization") + .sort(([a], [b]) => a.localeCompare(b)) + .map(([key, value]) => [key, value]); + return hashText( + JSON.stringify({ + provider: "openai", + baseUrl: this.openAi.baseUrl, + model: this.openAi.model, + headers: entries, + }), + ); + } + return hashText(JSON.stringify({ provider: this.provider.id, model: this.provider.model })); + } + private buildOpenAiBatchRequests( chunks: MemoryChunk[], entry: MemoryFileEntry | SessionFileEntry, @@ -1300,8 +1480,40 @@ export class MemoryIndexManager { return this.embedChunksInBatches(chunks); } if (chunks.length === 0) return []; + const cached = this.loadEmbeddingCache(chunks.map((chunk) => chunk.hash)); + const embeddings: number[][] = Array.from({ length: chunks.length }, () => []); + const missing: Array<{ index: number; chunk: MemoryChunk }> = []; - const { requests, mapping } = this.buildOpenAiBatchRequests(chunks, entry, source); + for (let i = 0; i < chunks.length; i += 1) { + const chunk = chunks[i]; + const hit = chunk?.hash ? 
cached.get(chunk.hash) : undefined; + if (hit && hit.length > 0) { + embeddings[i] = hit; + } else if (chunk) { + missing.push({ index: i, chunk }); + } + } + + if (missing.length === 0) return embeddings; + + const requests: OpenAiBatchRequest[] = []; + const mapping = new Map(); + for (const item of missing) { + const chunk = item.chunk; + const customId = hashText( + `${source}:${entry.path}:${chunk.startLine}:${chunk.endLine}:${chunk.hash}:${item.index}`, + ); + mapping.set(customId, item.index); + requests.push({ + custom_id: customId, + method: "POST", + url: OPENAI_BATCH_ENDPOINT, + body: { + model: this.openAi?.model ?? this.provider.model, + input: chunk.text, + }, + }); + } const groups = this.splitOpenAiBatchRequests(requests); log.debug("memory embeddings: openai batch submit", { source, @@ -1313,7 +1525,7 @@ export class MemoryIndexManager { pollIntervalMs: this.batch.pollIntervalMs, timeoutMs: this.batch.timeoutMs, }); - const embeddings: number[][] = Array.from({ length: chunks.length }, () => []); + const toCache: Array<{ hash: string; embedding: number[] }> = []; const tasks = groups.map((group, groupIndex) => async () => { const batchInfo = await this.submitOpenAiBatch(group); @@ -1373,6 +1585,8 @@ export class MemoryIndexManager { continue; } embeddings[index] = embedding; + const chunk = chunks[index]; + if (chunk) toCache.push({ hash: chunk.hash, embedding }); } if (errors.length > 0) { throw new Error(`openai batch ${batchInfo.id} failed: ${errors.join("; ")}`); @@ -1385,6 +1599,7 @@ export class MemoryIndexManager { }); await this.runWithConcurrency(tasks, this.batch.concurrency); + this.upsertEmbeddingCache(toCache); return embeddings; } @@ -1463,9 +1678,16 @@ export class MemoryIndexManager { const sample = embeddings.find((embedding) => embedding.length > 0); const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false; const now = Date.now(); - this.db - .prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`) - .run(entry.path, options.source); + if (vectorReady) { + try { + this.db + .prepare( + `DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`, + ) + .run(entry.path, options.source); + } catch {} + } + this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(entry.path, options.source); for (let i = 0; i < chunks.length; i++) { const chunk = chunks[i]; const embedding = embeddings[i] ?? [];
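
For readers skimming the diff, the cache path in `manager.ts` reduces to: hash each chunk, look the hash up under the active provider/model/endpoint-fingerprint key, embed only the misses, write the new vectors back, then prune oldest-first past the configured cap. Below is a minimal standalone sketch of that flow, assuming Node >= 22 for the built-in `node:sqlite` module (the same `DatabaseSync` handle the manager holds); table and helper names here are illustrative, not the real module API:

```ts
// Sketch only: content-hash keyed embedding cache, mirroring the patch's flow.
import { DatabaseSync } from "node:sqlite";
import { createHash } from "node:crypto";

const db = new DatabaseSync(":memory:");
db.exec(`
  CREATE TABLE IF NOT EXISTS embedding_cache (
    provider TEXT NOT NULL,
    model TEXT NOT NULL,
    provider_key TEXT NOT NULL,
    hash TEXT NOT NULL,
    embedding TEXT NOT NULL,
    updated_at INTEGER NOT NULL,
    PRIMARY KEY (provider, model, provider_key, hash)
  );
`);

const hashText = (text: string) => createHash("sha256").update(text).digest("hex");

type Key = { provider: string; model: string; providerKey: string };

async function embedWithCache(
  texts: string[],
  key: Key,
  embed: (batch: string[]) => Promise<number[][]>, // provider call, used only for misses
  maxEntries?: number,
): Promise<number[][]> {
  const hashes = texts.map(hashText);
  const lookup = db.prepare(
    `SELECT embedding FROM embedding_cache
     WHERE provider = ? AND model = ? AND provider_key = ? AND hash = ?`,
  );
  const out: number[][] = [];
  const missing: number[] = [];
  for (let i = 0; i < texts.length; i++) {
    const row = lookup.get(key.provider, key.model, key.providerKey, hashes[i]!) as
      | { embedding: string }
      | undefined;
    out[i] = row ? (JSON.parse(row.embedding) as number[]) : [];
    if (!row) missing.push(i);
  }
  if (missing.length > 0) {
    // Embed only the chunks the cache did not cover, then write them back.
    const vectors = await embed(missing.map((i) => texts[i]!));
    const upsert = db.prepare(
      `INSERT INTO embedding_cache (provider, model, provider_key, hash, embedding, updated_at)
       VALUES (?, ?, ?, ?, ?, ?)
       ON CONFLICT(provider, model, provider_key, hash) DO UPDATE SET
         embedding = excluded.embedding, updated_at = excluded.updated_at`,
    );
    const now = Date.now();
    missing.forEach((originalIndex, j) => {
      out[originalIndex] = vectors[j] ?? [];
      upsert.run(
        key.provider,
        key.model,
        key.providerKey,
        hashes[originalIndex]!,
        JSON.stringify(out[originalIndex]),
        now,
      );
    });
  }
  if (maxEntries && maxEntries > 0) {
    // Best-effort cap, oldest rows first, as in pruneEmbeddingCacheIfNeeded.
    db.prepare(
      `DELETE FROM embedding_cache WHERE rowid IN (
         SELECT rowid FROM embedding_cache ORDER BY updated_at ASC
         LIMIT max(0, (SELECT COUNT(*) FROM embedding_cache) - ?)
       )`,
    ).run(maxEntries);
  }
  return out;
}
```

Keying on a content hash plus the provider fingerprint means a model or endpoint change naturally misses the cache instead of serving stale vectors, which is why the patch forces a full reindex (rather than a cache wipe) when `providerKey` changes.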