feat: add memory embedding cache

This commit is contained in:
Peter Steinberger
2026-01-18 01:35:58 +00:00
parent 568b8ee96c
commit 0fb2777c6d
9 changed files with 372 additions and 27 deletions

View File

@@ -2,6 +2,12 @@
Docs: https://docs.clawd.bot Docs: https://docs.clawd.bot
## 2026.1.18-2
### Changes
- Memory: add SQLite embedding cache to speed up reindexing and frequent updates.
- CLI: surface embedding cache state in `clawdbot memory status`.
## 2026.1.18-1 ## 2026.1.18-1
### Changes ### Changes

View File

@@ -157,9 +157,28 @@ Local mode:
### What gets indexed (and when) ### What gets indexed (and when)
- File type: Markdown only (`MEMORY.md`, `memory/**/*.md`). - File type: Markdown only (`MEMORY.md`, `memory/**/*.md`).
- Index storage: per-agent SQLite at `~/.clawdbot/state/memory/<agentId>.sqlite` (configurable via `agents.defaults.memorySearch.store.path`, supports `{agentId}` token). - Index storage: per-agent SQLite at `~/.clawdbot/memory/<agentId>.sqlite` (configurable via `agents.defaults.memorySearch.store.path`, supports `{agentId}` token).
- Freshness: watcher on `MEMORY.md` + `memory/` marks the index dirty (debounce 1.5s). Sync runs on session start, on first search when dirty, and optionally on an interval. Reindex triggers when embedding model/provider or chunk sizes change. - Freshness: watcher on `MEMORY.md` + `memory/` marks the index dirty (debounce 1.5s). Sync runs on session start, on first search when dirty, and optionally on an interval.
- Model changes: the index stores the embedding **model + provider + chunking params**. If any of those change, Clawdbot automatically resets and reindexes the entire store. - Reindex triggers: the index stores the embedding **provider/model + endpoint fingerprint + chunking params**. If any of those change, Clawdbot automatically resets and reindexes the entire store.
### Embedding cache
Clawdbot can cache **chunk embeddings** in SQLite so reindexing and frequent updates (especially session transcripts) don't re-embed unchanged text.
Config:
```json5
agents: {
defaults: {
memorySearch: {
cache: {
enabled: true,
maxEntries: 50000
}
}
}
}
```
### Session memory search (experimental) ### Session memory search (experimental)

View File

@@ -54,6 +54,10 @@ export type ResolvedMemorySearchConfig = {
maxResults: number; maxResults: number;
minScore: number; minScore: number;
}; };
cache: {
enabled: boolean;
maxEntries?: number;
};
}; };
const DEFAULT_MODEL = "text-embedding-3-small"; const DEFAULT_MODEL = "text-embedding-3-small";
@@ -62,6 +66,7 @@ const DEFAULT_CHUNK_OVERLAP = 80;
const DEFAULT_WATCH_DEBOUNCE_MS = 1500; const DEFAULT_WATCH_DEBOUNCE_MS = 1500;
const DEFAULT_MAX_RESULTS = 6; const DEFAULT_MAX_RESULTS = 6;
const DEFAULT_MIN_SCORE = 0.35; const DEFAULT_MIN_SCORE = 0.35;
const DEFAULT_CACHE_ENABLED = true;
const DEFAULT_SOURCES: Array<"memory" | "sessions"> = ["memory"]; const DEFAULT_SOURCES: Array<"memory" | "sessions"> = ["memory"];
function normalizeSources( function normalizeSources(
@@ -152,6 +157,10 @@ function mergeConfig(
maxResults: overrides?.query?.maxResults ?? defaults?.query?.maxResults ?? DEFAULT_MAX_RESULTS, maxResults: overrides?.query?.maxResults ?? defaults?.query?.maxResults ?? DEFAULT_MAX_RESULTS,
minScore: overrides?.query?.minScore ?? defaults?.query?.minScore ?? DEFAULT_MIN_SCORE, minScore: overrides?.query?.minScore ?? defaults?.query?.minScore ?? DEFAULT_MIN_SCORE,
}; };
const cache = {
enabled: overrides?.cache?.enabled ?? defaults?.cache?.enabled ?? DEFAULT_CACHE_ENABLED,
maxEntries: overrides?.cache?.maxEntries ?? defaults?.cache?.maxEntries,
};
const overlap = Math.max(0, Math.min(chunking.overlap, chunking.tokens - 1)); const overlap = Math.max(0, Math.min(chunking.overlap, chunking.tokens - 1));
const minScore = Math.max(0, Math.min(1, query.minScore)); const minScore = Math.max(0, Math.min(1, query.minScore));
@@ -170,6 +179,13 @@ function mergeConfig(
chunking: { tokens: Math.max(1, chunking.tokens), overlap }, chunking: { tokens: Math.max(1, chunking.tokens), overlap },
sync, sync,
query: { ...query, minScore }, query: { ...query, minScore },
cache: {
enabled: Boolean(cache.enabled),
maxEntries:
typeof cache.maxEntries === "number" && Number.isFinite(cache.maxEntries)
? Math.max(1, Math.floor(cache.maxEntries))
: undefined,
},
}; };
} }

View File

@@ -178,8 +178,20 @@ export function registerMemoryCli(program: Command) {
if (status.vector.extensionPath) { if (status.vector.extensionPath) {
lines.push(`${label("Vector path")} ${info(status.vector.extensionPath)}`); lines.push(`${label("Vector path")} ${info(status.vector.extensionPath)}`);
} }
if (status.vector.loadError) { if (status.vector.loadError) {
lines.push(`${label("Vector error")} ${warn(status.vector.loadError)}`); lines.push(`${label("Vector error")} ${warn(status.vector.loadError)}`);
}
}
if (status.cache) {
const cacheState = status.cache.enabled ? "enabled" : "disabled";
const cacheColor = status.cache.enabled ? theme.success : theme.muted;
const suffix =
status.cache.enabled && typeof status.cache.entries === "number"
? ` (${status.cache.entries} entries)`
: "";
lines.push(`${label("Embedding cache")} ${colorize(rich, cacheColor, cacheState)}${suffix}`);
if (status.cache.enabled && typeof status.cache.maxEntries === "number") {
lines.push(`${label("Cache cap")} ${info(String(status.cache.maxEntries))}`);
} }
} }
if (status.fallback?.reason) { if (status.fallback?.reason) {

View File

@@ -190,6 +190,8 @@ const FIELD_LABELS: Record<string, string> = {
"agents.defaults.memorySearch.sync.watchDebounceMs": "Memory Watch Debounce (ms)", "agents.defaults.memorySearch.sync.watchDebounceMs": "Memory Watch Debounce (ms)",
"agents.defaults.memorySearch.query.maxResults": "Memory Search Max Results", "agents.defaults.memorySearch.query.maxResults": "Memory Search Max Results",
"agents.defaults.memorySearch.query.minScore": "Memory Search Min Score", "agents.defaults.memorySearch.query.minScore": "Memory Search Min Score",
"agents.defaults.memorySearch.cache.enabled": "Memory Search Embedding Cache",
"agents.defaults.memorySearch.cache.maxEntries": "Memory Search Embedding Cache Max Entries",
"auth.profiles": "Auth Profiles", "auth.profiles": "Auth Profiles",
"auth.order": "Auth Profile Order", "auth.order": "Auth Profile Order",
"auth.cooldowns.billingBackoffHours": "Billing Backoff (hours)", "auth.cooldowns.billingBackoffHours": "Billing Backoff (hours)",
@@ -382,11 +384,15 @@ const FIELD_HELP: Record<string, string> = {
"agents.defaults.memorySearch.fallback": "agents.defaults.memorySearch.fallback":
'Fallback to OpenAI when local embeddings fail ("openai" or "none").', 'Fallback to OpenAI when local embeddings fail ("openai" or "none").',
"agents.defaults.memorySearch.store.path": "agents.defaults.memorySearch.store.path":
"SQLite index path (default: ~/.clawdbot/state/memory/{agentId}.sqlite).", "SQLite index path (default: ~/.clawdbot/memory/{agentId}.sqlite).",
"agents.defaults.memorySearch.store.vector.enabled": "agents.defaults.memorySearch.store.vector.enabled":
"Enable sqlite-vec extension for vector search (default: true).", "Enable sqlite-vec extension for vector search (default: true).",
"agents.defaults.memorySearch.store.vector.extensionPath": "agents.defaults.memorySearch.store.vector.extensionPath":
"Optional override path to sqlite-vec extension library (.dylib/.so/.dll).", "Optional override path to sqlite-vec extension library (.dylib/.so/.dll).",
"agents.defaults.memorySearch.cache.enabled":
"Cache chunk embeddings in SQLite to speed up reindexing and frequent updates (default: true).",
"agents.defaults.memorySearch.cache.maxEntries":
"Optional cap on cached embeddings (best-effort).",
"agents.defaults.memorySearch.sync.onSearch": "agents.defaults.memorySearch.sync.onSearch":
"Lazy sync: reindex on first search after a change.", "Lazy sync: reindex on first search after a change.",
"agents.defaults.memorySearch.sync.watch": "Watch memory files for changes (chokidar).", "agents.defaults.memorySearch.sync.watch": "Watch memory files for changes (chokidar).",

View File

@@ -192,6 +192,12 @@ export type MemorySearchConfig = {
/** Optional override path to sqlite-vec extension (.dylib/.so/.dll). */ /** Optional override path to sqlite-vec extension (.dylib/.so/.dll). */
extensionPath?: string; extensionPath?: string;
}; };
cache?: {
/** Enable embedding cache (default: true). */
enabled?: boolean;
/** Optional max cache entries per provider/model. */
maxEntries?: number;
};
}; };
/** Chunking configuration. */ /** Chunking configuration. */
chunking?: { chunking?: {
@@ -210,6 +216,23 @@ export type MemorySearchConfig = {
query?: { query?: {
maxResults?: number; maxResults?: number;
minScore?: number; minScore?: number;
hybrid?: {
/** Enable hybrid BM25 + vector search (default: true). */
enabled?: boolean;
/** Weight for vector similarity when merging results (0-1). */
vectorWeight?: number;
/** Weight for BM25 text relevance when merging results (0-1). */
textWeight?: number;
/** Multiplier for candidate pool size (default: 4). */
candidateMultiplier?: number;
};
};
/** Index cache behavior. */
cache?: {
/** Cache chunk embeddings in SQLite (default: true). */
enabled?: boolean;
/** Optional cap on cached embeddings (best-effort). */
maxEntries?: number;
}; };
}; };

View File

@@ -258,6 +258,12 @@ export const MemorySearchSchema = z
minScore: z.number().min(0).max(1).optional(), minScore: z.number().min(0).max(1).optional(),
}) })
.optional(), .optional(),
cache: z
.object({
enabled: z.boolean().optional(),
maxEntries: z.number().int().positive().optional(),
})
.optional(),
}) })
.optional(); .optional();
export const AgentModelSchema = z.union([ export const AgentModelSchema = z.union([

View File

@@ -6,12 +6,14 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { getMemorySearchManager, type MemoryIndexManager } from "./index.js"; import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
let embedBatchCalls = 0;
vi.mock("./embeddings.js", () => { vi.mock("./embeddings.js", () => {
const embedText = (text: string) => { const embedText = (text: string) => {
const lower = text.toLowerCase(); const lower = text.toLowerCase();
const alpha = lower.split("alpha").length - 1; const alpha = lower.split("alpha").length - 1;
const beta = lower.split("beta").length - 1; const beta = lower.split("beta").length - 1;
return [alpha, beta, 1]; return [alpha, beta];
}; };
return { return {
createEmbeddingProvider: async (options: { model?: string }) => ({ createEmbeddingProvider: async (options: { model?: string }) => ({
@@ -20,7 +22,10 @@ vi.mock("./embeddings.js", () => {
id: "mock", id: "mock",
model: options.model ?? "mock-embed", model: options.model ?? "mock-embed",
embedQuery: async (text: string) => embedText(text), embedQuery: async (text: string) => embedText(text),
embedBatch: async (texts: string[]) => texts.map(embedText), embedBatch: async (texts: string[]) => {
embedBatchCalls += 1;
return texts.map(embedText);
},
}, },
}), }),
}; };
@@ -32,12 +37,13 @@ describe("memory index", () => {
let manager: MemoryIndexManager | null = null; let manager: MemoryIndexManager | null = null;
beforeEach(async () => { beforeEach(async () => {
embedBatchCalls = 0;
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-mem-")); workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-mem-"));
indexPath = path.join(workspaceDir, "index.sqlite"); indexPath = path.join(workspaceDir, "index.sqlite");
await fs.mkdir(path.join(workspaceDir, "memory")); await fs.mkdir(path.join(workspaceDir, "memory"));
await fs.writeFile( await fs.writeFile(
path.join(workspaceDir, "memory", "2026-01-12.md"), path.join(workspaceDir, "memory", "2026-01-12.md"),
"# Log\nAlpha memory line.\nAnother line.", "# Log\nAlpha memory line.\nZebra memory line.\nAnother line.",
); );
await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "Beta knowledge base entry."); await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "Beta knowledge base entry.");
}); });
@@ -146,6 +152,35 @@ describe("memory index", () => {
expect(results.length).toBeGreaterThan(0); expect(results.length).toBeGreaterThan(0);
}); });
// Regression test for the embedding cache: with cache.enabled=true, a second
// forced sync over unchanged files must be served entirely from the SQLite
// embedding cache, so the mocked provider's embedBatch is not called again.
it("reuses cached embeddings on forced reindex", async () => {
// Minimal config: vector extension off, all automatic sync triggers off,
// minScore 0 so scoring never filters results in other assertions.
const cfg = {
agents: {
defaults: {
workspace: workspaceDir,
memorySearch: {
provider: "openai",
model: "mock-embed",
store: { path: indexPath, vector: { enabled: false } },
sync: { watch: false, onSessionStart: false, onSearch: false },
query: { minScore: 0 },
cache: { enabled: true },
},
},
list: [{ id: "main", default: true }],
},
};
const result = await getMemorySearchManager({ cfg, agentId: "main" });
expect(result.manager).not.toBeNull();
if (!result.manager) throw new Error("manager missing");
manager = result.manager;
// First forced sync embeds every chunk at least once.
await manager.sync({ force: true });
const afterFirst = embedBatchCalls;
expect(afterFirst).toBeGreaterThan(0);
// Second forced sync must hit the cache: no additional embedBatch calls.
await manager.sync({ force: true });
expect(embedBatchCalls).toBe(afterFirst);
});
it("reports vector availability after probe", async () => { it("reports vector availability after probe", async () => {
const cfg = { const cfg = {
agents: { agents: {

View File

@@ -47,6 +47,7 @@ export type MemorySearchResult = {
type MemoryIndexMeta = { type MemoryIndexMeta = {
model: string; model: string;
provider: string; provider: string;
providerKey?: string;
chunkTokens: number; chunkTokens: number;
chunkOverlap: number; chunkOverlap: number;
vectorDims?: number; vectorDims?: number;
@@ -106,6 +107,7 @@ type OpenAiBatchOutputLine = {
const META_KEY = "memory_index_meta_v1"; const META_KEY = "memory_index_meta_v1";
const SNIPPET_MAX_CHARS = 700; const SNIPPET_MAX_CHARS = 700;
const VECTOR_TABLE = "chunks_vec"; const VECTOR_TABLE = "chunks_vec";
const EMBEDDING_CACHE_TABLE = "embedding_cache";
const SESSION_DIRTY_DEBOUNCE_MS = 5000; const SESSION_DIRTY_DEBOUNCE_MS = 5000;
const EMBEDDING_BATCH_MAX_TOKENS = 8000; const EMBEDDING_BATCH_MAX_TOKENS = 8000;
const EMBEDDING_APPROX_CHARS_PER_TOKEN = 1; const EMBEDDING_APPROX_CHARS_PER_TOKEN = 1;
@@ -143,6 +145,8 @@ export class MemoryIndexManager {
}; };
private readonly db: DatabaseSync; private readonly db: DatabaseSync;
private readonly sources: Set<MemorySource>; private readonly sources: Set<MemorySource>;
private readonly providerKey: string;
private readonly cache: { enabled: boolean; maxEntries?: number };
private readonly vector: { private readonly vector: {
enabled: boolean; enabled: boolean;
available: boolean | null; available: boolean | null;
@@ -214,6 +218,11 @@ export class MemoryIndexManager {
this.openAi = params.providerResult.openAi; this.openAi = params.providerResult.openAi;
this.sources = new Set(params.settings.sources); this.sources = new Set(params.settings.sources);
this.db = this.openDatabase(); this.db = this.openDatabase();
this.providerKey = this.computeProviderKey();
this.cache = {
enabled: params.settings.cache.enabled,
maxEntries: params.settings.cache.maxEntries,
};
this.ensureSchema(); this.ensureSchema();
this.vector = { this.vector = {
enabled: params.settings.store.vector.enabled, enabled: params.settings.store.vector.enabled,
@@ -266,19 +275,19 @@ export class MemoryIndexManager {
const minScore = opts?.minScore ?? this.settings.query.minScore; const minScore = opts?.minScore ?? this.settings.query.minScore;
const maxResults = opts?.maxResults ?? this.settings.query.maxResults; const maxResults = opts?.maxResults ?? this.settings.query.maxResults;
const queryVec = await this.provider.embedQuery(cleaned); const queryVec = await this.provider.embedQuery(cleaned);
if (queryVec.length === 0) return []; if (!queryVec.some((v) => v !== 0)) return [];
if (await this.ensureVectorReady(queryVec.length)) { if (await this.ensureVectorReady(queryVec.length)) {
const sourceFilter = this.buildSourceFilter("c"); const sourceFilter = this.buildSourceFilter("c");
const rows = this.db const rows = this.db
.prepare( .prepare(
`SELECT c.path, c.start_line, c.end_line, c.text, `SELECT c.path, c.start_line, c.end_line, c.text,\n` +
c.source, ` c.source,\n` +
vec_distance_cosine(v.embedding, ?) AS dist ` vec_distance_cosine(v.embedding, ?) AS dist\n` +
FROM ${VECTOR_TABLE} v ` FROM ${VECTOR_TABLE} v\n` +
JOIN chunks c ON c.id = v.id ` JOIN chunks c ON c.id = v.id\n` +
WHERE c.model = ?${sourceFilter.sql} ` WHERE c.model = ?${sourceFilter.sql}\n` +
ORDER BY dist ASC ` ORDER BY dist ASC\n` +
LIMIT ?`, ` LIMIT ?`,
) )
.all( .all(
vectorToBlob(queryVec), vectorToBlob(queryVec),
@@ -372,6 +381,7 @@ export class MemoryIndexManager {
requestedProvider: string; requestedProvider: string;
sources: MemorySource[]; sources: MemorySource[];
sourceCounts: Array<{ source: MemorySource; files: number; chunks: number }>; sourceCounts: Array<{ source: MemorySource; files: number; chunks: number }>;
cache?: { enabled: boolean; entries?: number; maxEntries?: number };
fallback?: { from: string; reason?: string }; fallback?: { from: string; reason?: string };
vector?: { vector?: {
enabled: boolean; enabled: boolean;
@@ -432,6 +442,16 @@ export class MemoryIndexManager {
requestedProvider: this.requestedProvider, requestedProvider: this.requestedProvider,
sources: Array.from(this.sources), sources: Array.from(this.sources),
sourceCounts, sourceCounts,
cache: this.cache.enabled
? {
enabled: true,
entries:
(this.db
.prepare(`SELECT COUNT(*) as c FROM ${EMBEDDING_CACHE_TABLE}`)
.get() as { c: number } | undefined)?.c ?? 0,
maxEntries: this.cache.maxEntries,
}
: { enabled: false, maxEntries: this.cache.maxEntries },
fallback: this.fallbackReason ? { from: "local", reason: this.fallbackReason } : undefined, fallback: this.fallbackReason ? { from: "local", reason: this.fallbackReason } : undefined,
vector: { vector: {
enabled: this.vector.enabled, enabled: this.vector.enabled,
@@ -603,6 +623,21 @@ export class MemoryIndexManager {
updated_at INTEGER NOT NULL updated_at INTEGER NOT NULL
); );
`); `);
this.db.exec(`
CREATE TABLE IF NOT EXISTS ${EMBEDDING_CACHE_TABLE} (
provider TEXT NOT NULL,
model TEXT NOT NULL,
provider_key TEXT NOT NULL,
hash TEXT NOT NULL,
embedding TEXT NOT NULL,
dims INTEGER,
updated_at INTEGER NOT NULL,
PRIMARY KEY (provider, model, provider_key, hash)
);
`);
this.db.exec(
`CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at ON ${EMBEDDING_CACHE_TABLE}(updated_at);`,
);
this.ensureColumn("files", "source", "TEXT NOT NULL DEFAULT 'memory'"); this.ensureColumn("files", "source", "TEXT NOT NULL DEFAULT 'memory'");
this.ensureColumn("chunks", "source", "TEXT NOT NULL DEFAULT 'memory'"); this.ensureColumn("chunks", "source", "TEXT NOT NULL DEFAULT 'memory'");
this.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`); this.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`);
@@ -681,6 +716,7 @@ export class MemoryIndexManager {
} }
private listChunks(): Array<{ private listChunks(): Array<{
id: string;
path: string; path: string;
startLine: number; startLine: number;
endLine: number; endLine: number;
@@ -691,11 +727,12 @@ export class MemoryIndexManager {
const sourceFilter = this.buildSourceFilter(); const sourceFilter = this.buildSourceFilter();
const rows = this.db const rows = this.db
.prepare( .prepare(
`SELECT path, start_line, end_line, text, embedding, source `SELECT id, path, start_line, end_line, text, embedding, source
FROM chunks FROM chunks
WHERE model = ?${sourceFilter.sql}`, WHERE model = ?${sourceFilter.sql}`,
) )
.all(this.provider.model, ...sourceFilter.params) as Array<{ .all(this.provider.model, ...sourceFilter.params) as Array<{
id: string;
path: string; path: string;
start_line: number; start_line: number;
end_line: number; end_line: number;
@@ -704,6 +741,7 @@ export class MemoryIndexManager {
source: MemorySource; source: MemorySource;
}>; }>;
return rows.map((row) => ({ return rows.map((row) => ({
id: row.id,
path: row.path, path: row.path,
startLine: row.start_line, startLine: row.start_line,
endLine: row.end_line, endLine: row.end_line,
@@ -779,6 +817,13 @@ export class MemoryIndexManager {
for (const stale of staleRows) { for (const stale of staleRows) {
if (activePaths.has(stale.path)) continue; if (activePaths.has(stale.path)) continue;
this.db.prepare(`DELETE FROM files WHERE path = ? AND source = ?`).run(stale.path, "memory"); this.db.prepare(`DELETE FROM files WHERE path = ? AND source = ?`).run(stale.path, "memory");
try {
this.db
.prepare(
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
.run(stale.path, "memory");
} catch {}
this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(stale.path, "memory"); this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(stale.path, "memory");
} }
} }
@@ -860,6 +905,13 @@ export class MemoryIndexManager {
this.db this.db
.prepare(`DELETE FROM files WHERE path = ? AND source = ?`) .prepare(`DELETE FROM files WHERE path = ? AND source = ?`)
.run(stale.path, "sessions"); .run(stale.path, "sessions");
try {
this.db
.prepare(
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
.run(stale.path, "sessions");
} catch {}
this.db this.db
.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`) .prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`)
.run(stale.path, "sessions"); .run(stale.path, "sessions");
@@ -902,6 +954,7 @@ export class MemoryIndexManager {
!meta || !meta ||
meta.model !== this.provider.model || meta.model !== this.provider.model ||
meta.provider !== this.provider.id || meta.provider !== this.provider.id ||
meta.providerKey !== this.providerKey ||
meta.chunkTokens !== this.settings.chunking.tokens || meta.chunkTokens !== this.settings.chunking.tokens ||
meta.chunkOverlap !== this.settings.chunking.overlap || meta.chunkOverlap !== this.settings.chunking.overlap ||
(vectorReady && !meta?.vectorDims); (vectorReady && !meta?.vectorDims);
@@ -929,6 +982,7 @@ export class MemoryIndexManager {
const nextMeta: MemoryIndexMeta = { const nextMeta: MemoryIndexMeta = {
model: this.provider.model, model: this.provider.model,
provider: this.provider.id, provider: this.provider.id,
providerKey: this.providerKey,
chunkTokens: this.settings.chunking.tokens, chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap, chunkOverlap: this.settings.chunking.overlap,
}; };
@@ -938,6 +992,9 @@ export class MemoryIndexManager {
if (shouldSyncMemory || shouldSyncSessions || needsFullReindex) { if (shouldSyncMemory || shouldSyncSessions || needsFullReindex) {
this.writeMeta(nextMeta); this.writeMeta(nextMeta);
} }
if (shouldSyncMemory || shouldSyncSessions || needsFullReindex) {
this.pruneEmbeddingCacheIfNeeded();
}
} }
private resetIndex() { private resetIndex() {
@@ -1091,16 +1148,121 @@ export class MemoryIndexManager {
return batches; return batches;
} }
/**
 * Look up previously cached embeddings for the given chunk hashes.
 *
 * Returns a map of hash -> embedding vector, containing only the hashes that
 * were found for the current provider/model/providerKey triple. Returns an
 * empty map when the cache is disabled or no usable hashes were supplied.
 */
private loadEmbeddingCache(hashes: string[]): Map<string, number[]> {
  const found = new Map<string, number[]>();
  if (!this.cache.enabled || hashes.length === 0) return found;
  // Dedupe (Set preserves first-seen order) and drop empty hashes.
  const distinct = Array.from(new Set(hashes.filter((hash) => Boolean(hash))));
  if (distinct.length === 0) return found;
  const keyParams = [this.provider.id, this.provider.model, this.providerKey];
  // Query in slices of 400 hashes to stay well under SQLite's bound-parameter limit.
  const sliceSize = 400;
  for (let offset = 0; offset < distinct.length; offset += sliceSize) {
    const slice = distinct.slice(offset, offset + sliceSize);
    const placeholders = slice.map(() => "?").join(", ");
    const rows = this.db
      .prepare(
        `SELECT hash, embedding FROM ${EMBEDDING_CACHE_TABLE}\n` +
          ` WHERE provider = ? AND model = ? AND provider_key = ? AND hash IN (${placeholders})`,
      )
      .all(...keyParams, ...slice) as Array<{ hash: string; embedding: string }>;
    for (const row of rows) {
      found.set(row.hash, parseEmbedding(row.embedding));
    }
  }
  return found;
}
/**
 * Insert or refresh cache rows for freshly computed chunk embeddings.
 *
 * Rows are keyed by (provider, model, provider_key, hash); an existing row is
 * overwritten in place with the new vector, dims, and timestamp. No-op when
 * the cache is disabled or there is nothing to store.
 */
private upsertEmbeddingCache(entries: Array<{ hash: string; embedding: number[] }>): void {
  if (!this.cache.enabled || entries.length === 0) return;
  const timestamp = Date.now();
  const upsert = this.db.prepare(
    `INSERT INTO ${EMBEDDING_CACHE_TABLE} (provider, model, provider_key, hash, embedding, dims, updated_at)\n` +
      ` VALUES (?, ?, ?, ?, ?, ?, ?)\n` +
      ` ON CONFLICT(provider, model, provider_key, hash) DO UPDATE SET\n` +
      ` embedding=excluded.embedding,\n` +
      ` dims=excluded.dims,\n` +
      ` updated_at=excluded.updated_at`,
  );
  for (const { hash, embedding } of entries) {
    const vector = embedding ?? []; // defensive: tolerate a missing vector
    upsert.run(
      this.provider.id,
      this.provider.model,
      this.providerKey,
      hash,
      JSON.stringify(vector),
      vector.length,
      timestamp,
    );
  }
}
/**
 * Best-effort eviction: when a maxEntries cap is configured and the cache has
 * grown past it, delete the oldest rows (by updated_at) until the count is
 * back at the cap. No-op when the cache is disabled or uncapped.
 */
private pruneEmbeddingCacheIfNeeded(): void {
  if (!this.cache.enabled) return;
  const cap = this.cache.maxEntries;
  if (!cap || cap <= 0) return;
  const countRow = this.db
    .prepare(`SELECT COUNT(*) as c FROM ${EMBEDDING_CACHE_TABLE}`)
    .get() as { c: number } | undefined;
  const total = countRow?.c ?? 0;
  if (total <= cap) return;
  // Evict least-recently-updated rows first; rowid keys the delete.
  const surplus = total - cap;
  this.db
    .prepare(
      `DELETE FROM ${EMBEDDING_CACHE_TABLE}\n` +
        ` WHERE rowid IN (\n` +
        ` SELECT rowid FROM ${EMBEDDING_CACHE_TABLE}\n` +
        ` ORDER BY updated_at ASC\n` +
        ` LIMIT ?\n` +
        ` )`,
    )
    .run(surplus);
}
private async embedChunksInBatches(chunks: MemoryChunk[]): Promise<number[][]> { private async embedChunksInBatches(chunks: MemoryChunk[]): Promise<number[][]> {
if (chunks.length === 0) return []; if (chunks.length === 0) return [];
const batches = this.buildEmbeddingBatches(chunks); const cached = this.loadEmbeddingCache(chunks.map((chunk) => chunk.hash));
const embeddings: number[][] = []; const embeddings: number[][] = Array.from({ length: chunks.length }, () => []);
const missing: Array<{ index: number; chunk: MemoryChunk }> = [];
for (let i = 0; i < chunks.length; i += 1) {
const chunk = chunks[i];
const hit = chunk?.hash ? cached.get(chunk.hash) : undefined;
if (hit && hit.length > 0) {
embeddings[i] = hit;
} else if (chunk) {
missing.push({ index: i, chunk });
}
}
if (missing.length === 0) return embeddings;
const missingChunks = missing.map((m) => m.chunk);
const batches = this.buildEmbeddingBatches(missingChunks);
const toCache: Array<{ hash: string; embedding: number[] }> = [];
let cursor = 0;
for (const batch of batches) { for (const batch of batches) {
const batchEmbeddings = await this.embedBatchWithRetry(batch.map((chunk) => chunk.text)); const batchEmbeddings = await this.embedBatchWithRetry(batch.map((chunk) => chunk.text));
for (let i = 0; i < batch.length; i += 1) { for (let i = 0; i < batch.length; i += 1) {
embeddings.push(batchEmbeddings[i] ?? []); const item = missing[cursor + i];
const embedding = batchEmbeddings[i] ?? [];
if (item) {
embeddings[item.index] = embedding;
toCache.push({ hash: item.chunk.hash, embedding });
}
} }
cursor += batch.length;
} }
this.upsertEmbeddingCache(toCache);
return embeddings; return embeddings;
} }
@@ -1121,6 +1283,24 @@ export class MemoryIndexManager {
return headers; return headers;
} }
/**
 * Compute a stable fingerprint of the embedding endpoint configuration.
 *
 * For OpenAI-compatible providers the key covers base URL, model, and custom
 * headers — so pointing at a different endpoint invalidates cached embeddings
 * and triggers a reindex. The Authorization header is deliberately excluded:
 * rotating an API key must not throw away the cache. Headers are sorted so
 * the fingerprint is independent of declaration order.
 */
private computeProviderKey(): string {
  const endpoint = this.openAi;
  if (this.provider.id === "openai" && endpoint) {
    const headerPairs = Object.entries(endpoint.headers)
      .filter(([name]) => name.toLowerCase() !== "authorization")
      .sort(([a], [b]) => a.localeCompare(b))
      .map(([name, value]) => [name, value]);
    return hashText(
      JSON.stringify({
        provider: "openai",
        baseUrl: endpoint.baseUrl,
        model: endpoint.model,
        headers: headerPairs,
      }),
    );
  }
  // Non-OpenAI providers: identity is just provider id + model.
  return hashText(JSON.stringify({ provider: this.provider.id, model: this.provider.model }));
}
private buildOpenAiBatchRequests( private buildOpenAiBatchRequests(
chunks: MemoryChunk[], chunks: MemoryChunk[],
entry: MemoryFileEntry | SessionFileEntry, entry: MemoryFileEntry | SessionFileEntry,
@@ -1300,8 +1480,40 @@ export class MemoryIndexManager {
return this.embedChunksInBatches(chunks); return this.embedChunksInBatches(chunks);
} }
if (chunks.length === 0) return []; if (chunks.length === 0) return [];
const cached = this.loadEmbeddingCache(chunks.map((chunk) => chunk.hash));
const embeddings: number[][] = Array.from({ length: chunks.length }, () => []);
const missing: Array<{ index: number; chunk: MemoryChunk }> = [];
const { requests, mapping } = this.buildOpenAiBatchRequests(chunks, entry, source); for (let i = 0; i < chunks.length; i += 1) {
const chunk = chunks[i];
const hit = chunk?.hash ? cached.get(chunk.hash) : undefined;
if (hit && hit.length > 0) {
embeddings[i] = hit;
} else if (chunk) {
missing.push({ index: i, chunk });
}
}
if (missing.length === 0) return embeddings;
const requests: OpenAiBatchRequest[] = [];
const mapping = new Map<string, number>();
for (const item of missing) {
const chunk = item.chunk;
const customId = hashText(
`${source}:${entry.path}:${chunk.startLine}:${chunk.endLine}:${chunk.hash}:${item.index}`,
);
mapping.set(customId, item.index);
requests.push({
custom_id: customId,
method: "POST",
url: OPENAI_BATCH_ENDPOINT,
body: {
model: this.openAi?.model ?? this.provider.model,
input: chunk.text,
},
});
}
const groups = this.splitOpenAiBatchRequests(requests); const groups = this.splitOpenAiBatchRequests(requests);
log.debug("memory embeddings: openai batch submit", { log.debug("memory embeddings: openai batch submit", {
source, source,
@@ -1313,7 +1525,7 @@ export class MemoryIndexManager {
pollIntervalMs: this.batch.pollIntervalMs, pollIntervalMs: this.batch.pollIntervalMs,
timeoutMs: this.batch.timeoutMs, timeoutMs: this.batch.timeoutMs,
}); });
const embeddings: number[][] = Array.from({ length: chunks.length }, () => []); const toCache: Array<{ hash: string; embedding: number[] }> = [];
const tasks = groups.map((group, groupIndex) => async () => { const tasks = groups.map((group, groupIndex) => async () => {
const batchInfo = await this.submitOpenAiBatch(group); const batchInfo = await this.submitOpenAiBatch(group);
@@ -1373,6 +1585,8 @@ export class MemoryIndexManager {
continue; continue;
} }
embeddings[index] = embedding; embeddings[index] = embedding;
const chunk = chunks[index];
if (chunk) toCache.push({ hash: chunk.hash, embedding });
} }
if (errors.length > 0) { if (errors.length > 0) {
throw new Error(`openai batch ${batchInfo.id} failed: ${errors.join("; ")}`); throw new Error(`openai batch ${batchInfo.id} failed: ${errors.join("; ")}`);
@@ -1385,6 +1599,7 @@ export class MemoryIndexManager {
}); });
await this.runWithConcurrency(tasks, this.batch.concurrency); await this.runWithConcurrency(tasks, this.batch.concurrency);
this.upsertEmbeddingCache(toCache);
return embeddings; return embeddings;
} }
@@ -1463,9 +1678,16 @@ export class MemoryIndexManager {
const sample = embeddings.find((embedding) => embedding.length > 0); const sample = embeddings.find((embedding) => embedding.length > 0);
const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false; const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false;
const now = Date.now(); const now = Date.now();
this.db if (vectorReady) {
.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`) try {
.run(entry.path, options.source); this.db
.prepare(
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
.run(entry.path, options.source);
} catch {}
}
this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(entry.path, options.source);
for (let i = 0; i < chunks.length; i++) { for (let i = 0; i < chunks.length; i++) {
const chunk = chunks[i]; const chunk = chunks[i];
const embedding = embeddings[i] ?? []; const embedding = embeddings[i] ?? [];