From 0fb2777c6de919673b62dad9208dcb47784895b9 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Sun, 18 Jan 2026 01:35:58 +0000
Subject: [PATCH] feat: add memory embedding cache

---
 CHANGELOG.md | 6 +
 docs/concepts/memory.md | 25 ++-
 src/agents/memory-search.ts | 16 ++
 src/cli/memory-cli.ts | 16 +-
 src/config/schema.ts | 8 +-
 src/config/types.tools.ts | 17 +++
 src/config/zod-schema.agent-runtime.ts | 6 +
 src/memory/index.test.ts | 41 +++-
 src/memory/manager.ts | 258 +++++++++++++++++++++++--
 9 files changed, 366 insertions(+), 27 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fbc4fe595..c8242d551 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,12 @@
 
 Docs: https://docs.clawd.bot
 
+## 2026.1.18-2
+
+### Changes
+- Memory: add SQLite embedding cache to speed up reindexing and frequent updates.
+- CLI: surface embedding cache state in `clawdbot memory status`.
+
 ## 2026.1.18-1
 
 ### Changes
diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md
index 11b87ba60..384c1aed5 100644
--- a/docs/concepts/memory.md
+++ b/docs/concepts/memory.md
@@ -157,9 +157,28 @@ Local mode:
 
 ### What gets indexed (and when)
 
 - File type: Markdown only (`MEMORY.md`, `memory/**/*.md`).
-- Index storage: per-agent SQLite at `~/.clawdbot/state/memory/.sqlite` (configurable via `agents.defaults.memorySearch.store.path`, supports `{agentId}` token).
-- Freshness: watcher on `MEMORY.md` + `memory/` marks the index dirty (debounce 1.5s). Sync runs on session start, on first search when dirty, and optionally on an interval. Reindex triggers when embedding model/provider or chunk sizes change.
-- Model changes: the index stores the embedding **model + provider + chunking params**. If any of those change, Clawdbot automatically resets and reindexes the entire store.
+- Index storage: per-agent SQLite at `~/.clawdbot/memory/{agentId}.sqlite` (configurable via `agents.defaults.memorySearch.store.path`, supports `{agentId}` token).
+- Freshness: watcher on `MEMORY.md` + `memory/` marks the index dirty (debounce 1.5s). Sync runs on session start, on first search when dirty, and optionally on an interval.
+- Reindex triggers: the index stores the embedding **provider/model + endpoint fingerprint + chunking params**. If any of those change, Clawdbot automatically resets and reindexes the entire store.
+
+### Embedding cache
+
+Clawdbot can cache **chunk embeddings** in SQLite so reindexing and frequent updates (especially session transcripts) don't re-embed unchanged text.
+
+Config:
+
+```json5
+agents: {
+  defaults: {
+    memorySearch: {
+      cache: {
+        enabled: true,
+        maxEntries: 50000
+      }
+    }
+  }
+}
+```
 
 ### Session memory search (experimental)
diff --git a/src/agents/memory-search.ts b/src/agents/memory-search.ts
index 8599240ec..673441dea 100644
--- a/src/agents/memory-search.ts
+++ b/src/agents/memory-search.ts
@@ -54,6 +54,10 @@ export type ResolvedMemorySearchConfig = {
     maxResults: number;
     minScore: number;
   };
+  cache: {
+    enabled: boolean;
+    maxEntries?: number;
+  };
 };
 
 const DEFAULT_MODEL = "text-embedding-3-small";
@@ -62,6 +66,7 @@ const DEFAULT_CHUNK_OVERLAP = 80;
 const DEFAULT_WATCH_DEBOUNCE_MS = 1500;
 const DEFAULT_MAX_RESULTS = 6;
 const DEFAULT_MIN_SCORE = 0.35;
+const DEFAULT_CACHE_ENABLED = true;
 const DEFAULT_SOURCES: Array<"memory" | "sessions"> = ["memory"];
 
 function normalizeSources(
@@ -152,6 +157,10 @@ function mergeConfig(
     maxResults: overrides?.query?.maxResults ?? defaults?.query?.maxResults ?? DEFAULT_MAX_RESULTS,
     minScore: overrides?.query?.minScore ?? defaults?.query?.minScore ?? 
DEFAULT_MIN_SCORE, }; + const cache = { + enabled: overrides?.cache?.enabled ?? defaults?.cache?.enabled ?? DEFAULT_CACHE_ENABLED, + maxEntries: overrides?.cache?.maxEntries ?? defaults?.cache?.maxEntries, + }; const overlap = Math.max(0, Math.min(chunking.overlap, chunking.tokens - 1)); const minScore = Math.max(0, Math.min(1, query.minScore)); @@ -170,6 +179,13 @@ function mergeConfig( chunking: { tokens: Math.max(1, chunking.tokens), overlap }, sync, query: { ...query, minScore }, + cache: { + enabled: Boolean(cache.enabled), + maxEntries: + typeof cache.maxEntries === "number" && Number.isFinite(cache.maxEntries) + ? Math.max(1, Math.floor(cache.maxEntries)) + : undefined, + }, }; } diff --git a/src/cli/memory-cli.ts b/src/cli/memory-cli.ts index 79a8275f2..26c7a901a 100644 --- a/src/cli/memory-cli.ts +++ b/src/cli/memory-cli.ts @@ -178,8 +178,20 @@ export function registerMemoryCli(program: Command) { if (status.vector.extensionPath) { lines.push(`${label("Vector path")} ${info(status.vector.extensionPath)}`); } - if (status.vector.loadError) { - lines.push(`${label("Vector error")} ${warn(status.vector.loadError)}`); + if (status.vector.loadError) { + lines.push(`${label("Vector error")} ${warn(status.vector.loadError)}`); + } + } + if (status.cache) { + const cacheState = status.cache.enabled ? "enabled" : "disabled"; + const cacheColor = status.cache.enabled ? theme.success : theme.muted; + const suffix = + status.cache.enabled && typeof status.cache.entries === "number" + ? ` (${status.cache.entries} entries)` + : ""; + lines.push(`${label("Embedding cache")} ${colorize(rich, cacheColor, cacheState)}${suffix}`); + if (status.cache.enabled && typeof status.cache.maxEntries === "number") { + lines.push(`${label("Cache cap")} ${info(String(status.cache.maxEntries))}`); } } if (status.fallback?.reason) { diff --git a/src/config/schema.ts b/src/config/schema.ts index c950fee29..999dcbbb2 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -190,6 +190,8 @@ const FIELD_LABELS: Record = { "agents.defaults.memorySearch.sync.watchDebounceMs": "Memory Watch Debounce (ms)", "agents.defaults.memorySearch.query.maxResults": "Memory Search Max Results", "agents.defaults.memorySearch.query.minScore": "Memory Search Min Score", + "agents.defaults.memorySearch.cache.enabled": "Memory Search Embedding Cache", + "agents.defaults.memorySearch.cache.maxEntries": "Memory Search Embedding Cache Max Entries", "auth.profiles": "Auth Profiles", "auth.order": "Auth Profile Order", "auth.cooldowns.billingBackoffHours": "Billing Backoff (hours)", @@ -382,11 +384,15 @@ const FIELD_HELP: Record = { "agents.defaults.memorySearch.fallback": 'Fallback to OpenAI when local embeddings fail ("openai" or "none").', "agents.defaults.memorySearch.store.path": - "SQLite index path (default: ~/.clawdbot/state/memory/{agentId}.sqlite).", + "SQLite index path (default: ~/.clawdbot/memory/{agentId}.sqlite).", "agents.defaults.memorySearch.store.vector.enabled": "Enable sqlite-vec extension for vector search (default: true).", "agents.defaults.memorySearch.store.vector.extensionPath": "Optional override path to sqlite-vec extension library (.dylib/.so/.dll).", + "agents.defaults.memorySearch.cache.enabled": + "Cache chunk embeddings in SQLite to speed up reindexing and frequent updates (default: true).", + "agents.defaults.memorySearch.cache.maxEntries": + "Optional cap on cached embeddings (best-effort).", "agents.defaults.memorySearch.sync.onSearch": "Lazy sync: reindex on first search after a change.", 
"agents.defaults.memorySearch.sync.watch": "Watch memory files for changes (chokidar).", diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index 2cff62919..55bb26dd7 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -192,6 +192,12 @@ export type MemorySearchConfig = { /** Optional override path to sqlite-vec extension (.dylib/.so/.dll). */ extensionPath?: string; }; + cache?: { + /** Enable embedding cache (default: true). */ + enabled?: boolean; + /** Optional max cache entries per provider/model. */ + maxEntries?: number; + }; }; /** Chunking configuration. */ chunking?: { @@ -210,6 +216,23 @@ export type MemorySearchConfig = { query?: { maxResults?: number; minScore?: number; + hybrid?: { + /** Enable hybrid BM25 + vector search (default: true). */ + enabled?: boolean; + /** Weight for vector similarity when merging results (0-1). */ + vectorWeight?: number; + /** Weight for BM25 text relevance when merging results (0-1). */ + textWeight?: number; + /** Multiplier for candidate pool size (default: 4). */ + candidateMultiplier?: number; + }; + }; + /** Index cache behavior. */ + cache?: { + /** Cache chunk embeddings in SQLite (default: true). */ + enabled?: boolean; + /** Optional cap on cached embeddings (best-effort). */ + maxEntries?: number; }; }; diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index 2e65a12d9..d028adff1 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -258,6 +258,12 @@ export const MemorySearchSchema = z minScore: z.number().min(0).max(1).optional(), }) .optional(), + cache: z + .object({ + enabled: z.boolean().optional(), + maxEntries: z.number().int().positive().optional(), + }) + .optional(), }) .optional(); export const AgentModelSchema = z.union([ diff --git a/src/memory/index.test.ts b/src/memory/index.test.ts index 9cea20808..38ed7225d 100644 --- a/src/memory/index.test.ts +++ b/src/memory/index.test.ts @@ -6,12 +6,14 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { getMemorySearchManager, type MemoryIndexManager } from "./index.js"; +let embedBatchCalls = 0; + vi.mock("./embeddings.js", () => { const embedText = (text: string) => { const lower = text.toLowerCase(); const alpha = lower.split("alpha").length - 1; const beta = lower.split("beta").length - 1; - return [alpha, beta, 1]; + return [alpha, beta]; }; return { createEmbeddingProvider: async (options: { model?: string }) => ({ @@ -20,7 +22,10 @@ vi.mock("./embeddings.js", () => { id: "mock", model: options.model ?? 
"mock-embed", embedQuery: async (text: string) => embedText(text), - embedBatch: async (texts: string[]) => texts.map(embedText), + embedBatch: async (texts: string[]) => { + embedBatchCalls += 1; + return texts.map(embedText); + }, }, }), }; @@ -32,12 +37,13 @@ describe("memory index", () => { let manager: MemoryIndexManager | null = null; beforeEach(async () => { + embedBatchCalls = 0; workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-mem-")); indexPath = path.join(workspaceDir, "index.sqlite"); await fs.mkdir(path.join(workspaceDir, "memory")); await fs.writeFile( path.join(workspaceDir, "memory", "2026-01-12.md"), - "# Log\nAlpha memory line.\nAnother line.", + "# Log\nAlpha memory line.\nZebra memory line.\nAnother line.", ); await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "Beta knowledge base entry."); }); @@ -146,6 +152,35 @@ describe("memory index", () => { expect(results.length).toBeGreaterThan(0); }); + it("reuses cached embeddings on forced reindex", async () => { + const cfg = { + agents: { + defaults: { + workspace: workspaceDir, + memorySearch: { + provider: "openai", + model: "mock-embed", + store: { path: indexPath, vector: { enabled: false } }, + sync: { watch: false, onSessionStart: false, onSearch: false }, + query: { minScore: 0 }, + cache: { enabled: true }, + }, + }, + list: [{ id: "main", default: true }], + }, + }; + const result = await getMemorySearchManager({ cfg, agentId: "main" }); + expect(result.manager).not.toBeNull(); + if (!result.manager) throw new Error("manager missing"); + manager = result.manager; + await manager.sync({ force: true }); + const afterFirst = embedBatchCalls; + expect(afterFirst).toBeGreaterThan(0); + + await manager.sync({ force: true }); + expect(embedBatchCalls).toBe(afterFirst); + }); + it("reports vector availability after probe", async () => { const cfg = { agents: { diff --git a/src/memory/manager.ts b/src/memory/manager.ts index b14137c52..91a3096d5 100644 --- a/src/memory/manager.ts +++ b/src/memory/manager.ts @@ -47,6 +47,7 @@ export type MemorySearchResult = { type MemoryIndexMeta = { model: string; provider: string; + providerKey?: string; chunkTokens: number; chunkOverlap: number; vectorDims?: number; @@ -106,6 +107,7 @@ type OpenAiBatchOutputLine = { const META_KEY = "memory_index_meta_v1"; const SNIPPET_MAX_CHARS = 700; const VECTOR_TABLE = "chunks_vec"; +const EMBEDDING_CACHE_TABLE = "embedding_cache"; const SESSION_DIRTY_DEBOUNCE_MS = 5000; const EMBEDDING_BATCH_MAX_TOKENS = 8000; const EMBEDDING_APPROX_CHARS_PER_TOKEN = 1; @@ -143,6 +145,8 @@ export class MemoryIndexManager { }; private readonly db: DatabaseSync; private readonly sources: Set; + private readonly providerKey: string; + private readonly cache: { enabled: boolean; maxEntries?: number }; private readonly vector: { enabled: boolean; available: boolean | null; @@ -214,6 +218,11 @@ export class MemoryIndexManager { this.openAi = params.providerResult.openAi; this.sources = new Set(params.settings.sources); this.db = this.openDatabase(); + this.providerKey = this.computeProviderKey(); + this.cache = { + enabled: params.settings.cache.enabled, + maxEntries: params.settings.cache.maxEntries, + }; this.ensureSchema(); this.vector = { enabled: params.settings.store.vector.enabled, @@ -266,19 +275,19 @@ export class MemoryIndexManager { const minScore = opts?.minScore ?? this.settings.query.minScore; const maxResults = opts?.maxResults ?? 
this.settings.query.maxResults; const queryVec = await this.provider.embedQuery(cleaned); - if (queryVec.length === 0) return []; + if (!queryVec.some((v) => v !== 0)) return []; if (await this.ensureVectorReady(queryVec.length)) { const sourceFilter = this.buildSourceFilter("c"); const rows = this.db .prepare( - `SELECT c.path, c.start_line, c.end_line, c.text, - c.source, - vec_distance_cosine(v.embedding, ?) AS dist - FROM ${VECTOR_TABLE} v - JOIN chunks c ON c.id = v.id - WHERE c.model = ?${sourceFilter.sql} - ORDER BY dist ASC - LIMIT ?`, + `SELECT c.path, c.start_line, c.end_line, c.text,\n` + + ` c.source,\n` + + ` vec_distance_cosine(v.embedding, ?) AS dist\n` + + ` FROM ${VECTOR_TABLE} v\n` + + ` JOIN chunks c ON c.id = v.id\n` + + ` WHERE c.model = ?${sourceFilter.sql}\n` + + ` ORDER BY dist ASC\n` + + ` LIMIT ?`, ) .all( vectorToBlob(queryVec), @@ -372,6 +381,7 @@ export class MemoryIndexManager { requestedProvider: string; sources: MemorySource[]; sourceCounts: Array<{ source: MemorySource; files: number; chunks: number }>; + cache?: { enabled: boolean; entries?: number; maxEntries?: number }; fallback?: { from: string; reason?: string }; vector?: { enabled: boolean; @@ -432,6 +442,16 @@ export class MemoryIndexManager { requestedProvider: this.requestedProvider, sources: Array.from(this.sources), sourceCounts, + cache: this.cache.enabled + ? { + enabled: true, + entries: + (this.db + .prepare(`SELECT COUNT(*) as c FROM ${EMBEDDING_CACHE_TABLE}`) + .get() as { c: number } | undefined)?.c ?? 0, + maxEntries: this.cache.maxEntries, + } + : { enabled: false, maxEntries: this.cache.maxEntries }, fallback: this.fallbackReason ? { from: "local", reason: this.fallbackReason } : undefined, vector: { enabled: this.vector.enabled, @@ -603,6 +623,21 @@ export class MemoryIndexManager { updated_at INTEGER NOT NULL ); `); + this.db.exec(` + CREATE TABLE IF NOT EXISTS ${EMBEDDING_CACHE_TABLE} ( + provider TEXT NOT NULL, + model TEXT NOT NULL, + provider_key TEXT NOT NULL, + hash TEXT NOT NULL, + embedding TEXT NOT NULL, + dims INTEGER, + updated_at INTEGER NOT NULL, + PRIMARY KEY (provider, model, provider_key, hash) + ); + `); + this.db.exec( + `CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at ON ${EMBEDDING_CACHE_TABLE}(updated_at);`, + ); this.ensureColumn("files", "source", "TEXT NOT NULL DEFAULT 'memory'"); this.ensureColumn("chunks", "source", "TEXT NOT NULL DEFAULT 'memory'"); this.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`); @@ -681,6 +716,7 @@ export class MemoryIndexManager { } private listChunks(): Array<{ + id: string; path: string; startLine: number; endLine: number; @@ -691,11 +727,12 @@ export class MemoryIndexManager { const sourceFilter = this.buildSourceFilter(); const rows = this.db .prepare( - `SELECT path, start_line, end_line, text, embedding, source + `SELECT id, path, start_line, end_line, text, embedding, source FROM chunks WHERE model = ?${sourceFilter.sql}`, ) .all(this.provider.model, ...sourceFilter.params) as Array<{ + id: string; path: string; start_line: number; end_line: number; @@ -704,6 +741,7 @@ export class MemoryIndexManager { source: MemorySource; }>; return rows.map((row) => ({ + id: row.id, path: row.path, startLine: row.start_line, endLine: row.end_line, @@ -779,6 +817,13 @@ export class MemoryIndexManager { for (const stale of staleRows) { if (activePaths.has(stale.path)) continue; this.db.prepare(`DELETE FROM files WHERE path = ? 
AND source = ?`).run(stale.path, "memory"); + try { + this.db + .prepare( + `DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`, + ) + .run(stale.path, "memory"); + } catch {} this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(stale.path, "memory"); } } @@ -860,6 +905,13 @@ export class MemoryIndexManager { this.db .prepare(`DELETE FROM files WHERE path = ? AND source = ?`) .run(stale.path, "sessions"); + try { + this.db + .prepare( + `DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`, + ) + .run(stale.path, "sessions"); + } catch {} this.db .prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`) .run(stale.path, "sessions"); @@ -902,6 +954,7 @@ export class MemoryIndexManager { !meta || meta.model !== this.provider.model || meta.provider !== this.provider.id || + meta.providerKey !== this.providerKey || meta.chunkTokens !== this.settings.chunking.tokens || meta.chunkOverlap !== this.settings.chunking.overlap || (vectorReady && !meta?.vectorDims); @@ -929,6 +982,7 @@ export class MemoryIndexManager { const nextMeta: MemoryIndexMeta = { model: this.provider.model, provider: this.provider.id, + providerKey: this.providerKey, chunkTokens: this.settings.chunking.tokens, chunkOverlap: this.settings.chunking.overlap, }; @@ -938,6 +992,9 @@ export class MemoryIndexManager { if (shouldSyncMemory || shouldSyncSessions || needsFullReindex) { this.writeMeta(nextMeta); } + if (shouldSyncMemory || shouldSyncSessions || needsFullReindex) { + this.pruneEmbeddingCacheIfNeeded(); + } } private resetIndex() { @@ -1091,16 +1148,121 @@ export class MemoryIndexManager { return batches; } + private loadEmbeddingCache(hashes: string[]): Map { + if (!this.cache.enabled) return new Map(); + if (hashes.length === 0) return new Map(); + const unique: string[] = []; + const seen = new Set(); + for (const hash of hashes) { + if (!hash) continue; + if (seen.has(hash)) continue; + seen.add(hash); + unique.push(hash); + } + if (unique.length === 0) return new Map(); + + const out = new Map(); + const baseParams = [this.provider.id, this.provider.model, this.providerKey]; + const batchSize = 400; + for (let start = 0; start < unique.length; start += batchSize) { + const batch = unique.slice(start, start + batchSize); + const placeholders = batch.map(() => "?").join(", "); + const rows = this.db + .prepare( + `SELECT hash, embedding FROM ${EMBEDDING_CACHE_TABLE}\n` + + ` WHERE provider = ? AND model = ? AND provider_key = ? AND hash IN (${placeholders})`, + ) + .all(...baseParams, ...batch) as Array<{ hash: string; embedding: string }>; + for (const row of rows) { + out.set(row.hash, parseEmbedding(row.embedding)); + } + } + return out; + } + + private upsertEmbeddingCache(entries: Array<{ hash: string; embedding: number[] }>): void { + if (!this.cache.enabled) return; + if (entries.length === 0) return; + const now = Date.now(); + const stmt = this.db.prepare( + `INSERT INTO ${EMBEDDING_CACHE_TABLE} (provider, model, provider_key, hash, embedding, dims, updated_at)\n` + + ` VALUES (?, ?, ?, ?, ?, ?, ?)\n` + + ` ON CONFLICT(provider, model, provider_key, hash) DO UPDATE SET\n` + + ` embedding=excluded.embedding,\n` + + ` dims=excluded.dims,\n` + + ` updated_at=excluded.updated_at`, + ); + for (const entry of entries) { + const embedding = entry.embedding ?? 
[]; + stmt.run( + this.provider.id, + this.provider.model, + this.providerKey, + entry.hash, + JSON.stringify(embedding), + embedding.length, + now, + ); + } + } + + private pruneEmbeddingCacheIfNeeded(): void { + if (!this.cache.enabled) return; + const max = this.cache.maxEntries; + if (!max || max <= 0) return; + const row = this.db + .prepare(`SELECT COUNT(*) as c FROM ${EMBEDDING_CACHE_TABLE}`) + .get() as { c: number } | undefined; + const count = row?.c ?? 0; + if (count <= max) return; + const excess = count - max; + this.db + .prepare( + `DELETE FROM ${EMBEDDING_CACHE_TABLE}\n` + + ` WHERE rowid IN (\n` + + ` SELECT rowid FROM ${EMBEDDING_CACHE_TABLE}\n` + + ` ORDER BY updated_at ASC\n` + + ` LIMIT ?\n` + + ` )`, + ) + .run(excess); + } + private async embedChunksInBatches(chunks: MemoryChunk[]): Promise { if (chunks.length === 0) return []; - const batches = this.buildEmbeddingBatches(chunks); - const embeddings: number[][] = []; + const cached = this.loadEmbeddingCache(chunks.map((chunk) => chunk.hash)); + const embeddings: number[][] = Array.from({ length: chunks.length }, () => []); + const missing: Array<{ index: number; chunk: MemoryChunk }> = []; + + for (let i = 0; i < chunks.length; i += 1) { + const chunk = chunks[i]; + const hit = chunk?.hash ? cached.get(chunk.hash) : undefined; + if (hit && hit.length > 0) { + embeddings[i] = hit; + } else if (chunk) { + missing.push({ index: i, chunk }); + } + } + + if (missing.length === 0) return embeddings; + + const missingChunks = missing.map((m) => m.chunk); + const batches = this.buildEmbeddingBatches(missingChunks); + const toCache: Array<{ hash: string; embedding: number[] }> = []; + let cursor = 0; for (const batch of batches) { const batchEmbeddings = await this.embedBatchWithRetry(batch.map((chunk) => chunk.text)); for (let i = 0; i < batch.length; i += 1) { - embeddings.push(batchEmbeddings[i] ?? []); + const item = missing[cursor + i]; + const embedding = batchEmbeddings[i] ?? []; + if (item) { + embeddings[item.index] = embedding; + toCache.push({ hash: item.chunk.hash, embedding }); + } } + cursor += batch.length; } + this.upsertEmbeddingCache(toCache); return embeddings; } @@ -1121,6 +1283,24 @@ export class MemoryIndexManager { return headers; } + private computeProviderKey(): string { + if (this.provider.id === "openai" && this.openAi) { + const entries = Object.entries(this.openAi.headers) + .filter(([key]) => key.toLowerCase() !== "authorization") + .sort(([a], [b]) => a.localeCompare(b)) + .map(([key, value]) => [key, value]); + return hashText( + JSON.stringify({ + provider: "openai", + baseUrl: this.openAi.baseUrl, + model: this.openAi.model, + headers: entries, + }), + ); + } + return hashText(JSON.stringify({ provider: this.provider.id, model: this.provider.model })); + } + private buildOpenAiBatchRequests( chunks: MemoryChunk[], entry: MemoryFileEntry | SessionFileEntry, @@ -1300,8 +1480,40 @@ export class MemoryIndexManager { return this.embedChunksInBatches(chunks); } if (chunks.length === 0) return []; + const cached = this.loadEmbeddingCache(chunks.map((chunk) => chunk.hash)); + const embeddings: number[][] = Array.from({ length: chunks.length }, () => []); + const missing: Array<{ index: number; chunk: MemoryChunk }> = []; - const { requests, mapping } = this.buildOpenAiBatchRequests(chunks, entry, source); + for (let i = 0; i < chunks.length; i += 1) { + const chunk = chunks[i]; + const hit = chunk?.hash ? 
cached.get(chunk.hash) : undefined; + if (hit && hit.length > 0) { + embeddings[i] = hit; + } else if (chunk) { + missing.push({ index: i, chunk }); + } + } + + if (missing.length === 0) return embeddings; + + const requests: OpenAiBatchRequest[] = []; + const mapping = new Map(); + for (const item of missing) { + const chunk = item.chunk; + const customId = hashText( + `${source}:${entry.path}:${chunk.startLine}:${chunk.endLine}:${chunk.hash}:${item.index}`, + ); + mapping.set(customId, item.index); + requests.push({ + custom_id: customId, + method: "POST", + url: OPENAI_BATCH_ENDPOINT, + body: { + model: this.openAi?.model ?? this.provider.model, + input: chunk.text, + }, + }); + } const groups = this.splitOpenAiBatchRequests(requests); log.debug("memory embeddings: openai batch submit", { source, @@ -1313,7 +1525,7 @@ export class MemoryIndexManager { pollIntervalMs: this.batch.pollIntervalMs, timeoutMs: this.batch.timeoutMs, }); - const embeddings: number[][] = Array.from({ length: chunks.length }, () => []); + const toCache: Array<{ hash: string; embedding: number[] }> = []; const tasks = groups.map((group, groupIndex) => async () => { const batchInfo = await this.submitOpenAiBatch(group); @@ -1373,6 +1585,8 @@ export class MemoryIndexManager { continue; } embeddings[index] = embedding; + const chunk = chunks[index]; + if (chunk) toCache.push({ hash: chunk.hash, embedding }); } if (errors.length > 0) { throw new Error(`openai batch ${batchInfo.id} failed: ${errors.join("; ")}`); @@ -1385,6 +1599,7 @@ export class MemoryIndexManager { }); await this.runWithConcurrency(tasks, this.batch.concurrency); + this.upsertEmbeddingCache(toCache); return embeddings; } @@ -1463,9 +1678,16 @@ export class MemoryIndexManager { const sample = embeddings.find((embedding) => embedding.length > 0); const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false; const now = Date.now(); - this.db - .prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`) - .run(entry.path, options.source); + if (vectorReady) { + try { + this.db + .prepare( + `DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`, + ) + .run(entry.path, options.source); + } catch {} + } + this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(entry.path, options.source); for (let i = 0; i < chunks.length; i++) { const chunk = chunks[i]; const embedding = embeddings[i] ?? [];
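
For readers skimming the diff, the cache path in `manager.ts` reduces to: hash each chunk, look the hash up under the active provider/model/endpoint-fingerprint key, embed only the misses, write the new vectors back, then prune oldest-first past the configured cap. Below is a minimal standalone sketch of that flow, assuming Node >= 22 for the built-in `node:sqlite` module (the same `DatabaseSync` handle the manager holds); table and helper names here are illustrative, not the real module API:

```ts
// Sketch only: content-hash keyed embedding cache, mirroring the patch's flow.
import { DatabaseSync } from "node:sqlite";
import { createHash } from "node:crypto";

const db = new DatabaseSync(":memory:");
db.exec(`
  CREATE TABLE IF NOT EXISTS embedding_cache (
    provider TEXT NOT NULL,
    model TEXT NOT NULL,
    provider_key TEXT NOT NULL,
    hash TEXT NOT NULL,
    embedding TEXT NOT NULL,
    updated_at INTEGER NOT NULL,
    PRIMARY KEY (provider, model, provider_key, hash)
  );
`);

const hashText = (text: string) => createHash("sha256").update(text).digest("hex");

type Key = { provider: string; model: string; providerKey: string };

async function embedWithCache(
  texts: string[],
  key: Key,
  embed: (batch: string[]) => Promise<number[][]>, // provider call, used only for misses
  maxEntries?: number,
): Promise<number[][]> {
  const hashes = texts.map(hashText);
  const lookup = db.prepare(
    `SELECT embedding FROM embedding_cache
     WHERE provider = ? AND model = ? AND provider_key = ? AND hash = ?`,
  );
  const out: number[][] = [];
  const missing: number[] = [];
  for (let i = 0; i < texts.length; i++) {
    const row = lookup.get(key.provider, key.model, key.providerKey, hashes[i]!) as
      | { embedding: string }
      | undefined;
    out[i] = row ? (JSON.parse(row.embedding) as number[]) : [];
    if (!row) missing.push(i);
  }
  if (missing.length > 0) {
    // Embed only the chunks the cache did not cover, then write them back.
    const vectors = await embed(missing.map((i) => texts[i]!));
    const upsert = db.prepare(
      `INSERT INTO embedding_cache (provider, model, provider_key, hash, embedding, updated_at)
       VALUES (?, ?, ?, ?, ?, ?)
       ON CONFLICT(provider, model, provider_key, hash) DO UPDATE SET
         embedding = excluded.embedding, updated_at = excluded.updated_at`,
    );
    const now = Date.now();
    missing.forEach((originalIndex, j) => {
      out[originalIndex] = vectors[j] ?? [];
      upsert.run(
        key.provider,
        key.model,
        key.providerKey,
        hashes[originalIndex]!,
        JSON.stringify(out[originalIndex]),
        now,
      );
    });
  }
  if (maxEntries && maxEntries > 0) {
    // Best-effort cap, oldest rows first, as in pruneEmbeddingCacheIfNeeded.
    db.prepare(
      `DELETE FROM embedding_cache WHERE rowid IN (
         SELECT rowid FROM embedding_cache ORDER BY updated_at ASC
         LIMIT max(0, (SELECT COUNT(*) FROM embedding_cache) - ?)
       )`,
    ).run(maxEntries);
  }
  return out;
}
```

Keying on a content hash plus the provider fingerprint means a model or endpoint change naturally misses the cache instead of serving stale vectors, which is why the patch forces a full reindex (rather than a cache wipe) when `providerKey` changes.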