feat: add memory embedding cache

This commit is contained in:
Peter Steinberger
2026-01-18 01:35:58 +00:00
parent 568b8ee96c
commit 0fb2777c6d
9 changed files with 372 additions and 27 deletions

View File

@@ -2,6 +2,12 @@
Docs: https://docs.clawd.bot Docs: https://docs.clawd.bot
## 2026.1.18-2
### Changes
- Memory: add SQLite embedding cache to speed up reindexing and frequent updates.
- CLI: surface embedding cache state in `clawdbot memory status`.
## 2026.1.18-1 ## 2026.1.18-1
### Changes ### Changes

View File

@@ -157,9 +157,28 @@ Local mode:
### What gets indexed (and when) ### What gets indexed (and when)
- File type: Markdown only (`MEMORY.md`, `memory/**/*.md`). - File type: Markdown only (`MEMORY.md`, `memory/**/*.md`).
- Index storage: per-agent SQLite at `~/.clawdbot/state/memory/<agentId>.sqlite` (configurable via `agents.defaults.memorySearch.store.path`, supports `{agentId}` token). - Index storage: per-agent SQLite at `~/.clawdbot/memory/<agentId>.sqlite` (configurable via `agents.defaults.memorySearch.store.path`, supports `{agentId}` token).
- Freshness: watcher on `MEMORY.md` + `memory/` marks the index dirty (debounce 1.5s). Sync runs on session start, on first search when dirty, and optionally on an interval. Reindex triggers when embedding model/provider or chunk sizes change. - Freshness: watcher on `MEMORY.md` + `memory/` marks the index dirty (debounce 1.5s). Sync runs on session start, on first search when dirty, and optionally on an interval.
- Model changes: the index stores the embedding **model + provider + chunking params**. If any of those change, Clawdbot automatically resets and reindexes the entire store. - Reindex triggers: the index stores the embedding **provider/model + endpoint fingerprint + chunking params**. If any of those change, Clawdbot automatically resets and reindexes the entire store.
### Embedding cache
Clawdbot can cache **chunk embeddings** in SQLite so reindexing and frequent updates (especially session transcripts) don't re-embed unchanged text.
Config:
```json5
agents: {
defaults: {
memorySearch: {
cache: {
enabled: true,
maxEntries: 50000
}
}
}
}
```
### Session memory search (experimental) ### Session memory search (experimental)

View File

@@ -54,6 +54,10 @@ export type ResolvedMemorySearchConfig = {
maxResults: number; maxResults: number;
minScore: number; minScore: number;
}; };
cache: {
enabled: boolean;
maxEntries?: number;
};
}; };
const DEFAULT_MODEL = "text-embedding-3-small"; const DEFAULT_MODEL = "text-embedding-3-small";
@@ -62,6 +66,7 @@ const DEFAULT_CHUNK_OVERLAP = 80;
const DEFAULT_WATCH_DEBOUNCE_MS = 1500; const DEFAULT_WATCH_DEBOUNCE_MS = 1500;
const DEFAULT_MAX_RESULTS = 6; const DEFAULT_MAX_RESULTS = 6;
const DEFAULT_MIN_SCORE = 0.35; const DEFAULT_MIN_SCORE = 0.35;
const DEFAULT_CACHE_ENABLED = true;
const DEFAULT_SOURCES: Array<"memory" | "sessions"> = ["memory"]; const DEFAULT_SOURCES: Array<"memory" | "sessions"> = ["memory"];
function normalizeSources( function normalizeSources(
@@ -152,6 +157,10 @@ function mergeConfig(
maxResults: overrides?.query?.maxResults ?? defaults?.query?.maxResults ?? DEFAULT_MAX_RESULTS, maxResults: overrides?.query?.maxResults ?? defaults?.query?.maxResults ?? DEFAULT_MAX_RESULTS,
minScore: overrides?.query?.minScore ?? defaults?.query?.minScore ?? DEFAULT_MIN_SCORE, minScore: overrides?.query?.minScore ?? defaults?.query?.minScore ?? DEFAULT_MIN_SCORE,
}; };
const cache = {
enabled: overrides?.cache?.enabled ?? defaults?.cache?.enabled ?? DEFAULT_CACHE_ENABLED,
maxEntries: overrides?.cache?.maxEntries ?? defaults?.cache?.maxEntries,
};
const overlap = Math.max(0, Math.min(chunking.overlap, chunking.tokens - 1)); const overlap = Math.max(0, Math.min(chunking.overlap, chunking.tokens - 1));
const minScore = Math.max(0, Math.min(1, query.minScore)); const minScore = Math.max(0, Math.min(1, query.minScore));
@@ -170,6 +179,13 @@ function mergeConfig(
chunking: { tokens: Math.max(1, chunking.tokens), overlap }, chunking: { tokens: Math.max(1, chunking.tokens), overlap },
sync, sync,
query: { ...query, minScore }, query: { ...query, minScore },
cache: {
enabled: Boolean(cache.enabled),
maxEntries:
typeof cache.maxEntries === "number" && Number.isFinite(cache.maxEntries)
? Math.max(1, Math.floor(cache.maxEntries))
: undefined,
},
}; };
} }

View File

@@ -178,8 +178,20 @@ export function registerMemoryCli(program: Command) {
if (status.vector.extensionPath) { if (status.vector.extensionPath) {
lines.push(`${label("Vector path")} ${info(status.vector.extensionPath)}`); lines.push(`${label("Vector path")} ${info(status.vector.extensionPath)}`);
} }
if (status.vector.loadError) { if (status.vector.loadError) {
lines.push(`${label("Vector error")} ${warn(status.vector.loadError)}`); lines.push(`${label("Vector error")} ${warn(status.vector.loadError)}`);
}
}
if (status.cache) {
const cacheState = status.cache.enabled ? "enabled" : "disabled";
const cacheColor = status.cache.enabled ? theme.success : theme.muted;
const suffix =
status.cache.enabled && typeof status.cache.entries === "number"
? ` (${status.cache.entries} entries)`
: "";
lines.push(`${label("Embedding cache")} ${colorize(rich, cacheColor, cacheState)}${suffix}`);
if (status.cache.enabled && typeof status.cache.maxEntries === "number") {
lines.push(`${label("Cache cap")} ${info(String(status.cache.maxEntries))}`);
} }
} }
if (status.fallback?.reason) { if (status.fallback?.reason) {

View File

@@ -190,6 +190,8 @@ const FIELD_LABELS: Record<string, string> = {
"agents.defaults.memorySearch.sync.watchDebounceMs": "Memory Watch Debounce (ms)", "agents.defaults.memorySearch.sync.watchDebounceMs": "Memory Watch Debounce (ms)",
"agents.defaults.memorySearch.query.maxResults": "Memory Search Max Results", "agents.defaults.memorySearch.query.maxResults": "Memory Search Max Results",
"agents.defaults.memorySearch.query.minScore": "Memory Search Min Score", "agents.defaults.memorySearch.query.minScore": "Memory Search Min Score",
"agents.defaults.memorySearch.cache.enabled": "Memory Search Embedding Cache",
"agents.defaults.memorySearch.cache.maxEntries": "Memory Search Embedding Cache Max Entries",
"auth.profiles": "Auth Profiles", "auth.profiles": "Auth Profiles",
"auth.order": "Auth Profile Order", "auth.order": "Auth Profile Order",
"auth.cooldowns.billingBackoffHours": "Billing Backoff (hours)", "auth.cooldowns.billingBackoffHours": "Billing Backoff (hours)",
@@ -382,11 +384,15 @@ const FIELD_HELP: Record<string, string> = {
"agents.defaults.memorySearch.fallback": "agents.defaults.memorySearch.fallback":
'Fallback to OpenAI when local embeddings fail ("openai" or "none").', 'Fallback to OpenAI when local embeddings fail ("openai" or "none").',
"agents.defaults.memorySearch.store.path": "agents.defaults.memorySearch.store.path":
"SQLite index path (default: ~/.clawdbot/state/memory/{agentId}.sqlite).", "SQLite index path (default: ~/.clawdbot/memory/{agentId}.sqlite).",
"agents.defaults.memorySearch.store.vector.enabled": "agents.defaults.memorySearch.store.vector.enabled":
"Enable sqlite-vec extension for vector search (default: true).", "Enable sqlite-vec extension for vector search (default: true).",
"agents.defaults.memorySearch.store.vector.extensionPath": "agents.defaults.memorySearch.store.vector.extensionPath":
"Optional override path to sqlite-vec extension library (.dylib/.so/.dll).", "Optional override path to sqlite-vec extension library (.dylib/.so/.dll).",
"agents.defaults.memorySearch.cache.enabled":
"Cache chunk embeddings in SQLite to speed up reindexing and frequent updates (default: true).",
"agents.defaults.memorySearch.cache.maxEntries":
"Optional cap on cached embeddings (best-effort).",
"agents.defaults.memorySearch.sync.onSearch": "agents.defaults.memorySearch.sync.onSearch":
"Lazy sync: reindex on first search after a change.", "Lazy sync: reindex on first search after a change.",
"agents.defaults.memorySearch.sync.watch": "Watch memory files for changes (chokidar).", "agents.defaults.memorySearch.sync.watch": "Watch memory files for changes (chokidar).",

View File

@@ -192,6 +192,12 @@ export type MemorySearchConfig = {
/** Optional override path to sqlite-vec extension (.dylib/.so/.dll). */ /** Optional override path to sqlite-vec extension (.dylib/.so/.dll). */
extensionPath?: string; extensionPath?: string;
}; };
cache?: {
/** Enable embedding cache (default: true). */
enabled?: boolean;
/** Optional max cache entries per provider/model. */
maxEntries?: number;
};
}; };
/** Chunking configuration. */ /** Chunking configuration. */
chunking?: { chunking?: {
@@ -210,6 +216,23 @@ export type MemorySearchConfig = {
query?: { query?: {
maxResults?: number; maxResults?: number;
minScore?: number; minScore?: number;
hybrid?: {
/** Enable hybrid BM25 + vector search (default: true). */
enabled?: boolean;
/** Weight for vector similarity when merging results (0-1). */
vectorWeight?: number;
/** Weight for BM25 text relevance when merging results (0-1). */
textWeight?: number;
/** Multiplier for candidate pool size (default: 4). */
candidateMultiplier?: number;
};
};
/** Index cache behavior. */
cache?: {
/** Cache chunk embeddings in SQLite (default: true). */
enabled?: boolean;
/** Optional cap on cached embeddings (best-effort). */
maxEntries?: number;
}; };
}; };

View File

@@ -258,6 +258,12 @@ export const MemorySearchSchema = z
minScore: z.number().min(0).max(1).optional(), minScore: z.number().min(0).max(1).optional(),
}) })
.optional(), .optional(),
cache: z
.object({
enabled: z.boolean().optional(),
maxEntries: z.number().int().positive().optional(),
})
.optional(),
}) })
.optional(); .optional();
export const AgentModelSchema = z.union([ export const AgentModelSchema = z.union([

View File

@@ -6,12 +6,14 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { getMemorySearchManager, type MemoryIndexManager } from "./index.js"; import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
let embedBatchCalls = 0;
vi.mock("./embeddings.js", () => { vi.mock("./embeddings.js", () => {
const embedText = (text: string) => { const embedText = (text: string) => {
const lower = text.toLowerCase(); const lower = text.toLowerCase();
const alpha = lower.split("alpha").length - 1; const alpha = lower.split("alpha").length - 1;
const beta = lower.split("beta").length - 1; const beta = lower.split("beta").length - 1;
return [alpha, beta, 1]; return [alpha, beta];
}; };
return { return {
createEmbeddingProvider: async (options: { model?: string }) => ({ createEmbeddingProvider: async (options: { model?: string }) => ({
@@ -20,7 +22,10 @@ vi.mock("./embeddings.js", () => {
id: "mock", id: "mock",
model: options.model ?? "mock-embed", model: options.model ?? "mock-embed",
embedQuery: async (text: string) => embedText(text), embedQuery: async (text: string) => embedText(text),
embedBatch: async (texts: string[]) => texts.map(embedText), embedBatch: async (texts: string[]) => {
embedBatchCalls += 1;
return texts.map(embedText);
},
}, },
}), }),
}; };
@@ -32,12 +37,13 @@ describe("memory index", () => {
let manager: MemoryIndexManager | null = null; let manager: MemoryIndexManager | null = null;
beforeEach(async () => { beforeEach(async () => {
embedBatchCalls = 0;
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-mem-")); workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-mem-"));
indexPath = path.join(workspaceDir, "index.sqlite"); indexPath = path.join(workspaceDir, "index.sqlite");
await fs.mkdir(path.join(workspaceDir, "memory")); await fs.mkdir(path.join(workspaceDir, "memory"));
await fs.writeFile( await fs.writeFile(
path.join(workspaceDir, "memory", "2026-01-12.md"), path.join(workspaceDir, "memory", "2026-01-12.md"),
"# Log\nAlpha memory line.\nAnother line.", "# Log\nAlpha memory line.\nZebra memory line.\nAnother line.",
); );
await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "Beta knowledge base entry."); await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "Beta knowledge base entry.");
}); });
@@ -146,6 +152,35 @@ describe("memory index", () => {
expect(results.length).toBeGreaterThan(0); expect(results.length).toBeGreaterThan(0);
}); });
// Regression test for the embedding cache: with cache.enabled=true, a second
// forced sync over unchanged files must be served entirely from the SQLite
// embedding cache, so the mocked provider's embedBatch is not called again.
it("reuses cached embeddings on forced reindex", async () => {
// Minimal config: vector extension off, all automatic sync triggers off,
// minScore 0 so scoring never filters results in other assertions.
const cfg = {
agents: {
defaults: {
workspace: workspaceDir,
memorySearch: {
provider: "openai",
model: "mock-embed",
store: { path: indexPath, vector: { enabled: false } },
sync: { watch: false, onSessionStart: false, onSearch: false },
query: { minScore: 0 },
cache: { enabled: true },
},
},
list: [{ id: "main", default: true }],
},
};
const result = await getMemorySearchManager({ cfg, agentId: "main" });
expect(result.manager).not.toBeNull();
if (!result.manager) throw new Error("manager missing");
manager = result.manager;
// First forced sync embeds every chunk at least once.
await manager.sync({ force: true });
const afterFirst = embedBatchCalls;
expect(afterFirst).toBeGreaterThan(0);
// Second forced sync must hit the cache: no additional embedBatch calls.
await manager.sync({ force: true });
expect(embedBatchCalls).toBe(afterFirst);
});
it("reports vector availability after probe", async () => { it("reports vector availability after probe", async () => {
const cfg = { const cfg = {
agents: { agents: {

View File

@@ -47,6 +47,7 @@ export type MemorySearchResult = {
type MemoryIndexMeta = { type MemoryIndexMeta = {
model: string; model: string;
provider: string; provider: string;
providerKey?: string;
chunkTokens: number; chunkTokens: number;
chunkOverlap: number; chunkOverlap: number;
vectorDims?: number; vectorDims?: number;
@@ -106,6 +107,7 @@ type OpenAiBatchOutputLine = {
const META_KEY = "memory_index_meta_v1"; const META_KEY = "memory_index_meta_v1";
const SNIPPET_MAX_CHARS = 700; const SNIPPET_MAX_CHARS = 700;
const VECTOR_TABLE = "chunks_vec"; const VECTOR_TABLE = "chunks_vec";
const EMBEDDING_CACHE_TABLE = "embedding_cache";
const SESSION_DIRTY_DEBOUNCE_MS = 5000; const SESSION_DIRTY_DEBOUNCE_MS = 5000;
const EMBEDDING_BATCH_MAX_TOKENS = 8000; const EMBEDDING_BATCH_MAX_TOKENS = 8000;
const EMBEDDING_APPROX_CHARS_PER_TOKEN = 1; const EMBEDDING_APPROX_CHARS_PER_TOKEN = 1;
@@ -143,6 +145,8 @@ export class MemoryIndexManager {
}; };
private readonly db: DatabaseSync; private readonly db: DatabaseSync;
private readonly sources: Set<MemorySource>; private readonly sources: Set<MemorySource>;
private readonly providerKey: string;
private readonly cache: { enabled: boolean; maxEntries?: number };
private readonly vector: { private readonly vector: {
enabled: boolean; enabled: boolean;
available: boolean | null; available: boolean | null;
@@ -214,6 +218,11 @@ export class MemoryIndexManager {
this.openAi = params.providerResult.openAi; this.openAi = params.providerResult.openAi;
this.sources = new Set(params.settings.sources); this.sources = new Set(params.settings.sources);
this.db = this.openDatabase(); this.db = this.openDatabase();
this.providerKey = this.computeProviderKey();
this.cache = {
enabled: params.settings.cache.enabled,
maxEntries: params.settings.cache.maxEntries,
};
this.ensureSchema(); this.ensureSchema();
this.vector = { this.vector = {
enabled: params.settings.store.vector.enabled, enabled: params.settings.store.vector.enabled,
@@ -266,19 +275,19 @@ export class MemoryIndexManager {
const minScore = opts?.minScore ?? this.settings.query.minScore; const minScore = opts?.minScore ?? this.settings.query.minScore;
const maxResults = opts?.maxResults ?? this.settings.query.maxResults; const maxResults = opts?.maxResults ?? this.settings.query.maxResults;
const queryVec = await this.provider.embedQuery(cleaned); const queryVec = await this.provider.embedQuery(cleaned);
if (queryVec.length === 0) return []; if (!queryVec.some((v) => v !== 0)) return [];
if (await this.ensureVectorReady(queryVec.length)) { if (await this.ensureVectorReady(queryVec.length)) {
const sourceFilter = this.buildSourceFilter("c"); const sourceFilter = this.buildSourceFilter("c");
const rows = this.db const rows = this.db
.prepare( .prepare(
`SELECT c.path, c.start_line, c.end_line, c.text, `SELECT c.path, c.start_line, c.end_line, c.text,\n` +
c.source, ` c.source,\n` +
vec_distance_cosine(v.embedding, ?) AS dist ` vec_distance_cosine(v.embedding, ?) AS dist\n` +
FROM ${VECTOR_TABLE} v ` FROM ${VECTOR_TABLE} v\n` +
JOIN chunks c ON c.id = v.id ` JOIN chunks c ON c.id = v.id\n` +
WHERE c.model = ?${sourceFilter.sql} ` WHERE c.model = ?${sourceFilter.sql}\n` +
ORDER BY dist ASC ` ORDER BY dist ASC\n` +
LIMIT ?`, ` LIMIT ?`,
) )
.all( .all(
vectorToBlob(queryVec), vectorToBlob(queryVec),
@@ -372,6 +381,7 @@ export class MemoryIndexManager {
requestedProvider: string; requestedProvider: string;
sources: MemorySource[]; sources: MemorySource[];
sourceCounts: Array<{ source: MemorySource; files: number; chunks: number }>; sourceCounts: Array<{ source: MemorySource; files: number; chunks: number }>;
cache?: { enabled: boolean; entries?: number; maxEntries?: number };
fallback?: { from: string; reason?: string }; fallback?: { from: string; reason?: string };
vector?: { vector?: {
enabled: boolean; enabled: boolean;
@@ -432,6 +442,16 @@ export class MemoryIndexManager {
requestedProvider: this.requestedProvider, requestedProvider: this.requestedProvider,
sources: Array.from(this.sources), sources: Array.from(this.sources),
sourceCounts, sourceCounts,
cache: this.cache.enabled
? {
enabled: true,
entries:
(this.db
.prepare(`SELECT COUNT(*) as c FROM ${EMBEDDING_CACHE_TABLE}`)
.get() as { c: number } | undefined)?.c ?? 0,
maxEntries: this.cache.maxEntries,
}
: { enabled: false, maxEntries: this.cache.maxEntries },
fallback: this.fallbackReason ? { from: "local", reason: this.fallbackReason } : undefined, fallback: this.fallbackReason ? { from: "local", reason: this.fallbackReason } : undefined,
vector: { vector: {
enabled: this.vector.enabled, enabled: this.vector.enabled,
@@ -603,6 +623,21 @@ export class MemoryIndexManager {
updated_at INTEGER NOT NULL updated_at INTEGER NOT NULL
); );
`); `);
this.db.exec(`
CREATE TABLE IF NOT EXISTS ${EMBEDDING_CACHE_TABLE} (
provider TEXT NOT NULL,
model TEXT NOT NULL,
provider_key TEXT NOT NULL,
hash TEXT NOT NULL,
embedding TEXT NOT NULL,
dims INTEGER,
updated_at INTEGER NOT NULL,
PRIMARY KEY (provider, model, provider_key, hash)
);
`);
this.db.exec(
`CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at ON ${EMBEDDING_CACHE_TABLE}(updated_at);`,
);
this.ensureColumn("files", "source", "TEXT NOT NULL DEFAULT 'memory'"); this.ensureColumn("files", "source", "TEXT NOT NULL DEFAULT 'memory'");
this.ensureColumn("chunks", "source", "TEXT NOT NULL DEFAULT 'memory'"); this.ensureColumn("chunks", "source", "TEXT NOT NULL DEFAULT 'memory'");
this.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`); this.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`);
@@ -681,6 +716,7 @@ export class MemoryIndexManager {
} }
private listChunks(): Array<{ private listChunks(): Array<{
id: string;
path: string; path: string;
startLine: number; startLine: number;
endLine: number; endLine: number;
@@ -691,11 +727,12 @@ export class MemoryIndexManager {
const sourceFilter = this.buildSourceFilter(); const sourceFilter = this.buildSourceFilter();
const rows = this.db const rows = this.db
.prepare( .prepare(
`SELECT path, start_line, end_line, text, embedding, source `SELECT id, path, start_line, end_line, text, embedding, source
FROM chunks FROM chunks
WHERE model = ?${sourceFilter.sql}`, WHERE model = ?${sourceFilter.sql}`,
) )
.all(this.provider.model, ...sourceFilter.params) as Array<{ .all(this.provider.model, ...sourceFilter.params) as Array<{
id: string;
path: string; path: string;
start_line: number; start_line: number;
end_line: number; end_line: number;
@@ -704,6 +741,7 @@ export class MemoryIndexManager {
source: MemorySource; source: MemorySource;
}>; }>;
return rows.map((row) => ({ return rows.map((row) => ({
id: row.id,
path: row.path, path: row.path,
startLine: row.start_line, startLine: row.start_line,
endLine: row.end_line, endLine: row.end_line,
@@ -779,6 +817,13 @@ export class MemoryIndexManager {
for (const stale of staleRows) { for (const stale of staleRows) {
if (activePaths.has(stale.path)) continue; if (activePaths.has(stale.path)) continue;
this.db.prepare(`DELETE FROM files WHERE path = ? AND source = ?`).run(stale.path, "memory"); this.db.prepare(`DELETE FROM files WHERE path = ? AND source = ?`).run(stale.path, "memory");
try {
this.db
.prepare(
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
.run(stale.path, "memory");
} catch {}
this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(stale.path, "memory"); this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(stale.path, "memory");
} }
} }
@@ -860,6 +905,13 @@ export class MemoryIndexManager {
this.db this.db
.prepare(`DELETE FROM files WHERE path = ? AND source = ?`) .prepare(`DELETE FROM files WHERE path = ? AND source = ?`)
.run(stale.path, "sessions"); .run(stale.path, "sessions");
try {
this.db
.prepare(
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
.run(stale.path, "sessions");
} catch {}
this.db this.db
.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`) .prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`)
.run(stale.path, "sessions"); .run(stale.path, "sessions");
@@ -902,6 +954,7 @@ export class MemoryIndexManager {
!meta || !meta ||
meta.model !== this.provider.model || meta.model !== this.provider.model ||
meta.provider !== this.provider.id || meta.provider !== this.provider.id ||
meta.providerKey !== this.providerKey ||
meta.chunkTokens !== this.settings.chunking.tokens || meta.chunkTokens !== this.settings.chunking.tokens ||
meta.chunkOverlap !== this.settings.chunking.overlap || meta.chunkOverlap !== this.settings.chunking.overlap ||
(vectorReady && !meta?.vectorDims); (vectorReady && !meta?.vectorDims);
@@ -929,6 +982,7 @@ export class MemoryIndexManager {
const nextMeta: MemoryIndexMeta = { const nextMeta: MemoryIndexMeta = {
model: this.provider.model, model: this.provider.model,
provider: this.provider.id, provider: this.provider.id,
providerKey: this.providerKey,
chunkTokens: this.settings.chunking.tokens, chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap, chunkOverlap: this.settings.chunking.overlap,
}; };
@@ -938,6 +992,9 @@ export class MemoryIndexManager {
if (shouldSyncMemory || shouldSyncSessions || needsFullReindex) { if (shouldSyncMemory || shouldSyncSessions || needsFullReindex) {
this.writeMeta(nextMeta); this.writeMeta(nextMeta);
} }
if (shouldSyncMemory || shouldSyncSessions || needsFullReindex) {
this.pruneEmbeddingCacheIfNeeded();
}
} }
private resetIndex() { private resetIndex() {
@@ -1091,16 +1148,121 @@ export class MemoryIndexManager {
return batches; return batches;
} }
/**
 * Look up previously cached embeddings for the given chunk hashes.
 *
 * Returns a map of hash -> embedding vector, containing only the hashes that
 * were found for the current provider/model/providerKey triple. Returns an
 * empty map when the cache is disabled or no usable hashes were supplied.
 */
private loadEmbeddingCache(hashes: string[]): Map<string, number[]> {
  const found = new Map<string, number[]>();
  if (!this.cache.enabled || hashes.length === 0) return found;
  // Dedupe (Set preserves first-seen order) and drop empty hashes.
  const distinct = Array.from(new Set(hashes.filter((hash) => Boolean(hash))));
  if (distinct.length === 0) return found;
  const keyParams = [this.provider.id, this.provider.model, this.providerKey];
  // Query in slices of 400 hashes to stay well under SQLite's bound-parameter limit.
  const sliceSize = 400;
  for (let offset = 0; offset < distinct.length; offset += sliceSize) {
    const slice = distinct.slice(offset, offset + sliceSize);
    const placeholders = slice.map(() => "?").join(", ");
    const rows = this.db
      .prepare(
        `SELECT hash, embedding FROM ${EMBEDDING_CACHE_TABLE}\n` +
          ` WHERE provider = ? AND model = ? AND provider_key = ? AND hash IN (${placeholders})`,
      )
      .all(...keyParams, ...slice) as Array<{ hash: string; embedding: string }>;
    for (const row of rows) {
      found.set(row.hash, parseEmbedding(row.embedding));
    }
  }
  return found;
}
/**
 * Insert or refresh cache rows for freshly computed chunk embeddings.
 *
 * Rows are keyed by (provider, model, provider_key, hash); an existing row is
 * overwritten in place with the new vector, dims, and timestamp. No-op when
 * the cache is disabled or there is nothing to store.
 */
private upsertEmbeddingCache(entries: Array<{ hash: string; embedding: number[] }>): void {
  if (!this.cache.enabled || entries.length === 0) return;
  const timestamp = Date.now();
  const upsert = this.db.prepare(
    `INSERT INTO ${EMBEDDING_CACHE_TABLE} (provider, model, provider_key, hash, embedding, dims, updated_at)\n` +
      ` VALUES (?, ?, ?, ?, ?, ?, ?)\n` +
      ` ON CONFLICT(provider, model, provider_key, hash) DO UPDATE SET\n` +
      ` embedding=excluded.embedding,\n` +
      ` dims=excluded.dims,\n` +
      ` updated_at=excluded.updated_at`,
  );
  for (const { hash, embedding } of entries) {
    const vector = embedding ?? []; // defensive: tolerate a missing vector
    upsert.run(
      this.provider.id,
      this.provider.model,
      this.providerKey,
      hash,
      JSON.stringify(vector),
      vector.length,
      timestamp,
    );
  }
}
/**
 * Best-effort eviction: when a maxEntries cap is configured and the cache has
 * grown past it, delete the oldest rows (by updated_at) until the count is
 * back at the cap. No-op when the cache is disabled or uncapped.
 */
private pruneEmbeddingCacheIfNeeded(): void {
  if (!this.cache.enabled) return;
  const cap = this.cache.maxEntries;
  if (!cap || cap <= 0) return;
  const countRow = this.db
    .prepare(`SELECT COUNT(*) as c FROM ${EMBEDDING_CACHE_TABLE}`)
    .get() as { c: number } | undefined;
  const total = countRow?.c ?? 0;
  if (total <= cap) return;
  // Evict least-recently-updated rows first; rowid keys the delete.
  const surplus = total - cap;
  this.db
    .prepare(
      `DELETE FROM ${EMBEDDING_CACHE_TABLE}\n` +
        ` WHERE rowid IN (\n` +
        ` SELECT rowid FROM ${EMBEDDING_CACHE_TABLE}\n` +
        ` ORDER BY updated_at ASC\n` +
        ` LIMIT ?\n` +
        ` )`,
    )
    .run(surplus);
}
private async embedChunksInBatches(chunks: MemoryChunk[]): Promise<number[][]> { private async embedChunksInBatches(chunks: MemoryChunk[]): Promise<number[][]> {
if (chunks.length === 0) return []; if (chunks.length === 0) return [];
const batches = this.buildEmbeddingBatches(chunks); const cached = this.loadEmbeddingCache(chunks.map((chunk) => chunk.hash));
const embeddings: number[][] = []; const embeddings: number[][] = Array.from({ length: chunks.length }, () => []);
const missing: Array<{ index: number; chunk: MemoryChunk }> = [];
for (let i = 0; i < chunks.length; i += 1) {
const chunk = chunks[i];
const hit = chunk?.hash ? cached.get(chunk.hash) : undefined;
if (hit && hit.length > 0) {
embeddings[i] = hit;
} else if (chunk) {
missing.push({ index: i, chunk });
}
}
if (missing.length === 0) return embeddings;
const missingChunks = missing.map((m) => m.chunk);
const batches = this.buildEmbeddingBatches(missingChunks);
const toCache: Array<{ hash: string; embedding: number[] }> = [];
let cursor = 0;
for (const batch of batches) { for (const batch of batches) {
const batchEmbeddings = await this.embedBatchWithRetry(batch.map((chunk) => chunk.text)); const batchEmbeddings = await this.embedBatchWithRetry(batch.map((chunk) => chunk.text));
for (let i = 0; i < batch.length; i += 1) { for (let i = 0; i < batch.length; i += 1) {
embeddings.push(batchEmbeddings[i] ?? []); const item = missing[cursor + i];
const embedding = batchEmbeddings[i] ?? [];
if (item) {
embeddings[item.index] = embedding;
toCache.push({ hash: item.chunk.hash, embedding });
}
} }
cursor += batch.length;
} }
this.upsertEmbeddingCache(toCache);
return embeddings; return embeddings;
} }
@@ -1121,6 +1283,24 @@ export class MemoryIndexManager {
return headers; return headers;
} }
/**
 * Compute a stable fingerprint of the embedding endpoint configuration.
 *
 * For OpenAI-compatible providers the key covers base URL, model, and custom
 * headers — so pointing at a different endpoint invalidates cached embeddings
 * and triggers a reindex. The Authorization header is deliberately excluded:
 * rotating an API key must not throw away the cache. Headers are sorted so
 * the fingerprint is independent of declaration order.
 */
private computeProviderKey(): string {
  const endpoint = this.openAi;
  if (this.provider.id === "openai" && endpoint) {
    const headerPairs = Object.entries(endpoint.headers)
      .filter(([name]) => name.toLowerCase() !== "authorization")
      .sort(([a], [b]) => a.localeCompare(b))
      .map(([name, value]) => [name, value]);
    return hashText(
      JSON.stringify({
        provider: "openai",
        baseUrl: endpoint.baseUrl,
        model: endpoint.model,
        headers: headerPairs,
      }),
    );
  }
  // Non-OpenAI providers: identity is just provider id + model.
  return hashText(JSON.stringify({ provider: this.provider.id, model: this.provider.model }));
}
private buildOpenAiBatchRequests( private buildOpenAiBatchRequests(
chunks: MemoryChunk[], chunks: MemoryChunk[],
entry: MemoryFileEntry | SessionFileEntry, entry: MemoryFileEntry | SessionFileEntry,
@@ -1300,8 +1480,40 @@ export class MemoryIndexManager {
return this.embedChunksInBatches(chunks); return this.embedChunksInBatches(chunks);
} }
if (chunks.length === 0) return []; if (chunks.length === 0) return [];
const cached = this.loadEmbeddingCache(chunks.map((chunk) => chunk.hash));
const embeddings: number[][] = Array.from({ length: chunks.length }, () => []);
const missing: Array<{ index: number; chunk: MemoryChunk }> = [];
const { requests, mapping } = this.buildOpenAiBatchRequests(chunks, entry, source); for (let i = 0; i < chunks.length; i += 1) {
const chunk = chunks[i];
const hit = chunk?.hash ? cached.get(chunk.hash) : undefined;
if (hit && hit.length > 0) {
embeddings[i] = hit;
} else if (chunk) {
missing.push({ index: i, chunk });
}
}
if (missing.length === 0) return embeddings;
const requests: OpenAiBatchRequest[] = [];
const mapping = new Map<string, number>();
for (const item of missing) {
const chunk = item.chunk;
const customId = hashText(
`${source}:${entry.path}:${chunk.startLine}:${chunk.endLine}:${chunk.hash}:${item.index}`,
);
mapping.set(customId, item.index);
requests.push({
custom_id: customId,
method: "POST",
url: OPENAI_BATCH_ENDPOINT,
body: {
model: this.openAi?.model ?? this.provider.model,
input: chunk.text,
},
});
}
const groups = this.splitOpenAiBatchRequests(requests); const groups = this.splitOpenAiBatchRequests(requests);
log.debug("memory embeddings: openai batch submit", { log.debug("memory embeddings: openai batch submit", {
source, source,
@@ -1313,7 +1525,7 @@ export class MemoryIndexManager {
pollIntervalMs: this.batch.pollIntervalMs, pollIntervalMs: this.batch.pollIntervalMs,
timeoutMs: this.batch.timeoutMs, timeoutMs: this.batch.timeoutMs,
}); });
const embeddings: number[][] = Array.from({ length: chunks.length }, () => []); const toCache: Array<{ hash: string; embedding: number[] }> = [];
const tasks = groups.map((group, groupIndex) => async () => { const tasks = groups.map((group, groupIndex) => async () => {
const batchInfo = await this.submitOpenAiBatch(group); const batchInfo = await this.submitOpenAiBatch(group);
@@ -1373,6 +1585,8 @@ export class MemoryIndexManager {
continue; continue;
} }
embeddings[index] = embedding; embeddings[index] = embedding;
const chunk = chunks[index];
if (chunk) toCache.push({ hash: chunk.hash, embedding });
} }
if (errors.length > 0) { if (errors.length > 0) {
throw new Error(`openai batch ${batchInfo.id} failed: ${errors.join("; ")}`); throw new Error(`openai batch ${batchInfo.id} failed: ${errors.join("; ")}`);
@@ -1385,6 +1599,7 @@ export class MemoryIndexManager {
}); });
await this.runWithConcurrency(tasks, this.batch.concurrency); await this.runWithConcurrency(tasks, this.batch.concurrency);
this.upsertEmbeddingCache(toCache);
return embeddings; return embeddings;
} }
@@ -1463,9 +1678,16 @@ export class MemoryIndexManager {
const sample = embeddings.find((embedding) => embedding.length > 0); const sample = embeddings.find((embedding) => embedding.length > 0);
const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false; const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false;
const now = Date.now(); const now = Date.now();
this.db if (vectorReady) {
.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`) try {
.run(entry.path, options.source); this.db
.prepare(
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
.run(entry.path, options.source);
} catch {}
}
this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(entry.path, options.source);
for (let i = 0; i < chunks.length; i++) { for (let i = 0; i < chunks.length; i++) {
const chunk = chunks[i]; const chunk = chunks[i];
const embedding = embeddings[i] ?? []; const embedding = embeddings[i] ?? [];