feat: add memory vector search

This commit is contained in:
Peter Steinberger
2026-01-12 11:22:56 +00:00
parent 8049f33435
commit bf11a42c37
22 changed files with 2923 additions and 94 deletions

View File

@@ -1,37 +1,17 @@
# Changelog
## 2026.1.12-1
## 2026.1.12
### Highlights
- Memory: add vector search for agent memories (Markdown-only scope) with SQLite index, chunking, lazy sync + file watch, and per-agent enablement/fallback.
### Changes
- Heartbeat: raise default `ackMaxChars` to 300 so any `HEARTBEAT_OK` replies with short padding stay internal (fewer noisy heartbeat posts on providers).
- Onboarding: normalize API key inputs (strip `export KEY=...` wrappers) so shell-style entries paste cleanly.
## 2026.1.11-5
- Memory: embedding providers support OpenAI or local `node-llama-cpp`; config adds defaults + per-agent overrides, provider/fallback metadata surfaced in tools/CLI.
- CLI/Tools: new `clawdbot memory` commands plus `memory_search`/`memory_get` tools returning snippets + line ranges and provider info.
- Runtime: memory index stored under `~/.clawdbot/memory/{agentId}.sqlite` with watch-on-by-default; inline status replies now stay auth-gated while inline prompts continue to the agent.
### Fixes
- Auto-reply: prevent duplicate /status replies (including /usage alias) and add tests for inline + standalone cases.
## 2026.1.11-4
### Fixes
- CLI: read the git commit hash from the package root so npm installs show it.
## 2026.1.11-3
### Fixes
- CLI: avoid top-level await warnings in the entrypoint on fresh installs.
- CLI: show a commit hash in the banner for npm installs (package.json gitHead fallback).
## 2026.1.11-2
### Fixes
- Installer: ensure the CLI entrypoint is executable after npm installs.
- Packaging: include `dist/plugins/` in the npm package to avoid missing module errors.
## 2026.1.11-1
### Fixes
- Installer: include `patches/` in the npm package so postinstall patching works for npm/bun installs.
- Auto-reply: inline `/status` now honors allowlists (authorized stripped + replied inline; unauthorized leaves text for the agent) to match command gating tests.
## 2026.1.11
@@ -42,9 +22,6 @@
- Agents: automatic pre-compaction memory flush turn to store durable memories before compaction.
### Changes
- Deps: update pi-agent-core/pi-ai/pi-coding-agent/pi-tui and refresh the pi-ai patch.
- Dev: bump @types/node.
- macOS: add wizard debug CLI and share wizard parsing helpers.
- CLI/Onboarding: simplify MiniMax auth choice to a single M2.1 option.
- CLI: configure section selection now loops until Continue.
- Docs: explain MiniMax vs MiniMax Lightning (speed vs cost) and restore LM Studio example.
@@ -52,20 +29,16 @@
- Onboarding/CLI: group model/auth choice by provider and label Z.AI as GLM 4.7.
- Onboarding/Docs: add Moonshot AI (Kimi K2) auth choice + config example.
- CLI/Onboarding: prompt to reuse detected API keys for Moonshot/MiniMax/Z.AI/Gemini/Anthropic/OpenCode.
- CLI/Onboarding: move MiniMax to the top of the provider list.
- CLI/Onboarding: add MiniMax M2.1 Lightning auth choice.
- CLI/Onboarding: show key previews when reusing detected API keys.
- Auto-reply: add compact `/model` picker (models + available providers) and show provider endpoints in `/model status`.
- Control UI: add Config tab model presets (MiniMax M2.1, GLM 4.7, Kimi) for one-click setup.
- Plugins: add extension loader (tools/RPC/CLI/services), discovery paths, and config schema + Control UI labels (uiHints).
- Plugins: add `clawdbot plugins install` (path/tgz/npm), plus `list|info|enable|disable|doctor` UX.
- Plugins: voice-call plugin now real (Twilio/log), adds start/status RPC/CLI/tool + tests.
- Docs: add plugins doc + cross-links from tools/skills/gateway config.
- Docs: clarify memory flush behavior + writable workspace requirement in Memory/Session/FAQ.
- Docs: add beginner-friendly plugin quick start + expand Voice Call plugin docs.
- Tests: add Docker plugin loader + tgz-install smoke test.
- Tests: extend Docker plugin E2E to cover installing from local folders (`plugins.load.paths`) and `file:` npm specs.
- Tests: add coverage for pre-compaction memory flush settings (including read-only/CLI skips).
- Tests: add coverage for pre-compaction memory flush settings.
- Tests: modernize live model smoke selection for current releases and enforce tools/images/thinking-high coverage. (#769) — thanks @steipete.
- Agents/Tools: add `apply_patch` tool for multi-file edits (experimental; gated by tools.exec.applyPatch; OpenAI-only).
- Agents/Tools: rename the bash tool to exec (config alias maintained). (#748) — thanks @myfunc.
@@ -92,17 +65,9 @@
- Installer UX: add `--install-method git|npm` and auto-detect source checkouts (prompt to update git checkout vs migrate to npm).
### Fixes
- Control UI: flatten nav into a single horizontal scroll row on tablet/mobile (and always show collapsed group items). (#771) — thanks @carlulsoe.
- macOS: start + await local gateway before onboarding wizard begins.
- macOS: cancel onboarding wizard on close, recover if the gateway drops the session, and time out stalled gateway connects.
- macOS: wizard debug CLI now surfaces error status instead of exiting as complete.
- Models/Onboarding: configure MiniMax (minimax.io) via Anthropic-compatible `/anthropic` endpoint by default (keep `minimax-api` as a legacy alias).
- Agents/Browser: cap Playwright AI snapshots for tool calls (maxChars); CLI snapshots remain full. (#763) — thanks @thesash.
- Models: normalize Gemini 3 Pro/Flash IDs to preview names for live model lookups. (#769) — thanks @steipete.
- CLI: fix guardCancel typing for configure prompts. (#769) — thanks @steipete.
- Providers: default groupPolicy to allowlist across providers and warn in doctor when groups are open.
- MS Teams: add groupPolicy/groupAllowFrom gating for group chats and warn when groups are open.
- Providers: strip tool call/result ids from Gemini CLI payloads to avoid API 400s. (#756)
- Gateway/WebChat: include handshake validation details in the WebSocket close reason for easier debugging; preserve close codes.
- Gateway/Auth: send invalid connect responses before closing the handshake; stabilize invalid-connect auth test.
- Gateway: tighten gateway listener detection.
@@ -114,23 +79,16 @@
- Config: expand `~` in `CLAWDBOT_CONFIG_PATH` and common path-like config fields (including `plugins.load.paths`); guard invalid `$include` paths. (#731) — thanks @pasogott.
- Agents: stop pre-creating session transcripts so first user messages persist in JSONL history.
- Agents: skip pre-compaction memory flush when the session workspace is read-only.
- Auto-reply: allow inline `/status` for allowlisted senders (stripped before the model); unauthorized senders see it as plain text.
- Auto-reply: include config-only allowlisted models in `/model` even when the catalog is partial.
- Auto-reply: allow fuzzy `/model` matches (e.g. `/model kimi` or `/model moonshot/kimi`) when unambiguous.
- Auto-reply: ignore inline `/status` directives unless the message is directive-only.
- CLI/Configure: enter the selected section immediately, then return to the section picker.
- CLI/Configure: apply the chosen auth model as default (skip the extra picker) and refresh the model catalog for new providers.
- Auto-reply: align `/think` default display with model reasoning defaults. (#751) — thanks @gabriel-trigo.
- Auto-reply: flush block reply buffers on tool boundaries. (#750) — thanks @sebslight.
- Auto-reply: allow sender fallback for command authorization when `SenderId` is empty (WhatsApp self-chat). (#755) — thanks @juanpablodlc.
- Heartbeat: refresh prompt text for updated defaults.
- Agents/Tools: use PowerShell on Windows to capture system utility output. (#748) — thanks @myfunc.
- Agents/Tools: normalize Claude Code-style read/write/edit params (file_path/old_string/new_string) and keep sandbox guards in place. (#768) — thanks @hsrvc.
- Docker: tolerate unset optional env vars in docker-setup.sh under strict mode. (#725) — thanks @petradonka.
- CLI/Update: preserve base environment when passing overrides to update subprocesses. (#713) — thanks @danielz1z.
- Agents: treat message tool errors as failures so fallback replies still send; require `to` + `message` for `action=send`. (#717) — thanks @theglove44.
- Agents: preserve reasoning items on tool-only turns.
- Agents: enforce `<final>` gating for reasoning-tag providers to prevent tag/reasoning leaks. (#754) — thanks @mcinteerj.
- Agents/Subagents: wait for completion before announcing, align wait timeout with run timeout, and make announce prompts more emphatic.
- Agents: route subagent transcripts to the target agent sessions directory and add regression coverage. (#708) — thanks @xMikeMickelson.
- Agents/Tools: preserve action enums when flattening tool schemas. (#708) — thanks @xMikeMickelson.

View File

@@ -70,6 +70,10 @@ clawdbot [--dev] [--profile <name>] <command>
enable
disable
doctor
memory
status
index
search
message
agent
agents
@@ -188,6 +192,14 @@ Manage extensions and their config:
Most plugin changes require a gateway restart. See [/plugin](/plugin).
## Memory
Vector search over `MEMORY.md` + `memory/*.md`:
- `clawdbot memory status` — show index stats.
- `clawdbot memory index` — reindex memory files.
- `clawdbot memory search "<query>"` — semantic search over memory.
## Chat slash commands
Chat messages support `/...` commands (text and native). See [/tools/slash-commands](/tools/slash-commands).

View File

@@ -67,3 +67,38 @@ Details:
For the full compaction lifecycle, see
[Session management + compaction](/reference/session-management-compaction).
## Vector memory search
Clawdbot can build a small vector index over `MEMORY.md` and `memory/*.md` so
semantic queries can find related notes even when wording differs.
Defaults:
- Enabled by default.
- Watches memory files for changes (debounced).
- Uses remote embeddings (OpenAI) unless local mode is configured.
- Local mode uses node-llama-cpp and may require `pnpm approve-builds`.
Config example:
```json5
agents: {
defaults: {
memorySearch: {
provider: "openai",
model: "text-embedding-3-small",
fallback: "openai",
sync: { watch: true }
}
}
}
```
Tools:
- `memory_search` — returns snippets with file + line ranges (result shape below).
- `memory_get` — read memory file content by path.
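Each hit carries enough context to jump straight into `memory_get`. The result shape mirrors the `MemorySearchResult` type this commit adds in `src/memory/index.ts`:
```ts
// Shape of each memory_search hit (MemorySearchResult in src/memory/index.ts).
type MemorySearchResult = {
  path: string; // workspace-relative, e.g. "memory/2026-01-12.md"
  startLine: number; // 1-based start of the matching chunk
  endLine: number; // 1-based end of the matching chunk
  score: number; // cosine similarity against the query embedding
  snippet: string; // chunk text, truncated to ~700 chars
};
```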
Local mode:
- Set `agents.defaults.memorySearch.provider = "local"`.
- Provide `agents.defaults.memorySearch.local.modelPath` (GGUF or `hf:` URI).
- Optional: set `agents.defaults.memorySearch.fallback = "none"` to avoid remote fallback (example below).
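A minimal local-mode example (the `modelPath` shown is the default `hf:` URI from `src/memory/embeddings.ts`; substitute your own GGUF path):
```json5
agents: {
  defaults: {
    memorySearch: {
      provider: "local",
      local: {
        modelPath: "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf"
      },
      fallback: "none" // fail fast instead of falling back to OpenAI
    }
  }
}
```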

View File

@@ -0,0 +1,127 @@
---
summary: "Vector memory search design plan (per-agent, watch/lazy sync, storage)"
read_when:
- Designing or implementing vector memory search
- Adding embedding providers or sync behavior
---
# Vector Memory Search — Design Plan
Goal: semantic search over **agent memory files** only, with minimal deps and
good UX defaults. Enabled by default, with per-agent overrides.
## Scope
- Sources: `MEMORY.md` + `memory/YYYY-MM-DD.md` inside the agent workspace.
- No indexing outside the workspace. No hidden paths.
- No QMD-style query expansion or rerank in v1.
## Config Shape
Location: `agents.defaults.memorySearch` + `agents.list[].memorySearch`.
```json5
agents: {
defaults: {
memorySearch: {
enabled: true,
provider: "openai", // "openai" | "local"
fallback: "openai", // "openai" | "none"
model: "text-embedding-3-small",
store: {
driver: "sqlite",
path: "~/.clawdbot/memory/{agentId}.sqlite"
},
chunking: {
tokens: 400,
overlap: 80
},
sync: {
onSessionStart: true,
onSearch: true, // LazySync
watch: true, // default on
watchDebounceMs: 1500,
intervalMinutes: 0
},
query: {
maxResults: 6,
minScore: 0.35
}
}
},
list: [
{ id: "peter", memorySearch: { provider: "local", sync: { watch: false } } }
]
}
```
## Storage
Per-agent DB (default): `~/.clawdbot/memory/{agentId}.sqlite`.
Tables (v1):
- `files(path PRIMARY KEY, hash, mtime, size)`
- `chunks(id PRIMARY KEY, path, start_line, end_line, hash, text, embedding, updated_at)`
Notes:
- `hash` = content hash of chunk text.
- `embedding` stored as float[] (the sqlite-vec extension is optional); without vec,
store embeddings as JSON and do a linear scan in memory, which is fine for small corpora.
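For reference, a sketch of the v1 DDL as `ensureSchema` creates it with `node:sqlite` (the implementation also stores a `model` column per chunk so reindexing can detect model changes):
```ts
import { DatabaseSync } from "node:sqlite";

// v1 schema sketch (mirrors ensureSchema in src/memory/index.ts).
const db = new DatabaseSync(":memory:");
db.exec(`CREATE TABLE IF NOT EXISTS files (
  path TEXT PRIMARY KEY,
  hash TEXT NOT NULL,
  mtime INTEGER NOT NULL,
  size INTEGER NOT NULL
);`);
db.exec(`CREATE TABLE IF NOT EXISTS chunks (
  id TEXT PRIMARY KEY,
  path TEXT NOT NULL,
  start_line INTEGER NOT NULL,
  end_line INTEGER NOT NULL,
  hash TEXT NOT NULL,
  model TEXT NOT NULL,
  text TEXT NOT NULL,
  embedding TEXT NOT NULL, -- JSON-encoded float[]
  updated_at INTEGER NOT NULL
);`);
db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`);
```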
## Embedding Providers
Interface (core):
- `embedQuery(text): number[]`
- `embedBatch(texts[]): number[][]`
Providers:
- `openai` (default): OpenAI embeddings via existing keys.
- `local` (optional): node-llama-cpp (GGUF).
- Fallback: when `provider: "local"` fails, fall back to OpenAI unless `fallback: "none"`.
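In code the interface is async; this mirrors the `EmbeddingProvider` type the commit adds in `src/memory/embeddings.ts`:
```ts
// Embedding provider contract (async in the implementation).
export type EmbeddingProvider = {
  id: string; // "openai" or "local"
  model: string; // remote model id, or local GGUF path
  embedQuery: (text: string) => Promise<number[]>;
  embedBatch: (texts: string[]) => Promise<number[][]>;
};
```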
## Index Pipeline
1) Resolve memory file list (workspace only).
2) Read file, compute file hash/mtime.
3) Chunk by headings + token cap (overlap).
4) Embed only changed chunks (hash compare).
5) Upsert `chunks` rows, prune deleted files.
Chunking:
- Prefer heading-aware splits.
- Max tokens + overlap; keep line ranges for snippets.
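Token counts are approximated rather than tokenizer-exact: `chunkMarkdown` in `src/memory/index.ts` budgets roughly 4 characters per token, so the settings translate to character caps:
```ts
// Character budgets from token settings (~4 chars/token heuristic,
// as used by chunkMarkdown in src/memory/index.ts).
function chunkBudgets(chunking: { tokens: number; overlap: number }) {
  const maxChars = Math.max(32, chunking.tokens * 4); // cap per chunk
  const overlapChars = Math.max(0, chunking.overlap * 4); // carried into the next chunk
  return { maxChars, overlapChars };
}
```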
## Sync Strategy
Default: **watch + lazy + session-start**
- `watch`: chokidar on `MEMORY.md` + `memory/**/*.md` (debounced).
- `onSearch`: if dirty, sync before search (LazySync).
- `onSessionStart`: warm index once per session.
- `intervalMinutes`: optional for long-lived sessions.
If workspace access is read-only or missing: disable writes; return “not indexed”.
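The watch path only marks the index dirty and schedules a debounced sync; searches then reindex on demand. A condensed sketch of that interplay (mirroring `scheduleWatchSync` and `search` in `src/memory/index.ts`):
```ts
// Watch + lazy sync skeleton (condensed from src/memory/index.ts).
let dirty = true; // start dirty so the first search indexes
let timer: NodeJS.Timeout | null = null;

function onMemoryFileEvent(debounceMs: number, sync: () => Promise<void>) {
  dirty = true;
  if (timer) clearTimeout(timer); // coalesce bursts of file events
  timer = setTimeout(() => {
    timer = null;
    void sync();
  }, debounceMs);
}

async function beforeSearch(onSearch: boolean, sync: () => Promise<void>) {
  if (onSearch && dirty) await sync(); // LazySync: reindex before answering
}
```
(`sync()` clears `dirty` once the reindex completes.)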
## Query Flow
1) Embed query.
2) Cosine similarity over all chunk embeddings.
3) Return top K with `{path, startLine, endLine, snippet, score}`.
4) Model may call `memory_get` when full context needed.
Optional v2: add FTS5 + RRF merge (FTS + vector) for quality.
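Scoring is plain cosine similarity over the stored vectors, equivalent to the helper this commit ships in `src/memory/index.ts`:
```ts
// Cosine similarity between the query vector and a chunk embedding.
function cosineSimilarity(a: number[], b: number[]): number {
  const len = Math.min(a.length, b.length);
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < len; i += 1) {
    const av = a[i] ?? 0;
    const bv = b[i] ?? 0;
    dot += av * bv;
    normA += av * av;
    normB += bv * bv;
  }
  if (normA === 0 || normB === 0) return 0;
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}
```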
## Tool + CLI
Tools:
- `memory_search { query, maxResults?, minScore? }`
- `memory_get { path, from?, lines? }`
CLI (optional):
- `clawdbot memory index|search|status`
## Security + Permissions
- Indexer reads only memory files in workspace.
- No scanning outside workspace; no “sneak” reads.
- Respect sandbox `workspaceAccess` (ro = read-only; none = disabled).
## Tests
- Chunking boundaries + line ranges.
- Hash-based incremental updates.
- Search ranking (cosine).
- Watcher debounce (fake fs).
## Rollout
- Enabled by default; with no memory files the index stays empty (silent).
- No migration needed.

View File

@@ -160,6 +160,7 @@
"json5": "^2.2.3",
"long": "5.3.2",
"markdown-it": "^14.1.0",
"node-llama-cpp": "3.14.5",
"osc-progress": "^0.2.0",
"playwright-core": "1.57.0",
"proper-lockfile": "^4.1.2",

1222
pnpm-lock.yaml generated

File diff suppressed because it is too large.

View File

@@ -22,6 +22,7 @@ type ResolvedAgentConfig = {
workspace?: string;
agentDir?: string;
model?: string;
memorySearch?: AgentEntry["memorySearch"];
humanDelay?: AgentEntry["humanDelay"];
identity?: AgentEntry["identity"];
groupChat?: AgentEntry["groupChat"];
@@ -95,6 +96,7 @@ export function resolveAgentConfig(
typeof entry.workspace === "string" ? entry.workspace : undefined,
agentDir: typeof entry.agentDir === "string" ? entry.agentDir : undefined,
model: typeof entry.model === "string" ? entry.model : undefined,
memorySearch: entry.memorySearch,
humanDelay: entry.humanDelay,
identity: entry.identity,
groupChat: entry.groupChat,

View File

@@ -9,6 +9,10 @@ import type { AnyAgentTool } from "./tools/common.js";
import { createCronTool } from "./tools/cron-tool.js";
import { createGatewayTool } from "./tools/gateway-tool.js";
import { createImageTool } from "./tools/image-tool.js";
import {
createMemoryGetTool,
createMemorySearchTool,
} from "./tools/memory-tool.js";
import { createMessageTool } from "./tools/message-tool.js";
import { createNodesTool } from "./tools/nodes-tool.js";
import { createSessionStatusTool } from "./tools/session-status-tool.js";
@@ -43,6 +47,14 @@ export function createClawdbotTools(options?: {
config: options?.config,
agentDir: options?.agentDir,
});
const memorySearchTool = createMemorySearchTool({
config: options?.config,
agentSessionKey: options?.agentSessionKey,
});
const memoryGetTool = createMemoryGetTool({
config: options?.config,
agentSessionKey: options?.agentSessionKey,
});
const tools: AnyAgentTool[] = [
createBrowserTool({
defaultControlUrl: options?.browserControlUrl,
@@ -89,6 +101,9 @@ export function createClawdbotTools(options?: {
agentSessionKey: options?.agentSessionKey,
config: options?.config,
}),
...(memorySearchTool && memoryGetTool
? [memorySearchTool, memoryGetTool]
: []),
...(imageTool ? [imageTool] : []),
];

View File

@@ -0,0 +1,56 @@
import { describe, expect, it } from "vitest";
import { resolveMemorySearchConfig } from "./memory-search.js";
describe("memory search config", () => {
it("returns null when disabled", () => {
const cfg = {
agents: {
defaults: {
memorySearch: { enabled: true },
},
list: [
{
id: "main",
default: true,
memorySearch: { enabled: false },
},
],
},
};
const resolved = resolveMemorySearchConfig(cfg, "main");
expect(resolved).toBeNull();
});
it("merges defaults and overrides", () => {
const cfg = {
agents: {
defaults: {
memorySearch: {
provider: "openai",
model: "text-embedding-3-small",
chunking: { tokens: 500, overlap: 100 },
query: { maxResults: 4, minScore: 0.2 },
},
},
list: [
{
id: "main",
default: true,
memorySearch: {
chunking: { tokens: 320 },
query: { maxResults: 8 },
},
},
],
},
};
const resolved = resolveMemorySearchConfig(cfg, "main");
expect(resolved?.provider).toBe("openai");
expect(resolved?.model).toBe("text-embedding-3-small");
expect(resolved?.chunking.tokens).toBe(320);
expect(resolved?.chunking.overlap).toBe(100);
expect(resolved?.query.maxResults).toBe(8);
expect(resolved?.query.minScore).toBe(0.2);
});
});

134
src/agents/memory-search.ts Normal file
View File

@@ -0,0 +1,134 @@
import os from "node:os";
import path from "node:path";
import type { ClawdbotConfig, MemorySearchConfig } from "../config/config.js";
import { resolveStateDir } from "../config/paths.js";
import { resolveUserPath } from "../utils.js";
import { resolveAgentConfig } from "./agent-scope.js";
export type ResolvedMemorySearchConfig = {
enabled: boolean;
provider: "openai" | "local";
fallback: "openai" | "none";
model: string;
local: {
modelPath?: string;
modelCacheDir?: string;
};
store: {
driver: "sqlite";
path: string;
};
chunking: {
tokens: number;
overlap: number;
};
sync: {
onSessionStart: boolean;
onSearch: boolean;
watch: boolean;
watchDebounceMs: number;
intervalMinutes: number;
};
query: {
maxResults: number;
minScore: number;
};
};
const DEFAULT_MODEL = "text-embedding-3-small";
const DEFAULT_CHUNK_TOKENS = 400;
const DEFAULT_CHUNK_OVERLAP = 80;
const DEFAULT_WATCH_DEBOUNCE_MS = 1500;
const DEFAULT_MAX_RESULTS = 6;
const DEFAULT_MIN_SCORE = 0.35;
function resolveStorePath(agentId: string, raw?: string): string {
const stateDir = resolveStateDir(process.env, os.homedir);
const fallback = path.join(stateDir, "memory", `${agentId}.sqlite`);
if (!raw) return fallback;
const withToken = raw.includes("{agentId}")
? raw.replaceAll("{agentId}", agentId)
: raw;
return resolveUserPath(withToken);
}
function mergeConfig(
defaults: MemorySearchConfig | undefined,
overrides: MemorySearchConfig | undefined,
agentId: string,
): ResolvedMemorySearchConfig {
const enabled = overrides?.enabled ?? defaults?.enabled ?? true;
const provider = overrides?.provider ?? defaults?.provider ?? "openai";
const fallback = overrides?.fallback ?? defaults?.fallback ?? "openai";
const model = overrides?.model ?? defaults?.model ?? DEFAULT_MODEL;
const local = {
modelPath: overrides?.local?.modelPath ?? defaults?.local?.modelPath,
modelCacheDir:
overrides?.local?.modelCacheDir ?? defaults?.local?.modelCacheDir,
};
const store = {
driver: overrides?.store?.driver ?? defaults?.store?.driver ?? "sqlite",
path: resolveStorePath(
agentId,
overrides?.store?.path ?? defaults?.store?.path,
),
};
const chunking = {
tokens:
overrides?.chunking?.tokens ??
defaults?.chunking?.tokens ??
DEFAULT_CHUNK_TOKENS,
overlap:
overrides?.chunking?.overlap ??
defaults?.chunking?.overlap ??
DEFAULT_CHUNK_OVERLAP,
};
const sync = {
onSessionStart:
overrides?.sync?.onSessionStart ?? defaults?.sync?.onSessionStart ?? true,
onSearch: overrides?.sync?.onSearch ?? defaults?.sync?.onSearch ?? true,
watch: overrides?.sync?.watch ?? defaults?.sync?.watch ?? true,
watchDebounceMs:
overrides?.sync?.watchDebounceMs ??
defaults?.sync?.watchDebounceMs ??
DEFAULT_WATCH_DEBOUNCE_MS,
intervalMinutes:
overrides?.sync?.intervalMinutes ?? defaults?.sync?.intervalMinutes ?? 0,
};
const query = {
maxResults:
overrides?.query?.maxResults ??
defaults?.query?.maxResults ??
DEFAULT_MAX_RESULTS,
minScore:
overrides?.query?.minScore ??
defaults?.query?.minScore ??
DEFAULT_MIN_SCORE,
};
const overlap = Math.max(0, Math.min(chunking.overlap, chunking.tokens - 1));
const minScore = Math.max(0, Math.min(1, query.minScore));
return {
enabled,
provider,
fallback,
model,
local,
store,
chunking: { tokens: Math.max(1, chunking.tokens), overlap },
sync,
query: { ...query, minScore },
};
}
export function resolveMemorySearchConfig(
cfg: ClawdbotConfig,
agentId: string,
): ResolvedMemorySearchConfig | null {
const defaults = cfg.agents?.defaults?.memorySearch;
const overrides = resolveAgentConfig(cfg, agentId)?.memorySearch;
const resolved = mergeConfig(defaults, overrides, agentId);
if (!resolved.enabled) return null;
return resolved;
}

View File

@@ -222,6 +222,16 @@
"title": "Session Status",
"detailKeys": ["sessionKey", "model"]
},
"memory_search": {
"emoji": "🧠",
"title": "Memory Search",
"detailKeys": ["query"]
},
"memory_get": {
"emoji": "📓",
"title": "Memory Get",
"detailKeys": ["path", "from", "lines"]
},
"whatsapp_login": {
"emoji": "🟢",
"title": "WhatsApp Login",

View File

@@ -0,0 +1,101 @@
import { Type } from "@sinclair/typebox";
import type { ClawdbotConfig } from "../../config/config.js";
import { getMemorySearchManager } from "../../memory/index.js";
import { resolveSessionAgentId } from "../agent-scope.js";
import { resolveMemorySearchConfig } from "../memory-search.js";
import type { AnyAgentTool } from "./common.js";
import { jsonResult, readNumberParam, readStringParam } from "./common.js";
const MemorySearchSchema = Type.Object({
query: Type.String(),
maxResults: Type.Optional(Type.Number()),
minScore: Type.Optional(Type.Number()),
});
const MemoryGetSchema = Type.Object({
path: Type.String(),
from: Type.Optional(Type.Number()),
lines: Type.Optional(Type.Number()),
});
export function createMemorySearchTool(options: {
config?: ClawdbotConfig;
agentSessionKey?: string;
}): AnyAgentTool | null {
const cfg = options.config;
if (!cfg) return null;
const agentId = resolveSessionAgentId({
sessionKey: options.agentSessionKey,
config: cfg,
});
if (!resolveMemorySearchConfig(cfg, agentId)) return null;
return {
label: "Memory Search",
name: "memory_search",
description:
"Search agent memory files (MEMORY.md + memory/*.md) using semantic vectors.",
parameters: MemorySearchSchema,
execute: async (_toolCallId, params) => {
const query = readStringParam(params, "query", { required: true });
const maxResults = readNumberParam(params, "maxResults");
const minScore = readNumberParam(params, "minScore");
const { manager, error } = await getMemorySearchManager({
cfg,
agentId,
});
if (!manager) {
return jsonResult({ results: [], disabled: true, error });
}
const results = await manager.search(query, {
maxResults,
minScore,
sessionKey: options.agentSessionKey,
});
const status = manager.status();
return jsonResult({
results,
provider: status.provider,
model: status.model,
fallback: status.fallback,
});
},
};
}
export function createMemoryGetTool(options: {
config?: ClawdbotConfig;
agentSessionKey?: string;
}): AnyAgentTool | null {
const cfg = options.config;
if (!cfg) return null;
const agentId = resolveSessionAgentId({
sessionKey: options.agentSessionKey,
config: cfg,
});
if (!resolveMemorySearchConfig(cfg, agentId)) return null;
return {
label: "Memory Get",
name: "memory_get",
description: "Read a memory file by path (workspace-relative).",
parameters: MemoryGetSchema,
execute: async (_toolCallId, params) => {
const relPath = readStringParam(params, "path", { required: true });
const from = readNumberParam(params, "from", { integer: true });
const lines = readNumberParam(params, "lines", { integer: true });
const { manager, error } = await getMemorySearchManager({
cfg,
agentId,
});
if (!manager) {
return jsonResult({ path: relPath, text: "", disabled: true, error });
}
const result = await manager.readFile({
relPath,
from: from ?? undefined,
lines: lines ?? undefined,
});
return jsonResult(result);
},
};
}

View File

@@ -40,7 +40,6 @@ import {
import { normalizeMainKey } from "../routing/session-key.js";
import { defaultRuntime } from "../runtime.js";
import { INTERNAL_MESSAGE_PROVIDER } from "../utils/message-provider.js";
import { isReasoningTagProvider } from "../utils/provider-utils.js";
import { resolveCommandAuthorization } from "./command-auth.js";
import { hasControlCommand } from "./command-detection.js";
import {
@@ -493,6 +492,15 @@ export async function getReplyFromConfig(
modelAliases: configuredAliases,
allowStatusDirective,
});
const hasInlineStatus =
parsedDirectives.hasStatusDirective &&
parsedDirectives.cleaned.trim().length > 0;
if (hasInlineStatus) {
parsedDirectives = {
...parsedDirectives,
hasStatusDirective: false,
};
}
if (
isGroup &&
ctx.WasMentioned !== true &&
@@ -522,7 +530,6 @@ export async function getReplyFromConfig(
if (noMentions.trim().length > 0) {
const directiveOnlyCheck = parseInlineDirectives(noMentions, {
modelAliases: configuredAliases,
allowStatusDirective,
});
if (directiveOnlyCheck.cleaned.trim().length > 0) {
const allowInlineStatus =
@@ -698,11 +705,10 @@ export async function getReplyFromConfig(
? undefined
: directives.rawModelDirective;
const inlineStatusRequested =
hasInlineStatus && allowTextCommands && command.isAuthorizedSender;
if (!command.isAuthorizedSender) {
// Treat slash tokens as plain text for unauthorized senders.
cleanedBody = existingBody;
sessionCtx.Body = cleanedBody;
sessionCtx.BodyStripped = cleanedBody;
directives = {
...directives,
hasThinkDirective: false,
@@ -863,11 +869,7 @@ export async function getReplyFromConfig(
cfg,
agentId,
isGroup,
}) &&
directives.hasStatusDirective &&
allowTextCommands &&
command.isAuthorizedSender &&
command.commandBodyNormalized !== "/status";
}) && inlineStatusRequested;
if (handleInlineStatus) {
const inlineStatusReply = await buildStatusReply({
cfg,
@@ -1158,7 +1160,6 @@ export async function getReplyFromConfig(
resolvedQueue.mode === "collect" ||
resolvedQueue.mode === "steer-backlog";
const authProfileId = sessionEntry?.authProfileOverride;
const followupRun = {
prompt: queuedBody,
messageId: sessionCtx.MessageSid,
@@ -1197,7 +1198,7 @@ export async function getReplyFromConfig(
ownerNumbers:
command.ownerList.length > 0 ? command.ownerList : undefined,
extraSystemPrompt: extraSystemPrompt || undefined,
...(isReasoningTagProvider(provider) ? { enforceFinalTag: true } : {}),
...(provider === "ollama" ? { enforceFinalTag: true } : {}),
},
};

124
src/cli/memory-cli.ts Normal file
View File

@@ -0,0 +1,124 @@
import chalk from "chalk";
import type { Command } from "commander";
import { resolveDefaultAgentId } from "../agents/agent-scope.js";
import { loadConfig } from "../config/config.js";
import { getMemorySearchManager } from "../memory/index.js";
import { defaultRuntime } from "../runtime.js";
type MemoryCommandOptions = {
agent?: string;
json?: boolean;
};
function resolveAgent(cfg: ReturnType<typeof loadConfig>, agent?: string) {
const trimmed = agent?.trim();
if (trimmed) return trimmed;
return resolveDefaultAgentId(cfg);
}
export function registerMemoryCli(program: Command) {
const memory = program.command("memory").description("Memory search tools");
memory
.command("status")
.description("Show memory search index status")
.option("--agent <id>", "Agent id (default: default agent)")
.option("--json", "Print JSON")
.action(async (opts: MemoryCommandOptions) => {
const cfg = loadConfig();
const agentId = resolveAgent(cfg, opts.agent);
const { manager, error } = await getMemorySearchManager({ cfg, agentId });
if (!manager) {
defaultRuntime.log(error ?? "Memory search disabled.");
return;
}
const status = manager.status();
if (opts.json) {
defaultRuntime.log(JSON.stringify(status, null, 2));
return;
}
const lines = [
`${chalk.bold.cyan("Memory Search")} (${agentId})`,
`Provider: ${status.provider} (requested: ${status.requestedProvider})`,
status.fallback
? chalk.yellow(`Fallback: ${status.fallback.from}`)
: null,
`Files: ${status.files}`,
`Chunks: ${status.chunks}`,
`Dirty: ${status.dirty ? "yes" : "no"}`,
`Index: ${status.dbPath}`,
].filter(Boolean) as string[];
if (status.fallback?.reason) {
lines.push(chalk.gray(status.fallback.reason));
}
defaultRuntime.log(lines.join("\n"));
});
memory
.command("index")
.description("Reindex memory files")
.option("--agent <id>", "Agent id (default: default agent)")
.option("--force", "Force full reindex", false)
.action(async (opts: MemoryCommandOptions & { force?: boolean }) => {
const cfg = loadConfig();
const agentId = resolveAgent(cfg, opts.agent);
const { manager, error } = await getMemorySearchManager({ cfg, agentId });
if (!manager) {
defaultRuntime.log(error ?? "Memory search disabled.");
return;
}
await manager.sync({ reason: "cli", force: opts.force });
defaultRuntime.log("Memory index updated.");
});
memory
.command("search")
.description("Search memory files")
.argument("<query>", "Search query")
.option("--agent <id>", "Agent id (default: default agent)")
.option("--max-results <n>", "Max results", (v) => Number(v))
.option("--min-score <n>", "Minimum score", (v) => Number(v))
.option("--json", "Print JSON")
.action(
async (
query: string,
opts: MemoryCommandOptions & {
maxResults?: number;
minScore?: number;
},
) => {
const cfg = loadConfig();
const agentId = resolveAgent(cfg, opts.agent);
const { manager, error } = await getMemorySearchManager({
cfg,
agentId,
});
if (!manager) {
defaultRuntime.log(error ?? "Memory search disabled.");
return;
}
const results = await manager.search(query, {
maxResults: opts.maxResults,
minScore: opts.minScore,
});
if (opts.json) {
defaultRuntime.log(JSON.stringify({ results }, null, 2));
return;
}
if (results.length === 0) {
defaultRuntime.log("No matches.");
return;
}
const lines: string[] = [];
for (const result of results) {
lines.push(
`${chalk.green(result.score.toFixed(3))} ${result.path}:${result.startLine}-${result.endLine}`,
);
lines.push(chalk.gray(result.snippet));
lines.push("");
}
defaultRuntime.log(lines.join("\n").trim());
},
);
}

View File

@@ -50,6 +50,7 @@ import { registerDocsCli } from "./docs-cli.js";
import { registerGatewayCli } from "./gateway-cli.js";
import { registerHooksCli } from "./hooks-cli.js";
import { registerLogsCli } from "./logs-cli.js";
import { registerMemoryCli } from "./memory-cli.js";
import { registerModelsCli } from "./models-cli.js";
import { registerNodesCli } from "./nodes-cli.js";
import { registerPairingCli } from "./pairing-cli.js";
@@ -1213,6 +1214,7 @@ ${theme.muted("Docs:")} ${formatDocsLink(
registerDaemonCli(program);
registerGatewayCli(program);
registerLogsCli(program);
registerMemoryCli(program);
registerModelsCli(program);
registerNodesCli(program);
registerSandboxCli(program);

View File

@@ -115,6 +115,23 @@ const FIELD_LABELS: Record<string, string> = {
"gateway.reload.mode": "Config Reload Mode",
"gateway.reload.debounceMs": "Config Reload Debounce (ms)",
"agents.defaults.workspace": "Workspace",
"agents.defaults.memorySearch": "Memory Search",
"agents.defaults.memorySearch.enabled": "Enable Memory Search",
"agents.defaults.memorySearch.provider": "Memory Search Provider",
"agents.defaults.memorySearch.model": "Memory Search Model",
"agents.defaults.memorySearch.fallback": "Memory Search Fallback",
"agents.defaults.memorySearch.local.modelPath": "Local Embedding Model Path",
"agents.defaults.memorySearch.store.path": "Memory Search Index Path",
"agents.defaults.memorySearch.chunking.tokens": "Memory Chunk Tokens",
"agents.defaults.memorySearch.chunking.overlap":
"Memory Chunk Overlap Tokens",
"agents.defaults.memorySearch.sync.onSessionStart": "Index on Session Start",
"agents.defaults.memorySearch.sync.onSearch": "Index on Search (Lazy)",
"agents.defaults.memorySearch.sync.watch": "Watch Memory Files",
"agents.defaults.memorySearch.sync.watchDebounceMs":
"Memory Watch Debounce (ms)",
"agents.defaults.memorySearch.query.maxResults": "Memory Search Max Results",
"agents.defaults.memorySearch.query.minScore": "Memory Search Min Score",
"auth.profiles": "Auth Profiles",
"auth.order": "Auth Profile Order",
"auth.cooldowns.billingBackoffHours": "Billing Backoff (hours)",
@@ -215,6 +232,20 @@ const FIELD_HELP: Record<string, string> = {
"Failure window (hours) for backoff counters (default: 24).",
"agents.defaults.models":
"Configured model catalog (keys are full provider/model IDs).",
"agents.defaults.memorySearch":
"Vector search over MEMORY.md and memory/*.md (per-agent overrides supported).",
"agents.defaults.memorySearch.provider":
'Embedding provider ("openai" or "local").',
"agents.defaults.memorySearch.local.modelPath":
"Local GGUF model path or hf: URI (node-llama-cpp).",
"agents.defaults.memorySearch.fallback":
'Fallback to OpenAI when local embeddings fail ("openai" or "none").',
"agents.defaults.memorySearch.store.path":
"SQLite index path (default: ~/.clawdbot/memory/{agentId}.sqlite).",
"agents.defaults.memorySearch.sync.onSearch":
"Lazy sync: reindex on first search after a change.",
"agents.defaults.memorySearch.sync.watch":
"Watch memory files for changes (chokidar).",
"plugins.enabled": "Enable plugin/extension loading (default: true).",
"plugins.allow":
"Optional allowlist of plugin ids; when set, only listed plugins load.",

View File

@@ -996,6 +996,47 @@ export type AgentToolsConfig = {
};
};
export type MemorySearchConfig = {
/** Enable vector memory search (default: true). */
enabled?: boolean;
/** Embedding provider mode. */
provider?: "openai" | "local";
/** Fallback behavior when local embeddings fail. */
fallback?: "openai" | "none";
/** Embedding model id (remote) or alias (local). */
model?: string;
/** Local embedding settings (node-llama-cpp). */
local?: {
/** GGUF model path or hf: URI. */
modelPath?: string;
/** Optional cache directory for local models. */
modelCacheDir?: string;
};
/** Index storage configuration. */
store?: {
driver?: "sqlite";
path?: string;
};
/** Chunking configuration. */
chunking?: {
tokens?: number;
overlap?: number;
};
/** Sync behavior. */
sync?: {
onSessionStart?: boolean;
onSearch?: boolean;
watch?: boolean;
watchDebounceMs?: number;
intervalMinutes?: number;
};
/** Query behavior. */
query?: {
maxResults?: number;
minScore?: number;
};
};
export type ToolsConfig = {
allow?: string[];
deny?: string[];
@@ -1070,6 +1111,7 @@ export type AgentConfig = {
workspace?: string;
agentDir?: string;
model?: string;
memorySearch?: MemorySearchConfig;
/** Human-like delay between block replies for this agent. */
humanDelay?: HumanDelayConfig;
identity?: IdentityConfig;
@@ -1534,6 +1576,8 @@ export type AgentDefaultsConfig = {
contextPruning?: AgentContextPruningConfig;
/** Compaction tuning and pre-compaction memory flush behavior. */
compaction?: AgentCompactionConfig;
/** Vector memory search configuration (per-agent overrides supported). */
memorySearch?: MemorySearchConfig;
/** Default thinking level when no /think directive is present. */
thinkingDefault?: "off" | "minimal" | "low" | "medium" | "high";
/** Default verbose level when no /verbose directive is present. */

View File

@@ -867,6 +867,48 @@ const AgentToolsSchema = z
})
.optional();
const MemorySearchSchema = z
.object({
enabled: z.boolean().optional(),
provider: z.union([z.literal("openai"), z.literal("local")]).optional(),
fallback: z.union([z.literal("openai"), z.literal("none")]).optional(),
model: z.string().optional(),
local: z
.object({
modelPath: z.string().optional(),
modelCacheDir: z.string().optional(),
})
.optional(),
store: z
.object({
driver: z.literal("sqlite").optional(),
path: z.string().optional(),
})
.optional(),
chunking: z
.object({
tokens: z.number().int().positive().optional(),
overlap: z.number().int().nonnegative().optional(),
})
.optional(),
sync: z
.object({
onSessionStart: z.boolean().optional(),
onSearch: z.boolean().optional(),
watch: z.boolean().optional(),
watchDebounceMs: z.number().int().nonnegative().optional(),
intervalMinutes: z.number().int().nonnegative().optional(),
})
.optional(),
query: z
.object({
maxResults: z.number().int().positive().optional(),
minScore: z.number().min(0).max(1).optional(),
})
.optional(),
})
.optional();
const AgentEntrySchema = z.object({
id: z.string(),
default: z.boolean().optional(),
@@ -874,6 +916,7 @@ const AgentEntrySchema = z.object({
workspace: z.string().optional(),
agentDir: z.string().optional(),
model: z.string().optional(),
memorySearch: MemorySearchSchema,
humanDelay: HumanDelaySchema.optional(),
identity: IdentitySchema,
groupChat: GroupChatSchema,
@@ -1098,6 +1141,7 @@ const AgentDefaultsSchema = z
userTimezone: z.string().optional(),
contextTokens: z.number().int().positive().optional(),
cliBackends: z.record(z.string(), CliBackendSchema).optional(),
memorySearch: MemorySearchSchema,
contextPruning: z
.object({
mode: z

View File

@@ -238,22 +238,10 @@ function buildLiveGatewayConfig(params: {
candidates: Array<Model<Api>>;
providerOverrides?: Record<string, ModelProviderConfig>;
}): ClawdbotConfig {
const providerOverrides = params.providerOverrides ?? {};
const lmstudioProvider = params.cfg.models?.providers?.lmstudio;
const baseProviders = params.cfg.models?.providers ?? {};
const nextProviders = params.providerOverrides
? {
...baseProviders,
...(lmstudioProvider
? {
lmstudio: {
...lmstudioProvider,
api: "openai-completions",
},
}
: {}),
...params.providerOverrides,
}
: {
const nextProviders = {
...baseProviders,
...(lmstudioProvider
? {
@@ -263,6 +251,7 @@ function buildLiveGatewayConfig(params: {
},
}
: {}),
...providerOverrides,
};
const providers =
Object.keys(nextProviders).length > 0 ? nextProviders : baseProviders;

194
src/memory/embeddings.ts Normal file
View File

@@ -0,0 +1,194 @@
import type { Llama, LlamaEmbeddingContext, LlamaModel } from "node-llama-cpp";
import { resolveApiKeyForProvider } from "../agents/model-auth.js";
import type { ClawdbotConfig } from "../config/config.js";
export type EmbeddingProvider = {
id: string;
model: string;
embedQuery: (text: string) => Promise<number[]>;
embedBatch: (texts: string[]) => Promise<number[][]>;
};
export type EmbeddingProviderResult = {
provider: EmbeddingProvider;
requestedProvider: "openai" | "local";
fallbackFrom?: "local";
fallbackReason?: string;
};
export type EmbeddingProviderOptions = {
config: ClawdbotConfig;
agentDir?: string;
provider: "openai" | "local";
model: string;
fallback: "openai" | "none";
local?: {
modelPath?: string;
modelCacheDir?: string;
};
};
const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
const DEFAULT_LOCAL_MODEL =
"hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
function normalizeOpenAiModel(model: string): string {
const trimmed = model.trim();
if (!trimmed) return "text-embedding-3-small";
if (trimmed.startsWith("openai/")) return trimmed.slice("openai/".length);
return trimmed;
}
async function createOpenAiEmbeddingProvider(
options: EmbeddingProviderOptions,
): Promise<EmbeddingProvider> {
const { apiKey } = await resolveApiKeyForProvider({
provider: "openai",
cfg: options.config,
agentDir: options.agentDir,
});
const providerConfig = options.config.models?.providers?.openai;
const baseUrl = providerConfig?.baseUrl?.trim() || DEFAULT_OPENAI_BASE_URL;
const url = `${baseUrl.replace(/\/$/, "")}/embeddings`;
const headerOverrides = providerConfig?.headers ?? {};
const headers: Record<string, string> = {
"Content-Type": "application/json",
Authorization: `Bearer ${apiKey}`,
...headerOverrides,
};
const model = normalizeOpenAiModel(options.model);
const embed = async (input: string[]): Promise<number[][]> => {
if (input.length === 0) return [];
const res = await fetch(url, {
method: "POST",
headers,
body: JSON.stringify({ model, input }),
});
if (!res.ok) {
const text = await res.text();
throw new Error(`openai embeddings failed: ${res.status} ${text}`);
}
const payload = (await res.json()) as {
data?: Array<{ embedding?: number[] }>;
};
const data = payload.data ?? [];
return data.map((entry) => entry.embedding ?? []);
};
return {
id: "openai",
model,
embedQuery: async (text) => {
const [vec] = await embed([text]);
return vec ?? [];
},
embedBatch: embed,
};
}
async function createLocalEmbeddingProvider(
options: EmbeddingProviderOptions,
): Promise<EmbeddingProvider> {
const modelPath = options.local?.modelPath?.trim() || DEFAULT_LOCAL_MODEL;
const modelCacheDir = options.local?.modelCacheDir?.trim();
// Lazy-load node-llama-cpp to keep startup light unless local is enabled.
const { getLlama, resolveModelFile, LlamaLogLevel } = await import(
"node-llama-cpp"
);
let llama: Llama | null = null;
let embeddingModel: LlamaModel | null = null;
let embeddingContext: LlamaEmbeddingContext | null = null;
const ensureContext = async () => {
if (!llama) {
llama = await getLlama({ logLevel: LlamaLogLevel.error });
}
if (!embeddingModel) {
const resolved = await resolveModelFile(
modelPath,
modelCacheDir || undefined,
);
embeddingModel = await llama.loadModel({ modelPath: resolved });
}
if (!embeddingContext) {
embeddingContext = await embeddingModel.createEmbeddingContext();
}
return embeddingContext;
};
return {
id: "local",
model: modelPath,
embedQuery: async (text) => {
const ctx = await ensureContext();
const embedding = await ctx.getEmbeddingFor(text);
return Array.from(embedding.vector) as number[];
},
embedBatch: async (texts) => {
const ctx = await ensureContext();
const embeddings = await Promise.all(
texts.map(async (text) => {
const embedding = await ctx.getEmbeddingFor(text);
return Array.from(embedding.vector) as number[];
}),
);
return embeddings;
},
};
}
export async function createEmbeddingProvider(
options: EmbeddingProviderOptions,
): Promise<EmbeddingProviderResult> {
const requestedProvider = options.provider;
if (options.provider === "local") {
try {
const provider = await createLocalEmbeddingProvider(options);
return { provider, requestedProvider };
} catch (err) {
const reason = formatLocalSetupError(err);
if (options.fallback === "openai") {
try {
const provider = await createOpenAiEmbeddingProvider(options);
return {
provider,
requestedProvider,
fallbackFrom: "local",
fallbackReason: reason,
};
} catch (fallbackErr) {
throw new Error(
`${reason}\n\nFallback to OpenAI failed: ${formatError(fallbackErr)}`,
);
}
}
throw new Error(reason);
}
}
const provider = await createOpenAiEmbeddingProvider(options);
return { provider, requestedProvider };
}
function formatError(err: unknown): string {
if (err instanceof Error) return err.message;
return String(err);
}
function formatLocalSetupError(err: unknown): string {
const detail = formatError(err);
return [
"Local embeddings unavailable.",
detail ? `Reason: ${detail}` : undefined,
"To enable local embeddings:",
"1) pnpm approve-builds",
"2) select node-llama-cpp",
"3) pnpm rebuild node-llama-cpp",
'Or set agents.defaults.memorySearch.provider = "openai" (remote).',
]
.filter(Boolean)
.join("\n");
}

98
src/memory/index.test.ts Normal file
View File

@@ -0,0 +1,98 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { getMemorySearchManager } from "./index.js";
vi.mock("./embeddings.js", () => {
const embedText = (text: string) => {
const lower = text.toLowerCase();
const alpha = lower.split("alpha").length - 1;
const beta = lower.split("beta").length - 1;
return [alpha, beta, 1];
};
return {
createEmbeddingProvider: async () => ({
requestedProvider: "openai",
provider: {
id: "mock",
model: "mock-embed",
embedQuery: async (text: string) => embedText(text),
embedBatch: async (texts: string[]) => texts.map(embedText),
},
}),
};
});
describe("memory index", () => {
let workspaceDir: string;
let indexPath: string;
beforeEach(async () => {
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-mem-"));
indexPath = path.join(workspaceDir, "index.sqlite");
await fs.mkdir(path.join(workspaceDir, "memory"));
await fs.writeFile(
path.join(workspaceDir, "memory", "2026-01-12.md"),
"# Log\nAlpha memory line.\nAnother line.",
);
await fs.writeFile(
path.join(workspaceDir, "MEMORY.md"),
"Beta knowledge base entry.",
);
});
afterEach(async () => {
await fs.rm(workspaceDir, { recursive: true, force: true });
});
it("indexes memory files and searches by vector", async () => {
const cfg = {
agents: {
defaults: {
workspace: workspaceDir,
memorySearch: {
provider: "openai",
model: "mock-embed",
store: { path: indexPath },
sync: { watch: false, onSessionStart: false, onSearch: true },
query: { minScore: 0 },
},
},
list: [{ id: "main", default: true }],
},
};
const result = await getMemorySearchManager({ cfg, agentId: "main" });
expect(result.manager).not.toBeNull();
if (!result.manager) throw new Error("manager missing");
await result.manager.sync({ force: true });
const results = await result.manager.search("alpha");
expect(results.length).toBeGreaterThan(0);
expect(results[0]?.path).toContain("memory/2026-01-12.md");
});
it("rejects reading non-memory paths", async () => {
const cfg = {
agents: {
defaults: {
workspace: workspaceDir,
memorySearch: {
provider: "openai",
model: "mock-embed",
store: { path: indexPath },
sync: { watch: false, onSessionStart: false, onSearch: true },
},
},
list: [{ id: "main", default: true }],
},
};
const result = await getMemorySearchManager({ cfg, agentId: "main" });
expect(result.manager).not.toBeNull();
if (!result.manager) throw new Error("manager missing");
await expect(
result.manager.readFile({ relPath: "NOTES.md" }),
).rejects.toThrow("path required");
});
});

641
src/memory/index.ts Normal file
View File

@@ -0,0 +1,641 @@
import crypto from "node:crypto";
import fsSync from "node:fs";
import fs from "node:fs/promises";
import path from "node:path";
import { DatabaseSync } from "node:sqlite";
import chokidar, { type FSWatcher } from "chokidar";
import {
resolveAgentDir,
resolveAgentWorkspaceDir,
} from "../agents/agent-scope.js";
import type { ResolvedMemorySearchConfig } from "../agents/memory-search.js";
import { resolveMemorySearchConfig } from "../agents/memory-search.js";
import type { ClawdbotConfig } from "../config/config.js";
import { resolveUserPath, truncateUtf16Safe } from "../utils.js";
import {
createEmbeddingProvider,
type EmbeddingProvider,
type EmbeddingProviderResult,
} from "./embeddings.js";
export type MemorySearchResult = {
path: string;
startLine: number;
endLine: number;
score: number;
snippet: string;
};
type MemoryFileEntry = {
path: string;
absPath: string;
mtimeMs: number;
size: number;
hash: string;
};
type MemoryChunk = {
startLine: number;
endLine: number;
text: string;
hash: string;
};
type MemoryIndexMeta = {
model: string;
provider: string;
chunkTokens: number;
chunkOverlap: number;
};
const META_KEY = "memory_index_meta_v1";
const SNIPPET_MAX_CHARS = 700;
const INDEX_CACHE = new Map<string, MemoryIndexManager>();
export class MemoryIndexManager {
private readonly cfg: ClawdbotConfig;
private readonly agentId: string;
private readonly workspaceDir: string;
private readonly settings: ResolvedMemorySearchConfig;
private readonly provider: EmbeddingProvider;
private readonly requestedProvider: "openai" | "local";
private readonly fallbackReason?: string;
private readonly db: DatabaseSync;
private watcher: FSWatcher | null = null;
private watchTimer: NodeJS.Timeout | null = null;
private intervalTimer: NodeJS.Timeout | null = null;
private dirty = false;
private sessionWarm = new Set<string>();
private syncing: Promise<void> | null = null;
static async get(params: {
cfg: ClawdbotConfig;
agentId: string;
}): Promise<MemoryIndexManager | null> {
const { cfg, agentId } = params;
const settings = resolveMemorySearchConfig(cfg, agentId);
if (!settings) return null;
const workspaceDir = resolveAgentWorkspaceDir(cfg, agentId);
const key = `${agentId}:${workspaceDir}:${JSON.stringify(settings)}`;
const existing = INDEX_CACHE.get(key);
if (existing) return existing;
const providerResult = await createEmbeddingProvider({
config: cfg,
agentDir: resolveAgentDir(cfg, agentId),
provider: settings.provider,
model: settings.model,
fallback: settings.fallback,
local: settings.local,
});
const manager = new MemoryIndexManager({
cfg,
agentId,
workspaceDir,
settings,
providerResult,
});
INDEX_CACHE.set(key, manager);
return manager;
}
private constructor(params: {
cfg: ClawdbotConfig;
agentId: string;
workspaceDir: string;
settings: ResolvedMemorySearchConfig;
providerResult: EmbeddingProviderResult;
}) {
this.cfg = params.cfg;
this.agentId = params.agentId;
this.workspaceDir = params.workspaceDir;
this.settings = params.settings;
this.provider = params.providerResult.provider;
this.requestedProvider = params.providerResult.requestedProvider;
this.fallbackReason = params.providerResult.fallbackReason;
this.db = this.openDatabase();
this.ensureSchema();
this.ensureWatcher();
this.ensureIntervalSync();
this.dirty = true;
}
async warmSession(sessionKey?: string): Promise<void> {
if (!this.settings.sync.onSessionStart) return;
const key = sessionKey?.trim() || "";
if (key && this.sessionWarm.has(key)) return;
await this.sync({ reason: "session-start" });
if (key) this.sessionWarm.add(key);
}
async search(
query: string,
opts?: {
maxResults?: number;
minScore?: number;
sessionKey?: string;
},
): Promise<MemorySearchResult[]> {
await this.warmSession(opts?.sessionKey);
if (this.settings.sync.onSearch && this.dirty) {
await this.sync({ reason: "search" });
}
const cleaned = query.trim();
if (!cleaned) return [];
const queryVec = await this.provider.embedQuery(cleaned);
if (queryVec.length === 0) return [];
const candidates = this.listChunks();
const scored = candidates
.map((chunk) => ({
chunk,
score: cosineSimilarity(queryVec, chunk.embedding),
}))
.filter((entry) => Number.isFinite(entry.score));
const minScore = opts?.minScore ?? this.settings.query.minScore;
const maxResults = opts?.maxResults ?? this.settings.query.maxResults;
return scored
.filter((entry) => entry.score >= minScore)
.sort((a, b) => b.score - a.score)
.slice(0, maxResults)
.map((entry) => ({
path: entry.chunk.path,
startLine: entry.chunk.startLine,
endLine: entry.chunk.endLine,
score: entry.score,
snippet: truncateUtf16Safe(entry.chunk.text, SNIPPET_MAX_CHARS),
}));
}
async sync(params?: { reason?: string; force?: boolean }): Promise<void> {
if (this.syncing) return this.syncing;
this.syncing = this.runSync(params).finally(() => {
this.syncing = null;
});
return this.syncing;
}
async readFile(params: {
relPath: string;
from?: number;
lines?: number;
}): Promise<{ text: string; path: string }> {
const relPath = normalizeRelPath(params.relPath);
if (!relPath || !isMemoryPath(relPath)) {
throw new Error("path required");
}
const absPath = path.resolve(this.workspaceDir, relPath);
// Compare via path.relative so sibling dirs sharing the workspace prefix
// (e.g. "<workspace>2") cannot slip past a plain startsWith check.
const rel = path.relative(this.workspaceDir, absPath);
if (rel.startsWith("..") || path.isAbsolute(rel)) {
throw new Error("path escapes workspace");
}
const content = await fs.readFile(absPath, "utf-8");
if (!params.from && !params.lines) {
return { text: content, path: relPath };
}
const lines = content.split("\n");
const start = Math.max(1, params.from ?? 1);
const count = Math.max(1, params.lines ?? lines.length);
const slice = lines.slice(start - 1, start - 1 + count);
return { text: slice.join("\n"), path: relPath };
}
status(): {
files: number;
chunks: number;
dirty: boolean;
workspaceDir: string;
dbPath: string;
provider: string;
model: string;
requestedProvider: string;
fallback?: { from: string; reason?: string };
} {
const files = this.db.prepare(`SELECT COUNT(*) as c FROM files`).get() as {
c: number;
};
const chunks = this.db
.prepare(`SELECT COUNT(*) as c FROM chunks`)
.get() as {
c: number;
};
return {
files: files?.c ?? 0,
chunks: chunks?.c ?? 0,
dirty: this.dirty,
workspaceDir: this.workspaceDir,
dbPath: this.settings.store.path,
provider: this.provider.id,
model: this.provider.model,
requestedProvider: this.requestedProvider,
fallback: this.fallbackReason
? { from: "local", reason: this.fallbackReason }
: undefined,
};
}
private openDatabase(): DatabaseSync {
const dbPath = resolveUserPath(this.settings.store.path);
const dir = path.dirname(dbPath);
ensureDir(dir);
return new DatabaseSync(dbPath);
}
private ensureSchema() {
this.db.exec(`
CREATE TABLE IF NOT EXISTS meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
`);
this.db.exec(`
CREATE TABLE IF NOT EXISTS files (
path TEXT PRIMARY KEY,
hash TEXT NOT NULL,
mtime INTEGER NOT NULL,
size INTEGER NOT NULL
);
`);
this.db.exec(`
CREATE TABLE IF NOT EXISTS chunks (
id TEXT PRIMARY KEY,
path TEXT NOT NULL,
start_line INTEGER NOT NULL,
end_line INTEGER NOT NULL,
hash TEXT NOT NULL,
model TEXT NOT NULL,
text TEXT NOT NULL,
embedding TEXT NOT NULL,
updated_at INTEGER NOT NULL
);
`);
this.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`);
}
private ensureWatcher() {
if (!this.settings.sync.watch || this.watcher) return;
const watchPaths = [
path.join(this.workspaceDir, "MEMORY.md"),
path.join(this.workspaceDir, "memory"),
];
this.watcher = chokidar.watch(watchPaths, {
ignoreInitial: true,
awaitWriteFinish: {
stabilityThreshold: this.settings.sync.watchDebounceMs,
pollInterval: 100,
},
});
const markDirty = () => {
this.dirty = true;
this.scheduleWatchSync();
};
this.watcher.on("add", markDirty);
this.watcher.on("change", markDirty);
this.watcher.on("unlink", markDirty);
}
private ensureIntervalSync() {
const minutes = this.settings.sync.intervalMinutes;
if (!minutes || minutes <= 0 || this.intervalTimer) return;
const ms = minutes * 60 * 1000;
this.intervalTimer = setInterval(() => {
void this.sync({ reason: "interval" });
}, ms);
}
private scheduleWatchSync() {
if (!this.settings.sync.watch) return;
if (this.watchTimer) clearTimeout(this.watchTimer);
this.watchTimer = setTimeout(() => {
this.watchTimer = null;
void this.sync({ reason: "watch" });
}, this.settings.sync.watchDebounceMs);
}
private listChunks(): Array<{
path: string;
startLine: number;
endLine: number;
text: string;
embedding: number[];
}> {
const rows = this.db
.prepare(
`SELECT path, start_line, end_line, text, embedding FROM chunks WHERE model = ?`,
)
.all(this.provider.model) as Array<{
path: string;
start_line: number;
end_line: number;
text: string;
embedding: string;
}>;
return rows.map((row) => ({
path: row.path,
startLine: row.start_line,
endLine: row.end_line,
text: row.text,
embedding: parseEmbedding(row.embedding),
}));
}
private async runSync(params?: { reason?: string; force?: boolean }) {
const meta = this.readMeta();
const needsFullReindex =
params?.force ||
!meta ||
meta.model !== this.provider.model ||
meta.provider !== this.provider.id ||
meta.chunkTokens !== this.settings.chunking.tokens ||
meta.chunkOverlap !== this.settings.chunking.overlap;
if (needsFullReindex) {
this.resetIndex();
}
const files = await listMemoryFiles(this.workspaceDir);
const fileEntries = await Promise.all(
files.map(async (file) => buildFileEntry(file, this.workspaceDir)),
);
const activePaths = new Set(fileEntries.map((entry) => entry.path));
for (const entry of fileEntries) {
const record = this.db
.prepare(`SELECT hash FROM files WHERE path = ?`)
.get(entry.path) as { hash: string } | undefined;
if (!needsFullReindex && record?.hash === entry.hash) {
continue;
}
await this.indexFile(entry);
}
const staleRows = this.db.prepare(`SELECT path FROM files`).all() as Array<{
path: string;
}>;
for (const stale of staleRows) {
if (activePaths.has(stale.path)) continue;
this.db.prepare(`DELETE FROM files WHERE path = ?`).run(stale.path);
this.db.prepare(`DELETE FROM chunks WHERE path = ?`).run(stale.path);
}
this.writeMeta({
model: this.provider.model,
provider: this.provider.id,
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,
});
this.dirty = false;
}
private resetIndex() {
this.db.exec(`DELETE FROM files`);
this.db.exec(`DELETE FROM chunks`);
}
private readMeta(): MemoryIndexMeta | null {
const row = this.db
.prepare(`SELECT value FROM meta WHERE key = ?`)
.get(META_KEY) as { value: string } | undefined;
if (!row?.value) return null;
try {
return JSON.parse(row.value) as MemoryIndexMeta;
} catch {
return null;
}
}
private writeMeta(meta: MemoryIndexMeta) {
const value = JSON.stringify(meta);
this.db
.prepare(
`INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value`,
)
.run(META_KEY, value);
}
private async indexFile(entry: MemoryFileEntry) {
const content = await fs.readFile(entry.absPath, "utf-8");
const chunks = chunkMarkdown(content, this.settings.chunking);
const embeddings = await this.provider.embedBatch(
chunks.map((chunk) => chunk.text),
);
const now = Date.now();
this.db.prepare(`DELETE FROM chunks WHERE path = ?`).run(entry.path);
for (let i = 0; i < chunks.length; i++) {
const chunk = chunks[i];
const embedding = embeddings[i] ?? [];
const id = hashText(
`${entry.path}:${chunk.startLine}:${chunk.endLine}:${chunk.hash}:${this.provider.model}`,
);
this.db
.prepare(
`INSERT INTO chunks (id, path, start_line, end_line, hash, model, text, embedding, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(id) DO UPDATE SET
hash=excluded.hash,
model=excluded.model,
text=excluded.text,
embedding=excluded.embedding,
updated_at=excluded.updated_at`,
)
.run(
id,
entry.path,
chunk.startLine,
chunk.endLine,
chunk.hash,
this.provider.model,
chunk.text,
JSON.stringify(embedding),
now,
);
}
this.db
.prepare(
`INSERT INTO files (path, hash, mtime, size) VALUES (?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET hash=excluded.hash, mtime=excluded.mtime, size=excluded.size`,
)
.run(entry.path, entry.hash, entry.mtimeMs, entry.size);
}
}
export type MemorySearchManagerResult = {
manager: MemoryIndexManager | null;
error?: string;
};
export async function getMemorySearchManager(params: {
cfg: ClawdbotConfig;
agentId: string;
}): Promise<MemorySearchManagerResult> {
try {
const manager = await MemoryIndexManager.get(params);
return { manager };
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
return { manager: null, error: message };
}
}
function ensureDir(dir: string): string {
try {
fsSync.mkdirSync(dir, { recursive: true });
} catch {}
return dir;
}
function normalizeRelPath(value: string): string {
const trimmed = value.trim().replace(/^[./]+/, "");
return trimmed.replace(/\\/g, "/");
}
function isMemoryPath(relPath: string): boolean {
const normalized = normalizeRelPath(relPath);
if (!normalized) return false;
if (normalized === "MEMORY.md" || normalized === "memory.md") return true;
return normalized.startsWith("memory/");
}
async function listMemoryFiles(workspaceDir: string): Promise<string[]> {
const result: string[] = [];
const memoryFile = path.join(workspaceDir, "MEMORY.md");
const altMemoryFile = path.join(workspaceDir, "memory.md");
if (await exists(memoryFile)) result.push(memoryFile);
if (await exists(altMemoryFile)) result.push(altMemoryFile);
const memoryDir = path.join(workspaceDir, "memory");
if (await exists(memoryDir)) {
await walkDir(memoryDir, result);
}
return result;
}
async function walkDir(dir: string, files: string[]) {
const entries = await fs.readdir(dir, { withFileTypes: true });
for (const entry of entries) {
const full = path.join(dir, entry.name);
if (entry.isDirectory()) {
await walkDir(full, files);
continue;
}
if (!entry.isFile()) continue;
if (!entry.name.endsWith(".md")) continue;
files.push(full);
}
}
async function exists(filePath: string): Promise<boolean> {
try {
await fs.access(filePath);
return true;
} catch {
return false;
}
}
async function buildFileEntry(
absPath: string,
workspaceDir: string,
): Promise<MemoryFileEntry> {
const stat = await fs.stat(absPath);
const content = await fs.readFile(absPath, "utf-8");
const hash = hashText(content);
return {
path: path.relative(workspaceDir, absPath).replace(/\\/g, "/"),
absPath,
mtimeMs: stat.mtimeMs,
size: stat.size,
hash,
};
}
function hashText(value: string): string {
return crypto.createHash("sha256").update(value).digest("hex");
}
function chunkMarkdown(
content: string,
chunking: { tokens: number; overlap: number },
): MemoryChunk[] {
const lines = content.split("\n");
if (lines.length === 0) return [];
const maxChars = Math.max(32, chunking.tokens * 4);
const overlapChars = Math.max(0, chunking.overlap * 4);
const chunks: MemoryChunk[] = [];
let current: Array<{ line: string; lineNo: number }> = [];
let currentChars = 0;
const flush = () => {
if (current.length === 0) return;
const firstEntry = current[0];
const lastEntry = current[current.length - 1];
if (!firstEntry || !lastEntry) return;
const text = current.map((entry) => entry.line).join("\n");
const startLine = firstEntry.lineNo;
const endLine = lastEntry.lineNo;
chunks.push({
startLine,
endLine,
text,
hash: hashText(text),
});
};
const carryOverlap = () => {
if (overlapChars <= 0 || current.length === 0) {
current = [];
currentChars = 0;
return;
}
let acc = 0;
const kept: Array<{ line: string; lineNo: number }> = [];
for (let i = current.length - 1; i >= 0; i -= 1) {
const entry = current[i];
if (!entry) continue;
acc += entry.line.length + 1;
kept.unshift(entry);
if (acc >= overlapChars) break;
}
current = kept;
currentChars = kept.reduce((sum, entry) => sum + entry.line.length + 1, 0);
};
for (let i = 0; i < lines.length; i += 1) {
const line = lines[i] ?? "";
const lineNo = i + 1;
const lineSize = line.length + 1;
if (currentChars + lineSize > maxChars && current.length > 0) {
flush();
carryOverlap();
}
current.push({ line, lineNo });
currentChars += lineSize;
}
flush();
return chunks;
}
function parseEmbedding(raw: string): number[] {
try {
const parsed = JSON.parse(raw) as number[];
return Array.isArray(parsed) ? parsed : [];
} catch {
return [];
}
}
function cosineSimilarity(a: number[], b: number[]): number {
if (a.length === 0 || b.length === 0) return 0;
const len = Math.min(a.length, b.length);
let dot = 0;
let normA = 0;
let normB = 0;
for (let i = 0; i < len; i += 1) {
const av = a[i] ?? 0;
const bv = b[i] ?? 0;
dot += av * bv;
normA += av * av;
normB += bv * bv;
}
if (normA === 0 || normB === 0) return 0;
return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}