feat: add memory vector search
This commit is contained in:
60
CHANGELOG.md
60
CHANGELOG.md
@@ -1,37 +1,17 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
## 2026.1.12-1
|
## 2026.1.12
|
||||||
|
|
||||||
|
### Highlights
|
||||||
|
- Memory: add vector search for agent memories (Markdown-only scope) with SQLite index, chunking, lazy sync + file watch, and per-agent enablement/fallback.
|
||||||
|
|
||||||
### Changes
|
### Changes
|
||||||
- Heartbeat: raise default `ackMaxChars` to 300 so any `HEARTBEAT_OK` replies with short padding stay internal (fewer noisy heartbeat posts on providers).
|
- Memory: embedding providers support OpenAI or local `node-llama-cpp`; config adds defaults + per-agent overrides, provider/fallback metadata surfaced in tools/CLI.
|
||||||
- Onboarding: normalize API key inputs (strip `export KEY=...` wrappers) so shell-style entries paste cleanly.
|
- CLI/Tools: new `clawdbot memory` commands plus `memory_search`/`memory_get` tools returning snippets + line ranges and provider info.
|
||||||
|
- Runtime: memory index stored under `~/.clawdbot/memory/{agentId}.sqlite` with watch-on-by-default; inline status replies now stay auth-gated while inline prompts continue to the agent.
|
||||||
## 2026.1.11-5
|
|
||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
- Auto-reply: prevent duplicate /status replies (including /usage alias) and add tests for inline + standalone cases.
|
- Auto-reply: inline `/status` now honors allowlists (authorized stripped + replied inline; unauthorized leaves text for the agent) to match command gating tests.
|
||||||
|
|
||||||
## 2026.1.11-4
|
|
||||||
|
|
||||||
### Fixes
|
|
||||||
- CLI: read the git commit hash from the package root so npm installs show it.
|
|
||||||
|
|
||||||
## 2026.1.11-3
|
|
||||||
|
|
||||||
### Fixes
|
|
||||||
- CLI: avoid top-level await warnings in the entrypoint on fresh installs.
|
|
||||||
- CLI: show a commit hash in the banner for npm installs (package.json gitHead fallback).
|
|
||||||
|
|
||||||
## 2026.1.11-2
|
|
||||||
|
|
||||||
### Fixes
|
|
||||||
- Installer: ensure the CLI entrypoint is executable after npm installs.
|
|
||||||
- Packaging: include `dist/plugins/` in the npm package to avoid missing module errors.
|
|
||||||
|
|
||||||
## 2026.1.11-1
|
|
||||||
|
|
||||||
### Fixes
|
|
||||||
- Installer: include `patches/` in the npm package so postinstall patching works for npm/bun installs.
|
|
||||||
|
|
||||||
## 2026.1.11
|
## 2026.1.11
|
||||||
|
|
||||||
@@ -42,9 +22,6 @@
|
|||||||
- Agents: automatic pre-compaction memory flush turn to store durable memories before compaction.
|
- Agents: automatic pre-compaction memory flush turn to store durable memories before compaction.
|
||||||
|
|
||||||
### Changes
|
### Changes
|
||||||
- Deps: update pi-agent-core/pi-ai/pi-coding-agent/pi-tui and refresh the pi-ai patch.
|
|
||||||
- Dev: bump @types/node.
|
|
||||||
- macOS: add wizard debug CLI and share wizard parsing helpers.
|
|
||||||
- CLI/Onboarding: simplify MiniMax auth choice to a single M2.1 option.
|
- CLI/Onboarding: simplify MiniMax auth choice to a single M2.1 option.
|
||||||
- CLI: configure section selection now loops until Continue.
|
- CLI: configure section selection now loops until Continue.
|
||||||
- Docs: explain MiniMax vs MiniMax Lightning (speed vs cost) and restore LM Studio example.
|
- Docs: explain MiniMax vs MiniMax Lightning (speed vs cost) and restore LM Studio example.
|
||||||
@@ -52,20 +29,16 @@
|
|||||||
- Onboarding/CLI: group model/auth choice by provider and label Z.AI as GLM 4.7.
|
- Onboarding/CLI: group model/auth choice by provider and label Z.AI as GLM 4.7.
|
||||||
- Onboarding/Docs: add Moonshot AI (Kimi K2) auth choice + config example.
|
- Onboarding/Docs: add Moonshot AI (Kimi K2) auth choice + config example.
|
||||||
- CLI/Onboarding: prompt to reuse detected API keys for Moonshot/MiniMax/Z.AI/Gemini/Anthropic/OpenCode.
|
- CLI/Onboarding: prompt to reuse detected API keys for Moonshot/MiniMax/Z.AI/Gemini/Anthropic/OpenCode.
|
||||||
- CLI/Onboarding: move MiniMax to the top of the provider list.
|
|
||||||
- CLI/Onboarding: add MiniMax M2.1 Lightning auth choice.
|
|
||||||
- CLI/Onboarding: show key previews when reusing detected API keys.
|
|
||||||
- Auto-reply: add compact `/model` picker (models + available providers) and show provider endpoints in `/model status`.
|
- Auto-reply: add compact `/model` picker (models + available providers) and show provider endpoints in `/model status`.
|
||||||
- Control UI: add Config tab model presets (MiniMax M2.1, GLM 4.7, Kimi) for one-click setup.
|
- Control UI: add Config tab model presets (MiniMax M2.1, GLM 4.7, Kimi) for one-click setup.
|
||||||
- Plugins: add extension loader (tools/RPC/CLI/services), discovery paths, and config schema + Control UI labels (uiHints).
|
- Plugins: add extension loader (tools/RPC/CLI/services), discovery paths, and config schema + Control UI labels (uiHints).
|
||||||
- Plugins: add `clawdbot plugins install` (path/tgz/npm), plus `list|info|enable|disable|doctor` UX.
|
- Plugins: add `clawdbot plugins install` (path/tgz/npm), plus `list|info|enable|disable|doctor` UX.
|
||||||
- Plugins: voice-call plugin now real (Twilio/log), adds start/status RPC/CLI/tool + tests.
|
- Plugins: voice-call plugin now real (Twilio/log), adds start/status RPC/CLI/tool + tests.
|
||||||
- Docs: add plugins doc + cross-links from tools/skills/gateway config.
|
- Docs: add plugins doc + cross-links from tools/skills/gateway config.
|
||||||
- Docs: clarify memory flush behavior + writable workspace requirement in Memory/Session/FAQ.
|
|
||||||
- Docs: add beginner-friendly plugin quick start + expand Voice Call plugin docs.
|
- Docs: add beginner-friendly plugin quick start + expand Voice Call plugin docs.
|
||||||
- Tests: add Docker plugin loader + tgz-install smoke test.
|
- Tests: add Docker plugin loader + tgz-install smoke test.
|
||||||
- Tests: extend Docker plugin E2E to cover installing from local folders (`plugins.load.paths`) and `file:` npm specs.
|
- Tests: extend Docker plugin E2E to cover installing from local folders (`plugins.load.paths`) and `file:` npm specs.
|
||||||
- Tests: add coverage for pre-compaction memory flush settings (including read-only/CLI skips).
|
- Tests: add coverage for pre-compaction memory flush settings.
|
||||||
- Tests: modernize live model smoke selection for current releases and enforce tools/images/thinking-high coverage. (#769) — thanks @steipete.
|
- Tests: modernize live model smoke selection for current releases and enforce tools/images/thinking-high coverage. (#769) — thanks @steipete.
|
||||||
- Agents/Tools: add `apply_patch` tool for multi-file edits (experimental; gated by tools.exec.applyPatch; OpenAI-only).
|
- Agents/Tools: add `apply_patch` tool for multi-file edits (experimental; gated by tools.exec.applyPatch; OpenAI-only).
|
||||||
- Agents/Tools: rename the bash tool to exec (config alias maintained). (#748) — thanks @myfunc.
|
- Agents/Tools: rename the bash tool to exec (config alias maintained). (#748) — thanks @myfunc.
|
||||||
@@ -92,17 +65,9 @@
|
|||||||
- Installer UX: add `--install-method git|npm` and auto-detect source checkouts (prompt to update git checkout vs migrate to npm).
|
- Installer UX: add `--install-method git|npm` and auto-detect source checkouts (prompt to update git checkout vs migrate to npm).
|
||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
- Control UI: flatten nav into a single horizontal scroll row on tablet/mobile (and always show collapsed group items). (#771) — thanks @carlulsoe.
|
|
||||||
- macOS: start + await local gateway before onboarding wizard begins.
|
|
||||||
- macOS: cancel onboarding wizard on close, recover if the gateway drops the session, and time out stalled gateway connects.
|
|
||||||
- macOS: wizard debug CLI now surfaces error status instead of exiting as complete.
|
|
||||||
- Models/Onboarding: configure MiniMax (minimax.io) via Anthropic-compatible `/anthropic` endpoint by default (keep `minimax-api` as a legacy alias).
|
- Models/Onboarding: configure MiniMax (minimax.io) via Anthropic-compatible `/anthropic` endpoint by default (keep `minimax-api` as a legacy alias).
|
||||||
- Agents/Browser: cap Playwright AI snapshots for tool calls (maxChars); CLI snapshots remain full. (#763) — thanks @thesash.
|
|
||||||
- Models: normalize Gemini 3 Pro/Flash IDs to preview names for live model lookups. (#769) — thanks @steipete.
|
- Models: normalize Gemini 3 Pro/Flash IDs to preview names for live model lookups. (#769) — thanks @steipete.
|
||||||
- CLI: fix guardCancel typing for configure prompts. (#769) — thanks @steipete.
|
- CLI: fix guardCancel typing for configure prompts. (#769) — thanks @steipete.
|
||||||
- Providers: default groupPolicy to allowlist across providers and warn in doctor when groups are open.
|
|
||||||
- MS Teams: add groupPolicy/groupAllowFrom gating for group chats and warn when groups are open.
|
|
||||||
- Providers: strip tool call/result ids from Gemini CLI payloads to avoid API 400s. (#756)
|
|
||||||
- Gateway/WebChat: include handshake validation details in the WebSocket close reason for easier debugging; preserve close codes.
|
- Gateway/WebChat: include handshake validation details in the WebSocket close reason for easier debugging; preserve close codes.
|
||||||
- Gateway/Auth: send invalid connect responses before closing the handshake; stabilize invalid-connect auth test.
|
- Gateway/Auth: send invalid connect responses before closing the handshake; stabilize invalid-connect auth test.
|
||||||
- Gateway: tighten gateway listener detection.
|
- Gateway: tighten gateway listener detection.
|
||||||
@@ -114,23 +79,16 @@
|
|||||||
- Config: expand `~` in `CLAWDBOT_CONFIG_PATH` and common path-like config fields (including `plugins.load.paths`); guard invalid `$include` paths. (#731) — thanks @pasogott.
|
- Config: expand `~` in `CLAWDBOT_CONFIG_PATH` and common path-like config fields (including `plugins.load.paths`); guard invalid `$include` paths. (#731) — thanks @pasogott.
|
||||||
- Agents: stop pre-creating session transcripts so first user messages persist in JSONL history.
|
- Agents: stop pre-creating session transcripts so first user messages persist in JSONL history.
|
||||||
- Agents: skip pre-compaction memory flush when the session workspace is read-only.
|
- Agents: skip pre-compaction memory flush when the session workspace is read-only.
|
||||||
- Auto-reply: allow inline `/status` for allowlisted senders (stripped before the model); unauthorized senders see it as plain text.
|
|
||||||
- Auto-reply: include config-only allowlisted models in `/model` even when the catalog is partial.
|
|
||||||
- Auto-reply: allow fuzzy `/model` matches (e.g. `/model kimi` or `/model moonshot/kimi`) when unambiguous.
|
|
||||||
- Auto-reply: ignore inline `/status` directives unless the message is directive-only.
|
- Auto-reply: ignore inline `/status` directives unless the message is directive-only.
|
||||||
- CLI/Configure: enter the selected section immediately, then return to the section picker.
|
|
||||||
- CLI/Configure: apply the chosen auth model as default (skip the extra picker) and refresh the model catalog for new providers.
|
|
||||||
- Auto-reply: align `/think` default display with model reasoning defaults. (#751) — thanks @gabriel-trigo.
|
- Auto-reply: align `/think` default display with model reasoning defaults. (#751) — thanks @gabriel-trigo.
|
||||||
- Auto-reply: flush block reply buffers on tool boundaries. (#750) — thanks @sebslight.
|
- Auto-reply: flush block reply buffers on tool boundaries. (#750) — thanks @sebslight.
|
||||||
- Auto-reply: allow sender fallback for command authorization when `SenderId` is empty (WhatsApp self-chat). (#755) — thanks @juanpablodlc.
|
- Auto-reply: allow sender fallback for command authorization when `SenderId` is empty (WhatsApp self-chat). (#755) — thanks @juanpablodlc.
|
||||||
- Heartbeat: refresh prompt text for updated defaults.
|
- Heartbeat: refresh prompt text for updated defaults.
|
||||||
- Agents/Tools: use PowerShell on Windows to capture system utility output. (#748) — thanks @myfunc.
|
- Agents/Tools: use PowerShell on Windows to capture system utility output. (#748) — thanks @myfunc.
|
||||||
- Agents/Tools: normalize Claude Code-style read/write/edit params (file_path/old_string/new_string) and keep sandbox guards in place. (#768) — thanks @hsrvc.
|
|
||||||
- Docker: tolerate unset optional env vars in docker-setup.sh under strict mode. (#725) — thanks @petradonka.
|
- Docker: tolerate unset optional env vars in docker-setup.sh under strict mode. (#725) — thanks @petradonka.
|
||||||
- CLI/Update: preserve base environment when passing overrides to update subprocesses. (#713) — thanks @danielz1z.
|
- CLI/Update: preserve base environment when passing overrides to update subprocesses. (#713) — thanks @danielz1z.
|
||||||
- Agents: treat message tool errors as failures so fallback replies still send; require `to` + `message` for `action=send`. (#717) — thanks @theglove44.
|
- Agents: treat message tool errors as failures so fallback replies still send; require `to` + `message` for `action=send`. (#717) — thanks @theglove44.
|
||||||
- Agents: preserve reasoning items on tool-only turns.
|
- Agents: preserve reasoning items on tool-only turns.
|
||||||
- Agents: enforce `<final>` gating for reasoning-tag providers to prevent tag/reasoning leaks. (#754) — thanks @mcinteerj.
|
|
||||||
- Agents/Subagents: wait for completion before announcing, align wait timeout with run timeout, and make announce prompts more emphatic.
|
- Agents/Subagents: wait for completion before announcing, align wait timeout with run timeout, and make announce prompts more emphatic.
|
||||||
- Agents: route subagent transcripts to the target agent sessions directory and add regression coverage. (#708) — thanks @xMikeMickelson.
|
- Agents: route subagent transcripts to the target agent sessions directory and add regression coverage. (#708) — thanks @xMikeMickelson.
|
||||||
- Agents/Tools: preserve action enums when flattening tool schemas. (#708) — thanks @xMikeMickelson.
|
- Agents/Tools: preserve action enums when flattening tool schemas. (#708) — thanks @xMikeMickelson.
|
||||||
|
|||||||
@@ -70,6 +70,10 @@ clawdbot [--dev] [--profile <name>] <command>
|
|||||||
enable
|
enable
|
||||||
disable
|
disable
|
||||||
doctor
|
doctor
|
||||||
|
memory
|
||||||
|
status
|
||||||
|
index
|
||||||
|
search
|
||||||
message
|
message
|
||||||
agent
|
agent
|
||||||
agents
|
agents
|
||||||
@@ -188,6 +192,14 @@ Manage extensions and their config:
|
|||||||
|
|
||||||
Most plugin changes require a gateway restart. See [/plugin](/plugin).
|
Most plugin changes require a gateway restart. See [/plugin](/plugin).
|
||||||
|
|
||||||
|
## Memory
|
||||||
|
|
||||||
|
Vector search over `MEMORY.md` + `memory/*.md`:
|
||||||
|
|
||||||
|
- `clawdbot memory status` — show index stats.
|
||||||
|
- `clawdbot memory index` — reindex memory files.
|
||||||
|
- `clawdbot memory search "<query>"` — semantic search over memory.
|
||||||
|
|
||||||
## Chat slash commands
|
## Chat slash commands
|
||||||
|
|
||||||
Chat messages support `/...` commands (text and native). See [/tools/slash-commands](/tools/slash-commands).
|
Chat messages support `/...` commands (text and native). See [/tools/slash-commands](/tools/slash-commands).
|
||||||
|
|||||||
@@ -67,3 +67,38 @@ Details:
|
|||||||
|
|
||||||
For the full compaction lifecycle, see
|
For the full compaction lifecycle, see
|
||||||
[Session management + compaction](/reference/session-management-compaction).
|
[Session management + compaction](/reference/session-management-compaction).
|
||||||
|
|
||||||
|
## Vector memory search
|
||||||
|
|
||||||
|
Clawdbot can build a small vector index over `MEMORY.md` and `memory/*.md` so
|
||||||
|
semantic queries can find related notes even when wording differs.
|
||||||
|
|
||||||
|
Defaults:
|
||||||
|
- Enabled by default.
|
||||||
|
- Watches memory files for changes (debounced).
|
||||||
|
- Uses remote embeddings (OpenAI) unless configured for local.
|
||||||
|
- Local mode uses node-llama-cpp and may require `pnpm approve-builds`.
|
||||||
|
|
||||||
|
Config example:
|
||||||
|
|
||||||
|
```json5
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
memorySearch: {
|
||||||
|
provider: "openai",
|
||||||
|
model: "text-embedding-3-small",
|
||||||
|
fallback: "openai",
|
||||||
|
sync: { watch: true }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Tools:
|
||||||
|
- `memory_search` — returns snippets with file + line ranges.
|
||||||
|
- `memory_get` — read memory file content by path.
|
||||||
|
|
||||||
|
Local mode:
|
||||||
|
- Set `agents.defaults.memorySearch.provider = "local"`.
|
||||||
|
- Provide `agents.defaults.memorySearch.local.modelPath` (GGUF or `hf:` URI).
|
||||||
|
- Optional: set `agents.defaults.memorySearch.fallback = "none"` to avoid remote fallback.
|
||||||
|
|||||||
127
docs/refactor/vector-memory.md
Normal file
127
docs/refactor/vector-memory.md
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
---
|
||||||
|
summary: "Vector memory search design plan (per-agent, watch/lazy sync, storage)"
|
||||||
|
read_when:
|
||||||
|
- Designing or implementing vector memory search
|
||||||
|
- Adding embedding providers or sync behavior
|
||||||
|
---
|
||||||
|
|
||||||
|
# Vector Memory Search — Design Plan
|
||||||
|
|
||||||
|
Goal: semantic search over **agent memory files** only, with minimal deps and
|
||||||
|
good UX defaults. Default enabled. Per-agent overrides.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
- Sources: `MEMORY.md` + `memory/YYYY-MM-DD.md` inside the agent workspace.
|
||||||
|
- No indexing outside the workspace. No hidden paths.
|
||||||
|
- No QMD-style query expansion or rerank in v1.
|
||||||
|
|
||||||
|
## Config Shape
|
||||||
|
Location: `agents.defaults.memorySearch` + `agents.list[].memorySearch`.
|
||||||
|
|
||||||
|
```json5
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
memorySearch: {
|
||||||
|
enabled: true,
|
||||||
|
provider: "openai", // "openai" | "local"
|
||||||
|
fallback: "openai", // "openai" | "none"
|
||||||
|
model: "text-embedding-3-small",
|
||||||
|
store: {
|
||||||
|
driver: "sqlite",
|
||||||
|
path: "~/.clawdbot/memory/{agentId}.sqlite"
|
||||||
|
},
|
||||||
|
chunking: {
|
||||||
|
tokens: 400,
|
||||||
|
overlap: 80
|
||||||
|
},
|
||||||
|
sync: {
|
||||||
|
onSessionStart: true,
|
||||||
|
onSearch: true, // LazySync
|
||||||
|
watch: true, // default on
|
||||||
|
watchDebounceMs: 1500,
|
||||||
|
intervalMinutes: 0
|
||||||
|
},
|
||||||
|
query: {
|
||||||
|
maxResults: 6,
|
||||||
|
minScore: 0.35
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
list: [
|
||||||
|
{ id: "peter", memorySearch: { provider: "local", sync: { watch: false } } }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Storage
|
||||||
|
Per-agent DB (default): `~/.clawdbot/memory/{agentId}.sqlite`.
|
||||||
|
|
||||||
|
Tables (v1):
|
||||||
|
- `files(path PRIMARY KEY, hash, mtime, size)`
|
||||||
|
- `chunks(id PRIMARY KEY, path, start_line, end_line, hash, text, embedding, updated_at)`
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- `hash` = content hash of chunk text.
|
||||||
|
- `embedding` stored as float[] (sqlite vec extension optional); if not using vec,
|
||||||
|
store as JSON and do linear scan in memory for small corpora.
|
||||||
|
|
||||||
|
## Embedding Providers
|
||||||
|
Interface (core):
|
||||||
|
- `embedQuery(text): number[]`
|
||||||
|
- `embedBatch(texts[]): number[][]`
|
||||||
|
|
||||||
|
Providers:
|
||||||
|
- `openai` (default): OpenAI embeddings via existing keys.
|
||||||
|
- `local` (optional): node-llama-cpp (GGUF).
|
||||||
|
- Fallback: when `provider: "local"` fails, fallback to OpenAI unless `fallback: "none"`.
|
||||||
|
|
||||||
|
## Index Pipeline
|
||||||
|
1) Resolve memory file list (workspace only).
|
||||||
|
2) Read file, compute file hash/mtime.
|
||||||
|
3) Chunk by headings + token cap (overlap).
|
||||||
|
4) Embed only changed chunks (hash compare).
|
||||||
|
5) Upsert `chunks` rows, prune deleted files.
|
||||||
|
|
||||||
|
Chunking:
|
||||||
|
- Prefer heading-aware splits.
|
||||||
|
- Max tokens + overlap; keep line ranges for snippets.
|
||||||
|
|
||||||
|
## Sync Strategy
|
||||||
|
Default: **watch + lazy + session-start**
|
||||||
|
- `watch`: chokidar on `MEMORY.md` + `memory/**/*.md` (debounced).
|
||||||
|
- `onSearch`: if dirty, sync before search (LazySync).
|
||||||
|
- `onSessionStart`: warm index once per session.
|
||||||
|
- `intervalMinutes`: optional for long-lived sessions.
|
||||||
|
|
||||||
|
If workspace access is read-only or missing: disable writes; return “not indexed”.
|
||||||
|
|
||||||
|
## Query Flow
|
||||||
|
1) Embed query.
|
||||||
|
2) Cosine similarity over all chunk embeddings.
|
||||||
|
3) Return top K with `{path, startLine, endLine, snippet, score}`.
|
||||||
|
4) Model may call `memory_get` when full context needed.
|
||||||
|
|
||||||
|
Optional v2: add FTS5 + RRF merge (FTS + vector) for quality.
|
||||||
|
|
||||||
|
## Tool + CLI
|
||||||
|
Tools:
|
||||||
|
- `memory_search { query, maxResults?, minScore? }`
|
||||||
|
- `memory_get { path, from?, lines? }`
|
||||||
|
|
||||||
|
CLI (optional):
|
||||||
|
- `clawdbot memory index|search|status`
|
||||||
|
|
||||||
|
## Security + Permissions
|
||||||
|
- Indexer reads only memory files in workspace.
|
||||||
|
- No scanning outside workspace; no “sneak” reads.
|
||||||
|
- Respect sandbox `workspaceAccess` (ro = read-only; none = disabled).
|
||||||
|
|
||||||
|
## Tests
|
||||||
|
- Chunking boundaries + line ranges.
|
||||||
|
- Hash-based incremental updates.
|
||||||
|
- Search ranking (cosine).
|
||||||
|
- Watcher debounce (fake fs).
|
||||||
|
|
||||||
|
## Rollout
|
||||||
|
- Default enabled; if no memory files, index is empty (silent).
|
||||||
|
- No migration needed.
|
||||||
@@ -160,6 +160,7 @@
|
|||||||
"json5": "^2.2.3",
|
"json5": "^2.2.3",
|
||||||
"long": "5.3.2",
|
"long": "5.3.2",
|
||||||
"markdown-it": "^14.1.0",
|
"markdown-it": "^14.1.0",
|
||||||
|
"node-llama-cpp": "3.14.5",
|
||||||
"osc-progress": "^0.2.0",
|
"osc-progress": "^0.2.0",
|
||||||
"playwright-core": "1.57.0",
|
"playwright-core": "1.57.0",
|
||||||
"proper-lockfile": "^4.1.2",
|
"proper-lockfile": "^4.1.2",
|
||||||
|
|||||||
1222
pnpm-lock.yaml
generated
1222
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -22,6 +22,7 @@ type ResolvedAgentConfig = {
|
|||||||
workspace?: string;
|
workspace?: string;
|
||||||
agentDir?: string;
|
agentDir?: string;
|
||||||
model?: string;
|
model?: string;
|
||||||
|
memorySearch?: AgentEntry["memorySearch"];
|
||||||
humanDelay?: AgentEntry["humanDelay"];
|
humanDelay?: AgentEntry["humanDelay"];
|
||||||
identity?: AgentEntry["identity"];
|
identity?: AgentEntry["identity"];
|
||||||
groupChat?: AgentEntry["groupChat"];
|
groupChat?: AgentEntry["groupChat"];
|
||||||
@@ -95,6 +96,7 @@ export function resolveAgentConfig(
|
|||||||
typeof entry.workspace === "string" ? entry.workspace : undefined,
|
typeof entry.workspace === "string" ? entry.workspace : undefined,
|
||||||
agentDir: typeof entry.agentDir === "string" ? entry.agentDir : undefined,
|
agentDir: typeof entry.agentDir === "string" ? entry.agentDir : undefined,
|
||||||
model: typeof entry.model === "string" ? entry.model : undefined,
|
model: typeof entry.model === "string" ? entry.model : undefined,
|
||||||
|
memorySearch: entry.memorySearch,
|
||||||
humanDelay: entry.humanDelay,
|
humanDelay: entry.humanDelay,
|
||||||
identity: entry.identity,
|
identity: entry.identity,
|
||||||
groupChat: entry.groupChat,
|
groupChat: entry.groupChat,
|
||||||
|
|||||||
@@ -9,6 +9,10 @@ import type { AnyAgentTool } from "./tools/common.js";
|
|||||||
import { createCronTool } from "./tools/cron-tool.js";
|
import { createCronTool } from "./tools/cron-tool.js";
|
||||||
import { createGatewayTool } from "./tools/gateway-tool.js";
|
import { createGatewayTool } from "./tools/gateway-tool.js";
|
||||||
import { createImageTool } from "./tools/image-tool.js";
|
import { createImageTool } from "./tools/image-tool.js";
|
||||||
|
import {
|
||||||
|
createMemoryGetTool,
|
||||||
|
createMemorySearchTool,
|
||||||
|
} from "./tools/memory-tool.js";
|
||||||
import { createMessageTool } from "./tools/message-tool.js";
|
import { createMessageTool } from "./tools/message-tool.js";
|
||||||
import { createNodesTool } from "./tools/nodes-tool.js";
|
import { createNodesTool } from "./tools/nodes-tool.js";
|
||||||
import { createSessionStatusTool } from "./tools/session-status-tool.js";
|
import { createSessionStatusTool } from "./tools/session-status-tool.js";
|
||||||
@@ -43,6 +47,14 @@ export function createClawdbotTools(options?: {
|
|||||||
config: options?.config,
|
config: options?.config,
|
||||||
agentDir: options?.agentDir,
|
agentDir: options?.agentDir,
|
||||||
});
|
});
|
||||||
|
const memorySearchTool = createMemorySearchTool({
|
||||||
|
config: options?.config,
|
||||||
|
agentSessionKey: options?.agentSessionKey,
|
||||||
|
});
|
||||||
|
const memoryGetTool = createMemoryGetTool({
|
||||||
|
config: options?.config,
|
||||||
|
agentSessionKey: options?.agentSessionKey,
|
||||||
|
});
|
||||||
const tools: AnyAgentTool[] = [
|
const tools: AnyAgentTool[] = [
|
||||||
createBrowserTool({
|
createBrowserTool({
|
||||||
defaultControlUrl: options?.browserControlUrl,
|
defaultControlUrl: options?.browserControlUrl,
|
||||||
@@ -89,6 +101,9 @@ export function createClawdbotTools(options?: {
|
|||||||
agentSessionKey: options?.agentSessionKey,
|
agentSessionKey: options?.agentSessionKey,
|
||||||
config: options?.config,
|
config: options?.config,
|
||||||
}),
|
}),
|
||||||
|
...(memorySearchTool && memoryGetTool
|
||||||
|
? [memorySearchTool, memoryGetTool]
|
||||||
|
: []),
|
||||||
...(imageTool ? [imageTool] : []),
|
...(imageTool ? [imageTool] : []),
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|||||||
56
src/agents/memory-search.test.ts
Normal file
56
src/agents/memory-search.test.ts
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
|
import { resolveMemorySearchConfig } from "./memory-search.js";
|
||||||
|
|
||||||
|
describe("memory search config", () => {
|
||||||
|
it("returns null when disabled", () => {
|
||||||
|
const cfg = {
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
memorySearch: { enabled: true },
|
||||||
|
},
|
||||||
|
list: [
|
||||||
|
{
|
||||||
|
id: "main",
|
||||||
|
default: true,
|
||||||
|
memorySearch: { enabled: false },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const resolved = resolveMemorySearchConfig(cfg, "main");
|
||||||
|
expect(resolved).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("merges defaults and overrides", () => {
|
||||||
|
const cfg = {
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
memorySearch: {
|
||||||
|
provider: "openai",
|
||||||
|
model: "text-embedding-3-small",
|
||||||
|
chunking: { tokens: 500, overlap: 100 },
|
||||||
|
query: { maxResults: 4, minScore: 0.2 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
list: [
|
||||||
|
{
|
||||||
|
id: "main",
|
||||||
|
default: true,
|
||||||
|
memorySearch: {
|
||||||
|
chunking: { tokens: 320 },
|
||||||
|
query: { maxResults: 8 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const resolved = resolveMemorySearchConfig(cfg, "main");
|
||||||
|
expect(resolved?.provider).toBe("openai");
|
||||||
|
expect(resolved?.model).toBe("text-embedding-3-small");
|
||||||
|
expect(resolved?.chunking.tokens).toBe(320);
|
||||||
|
expect(resolved?.chunking.overlap).toBe(100);
|
||||||
|
expect(resolved?.query.maxResults).toBe(8);
|
||||||
|
expect(resolved?.query.minScore).toBe(0.2);
|
||||||
|
});
|
||||||
|
});
|
||||||
134
src/agents/memory-search.ts
Normal file
134
src/agents/memory-search.ts
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
|
|
||||||
|
import type { ClawdbotConfig, MemorySearchConfig } from "../config/config.js";
|
||||||
|
import { resolveStateDir } from "../config/paths.js";
|
||||||
|
import { resolveUserPath } from "../utils.js";
|
||||||
|
import { resolveAgentConfig } from "./agent-scope.js";
|
||||||
|
|
||||||
|
export type ResolvedMemorySearchConfig = {
|
||||||
|
enabled: boolean;
|
||||||
|
provider: "openai" | "local";
|
||||||
|
fallback: "openai" | "none";
|
||||||
|
model: string;
|
||||||
|
local: {
|
||||||
|
modelPath?: string;
|
||||||
|
modelCacheDir?: string;
|
||||||
|
};
|
||||||
|
store: {
|
||||||
|
driver: "sqlite";
|
||||||
|
path: string;
|
||||||
|
};
|
||||||
|
chunking: {
|
||||||
|
tokens: number;
|
||||||
|
overlap: number;
|
||||||
|
};
|
||||||
|
sync: {
|
||||||
|
onSessionStart: boolean;
|
||||||
|
onSearch: boolean;
|
||||||
|
watch: boolean;
|
||||||
|
watchDebounceMs: number;
|
||||||
|
intervalMinutes: number;
|
||||||
|
};
|
||||||
|
query: {
|
||||||
|
maxResults: number;
|
||||||
|
minScore: number;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
const DEFAULT_MODEL = "text-embedding-3-small";
|
||||||
|
const DEFAULT_CHUNK_TOKENS = 400;
|
||||||
|
const DEFAULT_CHUNK_OVERLAP = 80;
|
||||||
|
const DEFAULT_WATCH_DEBOUNCE_MS = 1500;
|
||||||
|
const DEFAULT_MAX_RESULTS = 6;
|
||||||
|
const DEFAULT_MIN_SCORE = 0.35;
|
||||||
|
|
||||||
|
function resolveStorePath(agentId: string, raw?: string): string {
|
||||||
|
const stateDir = resolveStateDir(process.env, os.homedir);
|
||||||
|
const fallback = path.join(stateDir, "memory", `${agentId}.sqlite`);
|
||||||
|
if (!raw) return fallback;
|
||||||
|
const withToken = raw.includes("{agentId}")
|
||||||
|
? raw.replaceAll("{agentId}", agentId)
|
||||||
|
: raw;
|
||||||
|
return resolveUserPath(withToken);
|
||||||
|
}
|
||||||
|
|
||||||
|
function mergeConfig(
|
||||||
|
defaults: MemorySearchConfig | undefined,
|
||||||
|
overrides: MemorySearchConfig | undefined,
|
||||||
|
agentId: string,
|
||||||
|
): ResolvedMemorySearchConfig {
|
||||||
|
const enabled = overrides?.enabled ?? defaults?.enabled ?? true;
|
||||||
|
const provider = overrides?.provider ?? defaults?.provider ?? "openai";
|
||||||
|
const fallback = overrides?.fallback ?? defaults?.fallback ?? "openai";
|
||||||
|
const model = overrides?.model ?? defaults?.model ?? DEFAULT_MODEL;
|
||||||
|
const local = {
|
||||||
|
modelPath: overrides?.local?.modelPath ?? defaults?.local?.modelPath,
|
||||||
|
modelCacheDir:
|
||||||
|
overrides?.local?.modelCacheDir ?? defaults?.local?.modelCacheDir,
|
||||||
|
};
|
||||||
|
const store = {
|
||||||
|
driver: overrides?.store?.driver ?? defaults?.store?.driver ?? "sqlite",
|
||||||
|
path: resolveStorePath(
|
||||||
|
agentId,
|
||||||
|
overrides?.store?.path ?? defaults?.store?.path,
|
||||||
|
),
|
||||||
|
};
|
||||||
|
const chunking = {
|
||||||
|
tokens:
|
||||||
|
overrides?.chunking?.tokens ??
|
||||||
|
defaults?.chunking?.tokens ??
|
||||||
|
DEFAULT_CHUNK_TOKENS,
|
||||||
|
overlap:
|
||||||
|
overrides?.chunking?.overlap ??
|
||||||
|
defaults?.chunking?.overlap ??
|
||||||
|
DEFAULT_CHUNK_OVERLAP,
|
||||||
|
};
|
||||||
|
const sync = {
|
||||||
|
onSessionStart:
|
||||||
|
overrides?.sync?.onSessionStart ?? defaults?.sync?.onSessionStart ?? true,
|
||||||
|
onSearch: overrides?.sync?.onSearch ?? defaults?.sync?.onSearch ?? true,
|
||||||
|
watch: overrides?.sync?.watch ?? defaults?.sync?.watch ?? true,
|
||||||
|
watchDebounceMs:
|
||||||
|
overrides?.sync?.watchDebounceMs ??
|
||||||
|
defaults?.sync?.watchDebounceMs ??
|
||||||
|
DEFAULT_WATCH_DEBOUNCE_MS,
|
||||||
|
intervalMinutes:
|
||||||
|
overrides?.sync?.intervalMinutes ?? defaults?.sync?.intervalMinutes ?? 0,
|
||||||
|
};
|
||||||
|
const query = {
|
||||||
|
maxResults:
|
||||||
|
overrides?.query?.maxResults ??
|
||||||
|
defaults?.query?.maxResults ??
|
||||||
|
DEFAULT_MAX_RESULTS,
|
||||||
|
minScore:
|
||||||
|
overrides?.query?.minScore ??
|
||||||
|
defaults?.query?.minScore ??
|
||||||
|
DEFAULT_MIN_SCORE,
|
||||||
|
};
|
||||||
|
|
||||||
|
const overlap = Math.max(0, Math.min(chunking.overlap, chunking.tokens - 1));
|
||||||
|
const minScore = Math.max(0, Math.min(1, query.minScore));
|
||||||
|
return {
|
||||||
|
enabled,
|
||||||
|
provider,
|
||||||
|
fallback,
|
||||||
|
model,
|
||||||
|
local,
|
||||||
|
store,
|
||||||
|
chunking: { tokens: Math.max(1, chunking.tokens), overlap },
|
||||||
|
sync,
|
||||||
|
query: { ...query, minScore },
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export function resolveMemorySearchConfig(
|
||||||
|
cfg: ClawdbotConfig,
|
||||||
|
agentId: string,
|
||||||
|
): ResolvedMemorySearchConfig | null {
|
||||||
|
const defaults = cfg.agents?.defaults?.memorySearch;
|
||||||
|
const overrides = resolveAgentConfig(cfg, agentId)?.memorySearch;
|
||||||
|
const resolved = mergeConfig(defaults, overrides, agentId);
|
||||||
|
if (!resolved.enabled) return null;
|
||||||
|
return resolved;
|
||||||
|
}
|
||||||
@@ -222,6 +222,16 @@
|
|||||||
"title": "Session Status",
|
"title": "Session Status",
|
||||||
"detailKeys": ["sessionKey", "model"]
|
"detailKeys": ["sessionKey", "model"]
|
||||||
},
|
},
|
||||||
|
"memory_search": {
|
||||||
|
"emoji": "🧠",
|
||||||
|
"title": "Memory Search",
|
||||||
|
"detailKeys": ["query"]
|
||||||
|
},
|
||||||
|
"memory_get": {
|
||||||
|
"emoji": "📓",
|
||||||
|
"title": "Memory Get",
|
||||||
|
"detailKeys": ["path", "from", "lines"]
|
||||||
|
},
|
||||||
"whatsapp_login": {
|
"whatsapp_login": {
|
||||||
"emoji": "🟢",
|
"emoji": "🟢",
|
||||||
"title": "WhatsApp Login",
|
"title": "WhatsApp Login",
|
||||||
|
|||||||
101
src/agents/tools/memory-tool.ts
Normal file
101
src/agents/tools/memory-tool.ts
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
import { Type } from "@sinclair/typebox";
|
||||||
|
|
||||||
|
import type { ClawdbotConfig } from "../../config/config.js";
|
||||||
|
import { getMemorySearchManager } from "../../memory/index.js";
|
||||||
|
import { resolveSessionAgentId } from "../agent-scope.js";
|
||||||
|
import { resolveMemorySearchConfig } from "../memory-search.js";
|
||||||
|
import type { AnyAgentTool } from "./common.js";
|
||||||
|
import { jsonResult, readNumberParam, readStringParam } from "./common.js";
|
||||||
|
|
||||||
|
const MemorySearchSchema = Type.Object({
|
||||||
|
query: Type.String(),
|
||||||
|
maxResults: Type.Optional(Type.Number()),
|
||||||
|
minScore: Type.Optional(Type.Number()),
|
||||||
|
});
|
||||||
|
|
||||||
|
const MemoryGetSchema = Type.Object({
|
||||||
|
path: Type.String(),
|
||||||
|
from: Type.Optional(Type.Number()),
|
||||||
|
lines: Type.Optional(Type.Number()),
|
||||||
|
});
|
||||||
|
|
||||||
|
export function createMemorySearchTool(options: {
|
||||||
|
config?: ClawdbotConfig;
|
||||||
|
agentSessionKey?: string;
|
||||||
|
}): AnyAgentTool | null {
|
||||||
|
const cfg = options.config;
|
||||||
|
if (!cfg) return null;
|
||||||
|
const agentId = resolveSessionAgentId({
|
||||||
|
sessionKey: options.agentSessionKey,
|
||||||
|
config: cfg,
|
||||||
|
});
|
||||||
|
if (!resolveMemorySearchConfig(cfg, agentId)) return null;
|
||||||
|
return {
|
||||||
|
label: "Memory Search",
|
||||||
|
name: "memory_search",
|
||||||
|
description:
|
||||||
|
"Search agent memory files (MEMORY.md + memory/*.md) using semantic vectors.",
|
||||||
|
parameters: MemorySearchSchema,
|
||||||
|
execute: async (_toolCallId, params) => {
|
||||||
|
const query = readStringParam(params, "query", { required: true });
|
||||||
|
const maxResults = readNumberParam(params, "maxResults");
|
||||||
|
const minScore = readNumberParam(params, "minScore");
|
||||||
|
const { manager, error } = await getMemorySearchManager({
|
||||||
|
cfg,
|
||||||
|
agentId,
|
||||||
|
});
|
||||||
|
if (!manager) {
|
||||||
|
return jsonResult({ results: [], disabled: true, error });
|
||||||
|
}
|
||||||
|
const results = await manager.search(query, {
|
||||||
|
maxResults,
|
||||||
|
minScore,
|
||||||
|
sessionKey: options.agentSessionKey,
|
||||||
|
});
|
||||||
|
const status = manager.status();
|
||||||
|
return jsonResult({
|
||||||
|
results,
|
||||||
|
provider: status.provider,
|
||||||
|
model: status.model,
|
||||||
|
fallback: status.fallback,
|
||||||
|
});
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export function createMemoryGetTool(options: {
|
||||||
|
config?: ClawdbotConfig;
|
||||||
|
agentSessionKey?: string;
|
||||||
|
}): AnyAgentTool | null {
|
||||||
|
const cfg = options.config;
|
||||||
|
if (!cfg) return null;
|
||||||
|
const agentId = resolveSessionAgentId({
|
||||||
|
sessionKey: options.agentSessionKey,
|
||||||
|
config: cfg,
|
||||||
|
});
|
||||||
|
if (!resolveMemorySearchConfig(cfg, agentId)) return null;
|
||||||
|
return {
|
||||||
|
label: "Memory Get",
|
||||||
|
name: "memory_get",
|
||||||
|
description: "Read a memory file by path (workspace-relative).",
|
||||||
|
parameters: MemoryGetSchema,
|
||||||
|
execute: async (_toolCallId, params) => {
|
||||||
|
const relPath = readStringParam(params, "path", { required: true });
|
||||||
|
const from = readNumberParam(params, "from", { integer: true });
|
||||||
|
const lines = readNumberParam(params, "lines", { integer: true });
|
||||||
|
const { manager, error } = await getMemorySearchManager({
|
||||||
|
cfg,
|
||||||
|
agentId,
|
||||||
|
});
|
||||||
|
if (!manager) {
|
||||||
|
return jsonResult({ path: relPath, text: "", disabled: true, error });
|
||||||
|
}
|
||||||
|
const result = await manager.readFile({
|
||||||
|
relPath,
|
||||||
|
from: from ?? undefined,
|
||||||
|
lines: lines ?? undefined,
|
||||||
|
});
|
||||||
|
return jsonResult(result);
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -40,7 +40,6 @@ import {
|
|||||||
import { normalizeMainKey } from "../routing/session-key.js";
|
import { normalizeMainKey } from "../routing/session-key.js";
|
||||||
import { defaultRuntime } from "../runtime.js";
|
import { defaultRuntime } from "../runtime.js";
|
||||||
import { INTERNAL_MESSAGE_PROVIDER } from "../utils/message-provider.js";
|
import { INTERNAL_MESSAGE_PROVIDER } from "../utils/message-provider.js";
|
||||||
import { isReasoningTagProvider } from "../utils/provider-utils.js";
|
|
||||||
import { resolveCommandAuthorization } from "./command-auth.js";
|
import { resolveCommandAuthorization } from "./command-auth.js";
|
||||||
import { hasControlCommand } from "./command-detection.js";
|
import { hasControlCommand } from "./command-detection.js";
|
||||||
import {
|
import {
|
||||||
@@ -493,6 +492,15 @@ export async function getReplyFromConfig(
|
|||||||
modelAliases: configuredAliases,
|
modelAliases: configuredAliases,
|
||||||
allowStatusDirective,
|
allowStatusDirective,
|
||||||
});
|
});
|
||||||
|
const hasInlineStatus =
|
||||||
|
parsedDirectives.hasStatusDirective &&
|
||||||
|
parsedDirectives.cleaned.trim().length > 0;
|
||||||
|
if (hasInlineStatus) {
|
||||||
|
parsedDirectives = {
|
||||||
|
...parsedDirectives,
|
||||||
|
hasStatusDirective: false,
|
||||||
|
};
|
||||||
|
}
|
||||||
if (
|
if (
|
||||||
isGroup &&
|
isGroup &&
|
||||||
ctx.WasMentioned !== true &&
|
ctx.WasMentioned !== true &&
|
||||||
@@ -522,7 +530,6 @@ export async function getReplyFromConfig(
|
|||||||
if (noMentions.trim().length > 0) {
|
if (noMentions.trim().length > 0) {
|
||||||
const directiveOnlyCheck = parseInlineDirectives(noMentions, {
|
const directiveOnlyCheck = parseInlineDirectives(noMentions, {
|
||||||
modelAliases: configuredAliases,
|
modelAliases: configuredAliases,
|
||||||
allowStatusDirective,
|
|
||||||
});
|
});
|
||||||
if (directiveOnlyCheck.cleaned.trim().length > 0) {
|
if (directiveOnlyCheck.cleaned.trim().length > 0) {
|
||||||
const allowInlineStatus =
|
const allowInlineStatus =
|
||||||
@@ -698,11 +705,10 @@ export async function getReplyFromConfig(
|
|||||||
? undefined
|
? undefined
|
||||||
: directives.rawModelDirective;
|
: directives.rawModelDirective;
|
||||||
|
|
||||||
|
const inlineStatusRequested =
|
||||||
|
hasInlineStatus && allowTextCommands && command.isAuthorizedSender;
|
||||||
|
|
||||||
if (!command.isAuthorizedSender) {
|
if (!command.isAuthorizedSender) {
|
||||||
// Treat slash tokens as plain text for unauthorized senders.
|
|
||||||
cleanedBody = existingBody;
|
|
||||||
sessionCtx.Body = cleanedBody;
|
|
||||||
sessionCtx.BodyStripped = cleanedBody;
|
|
||||||
directives = {
|
directives = {
|
||||||
...directives,
|
...directives,
|
||||||
hasThinkDirective: false,
|
hasThinkDirective: false,
|
||||||
@@ -863,11 +869,7 @@ export async function getReplyFromConfig(
|
|||||||
cfg,
|
cfg,
|
||||||
agentId,
|
agentId,
|
||||||
isGroup,
|
isGroup,
|
||||||
}) &&
|
}) && inlineStatusRequested;
|
||||||
directives.hasStatusDirective &&
|
|
||||||
allowTextCommands &&
|
|
||||||
command.isAuthorizedSender &&
|
|
||||||
command.commandBodyNormalized !== "/status";
|
|
||||||
if (handleInlineStatus) {
|
if (handleInlineStatus) {
|
||||||
const inlineStatusReply = await buildStatusReply({
|
const inlineStatusReply = await buildStatusReply({
|
||||||
cfg,
|
cfg,
|
||||||
@@ -1158,7 +1160,6 @@ export async function getReplyFromConfig(
|
|||||||
resolvedQueue.mode === "collect" ||
|
resolvedQueue.mode === "collect" ||
|
||||||
resolvedQueue.mode === "steer-backlog";
|
resolvedQueue.mode === "steer-backlog";
|
||||||
const authProfileId = sessionEntry?.authProfileOverride;
|
const authProfileId = sessionEntry?.authProfileOverride;
|
||||||
|
|
||||||
const followupRun = {
|
const followupRun = {
|
||||||
prompt: queuedBody,
|
prompt: queuedBody,
|
||||||
messageId: sessionCtx.MessageSid,
|
messageId: sessionCtx.MessageSid,
|
||||||
@@ -1197,7 +1198,7 @@ export async function getReplyFromConfig(
|
|||||||
ownerNumbers:
|
ownerNumbers:
|
||||||
command.ownerList.length > 0 ? command.ownerList : undefined,
|
command.ownerList.length > 0 ? command.ownerList : undefined,
|
||||||
extraSystemPrompt: extraSystemPrompt || undefined,
|
extraSystemPrompt: extraSystemPrompt || undefined,
|
||||||
...(isReasoningTagProvider(provider) ? { enforceFinalTag: true } : {}),
|
...(provider === "ollama" ? { enforceFinalTag: true } : {}),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
124
src/cli/memory-cli.ts
Normal file
124
src/cli/memory-cli.ts
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
import chalk from "chalk";
|
||||||
|
import type { Command } from "commander";
|
||||||
|
|
||||||
|
import { resolveDefaultAgentId } from "../agents/agent-scope.js";
|
||||||
|
import { loadConfig } from "../config/config.js";
|
||||||
|
import { getMemorySearchManager } from "../memory/index.js";
|
||||||
|
import { defaultRuntime } from "../runtime.js";
|
||||||
|
|
||||||
|
type MemoryCommandOptions = {
|
||||||
|
agent?: string;
|
||||||
|
json?: boolean;
|
||||||
|
};
|
||||||
|
|
||||||
|
function resolveAgent(cfg: ReturnType<typeof loadConfig>, agent?: string) {
|
||||||
|
const trimmed = agent?.trim();
|
||||||
|
if (trimmed) return trimmed;
|
||||||
|
return resolveDefaultAgentId(cfg);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function registerMemoryCli(program: Command) {
|
||||||
|
const memory = program.command("memory").description("Memory search tools");
|
||||||
|
|
||||||
|
memory
|
||||||
|
.command("status")
|
||||||
|
.description("Show memory search index status")
|
||||||
|
.option("--agent <id>", "Agent id (default: default agent)")
|
||||||
|
.option("--json", "Print JSON")
|
||||||
|
.action(async (opts: MemoryCommandOptions) => {
|
||||||
|
const cfg = loadConfig();
|
||||||
|
const agentId = resolveAgent(cfg, opts.agent);
|
||||||
|
const { manager, error } = await getMemorySearchManager({ cfg, agentId });
|
||||||
|
if (!manager) {
|
||||||
|
defaultRuntime.log(error ?? "Memory search disabled.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const status = manager.status();
|
||||||
|
if (opts.json) {
|
||||||
|
defaultRuntime.log(JSON.stringify(status, null, 2));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const lines = [
|
||||||
|
`${chalk.bold.cyan("Memory Search")} (${agentId})`,
|
||||||
|
`Provider: ${status.provider} (requested: ${status.requestedProvider})`,
|
||||||
|
status.fallback
|
||||||
|
? chalk.yellow(`Fallback: ${status.fallback.from}`)
|
||||||
|
: null,
|
||||||
|
`Files: ${status.files}`,
|
||||||
|
`Chunks: ${status.chunks}`,
|
||||||
|
`Dirty: ${status.dirty ? "yes" : "no"}`,
|
||||||
|
`Index: ${status.dbPath}`,
|
||||||
|
].filter(Boolean) as string[];
|
||||||
|
if (status.fallback?.reason) {
|
||||||
|
lines.push(chalk.gray(status.fallback.reason));
|
||||||
|
}
|
||||||
|
defaultRuntime.log(lines.join("\n"));
|
||||||
|
});
|
||||||
|
|
||||||
|
memory
|
||||||
|
.command("index")
|
||||||
|
.description("Reindex memory files")
|
||||||
|
.option("--agent <id>", "Agent id (default: default agent)")
|
||||||
|
.option("--force", "Force full reindex", false)
|
||||||
|
.action(async (opts: MemoryCommandOptions & { force?: boolean }) => {
|
||||||
|
const cfg = loadConfig();
|
||||||
|
const agentId = resolveAgent(cfg, opts.agent);
|
||||||
|
const { manager, error } = await getMemorySearchManager({ cfg, agentId });
|
||||||
|
if (!manager) {
|
||||||
|
defaultRuntime.log(error ?? "Memory search disabled.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
await manager.sync({ reason: "cli", force: opts.force });
|
||||||
|
defaultRuntime.log("Memory index updated.");
|
||||||
|
});
|
||||||
|
|
||||||
|
memory
|
||||||
|
.command("search")
|
||||||
|
.description("Search memory files")
|
||||||
|
.argument("<query>", "Search query")
|
||||||
|
.option("--agent <id>", "Agent id (default: default agent)")
|
||||||
|
.option("--max-results <n>", "Max results", (v) => Number(v))
|
||||||
|
.option("--min-score <n>", "Minimum score", (v) => Number(v))
|
||||||
|
.option("--json", "Print JSON")
|
||||||
|
.action(
|
||||||
|
async (
|
||||||
|
query: string,
|
||||||
|
opts: MemoryCommandOptions & {
|
||||||
|
maxResults?: number;
|
||||||
|
minScore?: number;
|
||||||
|
},
|
||||||
|
) => {
|
||||||
|
const cfg = loadConfig();
|
||||||
|
const agentId = resolveAgent(cfg, opts.agent);
|
||||||
|
const { manager, error } = await getMemorySearchManager({
|
||||||
|
cfg,
|
||||||
|
agentId,
|
||||||
|
});
|
||||||
|
if (!manager) {
|
||||||
|
defaultRuntime.log(error ?? "Memory search disabled.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const results = await manager.search(query, {
|
||||||
|
maxResults: opts.maxResults,
|
||||||
|
minScore: opts.minScore,
|
||||||
|
});
|
||||||
|
if (opts.json) {
|
||||||
|
defaultRuntime.log(JSON.stringify({ results }, null, 2));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (results.length === 0) {
|
||||||
|
defaultRuntime.log("No matches.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const lines: string[] = [];
|
||||||
|
for (const result of results) {
|
||||||
|
lines.push(
|
||||||
|
`${chalk.green(result.score.toFixed(3))} ${result.path}:${result.startLine}-${result.endLine}`,
|
||||||
|
);
|
||||||
|
lines.push(chalk.gray(result.snippet));
|
||||||
|
lines.push("");
|
||||||
|
}
|
||||||
|
defaultRuntime.log(lines.join("\n").trim());
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
@@ -50,6 +50,7 @@ import { registerDocsCli } from "./docs-cli.js";
|
|||||||
import { registerGatewayCli } from "./gateway-cli.js";
|
import { registerGatewayCli } from "./gateway-cli.js";
|
||||||
import { registerHooksCli } from "./hooks-cli.js";
|
import { registerHooksCli } from "./hooks-cli.js";
|
||||||
import { registerLogsCli } from "./logs-cli.js";
|
import { registerLogsCli } from "./logs-cli.js";
|
||||||
|
import { registerMemoryCli } from "./memory-cli.js";
|
||||||
import { registerModelsCli } from "./models-cli.js";
|
import { registerModelsCli } from "./models-cli.js";
|
||||||
import { registerNodesCli } from "./nodes-cli.js";
|
import { registerNodesCli } from "./nodes-cli.js";
|
||||||
import { registerPairingCli } from "./pairing-cli.js";
|
import { registerPairingCli } from "./pairing-cli.js";
|
||||||
@@ -1213,6 +1214,7 @@ ${theme.muted("Docs:")} ${formatDocsLink(
|
|||||||
registerDaemonCli(program);
|
registerDaemonCli(program);
|
||||||
registerGatewayCli(program);
|
registerGatewayCli(program);
|
||||||
registerLogsCli(program);
|
registerLogsCli(program);
|
||||||
|
registerMemoryCli(program);
|
||||||
registerModelsCli(program);
|
registerModelsCli(program);
|
||||||
registerNodesCli(program);
|
registerNodesCli(program);
|
||||||
registerSandboxCli(program);
|
registerSandboxCli(program);
|
||||||
|
|||||||
@@ -115,6 +115,23 @@ const FIELD_LABELS: Record<string, string> = {
|
|||||||
"gateway.reload.mode": "Config Reload Mode",
|
"gateway.reload.mode": "Config Reload Mode",
|
||||||
"gateway.reload.debounceMs": "Config Reload Debounce (ms)",
|
"gateway.reload.debounceMs": "Config Reload Debounce (ms)",
|
||||||
"agents.defaults.workspace": "Workspace",
|
"agents.defaults.workspace": "Workspace",
|
||||||
|
"agents.defaults.memorySearch": "Memory Search",
|
||||||
|
"agents.defaults.memorySearch.enabled": "Enable Memory Search",
|
||||||
|
"agents.defaults.memorySearch.provider": "Memory Search Provider",
|
||||||
|
"agents.defaults.memorySearch.model": "Memory Search Model",
|
||||||
|
"agents.defaults.memorySearch.fallback": "Memory Search Fallback",
|
||||||
|
"agents.defaults.memorySearch.local.modelPath": "Local Embedding Model Path",
|
||||||
|
"agents.defaults.memorySearch.store.path": "Memory Search Index Path",
|
||||||
|
"agents.defaults.memorySearch.chunking.tokens": "Memory Chunk Tokens",
|
||||||
|
"agents.defaults.memorySearch.chunking.overlap":
|
||||||
|
"Memory Chunk Overlap Tokens",
|
||||||
|
"agents.defaults.memorySearch.sync.onSessionStart": "Index on Session Start",
|
||||||
|
"agents.defaults.memorySearch.sync.onSearch": "Index on Search (Lazy)",
|
||||||
|
"agents.defaults.memorySearch.sync.watch": "Watch Memory Files",
|
||||||
|
"agents.defaults.memorySearch.sync.watchDebounceMs":
|
||||||
|
"Memory Watch Debounce (ms)",
|
||||||
|
"agents.defaults.memorySearch.query.maxResults": "Memory Search Max Results",
|
||||||
|
"agents.defaults.memorySearch.query.minScore": "Memory Search Min Score",
|
||||||
"auth.profiles": "Auth Profiles",
|
"auth.profiles": "Auth Profiles",
|
||||||
"auth.order": "Auth Profile Order",
|
"auth.order": "Auth Profile Order",
|
||||||
"auth.cooldowns.billingBackoffHours": "Billing Backoff (hours)",
|
"auth.cooldowns.billingBackoffHours": "Billing Backoff (hours)",
|
||||||
@@ -215,6 +232,20 @@ const FIELD_HELP: Record<string, string> = {
|
|||||||
"Failure window (hours) for backoff counters (default: 24).",
|
"Failure window (hours) for backoff counters (default: 24).",
|
||||||
"agents.defaults.models":
|
"agents.defaults.models":
|
||||||
"Configured model catalog (keys are full provider/model IDs).",
|
"Configured model catalog (keys are full provider/model IDs).",
|
||||||
|
"agents.defaults.memorySearch":
|
||||||
|
"Vector search over MEMORY.md and memory/*.md (per-agent overrides supported).",
|
||||||
|
"agents.defaults.memorySearch.provider":
|
||||||
|
'Embedding provider ("openai" or "local").',
|
||||||
|
"agents.defaults.memorySearch.local.modelPath":
|
||||||
|
"Local GGUF model path or hf: URI (node-llama-cpp).",
|
||||||
|
"agents.defaults.memorySearch.fallback":
|
||||||
|
'Fallback to OpenAI when local embeddings fail ("openai" or "none").',
|
||||||
|
"agents.defaults.memorySearch.store.path":
|
||||||
|
"SQLite index path (default: ~/.clawdbot/memory/{agentId}.sqlite).",
|
||||||
|
"agents.defaults.memorySearch.sync.onSearch":
|
||||||
|
"Lazy sync: reindex on first search after a change.",
|
||||||
|
"agents.defaults.memorySearch.sync.watch":
|
||||||
|
"Watch memory files for changes (chokidar).",
|
||||||
"plugins.enabled": "Enable plugin/extension loading (default: true).",
|
"plugins.enabled": "Enable plugin/extension loading (default: true).",
|
||||||
"plugins.allow":
|
"plugins.allow":
|
||||||
"Optional allowlist of plugin ids; when set, only listed plugins load.",
|
"Optional allowlist of plugin ids; when set, only listed plugins load.",
|
||||||
|
|||||||
@@ -996,6 +996,47 @@ export type AgentToolsConfig = {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export type MemorySearchConfig = {
|
||||||
|
/** Enable vector memory search (default: true). */
|
||||||
|
enabled?: boolean;
|
||||||
|
/** Embedding provider mode. */
|
||||||
|
provider?: "openai" | "local";
|
||||||
|
/** Fallback behavior when local embeddings fail. */
|
||||||
|
fallback?: "openai" | "none";
|
||||||
|
/** Embedding model id (remote) or alias (local). */
|
||||||
|
model?: string;
|
||||||
|
/** Local embedding settings (node-llama-cpp). */
|
||||||
|
local?: {
|
||||||
|
/** GGUF model path or hf: URI. */
|
||||||
|
modelPath?: string;
|
||||||
|
/** Optional cache directory for local models. */
|
||||||
|
modelCacheDir?: string;
|
||||||
|
};
|
||||||
|
/** Index storage configuration. */
|
||||||
|
store?: {
|
||||||
|
driver?: "sqlite";
|
||||||
|
path?: string;
|
||||||
|
};
|
||||||
|
/** Chunking configuration. */
|
||||||
|
chunking?: {
|
||||||
|
tokens?: number;
|
||||||
|
overlap?: number;
|
||||||
|
};
|
||||||
|
/** Sync behavior. */
|
||||||
|
sync?: {
|
||||||
|
onSessionStart?: boolean;
|
||||||
|
onSearch?: boolean;
|
||||||
|
watch?: boolean;
|
||||||
|
watchDebounceMs?: number;
|
||||||
|
intervalMinutes?: number;
|
||||||
|
};
|
||||||
|
/** Query behavior. */
|
||||||
|
query?: {
|
||||||
|
maxResults?: number;
|
||||||
|
minScore?: number;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
export type ToolsConfig = {
|
export type ToolsConfig = {
|
||||||
allow?: string[];
|
allow?: string[];
|
||||||
deny?: string[];
|
deny?: string[];
|
||||||
@@ -1070,6 +1111,7 @@ export type AgentConfig = {
|
|||||||
workspace?: string;
|
workspace?: string;
|
||||||
agentDir?: string;
|
agentDir?: string;
|
||||||
model?: string;
|
model?: string;
|
||||||
|
memorySearch?: MemorySearchConfig;
|
||||||
/** Human-like delay between block replies for this agent. */
|
/** Human-like delay between block replies for this agent. */
|
||||||
humanDelay?: HumanDelayConfig;
|
humanDelay?: HumanDelayConfig;
|
||||||
identity?: IdentityConfig;
|
identity?: IdentityConfig;
|
||||||
@@ -1534,6 +1576,8 @@ export type AgentDefaultsConfig = {
|
|||||||
contextPruning?: AgentContextPruningConfig;
|
contextPruning?: AgentContextPruningConfig;
|
||||||
/** Compaction tuning and pre-compaction memory flush behavior. */
|
/** Compaction tuning and pre-compaction memory flush behavior. */
|
||||||
compaction?: AgentCompactionConfig;
|
compaction?: AgentCompactionConfig;
|
||||||
|
/** Vector memory search configuration (per-agent overrides supported). */
|
||||||
|
memorySearch?: MemorySearchConfig;
|
||||||
/** Default thinking level when no /think directive is present. */
|
/** Default thinking level when no /think directive is present. */
|
||||||
thinkingDefault?: "off" | "minimal" | "low" | "medium" | "high";
|
thinkingDefault?: "off" | "minimal" | "low" | "medium" | "high";
|
||||||
/** Default verbose level when no /verbose directive is present. */
|
/** Default verbose level when no /verbose directive is present. */
|
||||||
|
|||||||
@@ -867,6 +867,48 @@ const AgentToolsSchema = z
|
|||||||
})
|
})
|
||||||
.optional();
|
.optional();
|
||||||
|
|
||||||
|
const MemorySearchSchema = z
|
||||||
|
.object({
|
||||||
|
enabled: z.boolean().optional(),
|
||||||
|
provider: z.union([z.literal("openai"), z.literal("local")]).optional(),
|
||||||
|
fallback: z.union([z.literal("openai"), z.literal("none")]).optional(),
|
||||||
|
model: z.string().optional(),
|
||||||
|
local: z
|
||||||
|
.object({
|
||||||
|
modelPath: z.string().optional(),
|
||||||
|
modelCacheDir: z.string().optional(),
|
||||||
|
})
|
||||||
|
.optional(),
|
||||||
|
store: z
|
||||||
|
.object({
|
||||||
|
driver: z.literal("sqlite").optional(),
|
||||||
|
path: z.string().optional(),
|
||||||
|
})
|
||||||
|
.optional(),
|
||||||
|
chunking: z
|
||||||
|
.object({
|
||||||
|
tokens: z.number().int().positive().optional(),
|
||||||
|
overlap: z.number().int().nonnegative().optional(),
|
||||||
|
})
|
||||||
|
.optional(),
|
||||||
|
sync: z
|
||||||
|
.object({
|
||||||
|
onSessionStart: z.boolean().optional(),
|
||||||
|
onSearch: z.boolean().optional(),
|
||||||
|
watch: z.boolean().optional(),
|
||||||
|
watchDebounceMs: z.number().int().nonnegative().optional(),
|
||||||
|
intervalMinutes: z.number().int().nonnegative().optional(),
|
||||||
|
})
|
||||||
|
.optional(),
|
||||||
|
query: z
|
||||||
|
.object({
|
||||||
|
maxResults: z.number().int().positive().optional(),
|
||||||
|
minScore: z.number().min(0).max(1).optional(),
|
||||||
|
})
|
||||||
|
.optional(),
|
||||||
|
})
|
||||||
|
.optional();
|
||||||
|
|
||||||
const AgentEntrySchema = z.object({
|
const AgentEntrySchema = z.object({
|
||||||
id: z.string(),
|
id: z.string(),
|
||||||
default: z.boolean().optional(),
|
default: z.boolean().optional(),
|
||||||
@@ -874,6 +916,7 @@ const AgentEntrySchema = z.object({
|
|||||||
workspace: z.string().optional(),
|
workspace: z.string().optional(),
|
||||||
agentDir: z.string().optional(),
|
agentDir: z.string().optional(),
|
||||||
model: z.string().optional(),
|
model: z.string().optional(),
|
||||||
|
memorySearch: MemorySearchSchema,
|
||||||
humanDelay: HumanDelaySchema.optional(),
|
humanDelay: HumanDelaySchema.optional(),
|
||||||
identity: IdentitySchema,
|
identity: IdentitySchema,
|
||||||
groupChat: GroupChatSchema,
|
groupChat: GroupChatSchema,
|
||||||
@@ -1098,6 +1141,7 @@ const AgentDefaultsSchema = z
|
|||||||
userTimezone: z.string().optional(),
|
userTimezone: z.string().optional(),
|
||||||
contextTokens: z.number().int().positive().optional(),
|
contextTokens: z.number().int().positive().optional(),
|
||||||
cliBackends: z.record(z.string(), CliBackendSchema).optional(),
|
cliBackends: z.record(z.string(), CliBackendSchema).optional(),
|
||||||
|
memorySearch: MemorySearchSchema,
|
||||||
contextPruning: z
|
contextPruning: z
|
||||||
.object({
|
.object({
|
||||||
mode: z
|
mode: z
|
||||||
|
|||||||
@@ -238,22 +238,10 @@ function buildLiveGatewayConfig(params: {
|
|||||||
candidates: Array<Model<Api>>;
|
candidates: Array<Model<Api>>;
|
||||||
providerOverrides?: Record<string, ModelProviderConfig>;
|
providerOverrides?: Record<string, ModelProviderConfig>;
|
||||||
}): ClawdbotConfig {
|
}): ClawdbotConfig {
|
||||||
|
const providerOverrides = params.providerOverrides ?? {};
|
||||||
const lmstudioProvider = params.cfg.models?.providers?.lmstudio;
|
const lmstudioProvider = params.cfg.models?.providers?.lmstudio;
|
||||||
const baseProviders = params.cfg.models?.providers ?? {};
|
const baseProviders = params.cfg.models?.providers ?? {};
|
||||||
const nextProviders = params.providerOverrides
|
const nextProviders = {
|
||||||
? {
|
|
||||||
...baseProviders,
|
|
||||||
...(lmstudioProvider
|
|
||||||
? {
|
|
||||||
lmstudio: {
|
|
||||||
...lmstudioProvider,
|
|
||||||
api: "openai-completions",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
: {}),
|
|
||||||
...params.providerOverrides,
|
|
||||||
}
|
|
||||||
: {
|
|
||||||
...baseProviders,
|
...baseProviders,
|
||||||
...(lmstudioProvider
|
...(lmstudioProvider
|
||||||
? {
|
? {
|
||||||
@@ -263,6 +251,7 @@ function buildLiveGatewayConfig(params: {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
: {}),
|
: {}),
|
||||||
|
...providerOverrides,
|
||||||
};
|
};
|
||||||
const providers =
|
const providers =
|
||||||
Object.keys(nextProviders).length > 0 ? nextProviders : baseProviders;
|
Object.keys(nextProviders).length > 0 ? nextProviders : baseProviders;
|
||||||
|
|||||||
194
src/memory/embeddings.ts
Normal file
194
src/memory/embeddings.ts
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
import type { Llama, LlamaEmbeddingContext, LlamaModel } from "node-llama-cpp";
|
||||||
|
import { resolveApiKeyForProvider } from "../agents/model-auth.js";
|
||||||
|
import type { ClawdbotConfig } from "../config/config.js";
|
||||||
|
|
||||||
|
export type EmbeddingProvider = {
|
||||||
|
id: string;
|
||||||
|
model: string;
|
||||||
|
embedQuery: (text: string) => Promise<number[]>;
|
||||||
|
embedBatch: (texts: string[]) => Promise<number[][]>;
|
||||||
|
};
|
||||||
|
|
||||||
|
export type EmbeddingProviderResult = {
|
||||||
|
provider: EmbeddingProvider;
|
||||||
|
requestedProvider: "openai" | "local";
|
||||||
|
fallbackFrom?: "local";
|
||||||
|
fallbackReason?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
export type EmbeddingProviderOptions = {
|
||||||
|
config: ClawdbotConfig;
|
||||||
|
agentDir?: string;
|
||||||
|
provider: "openai" | "local";
|
||||||
|
model: string;
|
||||||
|
fallback: "openai" | "none";
|
||||||
|
local?: {
|
||||||
|
modelPath?: string;
|
||||||
|
modelCacheDir?: string;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
|
||||||
|
const DEFAULT_LOCAL_MODEL =
|
||||||
|
"hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
|
||||||
|
|
||||||
|
function normalizeOpenAiModel(model: string): string {
|
||||||
|
const trimmed = model.trim();
|
||||||
|
if (!trimmed) return "text-embedding-3-small";
|
||||||
|
if (trimmed.startsWith("openai/")) return trimmed.slice("openai/".length);
|
||||||
|
return trimmed;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function createOpenAiEmbeddingProvider(
|
||||||
|
options: EmbeddingProviderOptions,
|
||||||
|
): Promise<EmbeddingProvider> {
|
||||||
|
const { apiKey } = await resolveApiKeyForProvider({
|
||||||
|
provider: "openai",
|
||||||
|
cfg: options.config,
|
||||||
|
agentDir: options.agentDir,
|
||||||
|
});
|
||||||
|
|
||||||
|
const providerConfig = options.config.models?.providers?.openai;
|
||||||
|
const baseUrl = providerConfig?.baseUrl?.trim() || DEFAULT_OPENAI_BASE_URL;
|
||||||
|
const url = `${baseUrl.replace(/\/$/, "")}/embeddings`;
|
||||||
|
const headerOverrides = providerConfig?.headers ?? {};
|
||||||
|
const headers: Record<string, string> = {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
Authorization: `Bearer ${apiKey}`,
|
||||||
|
...headerOverrides,
|
||||||
|
};
|
||||||
|
const model = normalizeOpenAiModel(options.model);
|
||||||
|
|
||||||
|
const embed = async (input: string[]): Promise<number[][]> => {
|
||||||
|
if (input.length === 0) return [];
|
||||||
|
const res = await fetch(url, {
|
||||||
|
method: "POST",
|
||||||
|
headers,
|
||||||
|
body: JSON.stringify({ model, input }),
|
||||||
|
});
|
||||||
|
if (!res.ok) {
|
||||||
|
const text = await res.text();
|
||||||
|
throw new Error(`openai embeddings failed: ${res.status} ${text}`);
|
||||||
|
}
|
||||||
|
const payload = (await res.json()) as {
|
||||||
|
data?: Array<{ embedding?: number[] }>;
|
||||||
|
};
|
||||||
|
const data = payload.data ?? [];
|
||||||
|
return data.map((entry) => entry.embedding ?? []);
|
||||||
|
};
|
||||||
|
|
||||||
|
return {
|
||||||
|
id: "openai",
|
||||||
|
model,
|
||||||
|
embedQuery: async (text) => {
|
||||||
|
const [vec] = await embed([text]);
|
||||||
|
return vec ?? [];
|
||||||
|
},
|
||||||
|
embedBatch: embed,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Build an embedding provider backed by a local GGUF model via node-llama-cpp.
 *
 * The runtime, model, and embedding context are all created lazily on first
 * use so that simply constructing the provider stays cheap. Model path and
 * cache dir come from options.local with module defaults as fallback.
 *
 * Throws if node-llama-cpp cannot be imported/initialized (e.g. native build
 * missing) — callers are expected to catch and surface setup guidance.
 */
async function createLocalEmbeddingProvider(
  options: EmbeddingProviderOptions,
): Promise<EmbeddingProvider> {
  const modelPath = options.local?.modelPath?.trim() || DEFAULT_LOCAL_MODEL;
  const modelCacheDir = options.local?.modelCacheDir?.trim();

  // Lazy-load node-llama-cpp to keep startup light unless local is enabled.
  const { getLlama, resolveModelFile, LlamaLogLevel } = await import(
    "node-llama-cpp"
  );

  // Lazily-initialized singletons: runtime -> model -> embedding context.
  let llama: Llama | null = null;
  let embeddingModel: LlamaModel | null = null;
  let embeddingContext: LlamaEmbeddingContext | null = null;

  // Initialize each layer at most once; subsequent calls reuse the context.
  const ensureContext = async () => {
    if (!llama) {
      // Suppress llama.cpp chatter; only hard errors are logged.
      llama = await getLlama({ logLevel: LlamaLogLevel.error });
    }
    if (!embeddingModel) {
      // resolveModelFile downloads/locates the GGUF file (cache dir optional).
      const resolved = await resolveModelFile(
        modelPath,
        modelCacheDir || undefined,
      );
      embeddingModel = await llama.loadModel({ modelPath: resolved });
    }
    if (!embeddingContext) {
      embeddingContext = await embeddingModel.createEmbeddingContext();
    }
    return embeddingContext;
  };

  return {
    id: "local",
    // Reported model is the configured path/name, not the resolved file path.
    model: modelPath,
    embedQuery: async (text) => {
      const ctx = await ensureContext();
      const embedding = await ctx.getEmbeddingFor(text);
      return Array.from(embedding.vector) as number[];
    },
    embedBatch: async (texts) => {
      const ctx = await ensureContext();
      // Embeddings are requested concurrently; node-llama-cpp serializes
      // work on the shared context internally.
      const embeddings = await Promise.all(
        texts.map(async (text) => {
          const embedding = await ctx.getEmbeddingFor(text);
          return Array.from(embedding.vector) as number[];
        }),
      );
      return embeddings;
    },
  };
}
|
||||||
|
|
||||||
|
export async function createEmbeddingProvider(
|
||||||
|
options: EmbeddingProviderOptions,
|
||||||
|
): Promise<EmbeddingProviderResult> {
|
||||||
|
const requestedProvider = options.provider;
|
||||||
|
if (options.provider === "local") {
|
||||||
|
try {
|
||||||
|
const provider = await createLocalEmbeddingProvider(options);
|
||||||
|
return { provider, requestedProvider };
|
||||||
|
} catch (err) {
|
||||||
|
const reason = formatLocalSetupError(err);
|
||||||
|
if (options.fallback === "openai") {
|
||||||
|
try {
|
||||||
|
const provider = await createOpenAiEmbeddingProvider(options);
|
||||||
|
return {
|
||||||
|
provider,
|
||||||
|
requestedProvider,
|
||||||
|
fallbackFrom: "local",
|
||||||
|
fallbackReason: reason,
|
||||||
|
};
|
||||||
|
} catch (fallbackErr) {
|
||||||
|
throw new Error(
|
||||||
|
`${reason}\n\nFallback to OpenAI failed: ${formatError(fallbackErr)}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new Error(reason);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const provider = await createOpenAiEmbeddingProvider(options);
|
||||||
|
return { provider, requestedProvider };
|
||||||
|
}
|
||||||
|
|
||||||
|
function formatError(err: unknown): string {
|
||||||
|
if (err instanceof Error) return err.message;
|
||||||
|
return String(err);
|
||||||
|
}
|
||||||
|
|
||||||
|
function formatLocalSetupError(err: unknown): string {
|
||||||
|
const detail = formatError(err);
|
||||||
|
return [
|
||||||
|
"Local embeddings unavailable.",
|
||||||
|
detail ? `Reason: ${detail}` : undefined,
|
||||||
|
"To enable local embeddings:",
|
||||||
|
"1) pnpm approve-builds",
|
||||||
|
"2) select node-llama-cpp",
|
||||||
|
"3) pnpm rebuild node-llama-cpp",
|
||||||
|
'Or set agents.defaults.memorySearch.provider = "openai" (remote).',
|
||||||
|
]
|
||||||
|
.filter(Boolean)
|
||||||
|
.join("\n");
|
||||||
|
}
|
||||||
98
src/memory/index.test.ts
Normal file
98
src/memory/index.test.ts
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
import fs from "node:fs/promises";
|
||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
|
|
||||||
|
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||||
|
|
||||||
|
import { getMemorySearchManager } from "./index.js";
|
||||||
|
|
||||||
|
// Replace the real embeddings module with a deterministic in-memory mock so
// tests never hit OpenAI or node-llama-cpp.
vi.mock("./embeddings.js", () => {
  // Toy embedding: [count of "alpha", count of "beta", constant bias].
  // Texts mentioning "alpha" therefore score highest for the query "alpha".
  const embedText = (text: string) => {
    const lower = text.toLowerCase();
    const alpha = lower.split("alpha").length - 1;
    const beta = lower.split("beta").length - 1;
    return [alpha, beta, 1];
  };
  return {
    // Mirrors the real factory's result shape (provider + requestedProvider).
    createEmbeddingProvider: async () => ({
      requestedProvider: "openai",
      provider: {
        id: "mock",
        model: "mock-embed",
        embedQuery: async (text: string) => embedText(text),
        embedBatch: async (texts: string[]) => texts.map(embedText),
      },
    }),
  };
});
|
||||||
|
|
||||||
|
describe("memory index", () => {
  let workspaceDir: string;
  let indexPath: string;

  // Fresh temp workspace per test with one dated memory log and a MEMORY.md.
  beforeEach(async () => {
    workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-mem-"));
    indexPath = path.join(workspaceDir, "index.sqlite");
    await fs.mkdir(path.join(workspaceDir, "memory"));
    await fs.writeFile(
      path.join(workspaceDir, "memory", "2026-01-12.md"),
      "# Log\nAlpha memory line.\nAnother line.",
    );
    await fs.writeFile(
      path.join(workspaceDir, "MEMORY.md"),
      "Beta knowledge base entry.",
    );
  });

  afterEach(async () => {
    await fs.rm(workspaceDir, { recursive: true, force: true });
  });

  it("indexes memory files and searches by vector", async () => {
    // Watch and session-start sync disabled so only the explicit sync/search
    // paths are exercised; minScore 0 keeps all hits visible.
    const cfg = {
      agents: {
        defaults: {
          workspace: workspaceDir,
          memorySearch: {
            provider: "openai",
            model: "mock-embed",
            store: { path: indexPath },
            sync: { watch: false, onSessionStart: false, onSearch: true },
            query: { minScore: 0 },
          },
        },
        list: [{ id: "main", default: true }],
      },
    };
    const result = await getMemorySearchManager({ cfg, agentId: "main" });
    expect(result.manager).not.toBeNull();
    if (!result.manager) throw new Error("manager missing");
    await result.manager.sync({ force: true });
    // The mocked embedding ranks the "Alpha" log line above MEMORY.md.
    const results = await result.manager.search("alpha");
    expect(results.length).toBeGreaterThan(0);
    expect(results[0]?.path).toContain("memory/2026-01-12.md");
  });

  it("rejects reading non-memory paths", async () => {
    const cfg = {
      agents: {
        defaults: {
          workspace: workspaceDir,
          memorySearch: {
            provider: "openai",
            model: "mock-embed",
            store: { path: indexPath },
            sync: { watch: false, onSessionStart: false, onSearch: true },
          },
        },
        list: [{ id: "main", default: true }],
      },
    };
    const result = await getMemorySearchManager({ cfg, agentId: "main" });
    expect(result.manager).not.toBeNull();
    if (!result.manager) throw new Error("manager missing");
    // NOTES.md is outside the memory scope (MEMORY.md / memory/**), so
    // readFile must refuse it.
    await expect(
      result.manager.readFile({ relPath: "NOTES.md" }),
    ).rejects.toThrow("path required");
  });
});
|
||||||
641
src/memory/index.ts
Normal file
641
src/memory/index.ts
Normal file
@@ -0,0 +1,641 @@
|
|||||||
|
import crypto from "node:crypto";
|
||||||
|
import fsSync from "node:fs";
|
||||||
|
import fs from "node:fs/promises";
|
||||||
|
import path from "node:path";
|
||||||
|
|
||||||
|
import { DatabaseSync } from "node:sqlite";
|
||||||
|
import chokidar, { type FSWatcher } from "chokidar";
|
||||||
|
|
||||||
|
import {
|
||||||
|
resolveAgentDir,
|
||||||
|
resolveAgentWorkspaceDir,
|
||||||
|
} from "../agents/agent-scope.js";
|
||||||
|
import type { ResolvedMemorySearchConfig } from "../agents/memory-search.js";
|
||||||
|
import { resolveMemorySearchConfig } from "../agents/memory-search.js";
|
||||||
|
import type { ClawdbotConfig } from "../config/config.js";
|
||||||
|
import { resolveUserPath, truncateUtf16Safe } from "../utils.js";
|
||||||
|
import {
|
||||||
|
createEmbeddingProvider,
|
||||||
|
type EmbeddingProvider,
|
||||||
|
type EmbeddingProviderResult,
|
||||||
|
} from "./embeddings.js";
|
||||||
|
|
||||||
|
/** One vector-search hit: a chunk of a memory file plus its similarity score. */
export type MemorySearchResult = {
  path: string; // workspace-relative file path (forward slashes)
  startLine: number; // 1-based first line of the chunk
  endLine: number; // 1-based last line of the chunk
  score: number; // cosine similarity against the query embedding
  snippet: string; // chunk text truncated to SNIPPET_MAX_CHARS
};

/** Filesystem metadata for one indexed memory file. */
type MemoryFileEntry = {
  path: string; // workspace-relative path (forward slashes)
  absPath: string; // absolute path on disk
  mtimeMs: number;
  size: number;
  hash: string; // sha256 of the file content
};

/** One embeddable slice of a file, aligned to whole lines. */
type MemoryChunk = {
  startLine: number; // 1-based, inclusive
  endLine: number; // 1-based, inclusive
  text: string;
  hash: string; // sha256 of text; used for stable chunk ids
};

/**
 * Index-wide settings snapshot persisted in the meta table; any change
 * triggers a full reindex (embeddings are model/chunking dependent).
 */
type MemoryIndexMeta = {
  model: string;
  provider: string;
  chunkTokens: number;
  chunkOverlap: number;
};

// Key under which MemoryIndexMeta is stored in the `meta` table.
const META_KEY = "memory_index_meta_v1";
// Upper bound on snippet length returned from search (UTF-16-safe truncation).
const SNIPPET_MAX_CHARS = 700;

// Process-wide manager cache keyed by agent + workspace + serialized settings.
const INDEX_CACHE = new Map<string, MemoryIndexManager>();
|
||||||
|
|
||||||
|
export class MemoryIndexManager {
|
||||||
|
private readonly cfg: ClawdbotConfig;
|
||||||
|
private readonly agentId: string;
|
||||||
|
private readonly workspaceDir: string;
|
||||||
|
private readonly settings: ResolvedMemorySearchConfig;
|
||||||
|
private readonly provider: EmbeddingProvider;
|
||||||
|
private readonly requestedProvider: "openai" | "local";
|
||||||
|
private readonly fallbackReason?: string;
|
||||||
|
private readonly db: DatabaseSync;
|
||||||
|
private watcher: FSWatcher | null = null;
|
||||||
|
private watchTimer: NodeJS.Timeout | null = null;
|
||||||
|
private intervalTimer: NodeJS.Timeout | null = null;
|
||||||
|
private dirty = false;
|
||||||
|
private sessionWarm = new Set<string>();
|
||||||
|
private syncing: Promise<void> | null = null;
|
||||||
|
|
||||||
|
static async get(params: {
|
||||||
|
cfg: ClawdbotConfig;
|
||||||
|
agentId: string;
|
||||||
|
}): Promise<MemoryIndexManager | null> {
|
||||||
|
const { cfg, agentId } = params;
|
||||||
|
const settings = resolveMemorySearchConfig(cfg, agentId);
|
||||||
|
if (!settings) return null;
|
||||||
|
const workspaceDir = resolveAgentWorkspaceDir(cfg, agentId);
|
||||||
|
const key = `${agentId}:${workspaceDir}:${JSON.stringify(settings)}`;
|
||||||
|
const existing = INDEX_CACHE.get(key);
|
||||||
|
if (existing) return existing;
|
||||||
|
const providerResult = await createEmbeddingProvider({
|
||||||
|
config: cfg,
|
||||||
|
agentDir: resolveAgentDir(cfg, agentId),
|
||||||
|
provider: settings.provider,
|
||||||
|
model: settings.model,
|
||||||
|
fallback: settings.fallback,
|
||||||
|
local: settings.local,
|
||||||
|
});
|
||||||
|
const manager = new MemoryIndexManager({
|
||||||
|
cfg,
|
||||||
|
agentId,
|
||||||
|
workspaceDir,
|
||||||
|
settings,
|
||||||
|
providerResult,
|
||||||
|
});
|
||||||
|
INDEX_CACHE.set(key, manager);
|
||||||
|
return manager;
|
||||||
|
}
|
||||||
|
|
||||||
|
private constructor(params: {
|
||||||
|
cfg: ClawdbotConfig;
|
||||||
|
agentId: string;
|
||||||
|
workspaceDir: string;
|
||||||
|
settings: ResolvedMemorySearchConfig;
|
||||||
|
providerResult: EmbeddingProviderResult;
|
||||||
|
}) {
|
||||||
|
this.cfg = params.cfg;
|
||||||
|
this.agentId = params.agentId;
|
||||||
|
this.workspaceDir = params.workspaceDir;
|
||||||
|
this.settings = params.settings;
|
||||||
|
this.provider = params.providerResult.provider;
|
||||||
|
this.requestedProvider = params.providerResult.requestedProvider;
|
||||||
|
this.fallbackReason = params.providerResult.fallbackReason;
|
||||||
|
this.db = this.openDatabase();
|
||||||
|
this.ensureSchema();
|
||||||
|
this.ensureWatcher();
|
||||||
|
this.ensureIntervalSync();
|
||||||
|
this.dirty = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
async warmSession(sessionKey?: string): Promise<void> {
|
||||||
|
if (!this.settings.sync.onSessionStart) return;
|
||||||
|
const key = sessionKey?.trim() || "";
|
||||||
|
if (key && this.sessionWarm.has(key)) return;
|
||||||
|
await this.sync({ reason: "session-start" });
|
||||||
|
if (key) this.sessionWarm.add(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
async search(
|
||||||
|
query: string,
|
||||||
|
opts?: {
|
||||||
|
maxResults?: number;
|
||||||
|
minScore?: number;
|
||||||
|
sessionKey?: string;
|
||||||
|
},
|
||||||
|
): Promise<MemorySearchResult[]> {
|
||||||
|
await this.warmSession(opts?.sessionKey);
|
||||||
|
if (this.settings.sync.onSearch && this.dirty) {
|
||||||
|
await this.sync({ reason: "search" });
|
||||||
|
}
|
||||||
|
const cleaned = query.trim();
|
||||||
|
if (!cleaned) return [];
|
||||||
|
const queryVec = await this.provider.embedQuery(cleaned);
|
||||||
|
if (queryVec.length === 0) return [];
|
||||||
|
const candidates = this.listChunks();
|
||||||
|
const scored = candidates
|
||||||
|
.map((chunk) => ({
|
||||||
|
chunk,
|
||||||
|
score: cosineSimilarity(queryVec, chunk.embedding),
|
||||||
|
}))
|
||||||
|
.filter((entry) => Number.isFinite(entry.score));
|
||||||
|
const minScore = opts?.minScore ?? this.settings.query.minScore;
|
||||||
|
const maxResults = opts?.maxResults ?? this.settings.query.maxResults;
|
||||||
|
return scored
|
||||||
|
.filter((entry) => entry.score >= minScore)
|
||||||
|
.sort((a, b) => b.score - a.score)
|
||||||
|
.slice(0, maxResults)
|
||||||
|
.map((entry) => ({
|
||||||
|
path: entry.chunk.path,
|
||||||
|
startLine: entry.chunk.startLine,
|
||||||
|
endLine: entry.chunk.endLine,
|
||||||
|
score: entry.score,
|
||||||
|
snippet: truncateUtf16Safe(entry.chunk.text, SNIPPET_MAX_CHARS),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
async sync(params?: { reason?: string; force?: boolean }): Promise<void> {
|
||||||
|
if (this.syncing) return this.syncing;
|
||||||
|
this.syncing = this.runSync(params).finally(() => {
|
||||||
|
this.syncing = null;
|
||||||
|
});
|
||||||
|
return this.syncing;
|
||||||
|
}
|
||||||
|
|
||||||
|
async readFile(params: {
|
||||||
|
relPath: string;
|
||||||
|
from?: number;
|
||||||
|
lines?: number;
|
||||||
|
}): Promise<{ text: string; path: string }> {
|
||||||
|
const relPath = normalizeRelPath(params.relPath);
|
||||||
|
if (!relPath || !isMemoryPath(relPath)) {
|
||||||
|
throw new Error("path required");
|
||||||
|
}
|
||||||
|
const absPath = path.resolve(this.workspaceDir, relPath);
|
||||||
|
if (!absPath.startsWith(this.workspaceDir)) {
|
||||||
|
throw new Error("path escapes workspace");
|
||||||
|
}
|
||||||
|
const content = await fs.readFile(absPath, "utf-8");
|
||||||
|
if (!params.from && !params.lines) {
|
||||||
|
return { text: content, path: relPath };
|
||||||
|
}
|
||||||
|
const lines = content.split("\n");
|
||||||
|
const start = Math.max(1, params.from ?? 1);
|
||||||
|
const count = Math.max(1, params.lines ?? lines.length);
|
||||||
|
const slice = lines.slice(start - 1, start - 1 + count);
|
||||||
|
return { text: slice.join("\n"), path: relPath };
|
||||||
|
}
|
||||||
|
|
||||||
|
status(): {
|
||||||
|
files: number;
|
||||||
|
chunks: number;
|
||||||
|
dirty: boolean;
|
||||||
|
workspaceDir: string;
|
||||||
|
dbPath: string;
|
||||||
|
provider: string;
|
||||||
|
model: string;
|
||||||
|
requestedProvider: string;
|
||||||
|
fallback?: { from: string; reason?: string };
|
||||||
|
} {
|
||||||
|
const files = this.db.prepare(`SELECT COUNT(*) as c FROM files`).get() as {
|
||||||
|
c: number;
|
||||||
|
};
|
||||||
|
const chunks = this.db
|
||||||
|
.prepare(`SELECT COUNT(*) as c FROM chunks`)
|
||||||
|
.get() as {
|
||||||
|
c: number;
|
||||||
|
};
|
||||||
|
return {
|
||||||
|
files: files?.c ?? 0,
|
||||||
|
chunks: chunks?.c ?? 0,
|
||||||
|
dirty: this.dirty,
|
||||||
|
workspaceDir: this.workspaceDir,
|
||||||
|
dbPath: this.settings.store.path,
|
||||||
|
provider: this.provider.id,
|
||||||
|
model: this.provider.model,
|
||||||
|
requestedProvider: this.requestedProvider,
|
||||||
|
fallback: this.fallbackReason
|
||||||
|
? { from: "local", reason: this.fallbackReason }
|
||||||
|
: undefined,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private openDatabase(): DatabaseSync {
|
||||||
|
const dbPath = resolveUserPath(this.settings.store.path);
|
||||||
|
const dir = path.dirname(dbPath);
|
||||||
|
ensureDir(dir);
|
||||||
|
return new DatabaseSync(dbPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ensureSchema() {
|
||||||
|
this.db.exec(`
|
||||||
|
CREATE TABLE IF NOT EXISTS meta (
|
||||||
|
key TEXT PRIMARY KEY,
|
||||||
|
value TEXT NOT NULL
|
||||||
|
);
|
||||||
|
`);
|
||||||
|
this.db.exec(`
|
||||||
|
CREATE TABLE IF NOT EXISTS files (
|
||||||
|
path TEXT PRIMARY KEY,
|
||||||
|
hash TEXT NOT NULL,
|
||||||
|
mtime INTEGER NOT NULL,
|
||||||
|
size INTEGER NOT NULL
|
||||||
|
);
|
||||||
|
`);
|
||||||
|
this.db.exec(`
|
||||||
|
CREATE TABLE IF NOT EXISTS chunks (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
path TEXT NOT NULL,
|
||||||
|
start_line INTEGER NOT NULL,
|
||||||
|
end_line INTEGER NOT NULL,
|
||||||
|
hash TEXT NOT NULL,
|
||||||
|
model TEXT NOT NULL,
|
||||||
|
text TEXT NOT NULL,
|
||||||
|
embedding TEXT NOT NULL,
|
||||||
|
updated_at INTEGER NOT NULL
|
||||||
|
);
|
||||||
|
`);
|
||||||
|
this.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ensureWatcher() {
|
||||||
|
if (!this.settings.sync.watch || this.watcher) return;
|
||||||
|
const watchPaths = [
|
||||||
|
path.join(this.workspaceDir, "MEMORY.md"),
|
||||||
|
path.join(this.workspaceDir, "memory"),
|
||||||
|
];
|
||||||
|
this.watcher = chokidar.watch(watchPaths, {
|
||||||
|
ignoreInitial: true,
|
||||||
|
awaitWriteFinish: {
|
||||||
|
stabilityThreshold: this.settings.sync.watchDebounceMs,
|
||||||
|
pollInterval: 100,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const markDirty = () => {
|
||||||
|
this.dirty = true;
|
||||||
|
this.scheduleWatchSync();
|
||||||
|
};
|
||||||
|
this.watcher.on("add", markDirty);
|
||||||
|
this.watcher.on("change", markDirty);
|
||||||
|
this.watcher.on("unlink", markDirty);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ensureIntervalSync() {
|
||||||
|
const minutes = this.settings.sync.intervalMinutes;
|
||||||
|
if (!minutes || minutes <= 0 || this.intervalTimer) return;
|
||||||
|
const ms = minutes * 60 * 1000;
|
||||||
|
this.intervalTimer = setInterval(() => {
|
||||||
|
void this.sync({ reason: "interval" });
|
||||||
|
}, ms);
|
||||||
|
}
|
||||||
|
|
||||||
|
private scheduleWatchSync() {
|
||||||
|
if (!this.settings.sync.watch) return;
|
||||||
|
if (this.watchTimer) clearTimeout(this.watchTimer);
|
||||||
|
this.watchTimer = setTimeout(() => {
|
||||||
|
this.watchTimer = null;
|
||||||
|
void this.sync({ reason: "watch" });
|
||||||
|
}, this.settings.sync.watchDebounceMs);
|
||||||
|
}
|
||||||
|
|
||||||
|
private listChunks(): Array<{
|
||||||
|
path: string;
|
||||||
|
startLine: number;
|
||||||
|
endLine: number;
|
||||||
|
text: string;
|
||||||
|
embedding: number[];
|
||||||
|
}> {
|
||||||
|
const rows = this.db
|
||||||
|
.prepare(
|
||||||
|
`SELECT path, start_line, end_line, text, embedding FROM chunks WHERE model = ?`,
|
||||||
|
)
|
||||||
|
.all(this.provider.model) as Array<{
|
||||||
|
path: string;
|
||||||
|
start_line: number;
|
||||||
|
end_line: number;
|
||||||
|
text: string;
|
||||||
|
embedding: string;
|
||||||
|
}>;
|
||||||
|
return rows.map((row) => ({
|
||||||
|
path: row.path,
|
||||||
|
startLine: row.start_line,
|
||||||
|
endLine: row.end_line,
|
||||||
|
text: row.text,
|
||||||
|
embedding: parseEmbedding(row.embedding),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
private async runSync(params?: { reason?: string; force?: boolean }) {
|
||||||
|
const meta = this.readMeta();
|
||||||
|
const needsFullReindex =
|
||||||
|
params?.force ||
|
||||||
|
!meta ||
|
||||||
|
meta.model !== this.provider.model ||
|
||||||
|
meta.provider !== this.provider.id ||
|
||||||
|
meta.chunkTokens !== this.settings.chunking.tokens ||
|
||||||
|
meta.chunkOverlap !== this.settings.chunking.overlap;
|
||||||
|
if (needsFullReindex) {
|
||||||
|
this.resetIndex();
|
||||||
|
}
|
||||||
|
|
||||||
|
const files = await listMemoryFiles(this.workspaceDir);
|
||||||
|
const fileEntries = await Promise.all(
|
||||||
|
files.map(async (file) => buildFileEntry(file, this.workspaceDir)),
|
||||||
|
);
|
||||||
|
const activePaths = new Set(fileEntries.map((entry) => entry.path));
|
||||||
|
|
||||||
|
for (const entry of fileEntries) {
|
||||||
|
const record = this.db
|
||||||
|
.prepare(`SELECT hash FROM files WHERE path = ?`)
|
||||||
|
.get(entry.path) as { hash: string } | undefined;
|
||||||
|
if (!needsFullReindex && record?.hash === entry.hash) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
await this.indexFile(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
const staleRows = this.db.prepare(`SELECT path FROM files`).all() as Array<{
|
||||||
|
path: string;
|
||||||
|
}>;
|
||||||
|
for (const stale of staleRows) {
|
||||||
|
if (activePaths.has(stale.path)) continue;
|
||||||
|
this.db.prepare(`DELETE FROM files WHERE path = ?`).run(stale.path);
|
||||||
|
this.db.prepare(`DELETE FROM chunks WHERE path = ?`).run(stale.path);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.writeMeta({
|
||||||
|
model: this.provider.model,
|
||||||
|
provider: this.provider.id,
|
||||||
|
chunkTokens: this.settings.chunking.tokens,
|
||||||
|
chunkOverlap: this.settings.chunking.overlap,
|
||||||
|
});
|
||||||
|
this.dirty = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private resetIndex() {
|
||||||
|
this.db.exec(`DELETE FROM files`);
|
||||||
|
this.db.exec(`DELETE FROM chunks`);
|
||||||
|
}
|
||||||
|
|
||||||
|
private readMeta(): MemoryIndexMeta | null {
|
||||||
|
const row = this.db
|
||||||
|
.prepare(`SELECT value FROM meta WHERE key = ?`)
|
||||||
|
.get(META_KEY) as { value: string } | undefined;
|
||||||
|
if (!row?.value) return null;
|
||||||
|
try {
|
||||||
|
return JSON.parse(row.value) as MemoryIndexMeta;
|
||||||
|
} catch {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private writeMeta(meta: MemoryIndexMeta) {
|
||||||
|
const value = JSON.stringify(meta);
|
||||||
|
this.db
|
||||||
|
.prepare(
|
||||||
|
`INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value`,
|
||||||
|
)
|
||||||
|
.run(META_KEY, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async indexFile(entry: MemoryFileEntry) {
|
||||||
|
const content = await fs.readFile(entry.absPath, "utf-8");
|
||||||
|
const chunks = chunkMarkdown(content, this.settings.chunking);
|
||||||
|
const embeddings = await this.provider.embedBatch(
|
||||||
|
chunks.map((chunk) => chunk.text),
|
||||||
|
);
|
||||||
|
const now = Date.now();
|
||||||
|
this.db.prepare(`DELETE FROM chunks WHERE path = ?`).run(entry.path);
|
||||||
|
for (let i = 0; i < chunks.length; i++) {
|
||||||
|
const chunk = chunks[i];
|
||||||
|
const embedding = embeddings[i] ?? [];
|
||||||
|
const id = hashText(
|
||||||
|
`${entry.path}:${chunk.startLine}:${chunk.endLine}:${chunk.hash}:${this.provider.model}`,
|
||||||
|
);
|
||||||
|
this.db
|
||||||
|
.prepare(
|
||||||
|
`INSERT INTO chunks (id, path, start_line, end_line, hash, model, text, embedding, updated_at)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT(id) DO UPDATE SET
|
||||||
|
hash=excluded.hash,
|
||||||
|
model=excluded.model,
|
||||||
|
text=excluded.text,
|
||||||
|
embedding=excluded.embedding,
|
||||||
|
updated_at=excluded.updated_at`,
|
||||||
|
)
|
||||||
|
.run(
|
||||||
|
id,
|
||||||
|
entry.path,
|
||||||
|
chunk.startLine,
|
||||||
|
chunk.endLine,
|
||||||
|
chunk.hash,
|
||||||
|
this.provider.model,
|
||||||
|
chunk.text,
|
||||||
|
JSON.stringify(embedding),
|
||||||
|
now,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
this.db
|
||||||
|
.prepare(
|
||||||
|
`INSERT INTO files (path, hash, mtime, size) VALUES (?, ?, ?, ?)
|
||||||
|
ON CONFLICT(path) DO UPDATE SET hash=excluded.hash, mtime=excluded.mtime, size=excluded.size`,
|
||||||
|
)
|
||||||
|
.run(entry.path, entry.hash, entry.mtimeMs, entry.size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export type MemorySearchManagerResult = {
|
||||||
|
manager: MemoryIndexManager | null;
|
||||||
|
error?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
export async function getMemorySearchManager(params: {
|
||||||
|
cfg: ClawdbotConfig;
|
||||||
|
agentId: string;
|
||||||
|
}): Promise<MemorySearchManagerResult> {
|
||||||
|
try {
|
||||||
|
const manager = await MemoryIndexManager.get(params);
|
||||||
|
return { manager };
|
||||||
|
} catch (err) {
|
||||||
|
const message = err instanceof Error ? err.message : String(err);
|
||||||
|
return { manager: null, error: message };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function ensureDir(dir: string): string {
|
||||||
|
try {
|
||||||
|
fsSync.mkdirSync(dir, { recursive: true });
|
||||||
|
} catch {}
|
||||||
|
return dir;
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizeRelPath(value: string): string {
|
||||||
|
const trimmed = value.trim().replace(/^[./]+/, "");
|
||||||
|
return trimmed.replace(/\\/g, "/");
|
||||||
|
}
|
||||||
|
|
||||||
|
function isMemoryPath(relPath: string): boolean {
|
||||||
|
const normalized = normalizeRelPath(relPath);
|
||||||
|
if (!normalized) return false;
|
||||||
|
if (normalized === "MEMORY.md" || normalized === "memory.md") return true;
|
||||||
|
return normalized.startsWith("memory/");
|
||||||
|
}
|
||||||
|
|
||||||
|
async function listMemoryFiles(workspaceDir: string): Promise<string[]> {
|
||||||
|
const result: string[] = [];
|
||||||
|
const memoryFile = path.join(workspaceDir, "MEMORY.md");
|
||||||
|
const altMemoryFile = path.join(workspaceDir, "memory.md");
|
||||||
|
if (await exists(memoryFile)) result.push(memoryFile);
|
||||||
|
if (await exists(altMemoryFile)) result.push(altMemoryFile);
|
||||||
|
const memoryDir = path.join(workspaceDir, "memory");
|
||||||
|
if (await exists(memoryDir)) {
|
||||||
|
await walkDir(memoryDir, result);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function walkDir(dir: string, files: string[]) {
|
||||||
|
const entries = await fs.readdir(dir, { withFileTypes: true });
|
||||||
|
for (const entry of entries) {
|
||||||
|
const full = path.join(dir, entry.name);
|
||||||
|
if (entry.isDirectory()) {
|
||||||
|
await walkDir(full, files);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!entry.isFile()) continue;
|
||||||
|
if (!entry.name.endsWith(".md")) continue;
|
||||||
|
files.push(full);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function exists(filePath: string): Promise<boolean> {
|
||||||
|
try {
|
||||||
|
await fs.access(filePath);
|
||||||
|
return true;
|
||||||
|
} catch {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function buildFileEntry(
|
||||||
|
absPath: string,
|
||||||
|
workspaceDir: string,
|
||||||
|
): Promise<MemoryFileEntry> {
|
||||||
|
const stat = await fs.stat(absPath);
|
||||||
|
const content = await fs.readFile(absPath, "utf-8");
|
||||||
|
const hash = hashText(content);
|
||||||
|
return {
|
||||||
|
path: path.relative(workspaceDir, absPath).replace(/\\/g, "/"),
|
||||||
|
absPath,
|
||||||
|
mtimeMs: stat.mtimeMs,
|
||||||
|
size: stat.size,
|
||||||
|
hash,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function hashText(value: string): string {
|
||||||
|
return crypto.createHash("sha256").update(value).digest("hex");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Split Markdown into line-aligned chunks of roughly `chunking.tokens`
 * tokens, carrying roughly `chunking.overlap` tokens of trailing context
 * into the next chunk. Token budgets are approximated as 4 characters per
 * token. Each chunk records 1-based start/end line numbers and a content
 * hash (used for stable chunk ids in the index).
 */
function chunkMarkdown(
  content: string,
  chunking: { tokens: number; overlap: number },
): MemoryChunk[] {
  const lines = content.split("\n");
  // NOTE(review): split() always yields at least one element, so this guard
  // looks unreachable — harmless either way.
  if (lines.length === 0) return [];
  // Character budgets derived from token counts (~4 chars/token); minimum
  // chunk size of 32 chars avoids degenerate one-char chunks.
  const maxChars = Math.max(32, chunking.tokens * 4);
  const overlapChars = Math.max(0, chunking.overlap * 4);
  const chunks: MemoryChunk[] = [];

  // Lines accumulated for the in-progress chunk, with 1-based line numbers.
  let current: Array<{ line: string; lineNo: number }> = [];
  // Running size of `current` counting each line plus its newline.
  let currentChars = 0;

  // Emit the in-progress chunk (no-op when empty). Does not clear `current`;
  // carryOverlap() handles that.
  const flush = () => {
    if (current.length === 0) return;
    const firstEntry = current[0];
    const lastEntry = current[current.length - 1];
    if (!firstEntry || !lastEntry) return;
    const text = current.map((entry) => entry.line).join("\n");
    const startLine = firstEntry.lineNo;
    const endLine = lastEntry.lineNo;
    chunks.push({
      startLine,
      endLine,
      text,
      hash: hashText(text),
    });
  };

  // Seed the next chunk with the smallest tail of the previous one whose
  // size reaches overlapChars (or reset entirely when overlap is disabled).
  const carryOverlap = () => {
    if (overlapChars <= 0 || current.length === 0) {
      current = [];
      currentChars = 0;
      return;
    }
    let acc = 0;
    const kept: Array<{ line: string; lineNo: number }> = [];
    // Walk backwards so the kept tail preserves original line order.
    for (let i = current.length - 1; i >= 0; i -= 1) {
      const entry = current[i];
      if (!entry) continue;
      acc += entry.line.length + 1; // +1 for the newline
      kept.unshift(entry);
      if (acc >= overlapChars) break;
    }
    current = kept;
    currentChars = kept.reduce((sum, entry) => sum + entry.line.length + 1, 0);
  };

  for (let i = 0; i < lines.length; i += 1) {
    const line = lines[i] ?? "";
    const lineNo = i + 1;
    const lineSize = line.length + 1; // +1 for the newline
    // Flush before adding a line that would overflow the budget; a single
    // over-budget line still becomes its own (oversized) chunk.
    if (currentChars + lineSize > maxChars && current.length > 0) {
      flush();
      carryOverlap();
    }
    current.push({ line, lineNo });
    currentChars += lineSize;
  }
  // Emit whatever remains (including overlap tail merged with final lines).
  flush();
  return chunks;
}
|
||||||
|
|
||||||
|
function parseEmbedding(raw: string): number[] {
|
||||||
|
try {
|
||||||
|
const parsed = JSON.parse(raw) as number[];
|
||||||
|
return Array.isArray(parsed) ? parsed : [];
|
||||||
|
} catch {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function cosineSimilarity(a: number[], b: number[]): number {
|
||||||
|
if (a.length === 0 || b.length === 0) return 0;
|
||||||
|
const len = Math.min(a.length, b.length);
|
||||||
|
let dot = 0;
|
||||||
|
let normA = 0;
|
||||||
|
let normB = 0;
|
||||||
|
for (let i = 0; i < len; i += 1) {
|
||||||
|
const av = a[i] ?? 0;
|
||||||
|
const bv = b[i] ?? 0;
|
||||||
|
dot += av * bv;
|
||||||
|
normA += av * av;
|
||||||
|
normB += bv * bv;
|
||||||
|
}
|
||||||
|
if (normA === 0 || normB === 0) return 0;
|
||||||
|
return dot / (Math.sqrt(normA) * Math.sqrt(normB));
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user