feat: add hybrid memory search
This commit is contained in:
@@ -5,8 +5,9 @@ Docs: https://docs.clawd.bot
|
||||
## 2026.1.18-2
|
||||
|
||||
### Changes
|
||||
- Memory: add hybrid BM25 + vector search (FTS5) with weighted merging and fallback.
|
||||
- Memory: add SQLite embedding cache to speed up reindexing and frequent updates.
|
||||
- CLI: surface embedding cache state in `clawdbot memory status`.
|
||||
- CLI: surface FTS + embedding cache state in `clawdbot memory status`.
|
||||
|
||||
## 2026.1.18-1
|
||||
|
||||
|
||||
@@ -161,6 +161,33 @@ Local mode:
|
||||
- Freshness: watcher on `MEMORY.md` + `memory/` marks the index dirty (debounce 1.5s). Sync runs on session start, on first search when dirty, and optionally on an interval.
|
||||
- Reindex triggers: the index stores the embedding **provider/model + endpoint fingerprint + chunking params**. If any of those change, Clawdbot automatically resets and reindexes the entire store.
|
||||
|
||||
### Hybrid search (BM25 + vector)
|
||||
|
||||
When enabled, Clawdbot combines:
|
||||
- **Vector similarity** (semantic match, wording can differ)
|
||||
- **BM25 keyword relevance** (exact tokens like IDs, env vars, code symbols)
|
||||
|
||||
If full-text search is unavailable on your platform, Clawdbot falls back to vector-only search.
|
||||
|
||||
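Config (the two weights are normalized so they sum to 1, and the merged score is `vectorWeight * vectorScore + textWeight * textScore`; `candidateMultiplier` scales how many candidates each search fetches, relative to `maxResults`, before merging):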
|
||||
|
||||
```json5
|
||||
agents: {
|
||||
defaults: {
|
||||
memorySearch: {
|
||||
query: {
|
||||
hybrid: {
|
||||
enabled: true,
|
||||
vectorWeight: 0.7,
|
||||
textWeight: 0.3,
|
||||
candidateMultiplier: 4
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Embedding cache
|
||||
|
||||
Clawdbot can cache **chunk embeddings** in SQLite so reindexing and frequent updates (especially session transcripts) don't re-embed unchanged text.
|
||||
|
||||
@@ -53,6 +53,12 @@ export type ResolvedMemorySearchConfig = {
|
||||
query: {
|
||||
maxResults: number;
|
||||
minScore: number;
|
||||
hybrid: {
|
||||
enabled: boolean;
|
||||
vectorWeight: number;
|
||||
textWeight: number;
|
||||
candidateMultiplier: number;
|
||||
};
|
||||
};
|
||||
cache: {
|
||||
enabled: boolean;
|
||||
@@ -66,6 +72,10 @@ const DEFAULT_CHUNK_OVERLAP = 80;
|
||||
const DEFAULT_WATCH_DEBOUNCE_MS = 1500;
|
||||
const DEFAULT_MAX_RESULTS = 6;
|
||||
const DEFAULT_MIN_SCORE = 0.35;
|
||||
const DEFAULT_HYBRID_ENABLED = true;
|
||||
const DEFAULT_HYBRID_VECTOR_WEIGHT = 0.7;
|
||||
const DEFAULT_HYBRID_TEXT_WEIGHT = 0.3;
|
||||
const DEFAULT_HYBRID_CANDIDATE_MULTIPLIER = 4;
|
||||
const DEFAULT_CACHE_ENABLED = true;
|
||||
const DEFAULT_SOURCES: Array<"memory" | "sessions"> = ["memory"];
|
||||
|
||||
@@ -157,6 +167,24 @@ function mergeConfig(
|
||||
maxResults: overrides?.query?.maxResults ?? defaults?.query?.maxResults ?? DEFAULT_MAX_RESULTS,
|
||||
minScore: overrides?.query?.minScore ?? defaults?.query?.minScore ?? DEFAULT_MIN_SCORE,
|
||||
};
|
||||
const hybrid = {
|
||||
enabled:
|
||||
overrides?.query?.hybrid?.enabled ??
|
||||
defaults?.query?.hybrid?.enabled ??
|
||||
DEFAULT_HYBRID_ENABLED,
|
||||
vectorWeight:
|
||||
overrides?.query?.hybrid?.vectorWeight ??
|
||||
defaults?.query?.hybrid?.vectorWeight ??
|
||||
DEFAULT_HYBRID_VECTOR_WEIGHT,
|
||||
textWeight:
|
||||
overrides?.query?.hybrid?.textWeight ??
|
||||
defaults?.query?.hybrid?.textWeight ??
|
||||
DEFAULT_HYBRID_TEXT_WEIGHT,
|
||||
candidateMultiplier:
|
||||
overrides?.query?.hybrid?.candidateMultiplier ??
|
||||
defaults?.query?.hybrid?.candidateMultiplier ??
|
||||
DEFAULT_HYBRID_CANDIDATE_MULTIPLIER,
|
||||
};
|
||||
const cache = {
|
||||
enabled: overrides?.cache?.enabled ?? defaults?.cache?.enabled ?? DEFAULT_CACHE_ENABLED,
|
||||
maxEntries: overrides?.cache?.maxEntries ?? defaults?.cache?.maxEntries,
|
||||
@@ -164,6 +192,12 @@ function mergeConfig(
|
||||
|
||||
const overlap = Math.max(0, Math.min(chunking.overlap, chunking.tokens - 1));
|
||||
const minScore = Math.max(0, Math.min(1, query.minScore));
|
||||
const vectorWeight = Math.max(0, Math.min(1, hybrid.vectorWeight));
|
||||
const textWeight = Math.max(0, Math.min(1, hybrid.textWeight));
|
||||
const sum = vectorWeight + textWeight;
|
||||
const normalizedVectorWeight = sum > 0 ? vectorWeight / sum : DEFAULT_HYBRID_VECTOR_WEIGHT;
|
||||
const normalizedTextWeight = sum > 0 ? textWeight / sum : DEFAULT_HYBRID_TEXT_WEIGHT;
|
||||
const candidateMultiplier = Math.max(1, Math.min(20, Math.floor(hybrid.candidateMultiplier)));
|
||||
return {
|
||||
enabled,
|
||||
sources,
|
||||
@@ -178,7 +212,16 @@ function mergeConfig(
|
||||
store,
|
||||
chunking: { tokens: Math.max(1, chunking.tokens), overlap },
|
||||
sync,
|
||||
query: { ...query, minScore },
|
||||
query: {
|
||||
...query,
|
||||
minScore,
|
||||
hybrid: {
|
||||
enabled: Boolean(hybrid.enabled),
|
||||
vectorWeight: normalizedVectorWeight,
|
||||
textWeight: normalizedTextWeight,
|
||||
candidateMultiplier,
|
||||
},
|
||||
},
|
||||
cache: {
|
||||
enabled: Boolean(cache.enabled),
|
||||
maxEntries:
|
||||
|
||||
@@ -178,18 +178,33 @@ export function registerMemoryCli(program: Command) {
|
||||
if (status.vector.extensionPath) {
|
||||
lines.push(`${label("Vector path")} ${info(status.vector.extensionPath)}`);
|
||||
}
|
||||
if (status.vector.loadError) {
|
||||
lines.push(`${label("Vector error")} ${warn(status.vector.loadError)}`);
|
||||
if (status.vector.loadError) {
|
||||
lines.push(`${label("Vector error")} ${warn(status.vector.loadError)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (status.cache) {
|
||||
const cacheState = status.cache.enabled ? "enabled" : "disabled";
|
||||
const cacheColor = status.cache.enabled ? theme.success : theme.muted;
|
||||
const suffix =
|
||||
if (status.fts) {
|
||||
const ftsState = status.fts.enabled
|
||||
? status.fts.available
|
||||
? "ready"
|
||||
: "unavailable"
|
||||
: "disabled";
|
||||
const ftsColor =
|
||||
ftsState === "ready" ? theme.success : ftsState === "unavailable" ? theme.warn : theme.muted;
|
||||
lines.push(`${label("FTS")} ${colorize(rich, ftsColor, ftsState)}`);
|
||||
if (status.fts.error) {
|
||||
lines.push(`${label("FTS error")} ${warn(status.fts.error)}`);
|
||||
}
|
||||
}
|
||||
if (status.cache) {
|
||||
const cacheState = status.cache.enabled ? "enabled" : "disabled";
|
||||
const cacheColor = status.cache.enabled ? theme.success : theme.muted;
|
||||
const suffix =
|
||||
status.cache.enabled && typeof status.cache.entries === "number"
|
||||
? ` (${status.cache.entries} entries)`
|
||||
: "";
|
||||
lines.push(`${label("Embedding cache")} ${colorize(rich, cacheColor, cacheState)}${suffix}`);
|
||||
lines.push(
|
||||
`${label("Embedding cache")} ${colorize(rich, cacheColor, cacheState)}${suffix}`,
|
||||
);
|
||||
if (status.cache.enabled && typeof status.cache.maxEntries === "number") {
|
||||
lines.push(`${label("Cache cap")} ${info(String(status.cache.maxEntries))}`);
|
||||
}
|
||||
|
||||
@@ -190,6 +190,11 @@ const FIELD_LABELS: Record<string, string> = {
|
||||
"agents.defaults.memorySearch.sync.watchDebounceMs": "Memory Watch Debounce (ms)",
|
||||
"agents.defaults.memorySearch.query.maxResults": "Memory Search Max Results",
|
||||
"agents.defaults.memorySearch.query.minScore": "Memory Search Min Score",
|
||||
"agents.defaults.memorySearch.query.hybrid.enabled": "Memory Search Hybrid",
|
||||
"agents.defaults.memorySearch.query.hybrid.vectorWeight": "Memory Search Vector Weight",
|
||||
"agents.defaults.memorySearch.query.hybrid.textWeight": "Memory Search Text Weight",
|
||||
"agents.defaults.memorySearch.query.hybrid.candidateMultiplier":
|
||||
"Memory Search Hybrid Candidate Multiplier",
|
||||
"agents.defaults.memorySearch.cache.enabled": "Memory Search Embedding Cache",
|
||||
"agents.defaults.memorySearch.cache.maxEntries": "Memory Search Embedding Cache Max Entries",
|
||||
"auth.profiles": "Auth Profiles",
|
||||
@@ -389,6 +394,14 @@ const FIELD_HELP: Record<string, string> = {
|
||||
"Enable sqlite-vec extension for vector search (default: true).",
|
||||
"agents.defaults.memorySearch.store.vector.extensionPath":
|
||||
"Optional override path to sqlite-vec extension library (.dylib/.so/.dll).",
|
||||
"agents.defaults.memorySearch.query.hybrid.enabled":
|
||||
"Enable hybrid BM25 + vector search for memory (default: true).",
|
||||
"agents.defaults.memorySearch.query.hybrid.vectorWeight":
|
||||
"Weight for vector similarity when merging results (0-1).",
|
||||
"agents.defaults.memorySearch.query.hybrid.textWeight":
|
||||
"Weight for BM25 text relevance when merging results (0-1).",
|
||||
"agents.defaults.memorySearch.query.hybrid.candidateMultiplier":
|
||||
"Multiplier for candidate pool size (default: 4).",
|
||||
"agents.defaults.memorySearch.cache.enabled":
|
||||
"Cache chunk embeddings in SQLite to speed up reindexing and frequent updates (default: true).",
|
||||
"agents.defaults.memorySearch.cache.maxEntries":
|
||||
|
||||
@@ -256,6 +256,14 @@ export const MemorySearchSchema = z
|
||||
.object({
|
||||
maxResults: z.number().int().positive().optional(),
|
||||
minScore: z.number().min(0).max(1).optional(),
|
||||
hybrid: z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
vectorWeight: z.number().min(0).max(1).optional(),
|
||||
textWeight: z.number().min(0).max(1).optional(),
|
||||
candidateMultiplier: z.number().int().positive().optional(),
|
||||
})
|
||||
.optional(),
|
||||
})
|
||||
.optional(),
|
||||
cache: z
|
||||
|
||||
@@ -181,6 +181,38 @@ describe("memory index", () => {
|
||||
expect(embedBatchCalls).toBe(afterFirst);
|
||||
});
|
||||
|
||||
it("finds keyword matches via hybrid search when query embedding is zero", async () => {
|
||||
const cfg = {
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "mock-embed",
|
||||
store: { path: indexPath, vector: { enabled: false } },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: true },
|
||||
query: {
|
||||
minScore: 0,
|
||||
hybrid: { enabled: true, vectorWeight: 0, textWeight: 1 },
|
||||
},
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
};
|
||||
const result = await getMemorySearchManager({ cfg, agentId: "main" });
|
||||
expect(result.manager).not.toBeNull();
|
||||
if (!result.manager) throw new Error("manager missing");
|
||||
manager = result.manager;
|
||||
|
||||
const status = manager.status();
|
||||
if (!status.fts?.available) return;
|
||||
|
||||
const results = await manager.search("zebra");
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
expect(results[0]?.path).toContain("memory/2026-01-12.md");
|
||||
});
|
||||
|
||||
it("reports vector availability after probe", async () => {
|
||||
const cfg = {
|
||||
agents: {
|
||||
|
||||
@@ -107,6 +107,7 @@ type OpenAiBatchOutputLine = {
|
||||
const META_KEY = "memory_index_meta_v1";
|
||||
const SNIPPET_MAX_CHARS = 700;
|
||||
const VECTOR_TABLE = "chunks_vec";
|
||||
const FTS_TABLE = "chunks_fts";
|
||||
const EMBEDDING_CACHE_TABLE = "embedding_cache";
|
||||
const SESSION_DIRTY_DEBOUNCE_MS = 5000;
|
||||
const EMBEDDING_BATCH_MAX_TOKENS = 8000;
|
||||
@@ -154,6 +155,11 @@ export class MemoryIndexManager {
|
||||
loadError?: string;
|
||||
dims?: number;
|
||||
};
|
||||
private readonly fts: {
|
||||
enabled: boolean;
|
||||
available: boolean;
|
||||
loadError?: string;
|
||||
};
|
||||
private vectorReady: Promise<boolean> | null = null;
|
||||
private watcher: FSWatcher | null = null;
|
||||
private watchTimer: NodeJS.Timeout | null = null;
|
||||
@@ -223,6 +229,7 @@ export class MemoryIndexManager {
|
||||
enabled: params.settings.cache.enabled,
|
||||
maxEntries: params.settings.cache.maxEntries,
|
||||
};
|
||||
this.fts = { enabled: params.settings.query.hybrid.enabled, available: false };
|
||||
this.ensureSchema();
|
||||
this.vector = {
|
||||
enabled: params.settings.store.vector.enabled,
|
||||
@@ -274,13 +281,46 @@ export class MemoryIndexManager {
|
||||
if (!cleaned) return [];
|
||||
const minScore = opts?.minScore ?? this.settings.query.minScore;
|
||||
const maxResults = opts?.maxResults ?? this.settings.query.maxResults;
|
||||
const hybrid = this.settings.query.hybrid;
|
||||
const candidates = Math.min(
|
||||
200,
|
||||
Math.max(1, Math.floor(maxResults * hybrid.candidateMultiplier)),
|
||||
);
|
||||
|
||||
const keywordResults = hybrid.enabled
|
||||
? await this.searchKeyword(cleaned, candidates).catch(() => [])
|
||||
: [];
|
||||
|
||||
const queryVec = await this.provider.embedQuery(cleaned);
|
||||
if (!queryVec.some((v) => v !== 0)) return [];
|
||||
const hasVector = queryVec.some((v) => v !== 0);
|
||||
const vectorResults = hasVector
|
||||
? await this.searchVector(queryVec, candidates).catch(() => [])
|
||||
: [];
|
||||
|
||||
if (!hybrid.enabled) {
|
||||
return vectorResults.filter((entry) => entry.score >= minScore).slice(0, maxResults);
|
||||
}
|
||||
|
||||
const merged = this.mergeHybridResults({
|
||||
vector: vectorResults,
|
||||
keyword: keywordResults,
|
||||
vectorWeight: hybrid.vectorWeight,
|
||||
textWeight: hybrid.textWeight,
|
||||
});
|
||||
|
||||
return merged.filter((entry) => entry.score >= minScore).slice(0, maxResults);
|
||||
}
|
||||
|
||||
private async searchVector(
|
||||
queryVec: number[],
|
||||
limit: number,
|
||||
): Promise<Array<MemorySearchResult & { id: string }>> {
|
||||
if (queryVec.length === 0 || limit <= 0) return [];
|
||||
if (await this.ensureVectorReady(queryVec.length)) {
|
||||
const sourceFilter = this.buildSourceFilter("c");
|
||||
const rows = this.db
|
||||
.prepare(
|
||||
`SELECT c.path, c.start_line, c.end_line, c.text,\n` +
|
||||
`SELECT c.id, c.path, c.start_line, c.end_line, c.text,\n` +
|
||||
` c.source,\n` +
|
||||
` vec_distance_cosine(v.embedding, ?) AS dist\n` +
|
||||
` FROM ${VECTOR_TABLE} v\n` +
|
||||
@@ -293,8 +333,9 @@ export class MemoryIndexManager {
|
||||
vectorToBlob(queryVec),
|
||||
this.provider.model,
|
||||
...sourceFilter.params,
|
||||
maxResults,
|
||||
limit,
|
||||
) as Array<{
|
||||
id: string;
|
||||
path: string;
|
||||
start_line: number;
|
||||
end_line: number;
|
||||
@@ -302,17 +343,17 @@ export class MemoryIndexManager {
|
||||
source: MemorySource;
|
||||
dist: number;
|
||||
}>;
|
||||
return rows
|
||||
.map((row) => ({
|
||||
path: row.path,
|
||||
startLine: row.start_line,
|
||||
endLine: row.end_line,
|
||||
score: 1 - row.dist,
|
||||
snippet: truncateUtf16Safe(row.text, SNIPPET_MAX_CHARS),
|
||||
source: row.source,
|
||||
}))
|
||||
.filter((entry) => entry.score >= minScore);
|
||||
return rows.map((row) => ({
|
||||
id: row.id,
|
||||
path: row.path,
|
||||
startLine: row.start_line,
|
||||
endLine: row.end_line,
|
||||
score: 1 - row.dist,
|
||||
snippet: truncateUtf16Safe(row.text, SNIPPET_MAX_CHARS),
|
||||
source: row.source,
|
||||
}));
|
||||
}
|
||||
|
||||
const candidates = this.listChunks();
|
||||
const scored = candidates
|
||||
.map((chunk) => ({
|
||||
@@ -321,10 +362,10 @@ export class MemoryIndexManager {
|
||||
}))
|
||||
.filter((entry) => Number.isFinite(entry.score));
|
||||
return scored
|
||||
.filter((entry) => entry.score >= minScore)
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, maxResults)
|
||||
.slice(0, limit)
|
||||
.map((entry) => ({
|
||||
id: entry.chunk.id,
|
||||
path: entry.chunk.path,
|
||||
startLine: entry.chunk.startLine,
|
||||
endLine: entry.chunk.endLine,
|
||||
@@ -334,6 +375,121 @@ export class MemoryIndexManager {
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Build an FTS5 MATCH expression from a free-form query string.
 *
 * The query is split into word tokens (Unicode letters, digits and
 * underscores); every other character acts as a separator. Each token is
 * emitted as a double-quoted FTS5 string and the tokens are joined with
 * `AND`, so a row has to contain every token to match.
 *
 * @param raw - Query text as supplied by the caller.
 * @returns The MATCH expression, or `null` when the query contains no
 *   searchable token.
 */
private buildFtsQuery(raw: string): string | null {
  // Unicode-aware classes: an ASCII-only pattern truncates accented words and
  // yields no tokens at all for e.g. CJK queries, which silently turns
  // keyword search off for those inputs.
  const tokens = raw.match(/[\p{L}\p{N}_]+/gu) ?? [];
  if (tokens.length === 0) return null;
  // Tokens consist solely of letters, digits and underscores, so they can
  // never contain a `"` that would need escaping inside the quoted string.
  return tokens.map((token) => `"${token}"`).join(" AND ");
}
|
||||
|
||||
/**
 * Run a BM25 keyword search against the FTS table.
 *
 * Returns an empty list when full-text search is disabled or unavailable,
 * when `limit` is not positive, or when the query yields no FTS tokens.
 * Rows are restricted to the current provider model plus whatever the
 * source filter adds, ordered best match first, and capped at `limit`.
 *
 * @param query - Raw query text; converted with `buildFtsQuery`.
 * @param limit - Maximum number of rows to fetch.
 * @returns Matching chunks with `score` and `textScore` both set to the
 *   BM25-derived relevance in the range [0, 1].
 */
private async searchKeyword(
  query: string,
  limit: number,
): Promise<Array<MemorySearchResult & { id: string; textScore: number }>> {
  if (!this.fts.enabled || !this.fts.available) return [];
  if (limit <= 0) return [];
  const ftsQuery = this.buildFtsQuery(query);
  if (!ftsQuery) return [];
  const sourceFilter = this.buildSourceFilter();
  const rows = this.db
    .prepare(
      `SELECT id, path, source, start_line, end_line, text,\n` +
        `       bm25(${FTS_TABLE}) AS rank\n` +
        `  FROM ${FTS_TABLE}\n` +
        ` WHERE ${FTS_TABLE} MATCH ? AND model = ?${sourceFilter.sql}\n` +
        ` ORDER BY rank ASC\n` +
        ` LIMIT ?`,
    )
    .all(ftsQuery, this.provider.model, ...sourceFilter.params, limit) as Array<{
    id: string;
    path: string;
    source: MemorySource;
    start_line: number;
    end_line: number;
    text: string;
    rank: number;
  }>;
  return rows.map((row) => {
    let textScore: number;
    if (!Number.isFinite(row.rank)) {
      // Unusable rank: treat as a very poor match.
      textScore = 1 / (1 + 999);
    } else if (row.rank < 0) {
      // FTS5 bm25() reports better matches as numerically smaller (negative)
      // values. Clamping them to 0 would give every hit the same score of 1,
      // so map the magnitude monotonically into [0, 1) instead.
      const relevance = -row.rank;
      textScore = relevance / (1 + relevance);
    } else {
      textScore = 1 / (1 + row.rank);
    }
    return {
      id: row.id,
      path: row.path,
      startLine: row.start_line,
      endLine: row.end_line,
      score: textScore,
      textScore,
      snippet: truncateUtf16Safe(row.text, SNIPPET_MAX_CHARS),
      source: row.source,
    };
  });
}
|
||||
|
||||
/**
 * Combine vector and keyword hits into a single ranked list.
 *
 * Hits are keyed by chunk id. A chunk found by only one search gets a score
 * of 0 for the other one. When both searches return the same chunk, the
 * keyword hit supplies the text score and, if it has a non-empty snippet,
 * replaces the snippet taken from the vector hit. The final score is
 * `vectorWeight * vectorScore + textWeight * textScore`.
 *
 * @param params.vector - Hits from the vector search.
 * @param params.keyword - Hits from the keyword search.
 * @param params.vectorWeight - Weight applied to the vector score.
 * @param params.textWeight - Weight applied to the text score.
 * @returns Merged results sorted by descending score.
 */
private mergeHybridResults(params: {
  vector: Array<MemorySearchResult & { id: string }>;
  keyword: Array<MemorySearchResult & { id: string; textScore: number }>;
  vectorWeight: number;
  textWeight: number;
}): MemorySearchResult[] {
  type Combined = {
    id: string;
    path: string;
    startLine: number;
    endLine: number;
    source: MemorySource;
    snippet: string;
    vectorScore: number;
    textScore: number;
  };
  const { vector, keyword, vectorWeight, textWeight } = params;
  const combined = new Map<string, Combined>();

  // Seed with the vector hits; their text score stays 0 unless a keyword hit
  // for the same chunk shows up below.
  vector.forEach((hit) => {
    combined.set(hit.id, {
      id: hit.id,
      path: hit.path,
      startLine: hit.startLine,
      endLine: hit.endLine,
      source: hit.source,
      snippet: hit.snippet,
      vectorScore: hit.score,
      textScore: 0,
    });
  });

  keyword.forEach((hit) => {
    const seen = combined.get(hit.id);
    if (!seen) {
      // Keyword-only chunk: no vector contribution.
      combined.set(hit.id, {
        id: hit.id,
        path: hit.path,
        startLine: hit.startLine,
        endLine: hit.endLine,
        source: hit.source,
        snippet: hit.snippet,
        vectorScore: 0,
        textScore: hit.textScore,
      });
      return;
    }
    seen.textScore = hit.textScore;
    if (hit.snippet && hit.snippet.length > 0) seen.snippet = hit.snippet;
  });

  const ranked: MemorySearchResult[] = [];
  for (const entry of combined.values()) {
    ranked.push({
      path: entry.path,
      startLine: entry.startLine,
      endLine: entry.endLine,
      score: vectorWeight * entry.vectorScore + textWeight * entry.textScore,
      snippet: entry.snippet,
      source: entry.source,
    });
  }
  ranked.sort((left, right) => right.score - left.score);
  return ranked;
}
|
||||
|
||||
async sync(params?: {
|
||||
reason?: string;
|
||||
force?: boolean;
|
||||
@@ -382,6 +538,7 @@ export class MemoryIndexManager {
|
||||
sources: MemorySource[];
|
||||
sourceCounts: Array<{ source: MemorySource; files: number; chunks: number }>;
|
||||
cache?: { enabled: boolean; entries?: number; maxEntries?: number };
|
||||
fts?: { enabled: boolean; available: boolean; error?: string };
|
||||
fallback?: { from: string; reason?: string };
|
||||
vector?: {
|
||||
enabled: boolean;
|
||||
@@ -452,6 +609,11 @@ export class MemoryIndexManager {
|
||||
maxEntries: this.cache.maxEntries,
|
||||
}
|
||||
: { enabled: false, maxEntries: this.cache.maxEntries },
|
||||
fts: {
|
||||
enabled: this.fts.enabled,
|
||||
available: this.fts.available,
|
||||
error: this.fts.loadError,
|
||||
},
|
||||
fallback: this.fallbackReason ? { from: "local", reason: this.fallbackReason } : undefined,
|
||||
vector: {
|
||||
enabled: this.vector.enabled,
|
||||
@@ -638,6 +800,27 @@ export class MemoryIndexManager {
|
||||
this.db.exec(
|
||||
`CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at ON ${EMBEDDING_CACHE_TABLE}(updated_at);`,
|
||||
);
|
||||
if (this.fts.enabled) {
|
||||
try {
|
||||
this.db.exec(
|
||||
`CREATE VIRTUAL TABLE IF NOT EXISTS ${FTS_TABLE} USING fts5(\n` +
|
||||
` text,\n` +
|
||||
` id UNINDEXED,\n` +
|
||||
` path UNINDEXED,\n` +
|
||||
` source UNINDEXED,\n` +
|
||||
` model UNINDEXED,\n` +
|
||||
` start_line UNINDEXED,\n` +
|
||||
` end_line UNINDEXED\n` +
|
||||
`);`,
|
||||
);
|
||||
this.fts.available = true;
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
this.fts.available = false;
|
||||
this.fts.loadError = message;
|
||||
log.warn(`fts unavailable: ${message}`);
|
||||
}
|
||||
}
|
||||
this.ensureColumn("files", "source", "TEXT NOT NULL DEFAULT 'memory'");
|
||||
this.ensureColumn("chunks", "source", "TEXT NOT NULL DEFAULT 'memory'");
|
||||
this.db.exec(`CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`);
|
||||
@@ -825,6 +1008,13 @@ export class MemoryIndexManager {
|
||||
.run(stale.path, "memory");
|
||||
} catch {}
|
||||
this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(stale.path, "memory");
|
||||
if (this.fts.enabled && this.fts.available) {
|
||||
try {
|
||||
this.db
|
||||
.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
|
||||
.run(stale.path, "memory", this.provider.model);
|
||||
} catch {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -915,6 +1105,13 @@ export class MemoryIndexManager {
|
||||
this.db
|
||||
.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`)
|
||||
.run(stale.path, "sessions");
|
||||
if (this.fts.enabled && this.fts.available) {
|
||||
try {
|
||||
this.db
|
||||
.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
|
||||
.run(stale.path, "sessions", this.provider.model);
|
||||
} catch {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1000,6 +1197,11 @@ export class MemoryIndexManager {
|
||||
private resetIndex() {
|
||||
this.db.exec(`DELETE FROM files`);
|
||||
this.db.exec(`DELETE FROM chunks`);
|
||||
if (this.fts.enabled && this.fts.available) {
|
||||
try {
|
||||
this.db.exec(`DELETE FROM ${FTS_TABLE}`);
|
||||
} catch {}
|
||||
}
|
||||
this.dropVectorTable();
|
||||
this.vector.dims = undefined;
|
||||
this.sessionsDirtyFiles.clear();
|
||||
@@ -1687,6 +1889,13 @@ export class MemoryIndexManager {
|
||||
.run(entry.path, options.source);
|
||||
} catch {}
|
||||
}
|
||||
if (this.fts.enabled && this.fts.available) {
|
||||
try {
|
||||
this.db
|
||||
.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
|
||||
.run(entry.path, options.source, this.provider.model);
|
||||
} catch {}
|
||||
}
|
||||
this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(entry.path, options.source);
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
const chunk = chunks[i];
|
||||
@@ -1722,6 +1931,22 @@ export class MemoryIndexManager {
|
||||
.prepare(`INSERT OR REPLACE INTO ${VECTOR_TABLE} (id, embedding) VALUES (?, ?)`)
|
||||
.run(id, vectorToBlob(embedding));
|
||||
}
|
||||
if (this.fts.enabled && this.fts.available) {
|
||||
this.db
|
||||
.prepare(
|
||||
`INSERT INTO ${FTS_TABLE} (text, id, path, source, model, start_line, end_line)\n` +
|
||||
` VALUES (?, ?, ?, ?, ?, ?, ?)`,
|
||||
)
|
||||
.run(
|
||||
chunk.text,
|
||||
id,
|
||||
entry.path,
|
||||
options.source,
|
||||
this.provider.model,
|
||||
chunk.startLine,
|
||||
chunk.endLine,
|
||||
);
|
||||
}
|
||||
}
|
||||
this.db
|
||||
.prepare(
|
||||
|
||||
Reference in New Issue
Block a user