From 072a13f3b2fff695e4bd512ef866fdbe7f42e601 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Sun, 18 Jan 2026 03:09:34 +0000
Subject: [PATCH] test: expand memory hybrid coverage

---
 src/memory/hybrid.test.ts |  88 ++++++++++++++++++++++
 src/memory/hybrid.ts      | 150 ++++++++++++++++++++++++++++++++++++++
 src/memory/index.test.ts  |  96 ++++++++++++++++++++++++
 3 files changed, 334 insertions(+)
 create mode 100644 src/memory/hybrid.test.ts
 create mode 100644 src/memory/hybrid.ts

diff --git a/src/memory/hybrid.test.ts b/src/memory/hybrid.test.ts
new file mode 100644
index 000000000..959543fd3
--- /dev/null
+++ b/src/memory/hybrid.test.ts
@@ -0,0 +1,88 @@
+import { describe, expect, it } from "vitest";
+
+import { bm25RankToScore, buildFtsQuery, mergeHybridResults } from "./hybrid.js";
+
+describe("memory hybrid helpers", () => {
+  it("buildFtsQuery tokenizes and AND-joins", () => {
+    expect(buildFtsQuery("hello world")).toBe("\"hello\" AND \"world\"");
+    expect(buildFtsQuery("FOO_bar baz-1")).toBe("\"FOO_bar\" AND \"baz\" AND \"1\"");
+    expect(buildFtsQuery("café 東京")).toBe("\"café\" AND \"東京\"");
+    expect(buildFtsQuery(" ")).toBeNull();
+  });
+
+  it("bm25RankToScore is monotonic and clamped", () => {
+    expect(bm25RankToScore(0)).toBeCloseTo(1);
+    expect(bm25RankToScore(1)).toBeCloseTo(0.5);
+    expect(bm25RankToScore(10)).toBeLessThan(bm25RankToScore(1));
+    expect(bm25RankToScore(-100)).toBeCloseTo(1);
+  });
+
+  it("mergeHybridResults unions by id and combines weighted scores", () => {
+    const merged = mergeHybridResults({
+      vectorWeight: 0.7,
+      textWeight: 0.3,
+      vector: [
+        {
+          id: "a",
+          path: "memory/a.md",
+          startLine: 1,
+          endLine: 2,
+          source: "memory",
+          snippet: "vec-a",
+          vectorScore: 0.9,
+        },
+      ],
+      keyword: [
+        {
+          id: "b",
+          path: "memory/b.md",
+          startLine: 3,
+          endLine: 4,
+          source: "memory",
+          snippet: "kw-b",
+          textScore: 1.0,
+        },
+      ],
+    });
+
+    expect(merged).toHaveLength(2);
+    const a = merged.find((r) => r.path === "memory/a.md");
+    const b = merged.find((r) => r.path === "memory/b.md");
+    expect(a?.score).toBeCloseTo(0.7 * 0.9);
+    expect(b?.score).toBeCloseTo(0.3 * 1.0);
+  });
+
+  it("mergeHybridResults prefers keyword snippet when ids overlap", () => {
+    const merged = mergeHybridResults({
+      vectorWeight: 0.5,
+      textWeight: 0.5,
+      vector: [
+        {
+          id: "a",
+          path: "memory/a.md",
+          startLine: 1,
+          endLine: 2,
+          source: "memory",
+          snippet: "vec-a",
+          vectorScore: 0.2,
+        },
+      ],
+      keyword: [
+        {
+          id: "a",
+          path: "memory/a.md",
+          startLine: 1,
+          endLine: 2,
+          source: "memory",
+          snippet: "kw-a",
+          textScore: 1.0,
+        },
+      ],
+    });
+
+    expect(merged).toHaveLength(1);
+    expect(merged[0]?.snippet).toBe("kw-a");
+    expect(merged[0]?.score).toBeCloseTo(0.5 * 0.2 + 0.5 * 1.0);
+  });
+});
+
diff --git a/src/memory/hybrid.ts b/src/memory/hybrid.ts
new file mode 100644
index 000000000..6af9ba64a
--- /dev/null
+++ b/src/memory/hybrid.ts
@@ -0,0 +1,150 @@
+/** Label copied from each hit's `source` field through to the merged output. */
+export type HybridSource = string;
+
+/** A hit from the vector search, carrying its `vectorScore`. */
+export type HybridVectorResult = {
+  id: string;
+  path: string;
+  startLine: number;
+  endLine: number;
+  source: HybridSource;
+  snippet: string;
+  vectorScore: number;
+};
+
+/** A hit from the keyword search, carrying its `textScore`. */
+export type HybridKeywordResult = {
+  id: string;
+  path: string;
+  startLine: number;
+  endLine: number;
+  source: HybridSource;
+  snippet: string;
+  textScore: number;
+};
+
+/**
+ * Turn free-form query text into an AND-joined list of quoted tokens.
+ *
+ * The input is split into runs of letters, digits and underscores; each run is
+ * wrapped in double quotes and the runs are joined with ` AND `. Letters and
+ * digits are matched by Unicode category so that non-ASCII queries (accented
+ * or CJK text, for example) still produce tokens instead of being dropped.
+ *
+ * @param raw - Query text to tokenize.
+ * @returns The joined expression, or `null` when `raw` contains no tokens.
+ */
+export function buildFtsQuery(raw: string): string | null {
+  // A token cannot contain whitespace or a double quote, so it needs no
+  // trimming or escaping before it is quoted.
+  const tokens = raw.match(/[\p{L}\p{N}_]+/gu) ?? [];
+  if (tokens.length === 0) return null;
+  return tokens.map((token) => `"${token}"`).join(" AND ");
+}
+
+/**
+ * Map a BM25 rank onto a score in (0, 1]; a lower rank gives a higher score.
+ *
+ * Negative ranks are clamped to 0 (score 1). Non-finite ranks are treated as
+ * 999 (score 0.001).
+ *
+ * NOTE(review): SQLite FTS5 `bm25()` reports better matches as more negative
+ * values. If raw FTS5 ranks are passed in, every hit would clamp to a score
+ * of 1 - confirm how the caller derives `rank`.
+ *
+ * @param rank - Rank of one keyword hit.
+ * @returns `1 / (1 + rank)` computed on the clamped rank.
+ */
+export function bm25RankToScore(rank: number): number {
+  const normalized = Number.isFinite(rank) ? Math.max(0, rank) : 999;
+  return 1 / (1 + normalized);
+}
+
+/**
+ * Merge vector and keyword hits into one list ranked by weighted score.
+ *
+ * Hits are unioned by `id`. A hit returned by only one search gets 0 for the
+ * score it lacks. When both searches return the same `id`, the vector hit
+ * supplies the location fields and a non-empty keyword snippet replaces the
+ * vector snippet.
+ *
+ * @param params - The `vector` and `keyword` hit lists plus the
+ *   `vectorWeight` and `textWeight` multipliers applied to their scores.
+ * @returns Merged hits without `id`, sorted by
+ *   `vectorWeight * vectorScore + textWeight * textScore`, highest first.
+ */
+export function mergeHybridResults(params: {
+  vector: HybridVectorResult[];
+  keyword: HybridKeywordResult[];
+  vectorWeight: number;
+  textWeight: number;
+}): Array<{
+  path: string;
+  startLine: number;
+  endLine: number;
+  score: number;
+  snippet: string;
+  source: HybridSource;
+}> {
+  const byId = new Map<
+    string,
+    {
+      id: string;
+      path: string;
+      startLine: number;
+      endLine: number;
+      source: HybridSource;
+      snippet: string;
+      vectorScore: number;
+      textScore: number;
+    }
+  >();
+
+  for (const r of params.vector) {
+    byId.set(r.id, {
+      id: r.id,
+      path: r.path,
+      startLine: r.startLine,
+      endLine: r.endLine,
+      source: r.source,
+      snippet: r.snippet,
+      vectorScore: r.vectorScore,
+      textScore: 0,
+    });
+  }
+
+  for (const r of params.keyword) {
+    const existing = byId.get(r.id);
+    if (existing) {
+      existing.textScore = r.textScore;
+      // Keep the vector snippet only when the keyword hit has an empty one.
+      if (r.snippet) existing.snippet = r.snippet;
+    } else {
+      byId.set(r.id, {
+        id: r.id,
+        path: r.path,
+        startLine: r.startLine,
+        endLine: r.endLine,
+        source: r.source,
+        snippet: r.snippet,
+        vectorScore: 0,
+        textScore: r.textScore,
+      });
+    }
+  }
+
+  const merged = Array.from(byId.values()).map((entry) => {
+    const score = params.vectorWeight * entry.vectorScore + params.textWeight * entry.textScore;
+    return {
+      path: entry.path,
+      startLine: entry.startLine,
+      endLine: entry.endLine,
+      score,
+      snippet: entry.snippet,
+      source: entry.source,
+    };
+  });
+
+  return merged.sort((a, b) => b.score - a.score);
+}
+
diff --git a/src/memory/index.test.ts b/src/memory/index.test.ts
index 56ab7eda7..daa682c3b 100644
--- a/src/memory/index.test.ts
+++ b/src/memory/index.test.ts
@@ -213,6 +213,102 @@ describe("memory index", () => {
     expect(results[0]?.path).toContain("memory/2026-01-12.md");
   });
 
+  it("hybrid weights can favor vector-only matches over keyword-only matches", async () => {
+    const manyAlpha = Array.from({ length: 200 }, () => "Alpha").join(" ");
+    await fs.writeFile(
+      path.join(workspaceDir, "memory", "vector-only.md"),
+      "Alpha beta. Alpha beta. Alpha beta. Alpha beta.",
+    );
+    await fs.writeFile(
+      path.join(workspaceDir, "memory", "keyword-only.md"),
+      `${manyAlpha} beta id123.`,
+    );
+
+    const cfg = {
+      agents: {
+        defaults: {
+          workspace: workspaceDir,
+          memorySearch: {
+            provider: "openai",
+            model: "mock-embed",
+            store: { path: indexPath, vector: { enabled: false } },
+            sync: { watch: false, onSessionStart: false, onSearch: true },
+            query: {
+              minScore: 0,
+              maxResults: 200,
+              hybrid: { enabled: true, vectorWeight: 0.99, textWeight: 0.01, candidateMultiplier: 10 },
+            },
+          },
+        },
+        list: [{ id: "main", default: true }],
+      },
+    };
+    const result = await getMemorySearchManager({ cfg, agentId: "main" });
+    expect(result.manager).not.toBeNull();
+    if (!result.manager) throw new Error("manager missing");
+    manager = result.manager;
+
+    const status = manager.status();
+    if (!status.fts?.available) return;
+
+    const results = await manager.search("alpha beta id123");
+    expect(results.length).toBeGreaterThan(0);
+    const paths = results.map((r) => r.path);
+    expect(paths).toContain("memory/vector-only.md");
+    expect(paths).toContain("memory/keyword-only.md");
+    const vectorOnly = results.find((r) => r.path === "memory/vector-only.md");
+    const keywordOnly = results.find((r) => r.path === "memory/keyword-only.md");
+    expect(vectorOnly?.score ?? 0).toBeGreaterThan(keywordOnly?.score ?? 0);
+  });
+
+  it("hybrid weights can favor keyword matches when text weight dominates", async () => {
+    const manyAlpha = Array.from({ length: 200 }, () => "Alpha").join(" ");
+    await fs.writeFile(
+      path.join(workspaceDir, "memory", "vector-only.md"),
+      "Alpha beta. Alpha beta. Alpha beta. Alpha beta.",
+    );
+    await fs.writeFile(
+      path.join(workspaceDir, "memory", "keyword-only.md"),
+      `${manyAlpha} beta id123.`,
+    );
+
+    const cfg = {
+      agents: {
+        defaults: {
+          workspace: workspaceDir,
+          memorySearch: {
+            provider: "openai",
+            model: "mock-embed",
+            store: { path: indexPath, vector: { enabled: false } },
+            sync: { watch: false, onSessionStart: false, onSearch: true },
+            query: {
+              minScore: 0,
+              maxResults: 200,
+              hybrid: { enabled: true, vectorWeight: 0.01, textWeight: 0.99, candidateMultiplier: 10 },
+            },
+          },
+        },
+        list: [{ id: "main", default: true }],
+      },
+    };
+    const result = await getMemorySearchManager({ cfg, agentId: "main" });
+    expect(result.manager).not.toBeNull();
+    if (!result.manager) throw new Error("manager missing");
+    manager = result.manager;
+
+    const status = manager.status();
+    if (!status.fts?.available) return;
+
+    const results = await manager.search("alpha beta id123");
+    expect(results.length).toBeGreaterThan(0);
+    const paths = results.map((r) => r.path);
+    expect(paths).toContain("memory/vector-only.md");
+    expect(paths).toContain("memory/keyword-only.md");
+    const vectorOnly = results.find((r) => r.path === "memory/vector-only.md");
+    const keywordOnly = results.find((r) => r.path === "memory/keyword-only.md");
+    expect(keywordOnly?.score ?? 0).toBeGreaterThan(vectorOnly?.score ?? 0);
+  });
+
   it("reports vector availability after probe", async () => {
     const cfg = {
       agents: {