test: expand memory hybrid coverage
This commit is contained in:
87
src/memory/hybrid.test.ts
Normal file
87
src/memory/hybrid.test.ts
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
|
import { bm25RankToScore, buildFtsQuery, mergeHybridResults } from "./hybrid.js";
|
||||||
|
|
||||||
|
describe("memory hybrid helpers", () => {
  // Location fields shared by every hit that points at memory/a.md (lines 1-2).
  const locationA = {
    id: "a",
    path: "memory/a.md",
    startLine: 1,
    endLine: 2,
    source: "memory",
  };

  // Location fields for the keyword-only hit in memory/b.md (lines 3-4).
  const locationB = {
    id: "b",
    path: "memory/b.md",
    startLine: 3,
    endLine: 4,
    source: "memory",
  };

  it("buildFtsQuery tokenizes and AND-joins", () => {
    // Each row is [raw query, expected FTS expression]; null means "no usable tokens".
    const expectations: Array<[string, string | null]> = [
      ["hello world", '"hello" AND "world"'],
      ["FOO_bar baz-1", '"FOO_bar" AND "baz" AND "1"'],
      [" ", null],
    ];

    for (const [raw, expected] of expectations) {
      expect(buildFtsQuery(raw)).toBe(expected);
    }
  });

  it("bm25RankToScore is monotonic and clamped", () => {
    const scoreAtOne = bm25RankToScore(1);

    expect(bm25RankToScore(0)).toBeCloseTo(1);
    expect(scoreAtOne).toBeCloseTo(0.5);
    // A larger rank must yield a smaller score.
    expect(bm25RankToScore(10)).toBeLessThan(scoreAtOne);
    // Negative ranks are clamped, so they score the same as rank 0.
    expect(bm25RankToScore(-100)).toBeCloseTo(1);
  });

  it("mergeHybridResults unions by id and combines weighted scores", () => {
    const vectorWeight = 0.7;
    const textWeight = 0.3;

    const merged = mergeHybridResults({
      vectorWeight,
      textWeight,
      vector: [{ ...locationA, snippet: "vec-a", vectorScore: 0.9 }],
      keyword: [{ ...locationB, snippet: "kw-b", textScore: 1.0 }],
    });

    // Disjoint ids: both hits survive, each scored by its own side only.
    expect(merged).toHaveLength(2);
    const a = merged.find((r) => r.path === "memory/a.md");
    const b = merged.find((r) => r.path === "memory/b.md");
    expect(a?.score).toBeCloseTo(0.7 * 0.9);
    expect(b?.score).toBeCloseTo(0.3 * 1.0);
  });

  it("mergeHybridResults prefers keyword snippet when ids overlap", () => {
    const merged = mergeHybridResults({
      vectorWeight: 0.5,
      textWeight: 0.5,
      vector: [{ ...locationA, snippet: "vec-a", vectorScore: 0.2 }],
      keyword: [{ ...locationA, snippet: "kw-a", textScore: 1.0 }],
    });

    // Same id on both sides: one merged row, keyword snippet, summed weighted score.
    expect(merged).toHaveLength(1);
    const [only] = merged;
    expect(only?.snippet).toBe("kw-a");
    expect(only?.score).toBeCloseTo(0.5 * 0.2 + 0.5 * 1.0);
  });
});
|
||||||
|
|
||||||
108
src/memory/hybrid.ts
Normal file
108
src/memory/hybrid.ts
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
/** Free-form label describing where a result came from. */
export type HybridSource = string;

/** Fields shared by vector and keyword hits: identity, file location, and preview text. */
type HybridHitBase = {
  id: string;
  path: string;
  startLine: number;
  endLine: number;
  source: HybridSource;
  snippet: string;
};

/** A hit produced by the vector search, carrying its `vectorScore`. */
export type HybridVectorResult = HybridHitBase & {
  vectorScore: number;
};

/** A hit produced by the keyword search, carrying its `textScore`. */
export type HybridKeywordResult = HybridHitBase & {
  textScore: number;
};
|
||||||
|
|
||||||
|
/**
 * Converts free-form query text into a full-text-search expression in which every
 * token is double-quoted and all tokens are joined with `AND`.
 *
 * A token is a maximal run of Unicode letters, combining marks, digits, or `_`;
 * everything else (whitespace, punctuation, quotes) acts as a separator and is
 * dropped. Matching on Unicode categories rather than `[A-Za-z0-9_]` keeps
 * non-ASCII queries (accented Latin, Cyrillic, CJK, ...) intact instead of
 * splitting them apart or discarding them entirely.
 *
 * @param raw - The user-supplied query text.
 * @returns The quoted, AND-joined expression, or `null` when `raw` contains no tokens.
 */
export function buildFtsQuery(raw: string): string | null {
  // \p{M} keeps combining marks attached to their base letter so decomposed
  // text (e.g. "e" + U+0301) is not split in the middle of a word.
  const tokens = raw.match(/[\p{L}\p{M}\p{N}_]+/gu) ?? [];
  if (tokens.length === 0) return null;

  // The character class can never match `"` or whitespace, so tokens are safe to
  // wrap in quotes without any further escaping or trimming.
  return tokens.map((token) => `"${token}"`).join(" AND ");
}
|
||||||
|
|
||||||
|
/**
 * Maps a BM25 rank onto a score in (0, 1] using `1 / (1 + rank)`.
 *
 * - Rank 0 gives 1, rank 1 gives 0.5, and larger ranks give smaller scores.
 * - Negative ranks are clamped to 0, so they all score 1.
 * - Non-finite ranks (`NaN`, `±Infinity`) are treated as rank 999.
 *
 * NOTE(review): because every negative rank collapses to a score of 1, callers that
 * pass negative BM25 values (SQLite FTS5's `bm25()` reports better matches as more
 * negative numbers) would lose their relative ordering here — confirm against the caller.
 *
 * @param rank - The raw rank value to convert.
 * @returns The derived score.
 */
export function bm25RankToScore(rank: number): number {
  let distance: number;
  if (!Number.isFinite(rank)) {
    // Unusable input: fall back to a large rank so the score is close to zero.
    distance = 999;
  } else if (rank < 0) {
    distance = 0;
  } else {
    distance = rank;
  }
  return 1 / (1 + distance);
}
|
||||||
|
|
||||||
|
/**
 * Merges vector and keyword hits into one list, keyed by `id`, and ranks them by a
 * weighted sum of the two scores.
 *
 * - Every vector hit is recorded first with a `textScore` of 0; if the same `id`
 *   appears more than once in `vector`, the last occurrence wins.
 * - A keyword hit whose `id` is already present overwrites that entry's `textScore`
 *   and, when its snippet is non-empty, the snippet as well. Location and source
 *   stay as recorded by the vector hit.
 * - A keyword hit with a new `id` is added with a `vectorScore` of 0.
 *
 * @param params.vector - Hits from the vector search.
 * @param params.keyword - Hits from the keyword search.
 * @param params.vectorWeight - Multiplier applied to each entry's `vectorScore`.
 * @param params.textWeight - Multiplier applied to each entry's `textScore`.
 * @returns One row per distinct `id` (the `id` itself is not included), sorted by
 *   `score` in descending order.
 */
export function mergeHybridResults(params: {
  vector: HybridVectorResult[];
  keyword: HybridKeywordResult[];
  vectorWeight: number;
  textWeight: number;
}): Array<{
  path: string;
  startLine: number;
  endLine: number;
  score: number;
  snippet: string;
  source: HybridSource;
}> {
  type CombinedHit = {
    id: string;
    path: string;
    startLine: number;
    endLine: number;
    source: HybridSource;
    snippet: string;
    vectorScore: number;
    textScore: number;
  };

  const { vector, keyword, vectorWeight, textWeight } = params;
  const combined = new Map<string, CombinedHit>();

  vector.forEach((hit) => {
    const { id, path, startLine, endLine, source, snippet, vectorScore } = hit;
    combined.set(id, { id, path, startLine, endLine, source, snippet, vectorScore, textScore: 0 });
  });

  keyword.forEach((hit) => {
    const prior = combined.get(hit.id);
    if (prior === undefined) {
      // Keyword-only hit: no vector contribution.
      const { id, path, startLine, endLine, source, snippet, textScore } = hit;
      combined.set(id, { id, path, startLine, endLine, source, snippet, vectorScore: 0, textScore });
      return;
    }
    // Seen on the vector side too: attach the text score and prefer the keyword
    // snippet unless it is empty.
    prior.textScore = hit.textScore;
    if (hit.snippet) prior.snippet = hit.snippet;
  });

  const ranked: Array<{
    path: string;
    startLine: number;
    endLine: number;
    score: number;
    snippet: string;
    source: HybridSource;
  }> = [];
  for (const hit of combined.values()) {
    ranked.push({
      path: hit.path,
      startLine: hit.startLine,
      endLine: hit.endLine,
      score: vectorWeight * hit.vectorScore + textWeight * hit.textScore,
      snippet: hit.snippet,
      source: hit.source,
    });
  }

  ranked.sort((left, right) => right.score - left.score);
  return ranked;
}
|
||||||
|
|
||||||
@@ -213,6 +213,102 @@ describe("memory index", () => {
|
|||||||
expect(results[0]?.path).toContain("memory/2026-01-12.md");
|
expect(results[0]?.path).toContain("memory/2026-01-12.md");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("hybrid weights can favor vector-only matches over keyword-only matches", async () => {
  // Two fixture notes: a short one, and one padded with 200 repetitions of "Alpha"
  // that is the only note containing the "id123" token.
  const manyAlpha = Array.from({ length: 200 }, () => "Alpha").join(" ");
  await fs.writeFile(
    path.join(workspaceDir, "memory", "vector-only.md"),
    "Alpha beta. Alpha beta. Alpha beta. Alpha beta.",
  );
  await fs.writeFile(
    path.join(workspaceDir, "memory", "keyword-only.md"),
    `${manyAlpha} beta id123.`,
  );

  // Hybrid search with the vector side weighted at 0.99 and the text side at 0.01.
  const cfg = {
    agents: {
      defaults: {
        workspace: workspaceDir,
        memorySearch: {
          provider: "openai",
          model: "mock-embed",
          store: { path: indexPath, vector: { enabled: false } },
          sync: { watch: false, onSessionStart: false, onSearch: true },
          query: {
            minScore: 0,
            maxResults: 200,
            hybrid: { enabled: true, vectorWeight: 0.99, textWeight: 0.01, candidateMultiplier: 10 },
          },
        },
      },
      list: [{ id: "main", default: true }],
    },
  };
  const result = await getMemorySearchManager({ cfg, agentId: "main" });
  expect(result.manager).not.toBeNull();
  if (!result.manager) throw new Error("manager missing");
  manager = result.manager;

  // Skip the ranking assertions when the manager reports FTS as unavailable.
  const status = manager.status();
  if (!status.fts?.available) return;

  const results = await manager.search("alpha beta id123");
  expect(results.length).toBeGreaterThan(0);
  const paths = results.map((r) => r.path);
  expect(paths).toContain("memory/vector-only.md");
  expect(paths).toContain("memory/keyword-only.md");

  const vectorOnly = results.find((r) => r.path === "memory/vector-only.md");
  const keywordOnly = results.find((r) => r.path === "memory/keyword-only.md");
  // Compare the numbers directly so a failure reports both scores instead of
  // "expected false to be true".
  expect(vectorOnly?.score ?? 0).toBeGreaterThan(keywordOnly?.score ?? 0);
});
|
||||||
|
|
||||||
|
it("hybrid weights can favor keyword matches when text weight dominates", async () => {
  // Two fixture notes: a short one, and one padded with 200 repetitions of "Alpha"
  // that is the only note containing the "id123" token.
  const manyAlpha = Array.from({ length: 200 }, () => "Alpha").join(" ");
  await fs.writeFile(
    path.join(workspaceDir, "memory", "vector-only.md"),
    "Alpha beta. Alpha beta. Alpha beta. Alpha beta.",
  );
  await fs.writeFile(
    path.join(workspaceDir, "memory", "keyword-only.md"),
    `${manyAlpha} beta id123.`,
  );

  // Hybrid search with the text side weighted at 0.99 and the vector side at 0.01.
  const cfg = {
    agents: {
      defaults: {
        workspace: workspaceDir,
        memorySearch: {
          provider: "openai",
          model: "mock-embed",
          store: { path: indexPath, vector: { enabled: false } },
          sync: { watch: false, onSessionStart: false, onSearch: true },
          query: {
            minScore: 0,
            maxResults: 200,
            hybrid: { enabled: true, vectorWeight: 0.01, textWeight: 0.99, candidateMultiplier: 10 },
          },
        },
      },
      list: [{ id: "main", default: true }],
    },
  };
  const result = await getMemorySearchManager({ cfg, agentId: "main" });
  expect(result.manager).not.toBeNull();
  if (!result.manager) throw new Error("manager missing");
  manager = result.manager;

  // Skip the ranking assertions when the manager reports FTS as unavailable.
  const status = manager.status();
  if (!status.fts?.available) return;

  const results = await manager.search("alpha beta id123");
  expect(results.length).toBeGreaterThan(0);
  const paths = results.map((r) => r.path);
  expect(paths).toContain("memory/vector-only.md");
  expect(paths).toContain("memory/keyword-only.md");

  const vectorOnly = results.find((r) => r.path === "memory/vector-only.md");
  const keywordOnly = results.find((r) => r.path === "memory/keyword-only.md");
  // Compare the numbers directly so a failure reports both scores instead of
  // "expected false to be true".
  expect(keywordOnly?.score ?? 0).toBeGreaterThan(vectorOnly?.score ?? 0);
});
|
||||||
|
|
||||||
it("reports vector availability after probe", async () => {
|
it("reports vector availability after probe", async () => {
|
||||||
const cfg = {
|
const cfg = {
|
||||||
agents: {
|
agents: {
|
||||||
|
|||||||
Reference in New Issue
Block a user