refactor: split memory manager internals
This commit is contained in:
181
src/memory/manager-search.ts
Normal file
181
src/memory/manager-search.ts
Normal file
@@ -0,0 +1,181 @@
|
||||
import type { DatabaseSync } from "node:sqlite";
|
||||
|
||||
import { truncateUtf16Safe } from "../utils.js";
|
||||
import { cosineSimilarity, parseEmbedding } from "./internal.js";
|
||||
|
||||
/**
 * Packs an embedding into a binary blob of 32-bit floats so it can be bound
 * as a SQL parameter. Values are narrowed from float64 to float32.
 */
const vectorToBlob = (embedding: number[]): Buffer => {
  const floats = Float32Array.from(embedding);
  // Wrap the typed array's backing store directly; no extra copy is made.
  return Buffer.from(floats.buffer);
};
|
||||
|
||||
/**
 * Value of a chunk's `source` column. Also the element type of the bound
 * parameters carried by the source filters passed to the search functions.
 */
export type SearchSource = string;
|
||||
|
||||
/**
 * A single hit produced by the search functions in this module.
 */
export type SearchRowResult = {
  /** The matched row's `id` column. */
  id: string;
  /** The matched row's `path` column. */
  path: string;
  /** The matched row's `start_line` column. */
  startLine: number;
  /** The matched row's `end_line` column. */
  endLine: number;
  /**
   * Relevance score as computed by the producing function: `1 - distance`
   * or a cosine similarity for vector search, the converted BM25 rank for
   * keyword search.
   */
  score: number;
  /** The row's text after truncation to the caller's `snippetMaxChars`. */
  snippet: string;
  /** The matched row's `source` column. */
  source: SearchSource;
};
|
||||
|
||||
/**
 * Returns the chunks most similar to a query embedding.
 *
 * Two strategies are used:
 *  - If `ensureVectorReady(queryVec.length)` resolves to `true`, ranking is
 *    delegated to SQL via `vec_distance_cosine` over `vectorTable`, and the
 *    score is `1 - distance`.
 *  - Otherwise every chunk for `providerModel` is loaded with `listChunks`
 *    and scored in-process with `cosineSimilarity`; non-finite scores are
 *    discarded before sorting.
 *
 * `vectorTable` and the `sql` fragments of both source filters are
 * interpolated into the statement text, so they must come from trusted code,
 * never from user input. All other values are bound as parameters.
 *
 * @param params.limit Maximum number of hits. Truncated toward zero; values
 *   that are not greater than zero after truncation (including `NaN`) yield
 *   an empty result.
 * @param params.snippetMaxChars Upper bound passed to `truncateUtf16Safe`
 *   for each hit's snippet.
 * @param params.sourceFilterVec Filter appended to the SQL-ranked query.
 * @param params.sourceFilterChunks Filter forwarded to `listChunks` on the
 *   in-process fallback path.
 * @returns Hits ordered from most to least similar.
 */
export async function searchVector(params: {
  db: DatabaseSync;
  vectorTable: string;
  providerModel: string;
  queryVec: number[];
  limit: number;
  snippetMaxChars: number;
  ensureVectorReady: (dimensions: number) => Promise<boolean>;
  sourceFilterVec: { sql: string; params: SearchSource[] };
  sourceFilterChunks: { sql: string; params: SearchSource[] };
}): Promise<SearchRowResult[]> {
  // SQLite rejects LIMIT values that cannot be losslessly converted to an
  // integer, while Array.prototype.slice silently truncates. Normalising once
  // keeps the SQL path and the in-process fallback consistent.
  const limit = Math.trunc(params.limit);
  if (params.queryVec.length === 0 || !(limit > 0)) return [];

  if (await params.ensureVectorReady(params.queryVec.length)) {
    const rows = params.db
      .prepare(
        `SELECT c.id, c.path, c.start_line, c.end_line, c.text,\n` +
          ` c.source,\n` +
          ` vec_distance_cosine(v.embedding, ?) AS dist\n` +
          ` FROM ${params.vectorTable} v\n` +
          ` JOIN chunks c ON c.id = v.id\n` +
          ` WHERE c.model = ?${params.sourceFilterVec.sql}\n` +
          ` ORDER BY dist ASC\n` +
          ` LIMIT ?`,
      )
      .all(
        vectorToBlob(params.queryVec),
        params.providerModel,
        ...params.sourceFilterVec.params,
        limit,
      ) as Array<{
      id: string;
      path: string;
      start_line: number;
      end_line: number;
      text: string;
      source: SearchSource;
      dist: number;
    }>;

    return rows.map((row) => ({
      id: row.id,
      path: row.path,
      startLine: row.start_line,
      endLine: row.end_line,
      // Convert cosine distance into a similarity-style score.
      score: 1 - row.dist,
      snippet: truncateUtf16Safe(row.text, params.snippetMaxChars),
      source: row.source,
    }));
  }

  // Fallback: score every stored chunk for this model in-process.
  const candidates = listChunks({
    db: params.db,
    providerModel: params.providerModel,
    sourceFilter: params.sourceFilterChunks,
  });

  const scored = candidates
    .map((chunk) => ({
      chunk,
      score: cosineSimilarity(params.queryVec, chunk.embedding),
    }))
    .filter((entry) => Number.isFinite(entry.score));

  // `scored` is a fresh array produced by map/filter, so sorting in place
  // does not mutate anything the caller can observe.
  return scored
    .sort((a, b) => b.score - a.score)
    .slice(0, limit)
    .map((entry) => ({
      id: entry.chunk.id,
      path: entry.chunk.path,
      startLine: entry.chunk.startLine,
      endLine: entry.chunk.endLine,
      score: entry.score,
      snippet: truncateUtf16Safe(entry.chunk.text, params.snippetMaxChars),
      source: entry.chunk.source,
    }));
}
|
||||
|
||||
/**
 * Loads every row of the `chunks` table whose `model` equals
 * `providerModel`, optionally narrowed by `sourceFilter`, and converts each
 * row to camelCase with its stored embedding decoded by `parseEmbedding`.
 *
 * `sourceFilter.sql` is appended verbatim to the WHERE clause, so it must be
 * built by trusted code; its `params` are bound after the model parameter.
 *
 * @returns One entry per matching row, in the order SQLite returns them
 *   (the query has no ORDER BY).
 */
export function listChunks(params: {
  db: DatabaseSync;
  providerModel: string;
  sourceFilter: { sql: string; params: SearchSource[] };
}): Array<{
  id: string;
  path: string;
  startLine: number;
  endLine: number;
  text: string;
  embedding: number[];
  source: SearchSource;
}> {
  // Shape of a raw row as selected below; `embedding` is still serialized.
  type ChunkRow = {
    id: string;
    path: string;
    start_line: number;
    end_line: number;
    text: string;
    embedding: string;
    source: SearchSource;
  };

  const statement = params.db.prepare(
    `SELECT id, path, start_line, end_line, text, embedding, source\n` +
      ` FROM chunks\n` +
      ` WHERE model = ?${params.sourceFilter.sql}`,
  );
  const rows = statement.all(
    params.providerModel,
    ...params.sourceFilter.params,
  ) as ChunkRow[];

  return rows.map((row) => ({
    id: row.id,
    path: row.path,
    startLine: row.start_line,
    endLine: row.end_line,
    text: row.text,
    embedding: parseEmbedding(row.embedding),
    source: row.source,
  }));
}
|
||||
|
||||
/**
 * Runs a full-text search against `ftsTable` and returns the best-ranked
 * rows for `providerModel`.
 *
 * The raw query is first converted with `buildFtsQuery`; a `null` or empty
 * result short-circuits to an empty array. Rows are ordered by `bm25(...)`
 * ascending in SQL, and each rank is converted with `bm25RankToScore`; the
 * converted value is reported as both `score` and `textScore`.
 *
 * `ftsTable` and `sourceFilter.sql` are interpolated into the statement
 * text, so they must come from trusted code, never from user input. The
 * match expression, model, filter params and limit are bound as parameters.
 *
 * @param params.limit Maximum number of hits. Truncated toward zero; values
 *   that are not greater than zero after truncation (including `NaN`) yield
 *   an empty result.
 * @param params.snippetMaxChars Upper bound passed to `truncateUtf16Safe`
 *   for each hit's snippet.
 * @returns Hits in the order returned by the query (BM25 rank ascending).
 */
export async function searchKeyword(params: {
  db: DatabaseSync;
  ftsTable: string;
  providerModel: string;
  query: string;
  limit: number;
  snippetMaxChars: number;
  sourceFilter: { sql: string; params: SearchSource[] };
  buildFtsQuery: (raw: string) => string | null;
  bm25RankToScore: (rank: number) => number;
}): Promise<Array<SearchRowResult & { textScore: number }>> {
  // SQLite rejects LIMIT values that cannot be losslessly converted to an
  // integer; normalise up front instead of failing inside the statement.
  const limit = Math.trunc(params.limit);
  if (!(limit > 0)) return [];

  const ftsQuery = params.buildFtsQuery(params.query);
  if (!ftsQuery) return [];

  const rows = params.db
    .prepare(
      `SELECT id, path, source, start_line, end_line, text,\n` +
        ` bm25(${params.ftsTable}) AS rank\n` +
        ` FROM ${params.ftsTable}\n` +
        ` WHERE ${params.ftsTable} MATCH ? AND model = ?${params.sourceFilter.sql}\n` +
        ` ORDER BY rank ASC\n` +
        ` LIMIT ?`,
    )
    .all(ftsQuery, params.providerModel, ...params.sourceFilter.params, limit) as Array<{
    id: string;
    path: string;
    source: SearchSource;
    start_line: number;
    end_line: number;
    text: string;
    rank: number;
  }>;

  return rows.map((row) => {
    const textScore = params.bm25RankToScore(row.rank);
    return {
      id: row.id,
      path: row.path,
      startLine: row.start_line,
      endLine: row.end_line,
      score: textScore,
      textScore,
      snippet: truncateUtf16Safe(row.text, params.snippetMaxChars),
      source: row.source,
    };
  });
}
|
||||
Reference in New Issue
Block a user