206 lines
5.4 KiB
TypeScript
206 lines
5.4 KiB
TypeScript
import crypto from "node:crypto";
|
|
import fsSync from "node:fs";
|
|
import fs from "node:fs/promises";
|
|
import path from "node:path";
|
|
|
|
/**
 * Snapshot of one memory file on disk.
 *
 * Field meanings below describe how `buildFileEntry` in this module fills them.
 */
export type MemoryFileEntry = {
  /** Path relative to the workspace directory, using "/" separators. */
  path: string;
  /** The absolute path the entry was built from. */
  absPath: string;
  /** Last-modification time in milliseconds, as reported by `fs.stat`. */
  mtimeMs: number;
  /** File size as reported by `fs.stat`. */
  size: number;
  /** Hex SHA-256 digest of the file content read as UTF-8 text. */
  hash: string;
};
|
|
|
|
/**
 * A contiguous slice of a document.
 *
 * Field meanings below describe how `chunkMarkdown` in this module fills them.
 */
export type MemoryChunk = {
  /** 1-based number of the first source line contributing to the chunk. */
  startLine: number;
  /** 1-based number of the last source line contributing to the chunk (inclusive). */
  endLine: number;
  /** The chunk's lines (or line segments) joined with "\n". */
  text: string;
  /** Hex SHA-256 digest of `text`. */
  hash: string;
};
|
|
|
|
/**
 * Creates `dir` (and any missing parents) synchronously and returns `dir`.
 *
 * This is best-effort: any error raised by `mkdirSync` is swallowed, so the
 * returned path is NOT guaranteed to exist or to be a directory. Callers that
 * need certainty must check for themselves.
 *
 * @param dir Directory path to create.
 * @returns The same `dir` string that was passed in.
 */
export function ensureDir(dir: string): string {
  try {
    // `recursive: true` creates missing parents and does not fail when the
    // directory already exists.
    fsSync.mkdirSync(dir, { recursive: true });
  } catch {
    // Intentionally ignored: creation failures surface later, when the caller
    // actually tries to use the directory.
  }
  return dir;
}
|
|
|
|
/**
 * Normalizes a relative path for comparison: trims surrounding whitespace,
 * converts backslashes to "/", and strips every leading "." and "/" character.
 *
 * Separators are converted BEFORE the leading run is stripped, so Windows-style
 * input such as ".\\memory\\a.md" yields "memory/a.md" rather than a path with
 * a leftover leading "/".
 *
 * Note that the strip removes all leading dots and slashes, so "../x" and
 * ".hidden" become "x" and "hidden" respectively.
 *
 * @param value Raw relative path.
 * @returns The normalized path; may be an empty string.
 */
export function normalizeRelPath(value: string): string {
  return value
    .trim()
    .replace(/\\/g, "/")
    .replace(/^[./]+/, "");
}
|
|
|
|
/**
 * Reports whether a relative path refers to a memory file.
 *
 * The path is first passed through `normalizeRelPath`. It matches when the
 * normalized form is exactly "MEMORY.md" or "memory.md", or when it starts
 * with "memory/". No file-extension check is applied to paths under "memory/".
 *
 * @param relPath Relative path to test.
 * @returns `true` for memory paths; `false` otherwise, including when the
 *   path normalizes to an empty string.
 */
export function isMemoryPath(relPath: string): boolean {
  const normalized = normalizeRelPath(relPath);
  if (!normalized) return false;
  if (normalized === "MEMORY.md" || normalized === "memory.md") return true;
  return normalized.startsWith("memory/");
}
|
|
|
|
/**
 * Resolves to `true` when `fs.access` succeeds for `filePath`, and to `false`
 * when it rejects for any reason. Never rejects.
 *
 * The check does not distinguish files from directories.
 *
 * @param filePath Path to probe.
 */
async function exists(filePath: string): Promise<boolean> {
  try {
    await fs.access(filePath);
    return true;
  } catch {
    // Any access failure is reported as "does not exist".
    return false;
  }
}
|
|
|
|
/**
 * Recursively collects the paths of all ".md" files under `dir`.
 *
 * Results are appended to `files` (the array is mutated in place) in the order
 * `fs.readdir` returns entries; subdirectories are descended into as they are
 * encountered. Only entries whose `Dirent` reports a regular file are added,
 * and the ".md" suffix match is case-sensitive.
 *
 * @param dir Directory to scan; must be readable, otherwise the promise rejects.
 * @param files Accumulator that receives the joined paths of matching files.
 */
async function walkDir(dir: string, files: string[]): Promise<void> {
  const entries = await fs.readdir(dir, { withFileTypes: true });
  for (const entry of entries) {
    const full = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      await walkDir(full, files);
      continue;
    }
    // Skip anything that is neither a directory nor a regular file.
    if (!entry.isFile()) continue;
    if (!entry.name.endsWith(".md")) continue;
    files.push(full);
  }
}
|
|
|
|
/**
 * Lists the memory files of a workspace.
 *
 * Candidates, in order: `<workspace>/MEMORY.md`, `<workspace>/memory.md`, then
 * every ".md" file found by `walkDir` under `<workspace>/memory`. The
 * `memory` entry is only scanned when it is a directory; a regular file with
 * that name is ignored instead of making the call reject.
 *
 * When more than one candidate is found, entries that resolve to the same real
 * path (e.g. via symlinks) are collapsed, keeping the first occurrence. The
 * returned strings are the original, unresolved paths.
 *
 * @param workspaceDir Root directory of the workspace.
 * @returns Paths of the memory files, each `workspaceDir` joined with a relative part.
 */
export async function listMemoryFiles(workspaceDir: string): Promise<string[]> {
  const result: string[] = [];

  const memoryFile = path.join(workspaceDir, "MEMORY.md");
  const altMemoryFile = path.join(workspaceDir, "memory.md");
  if (await exists(memoryFile)) result.push(memoryFile);
  if (await exists(altMemoryFile)) result.push(altMemoryFile);

  const memoryDir = path.join(workspaceDir, "memory");
  let memoryDirIsDirectory = false;
  try {
    memoryDirIsDirectory = (await fs.stat(memoryDir)).isDirectory();
  } catch {
    // Missing or un-stat-able: treated the same as "no memory directory".
  }
  if (memoryDirIsDirectory) {
    await walkDir(memoryDir, result);
  }

  // Nothing to de-duplicate with zero or one entry.
  if (result.length <= 1) return result;

  const seen = new Set<string>();
  const deduped: string[] = [];
  for (const entry of result) {
    // Fall back to the literal path when it cannot be resolved.
    let key = entry;
    try {
      key = await fs.realpath(entry);
    } catch {
      // Keep the unresolved path as the de-duplication key.
    }
    if (seen.has(key)) continue;
    seen.add(key);
    deduped.push(entry);
  }
  return deduped;
}
|
|
|
|
/**
 * Computes the SHA-256 digest of a string.
 *
 * @param value Text to hash.
 * @returns The digest as a lowercase hexadecimal string (64 characters).
 */
export function hashText(value: string): string {
  return crypto.createHash("sha256").update(value).digest("hex");
}
|
|
|
|
/**
 * Builds a `MemoryFileEntry` for a file on disk.
 *
 * The file is stat-ed and read as UTF-8; the two operations are independent
 * and run concurrently. The hash is computed with `hashText` over the decoded
 * text, while `size` and `mtimeMs` come straight from `fs.stat`.
 *
 * @param absPath Path of the file to describe; stored unchanged in the entry.
 * @param workspaceDir Directory that `path` in the result is made relative to.
 * @returns The populated entry, with `path` using "/" separators.
 * @throws Rejects when the file cannot be stat-ed or read.
 */
export async function buildFileEntry(
  absPath: string,
  workspaceDir: string,
): Promise<MemoryFileEntry> {
  const [stat, content] = await Promise.all([
    fs.stat(absPath),
    fs.readFile(absPath, "utf-8"),
  ]);
  return {
    // Normalize Windows separators so stored paths look the same on every platform.
    path: path.relative(workspaceDir, absPath).replace(/\\/g, "/"),
    absPath,
    mtimeMs: stat.mtimeMs,
    size: stat.size,
    hash: hashText(content),
  };
}
|
|
|
|
/**
 * Splits text into line-oriented chunks of bounded size, optionally repeating
 * the trailing lines of one chunk at the start of the next.
 *
 * Budgets are expressed in characters, estimated at 4 characters per token:
 * `maxChars = max(32, tokens * 4)` and `overlapChars = max(0, overlap * 4)`.
 * Each line costs its length plus one (for the newline). A line longer than
 * `maxChars` is cut into `maxChars`-sized segments, all of which keep the
 * original line number.
 *
 * Behavior worth knowing:
 * - `content` is split on "\n" only, so a trailing "\r" stays part of its line.
 * - Empty `content` yields one chunk with empty text covering line 1.
 * - After a chunk is emitted, whole trailing lines are carried over until their
 *   cost reaches `overlapChars`; the next segment is then appended
 *   unconditionally, so a chunk that begins with overlap may exceed `maxChars`.
 * - A `tokens` or `overlap` value that produces NaN is treated as 0, i.e. the
 *   32-character minimum and no overlap respectively.
 *
 * @param content Text to chunk.
 * @param chunking `tokens` is the approximate chunk size, `overlap` the
 *   approximate amount of text shared between consecutive chunks.
 * @returns Chunks in document order, each with 1-based inclusive line numbers
 *   and the `hashText` digest of its text.
 */
export function chunkMarkdown(
  content: string,
  chunking: { tokens: number; overlap: number },
): MemoryChunk[] {
  type NumberedLine = { line: string; lineNo: number };

  // Guard against NaN: a NaN budget would make `slice(start, NaN)` return ""
  // (dropping line content) and would stop the overlap loop from terminating early.
  const requestedMax = chunking.tokens * 4;
  const requestedOverlap = chunking.overlap * 4;
  const maxChars = Math.max(32, Number.isNaN(requestedMax) ? 0 : requestedMax);
  const overlapChars = Math.max(0, Number.isNaN(requestedOverlap) ? 0 : requestedOverlap);

  const chunks: MemoryChunk[] = [];
  let current: NumberedLine[] = [];
  let currentChars = 0;

  // Emits the pending lines as one chunk; does nothing when there are none.
  const flush = (): void => {
    const firstEntry = current[0];
    const lastEntry = current[current.length - 1];
    if (!firstEntry || !lastEntry) return;
    const text = current.map((entry) => entry.line).join("\n");
    chunks.push({
      startLine: firstEntry.lineNo,
      endLine: lastEntry.lineNo,
      text,
      hash: hashText(text),
    });
  };

  // Resets the pending lines to the overlap tail of the chunk just emitted.
  const carryOverlap = (): void => {
    if (overlapChars <= 0 || current.length === 0) {
      current = [];
      currentChars = 0;
      return;
    }
    // Walk backwards, keeping whole lines until the overlap budget is reached.
    // At least one line is always kept because the budget is positive.
    let acc = 0;
    let keepFrom = current.length;
    while (keepFrom > 0 && acc < overlapChars) {
      keepFrom -= 1;
      acc += (current[keepFrom]?.line.length ?? 0) + 1;
    }
    current = current.slice(keepFrom);
    currentChars = acc;
  };

  // `split` always returns at least one element, so there is no empty-array case.
  const lines = content.split("\n");
  for (let i = 0; i < lines.length; i += 1) {
    const line = lines[i] ?? "";
    const lineNo = i + 1;

    // Blank lines are kept as a single empty segment so line numbers stay aligned.
    const segments: string[] = [];
    if (line.length === 0) {
      segments.push("");
    } else {
      for (let start = 0; start < line.length; start += maxChars) {
        segments.push(line.slice(start, start + maxChars));
      }
    }

    for (const segment of segments) {
      const lineSize = segment.length + 1;
      // Only break when something is pending; a lone over-budget segment is
      // still accepted so that progress is always made.
      if (currentChars + lineSize > maxChars && current.length > 0) {
        flush();
        carryOverlap();
      }
      current.push({ line: segment, lineNo });
      currentChars += lineSize;
    }
  }

  flush();
  return chunks;
}
|
|
|
|
/**
 * Parses a JSON-encoded embedding vector.
 *
 * The input must be a JSON array whose elements are all finite numbers.
 * Anything else — invalid JSON, a non-array value, or an array containing a
 * non-number such as `null` or a string — yields an empty array rather than a
 * value that merely claims to be `number[]`.
 *
 * @param raw JSON text, e.g. "[0.1, 0.2]".
 * @returns The parsed vector, or `[]` when `raw` is not a valid numeric array.
 */
export function parseEmbedding(raw: string): number[] {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return [];
  }
  if (!Array.isArray(parsed)) return [];
  const values: unknown[] = parsed;
  const allNumbers = values.every(
    (value): value is number => typeof value === "number" && Number.isFinite(value),
  );
  return allNumbers ? values : [];
}
|
|
|
|
/**
 * Computes the cosine similarity of two vectors.
 *
 * When the vectors differ in length, only the shared prefix (the first
 * `min(a.length, b.length)` components) is used, for both the dot product and
 * the norms.
 *
 * @param a First vector.
 * @param b Second vector.
 * @returns `dot / (|a| * |b|)` over the compared components, or 0 when either
 *   vector is empty or has zero norm over those components.
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length === 0 || b.length === 0) return 0;

  const len = Math.min(a.length, b.length);
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < len; i += 1) {
    // Holes in sparse arrays are treated as 0.
    const av = a[i] ?? 0;
    const bv = b[i] ?? 0;
    dot += av * bv;
    normA += av * av;
    normB += bv * bv;
  }

  // Avoid dividing by zero for all-zero vectors.
  if (normA === 0 || normB === 0) return 0;
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}
|