/** * Clawdbot Memory (LanceDB) Plugin * * Long-term memory with vector search for AI conversations. * Uses LanceDB for storage and OpenAI for embeddings. * Provides seamless auto-recall and auto-capture via lifecycle hooks. */ import { Type } from "@sinclair/typebox"; import * as lancedb from "@lancedb/lancedb"; import OpenAI from "openai"; import { randomUUID } from "node:crypto"; import type { ClawdbotPluginApi } from "clawdbot/plugin-sdk"; import { stringEnum } from "clawdbot/plugin-sdk"; import { MEMORY_CATEGORIES, type MemoryCategory, memoryConfigSchema, vectorDimsForModel, } from "./config.js"; // ============================================================================ // Types // ============================================================================ type MemoryEntry = { id: string; text: string; vector: number[]; importance: number; category: MemoryCategory; createdAt: number; }; type MemorySearchResult = { entry: MemoryEntry; score: number; }; // ============================================================================ // LanceDB Provider // ============================================================================ const TABLE_NAME = "memories"; class MemoryDB { private db: lancedb.Connection | null = null; private table: lancedb.Table | null = null; private initPromise: Promise | null = null; constructor( private readonly dbPath: string, private readonly vectorDim: number, ) {} private async ensureInitialized(): Promise { if (this.table) return; if (this.initPromise) return this.initPromise; this.initPromise = this.doInitialize(); return this.initPromise; } private async doInitialize(): Promise { this.db = await lancedb.connect(this.dbPath); const tables = await this.db.tableNames(); if (tables.includes(TABLE_NAME)) { this.table = await this.db.openTable(TABLE_NAME); } else { this.table = await this.db.createTable(TABLE_NAME, [ { id: "__schema__", text: "", vector: new Array(this.vectorDim).fill(0), importance: 0, category: "other", createdAt: 0, }, ]); await this.table.delete('id = "__schema__"'); } } async store( entry: Omit, ): Promise { await this.ensureInitialized(); const fullEntry: MemoryEntry = { ...entry, id: randomUUID(), createdAt: Date.now(), }; await this.table!.add([fullEntry]); return fullEntry; } async search( vector: number[], limit = 5, minScore = 0.5, ): Promise { await this.ensureInitialized(); const results = await this.table!.vectorSearch(vector).limit(limit).toArray(); // LanceDB uses L2 distance by default; convert to similarity score const mapped = results.map((row) => { const distance = row._distance ?? 0; // Use inverse for a 0-1 range: sim = 1 / (1 + d) const score = 1 / (1 + distance); return { entry: { id: row.id as string, text: row.text as string, vector: row.vector as number[], importance: row.importance as number, category: row.category as MemoryEntry["category"], createdAt: row.createdAt as number, }, score, }; }); return mapped.filter((r) => r.score >= minScore); } async delete(id: string): Promise { await this.ensureInitialized(); // Validate UUID format to prevent injection const uuidRegex = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i; if (!uuidRegex.test(id)) { throw new Error(`Invalid memory ID format: ${id}`); } await this.table!.delete(`id = '${id}'`); return true; } async count(): Promise { await this.ensureInitialized(); return this.table!.countRows(); } } // ============================================================================ // OpenAI Embeddings // ============================================================================ class Embeddings { private client: OpenAI; constructor( apiKey: string, private model: string, ) { this.client = new OpenAI({ apiKey }); } async embed(text: string): Promise { const response = await this.client.embeddings.create({ model: this.model, input: text, }); return response.data[0].embedding; } } // ============================================================================ // Rule-based capture filter // ============================================================================ const MEMORY_TRIGGERS = [ /zapamatuj si|pamatuj|remember/i, /preferuji|radši|nechci|prefer/i, /rozhodli jsme|budeme používat/i, /\+\d{10,}/, /[\w.-]+@[\w.-]+\.\w+/, /můj\s+\w+\s+je|je\s+můj/i, /my\s+\w+\s+is|is\s+my/i, /i (like|prefer|hate|love|want|need)/i, /always|never|important/i, ]; function shouldCapture(text: string): boolean { if (text.length < 10 || text.length > 500) return false; // Skip injected context from memory recall if (text.includes("")) return false; // Skip system-generated content if (text.startsWith("<") && text.includes(" 3) return false; return MEMORY_TRIGGERS.some((r) => r.test(text)); } function detectCategory(text: string): MemoryCategory { const lower = text.toLowerCase(); if (/prefer|radši|like|love|hate|want/i.test(lower)) return "preference"; if (/rozhodli|decided|will use|budeme/i.test(lower)) return "decision"; if (/\+\d{10,}|@[\w.-]+\.\w+|is called|jmenuje se/i.test(lower)) return "entity"; if (/is|are|has|have|je|má|jsou/i.test(lower)) return "fact"; return "other"; } // ============================================================================ // Plugin Definition // ============================================================================ const memoryPlugin = { id: "memory-lancedb", name: "Memory (LanceDB)", description: "LanceDB-backed long-term memory with auto-recall/capture", kind: "memory" as const, configSchema: memoryConfigSchema, register(api: ClawdbotPluginApi) { const cfg = memoryConfigSchema.parse(api.pluginConfig); const resolvedDbPath = api.resolvePath(cfg.dbPath!); const vectorDim = vectorDimsForModel(cfg.embedding.model ?? "text-embedding-3-small"); const db = new MemoryDB(resolvedDbPath, vectorDim); const embeddings = new Embeddings(cfg.embedding.apiKey, cfg.embedding.model!); api.logger.info( `memory-lancedb: plugin registered (db: ${resolvedDbPath}, lazy init)`, ); // ======================================================================== // Tools // ======================================================================== api.registerTool( { name: "memory_recall", label: "Memory Recall", description: "Search through long-term memories. Use when you need context about user preferences, past decisions, or previously discussed topics.", parameters: Type.Object({ query: Type.String({ description: "Search query" }), limit: Type.Optional(Type.Number({ description: "Max results (default: 5)" })), }), async execute(_toolCallId, params) { const { query, limit = 5 } = params as { query: string; limit?: number }; const vector = await embeddings.embed(query); const results = await db.search(vector, limit, 0.1); if (results.length === 0) { return { content: [{ type: "text", text: "No relevant memories found." }], details: { count: 0 }, }; } const text = results .map( (r, i) => `${i + 1}. [${r.entry.category}] ${r.entry.text} (${(r.score * 100).toFixed(0)}%)`, ) .join("\n"); // Strip vector data for serialization (typed arrays can't be cloned) const sanitizedResults = results.map((r) => ({ id: r.entry.id, text: r.entry.text, category: r.entry.category, importance: r.entry.importance, score: r.score, })); return { content: [ { type: "text", text: `Found ${results.length} memories:\n\n${text}` }, ], details: { count: results.length, memories: sanitizedResults }, }; }, }, { name: "memory_recall" }, ); api.registerTool( { name: "memory_store", label: "Memory Store", description: "Save important information in long-term memory. Use for preferences, facts, decisions.", parameters: Type.Object({ text: Type.String({ description: "Information to remember" }), importance: Type.Optional( Type.Number({ description: "Importance 0-1 (default: 0.7)" }), ), category: Type.Optional(stringEnum(MEMORY_CATEGORIES)), }), async execute(_toolCallId, params) { const { text, importance = 0.7, category = "other", } = params as { text: string; importance?: number; category?: MemoryEntry["category"]; }; const vector = await embeddings.embed(text); // Check for duplicates const existing = await db.search(vector, 1, 0.95); if (existing.length > 0) { return { content: [ { type: "text", text: `Similar memory already exists: "${existing[0].entry.text}"` }, ], details: { action: "duplicate", existingId: existing[0].entry.id, existingText: existing[0].entry.text }, }; } const entry = await db.store({ text, vector, importance, category, }); return { content: [{ type: "text", text: `Stored: "${text.slice(0, 100)}..."` }], details: { action: "created", id: entry.id }, }; }, }, { name: "memory_store" }, ); api.registerTool( { name: "memory_forget", label: "Memory Forget", description: "Delete specific memories. GDPR-compliant.", parameters: Type.Object({ query: Type.Optional(Type.String({ description: "Search to find memory" })), memoryId: Type.Optional(Type.String({ description: "Specific memory ID" })), }), async execute(_toolCallId, params) { const { query, memoryId } = params as { query?: string; memoryId?: string }; if (memoryId) { await db.delete(memoryId); return { content: [{ type: "text", text: `Memory ${memoryId} forgotten.` }], details: { action: "deleted", id: memoryId }, }; } if (query) { const vector = await embeddings.embed(query); const results = await db.search(vector, 5, 0.7); if (results.length === 0) { return { content: [{ type: "text", text: "No matching memories found." }], details: { found: 0 }, }; } if (results.length === 1 && results[0].score > 0.9) { await db.delete(results[0].entry.id); return { content: [ { type: "text", text: `Forgotten: "${results[0].entry.text}"` }, ], details: { action: "deleted", id: results[0].entry.id }, }; } const list = results .map((r) => `- [${r.entry.id.slice(0, 8)}] ${r.entry.text.slice(0, 60)}...`) .join("\n"); // Strip vector data for serialization const sanitizedCandidates = results.map((r) => ({ id: r.entry.id, text: r.entry.text, category: r.entry.category, score: r.score, })); return { content: [ { type: "text", text: `Found ${results.length} candidates. Specify memoryId:\n${list}`, }, ], details: { action: "candidates", candidates: sanitizedCandidates }, }; } return { content: [{ type: "text", text: "Provide query or memoryId." }], details: { error: "missing_param" }, }; }, }, { name: "memory_forget" }, ); // ======================================================================== // CLI Commands // ======================================================================== api.registerCli( ({ program }) => { const memory = program .command("ltm") .description("LanceDB memory plugin commands"); memory .command("list") .description("List memories") .action(async () => { const count = await db.count(); console.log(`Total memories: ${count}`); }); memory .command("search") .description("Search memories") .argument("", "Search query") .option("--limit ", "Max results", "5") .action(async (query, opts) => { const vector = await embeddings.embed(query); const results = await db.search(vector, parseInt(opts.limit), 0.3); // Strip vectors for output const output = results.map((r) => ({ id: r.entry.id, text: r.entry.text, category: r.entry.category, importance: r.entry.importance, score: r.score, })); console.log(JSON.stringify(output, null, 2)); }); memory .command("stats") .description("Show memory statistics") .action(async () => { const count = await db.count(); console.log(`Total memories: ${count}`); }); }, { commands: ["ltm"] }, ); // ======================================================================== // Lifecycle Hooks // ======================================================================== // Auto-recall: inject relevant memories before agent starts if (cfg.autoRecall) { api.on("before_agent_start", async (event) => { if (!event.prompt || event.prompt.length < 5) return; try { const vector = await embeddings.embed(event.prompt); const results = await db.search(vector, 3, 0.3); if (results.length === 0) return; const memoryContext = results .map((r) => `- [${r.entry.category}] ${r.entry.text}`) .join("\n"); api.logger.info?.( `memory-lancedb: injecting ${results.length} memories into context`, ); return { prependContext: `\nThe following memories may be relevant to this conversation:\n${memoryContext}\n`, }; } catch (err) { api.logger.warn(`memory-lancedb: recall failed: ${String(err)}`); } }); } // Auto-capture: analyze and store important information after agent ends if (cfg.autoCapture) { api.on("agent_end", async (event) => { if (!event.success || !event.messages || event.messages.length === 0) { return; } try { // Extract text content from messages (handling unknown[] type) const texts: string[] = []; for (const msg of event.messages) { // Type guard for message object if (!msg || typeof msg !== "object") continue; const msgObj = msg as Record; // Only process user and assistant messages const role = msgObj.role; if (role !== "user" && role !== "assistant") continue; const content = msgObj.content; // Handle string content directly if (typeof content === "string") { texts.push(content); continue; } // Handle array content (content blocks) if (Array.isArray(content)) { for (const block of content) { if ( block && typeof block === "object" && "type" in block && (block as Record).type === "text" && "text" in block && typeof (block as Record).text === "string" ) { texts.push((block as Record).text as string); } } } } // Filter for capturable content const toCapture = texts.filter( (text) => text && shouldCapture(text), ); if (toCapture.length === 0) return; // Store each capturable piece (limit to 3 per conversation) let stored = 0; for (const text of toCapture.slice(0, 3)) { const category = detectCategory(text); const vector = await embeddings.embed(text); // Check for duplicates (high similarity threshold) const existing = await db.search(vector, 1, 0.95); if (existing.length > 0) continue; await db.store({ text, vector, importance: 0.7, category, }); stored++; } if (stored > 0) { api.logger.info(`memory-lancedb: auto-captured ${stored} memories`); } } catch (err) { api.logger.warn(`memory-lancedb: capture failed: ${String(err)}`); } }); } // ======================================================================== // Service // ======================================================================== api.registerService({ id: "memory-lancedb", start: () => { api.logger.info( `memory-lancedb: initialized (db: ${resolvedDbPath}, model: ${cfg.embedding.model})`, ); }, stop: () => { api.logger.info("memory-lancedb: stopped"); }, }); }, }; export default memoryPlugin;