Add plugin lifecycle hooks infrastructure: - before_agent_start: inject context before agent loop - agent_end: analyze conversation after completion - 13 hook types total (message, tool, session, gateway hooks) Memory plugin implementation: - LanceDB vector storage with OpenAI embeddings - kind: "memory" to integrate with upstream slot system - Auto-recall: injects <relevant-memories> when context found - Auto-capture: stores preferences, decisions, entities - Rule-based capture filtering with 0.95 similarity dedup - Tools: memory_recall, memory_store, memory_forget - CLI: clawdbot ltm list|search|stats Plugin infrastructure: - api.on() method for hook registration - Global hook runner singleton for cross-module access - Priority ordering and error catching Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
672 lines
21 KiB
TypeScript
672 lines
21 KiB
TypeScript
/**
|
|
* Clawdbot Memory Plugin
|
|
*
|
|
* Long-term memory with vector search for AI conversations.
|
|
* Uses LanceDB for storage and OpenAI for embeddings.
|
|
* Provides seamless auto-recall and auto-capture via lifecycle hooks.
|
|
*/
|
|
|
|
import { Type } from "@sinclair/typebox";
|
|
import * as lancedb from "@lancedb/lancedb";
|
|
import OpenAI from "openai";
|
|
import { randomUUID } from "node:crypto";
|
|
import { homedir } from "node:os";
|
|
import { join } from "node:path";
|
|
import type { ClawdbotPluginApi } from "clawdbot/plugin-sdk";
|
|
|
|
// ============================================================================
|
|
// Types
|
|
// ============================================================================
|
|
|
|
/**
 * Parsed plugin configuration. Produced by `memoryConfigSchema.parse`,
 * which fills in every optional field with a default.
 */
type MemoryConfig = {
  embedding: {
    // Only OpenAI embeddings are supported by this plugin.
    provider: "openai";
    // Embedding model name; parse() defaults this to "text-embedding-3-small".
    model?: string;
    // API key with any ${ENV_VAR} placeholders already expanded.
    apiKey: string;
  };
  // LanceDB directory; parse() defaults to ~/.clawdbot/memory/lancedb.
  dbPath?: string;
  // Auto-store trigger-matching conversation text (default: true).
  autoCapture?: boolean;
  // Inject relevant memories into context before the agent starts (default: true).
  autoRecall?: boolean;
};
|
|
|
|
/** One memory row as persisted in the LanceDB table. */
type MemoryEntry = {
  // UUID generated by MemoryDB.store at insertion time.
  id: string;
  // The remembered text itself.
  text: string;
  // Embedding vector; expected length is VECTOR_DIM — TODO confirm the
  // configured model actually produces vectors of that dimension.
  vector: number[];
  // Relative importance in [0, 1]; callers in this file default it to 0.7.
  importance: number;
  category: "preference" | "fact" | "decision" | "entity" | "other";
  // Unix epoch milliseconds (Date.now()) at store time.
  createdAt: number;
};
|
|
|
|
/** A vector-search hit: the stored entry plus a similarity score. */
type MemorySearchResult = {
  entry: MemoryEntry;
  // Similarity in (0, 1], derived from L2 distance as 1 / (1 + distance).
  score: number;
};
|
|
|
|
// ============================================================================
|
|
// Config Schema
|
|
// ============================================================================
|
|
|
|
const memoryConfigSchema = {
|
|
parse(value: unknown): MemoryConfig {
|
|
if (!value || typeof value !== "object" || Array.isArray(value)) {
|
|
throw new Error("memory config required");
|
|
}
|
|
const cfg = value as Record<string, unknown>;
|
|
|
|
// Embedding config is required
|
|
const embedding = cfg.embedding as Record<string, unknown> | undefined;
|
|
if (!embedding || typeof embedding.apiKey !== "string") {
|
|
throw new Error("embedding.apiKey is required");
|
|
}
|
|
|
|
return {
|
|
embedding: {
|
|
provider: "openai",
|
|
model:
|
|
typeof embedding.model === "string"
|
|
? embedding.model
|
|
: "text-embedding-3-small",
|
|
apiKey: resolveEnvVars(embedding.apiKey),
|
|
},
|
|
dbPath:
|
|
typeof cfg.dbPath === "string"
|
|
? cfg.dbPath
|
|
: join(homedir(), ".clawdbot", "memory", "lancedb"),
|
|
autoCapture: cfg.autoCapture !== false,
|
|
autoRecall: cfg.autoRecall !== false,
|
|
};
|
|
},
|
|
uiHints: {
|
|
"embedding.apiKey": {
|
|
label: "OpenAI API Key",
|
|
sensitive: true,
|
|
placeholder: "sk-proj-...",
|
|
help: "API key for OpenAI embeddings (or use ${OPENAI_API_KEY})",
|
|
},
|
|
"embedding.model": {
|
|
label: "Embedding Model",
|
|
placeholder: "text-embedding-3-small",
|
|
help: "OpenAI embedding model to use",
|
|
},
|
|
dbPath: {
|
|
label: "Database Path",
|
|
placeholder: "~/.clawdbot/memory/lancedb",
|
|
advanced: true,
|
|
},
|
|
autoCapture: {
|
|
label: "Auto-Capture",
|
|
help: "Automatically capture important information from conversations",
|
|
},
|
|
autoRecall: {
|
|
label: "Auto-Recall",
|
|
help: "Automatically inject relevant memories into context",
|
|
},
|
|
},
|
|
};
|
|
|
|
function resolveEnvVars(value: string): string {
|
|
return value.replace(/\$\{([^}]+)\}/g, (_, envVar) => {
|
|
const envValue = process.env[envVar];
|
|
if (!envValue) {
|
|
throw new Error(`Environment variable ${envVar} is not set`);
|
|
}
|
|
return envValue;
|
|
});
|
|
}
|
|
|
|
// ============================================================================
|
|
// LanceDB Provider
|
|
// ============================================================================
|
|
|
|
// Name of the LanceDB table holding all memory rows.
const TABLE_NAME = "memories";

// Embedding dimensionality used when seeding the table schema.
// NOTE(review): hard-coded for text-embedding-3-small, but the config allows
// any model name — confirm a differently-sized model cannot be configured.
const VECTOR_DIM = 1536; // OpenAI text-embedding-3-small
|
|
|
|
class MemoryDB {
|
|
private db: lancedb.Connection | null = null;
|
|
private table: lancedb.Table | null = null;
|
|
private initPromise: Promise<void> | null = null;
|
|
|
|
constructor(private readonly dbPath: string) {}
|
|
|
|
private async ensureInitialized(): Promise<void> {
|
|
if (this.table) return;
|
|
if (this.initPromise) return this.initPromise;
|
|
|
|
this.initPromise = this.doInitialize();
|
|
return this.initPromise;
|
|
}
|
|
|
|
private async doInitialize(): Promise<void> {
|
|
this.db = await lancedb.connect(this.dbPath);
|
|
const tables = await this.db.tableNames();
|
|
|
|
if (tables.includes(TABLE_NAME)) {
|
|
this.table = await this.db.openTable(TABLE_NAME);
|
|
} else {
|
|
this.table = await this.db.createTable(TABLE_NAME, [
|
|
{
|
|
id: "__schema__",
|
|
text: "",
|
|
vector: new Array(VECTOR_DIM).fill(0),
|
|
importance: 0,
|
|
category: "other",
|
|
createdAt: 0,
|
|
},
|
|
]);
|
|
await this.table.delete('id = "__schema__"');
|
|
}
|
|
}
|
|
|
|
async store(
|
|
entry: Omit<MemoryEntry, "id" | "createdAt">,
|
|
): Promise<MemoryEntry> {
|
|
await this.ensureInitialized();
|
|
|
|
const fullEntry: MemoryEntry = {
|
|
...entry,
|
|
id: randomUUID(),
|
|
createdAt: Date.now(),
|
|
};
|
|
|
|
await this.table!.add([fullEntry]);
|
|
return fullEntry;
|
|
}
|
|
|
|
async search(
|
|
vector: number[],
|
|
limit = 5,
|
|
minScore = 0.5,
|
|
): Promise<MemorySearchResult[]> {
|
|
await this.ensureInitialized();
|
|
|
|
const results = await this.table!.vectorSearch(vector).limit(limit).toArray();
|
|
|
|
// LanceDB uses L2 distance by default; convert to similarity score
|
|
const mapped = results.map((row) => {
|
|
const distance = row._distance ?? 0;
|
|
// Use inverse for a 0-1 range: sim = 1 / (1 + d)
|
|
const score = 1 / (1 + distance);
|
|
return {
|
|
entry: {
|
|
id: row.id as string,
|
|
text: row.text as string,
|
|
vector: row.vector as number[],
|
|
importance: row.importance as number,
|
|
category: row.category as MemoryEntry["category"],
|
|
createdAt: row.createdAt as number,
|
|
},
|
|
score,
|
|
};
|
|
});
|
|
|
|
return mapped.filter((r) => r.score >= minScore);
|
|
}
|
|
|
|
async delete(id: string): Promise<boolean> {
|
|
await this.ensureInitialized();
|
|
// Validate UUID format to prevent injection
|
|
const uuidRegex =
|
|
/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
if (!uuidRegex.test(id)) {
|
|
throw new Error(`Invalid memory ID format: ${id}`);
|
|
}
|
|
await this.table!.delete(`id = '${id}'`);
|
|
return true;
|
|
}
|
|
|
|
async count(): Promise<number> {
|
|
await this.ensureInitialized();
|
|
return this.table!.countRows();
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// OpenAI Embeddings
|
|
// ============================================================================
|
|
|
|
class Embeddings {
|
|
private client: OpenAI;
|
|
|
|
constructor(
|
|
apiKey: string,
|
|
private model: string,
|
|
) {
|
|
this.client = new OpenAI({ apiKey });
|
|
}
|
|
|
|
async embed(text: string): Promise<number[]> {
|
|
const response = await this.client.embeddings.create({
|
|
model: this.model,
|
|
input: text,
|
|
});
|
|
return response.data[0].embedding;
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// Rule-based capture filter
|
|
// ============================================================================
|
|
|
|
const MEMORY_TRIGGERS = [
|
|
/zapamatuj si|pamatuj|remember/i,
|
|
/preferuji|radši|nechci|prefer/i,
|
|
/rozhodli jsme|budeme používat/i,
|
|
/\+\d{10,}/,
|
|
/[\w.-]+@[\w.-]+\.\w+/,
|
|
/můj\s+\w+\s+je|je\s+můj/i,
|
|
/my\s+\w+\s+is|is\s+my/i,
|
|
/i (like|prefer|hate|love|want|need)/i,
|
|
/always|never|important/i,
|
|
];
|
|
|
|
function shouldCapture(text: string): boolean {
|
|
if (text.length < 10 || text.length > 500) return false;
|
|
// Skip injected context from memory recall
|
|
if (text.includes("<relevant-memories>")) return false;
|
|
// Skip system-generated content
|
|
if (text.startsWith("<") && text.includes("</")) return false;
|
|
// Skip agent summary responses (contain markdown formatting)
|
|
if (text.includes("**") && text.includes("\n-")) return false;
|
|
// Skip emoji-heavy responses (likely agent output)
|
|
const emojiCount = (text.match(/[\u{1F300}-\u{1F9FF}]/gu) || []).length;
|
|
if (emojiCount > 3) return false;
|
|
return MEMORY_TRIGGERS.some((r) => r.test(text));
|
|
}
|
|
|
|
function detectCategory(
|
|
text: string,
|
|
): "preference" | "fact" | "decision" | "entity" | "other" {
|
|
const lower = text.toLowerCase();
|
|
if (/prefer|radši|like|love|hate|want/i.test(lower)) return "preference";
|
|
if (/rozhodli|decided|will use|budeme/i.test(lower)) return "decision";
|
|
if (/\+\d{10,}|@[\w.-]+\.\w+|is called|jmenuje se/i.test(lower))
|
|
return "entity";
|
|
if (/is|are|has|have|je|má|jsou/i.test(lower)) return "fact";
|
|
return "other";
|
|
}
|
|
|
|
// ============================================================================
|
|
// Plugin Definition
|
|
// ============================================================================
|
|
|
|
/**
 * Plugin definition consumed by the Clawdbot plugin host.
 *
 * `register` wires up:
 *  - three agent tools: memory_recall / memory_store / memory_forget,
 *  - a `ltm` CLI command group (list | search | stats),
 *  - two lifecycle hooks: auto-recall (before_agent_start) and
 *    auto-capture (agent_end), each individually toggleable via config,
 *  - a service entry for start/stop logging.
 */
const memoryPlugin = {
  id: "memory",
  name: "Memory (Vector)",
  description: "Long-term memory with vector search and seamless auto-recall/capture",
  // Marks this plugin for the host's "memory" slot — presumably only one
  // memory-kind plugin is active at a time; confirm against the host.
  kind: "memory" as const,
  configSchema: memoryConfigSchema,

  register(api: ClawdbotPluginApi) {
    const cfg = memoryConfigSchema.parse(api.pluginConfig);
    // Non-null assertions are safe: parse() always fills dbPath and model.
    const db = new MemoryDB(cfg.dbPath!);
    const embeddings = new Embeddings(cfg.embedding.apiKey, cfg.embedding.model!);

    api.logger.info(`memory: plugin registered (db: ${cfg.dbPath}, lazy init)`);

    // ========================================================================
    // Tools
    // ========================================================================

    api.registerTool(
      {
        name: "memory_recall",
        label: "Memory Recall",
        description:
          "Search through long-term memories. Use when you need context about user preferences, past decisions, or previously discussed topics.",
        parameters: Type.Object({
          query: Type.String({ description: "Search query" }),
          limit: Type.Optional(Type.Number({ description: "Max results (default: 5)" })),
        }),
        async execute(_toolCallId, params) {
          const { query, limit = 5 } = params as { query: string; limit?: number };

          const vector = await embeddings.embed(query);
          // Low 0.1 threshold: the explicit tool call favors recall over precision.
          const results = await db.search(vector, limit, 0.1);

          if (results.length === 0) {
            return {
              content: [{ type: "text", text: "No relevant memories found." }],
              details: { count: 0 },
            };
          }

          const text = results
            .map(
              (r, i) =>
                `${i + 1}. [${r.entry.category}] ${r.entry.text} (${(r.score * 100).toFixed(0)}%)`,
            )
            .join("\n");

          // Strip vector data for serialization (typed arrays can't be cloned)
          const sanitizedResults = results.map((r) => ({
            id: r.entry.id,
            text: r.entry.text,
            category: r.entry.category,
            importance: r.entry.importance,
            score: r.score,
          }));

          return {
            content: [
              { type: "text", text: `Found ${results.length} memories:\n\n${text}` },
            ],
            details: { count: results.length, memories: sanitizedResults },
          };
        },
      },
      { name: "memory_recall" },
    );

    api.registerTool(
      {
        name: "memory_store",
        label: "Memory Store",
        description:
          "Save important information in long-term memory. Use for preferences, facts, decisions.",
        parameters: Type.Object({
          text: Type.String({ description: "Information to remember" }),
          importance: Type.Optional(
            Type.Number({ description: "Importance 0-1 (default: 0.7)" }),
          ),
          category: Type.Optional(
            Type.Union([
              Type.Literal("preference"),
              Type.Literal("fact"),
              Type.Literal("decision"),
              Type.Literal("entity"),
              Type.Literal("other"),
            ]),
          ),
        }),
        async execute(_toolCallId, params) {
          const {
            text,
            importance = 0.7,
            category = "other",
          } = params as {
            text: string;
            importance?: number;
            category?: MemoryEntry["category"];
          };

          const vector = await embeddings.embed(text);

          // Check for duplicates (0.95 similarity = near-identical text)
          const existing = await db.search(vector, 1, 0.95);
          if (existing.length > 0) {
            return {
              content: [
                { type: "text", text: `Similar memory already exists: "${existing[0].entry.text}"` },
              ],
              details: { action: "duplicate", existingId: existing[0].entry.id, existingText: existing[0].entry.text },
            };
          }

          const entry = await db.store({
            text,
            vector,
            importance,
            category,
          });

          // NOTE(review): the "..." suffix is appended even when text is
          // shorter than 100 chars — cosmetic only.
          return {
            content: [{ type: "text", text: `Stored: "${text.slice(0, 100)}..."` }],
            details: { action: "created", id: entry.id },
          };
        },
      },
      { name: "memory_store" },
    );

    api.registerTool(
      {
        name: "memory_forget",
        label: "Memory Forget",
        description: "Delete specific memories. GDPR-compliant.",
        parameters: Type.Object({
          query: Type.Optional(Type.String({ description: "Search to find memory" })),
          memoryId: Type.Optional(Type.String({ description: "Specific memory ID" })),
        }),
        async execute(_toolCallId, params) {
          const { query, memoryId } = params as { query?: string; memoryId?: string };

          // Exact id wins over fuzzy query when both are provided.
          if (memoryId) {
            await db.delete(memoryId);
            return {
              content: [{ type: "text", text: `Memory ${memoryId} forgotten.` }],
              details: { action: "deleted", id: memoryId },
            };
          }

          if (query) {
            const vector = await embeddings.embed(query);
            const results = await db.search(vector, 5, 0.7);

            if (results.length === 0) {
              return {
                content: [{ type: "text", text: "No matching memories found." }],
                details: { found: 0 },
              };
            }

            // Only auto-delete on a single unambiguous, high-confidence hit;
            // otherwise list candidates and require an explicit memoryId.
            if (results.length === 1 && results[0].score > 0.9) {
              await db.delete(results[0].entry.id);
              return {
                content: [
                  { type: "text", text: `Forgotten: "${results[0].entry.text}"` },
                ],
                details: { action: "deleted", id: results[0].entry.id },
              };
            }

            const list = results
              .map((r) => `- [${r.entry.id.slice(0, 8)}] ${r.entry.text.slice(0, 60)}...`)
              .join("\n");

            // Strip vector data for serialization
            const sanitizedCandidates = results.map((r) => ({
              id: r.entry.id,
              text: r.entry.text,
              category: r.entry.category,
              score: r.score,
            }));

            return {
              content: [
                {
                  type: "text",
                  text: `Found ${results.length} candidates. Specify memoryId:\n${list}`,
                },
              ],
              details: { action: "candidates", candidates: sanitizedCandidates },
            };
          }

          return {
            content: [{ type: "text", text: "Provide query or memoryId." }],
            details: { error: "missing_param" },
          };
        },
      },
      { name: "memory_forget" },
    );

    // ========================================================================
    // CLI Commands
    // ========================================================================

    api.registerCli(
      ({ program }) => {
        const memory = program
          .command("ltm")
          .description("Long-term memory plugin commands");

        memory
          .command("list")
          .description("List memories")
          .action(async () => {
            // NOTE(review): "list" only prints a count, same as "stats" —
            // presumably a placeholder for a real listing.
            const count = await db.count();
            console.log(`Total memories: ${count}`);
          });

        memory
          .command("search")
          .description("Search memories")
          .argument("<query>", "Search query")
          .option("--limit <n>", "Max results", "5")
          .action(async (query, opts) => {
            const vector = await embeddings.embed(query);
            const results = await db.search(vector, parseInt(opts.limit), 0.3);
            // Strip vectors for output
            const output = results.map((r) => ({
              id: r.entry.id,
              text: r.entry.text,
              category: r.entry.category,
              importance: r.entry.importance,
              score: r.score,
            }));
            console.log(JSON.stringify(output, null, 2));
          });

        memory
          .command("stats")
          .description("Show memory statistics")
          .action(async () => {
            const count = await db.count();
            console.log(`Total memories: ${count}`);
          });
      },
      { commands: ["ltm"] },
    );

    // ========================================================================
    // Lifecycle Hooks
    // ========================================================================

    // Auto-recall: inject relevant memories before agent starts
    if (cfg.autoRecall) {
      api.on("before_agent_start", async (event) => {
        // Skip trivially short prompts — not enough signal to search on.
        if (!event.prompt || event.prompt.length < 5) return;

        try {
          const vector = await embeddings.embed(event.prompt);
          const results = await db.search(vector, 3, 0.3);

          if (results.length === 0) return;

          const memoryContext = results
            .map((r) => `- [${r.entry.category}] ${r.entry.text}`)
            .join("\n");

          // NOTE(review): optional-call on info here vs. plain calls
          // elsewhere — inconsistent; confirm whether logger.info can be
          // undefined for hook contexts.
          api.logger.info?.(
            `memory: injecting ${results.length} memories into context`,
          );

          // The returned prependContext is presumably prepended to the agent
          // context by the hook runner — confirm against the host contract.
          return {
            prependContext: `<relevant-memories>\nThe following memories may be relevant to this conversation:\n${memoryContext}\n</relevant-memories>`,
          };
        } catch (err) {
          // Recall is best-effort: never block the agent on a memory failure.
          api.logger.warn(`memory: recall failed: ${String(err)}`);
        }
      });
    }

    // Auto-capture: analyze and store important information after agent ends
    if (cfg.autoCapture) {
      api.on("agent_end", async (event) => {
        if (!event.success || !event.messages || event.messages.length === 0) {
          return;
        }

        try {
          // Extract text content from messages (handling unknown[] type)
          const texts: string[] = [];
          for (const msg of event.messages) {
            // Type guard for message object
            if (!msg || typeof msg !== "object") continue;
            const msgObj = msg as Record<string, unknown>;

            // Only process user and assistant messages
            const role = msgObj.role;
            if (role !== "user" && role !== "assistant") continue;

            const content = msgObj.content;

            // Handle string content directly
            if (typeof content === "string") {
              texts.push(content);
              continue;
            }

            // Handle array content (content blocks)
            if (Array.isArray(content)) {
              for (const block of content) {
                if (
                  block &&
                  typeof block === "object" &&
                  "type" in block &&
                  (block as Record<string, unknown>).type === "text" &&
                  "text" in block &&
                  typeof (block as Record<string, unknown>).text === "string"
                ) {
                  texts.push((block as Record<string, unknown>).text as string);
                }
              }
            }
          }

          // Filter for capturable content
          const toCapture = texts.filter(
            (text) => text && shouldCapture(text),
          );
          if (toCapture.length === 0) return;

          // Store each capturable piece (limit to 3 per conversation)
          let stored = 0;
          for (const text of toCapture.slice(0, 3)) {
            const category = detectCategory(text);
            const vector = await embeddings.embed(text);

            // Check for duplicates (high similarity threshold)
            const existing = await db.search(vector, 1, 0.95);
            if (existing.length > 0) continue;

            await db.store({
              text,
              vector,
              importance: 0.7,
              category,
            });
            stored++;
          }

          if (stored > 0) {
            api.logger.info(`memory: auto-captured ${stored} memories`);
          }
        } catch (err) {
          // Capture is best-effort: swallow and log so agent_end never fails.
          api.logger.warn(`memory: capture failed: ${String(err)}`);
        }
      });
    }

    // ========================================================================
    // Service
    // ========================================================================

    api.registerService({
      id: "memory",
      start: () => {
        api.logger.info(
          `memory: initialized (db: ${cfg.dbPath}, model: ${cfg.embedding.model})`,
        );
      },
      stop: () => {
        api.logger.info("memory: stopped");
      },
    });
  },
};

export default memoryPlugin;
|