diff --git a/extensions/llm-task/README.md b/extensions/llm-task/README.md new file mode 100644 index 000000000..4bce6c759 --- /dev/null +++ b/extensions/llm-task/README.md @@ -0,0 +1,86 @@ +# LLM Task (plugin) + +Adds an **optional** agent tool `llm-task` for running **JSON-only** LLM tasks (drafting, summarizing, classifying) with optional JSON Schema validation. + +This is designed to be called from workflow engines (e.g. Lobster via `clawd.invoke --each`) without adding new Clawdbot code per workflow. + +## Enable + +1) Enable the plugin: + +```json +{ + "plugins": { + "entries": { + "llm-task": { "enabled": true } + } + } +} +``` + +2) Allowlist the tool (it is registered with `optional: true`): + +```json +{ + "agents": { + "list": [ + { + "id": "main", + "tools": { "allow": ["llm-task"] } + } + ] + } +} +``` + +## Config (optional) + +```json +{ + "plugins": { + "entries": { + "llm-task": { + "enabled": true, + "config": { + "defaultProvider": "openai-codex", + "defaultModel": "gpt-5.2", + "allowedModels": ["openai-codex/gpt-5.2"], + "maxTokens": 800, + "timeoutMs": 30000 + } + } + } + } +} +``` + +`allowedModels` is an allowlist of `provider/model` strings. If set, any request outside the list is rejected. + +## Tool API + +### Parameters + +- `prompt` (string, required) +- `input` (any, optional) +- `schema` (object, optional JSON Schema) +- `provider` (string, optional) +- `model` (string, optional) +- `authProfileId` (string, optional) +- `temperature` (number, optional) +- `maxTokens` (number, optional) +- `timeoutMs` (number, optional) + +### Output + +Returns `details.json` containing the parsed JSON (and validates against `schema` when provided). + +## Notes + +- The tool is **JSON-only** and instructs the model to output only JSON (no code fences, no commentary). +- Side effects should be handled outside this tool (e.g. approvals in Lobster) before calling tools that send messages/emails. 
+ +## Bundled extension note + +This extension depends on Clawdbot internal modules (the embedded agent runner). It is intended to ship as a **bundled** Clawdbot extension (like `lobster`) and be enabled via `plugins.entries` + tool allowlists. + +It is **not** currently designed to be copied into `~/.clawdbot/extensions` as a standalone plugin directory. diff --git a/extensions/llm-task/clawdbot.plugin.json b/extensions/llm-task/clawdbot.plugin.json new file mode 100644 index 000000000..08f8cc067 --- /dev/null +++ b/extensions/llm-task/clawdbot.plugin.json @@ -0,0 +1,21 @@ +{ + "id": "llm-task", + "name": "LLM Task", + "description": "Generic JSON-only LLM tool for structured tasks callable from workflows.", + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": { + "defaultProvider": { "type": "string" }, + "defaultModel": { "type": "string" }, + "defaultAuthProfileId": { "type": "string" }, + "allowedModels": { + "type": "array", + "items": { "type": "string" }, + "description": "Allowlist of provider/model keys like openai-codex/gpt-5.2." 
+ }, + "maxTokens": { "type": "number" }, + "timeoutMs": { "type": "number" } + } + } +} diff --git a/extensions/llm-task/index.ts b/extensions/llm-task/index.ts new file mode 100644 index 000000000..025a20fa1 --- /dev/null +++ b/extensions/llm-task/index.ts @@ -0,0 +1,5 @@ +import { createLlmTaskTool } from "./src/llm-task-tool.js"; + +export default function (api: any) { + api.registerTool(createLlmTaskTool(api), { optional: true }); +} diff --git a/extensions/llm-task/package.json b/extensions/llm-task/package.json new file mode 100644 index 000000000..fbe66cd7b --- /dev/null +++ b/extensions/llm-task/package.json @@ -0,0 +1,7 @@ +{ + "name": "@clawdbot/llm-task", + "private": true, + "type": "module", + "main": "index.ts", + "version": "0.0.0" +} diff --git a/extensions/llm-task/src/llm-task-tool.test.ts b/extensions/llm-task/src/llm-task-tool.test.ts new file mode 100644 index 000000000..881feb243 --- /dev/null +++ b/extensions/llm-task/src/llm-task-tool.test.ts @@ -0,0 +1,96 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; + +vi.mock("../../../src/agents/pi-embedded-runner.js", () => { + return { + runEmbeddedPiAgent: vi.fn(async () => ({ + meta: { startedAt: Date.now() }, + payloads: [{ text: "{}" }], + })), + }; +}); + +import { runEmbeddedPiAgent } from "../../../src/agents/pi-embedded-runner.js"; +import { createLlmTaskTool } from "./llm-task-tool.js"; + +function fakeApi(overrides: any = {}) { + return { + id: "llm-task", + name: "llm-task", + source: "test", + config: { agents: { defaults: { workspace: "/tmp", model: { primary: "openai-codex/gpt-5.2" } } } }, + pluginConfig: {}, + runtime: { version: "test" }, + logger: { debug() {}, info() {}, warn() {}, error() {} }, + registerTool() {}, + ...overrides, + }; +} + +describe("llm-task tool (json-only)", () => { + beforeEach(() => vi.clearAllMocks()); + + it("returns parsed json", async () => { + (runEmbeddedPiAgent as any).mockResolvedValueOnce({ + meta: {}, + payloads: [{ text: 
JSON.stringify({ foo: "bar" }) }], + }); + const tool = createLlmTaskTool(fakeApi() as any); + const res = await tool.execute("id", { prompt: "return foo" }); + expect((res as any).details.json).toEqual({ foo: "bar" }); + }); + + it("validates schema", async () => { + (runEmbeddedPiAgent as any).mockResolvedValueOnce({ + meta: {}, + payloads: [{ text: JSON.stringify({ foo: "bar" }) }], + }); + const tool = createLlmTaskTool(fakeApi() as any); + const schema = { + type: "object", + properties: { foo: { type: "string" } }, + required: ["foo"], + additionalProperties: false, + }; + const res = await tool.execute("id", { prompt: "return foo", schema }); + expect((res as any).details.json).toEqual({ foo: "bar" }); + }); + + it("throws on invalid json", async () => { + (runEmbeddedPiAgent as any).mockResolvedValueOnce({ meta: {}, payloads: [{ text: "not-json" }] }); + const tool = createLlmTaskTool(fakeApi() as any); + await expect(tool.execute("id", { prompt: "x" })).rejects.toThrow(/invalid json/i); + }); + + it("throws on schema mismatch", async () => { + (runEmbeddedPiAgent as any).mockResolvedValueOnce({ + meta: {}, + payloads: [{ text: JSON.stringify({ foo: 1 }) }], + }); + const tool = createLlmTaskTool(fakeApi() as any); + const schema = { type: "object", properties: { foo: { type: "string" } }, required: ["foo"] }; + await expect(tool.execute("id", { prompt: "x", schema })).rejects.toThrow(/match schema/i); + }); + + it("passes provider/model overrides to embedded runner", async () => { + (runEmbeddedPiAgent as any).mockResolvedValueOnce({ + meta: {}, + payloads: [{ text: JSON.stringify({ ok: true }) }], + }); + const tool = createLlmTaskTool(fakeApi() as any); + await tool.execute("id", { prompt: "x", provider: "anthropic", model: "claude-4-sonnet" }); + const call = (runEmbeddedPiAgent as any).mock.calls[0]?.[0]; + expect(call.provider).toBe("anthropic"); + expect(call.model).toBe("claude-4-sonnet"); + }); + + it("enforces allowedModels", async () => { + 
(runEmbeddedPiAgent as any).mockResolvedValueOnce({ + meta: {}, + payloads: [{ text: JSON.stringify({ ok: true }) }], + }); + const tool = createLlmTaskTool(fakeApi({ pluginConfig: { allowedModels: ["openai-codex/gpt-5.2"] } }) as any); + await expect(tool.execute("id", { prompt: "x", provider: "anthropic", model: "claude-4-sonnet" })).rejects.toThrow( + /not allowed/i, + ); + }); +}); diff --git a/extensions/llm-task/src/llm-task-tool.ts b/extensions/llm-task/src/llm-task-tool.ts new file mode 100644 index 000000000..a8ae0e3e7 --- /dev/null +++ b/extensions/llm-task/src/llm-task-tool.ts @@ -0,0 +1,200 @@ +import os from "node:os"; +import path from "node:path"; +import fs from "node:fs/promises"; + +import Ajv from "ajv"; +import { Type } from "@sinclair/typebox"; + +// NOTE: This extension is intended to be bundled with Clawdbot. +// When running from source (tests/dev), Clawdbot internals live under src/. +// When running from a built install, internals live under dist/ (no src/ tree). +// So we resolve internal imports dynamically with src-first, dist-fallback. + +import type { ClawdbotPluginApi } from "../../../src/plugins/types.js"; + +type RunEmbeddedPiAgentFn = (params: any) => Promise<unknown>; + +async function loadRunEmbeddedPiAgent(): Promise<RunEmbeddedPiAgentFn> { + // Source checkout (tests/dev) + try { + const mod = await import("../../../src/agents/pi-embedded-runner.js"); + if (typeof (mod as any).runEmbeddedPiAgent === "function") return (mod as any).runEmbeddedPiAgent; + } catch { + // ignore + } + + // Bundled install (built) + const mod = await import("../../../agents/pi-embedded-runner.js"); + if (typeof (mod as any).runEmbeddedPiAgent !== "function") { + throw new Error("Internal error: runEmbeddedPiAgent not available"); + } + return (mod as any).runEmbeddedPiAgent; +} + +function stripCodeFences(s: string): string { + const trimmed = s.trim(); + const m = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + if (m) return (m[1] ??
"").trim(); + return trimmed; +} + +function collectText(payloads: Array<{ text?: string; isError?: boolean }> | undefined): string { + const texts = (payloads ?? []) + .filter((p) => !p.isError && typeof p.text === "string") + .map((p) => p.text ?? ""); + return texts.join("\n").trim(); +} + +function toModelKey(provider?: string, model?: string): string | undefined { + const p = provider?.trim(); + const m = model?.trim(); + if (!p || !m) return undefined; + return `${p}/${m}`; +} + +type PluginCfg = { + defaultProvider?: string; + defaultModel?: string; + defaultAuthProfileId?: string; + allowedModels?: string[]; + maxTokens?: number; + timeoutMs?: number; +}; + +export function createLlmTaskTool(api: ClawdbotPluginApi) { + return { + name: "llm-task", + description: + "Run a generic JSON-only LLM task and return schema-validated JSON. Designed for orchestration from Lobster workflows via clawd.invoke.", + parameters: Type.Object({ + prompt: Type.String({ description: "Task instruction for the LLM." }), + input: Type.Optional(Type.Unknown({ description: "Optional input payload for the task." })), + schema: Type.Optional(Type.Unknown({ description: "Optional JSON Schema to validate the returned JSON." })), + provider: Type.Optional(Type.String({ description: "Provider override (e.g. openai-codex, anthropic)." })), + model: Type.Optional(Type.String({ description: "Model id override." })), + authProfileId: Type.Optional(Type.String({ description: "Auth profile override." })), + temperature: Type.Optional(Type.Number({ description: "Best-effort temperature override." })), + maxTokens: Type.Optional(Type.Number({ description: "Best-effort maxTokens override." })), + timeoutMs: Type.Optional(Type.Number({ description: "Timeout for the LLM run." })), + }), + + async execute(_id: string, params: Record<string, unknown>) { + const prompt = String(params.prompt ?? ""); + if (!prompt.trim()) throw new Error("prompt required"); + + const pluginCfg = (api.pluginConfig ??
{}) as PluginCfg; + + const primary = api.config?.agents?.defaults?.model?.primary; + const primaryProvider = typeof primary === "string" ? primary.split("/")[0] : undefined; + const primaryModel = typeof primary === "string" ? primary.split("/").slice(1).join("/") : undefined; + + const provider = + (typeof params.provider === "string" && params.provider.trim()) || + (typeof pluginCfg.defaultProvider === "string" && pluginCfg.defaultProvider.trim()) || + primaryProvider || + undefined; + + const model = + (typeof params.model === "string" && params.model.trim()) || + (typeof pluginCfg.defaultModel === "string" && pluginCfg.defaultModel.trim()) || + primaryModel || + undefined; + + const authProfileId = + (typeof (params as any).authProfileId === "string" && (params as any).authProfileId.trim()) || + (typeof pluginCfg.defaultAuthProfileId === "string" && pluginCfg.defaultAuthProfileId.trim()) || + undefined; + + const modelKey = toModelKey(provider, model); + if (!provider || !model || !modelKey) { + throw new Error( + `provider/model could not be resolved (provider=${String(provider ?? "")}, model=${String(model ?? "")})`, + ); + } + + const allowed = Array.isArray(pluginCfg.allowedModels) ? pluginCfg.allowedModels : undefined; + if (allowed && allowed.length > 0 && !allowed.includes(modelKey)) { + throw new Error( + `Model not allowed by llm-task plugin config: ${modelKey}. Allowed models: ${allowed.join(", ")}`, + ); + } + + const timeoutMs = + (typeof params.timeoutMs === "number" && params.timeoutMs > 0 ? params.timeoutMs : undefined) || + (typeof pluginCfg.timeoutMs === "number" && pluginCfg.timeoutMs > 0 ? pluginCfg.timeoutMs : undefined) || + 30_000; + + const streamParams = { + temperature: typeof params.temperature === "number" ? params.temperature : undefined, + maxTokens: + typeof params.maxTokens === "number" + ? params.maxTokens + : typeof pluginCfg.maxTokens === "number" + ? 
pluginCfg.maxTokens + : undefined, + }; + + const input = (params as any).input as unknown; + + const system = [ + "You are a JSON-only function.", + "Return ONLY a valid JSON value.", + "Do not wrap in markdown fences.", + "Do not include commentary.", + "Do not call tools.", + ].join(" "); + + const fullPrompt = `${system}\n\nTASK:\n${prompt}\n\nINPUT_JSON:\n${JSON.stringify(input ?? null, null, 2)}\n`; + + const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-llm-task-")); + const sessionId = `llm-task-${Date.now()}`; + const sessionFile = path.join(tmpDir, "session.json"); + + const runEmbeddedPiAgent = await loadRunEmbeddedPiAgent(); + + const result = await runEmbeddedPiAgent({ + sessionId, + sessionFile, + workspaceDir: api.config?.agents?.defaults?.workspace ?? process.cwd(), + config: api.config, + prompt: fullPrompt, + timeoutMs, + runId: `llm-task-${Date.now()}`, + provider, + model, + authProfileId, + authProfileIdSource: authProfileId ? "user" : "auto", + streamParams, + }); + + const text = collectText((result as any).payloads); + if (!text) throw new Error("LLM returned empty output"); + + const raw = stripCodeFences(text); + let parsed: unknown; + try { + parsed = JSON.parse(raw); + } catch { + throw new Error("LLM returned invalid JSON"); + } + + const schema = (params as any).schema as unknown; + if (schema && typeof schema === "object") { + const ajv = new Ajv({ allErrors: true, strict: false }); + const validate = ajv.compile(schema as any); + const ok = validate(parsed); + if (!ok) { + const msg = + validate.errors?.map((e) => `${e.instancePath || ""} ${e.message || "invalid"}`).join("; ") ?? + "invalid"; + throw new Error(`LLM JSON did not match schema: ${msg}`); + } + } + + return { + content: [{ type: "text", text: JSON.stringify(parsed, null, 2) }], + details: { json: parsed, provider, model }, + }; + }, + }; +}