feat: add optional llm-task JSON-only tool (#1498)

* feat(llm-task): add optional JSON-only LLM task tool * fix(llm-task): fix invalid package.json * fix(llm-task): fix invalid plugin manifest JSON * fix(llm-task): fix index.ts import quoting * fix(llm-task): load embedded runner from src or bundled dist
2026-01-23 17:18:47 -08:00
parent cb06e133ca
commit 95d45c0aa7
6 changed files with 415 additions and 0 deletions
--- a/extensions/llm-task/README.md
+++ b/extensions/llm-task/README.md
@@ -0,0 +1,86 @@
 # LLM Task (plugin)
 Adds an **optional** agent tool `llm-task` for running **JSON-only** LLM tasks (drafting, summarizing, classifying) with optional JSON Schema validation.
 This is designed to be called from workflow engines (e.g. Lobster via `clawd.invoke --each`) without adding new Clawdbot code per workflow.
 ## Enable
 1) Enable the plugin:
 ```json
 {
  "plugins": {
    "entries": {
      "llm-task": { "enabled": true }
    }
  }
 }
 ```
 2) Allowlist the tool (it is registered with `optional: true`):
 ```json
 {
  "agents": {
    "list": [
      {
        "id": "main",
        "tools": { "allow": ["llm-task"] }
      }
    ]
  }
 }
 ```
 ## Config (optional)
 ```json
 {
  "plugins": {
    "entries": {
      "llm-task": {
        "enabled": true,
        "config": {
          "defaultProvider": "openai-codex",
          "defaultModel": "gpt-5.2",
          "allowedModels": ["openai-codex/gpt-5.2"],
          "maxTokens": 800,
          "timeoutMs": 30000
        }
      }
    }
  }
 }
 ```
 `allowedModels` is an allowlist of `provider/model` strings. If set to a non-empty list, any request whose resolved `provider/model` is not in the list is rejected; an empty or missing list allows every model.
 ## Tool API
 ### Parameters
 - `prompt` (string, required)
 - `input` (any, optional)
 - `schema` (object, optional JSON Schema)
 - `provider` (string, optional)
 - `model` (string, optional)
 - `authProfileId` (string, optional)
 - `temperature` (number, optional)
 - `maxTokens` (number, optional)
 - `timeoutMs` (number, optional)
 ### Output
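 Returns `details.json` containing the parsed JSON, together with the resolved `details.provider` and `details.model`. When `schema` is provided, the parsed JSON is validated against it and the call fails with an error if it does not match.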
 ## Notes
 - The tool is **JSON-only** and instructs the model to output only JSON (no code fences, no commentary).
 - Side effects should be handled outside this tool (e.g. approvals in Lobster) before calling tools that send messages/emails.
 ## Bundled extension note
 This extension depends on Clawdbot internal modules (the embedded agent runner). It is intended to ship as a **bundled** Clawdbot extension (like `lobster`) and be enabled via `plugins.entries` + tool allowlists.
 It is **not** currently designed to be copied into `~/.clawdbot/extensions` as a standalone plugin directory.
--- a/extensions/llm-task/clawdbot.plugin.json
+++ b/extensions/llm-task/clawdbot.plugin.json
@@ -0,0 +1,21 @@
 {
  "id": "llm-task",
  "name": "LLM Task",
  "description": "Generic JSON-only LLM tool for structured tasks callable from workflows.",
  "configSchema": {
    "type": "object",
    "additionalProperties": false,
    "properties": {
      "defaultProvider": { "type": "string" },
      "defaultModel": { "type": "string" },
      "defaultAuthProfileId": { "type": "string" },
      "allowedModels": {
        "type": "array",
        "items": { "type": "string" },
        "description": "Allowlist of provider/model keys like openai-codex/gpt-5.2."
      },
      "maxTokens": { "type": "number" },
      "timeoutMs": { "type": "number" }
    }
  }
 }
--- a/extensions/llm-task/index.ts
+++ b/extensions/llm-task/index.ts
@@ -0,0 +1,5 @@
 import { createLlmTaskTool } from "./src/llm-task-tool.js";
 /**
  * Plugin entry point: builds the `llm-task` tool from the plugin API and
  * registers it as an optional tool.
  */
 export default function (api: any) {
  const llmTaskTool = createLlmTaskTool(api);
  const registration = { optional: true };
  api.registerTool(llmTaskTool, registration);
 }
--- a/extensions/llm-task/package.json
+++ b/extensions/llm-task/package.json
@@ -0,0 +1,7 @@
 {
  "name": "@clawdbot/llm-task",
  "private": true,
  "type": "module",
  "main": "index.ts",
  "version": "0.0.0"
 }
--- a/extensions/llm-task/src/llm-task-tool.test.ts
+++ b/extensions/llm-task/src/llm-task-tool.test.ts
@@ -0,0 +1,96 @@
 import { describe, it, expect, vi, beforeEach } from "vitest";
 // Swap the embedded runner module for a stub whose default result is an empty
 // JSON object; individual tests queue their own results on top of this.
 vi.mock("../../../src/agents/pi-embedded-runner.js", () => ({
  runEmbeddedPiAgent: vi.fn(async () => {
    const startedAt = Date.now();
    return { meta: { startedAt }, payloads: [{ text: "{}" }] };
  }),
 }));
 import { runEmbeddedPiAgent } from "../../../src/agents/pi-embedded-runner.js";
 import { createLlmTaskTool } from "./llm-task-tool.js";
 /**
  * Builds a minimal stand-in for the plugin API used by the tool under test.
  * Any key present in `overrides` replaces the corresponding default (shallow merge).
  */
 function fakeApi(overrides: any = {}) {
  const defaultModel = { primary: "openai-codex/gpt-5.2" };
  const defaults = {
    id: "llm-task",
    name: "llm-task",
    source: "test",
    config: { agents: { defaults: { workspace: "/tmp", model: defaultModel } } },
    pluginConfig: {},
    runtime: { version: "test" },
    // Silent logger: every level is a no-op.
    logger: {
      debug: () => {},
      info: () => {},
      warn: () => {},
      error: () => {},
    },
    registerTool: () => {},
  };
  return { ...defaults, ...overrides };
 }
 describe("llm-task tool (json-only)", () => {
  // clearAllMocks only wipes call history; results queued with mockResolvedValueOnce
  // survive it, so each test must consume exactly what it queues.
  beforeEach(() => vi.clearAllMocks());

  /** Queues one embedded-runner result whose single payload is the JSON encoding of `value`. */
  function queueRunnerJson(value: unknown) {
    (runEmbeddedPiAgent as any).mockResolvedValueOnce({
      meta: {},
      payloads: [{ text: JSON.stringify(value) }],
    });
  }

  it("returns parsed json", async () => {
    queueRunnerJson({ foo: "bar" });
    const tool = createLlmTaskTool(fakeApi() as any);
    const res = await tool.execute("id", { prompt: "return foo" });
    expect((res as any).details.json).toEqual({ foo: "bar" });
  });

  it("validates schema", async () => {
    queueRunnerJson({ foo: "bar" });
    const tool = createLlmTaskTool(fakeApi() as any);
    const schema = {
      type: "object",
      properties: { foo: { type: "string" } },
      required: ["foo"],
      additionalProperties: false,
    };
    const res = await tool.execute("id", { prompt: "return foo", schema });
    expect((res as any).details.json).toEqual({ foo: "bar" });
  });

  it("throws on invalid json", async () => {
    (runEmbeddedPiAgent as any).mockResolvedValueOnce({ meta: {}, payloads: [{ text: "not-json" }] });
    const tool = createLlmTaskTool(fakeApi() as any);
    await expect(tool.execute("id", { prompt: "x" })).rejects.toThrow(/invalid json/i);
  });

  it("throws on schema mismatch", async () => {
    queueRunnerJson({ foo: 1 });
    const tool = createLlmTaskTool(fakeApi() as any);
    const schema = { type: "object", properties: { foo: { type: "string" } }, required: ["foo"] };
    await expect(tool.execute("id", { prompt: "x", schema })).rejects.toThrow(/match schema/i);
  });

  it("passes provider/model overrides to embedded runner", async () => {
    queueRunnerJson({ ok: true });
    const tool = createLlmTaskTool(fakeApi() as any);
    await tool.execute("id", { prompt: "x", provider: "anthropic", model: "claude-4-sonnet" });
    const call = (runEmbeddedPiAgent as any).mock.calls[0]?.[0];
    expect(call.provider).toBe("anthropic");
    expect(call.model).toBe("claude-4-sonnet");
  });

  it("enforces allowedModels", async () => {
    // No runner result is queued: the allowlist check must reject the request
    // before the embedded runner is ever invoked.
    const tool = createLlmTaskTool(fakeApi({ pluginConfig: { allowedModels: ["openai-codex/gpt-5.2"] } }) as any);
    await expect(tool.execute("id", { prompt: "x", provider: "anthropic", model: "claude-4-sonnet" })).rejects.toThrow(
      /not allowed/i,
    );
    expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
  });
 });
--- a/extensions/llm-task/src/llm-task-tool.ts
+++ b/extensions/llm-task/src/llm-task-tool.ts
@@ -0,0 +1,200 @@
 import os from "node:os";
 import path from "node:path";
 import fs from "node:fs/promises";
 import Ajv from "ajv";
 import { Type } from "@sinclair/typebox";
 // NOTE: This extension is intended to be bundled with Clawdbot.
 // When running from source (tests/dev), Clawdbot internals live under src/.
 // When running from a built install, internals live under dist/ (no src/ tree).
 // So we resolve internal imports dynamically with src-first, dist-fallback.
 import type { ClawdbotPluginApi } from "../../../src/plugins/types.js";
 // Call shape this file assumes for the embedded agent runner. It is typed loosely
 // (`any`) because the runner module is resolved with a dynamic import at runtime.
 type RunEmbeddedPiAgentFn = (params: any) => Promise<any>;
 /**
  * Resolves the embedded agent runner at call time.
  *
  * The source-tree module path is tried first; if that import fails or does not
  * expose a `runEmbeddedPiAgent` function, the bundled (built) path is used instead.
  *
  * @returns The `runEmbeddedPiAgent` function from whichever module provided it.
  * @throws Error if the bundled module loads but lacks a `runEmbeddedPiAgent` function.
  *   A failed import of the bundled module rejects with that import's own error.
  */
 async function loadRunEmbeddedPiAgent(): Promise<RunEmbeddedPiAgentFn> {
  // Source checkout (tests/dev)
  let fromSource: unknown;
  try {
    const sourceModule = await import("../../../src/agents/pi-embedded-runner.js");
    fromSource = (sourceModule as any).runEmbeddedPiAgent;
  } catch {
    // A missing src/ tree is expected in built installs; fall through to dist.
    fromSource = undefined;
  }
  if (typeof fromSource === "function") return fromSource as RunEmbeddedPiAgentFn;

  // Bundled install (built)
  const bundledModule = await import("../../../agents/pi-embedded-runner.js");
  const fromBundle = (bundledModule as any).runEmbeddedPiAgent;
  if (typeof fromBundle === "function") return fromBundle;
  throw new Error("Internal error: runEmbeddedPiAgent not available");
 }
 /**
  * Removes a single surrounding Markdown code fence (``` or ```json, case-insensitive)
  * from model output so the remainder can be handed to `JSON.parse`.
  *
  * @param s Raw text returned by the model.
  * @returns The trimmed fence contents when the whole (trimmed) input is one fenced
  *   block; otherwise the trimmed input unchanged.
  */
 function stripCodeFences(s: string): string {
  const trimmed = s.trim();
  // `\s` / `[\s\S]` must be escaped: without the backslashes the pattern matches the
  // literal letter "s" and never spans the newlines inside a fenced block.
  const m = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
  if (m) return (m[1] ?? "").trim();
  return trimmed;
 }
 /**
  * Concatenates the text of all non-error payloads into one string.
  *
  * @param payloads Runner payloads; entries flagged `isError` or lacking a string
  *   `text` are skipped. `undefined` is treated as an empty list.
  * @returns The remaining texts joined with newlines and trimmed ("" when none remain).
  */
 function collectText(payloads: Array<{ text?: string; isError?: boolean }> | undefined): string {
  const texts: string[] = [];
  for (const payload of payloads ?? []) {
    if (!payload.isError && typeof payload.text === "string") texts.push(payload.text);
  }
  // Join with a real newline; a bare "n" would splice a letter between payloads.
  return texts.join("\n").trim();
 }
 /**
  * Builds the `provider/model` key that is compared against the allowlist.
  *
  * @returns `"<provider>/<model>"` with both parts trimmed, or `undefined` when
  *   either part is missing or blank.
  */
 function toModelKey(provider?: string, model?: string): string | undefined {
  const [providerId, modelId] = [provider, model].map((part) => part?.trim());
  return providerId && modelId ? `${providerId}/${modelId}` : undefined;
 }
 /** Optional plugin-level settings read from `api.pluginConfig`. */
 interface PluginCfg {
  /** Provider used when the tool call does not supply one. */
  defaultProvider?: string;
  /** Model id used when the tool call does not supply one. */
  defaultModel?: string;
  /** Auth profile used when the tool call does not supply one. */
  defaultAuthProfileId?: string;
  /** Allowlist of `provider/model` keys; only enforced when non-empty. */
  allowedModels?: string[];
  /** Fallback `maxTokens` when the tool call does not supply a number. */
  maxTokens?: number;
  /** Fallback timeout in milliseconds; only honored when positive. */
  timeoutMs?: number;
 }
 /**
  * Creates the `llm-task` tool definition: a JSON-only LLM call whose output is
  * parsed and, when a schema is supplied, validated with Ajv.
  *
  * Resolution order for `provider`, `model` and `authProfileId` is: tool call
  * parameter, then plugin config default, then (provider/model only) the
  * `agents.defaults.model.primary` value from the host config.
  *
  * @param api Plugin API; `api.pluginConfig` and `api.config` are read on every call.
  * @returns A tool object with `name`, `description`, `parameters` and `execute`.
  *   `execute` resolves to `{ content, details: { json, provider, model } }` and throws
  *   an `Error` when the prompt is blank, the provider/model cannot be resolved, the
  *   model is not in a non-empty `allowedModels` list, the model output is empty or
  *   not valid JSON, or the parsed JSON does not match the supplied schema.
  */
 export function createLlmTaskTool(api: ClawdbotPluginApi) {
  return {
    name: "llm-task",
    description:
      "Run a generic JSON-only LLM task and return schema-validated JSON. Designed for orchestration from Lobster workflows via clawd.invoke.",
    parameters: Type.Object({
      prompt: Type.String({ description: "Task instruction for the LLM." }),
      input: Type.Optional(Type.Unknown({ description: "Optional input payload for the task." })),
      schema: Type.Optional(Type.Unknown({ description: "Optional JSON Schema to validate the returned JSON." })),
      provider: Type.Optional(Type.String({ description: "Provider override (e.g. openai-codex, anthropic)." })),
      model: Type.Optional(Type.String({ description: "Model id override." })),
      authProfileId: Type.Optional(Type.String({ description: "Auth profile override." })),
      temperature: Type.Optional(Type.Number({ description: "Best-effort temperature override." })),
      maxTokens: Type.Optional(Type.Number({ description: "Best-effort maxTokens override." })),
      timeoutMs: Type.Optional(Type.Number({ description: "Timeout for the LLM run." })),
    }),
    async execute(_id: string, params: Record<string, unknown>) {
      const prompt = String(params.prompt ?? "");
      if (!prompt.trim()) throw new Error("prompt required");

      const pluginCfg = (api.pluginConfig ?? {}) as PluginCfg;

      // The host's primary model is "<provider>/<model>"; the model part may itself
      // contain slashes, so only the first segment is treated as the provider.
      const primary = api.config?.agents?.defaults?.model?.primary;
      const primaryProvider = typeof primary === "string" ? primary.split("/")[0] : undefined;
      const primaryModel = typeof primary === "string" ? primary.split("/").slice(1).join("/") : undefined;

      // `||` is intentional: blank strings fall through to the next candidate.
      const provider =
        (typeof params.provider === "string" && params.provider.trim()) ||
        (typeof pluginCfg.defaultProvider === "string" && pluginCfg.defaultProvider.trim()) ||
        primaryProvider ||
        undefined;
      const model =
        (typeof params.model === "string" && params.model.trim()) ||
        (typeof pluginCfg.defaultModel === "string" && pluginCfg.defaultModel.trim()) ||
        primaryModel ||
        undefined;
      const authProfileId =
        (typeof params.authProfileId === "string" && params.authProfileId.trim()) ||
        (typeof pluginCfg.defaultAuthProfileId === "string" && pluginCfg.defaultAuthProfileId.trim()) ||
        undefined;

      const modelKey = toModelKey(provider, model);
      if (!provider || !model || !modelKey) {
        throw new Error(
          `provider/model could not be resolved (provider=${String(provider ?? "")}, model=${String(model ?? "")})`,
        );
      }

      // An empty allowlist means "no restriction".
      const allowed = Array.isArray(pluginCfg.allowedModels) ? pluginCfg.allowedModels : undefined;
      if (allowed && allowed.length > 0 && !allowed.includes(modelKey)) {
        throw new Error(
          `Model not allowed by llm-task plugin config: ${modelKey}. Allowed models: ${allowed.join(", ")}`,
        );
      }

      // Non-positive timeouts are ignored at both levels before the 30s default applies.
      const timeoutMs =
        (typeof params.timeoutMs === "number" && params.timeoutMs > 0 ? params.timeoutMs : undefined) ||
        (typeof pluginCfg.timeoutMs === "number" && pluginCfg.timeoutMs > 0 ? pluginCfg.timeoutMs : undefined) ||
        30_000;

      const streamParams = {
        temperature: typeof params.temperature === "number" ? params.temperature : undefined,
        maxTokens:
          typeof params.maxTokens === "number"
            ? params.maxTokens
            : typeof pluginCfg.maxTokens === "number"
              ? pluginCfg.maxTokens
              : undefined,
      };

      const input = params.input;
      const system = [
        "You are a JSON-only function.",
        "Return ONLY a valid JSON value.",
        "Do not wrap in markdown fences.",
        "Do not include commentary.",
        "Do not call tools.",
      ].join(" ");
      // Sections are separated by real newlines so the model sees distinct
      // instruction, TASK and INPUT_JSON blocks.
      const fullPrompt = `${system}\n\nTASK:\n${prompt}\n\nINPUT_JSON:\n${JSON.stringify(input ?? null, null, 2)}\n`;

      // Each call gets its own scratch directory for the session file.
      const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-llm-task-"));
      let result: unknown;
      try {
        const sessionId = `llm-task-${Date.now()}`;
        const sessionFile = path.join(tmpDir, "session.json");
        const runEmbeddedPiAgent = await loadRunEmbeddedPiAgent();
        result = await runEmbeddedPiAgent({
          sessionId,
          sessionFile,
          workspaceDir: api.config?.agents?.defaults?.workspace ?? process.cwd(),
          config: api.config,
          prompt: fullPrompt,
          timeoutMs,
          runId: `llm-task-${Date.now()}`,
          provider,
          model,
          authProfileId,
          authProfileIdSource: authProfileId ? "user" : "auto",
          streamParams,
        });
      } finally {
        // Best-effort cleanup so repeated calls do not accumulate temp directories;
        // a cleanup failure must not mask the run's own result or error.
        await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => undefined);
      }

      const text = collectText((result as any).payloads);
      if (!text) throw new Error("LLM returned empty output");

      // Models sometimes fence their output despite the instructions; unwrap first.
      const raw = stripCodeFences(text);
      let parsed: unknown;
      try {
        parsed = JSON.parse(raw);
      } catch {
        throw new Error("LLM returned invalid JSON");
      }

      const schema = params.schema;
      if (schema && typeof schema === "object") {
        const ajv = new Ajv({ allErrors: true, strict: false });
        const validate = ajv.compile(schema as any);
        const ok = validate(parsed);
        if (!ok) {
          const msg =
            validate.errors?.map((e) => `${e.instancePath || "<root>"} ${e.message || "invalid"}`).join("; ") ??
            "invalid";
          throw new Error(`LLM JSON did not match schema: ${msg}`);
        }
      }

      return {
        content: [{ type: "text", text: JSON.stringify(parsed, null, 2) }],
        details: { json: parsed, provider, model },
      };
    },
  };
 }