From 309fcc5321d34f3f4ff6929ad64c8b11e8b00bbe Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 24 Jan 2026 01:44:36 +0000 Subject: [PATCH] fix: publish llm-task docs and harden tool --- CHANGELOG.md | 1 + docs/docs.json | 2 + docs/tools/index.md | 1 + docs/tools/llm-task.md | 114 +++++++++++++++++ docs/tools/lobster.md | 46 +++++++ extensions/llm-task/README.md | 27 ++-- extensions/llm-task/index.ts | 4 +- extensions/llm-task/package.json | 10 +- extensions/llm-task/src/llm-task-tool.test.ts | 21 ++++ extensions/llm-task/src/llm-task-tool.ts | 116 ++++++++++-------- src/agents/pi-embedded-runner/run.ts | 1 + src/agents/pi-embedded-runner/run/attempt.ts | 50 ++++---- src/agents/pi-embedded-runner/run/params.ts | 2 + src/agents/pi-embedded-runner/run/types.ts | 2 + 14 files changed, 312 insertions(+), 85 deletions(-) create mode 100644 docs/tools/llm-task.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 7eb7964f8..87df0eb66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Docs: https://docs.clawd.bot ## 2026.1.23 (Unreleased) ### Changes +- Plugins: add optional llm-task JSON-only tool for workflows. (#1498) Thanks @vignesh07. - CLI: restart the gateway by default after `clawdbot update`; add `--no-restart` to skip it. - CLI: add live auth probes to `clawdbot models status` for per-profile verification. - Agents: add Bedrock auto-discovery defaults + config overrides. (#1553) Thanks @fal3. diff --git a/docs/docs.json b/docs/docs.json index 2886e0fab..63f12ccfc 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -1000,6 +1000,8 @@ "group": "Tools & Skills", "pages": [ "tools", + "tools/lobster", + "tools/llm-task", "plugin", "plugins/voice-call", "plugins/zalouser", diff --git a/docs/tools/index.md b/docs/tools/index.md index 95372d109..42e216a6d 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -160,6 +160,7 @@ alongside tools (for example, the voice-call plugin). Optional plugin tools: - [Lobster](/tools/lobster): typed workflow runtime with resumable approvals (requires the Lobster CLI on the gateway host). +- [LLM Task](/tools/llm-task): JSON-only LLM step for structured workflow output (optional schema validation). ## Tool inventory diff --git a/docs/tools/llm-task.md b/docs/tools/llm-task.md new file mode 100644 index 000000000..622c0c1cb --- /dev/null +++ b/docs/tools/llm-task.md @@ -0,0 +1,114 @@ +--- +summary: "JSON-only LLM tasks for workflows (optional plugin tool)" +read_when: + - You want a JSON-only LLM step inside workflows + - You need schema-validated LLM output for automation +--- + +# LLM Task + +`llm-task` is an **optional plugin tool** that runs a JSON-only LLM task and +returns structured output (optionally validated against JSON Schema). + +This is ideal for workflow engines like Lobster: you can add a single LLM step +without writing custom Clawdbot code for each workflow. 
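+
+For example, given an email and a two-field schema, the parsed result that
+lands in `details.json` might look like this (illustrative values only; the
+shape is whatever your `prompt` and `schema` ask for):
+
+```json
+{
+  "intent": "support_request",
+  "draft": "Hi! Happy to help. Could you share a bit more detail?"
+}
+```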
+
+## Enable the plugin
+
+1) Enable the plugin in `plugins.entries`:
+
+```json
+{
+  "plugins": {
+    "entries": {
+      "llm-task": { "enabled": true }
+    }
+  }
+}
+```
+
+2) Allowlist the tool (it is registered with `optional: true`):
+
+```json
+{
+  "agents": {
+    "list": [
+      {
+        "id": "main",
+        "tools": { "allow": ["llm-task"] }
+      }
+    ]
+  }
+}
+```
+
+## Config (optional)
+
+```json
+{
+  "plugins": {
+    "entries": {
+      "llm-task": {
+        "enabled": true,
+        "config": {
+          "defaultProvider": "openai-codex",
+          "defaultModel": "gpt-5.2",
+          "defaultAuthProfileId": "main",
+          "allowedModels": ["openai-codex/gpt-5.2"],
+          "maxTokens": 800,
+          "timeoutMs": 30000
+        }
+      }
+    }
+  }
+}
+```
+
+`allowedModels` is an allowlist of `provider/model` strings. If set, any request
+outside the list is rejected.
+
+## Tool parameters
+
+- `prompt` (string, required)
+- `input` (any, optional)
+- `schema` (object, optional JSON Schema)
+- `provider` (string, optional)
+- `model` (string, optional)
+- `authProfileId` (string, optional)
+- `temperature` (number, optional)
+- `maxTokens` (number, optional)
+- `timeoutMs` (number, optional)
+
+## Output
+
+Returns `details.json` containing the parsed JSON (and validates against
+`schema` when provided).
+
+## Example: Lobster workflow step
+
+```lobster
+clawd.invoke --tool llm-task --action json --args-json '{
+  "prompt": "Given the input email, return intent and draft.",
+  "input": {
+    "subject": "Hello",
+    "body": "Can you help?"
+  },
+  "schema": {
+    "type": "object",
+    "properties": {
+      "intent": { "type": "string" },
+      "draft": { "type": "string" }
+    },
+    "required": ["intent", "draft"],
+    "additionalProperties": false
+  }
+}'
+```
+
+## Safety notes
+
+- The tool is **JSON-only** and instructs the model to output only JSON (no
+  code fences, no commentary).
+- No tools are exposed to the model for this run.
+- Treat output as untrusted unless you validate with `schema`.
+- Put approvals before any side-effecting step (send, post, exec).
diff --git a/docs/tools/lobster.md b/docs/tools/lobster.md
index 0f4760399..2e803846f 100644
--- a/docs/tools/lobster.md
+++ b/docs/tools/lobster.md
@@ -65,6 +65,52 @@ gog.gmail.search --query 'newer_than:1d' \
   | clawd.invoke --tool message --action send --each --item-key message --args-json '{"provider":"telegram","to":"..."}'
 ```
 
+## JSON-only LLM steps (llm-task)
+
+For workflows that need a **structured LLM step**, enable the optional
+`llm-task` plugin tool and call it from Lobster. This keeps the workflow
+deterministic while still letting you classify/summarize/draft with a model.
+
+Enable the tool:
+
+```json
+{
+  "plugins": {
+    "entries": {
+      "llm-task": { "enabled": true }
+    }
+  },
+  "agents": {
+    "list": [
+      {
+        "id": "main",
+        "tools": { "allow": ["llm-task"] }
+      }
+    ]
+  }
+}
+```
+
+Use it in a pipeline:
+
+```lobster
+clawd.invoke --tool llm-task --action json --args-json '{
+  "prompt": "Given the input email, return intent and draft.",
+  "input": { "subject": "Hello", "body": "Can you help?" },
+  "schema": {
+    "type": "object",
+    "properties": {
+      "intent": { "type": "string" },
+      "draft": { "type": "string" }
+    },
+    "required": ["intent", "draft"],
+    "additionalProperties": false
+  }
+}'
+```
+
+See [LLM Task](/tools/llm-task) for details and configuration options.
+
 ## Workflow files (.lobster)
 
 Lobster can run YAML/JSON workflow files with `name`, `args`, `steps`, `env`, `condition`, and `approval` fields. In Clawdbot tool calls, set `pipeline` to the file path.
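
To make the workflow-file idea concrete, here is a rough sketch of what a JSON
`.lobster` file wrapping an `llm-task` step could look like. Only the field
names `name`, `args`, and `steps` come from the description above; whether
steps are plain pipeline strings or richer objects is a guess, so this is
illustrative only and not real Lobster syntax:

```json
{
  "name": "triage-inbox",
  "args": { "window": "newer_than:1d" },
  "steps": [
    "clawd.invoke --tool llm-task --action json --args-json '...'",
    "clawd.invoke --tool message --action send --args-json '...'"
  ]
}
```

Whatever the real step encoding looks like, the ordering is the point: the
JSON-only `llm-task` step runs first, and the side-effecting send step should
sit behind an approval.
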
diff --git a/extensions/llm-task/README.md b/extensions/llm-task/README.md index 4bce6c759..9d96307cc 100644 --- a/extensions/llm-task/README.md +++ b/extensions/llm-task/README.md @@ -1,8 +1,10 @@ # LLM Task (plugin) -Adds an **optional** agent tool `llm-task` for running **JSON-only** LLM tasks (drafting, summarizing, classifying) with optional JSON Schema validation. +Adds an **optional** agent tool `llm-task` for running **JSON-only** LLM tasks +(drafting, summarizing, classifying) with optional JSON Schema validation. -This is designed to be called from workflow engines (e.g. Lobster via `clawd.invoke --each`) without adding new Clawdbot code per workflow. +Designed to be called from workflow engines (for example, Lobster via +`clawd.invoke --each`) without adding new Clawdbot code per workflow. ## Enable @@ -44,6 +46,7 @@ This is designed to be called from workflow engines (e.g. Lobster via `clawd.inv "config": { "defaultProvider": "openai-codex", "defaultModel": "gpt-5.2", + "defaultAuthProfileId": "main", "allowedModels": ["openai-codex/gpt-5.2"], "maxTokens": 800, "timeoutMs": 30000 @@ -54,7 +57,8 @@ This is designed to be called from workflow engines (e.g. Lobster via `clawd.inv } ``` -`allowedModels` is an allowlist of `provider/model` strings. If set, any request outside the list is rejected. +`allowedModels` is an allowlist of `provider/model` strings. If set, any request +outside the list is rejected. ## Tool API @@ -72,15 +76,22 @@ This is designed to be called from workflow engines (e.g. Lobster via `clawd.inv ### Output -Returns `details.json` containing the parsed JSON (and validates against `schema` when provided). +Returns `details.json` containing the parsed JSON (and validates against +`schema` when provided). ## Notes -- The tool is **JSON-only** and instructs the model to output only JSON (no code fences, no commentary). -- Side effects should be handled outside this tool (e.g. approvals in Lobster) before calling tools that send messages/emails. +- The tool is **JSON-only** and instructs the model to output only JSON + (no code fences, no commentary). +- No tools are exposed to the model for this run. +- Side effects should be handled outside this tool (for example, approvals in + Lobster) before calling tools that send messages/emails. ## Bundled extension note -This extension depends on Clawdbot internal modules (the embedded agent runner). It is intended to ship as a **bundled** Clawdbot extension (like `lobster`) and be enabled via `plugins.entries` + tool allowlists. +This extension depends on Clawdbot internal modules (the embedded agent runner). +It is intended to ship as a **bundled** Clawdbot extension (like `lobster`) and +be enabled via `plugins.entries` + tool allowlists. -It is **not** currently designed to be copied into `~/.clawdbot/extensions` as a standalone plugin directory. +It is **not** currently designed to be copied into +`~/.clawdbot/extensions` as a standalone plugin directory. 
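
For callers that inspect the raw tool result, the implementation later in this
patch returns the parsed object twice: pretty-printed in the text content and
as structured data under `details.json`, alongside the provider and model that
served the request. Roughly (values illustrative):

```json
{
  "content": [
    { "type": "text", "text": "{\n  \"intent\": \"support_request\",\n  \"draft\": \"...\"\n}" }
  ],
  "details": {
    "json": { "intent": "support_request", "draft": "..." },
    "provider": "openai-codex",
    "model": "gpt-5.2"
  }
}
```
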
diff --git a/extensions/llm-task/index.ts b/extensions/llm-task/index.ts
index 025a20fa1..72cba0b58 100644
--- a/extensions/llm-task/index.ts
+++ b/extensions/llm-task/index.ts
@@ -1,5 +1,7 @@
+import type { ClawdbotPluginApi } from "../../src/plugins/types.js";
+
 import { createLlmTaskTool } from "./src/llm-task-tool.js";
 
-export default function (api: any) {
+export default function register(api: ClawdbotPluginApi) {
   api.registerTool(createLlmTaskTool(api), { optional: true });
 }
diff --git a/extensions/llm-task/package.json b/extensions/llm-task/package.json
index fbe66cd7b..e27384d9e 100644
--- a/extensions/llm-task/package.json
+++ b/extensions/llm-task/package.json
@@ -1,7 +1,11 @@
 {
   "name": "@clawdbot/llm-task",
-  "private": true,
+  "version": "2026.1.23",
   "type": "module",
-  "main": "index.ts",
-  "version": "0.0.0"
+  "description": "Clawdbot JSON-only LLM task plugin",
+  "clawdbot": {
+    "extensions": [
+      "./index.ts"
+    ]
+  }
 }
diff --git a/extensions/llm-task/src/llm-task-tool.test.ts b/extensions/llm-task/src/llm-task-tool.test.ts
index 881feb243..63c211cd0 100644
--- a/extensions/llm-task/src/llm-task-tool.test.ts
+++ b/extensions/llm-task/src/llm-task-tool.test.ts
@@ -39,6 +39,16 @@ describe("llm-task tool (json-only)", () => {
     expect((res as any).details.json).toEqual({ foo: "bar" });
   });
 
+  it("strips fenced json", async () => {
+    (runEmbeddedPiAgent as any).mockResolvedValueOnce({
+      meta: {},
+      payloads: [{ text: "```json\n{\"ok\":true}\n```" }],
+    });
+    const tool = createLlmTaskTool(fakeApi() as any);
+    const res = await tool.execute("id", { prompt: "return ok" });
+    expect((res as any).details.json).toEqual({ ok: true });
+  });
+
   it("validates schema", async () => {
     (runEmbeddedPiAgent as any).mockResolvedValueOnce({
       meta: {},
@@ -93,4 +103,15 @@ describe("llm-task tool (json-only)", () => {
       /not allowed/i,
     );
   });
+
+  it("disables tools for embedded run", async () => {
+    (runEmbeddedPiAgent as any).mockResolvedValueOnce({
+      meta: {},
+      payloads: [{ text: JSON.stringify({ ok: true }) }],
+    });
+    const tool = createLlmTaskTool(fakeApi() as any);
+    await tool.execute("id", { prompt: "x" });
+    const call = (runEmbeddedPiAgent as any).mock.calls[0]?.[0];
+    expect(call.disableTools).toBe(true);
+  });
 });
diff --git a/extensions/llm-task/src/llm-task-tool.ts b/extensions/llm-task/src/llm-task-tool.ts
index a8ae0e3e7..64a61e2cd 100644
--- a/extensions/llm-task/src/llm-task-tool.ts
+++ b/extensions/llm-task/src/llm-task-tool.ts
@@ -12,7 +12,7 @@
 import { Type } from "@sinclair/typebox";
 
 import type { ClawdbotPluginApi } from "../../../src/plugins/types.js";
 
-type RunEmbeddedPiAgentFn = (params: any) => Promise<unknown>;
+type RunEmbeddedPiAgentFn = (params: Record<string, unknown>) => Promise<unknown>;
 
 async function loadRunEmbeddedPiAgent(): Promise<RunEmbeddedPiAgentFn> {
   // Source checkout (tests/dev)
@@ -33,7 +33,7 @@ async function loadRunEmbeddedPiAgent(): Promise<RunEmbeddedPiAgentFn> {
 function stripCodeFences(s: string): string {
   const trimmed = s.trim();
-  const m = trimmed.match(/^```(?:json)?s*([sS]*?)s*```$/i);
+  const m = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
   if (m) return (m[1] ?? "").trim();
   return trimmed;
 }
 
 function collectText(payloads: Array<{ text?: string; isError?: boolean }> | undefined): string {
   const texts = (payloads ?? [])
     .filter((p) => !p.isError && typeof p.text === "string")
     .map((p) => p.text ?? 
""); - return texts.join("n").trim(); + return texts.join("\n").trim(); } function toModelKey(provider?: string, model?: string): string | undefined { @@ -135,6 +135,12 @@ export function createLlmTaskTool(api: ClawdbotPluginApi) { }; const input = (params as any).input as unknown; + let inputJson: string; + try { + inputJson = JSON.stringify(input ?? null, null, 2); + } catch { + throw new Error("input must be JSON-serializable"); + } const system = [ "You are a JSON-only function.", @@ -144,57 +150,69 @@ export function createLlmTaskTool(api: ClawdbotPluginApi) { "Do not call tools.", ].join(" "); - const fullPrompt = `${system}nnTASK:n${prompt}nnINPUT_JSON:n${JSON.stringify(input ?? null, null, 2)}n`; + const fullPrompt = `${system}\n\nTASK:\n${prompt}\n\nINPUT_JSON:\n${inputJson}\n`; - const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-llm-task-")); - const sessionId = `llm-task-${Date.now()}`; - const sessionFile = path.join(tmpDir, "session.json"); - - const runEmbeddedPiAgent = await loadRunEmbeddedPiAgent(); - - const result = await runEmbeddedPiAgent({ - sessionId, - sessionFile, - workspaceDir: api.config?.agents?.defaults?.workspace ?? process.cwd(), - config: api.config, - prompt: fullPrompt, - timeoutMs, - runId: `llm-task-${Date.now()}`, - provider, - model, - authProfileId, - authProfileIdSource: authProfileId ? "user" : "auto", - streamParams, - }); - - const text = collectText((result as any).payloads); - if (!text) throw new Error("LLM returned empty output"); - - const raw = stripCodeFences(text); - let parsed: unknown; + let tmpDir: string | null = null; try { - parsed = JSON.parse(raw); - } catch { - throw new Error("LLM returned invalid JSON"); - } + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-llm-task-")); + const sessionId = `llm-task-${Date.now()}`; + const sessionFile = path.join(tmpDir, "session.json"); - const schema = (params as any).schema as unknown; - if (schema && typeof schema === "object") { - const ajv = new Ajv({ allErrors: true, strict: false }); - const validate = ajv.compile(schema as any); - const ok = validate(parsed); - if (!ok) { - const msg = - validate.errors?.map((e) => `${e.instancePath || ""} ${e.message || "invalid"}`).join("; ") ?? - "invalid"; - throw new Error(`LLM JSON did not match schema: ${msg}`); + const runEmbeddedPiAgent = await loadRunEmbeddedPiAgent(); + + const result = await runEmbeddedPiAgent({ + sessionId, + sessionFile, + workspaceDir: api.config?.agents?.defaults?.workspace ?? process.cwd(), + config: api.config, + prompt: fullPrompt, + timeoutMs, + runId: `llm-task-${Date.now()}`, + provider, + model, + authProfileId, + authProfileIdSource: authProfileId ? "user" : "auto", + streamParams, + disableTools: true, + }); + + const text = collectText((result as any).payloads); + if (!text) throw new Error("LLM returned empty output"); + + const raw = stripCodeFences(text); + let parsed: unknown; + try { + parsed = JSON.parse(raw); + } catch { + throw new Error("LLM returned invalid JSON"); + } + + const schema = (params as any).schema as unknown; + if (schema && typeof schema === "object" && !Array.isArray(schema)) { + const ajv = new Ajv({ allErrors: true, strict: false }); + const validate = ajv.compile(schema as any); + const ok = validate(parsed); + if (!ok) { + const msg = + validate.errors?.map((e) => `${e.instancePath || ""} ${e.message || "invalid"}`).join("; ") ?? 
+ "invalid"; + throw new Error(`LLM JSON did not match schema: ${msg}`); + } + } + + return { + content: [{ type: "text", text: JSON.stringify(parsed, null, 2) }], + details: { json: parsed, provider, model }, + }; + } finally { + if (tmpDir) { + try { + await fs.rm(tmpDir, { recursive: true, force: true }); + } catch { + // ignore + } } } - - return { - content: [{ type: "text", text: JSON.stringify(parsed, null, 2) }], - details: { json: parsed, provider, model }, - }; }, }; } diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index da33315f8..d0ff32d3f 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -273,6 +273,7 @@ export async function runEmbeddedPiAgent( skillsSnapshot: params.skillsSnapshot, prompt, images: params.images, + disableTools: params.disableTools, provider, modelId, model, diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 74c405981..655ab6ba3 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -196,30 +196,32 @@ export async function runEmbeddedAttempt( // Check if the model supports native image input const modelHasVision = params.model.input?.includes("image") ?? false; - const toolsRaw = createClawdbotCodingTools({ - exec: { - ...params.execOverrides, - elevated: params.bashElevated, - }, - sandbox, - messageProvider: params.messageChannel ?? params.messageProvider, - agentAccountId: params.agentAccountId, - messageTo: params.messageTo, - messageThreadId: params.messageThreadId, - sessionKey: params.sessionKey ?? params.sessionId, - agentDir, - workspaceDir: effectiveWorkspace, - config: params.config, - abortSignal: runAbortController.signal, - modelProvider: params.model.provider, - modelId: params.modelId, - modelAuthMode: resolveModelAuthMode(params.model.provider, params.config), - currentChannelId: params.currentChannelId, - currentThreadTs: params.currentThreadTs, - replyToMode: params.replyToMode, - hasRepliedRef: params.hasRepliedRef, - modelHasVision, - }); + const toolsRaw = params.disableTools + ? [] + : createClawdbotCodingTools({ + exec: { + ...params.execOverrides, + elevated: params.bashElevated, + }, + sandbox, + messageProvider: params.messageChannel ?? params.messageProvider, + agentAccountId: params.agentAccountId, + messageTo: params.messageTo, + messageThreadId: params.messageThreadId, + sessionKey: params.sessionKey ?? params.sessionId, + agentDir, + workspaceDir: effectiveWorkspace, + config: params.config, + abortSignal: runAbortController.signal, + modelProvider: params.model.provider, + modelId: params.modelId, + modelAuthMode: resolveModelAuthMode(params.model.provider, params.config), + currentChannelId: params.currentChannelId, + currentThreadTs: params.currentThreadTs, + replyToMode: params.replyToMode, + hasRepliedRef: params.hasRepliedRef, + modelHasVision, + }); const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider: params.provider }); logToolSchemasForGoogle({ tools, provider: params.provider }); diff --git a/src/agents/pi-embedded-runner/run/params.ts b/src/agents/pi-embedded-runner/run/params.ts index c0320bbbe..38fa3fcc3 100644 --- a/src/agents/pi-embedded-runner/run/params.ts +++ b/src/agents/pi-embedded-runner/run/params.ts @@ -44,6 +44,8 @@ export type RunEmbeddedPiAgentParams = { images?: ImageContent[]; /** Optional client-provided tools (OpenResponses hosted tools). 
*/ clientTools?: ClientToolDefinition[]; + /** Disable built-in tools for this run (LLM-only mode). */ + disableTools?: boolean; provider?: string; model?: string; authProfileId?: string; diff --git a/src/agents/pi-embedded-runner/run/types.ts b/src/agents/pi-embedded-runner/run/types.ts index 5ae947ec7..c67e96ca0 100644 --- a/src/agents/pi-embedded-runner/run/types.ts +++ b/src/agents/pi-embedded-runner/run/types.ts @@ -36,6 +36,8 @@ export type EmbeddedRunAttemptParams = { images?: ImageContent[]; /** Optional client-provided tools (OpenResponses hosted tools). */ clientTools?: ClientToolDefinition[]; + /** Disable built-in tools for this run (LLM-only mode). */ + disableTools?: boolean; provider: string; modelId: string; model: Model;