feat: replace clawdis skills with tools
This commit is contained in:
101
docs/tools.md
Normal file
101
docs/tools.md
Normal file
@@ -0,0 +1,101 @@
|
||||
---
|
||||
summary: "Agent tool surface for Clawdis (browser, canvas, nodes, cron) replacing clawdis-* skills"
|
||||
read_when:
|
||||
- Adding or modifying agent tools
|
||||
- Retiring or changing clawdis-* skills
|
||||
---
|
||||
|
||||
# Tools (Clawdis)
|
||||
|
||||
Clawdis exposes **first-class agent tools** for browser, canvas, nodes, and cron.
|
||||
These replace the old `clawdis-*` skills: the tools are typed, require no shelling out to the CLI,
|
||||
and the agent should rely on them directly.
|
||||
|
||||
## Tool inventory
|
||||
|
||||
### `clawdis_browser`
|
||||
Control the dedicated clawd browser.
|
||||
|
||||
Core actions:
|
||||
- `status`, `start`, `stop`, `tabs`, `open`, `focus`, `close`
|
||||
- `snapshot` (aria/ai)
|
||||
- `screenshot` (returns image block + `MEDIA:<path>`)
|
||||
- `act` (UI actions: click/type/press/hover/drag/select/fill/resize/wait/evaluate)
|
||||
- `navigate`, `console`, `pdf`, `upload`, `dialog`
|
||||
|
||||
Notes:
|
||||
- Requires `browser.enabled=true` in `~/.clawdis/clawdis.json`.
|
||||
- Uses `browser.controlUrl` unless `controlUrl` is passed explicitly.
|
||||
|
||||
### `clawdis_canvas`
|
||||
Drive the node Canvas (present, eval, snapshot, A2UI).
|
||||
|
||||
Core actions:
|
||||
- `present`, `hide`, `navigate`, `eval`
|
||||
- `snapshot` (returns image block + `MEDIA:<path>`)
|
||||
- `a2ui_push`, `a2ui_reset`
|
||||
|
||||
Notes:
|
||||
- Uses gateway `node.invoke` under the hood.
|
||||
- If no `node` is provided, the tool picks a default (single connected node or local mac node).
|
||||
- A2UI is v0.8 only (the v0.9 `createSurface` message is not supported).
|
||||
|
||||
### `clawdis_nodes`
|
||||
Discover and target paired nodes; send notifications; capture camera/screen.
|
||||
|
||||
Core actions:
|
||||
- `status`, `describe`
|
||||
- `pending`, `approve`, `reject` (pairing)
|
||||
- `notify` (macOS `system.notify`)
|
||||
- `camera_snap`, `camera_clip`, `screen_record`
|
||||
|
||||
Notes:
|
||||
- Camera/screen commands require the node app to be foregrounded.
|
||||
- Images return image blocks + `MEDIA:<path>`.
|
||||
- Videos return `FILE:<path>` (mp4).
|
||||
|
||||
### `clawdis_cron`
|
||||
Manage Gateway cron jobs and wakeups.
|
||||
|
||||
Core actions:
|
||||
- `status`, `list`
|
||||
- `add`, `update`, `remove`, `run`, `runs`
|
||||
- `wake` (enqueue system event + optional immediate heartbeat)
|
||||
|
||||
Notes:
|
||||
- `add` expects a full cron job object (same schema as `cron.add` RPC).
|
||||
- `update` uses `{ jobId, patch }`.
|
||||
|
||||
## Parameters (common)
|
||||
|
||||
Gateway-backed tools (`clawdis_canvas`, `clawdis_nodes`, `clawdis_cron`):
|
||||
- `gatewayUrl` (default `ws://127.0.0.1:18789`)
|
||||
- `gatewayToken` (if auth enabled)
|
||||
- `timeoutMs`
|
||||
|
||||
Browser tool:
|
||||
- `controlUrl` (defaults from config)
|
||||
|
||||
## Recommended agent flows
|
||||
|
||||
Browser automation:
|
||||
1) `clawdis_browser` → `status` / `start`
|
||||
2) `snapshot` (ai or aria)
|
||||
3) `act` (click/type/press)
|
||||
4) `screenshot` if you need visual confirmation
|
||||
|
||||
Canvas render:
|
||||
1) `clawdis_canvas` → `present`
|
||||
2) `a2ui_push` (optional)
|
||||
3) `snapshot`
|
||||
|
||||
Node targeting:
|
||||
1) `clawdis_nodes` → `status`
|
||||
2) `describe` on the chosen node
|
||||
3) `notify` / `camera_snap` / `screen_record`
|
||||
|
||||
## Safety
|
||||
|
||||
- Avoid `system.run` (not exposed as a tool).
|
||||
- Respect user consent for camera/screen capture.
|
||||
- Use `status` / `describe` to confirm the node has the required permissions before invoking media commands.
|
||||
@@ -1,33 +0,0 @@
|
||||
---
|
||||
name: clawdis-browser
|
||||
description: Control clawd's dedicated browser (tabs, snapshots, actions) via the clawdis CLI.
|
||||
homepage: https://clawdis.ai
|
||||
metadata: {"clawdis":{"emoji":"🧭","requires":{"config":["browser.enabled"]}}}
|
||||
---
|
||||
|
||||
# Clawdis Browser
|
||||
|
||||
Use the clawd-managed Chrome/Chromium instance through `clawdis browser`.
|
||||
Only available when `browser.enabled` is true.
|
||||
|
||||
Core flow
|
||||
- `clawdis browser status`
|
||||
- `clawdis browser start` (if stopped)
|
||||
- `clawdis browser tabs`
|
||||
- `clawdis browser open <url>`
|
||||
|
||||
Inspection
|
||||
- `clawdis browser snapshot --format ai|aria [--limit N]`
|
||||
- `clawdis browser screenshot [--full-page]`
|
||||
|
||||
Actions
|
||||
- `clawdis browser click <ref>`
|
||||
- `clawdis browser type <ref> "text" --submit`
|
||||
- `clawdis browser press Enter`
|
||||
- `clawdis browser navigate <url>`
|
||||
- `clawdis browser wait --text "Done"`
|
||||
|
||||
Notes
|
||||
- This is a dedicated profile; do not use the user's personal browser.
|
||||
- If disabled, ask the user to enable `browser.enabled` in `~/.clawdis/clawdis.json`.
|
||||
- Canvas UI is full-screen with native overlays. Keep critical controls out of the top-left/top-right/bottom edges (leave explicit gutters ~28px top, ~16px sides, ~20px bottom). Do not rely on safe-area insets.
|
||||
@@ -1,49 +0,0 @@
|
||||
---
|
||||
name: clawdis-canvas
|
||||
description: Drive the Clawdis Canvas panel (present, eval, snapshot, A2UI) via the clawdis CLI, including gateway-hosted A2UI surfaces and action bridging.
|
||||
homepage: https://clawdis.ai
|
||||
metadata: {"clawdis":{"emoji":"🎨","always":true}}
|
||||
---
|
||||
|
||||
# Clawdis Canvas
|
||||
|
||||
Use Canvas to render HTML/JS or A2UI surfaces and capture snapshots.
|
||||
|
||||
Core commands
|
||||
- Present: `clawdis canvas present [--node <id>] [--target <path>]`
|
||||
- Hide: `clawdis canvas hide`
|
||||
- Eval JS: `clawdis canvas eval --js "..."`
|
||||
- Snapshot: `clawdis canvas snapshot`
|
||||
|
||||
A2UI
|
||||
- Push JSONL: `clawdis canvas a2ui push --jsonl /path/to/file.jsonl`
|
||||
- Reset: `clawdis canvas a2ui reset`
|
||||
|
||||
Notes
|
||||
- Keep HTML under `~/clawd/canvas` when targeting remote nodes.
|
||||
- Use snapshot after renders to verify UI state.
|
||||
- Treat A2UI as gateway-hosted at `http(s)://<gateway-host>:18789/__clawdis__/a2ui/`.
|
||||
- Rely on `canvas a2ui push/reset` to auto-navigate the Canvas to the gateway-hosted A2UI page.
|
||||
- Expect A2UI to fail if the Gateway does not advertise `canvasHostUrl` or is unreachable:
|
||||
- `A2UI_HOST_NOT_CONFIGURED`
|
||||
- `A2UI_HOST_UNAVAILABLE`
|
||||
|
||||
A2UI quick flow
|
||||
1. Ensure the Gateway is running and reachable from the node.
|
||||
2. Build JSONL with **v0.8** server→client messages (`beginRendering`, `surfaceUpdate`, `dataModelUpdate`, `deleteSurface`).
|
||||
- Do not use v0.9 `createSurface` (unsupported).
|
||||
3. Push JSONL and (optionally) snapshot the result.
|
||||
|
||||
Example JSONL (v0.8)
|
||||
```bash
|
||||
cat > /tmp/a2ui-v0.8.jsonl <<'EOF'
|
||||
{"surfaceUpdate":{"surfaceId":"main","components":[{"id":"root","component":{"Column":{"children":{"explicitList":["title","content"]}}}},{"id":"title","component":{"Text":{"text":{"literalString":"A2UI (v0.8)"},"usageHint":"h1"}}},{"id":"content","component":{"Text":{"text":{"literalString":"If you can read this, A2UI is live."},"usageHint":"body"}}}]}}
|
||||
{"beginRendering":{"surfaceId":"main","root":"root"}}
|
||||
EOF
|
||||
|
||||
clawdis canvas a2ui push --jsonl /tmp/a2ui-v0.8.jsonl --node <id>
|
||||
```
|
||||
|
||||
Action callbacks (A2UI → agent)
|
||||
- A2UI user actions (buttons, etc.) are bridged from the WebView back to the node via `clawdisCanvasA2UIAction`.
|
||||
- Handle them on the agent side as `CANVAS_A2UI` messages (node → gateway → agent).
|
||||
@@ -1,82 +0,0 @@
|
||||
---
|
||||
name: clawdis-cron
|
||||
description: Schedule jobs and wakeups via Clawdis Gateway cron.* RPC.
|
||||
homepage: https://clawdis.ai
|
||||
metadata: {"clawdis":{"emoji":"⏰","always":true}}
|
||||
---
|
||||
|
||||
# Clawdis Cron
|
||||
|
||||
Cron runs inside the Gateway. Jobs live in `~/.clawdis/cron/jobs.json` and run logs in `~/.clawdis/cron/runs/<jobId>.jsonl`.
|
||||
|
||||
Enable/disable
|
||||
- Enabled by default.
|
||||
- Disable with config `cron.enabled=false` or env `CLAWDIS_SKIP_CRON=1`.
|
||||
- Config: `cron.store`, `cron.maxConcurrentRuns`.
|
||||
|
||||
Job fields
|
||||
- `name` is required (non-empty).
|
||||
- `description` is optional.
|
||||
|
||||
RPC methods (Gateway WS)
|
||||
- `cron.list`, `cron.status`, `cron.add`, `cron.update`, `cron.remove`, `cron.run`, `cron.runs`
|
||||
- `wake` (enqueue system event + optionally trigger immediate heartbeat)
|
||||
|
||||
Payload rules
|
||||
- `sessionTarget: "main"` requires `payload.kind: "systemEvent"`.
|
||||
- `sessionTarget: "isolated"` requires `payload.kind: "agentTurn"`.
|
||||
|
||||
Examples
|
||||
|
||||
One-shot reminder (main session, immediate wake):
|
||||
```json
|
||||
{
|
||||
"method": "cron.add",
|
||||
"params": {
|
||||
"name": "remind-me",
|
||||
"enabled": true,
|
||||
"schedule": { "kind": "at", "atMs": 1734715200000 },
|
||||
"sessionTarget": "main",
|
||||
"wakeMode": "now",
|
||||
"payload": { "kind": "systemEvent", "text": "Remind me in 20 minutes." }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Recurring hourly check (isolated job, no external delivery):
|
||||
```json
|
||||
{
|
||||
"method": "cron.add",
|
||||
"params": {
|
||||
"name": "hourly-check",
|
||||
"enabled": true,
|
||||
"schedule": { "kind": "every", "everyMs": 3600000 },
|
||||
"sessionTarget": "isolated",
|
||||
"wakeMode": "now",
|
||||
"payload": { "kind": "agentTurn", "message": "Check battery; report only if < 20%.", "deliver": false },
|
||||
"isolation": { "postToMainPrefix": "Cron" }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Cron expression (weekday 07:30):
|
||||
```json
|
||||
{
|
||||
"method": "cron.add",
|
||||
"params": {
|
||||
"name": "weekday-wakeup",
|
||||
"enabled": true,
|
||||
"schedule": { "kind": "cron", "expr": "30 7 * * 1-5", "tz": "America/Los_Angeles" },
|
||||
"sessionTarget": "isolated",
|
||||
"wakeMode": "now",
|
||||
"payload": { "kind": "agentTurn", "message": "Wake me up and start music.", "deliver": true, "channel": "whatsapp" }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Run history
|
||||
- `cron.runs` returns recent JSONL entries for a job.
|
||||
|
||||
Notes
|
||||
- `wakeMode: "now"` triggers an immediate heartbeat for main jobs.
|
||||
- Isolated jobs run in `cron:<jobId>` sessions and post a summary back to main.
|
||||
@@ -1,91 +0,0 @@
|
||||
---
|
||||
name: clawdis-nodes
|
||||
description: Discover, interpret, and target Clawdis nodes (paired devices) via the Gateway/CLI. Use when an agent must find available nodes, choose the best target machine, or reason about presence vs node availability (Tailnet/Tailscale optional).
|
||||
homepage: https://clawdis.ai
|
||||
metadata: {"clawdis":{"emoji":"🛰️"}}
|
||||
---
|
||||
|
||||
# Clawdis Nodes
|
||||
|
||||
Use the node system to target specific devices (macOS node mode, iOS, Android) for canvas/camera/screen/system actions. Use presence to infer which **user machine** is active, then pick the matching node.
|
||||
|
||||
## Quick start
|
||||
|
||||
List known nodes and whether they are paired/connected:
|
||||
```bash
|
||||
clawdis nodes status
|
||||
```
|
||||
|
||||
Inspect a specific node (commands, caps, permissions):
|
||||
```bash
|
||||
clawdis nodes describe --node <idOrNameOrIp>
|
||||
```
|
||||
|
||||
## Node discovery workflow (agent)
|
||||
|
||||
1) **List nodes** with `clawdis nodes status`.
|
||||
2) **Choose a target**:
|
||||
- Prefer `connected` nodes with the capabilities you need.
|
||||
- Use `perms` (permissions map) to avoid asking for actions that will fail.
|
||||
3) **Confirm commands** with `clawdis nodes describe --node …`.
|
||||
4) **Invoke actions** via `clawdis nodes …` (camera, canvas, screen, system).
|
||||
|
||||
If no nodes are connected:
|
||||
- Check pairing: `clawdis nodes pending` / `clawdis nodes list`
|
||||
- Ask the user to open/foreground the node app if the action requires it (canvas/camera/screen on iOS/Android).
|
||||
|
||||
## Presence vs nodes (don’t confuse them)
|
||||
|
||||
**Presence** shows Gateway + connected clients (mac app, WebChat, CLI).
|
||||
**Nodes** are paired devices that expose commands.
|
||||
|
||||
Use presence to infer **where the user is active**, then map that to a node:
|
||||
|
||||
```bash
|
||||
clawdis gateway call system-presence
|
||||
```
|
||||
|
||||
Heuristics:
|
||||
- Pick the presence entry with the smallest `lastInputSeconds` (most active).
|
||||
- Match presence `host` / `deviceFamily` to a node `displayName` / `deviceFamily`.
|
||||
- If multiple matches, ask for clarification or use `nodes describe` to choose.
|
||||
|
||||
Note: CLI connections (`client.mode=cli`) do **not** show up in presence.
|
||||
|
||||
## Tailnet / Tailscale (optional context)
|
||||
|
||||
Node discovery is Gateway‑owned; Tailnet details only matter for reaching the Gateway:
|
||||
- On LAN, the Gateway advertises a Bridge via Bonjour.
|
||||
- Cross‑network, prefer Tailnet MagicDNS or Tailnet IP to reach the Gateway.
|
||||
- Once connected, always target nodes by id/name/IP via the Gateway (not direct).
|
||||
|
||||
## Pairing & approvals
|
||||
|
||||
List pairing requests:
|
||||
```bash
|
||||
clawdis nodes pending
|
||||
```
|
||||
|
||||
Approve/reject:
|
||||
```bash
|
||||
clawdis nodes approve <requestId>
|
||||
clawdis nodes reject <requestId>
|
||||
```
|
||||
|
||||
## Typical agent usages
|
||||
|
||||
Send a notification to a specific Mac node:
|
||||
```bash
|
||||
clawdis nodes notify --node <idOrNameOrIp> --title "Ping" --body "Gateway ready"
|
||||
```
|
||||
|
||||
Capture a node canvas snapshot:
|
||||
```bash
|
||||
clawdis nodes canvas snapshot --node <idOrNameOrIp> --format png
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- `NODE_BACKGROUND_UNAVAILABLE`: the node app must be foregrounded (iOS/Android).
|
||||
- Missing permissions in `nodes status`: ask the user to grant permissions in the node app.
|
||||
- No connected nodes: ensure the Gateway is reachable; check tailnet/SSH config if remote.
|
||||
@@ -1,60 +0,0 @@
|
||||
---
|
||||
name: clawdis-notify
|
||||
description: Send system notifications to specific Clawdis nodes (macOS computers) via the Gateway and CLI. Use when you need to alert a person or confirm a remote action on a particular machine, or when an agent must push a notification to another computer.
|
||||
homepage: https://clawdis.ai
|
||||
metadata: {"clawdis":{"emoji":"🔔"}}
|
||||
---
|
||||
|
||||
# Clawdis Notify
|
||||
|
||||
## Overview
|
||||
|
||||
Send local notifications to a specific Clawdis node (currently macOS only) via the Gateway CLI.
|
||||
|
||||
## Quick start
|
||||
|
||||
1) Find a target node.
|
||||
```bash
|
||||
clawdis nodes status
|
||||
clawdis nodes describe --node <idOrNameOrIp>
|
||||
```
|
||||
|
||||
2) Send the notification.
|
||||
```bash
|
||||
clawdis nodes notify --node <idOrNameOrIp> --title "Ping" --body "Gateway ready"
|
||||
```
|
||||
|
||||
## Core command
|
||||
|
||||
`clawdis nodes notify --node <idOrNameOrIp> [--title <text>] [--body <text>] [--sound <name>] [--priority <passive|active|timeSensitive>] [--delivery <system|overlay|auto>]`
|
||||
|
||||
Notes:
|
||||
- Provide at least one of `--title` or `--body`.
|
||||
- `--delivery` defaults to `system`.
|
||||
- Only macOS nodes expose `system.notify` right now.
|
||||
- Notification permission must be granted in the macOS app or the command fails.
|
||||
|
||||
## Multi‑computer usage
|
||||
|
||||
Pick a specific node by id/name/IP, or iterate across nodes:
|
||||
|
||||
```bash
|
||||
for node in $(clawdis nodes status --json | jq -r '.nodes[].id'); do
|
||||
clawdis nodes notify --node "$node" --title "Heads up" --body "Maintenance in 5 minutes"
|
||||
done
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- `nodes notify failed: ...` usually means the node is offline, not paired, or missing permission.
|
||||
- If the Gateway is down or unreachable, notifications cannot be delivered.
|
||||
|
||||
## Low‑level fallback (rare)
|
||||
|
||||
If needed, use raw invoke:
|
||||
```bash
|
||||
clawdis nodes invoke \
|
||||
--node <idOrNameOrIp> \
|
||||
--command system.notify \
|
||||
--params '{"title":"Ping","body":"Hello","sound":"Glass","priority":"active","delivery":"system"}'
|
||||
```
|
||||
1272
src/agents/clawdis-tools.ts
Normal file
1272
src/agents/clawdis-tools.ts
Normal file
File diff suppressed because it is too large
Load Diff
@@ -4,7 +4,7 @@ import path from "node:path";
|
||||
import type { AppMessage } from "@mariozechner/pi-agent-core";
|
||||
import type { AgentToolResult, AssistantMessage } from "@mariozechner/pi-ai";
|
||||
|
||||
import { sanitizeContentBlocksImages } from "./pi-tools.js";
|
||||
import { sanitizeContentBlocksImages } from "./tool-images.js";
|
||||
import type { WorkspaceBootstrapFile } from "./workspace.js";
|
||||
|
||||
export type EmbeddedContextFile = { path: string; content: string };
|
||||
|
||||
@@ -2,9 +2,10 @@ import type { AgentTool, AgentToolResult } from "@mariozechner/pi-ai";
|
||||
import { bashTool, codingTools, readTool } from "@mariozechner/pi-coding-agent";
|
||||
import { type TSchema, Type } from "@sinclair/typebox";
|
||||
|
||||
import { getImageMetadata, resizeToJpeg } from "../media/image-ops.js";
|
||||
import { detectMime } from "../media/mime.js";
|
||||
import { startWebLoginWithQr, waitForWebLogin } from "../web/login-qr.js";
|
||||
import { createClawdisTools } from "./clawdis-tools.js";
|
||||
import { sanitizeToolResultImages } from "./tool-images.js";
|
||||
|
||||
// TODO(steipete): Remove this wrapper once pi-mono ships file-magic MIME detection
|
||||
// for `read` image payloads in `@mariozechner/pi-coding-agent` (then switch back to `codingTools` directly).
|
||||
@@ -12,14 +13,6 @@ type ToolContentBlock = AgentToolResult<unknown>["content"][number];
|
||||
type ImageContentBlock = Extract<ToolContentBlock, { type: "image" }>;
|
||||
type TextContentBlock = Extract<ToolContentBlock, { type: "text" }>;
|
||||
|
||||
// Anthropic Messages API limitation (observed in Clawdis sessions):
|
||||
// When sending many images in a single request (e.g. via session history + tool results),
|
||||
// Anthropic rejects any image where *either* dimension exceeds 2000px.
|
||||
//
|
||||
// To keep sessions resilient (and avoid "silent" WhatsApp non-replies), we auto-downscale
|
||||
// all base64 image blocks above this limit while preserving aspect ratio.
|
||||
const MAX_IMAGE_DIMENSION_PX = 2000;
|
||||
|
||||
async function sniffMimeFromBase64(
|
||||
base64: string,
|
||||
): Promise<string | undefined> {
|
||||
@@ -170,133 +163,6 @@ function createWhatsAppLoginTool(): AnyAgentTool {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Type guard: true when `block` is a non-null object tagged `type: "image"`
 * whose `data` and `mimeType` fields are both strings.
 */
function isImageBlock(block: unknown): block is ImageContentBlock {
  if (typeof block !== "object" || block === null) {
    return false;
  }
  const candidate = block as Record<string, unknown>;
  if (candidate.type !== "image") return false;
  if (typeof candidate.data !== "string") return false;
  return typeof candidate.mimeType === "string";
}
|
||||
|
||||
/**
 * Type guard: true when `block` is a non-null object tagged `type: "text"`
 * that carries a string `text` field.
 */
function isTextBlock(block: unknown): block is TextContentBlock {
  if (typeof block !== "object" || block === null) {
    return false;
  }
  const candidate = block as Record<string, unknown>;
  if (candidate.type !== "text") return false;
  return typeof candidate.text === "string";
}
|
||||
|
||||
/**
 * Downscale a base64-encoded image so that neither side exceeds
 * `maxDimensionPx`, preserving aspect ratio.
 *
 * The payload is returned untouched (`resized: false`) when its width/height
 * cannot be read from the metadata, or when both sides already fit.
 *
 * @param params.base64 - Base64-encoded image bytes.
 * @param params.mimeType - MIME type the caller declared for the payload.
 * @param params.maxDimensionPx - Largest allowed width/height in pixels.
 * @returns The (possibly re-encoded) base64 payload, the MIME type describing
 *   the returned bytes, and whether a resize was performed.
 */
async function resizeImageBase64IfNeeded(params: {
  base64: string;
  mimeType: string;
  maxDimensionPx: number;
}): Promise<{ base64: string; mimeType: string; resized: boolean }> {
  const buf = Buffer.from(params.base64, "base64");
  const meta = await getImageMetadata(buf);
  const width = meta?.width;
  const height = meta?.height;
  if (
    typeof width !== "number" ||
    typeof height !== "number" ||
    (width <= params.maxDimensionPx && height <= params.maxDimensionPx)
  ) {
    return { base64: params.base64, mimeType: params.mimeType, resized: false };
  }

  const mime = params.mimeType.toLowerCase();
  let out: Buffer;
  // MIME type of the encoder that produced `out`. Used when sniffing the
  // output fails, so the label always matches the re-encoded bytes rather
  // than the (possibly different) input format.
  let encodedMime: string;
  try {
    const mod = (await import("sharp")) as unknown as {
      default?: typeof import("sharp");
    };
    const sharp = mod.default ?? (mod as unknown as typeof import("sharp"));
    const img = sharp(buf, { failOnError: false }).resize({
      width: params.maxDimensionPx,
      height: params.maxDimensionPx,
      fit: "inside",
      withoutEnlargement: true,
    });
    if (mime === "image/jpeg" || mime === "image/jpg") {
      out = await img.jpeg({ quality: 85 }).toBuffer();
      encodedMime = "image/jpeg";
    } else if (mime === "image/webp") {
      out = await img.webp({ quality: 85 }).toBuffer();
      encodedMime = "image/webp";
    } else {
      // PNG input, and every other declared format, is re-encoded as PNG.
      out = await img.png().toBuffer();
      encodedMime = "image/png";
    }
  } catch {
    // Bun can't load sharp native addons. Fall back to a JPEG conversion.
    out = await resizeToJpeg({
      buffer: buf,
      maxSide: params.maxDimensionPx,
      quality: 85,
      withoutEnlargement: true,
    });
    encodedMime = "image/jpeg";
  }

  // Only the leading bytes are handed to the sniffer.
  const sniffed = await detectMime({ buffer: out.subarray(0, 256) });
  const nextMime = sniffed?.startsWith("image/") ? sniffed : encodedMime;

  return { base64: out.toString("base64"), mimeType: nextMime, resized: true };
}
|
||||
|
||||
/**
 * Build a new block list in which every image block has been passed through
 * the resize step.
 *
 * Non-image blocks are copied through unchanged. An image whose payload is
 * empty after trimming, or whose resize throws, is replaced by a text block
 * prefixed with `[label]` describing the omission.
 *
 * @param blocks - Content blocks to process, handled one at a time in order.
 * @param label - Prefix used in the replacement text blocks.
 * @param opts.maxDimensionPx - Size limit; defaults to
 *   `MAX_IMAGE_DIMENSION_PX` and is clamped to at least 1.
 * @returns The processed blocks, in the same order as the input.
 */
export async function sanitizeContentBlocksImages(
  blocks: ToolContentBlock[],
  label: string,
  opts: { maxDimensionPx?: number } = {},
): Promise<ToolContentBlock[]> {
  const requestedLimit = opts.maxDimensionPx ?? MAX_IMAGE_DIMENSION_PX;
  const limitPx = Math.max(requestedLimit, 1);

  const placeholder = (text: string): TextContentBlock => ({
    type: "text",
    text,
  });

  const sanitized: ToolContentBlock[] = [];
  for (const block of blocks) {
    if (!isImageBlock(block)) {
      sanitized.push(block);
      continue;
    }

    const payload = block.data.trim();
    if (payload.length === 0) {
      sanitized.push(placeholder(`[${label}] omitted empty image payload`));
      continue;
    }

    let replacement: ToolContentBlock;
    try {
      const { base64, mimeType } = await resizeImageBase64IfNeeded({
        base64: payload,
        mimeType: block.mimeType,
        maxDimensionPx: limitPx,
      });
      replacement = { ...block, data: base64, mimeType };
    } catch (err) {
      replacement = placeholder(
        `[${label}] omitted image payload: ${String(err)}`,
      );
    }
    sanitized.push(replacement);
  }

  return sanitized;
}
|
||||
|
||||
/**
 * Run a tool result's content blocks through `sanitizeContentBlocksImages`.
 *
 * The original `result` object is returned as-is when `content` is not an
 * array or contains neither an image block nor a text block; otherwise a
 * shallow copy with the processed `content` is returned.
 *
 * @param result - Tool result whose `content` should be processed.
 * @param label - Forwarded to `sanitizeContentBlocksImages`.
 * @param opts - Forwarded to `sanitizeContentBlocksImages`.
 */
export async function sanitizeToolResultImages(
  result: AgentToolResult<unknown>,
  label: string,
  opts: { maxDimensionPx?: number } = {},
): Promise<AgentToolResult<unknown>> {
  const blocks = Array.isArray(result.content) ? result.content : [];
  const hasRelevantBlock = blocks.some(
    (entry) => isImageBlock(entry) || isTextBlock(entry),
  );
  if (!hasRelevantBlock) {
    return result;
  }

  const content = await sanitizeContentBlocksImages(blocks, label, opts);
  return { ...result, content };
}
|
||||
|
||||
function createClawdisReadTool(base: AnyAgentTool): AnyAgentTool {
|
||||
return {
|
||||
...base,
|
||||
@@ -340,5 +206,5 @@ export function createClawdisCodingTools(): AnyAgentTool[] {
|
||||
? createClawdisBashTool(tool)
|
||||
: (tool as AnyAgentTool),
|
||||
);
|
||||
return [...base, createWhatsAppLoginTool()];
|
||||
return [...base, createWhatsAppLoginTool(), ...createClawdisTools()];
|
||||
}
|
||||
|
||||
@@ -18,6 +18,10 @@ export function buildAgentSystemPromptAppend(params: {
|
||||
"- find: find files by glob pattern",
|
||||
"- ls: list directory contents",
|
||||
"- whatsapp_login: generate a WhatsApp QR code and wait for linking",
|
||||
"- clawdis_browser: control clawd's dedicated browser",
|
||||
"- clawdis_canvas: present/eval/snapshot the Canvas",
|
||||
"- clawdis_nodes: list/describe/notify/camera/screen on paired nodes",
|
||||
"- clawdis_cron: manage cron jobs and wake events",
|
||||
"TOOLS.md does not control tool availability; it is user guidance for how to use external tools.",
|
||||
"",
|
||||
"## Workspace",
|
||||
|
||||
143
src/agents/tool-images.ts
Normal file
143
src/agents/tool-images.ts
Normal file
@@ -0,0 +1,143 @@
|
||||
import type { AgentToolResult } from "@mariozechner/pi-ai";
|
||||
|
||||
import { getImageMetadata, resizeToJpeg } from "../media/image-ops.js";
|
||||
import { detectMime } from "../media/mime.js";
|
||||
|
||||
// One element of a tool result's `content` array.
type ToolContentBlock = AgentToolResult<unknown>["content"][number];
|
||||
// The members of ToolContentBlock tagged `type: "image"`.
type ImageContentBlock = Extract<ToolContentBlock, { type: "image" }>;
|
||||
// The members of ToolContentBlock tagged `type: "text"`.
type TextContentBlock = Extract<ToolContentBlock, { type: "text" }>;
|
||||
|
||||
// Anthropic Messages API limitation (observed in Clawdis sessions):
|
||||
// When sending many images in a single request (e.g. via session history + tool results),
|
||||
// Anthropic rejects any image where *either* dimension exceeds 2000px.
|
||||
//
|
||||
// To keep sessions resilient (and avoid "silent" WhatsApp non-replies), we auto-downscale
|
||||
// all base64 image blocks above this limit while preserving aspect ratio.
|
||||
const MAX_IMAGE_DIMENSION_PX = 2000;
|
||||
|
||||
/**
 * Type guard: true when `block` is a non-null object tagged `type: "image"`
 * whose `data` and `mimeType` fields are both strings.
 */
function isImageBlock(block: unknown): block is ImageContentBlock {
  if (typeof block !== "object" || block === null) {
    return false;
  }
  const candidate = block as Record<string, unknown>;
  if (candidate.type !== "image") return false;
  if (typeof candidate.data !== "string") return false;
  return typeof candidate.mimeType === "string";
}
|
||||
|
||||
/**
 * Type guard: true when `block` is a non-null object tagged `type: "text"`
 * that carries a string `text` field.
 */
function isTextBlock(block: unknown): block is TextContentBlock {
  if (typeof block !== "object" || block === null) {
    return false;
  }
  const candidate = block as Record<string, unknown>;
  if (candidate.type !== "text") return false;
  return typeof candidate.text === "string";
}
|
||||
|
||||
/**
 * Downscale a base64-encoded image so that neither side exceeds
 * `maxDimensionPx`, preserving aspect ratio.
 *
 * The payload is returned untouched (`resized: false`) when its width/height
 * cannot be read from the metadata, or when both sides already fit.
 *
 * @param params.base64 - Base64-encoded image bytes.
 * @param params.mimeType - MIME type the caller declared for the payload.
 * @param params.maxDimensionPx - Largest allowed width/height in pixels.
 * @returns The (possibly re-encoded) base64 payload, the MIME type describing
 *   the returned bytes, and whether a resize was performed.
 */
async function resizeImageBase64IfNeeded(params: {
  base64: string;
  mimeType: string;
  maxDimensionPx: number;
}): Promise<{ base64: string; mimeType: string; resized: boolean }> {
  const buf = Buffer.from(params.base64, "base64");
  const meta = await getImageMetadata(buf);
  const width = meta?.width;
  const height = meta?.height;
  if (
    typeof width !== "number" ||
    typeof height !== "number" ||
    (width <= params.maxDimensionPx && height <= params.maxDimensionPx)
  ) {
    return { base64: params.base64, mimeType: params.mimeType, resized: false };
  }

  const mime = params.mimeType.toLowerCase();
  let out: Buffer;
  // MIME type of the encoder that produced `out`. Used when sniffing the
  // output fails, so the label always matches the re-encoded bytes rather
  // than the (possibly different) input format.
  let encodedMime: string;
  try {
    const mod = (await import("sharp")) as unknown as {
      default?: typeof import("sharp");
    };
    const sharp = mod.default ?? (mod as unknown as typeof import("sharp"));
    const img = sharp(buf, { failOnError: false }).resize({
      width: params.maxDimensionPx,
      height: params.maxDimensionPx,
      fit: "inside",
      withoutEnlargement: true,
    });
    if (mime === "image/jpeg" || mime === "image/jpg") {
      out = await img.jpeg({ quality: 85 }).toBuffer();
      encodedMime = "image/jpeg";
    } else if (mime === "image/webp") {
      out = await img.webp({ quality: 85 }).toBuffer();
      encodedMime = "image/webp";
    } else {
      // PNG input, and every other declared format, is re-encoded as PNG.
      out = await img.png().toBuffer();
      encodedMime = "image/png";
    }
  } catch {
    // Bun can't load sharp native addons. Fall back to a JPEG conversion.
    out = await resizeToJpeg({
      buffer: buf,
      maxSide: params.maxDimensionPx,
      quality: 85,
      withoutEnlargement: true,
    });
    encodedMime = "image/jpeg";
  }

  // Only the leading bytes are handed to the sniffer.
  const sniffed = await detectMime({ buffer: out.subarray(0, 256) });
  const nextMime = sniffed?.startsWith("image/") ? sniffed : encodedMime;

  return { base64: out.toString("base64"), mimeType: nextMime, resized: true };
}
|
||||
|
||||
/**
 * Build a new block list in which every image block has been passed through
 * the resize step.
 *
 * Non-image blocks are copied through unchanged. An image whose payload is
 * empty after trimming, or whose resize throws, is replaced by a text block
 * prefixed with `[label]` describing the omission.
 *
 * @param blocks - Content blocks to process, handled one at a time in order.
 * @param label - Prefix used in the replacement text blocks.
 * @param opts.maxDimensionPx - Size limit; defaults to
 *   `MAX_IMAGE_DIMENSION_PX` and is clamped to at least 1.
 * @returns The processed blocks, in the same order as the input.
 */
export async function sanitizeContentBlocksImages(
  blocks: ToolContentBlock[],
  label: string,
  opts: { maxDimensionPx?: number } = {},
): Promise<ToolContentBlock[]> {
  const requestedLimit = opts.maxDimensionPx ?? MAX_IMAGE_DIMENSION_PX;
  const limitPx = Math.max(requestedLimit, 1);

  const placeholder = (text: string): TextContentBlock => ({
    type: "text",
    text,
  });

  const sanitized: ToolContentBlock[] = [];
  for (const block of blocks) {
    if (!isImageBlock(block)) {
      sanitized.push(block);
      continue;
    }

    const payload = block.data.trim();
    if (payload.length === 0) {
      sanitized.push(placeholder(`[${label}] omitted empty image payload`));
      continue;
    }

    let replacement: ToolContentBlock;
    try {
      const { base64, mimeType } = await resizeImageBase64IfNeeded({
        base64: payload,
        mimeType: block.mimeType,
        maxDimensionPx: limitPx,
      });
      replacement = { ...block, data: base64, mimeType };
    } catch (err) {
      replacement = placeholder(
        `[${label}] omitted image payload: ${String(err)}`,
      );
    }
    sanitized.push(replacement);
  }

  return sanitized;
}
|
||||
|
||||
/**
 * Run a tool result's content blocks through `sanitizeContentBlocksImages`.
 *
 * The original `result` object is returned as-is when `content` is not an
 * array or contains neither an image block nor a text block; otherwise a
 * shallow copy with the processed `content` is returned.
 *
 * @param result - Tool result whose `content` should be processed.
 * @param label - Forwarded to `sanitizeContentBlocksImages`.
 * @param opts - Forwarded to `sanitizeContentBlocksImages`.
 */
export async function sanitizeToolResultImages(
  result: AgentToolResult<unknown>,
  label: string,
  opts: { maxDimensionPx?: number } = {},
): Promise<AgentToolResult<unknown>> {
  const blocks = Array.isArray(result.content) ? result.content : [];
  const hasRelevantBlock = blocks.some(
    (entry) => isImageBlock(entry) || isTextBlock(entry),
  );
  if (!hasRelevantBlock) {
    return result;
  }

  const content = await sanitizeContentBlocksImages(blocks, label, opts);
  return { ...result, content };
}
|
||||
Reference in New Issue
Block a user