feat: replace clawdis skills with tools
This commit is contained in:
101
docs/tools.md
Normal file
101
docs/tools.md
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
---
|
||||||
|
summary: "Agent tool surface for Clawdis (browser, canvas, nodes, cron) replacing clawdis-* skills"
|
||||||
|
read_when:
|
||||||
|
- Adding or modifying agent tools
|
||||||
|
- Retiring or changing clawdis-* skills
|
||||||
|
---
|
||||||
|
|
||||||
|
# Tools (Clawdis)
|
||||||
|
|
||||||
|
Clawdis exposes **first-class agent tools** for browser, canvas, nodes, and cron.
|
||||||
|
These replace the old `clawdis-*` skills: the tools are typed, require no shelling out to the CLI,
|
||||||
|
and the agent should rely on them directly.
|
||||||
|
|
||||||
|
## Tool inventory
|
||||||
|
|
||||||
|
### `clawdis_browser`
|
||||||
|
Control the dedicated clawd browser.
|
||||||
|
|
||||||
|
Core actions:
|
||||||
|
- `status`, `start`, `stop`, `tabs`, `open`, `focus`, `close`
|
||||||
|
- `snapshot` (aria/ai)
|
||||||
|
- `screenshot` (returns image block + `MEDIA:<path>`)
|
||||||
|
- `act` (UI actions: click/type/press/hover/drag/select/fill/resize/wait/evaluate)
|
||||||
|
- `navigate`, `console`, `pdf`, `upload`, `dialog`
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- Requires `browser.enabled=true` in `~/.clawdis/clawdis.json`.
|
||||||
|
- Uses `browser.controlUrl` unless `controlUrl` is passed explicitly.
|
||||||
|
|
||||||
|
### `clawdis_canvas`
|
||||||
|
Drive the node Canvas (present, eval, snapshot, A2UI).
|
||||||
|
|
||||||
|
Core actions:
|
||||||
|
- `present`, `hide`, `navigate`, `eval`
|
||||||
|
- `snapshot` (returns image block + `MEDIA:<path>`)
|
||||||
|
- `a2ui_push`, `a2ui_reset`
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- Uses gateway `node.invoke` under the hood.
|
||||||
|
- If no `node` is provided, the tool picks a default (the single connected node, or the local Mac node).
|
||||||
|
- A2UI is v0.8 only (no `createSurface`).
|
||||||
|
|
||||||
|
### `clawdis_nodes`
|
||||||
|
Discover and target paired nodes; send notifications; capture camera/screen.
|
||||||
|
|
||||||
|
Core actions:
|
||||||
|
- `status`, `describe`
|
||||||
|
- `pending`, `approve`, `reject` (pairing)
|
||||||
|
- `notify` (macOS `system.notify`)
|
||||||
|
- `camera_snap`, `camera_clip`, `screen_record`
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- Camera/screen commands require the node app to be foregrounded.
|
||||||
|
- Images return image blocks + `MEDIA:<path>`.
|
||||||
|
- Videos return `FILE:<path>` (mp4).
|
||||||
|
|
||||||
|
### `clawdis_cron`
|
||||||
|
Manage Gateway cron jobs and wakeups.
|
||||||
|
|
||||||
|
Core actions:
|
||||||
|
- `status`, `list`
|
||||||
|
- `add`, `update`, `remove`, `run`, `runs`
|
||||||
|
- `wake` (enqueue system event + optional immediate heartbeat)
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- `add` expects a full cron job object (same schema as `cron.add` RPC).
|
||||||
|
- `update` uses `{ jobId, patch }`.
|
||||||
|
|
||||||
|
## Parameters (common)
|
||||||
|
|
||||||
|
Gateway-backed tools (`clawdis_canvas`, `clawdis_nodes`, `clawdis_cron`):
|
||||||
|
- `gatewayUrl` (default `ws://127.0.0.1:18789`)
|
||||||
|
- `gatewayToken` (if auth enabled)
|
||||||
|
- `timeoutMs`
|
||||||
|
|
||||||
|
Browser tool:
|
||||||
|
- `controlUrl` (defaults from config)
|
||||||
|
|
||||||
|
## Recommended agent flows
|
||||||
|
|
||||||
|
Browser automation:
|
||||||
|
1) `clawdis_browser` → `status` / `start`
|
||||||
|
2) `snapshot` (ai or aria)
|
||||||
|
3) `act` (click/type/press)
|
||||||
|
4) `screenshot` if you need visual confirmation
|
||||||
|
|
||||||
|
Canvas render:
|
||||||
|
1) `clawdis_canvas` → `present`
|
||||||
|
2) `a2ui_push` (optional)
|
||||||
|
3) `snapshot`
|
||||||
|
|
||||||
|
Node targeting:
|
||||||
|
1) `clawdis_nodes` → `status`
|
||||||
|
2) `describe` on the chosen node
|
||||||
|
3) `notify` / `camera_snap` / `screen_record`
|
||||||
|
|
||||||
|
## Safety
|
||||||
|
|
||||||
|
- Avoid `system.run` (not exposed as a tool).
|
||||||
|
- Respect user consent for camera/screen capture.
|
||||||
|
- Use `status` / `describe` to confirm the required permissions are granted before invoking media commands.
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
---
|
|
||||||
name: clawdis-browser
|
|
||||||
description: Control clawd's dedicated browser (tabs, snapshots, actions) via the clawdis CLI.
|
|
||||||
homepage: https://clawdis.ai
|
|
||||||
metadata: {"clawdis":{"emoji":"🧭","requires":{"config":["browser.enabled"]}}}
|
|
||||||
---
|
|
||||||
|
|
||||||
# Clawdis Browser
|
|
||||||
|
|
||||||
Use the clawd-managed Chrome/Chromium instance through `clawdis browser`.
|
|
||||||
Only available when `browser.enabled` is true.
|
|
||||||
|
|
||||||
Core flow
|
|
||||||
- `clawdis browser status`
|
|
||||||
- `clawdis browser start` (if stopped)
|
|
||||||
- `clawdis browser tabs`
|
|
||||||
- `clawdis browser open <url>`
|
|
||||||
|
|
||||||
Inspection
|
|
||||||
- `clawdis browser snapshot --format ai|aria [--limit N]`
|
|
||||||
- `clawdis browser screenshot [--full-page]`
|
|
||||||
|
|
||||||
Actions
|
|
||||||
- `clawdis browser click <ref>`
|
|
||||||
- `clawdis browser type <ref> "text" --submit`
|
|
||||||
- `clawdis browser press Enter`
|
|
||||||
- `clawdis browser navigate <url>`
|
|
||||||
- `clawdis browser wait --text "Done"`
|
|
||||||
|
|
||||||
Notes
|
|
||||||
- This is a dedicated profile; do not use the user's personal browser.
|
|
||||||
- If disabled, ask the user to enable `browser.enabled` in `~/.clawdis/clawdis.json`.
|
|
||||||
- Canvas UI is full-screen with native overlays. Keep critical controls out of the top-left/top-right/bottom edges (leave explicit gutters ~28px top, ~16px sides, ~20px bottom). Do not rely on safe-area insets.
|
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
---
|
|
||||||
name: clawdis-canvas
|
|
||||||
description: Drive the Clawdis Canvas panel (present, eval, snapshot, A2UI) via the clawdis CLI, including gateway-hosted A2UI surfaces and action bridging.
|
|
||||||
homepage: https://clawdis.ai
|
|
||||||
metadata: {"clawdis":{"emoji":"🎨","always":true}}
|
|
||||||
---
|
|
||||||
|
|
||||||
# Clawdis Canvas
|
|
||||||
|
|
||||||
Use Canvas to render HTML/JS or A2UI surfaces and capture snapshots.
|
|
||||||
|
|
||||||
Core commands
|
|
||||||
- Present: `clawdis canvas present [--node <id>] [--target <path>]`
|
|
||||||
- Hide: `clawdis canvas hide`
|
|
||||||
- Eval JS: `clawdis canvas eval --js "..."`
|
|
||||||
- Snapshot: `clawdis canvas snapshot`
|
|
||||||
|
|
||||||
A2UI
|
|
||||||
- Push JSONL: `clawdis canvas a2ui push --jsonl /path/to/file.jsonl`
|
|
||||||
- Reset: `clawdis canvas a2ui reset`
|
|
||||||
|
|
||||||
Notes
|
|
||||||
- Keep HTML under `~/clawd/canvas` when targeting remote nodes.
|
|
||||||
- Use snapshot after renders to verify UI state.
|
|
||||||
- Treat A2UI as gateway-hosted at `http(s)://<gateway-host>:18789/__clawdis__/a2ui/`.
|
|
||||||
- Rely on `canvas a2ui push/reset` to auto-navigate the Canvas to the gateway-hosted A2UI page.
|
|
||||||
- Expect A2UI to fail if the Gateway does not advertise `canvasHostUrl` or is unreachable:
|
|
||||||
- `A2UI_HOST_NOT_CONFIGURED`
|
|
||||||
- `A2UI_HOST_UNAVAILABLE`
|
|
||||||
|
|
||||||
A2UI quick flow
|
|
||||||
1. Ensure the Gateway is running and reachable from the node.
|
|
||||||
2. Build JSONL with **v0.8** server→client messages (`beginRendering`, `surfaceUpdate`, `dataModelUpdate`, `deleteSurface`).
|
|
||||||
- Do not use v0.9 `createSurface` (unsupported).
|
|
||||||
3. Push JSONL and (optionally) snapshot the result.
|
|
||||||
|
|
||||||
Example JSONL (v0.8)
|
|
||||||
```bash
|
|
||||||
cat > /tmp/a2ui-v0.8.jsonl <<'EOF'
|
|
||||||
{"surfaceUpdate":{"surfaceId":"main","components":[{"id":"root","component":{"Column":{"children":{"explicitList":["title","content"]}}}},{"id":"title","component":{"Text":{"text":{"literalString":"A2UI (v0.8)"},"usageHint":"h1"}}},{"id":"content","component":{"Text":{"text":{"literalString":"If you can read this, A2UI is live."},"usageHint":"body"}}}]}}
|
|
||||||
{"beginRendering":{"surfaceId":"main","root":"root"}}
|
|
||||||
EOF
|
|
||||||
|
|
||||||
clawdis canvas a2ui push --jsonl /tmp/a2ui-v0.8.jsonl --node <id>
|
|
||||||
```
|
|
||||||
|
|
||||||
Action callbacks (A2UI → agent)
|
|
||||||
- A2UI user actions (buttons, etc.) are bridged from the WebView back to the node via `clawdisCanvasA2UIAction`.
|
|
||||||
- Handle them on the agent side as `CANVAS_A2UI` messages (node → gateway → agent).
|
|
||||||
@@ -1,82 +0,0 @@
|
|||||||
---
|
|
||||||
name: clawdis-cron
|
|
||||||
description: Schedule jobs and wakeups via Clawdis Gateway cron.* RPC.
|
|
||||||
homepage: https://clawdis.ai
|
|
||||||
metadata: {"clawdis":{"emoji":"⏰","always":true}}
|
|
||||||
---
|
|
||||||
|
|
||||||
# Clawdis Cron
|
|
||||||
|
|
||||||
Cron runs inside the Gateway. Jobs live in `~/.clawdis/cron/jobs.json` and run logs in `~/.clawdis/cron/runs/<jobId>.jsonl`.
|
|
||||||
|
|
||||||
Enable/disable
|
|
||||||
- Enabled by default.
|
|
||||||
- Disable with config `cron.enabled=false` or env `CLAWDIS_SKIP_CRON=1`.
|
|
||||||
- Config: `cron.store`, `cron.maxConcurrentRuns`.
|
|
||||||
|
|
||||||
Job fields
|
|
||||||
- `name` is required (non-empty).
|
|
||||||
- `description` is optional.
|
|
||||||
|
|
||||||
RPC methods (Gateway WS)
|
|
||||||
- `cron.list`, `cron.status`, `cron.add`, `cron.update`, `cron.remove`, `cron.run`, `cron.runs`
|
|
||||||
- `wake` (enqueue system event + optionally trigger immediate heartbeat)
|
|
||||||
|
|
||||||
Payload rules
|
|
||||||
- `sessionTarget: "main"` requires `payload.kind: "systemEvent"`.
|
|
||||||
- `sessionTarget: "isolated"` requires `payload.kind: "agentTurn"`.
|
|
||||||
|
|
||||||
Examples
|
|
||||||
|
|
||||||
One-shot reminder (main session, immediate wake):
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"method": "cron.add",
|
|
||||||
"params": {
|
|
||||||
"name": "remind-me",
|
|
||||||
"enabled": true,
|
|
||||||
"schedule": { "kind": "at", "atMs": 1734715200000 },
|
|
||||||
"sessionTarget": "main",
|
|
||||||
"wakeMode": "now",
|
|
||||||
"payload": { "kind": "systemEvent", "text": "Remind me in 20 minutes." }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Recurring hourly check (isolated job, no external delivery):
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"method": "cron.add",
|
|
||||||
"params": {
|
|
||||||
"name": "hourly-check",
|
|
||||||
"enabled": true,
|
|
||||||
"schedule": { "kind": "every", "everyMs": 3600000 },
|
|
||||||
"sessionTarget": "isolated",
|
|
||||||
"wakeMode": "now",
|
|
||||||
"payload": { "kind": "agentTurn", "message": "Check battery; report only if < 20%.", "deliver": false },
|
|
||||||
"isolation": { "postToMainPrefix": "Cron" }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Cron expression (weekday 07:30):
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"method": "cron.add",
|
|
||||||
"params": {
|
|
||||||
"name": "weekday-wakeup",
|
|
||||||
"enabled": true,
|
|
||||||
"schedule": { "kind": "cron", "expr": "30 7 * * 1-5", "tz": "America/Los_Angeles" },
|
|
||||||
"sessionTarget": "isolated",
|
|
||||||
"wakeMode": "now",
|
|
||||||
"payload": { "kind": "agentTurn", "message": "Wake me up and start music.", "deliver": true, "channel": "whatsapp" }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Run history
|
|
||||||
- `cron.runs` returns recent JSONL entries for a job.
|
|
||||||
|
|
||||||
Notes
|
|
||||||
- `wakeMode: "now"` triggers an immediate heartbeat for main jobs.
|
|
||||||
- Isolated jobs run in `cron:<jobId>` sessions and post a summary back to main.
|
|
||||||
@@ -1,91 +0,0 @@
|
|||||||
---
|
|
||||||
name: clawdis-nodes
|
|
||||||
description: Discover, interpret, and target Clawdis nodes (paired devices) via the Gateway/CLI. Use when an agent must find available nodes, choose the best target machine, or reason about presence vs node availability (Tailnet/Tailscale optional).
|
|
||||||
homepage: https://clawdis.ai
|
|
||||||
metadata: {"clawdis":{"emoji":"🛰️"}}
|
|
||||||
---
|
|
||||||
|
|
||||||
# Clawdis Nodes
|
|
||||||
|
|
||||||
Use the node system to target specific devices (macOS node mode, iOS, Android) for canvas/camera/screen/system actions. Use presence to infer which **user machine** is active, then pick the matching node.
|
|
||||||
|
|
||||||
## Quick start
|
|
||||||
|
|
||||||
List known nodes and whether they are paired/connected:
|
|
||||||
```bash
|
|
||||||
clawdis nodes status
|
|
||||||
```
|
|
||||||
|
|
||||||
Inspect a specific node (commands, caps, permissions):
|
|
||||||
```bash
|
|
||||||
clawdis nodes describe --node <idOrNameOrIp>
|
|
||||||
```
|
|
||||||
|
|
||||||
## Node discovery workflow (agent)
|
|
||||||
|
|
||||||
1) **List nodes** with `clawdis nodes status`.
|
|
||||||
2) **Choose a target**:
|
|
||||||
- Prefer `connected` nodes with the capabilities you need.
|
|
||||||
- Use `perms` (permissions map) to avoid asking for actions that will fail.
|
|
||||||
3) **Confirm commands** with `clawdis nodes describe --node …`.
|
|
||||||
4) **Invoke actions** via `clawdis nodes …` (camera, canvas, screen, system).
|
|
||||||
|
|
||||||
If no nodes are connected:
|
|
||||||
- Check pairing: `clawdis nodes pending` / `clawdis nodes list`
|
|
||||||
- Ask the user to open/foreground the node app if the action requires it (canvas/camera/screen on iOS/Android).
|
|
||||||
|
|
||||||
## Presence vs nodes (don’t confuse them)
|
|
||||||
|
|
||||||
**Presence** shows Gateway + connected clients (mac app, WebChat, CLI).
|
|
||||||
**Nodes** are paired devices that expose commands.
|
|
||||||
|
|
||||||
Use presence to infer **where the user is active**, then map that to a node:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
clawdis gateway call system-presence
|
|
||||||
```
|
|
||||||
|
|
||||||
Heuristics:
|
|
||||||
- Pick the presence entry with the smallest `lastInputSeconds` (most active).
|
|
||||||
- Match presence `host` / `deviceFamily` to a node `displayName` / `deviceFamily`.
|
|
||||||
- If multiple matches, ask for clarification or use `nodes describe` to choose.
|
|
||||||
|
|
||||||
Note: CLI connections (`client.mode=cli`) do **not** show up in presence.
|
|
||||||
|
|
||||||
## Tailnet / Tailscale (optional context)
|
|
||||||
|
|
||||||
Node discovery is Gateway‑owned; Tailnet details only matter for reaching the Gateway:
|
|
||||||
- On LAN, the Gateway advertises a Bridge via Bonjour.
|
|
||||||
- Cross‑network, prefer Tailnet MagicDNS or Tailnet IP to reach the Gateway.
|
|
||||||
- Once connected, always target nodes by id/name/IP via the Gateway (not direct).
|
|
||||||
|
|
||||||
## Pairing & approvals
|
|
||||||
|
|
||||||
List pairing requests:
|
|
||||||
```bash
|
|
||||||
clawdis nodes pending
|
|
||||||
```
|
|
||||||
|
|
||||||
Approve/reject:
|
|
||||||
```bash
|
|
||||||
clawdis nodes approve <requestId>
|
|
||||||
clawdis nodes reject <requestId>
|
|
||||||
```
|
|
||||||
|
|
||||||
## Typical agent usages
|
|
||||||
|
|
||||||
Send a notification to a specific Mac node:
|
|
||||||
```bash
|
|
||||||
clawdis nodes notify --node <idOrNameOrIp> --title "Ping" --body "Gateway ready"
|
|
||||||
```
|
|
||||||
|
|
||||||
Capture a node canvas snapshot:
|
|
||||||
```bash
|
|
||||||
clawdis nodes canvas snapshot --node <idOrNameOrIp> --format png
|
|
||||||
```
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
- `NODE_BACKGROUND_UNAVAILABLE`: the node app must be foregrounded (iOS/Android).
|
|
||||||
- Missing permissions in `nodes status`: ask the user to grant permissions in the node app.
|
|
||||||
- No connected nodes: ensure the Gateway is reachable; check tailnet/SSH config if remote.
|
|
||||||
@@ -1,60 +0,0 @@
|
|||||||
---
|
|
||||||
name: clawdis-notify
|
|
||||||
description: Send system notifications to specific Clawdis nodes (macOS computers) via the Gateway and CLI. Use when you need to alert a person or confirm a remote action on a particular machine, or when an agent must push a notification to another computer.
|
|
||||||
homepage: https://clawdis.ai
|
|
||||||
metadata: {"clawdis":{"emoji":"🔔"}}
|
|
||||||
---
|
|
||||||
|
|
||||||
# Clawdis Notify
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
Send local notifications to a specific Clawdis node (currently macOS only) via the Gateway CLI.
|
|
||||||
|
|
||||||
## Quick start
|
|
||||||
|
|
||||||
1) Find a target node.
|
|
||||||
```bash
|
|
||||||
clawdis nodes status
|
|
||||||
clawdis nodes describe --node <idOrNameOrIp>
|
|
||||||
```
|
|
||||||
|
|
||||||
2) Send the notification.
|
|
||||||
```bash
|
|
||||||
clawdis nodes notify --node <idOrNameOrIp> --title "Ping" --body "Gateway ready"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Core command
|
|
||||||
|
|
||||||
`clawdis nodes notify --node <idOrNameOrIp> [--title <text>] [--body <text>] [--sound <name>] [--priority <passive|active|timeSensitive>] [--delivery <system|overlay|auto>]`
|
|
||||||
|
|
||||||
Notes:
|
|
||||||
- Provide at least one of `--title` or `--body`.
|
|
||||||
- `--delivery` defaults to `system`.
|
|
||||||
- Only macOS nodes expose `system.notify` right now.
|
|
||||||
- Notification permission must be granted in the macOS app or the command fails.
|
|
||||||
|
|
||||||
## Multi‑computer usage
|
|
||||||
|
|
||||||
Pick a specific node by id/name/IP, or iterate across nodes:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
for node in $(clawdis nodes status --json | jq -r '.nodes[].id'); do
|
|
||||||
clawdis nodes notify --node "$node" --title "Heads up" --body "Maintenance in 5 minutes"
|
|
||||||
done
|
|
||||||
```
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
- `nodes notify failed: ...` usually means the node is offline, not paired, or missing permission.
|
|
||||||
- If the Gateway is down or unreachable, notifications cannot be delivered.
|
|
||||||
|
|
||||||
## Low‑level fallback (rare)
|
|
||||||
|
|
||||||
If needed, use raw invoke:
|
|
||||||
```bash
|
|
||||||
clawdis nodes invoke \
|
|
||||||
--node <idOrNameOrIp> \
|
|
||||||
--command system.notify \
|
|
||||||
--params '{"title":"Ping","body":"Hello","sound":"Glass","priority":"active","delivery":"system"}'
|
|
||||||
```
|
|
||||||
1272
src/agents/clawdis-tools.ts
Normal file
1272
src/agents/clawdis-tools.ts
Normal file
File diff suppressed because it is too large
Load Diff
@@ -4,7 +4,7 @@ import path from "node:path";
|
|||||||
import type { AppMessage } from "@mariozechner/pi-agent-core";
|
import type { AppMessage } from "@mariozechner/pi-agent-core";
|
||||||
import type { AgentToolResult, AssistantMessage } from "@mariozechner/pi-ai";
|
import type { AgentToolResult, AssistantMessage } from "@mariozechner/pi-ai";
|
||||||
|
|
||||||
import { sanitizeContentBlocksImages } from "./pi-tools.js";
|
import { sanitizeContentBlocksImages } from "./tool-images.js";
|
||||||
import type { WorkspaceBootstrapFile } from "./workspace.js";
|
import type { WorkspaceBootstrapFile } from "./workspace.js";
|
||||||
|
|
||||||
export type EmbeddedContextFile = { path: string; content: string };
|
export type EmbeddedContextFile = { path: string; content: string };
|
||||||
|
|||||||
@@ -2,9 +2,10 @@ import type { AgentTool, AgentToolResult } from "@mariozechner/pi-ai";
|
|||||||
import { bashTool, codingTools, readTool } from "@mariozechner/pi-coding-agent";
|
import { bashTool, codingTools, readTool } from "@mariozechner/pi-coding-agent";
|
||||||
import { type TSchema, Type } from "@sinclair/typebox";
|
import { type TSchema, Type } from "@sinclair/typebox";
|
||||||
|
|
||||||
import { getImageMetadata, resizeToJpeg } from "../media/image-ops.js";
|
|
||||||
import { detectMime } from "../media/mime.js";
|
import { detectMime } from "../media/mime.js";
|
||||||
import { startWebLoginWithQr, waitForWebLogin } from "../web/login-qr.js";
|
import { startWebLoginWithQr, waitForWebLogin } from "../web/login-qr.js";
|
||||||
|
import { createClawdisTools } from "./clawdis-tools.js";
|
||||||
|
import { sanitizeToolResultImages } from "./tool-images.js";
|
||||||
|
|
||||||
// TODO(steipete): Remove this wrapper once pi-mono ships file-magic MIME detection
|
// TODO(steipete): Remove this wrapper once pi-mono ships file-magic MIME detection
|
||||||
// for `read` image payloads in `@mariozechner/pi-coding-agent` (then switch back to `codingTools` directly).
|
// for `read` image payloads in `@mariozechner/pi-coding-agent` (then switch back to `codingTools` directly).
|
||||||
@@ -12,14 +13,6 @@ type ToolContentBlock = AgentToolResult<unknown>["content"][number];
|
|||||||
type ImageContentBlock = Extract<ToolContentBlock, { type: "image" }>;
|
type ImageContentBlock = Extract<ToolContentBlock, { type: "image" }>;
|
||||||
type TextContentBlock = Extract<ToolContentBlock, { type: "text" }>;
|
type TextContentBlock = Extract<ToolContentBlock, { type: "text" }>;
|
||||||
|
|
||||||
// Anthropic Messages API limitation (observed in Clawdis sessions):
|
|
||||||
// When sending many images in a single request (e.g. via session history + tool results),
|
|
||||||
// Anthropic rejects any image where *either* dimension exceeds 2000px.
|
|
||||||
//
|
|
||||||
// To keep sessions resilient (and avoid "silent" WhatsApp non-replies), we auto-downscale
|
|
||||||
// all base64 image blocks above this limit while preserving aspect ratio.
|
|
||||||
const MAX_IMAGE_DIMENSION_PX = 2000;
|
|
||||||
|
|
||||||
async function sniffMimeFromBase64(
|
async function sniffMimeFromBase64(
|
||||||
base64: string,
|
base64: string,
|
||||||
): Promise<string | undefined> {
|
): Promise<string | undefined> {
|
||||||
@@ -170,133 +163,6 @@ function createWhatsAppLoginTool(): AnyAgentTool {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function isImageBlock(block: unknown): block is ImageContentBlock {
|
|
||||||
if (!block || typeof block !== "object") return false;
|
|
||||||
const rec = block as Record<string, unknown>;
|
|
||||||
return (
|
|
||||||
rec.type === "image" &&
|
|
||||||
typeof rec.data === "string" &&
|
|
||||||
typeof rec.mimeType === "string"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function isTextBlock(block: unknown): block is TextContentBlock {
|
|
||||||
if (!block || typeof block !== "object") return false;
|
|
||||||
const rec = block as Record<string, unknown>;
|
|
||||||
return rec.type === "text" && typeof rec.text === "string";
|
|
||||||
}
|
|
||||||
|
|
||||||
async function resizeImageBase64IfNeeded(params: {
|
|
||||||
base64: string;
|
|
||||||
mimeType: string;
|
|
||||||
maxDimensionPx: number;
|
|
||||||
}): Promise<{ base64: string; mimeType: string; resized: boolean }> {
|
|
||||||
const buf = Buffer.from(params.base64, "base64");
|
|
||||||
const meta = await getImageMetadata(buf);
|
|
||||||
const width = meta?.width;
|
|
||||||
const height = meta?.height;
|
|
||||||
if (
|
|
||||||
typeof width !== "number" ||
|
|
||||||
typeof height !== "number" ||
|
|
||||||
(width <= params.maxDimensionPx && height <= params.maxDimensionPx)
|
|
||||||
) {
|
|
||||||
return { base64: params.base64, mimeType: params.mimeType, resized: false };
|
|
||||||
}
|
|
||||||
|
|
||||||
const mime = params.mimeType.toLowerCase();
|
|
||||||
let out: Buffer;
|
|
||||||
try {
|
|
||||||
const mod = (await import("sharp")) as unknown as {
|
|
||||||
default?: typeof import("sharp");
|
|
||||||
};
|
|
||||||
const sharp = mod.default ?? (mod as unknown as typeof import("sharp"));
|
|
||||||
const img = sharp(buf, { failOnError: false }).resize({
|
|
||||||
width: params.maxDimensionPx,
|
|
||||||
height: params.maxDimensionPx,
|
|
||||||
fit: "inside",
|
|
||||||
withoutEnlargement: true,
|
|
||||||
});
|
|
||||||
if (mime === "image/jpeg" || mime === "image/jpg") {
|
|
||||||
out = await img.jpeg({ quality: 85 }).toBuffer();
|
|
||||||
} else if (mime === "image/webp") {
|
|
||||||
out = await img.webp({ quality: 85 }).toBuffer();
|
|
||||||
} else if (mime === "image/png") {
|
|
||||||
out = await img.png().toBuffer();
|
|
||||||
} else {
|
|
||||||
out = await img.png().toBuffer();
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
// Bun can't load sharp native addons. Fall back to a JPEG conversion.
|
|
||||||
out = await resizeToJpeg({
|
|
||||||
buffer: buf,
|
|
||||||
maxSide: params.maxDimensionPx,
|
|
||||||
quality: 85,
|
|
||||||
withoutEnlargement: true,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const sniffed = await detectMime({ buffer: out.slice(0, 256) });
|
|
||||||
const nextMime = sniffed?.startsWith("image/") ? sniffed : params.mimeType;
|
|
||||||
|
|
||||||
return { base64: out.toString("base64"), mimeType: nextMime, resized: true };
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function sanitizeContentBlocksImages(
|
|
||||||
blocks: ToolContentBlock[],
|
|
||||||
label: string,
|
|
||||||
opts: { maxDimensionPx?: number } = {},
|
|
||||||
): Promise<ToolContentBlock[]> {
|
|
||||||
const maxDimensionPx = Math.max(
|
|
||||||
opts.maxDimensionPx ?? MAX_IMAGE_DIMENSION_PX,
|
|
||||||
1,
|
|
||||||
);
|
|
||||||
const out: ToolContentBlock[] = [];
|
|
||||||
|
|
||||||
for (const block of blocks) {
|
|
||||||
if (!isImageBlock(block)) {
|
|
||||||
out.push(block);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const data = block.data.trim();
|
|
||||||
if (!data) {
|
|
||||||
out.push({
|
|
||||||
type: "text",
|
|
||||||
text: `[${label}] omitted empty image payload`,
|
|
||||||
} satisfies TextContentBlock);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const resized = await resizeImageBase64IfNeeded({
|
|
||||||
base64: data,
|
|
||||||
mimeType: block.mimeType,
|
|
||||||
maxDimensionPx,
|
|
||||||
});
|
|
||||||
out.push({ ...block, data: resized.base64, mimeType: resized.mimeType });
|
|
||||||
} catch (err) {
|
|
||||||
out.push({
|
|
||||||
type: "text",
|
|
||||||
text: `[${label}] omitted image payload: ${String(err)}`,
|
|
||||||
} satisfies TextContentBlock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function sanitizeToolResultImages(
|
|
||||||
result: AgentToolResult<unknown>,
|
|
||||||
label: string,
|
|
||||||
opts: { maxDimensionPx?: number } = {},
|
|
||||||
): Promise<AgentToolResult<unknown>> {
|
|
||||||
const content = Array.isArray(result.content) ? result.content : [];
|
|
||||||
if (!content.some((b) => isImageBlock(b) || isTextBlock(b))) return result;
|
|
||||||
|
|
||||||
const next = await sanitizeContentBlocksImages(content, label, opts);
|
|
||||||
return { ...result, content: next };
|
|
||||||
}
|
|
||||||
|
|
||||||
function createClawdisReadTool(base: AnyAgentTool): AnyAgentTool {
|
function createClawdisReadTool(base: AnyAgentTool): AnyAgentTool {
|
||||||
return {
|
return {
|
||||||
...base,
|
...base,
|
||||||
@@ -340,5 +206,5 @@ export function createClawdisCodingTools(): AnyAgentTool[] {
|
|||||||
? createClawdisBashTool(tool)
|
? createClawdisBashTool(tool)
|
||||||
: (tool as AnyAgentTool),
|
: (tool as AnyAgentTool),
|
||||||
);
|
);
|
||||||
return [...base, createWhatsAppLoginTool()];
|
return [...base, createWhatsAppLoginTool(), ...createClawdisTools()];
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,6 +18,10 @@ export function buildAgentSystemPromptAppend(params: {
|
|||||||
"- find: find files by glob pattern",
|
"- find: find files by glob pattern",
|
||||||
"- ls: list directory contents",
|
"- ls: list directory contents",
|
||||||
"- whatsapp_login: generate a WhatsApp QR code and wait for linking",
|
"- whatsapp_login: generate a WhatsApp QR code and wait for linking",
|
||||||
|
"- clawdis_browser: control clawd's dedicated browser",
|
||||||
|
"- clawdis_canvas: present/eval/snapshot the Canvas",
|
||||||
|
"- clawdis_nodes: list/describe/notify/camera/screen on paired nodes",
|
||||||
|
"- clawdis_cron: manage cron jobs and wake events",
|
||||||
"TOOLS.md does not control tool availability; it is user guidance for how to use external tools.",
|
"TOOLS.md does not control tool availability; it is user guidance for how to use external tools.",
|
||||||
"",
|
"",
|
||||||
"## Workspace",
|
"## Workspace",
|
||||||
|
|||||||
143
src/agents/tool-images.ts
Normal file
143
src/agents/tool-images.ts
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
import type { AgentToolResult } from "@mariozechner/pi-ai";
|
||||||
|
|
||||||
|
import { getImageMetadata, resizeToJpeg } from "../media/image-ops.js";
|
||||||
|
import { detectMime } from "../media/mime.js";
|
||||||
|
|
||||||
|
type ToolContentBlock = AgentToolResult<unknown>["content"][number];
|
||||||
|
type ImageContentBlock = Extract<ToolContentBlock, { type: "image" }>;
|
||||||
|
type TextContentBlock = Extract<ToolContentBlock, { type: "text" }>;
|
||||||
|
|
||||||
|
// Anthropic Messages API limitation (observed in Clawdis sessions):
|
||||||
|
// When sending many images in a single request (e.g. via session history + tool results),
|
||||||
|
// Anthropic rejects any image where *either* dimension exceeds 2000px.
|
||||||
|
//
|
||||||
|
// To keep sessions resilient (and avoid "silent" WhatsApp non-replies), we auto-downscale
|
||||||
|
// all base64 image blocks above this limit while preserving aspect ratio.
|
||||||
|
const MAX_IMAGE_DIMENSION_PX = 2000;
|
||||||
|
|
||||||
|
/**
 * Type guard for image content blocks.
 *
 * Reports `true` only for a non-null object whose `type` is `"image"` and
 * whose `data` and `mimeType` properties are both strings. The string
 * contents are not inspected (empty strings are accepted).
 *
 * @param block - Arbitrary value to test.
 * @returns Whether `block` has the shape of an image content block.
 */
function isImageBlock(block: unknown): block is ImageContentBlock {
  // Reject primitives, functions, undefined and null up front.
  if (typeof block !== "object" || block === null) return false;

  const candidate = block as Record<string, unknown>;
  if (candidate.type !== "image") return false;
  if (typeof candidate.data !== "string") return false;
  return typeof candidate.mimeType === "string";
}
|
||||||
|
|
||||||
|
/**
 * Type guard for text content blocks.
 *
 * Reports `true` only for a non-null object whose `type` is `"text"` and
 * whose `text` property is a string (an empty string is accepted).
 *
 * @param block - Arbitrary value to test.
 * @returns Whether `block` has the shape of a text content block.
 */
function isTextBlock(block: unknown): block is TextContentBlock {
  // Reject primitives, functions, undefined and null up front.
  if (typeof block !== "object" || block === null) return false;

  const candidate = block as Record<string, unknown>;
  if (candidate.type !== "text") return false;
  return typeof candidate.text === "string";
}
|
||||||
|
|
||||||
|
/**
 * Downscales a base64-encoded image when either side exceeds
 * `params.maxDimensionPx`.
 *
 * The payload is returned untouched (`resized: false`) when
 * `getImageMetadata` yields no numeric width/height, or when both sides are
 * already within the limit. Otherwise the image is re-encoded:
 *
 * - via `sharp` when it can be loaded: JPEG and WebP inputs keep their
 *   format (quality 85); every other declared MIME type is encoded as PNG;
 * - via `resizeToJpeg` when the `sharp` path throws (e.g. the native addon
 *   cannot be loaded), which always produces JPEG.
 *
 * The returned `mimeType` is sniffed from the re-encoded bytes. If sniffing
 * does not yield an `image/*` type, the MIME type of the encoder that was
 * actually used is reported, so the label never disagrees with the bytes.
 *
 * @param params.base64 - Base64-encoded image bytes.
 * @param params.mimeType - MIME type declared for the incoming image.
 * @param params.maxDimensionPx - Maximum allowed width and height in pixels.
 * @returns The (possibly re-encoded) payload, its MIME type, and whether a
 *   resize happened.
 */
async function resizeImageBase64IfNeeded(params: {
  base64: string;
  mimeType: string;
  maxDimensionPx: number;
}): Promise<{ base64: string; mimeType: string; resized: boolean }> {
  const buf = Buffer.from(params.base64, "base64");
  const meta = await getImageMetadata(buf);
  const width = meta?.width;
  const height = meta?.height;
  if (
    typeof width !== "number" ||
    typeof height !== "number" ||
    (width <= params.maxDimensionPx && height <= params.maxDimensionPx)
  ) {
    // Unknown dimensions or already small enough: pass through unchanged.
    return { base64: params.base64, mimeType: params.mimeType, resized: false };
  }

  const mime = params.mimeType.toLowerCase();
  let out: Buffer;
  // MIME type of the encoder that produced `out`; used when sniffing fails so
  // the reported type always matches the re-encoded bytes.
  let encodedMime: string;
  try {
    const mod = (await import("sharp")) as unknown as {
      default?: typeof import("sharp");
    };
    const sharp = mod.default ?? (mod as unknown as typeof import("sharp"));
    const img = sharp(buf, { failOnError: false }).resize({
      width: params.maxDimensionPx,
      height: params.maxDimensionPx,
      fit: "inside",
      withoutEnlargement: true,
    });
    if (mime === "image/jpeg" || mime === "image/jpg") {
      out = await img.jpeg({ quality: 85 }).toBuffer();
      encodedMime = "image/jpeg";
    } else if (mime === "image/webp") {
      out = await img.webp({ quality: 85 }).toBuffer();
      encodedMime = "image/webp";
    } else {
      // PNG input and every other/unknown type are re-encoded as PNG.
      out = await img.png().toBuffer();
      encodedMime = "image/png";
    }
  } catch {
    // Bun can't load sharp native addons. Fall back to a JPEG conversion.
    out = await resizeToJpeg({
      buffer: buf,
      maxSide: params.maxDimensionPx,
      quality: 85,
      withoutEnlargement: true,
    });
    encodedMime = "image/jpeg";
  }

  // Only the leading bytes are handed to the sniffer; `subarray` is the
  // non-deprecated equivalent of `Buffer#slice` (a view, no copy).
  const sniffed = await detectMime({ buffer: out.subarray(0, 256) });
  const nextMime = sniffed?.startsWith("image/") ? sniffed : encodedMime;

  return { base64: out.toString("base64"), mimeType: nextMime, resized: true };
}
|
||||||
|
|
||||||
|
/**
 * Produces a copy of `blocks` in which every image block has been passed
 * through `resizeImageBase64IfNeeded`.
 *
 * - Non-image blocks are carried over unchanged.
 * - Image blocks whose `data` is empty after trimming are replaced by a text
 *   block noting the omission.
 * - Image blocks whose resize attempt throws are replaced by a text block
 *   carrying the stringified error.
 *
 * Blocks are processed one at a time, in order, and the output preserves the
 * input ordering.
 *
 * @param blocks - Content blocks to sanitize.
 * @param label - Prefix placed in square brackets in omission notices.
 * @param opts.maxDimensionPx - Optional per-side pixel limit; defaults to
 *   `MAX_IMAGE_DIMENSION_PX` and is clamped to at least 1.
 * @returns A new array of sanitized content blocks.
 */
export async function sanitizeContentBlocksImages(
  blocks: ToolContentBlock[],
  label: string,
  opts: { maxDimensionPx?: number } = {},
): Promise<ToolContentBlock[]> {
  const requestedLimit = opts.maxDimensionPx ?? MAX_IMAGE_DIMENSION_PX;
  const maxDimensionPx = Math.max(requestedLimit, 1);

  const sanitized: ToolContentBlock[] = [];
  // Appends a text placeholder in place of an image that had to be dropped.
  const pushNotice = (text: string): void => {
    sanitized.push({ type: "text", text } satisfies TextContentBlock);
  };

  for (const block of blocks) {
    if (isImageBlock(block)) {
      const payload = block.data.trim();
      if (payload.length === 0) {
        pushNotice(`[${label}] omitted empty image payload`);
      } else {
        try {
          const { base64, mimeType } = await resizeImageBase64IfNeeded({
            base64: payload,
            mimeType: block.mimeType,
            maxDimensionPx,
          });
          sanitized.push({ ...block, data: base64, mimeType });
        } catch (err) {
          pushNotice(`[${label}] omitted image payload: ${String(err)}`);
        }
      }
    } else {
      sanitized.push(block);
    }
  }

  return sanitized;
}
|
||||||
|
|
||||||
|
/**
 * Applies `sanitizeContentBlocksImages` to the `content` of a tool result.
 *
 * When `result.content` is not an array, or contains neither an image block
 * nor a text block, the very same `result` object is returned. Otherwise a
 * shallow copy of `result` is returned with `content` replaced by the
 * sanitized blocks.
 *
 * @param result - Tool result whose content should be sanitized.
 * @param label - Prefix used in omission notices for dropped images.
 * @param opts.maxDimensionPx - Optional per-side pixel limit forwarded to the
 *   block sanitizer.
 * @returns The original result or a copy carrying sanitized content.
 */
export async function sanitizeToolResultImages(
  result: AgentToolResult<unknown>,
  label: string,
  opts: { maxDimensionPx?: number } = {},
): Promise<AgentToolResult<unknown>> {
  const blocks = Array.isArray(result.content) ? result.content : [];

  // Nothing recognisable to process (also true for an empty list).
  const nothingToSanitize = blocks.every(
    (entry) => !isImageBlock(entry) && !isTextBlock(entry),
  );
  if (nothingToSanitize) return result;

  const content = await sanitizeContentBlocksImages(blocks, label, opts);
  return { ...result, content };
}
|
||||||
Reference in New Issue
Block a user