feat(browser): add clawd browser control

This commit is contained in:
Peter Steinberger
2025-12-13 15:15:09 +00:00
parent 4cdb21c5cd
commit 208ba02a4a
16 changed files with 1553 additions and 0 deletions

View File

@@ -37,6 +37,7 @@ Because every space lobster needs a time-and-space machine. The Doctor has a TAR
- 🤖 **AI Agent Gateway** — Pi only (Pi CLI in RPC mode)
- 💬 **Session Management** — Per-sender conversation context
- 🔔 **Heartbeats** — Periodic check-ins for proactive AI
- 🧭 **Clawd Browser** — Dedicated Chrome/Chromium profile with tabs + screenshot control (no interference with your daily browser)
- 👥 **Group Chat Support** — Mention-based triggering
- 📎 **Media Support** — Images, audio, documents, voice notes
- 🎤 **Voice Transcription** — Whisper integration
@@ -111,6 +112,18 @@ Create `~/.clawdis/clawdis.json`:
}
```
Optional: enable/configure clawd's dedicated browser control (defaults are already on):
```json5
{
browser: {
enabled: true,
controlUrl: "http://127.0.0.1:18790",
color: "#FF4500"
}
}
```
## Documentation
- [Configuration Guide](./docs/configuration.md)
@@ -148,6 +161,7 @@ Bot-mode support (grammY only) shares the same `main` session as WhatsApp/WebCha
| `clawdis login` | Link WhatsApp Web via QR |
| `clawdis send` | Send a message (WhatsApp default; `--provider telegram` for bot mode). Always uses the Gateway WS; requires a running gateway. |
| `clawdis agent` | Talk directly to the agent (no WhatsApp send) |
| `clawdis browser ...` | Manage clawd's dedicated browser (status/tabs/open/screenshot). |
| `clawdis gateway` | Start the Gateway server (WS control plane). Params: `--port`, `--token`, `--force`, `--verbose`. |
| `clawdis gateway health|status|send|agent|call` | Gateway WS clients; assume a running gateway. |
| `clawdis wake` | Enqueue a system event and optionally trigger a heartbeat via the Gateway. |

View File

@@ -18,6 +18,12 @@ struct ConfigSettings: View {
@AppStorage(webChatEnabledKey) private var webChatEnabled: Bool = true
@AppStorage(webChatPortKey) private var webChatPort: Int = 18788
// clawd browser settings (stored in ~/.clawdis/clawdis.json under "browser")
@State private var browserEnabled: Bool = true
@State private var browserControlUrl: String = "http://127.0.0.1:18790"
@State private var browserColorHex: String = "#FF4500"
@State private var browserAttachOnly: Bool = false
var body: some View {
VStack(alignment: .leading, spacing: 14) {
Text("Clawdis CLI config")
@@ -120,6 +126,56 @@ struct ConfigSettings: View {
}
}
Divider().padding(.vertical, 4)
LabeledContent("Browser (clawd)") {
VStack(alignment: .leading, spacing: 8) {
Toggle("Enable clawd browser control", isOn: self.$browserEnabled)
.toggleStyle(.switch)
.frame(width: 360, alignment: .leading)
.onChange(of: self.browserEnabled) { _, _ in self.autosaveConfig() }
HStack(spacing: 8) {
Text("Control URL")
TextField("http://127.0.0.1:18790", text: self.$browserControlUrl)
.textFieldStyle(.roundedBorder)
.frame(width: 320)
.disabled(!self.browserEnabled)
.onChange(of: self.browserControlUrl) { _, _ in self.autosaveConfig() }
}
HStack(spacing: 8) {
Text("Accent")
TextField("#FF4500", text: self.$browserColorHex)
.textFieldStyle(.roundedBorder)
.frame(width: 120)
.disabled(!self.browserEnabled)
.onChange(of: self.browserColorHex) { _, _ in self.autosaveConfig() }
Circle()
.fill(self.browserColor)
.frame(width: 12, height: 12)
.overlay(Circle().stroke(Color.secondary.opacity(0.25), lineWidth: 1))
Text("lobster-orange")
.font(.footnote)
.foregroundStyle(.secondary)
}
Toggle("Attach only (never launch)", isOn: self.$browserAttachOnly)
.toggleStyle(.switch)
.frame(width: 360, alignment: .leading)
.disabled(!self.browserEnabled)
.onChange(of: self.browserAttachOnly) { _, _ in self.autosaveConfig() }
.help("When enabled, the browser server will only connect if the clawd browser is already running.")
Text(
"Clawd uses a separate Chrome profile and ports (default 18790/18791) so it wont interfere with your daily browser."
)
.font(.footnote)
.foregroundStyle(.secondary)
.frame(maxWidth: 480, alignment: .leading)
}
}
Spacer()
}
.frame(maxWidth: .infinity, alignment: .leading)
@@ -153,6 +209,7 @@ struct ConfigSettings: View {
let agent = reply?["agent"] as? [String: Any]
let heartbeatMinutes = reply?["heartbeatMinutes"] as? Int
let heartbeatBody = reply?["heartbeatBody"] as? String
let browser = parsed["browser"] as? [String: Any]
let loadedModel = (agent?["model"] as? String) ?? ""
if !loadedModel.isEmpty {
@@ -165,6 +222,13 @@ struct ConfigSettings: View {
if let heartbeatMinutes { self.heartbeatMinutes = heartbeatMinutes }
if let heartbeatBody, !heartbeatBody.isEmpty { self.heartbeatBody = heartbeatBody }
if let browser {
if let enabled = browser["enabled"] as? Bool { self.browserEnabled = enabled }
if let url = browser["controlUrl"] as? String, !url.isEmpty { self.browserControlUrl = url }
if let color = browser["color"] as? String, !color.isEmpty { self.browserColorHex = color }
if let attachOnly = browser["attachOnly"] as? Bool { self.browserAttachOnly = attachOnly }
}
}
private func autosaveConfig() {
@@ -181,6 +245,7 @@ struct ConfigSettings: View {
var inbound = root["inbound"] as? [String: Any] ?? [:]
var reply = inbound["reply"] as? [String: Any] ?? [:]
var agent = reply["agent"] as? [String: Any] ?? [:]
var browser = root["browser"] as? [String: Any] ?? [:]
let chosenModel = (self.configModel == "__custom__" ? self.customModel : self.configModel)
.trimmingCharacters(in: .whitespacesAndNewlines)
@@ -201,6 +266,14 @@ struct ConfigSettings: View {
inbound["reply"] = reply
root["inbound"] = inbound
browser["enabled"] = self.browserEnabled
let trimmedUrl = self.browserControlUrl.trimmingCharacters(in: .whitespacesAndNewlines)
if !trimmedUrl.isEmpty { browser["controlUrl"] = trimmedUrl }
let trimmedColor = self.browserColorHex.trimmingCharacters(in: .whitespacesAndNewlines)
if !trimmedColor.isEmpty { browser["color"] = trimmedColor }
browser["attachOnly"] = self.browserAttachOnly
root["browser"] = browser
do {
let data = try JSONSerialization.data(withJSONObject: root, options: [.prettyPrinted, .sortedKeys])
let url = self.configURL()
@@ -217,6 +290,16 @@ struct ConfigSettings: View {
return (try? JSONSerialization.jsonObject(with: data) as? [String: Any]) ?? [:]
}
/// Accent colour parsed from `browserColorHex`, used to fill the preview swatch.
///
/// Accepts exactly six hexadecimal digits with an optional leading `#`;
/// surrounding whitespace is ignored. Any other input yields `.orange`.
private var browserColor: Color {
    let raw = self.browserColorHex.trimmingCharacters(in: .whitespacesAndNewlines)
    let hex = raw.hasPrefix("#") ? String(raw.dropFirst()) : raw
    // `Int(_:radix:)` on its own also accepts a leading sign ("+FF450", "-FF450"),
    // which would let a signed five-digit value pass the length check. Require
    // every character to be a hex digit before converting.
    guard hex.count == 6,
          hex.allSatisfy(\.isHexDigit),
          let value = Int(hex, radix: 16)
    else { return .orange }
    let r = Double((value >> 16) & 0xFF) / 255.0
    let g = Double((value >> 8) & 0xFF) / 255.0
    let b = Double(value & 0xFF) / 255.0
    return Color(red: r, green: g, blue: b)
}
private func loadModels() async {
guard !self.modelsLoading else { return }
self.modelsLoading = true

View File

@@ -34,3 +34,4 @@ read_when:
- Run installs from the Tools tab; it hides the button if a tool is already present.
- For MCPs, mcporter writes to the home-scope config; re-run installs if you rotate tokens.
- Keep heartbeats enabled so the assistant can schedule reminders, monitor inboxes, and trigger camera captures.
- For browser-driven verification, use `clawdis browser` (tabs/status/screenshot) with the clawd-managed Chrome profile.

View File

@@ -86,6 +86,13 @@ RPC mode emits structured tool lifecycle events (start/result) and assistant out
- logged to `/tmp/clawdis/…`
- streamed over the Gateway WS to clients like WebChat and the macOS app
## Browser helpers
If you enable the clawd-managed browser (default on), the agent can use:
- `clawdis browser status` / `tabs` / `open <url>` / `screenshot [targetId]`
This uses a dedicated Chrome/Chromium profile (lobster-orange by default) so it doesn't interfere with your daily browser.
---
*Next: [Group Chats](./group-messages.md)* 🦞

View File

@@ -103,6 +103,28 @@ Example command-mode config:
}
```
### `browser` (clawd-managed Chrome)
Clawdis can start a **dedicated, isolated** Chrome/Chromium instance for clawd and expose a small loopback control server.
Defaults:
- enabled: `true`
- control URL: `http://127.0.0.1:18790` (CDP uses `18791`)
- profile color: `#FF4500` (lobster-orange)
```json5
{
browser: {
enabled: true,
controlUrl: "http://127.0.0.1:18790",
color: "#FF4500",
// Advanced:
// headless: false,
// attachOnly: false,
}
}
```
## Template variables
Template placeholders are expanded in `inbound.reply.command`, `sessionIntro`, `bodyPrefix`, and other templated strings.

150
docs/mac/browser.md Normal file
View File

@@ -0,0 +1,150 @@
---
summary: "Spec: clawd-managed Chrome/Chromium instance (separate profile, lobster-orange, tab management)"
read_when:
- Adding agent-controlled browser automation
- Debugging why clawd is interfering with your own Chrome
- Implementing browser settings + lifecycle in the macOS app
---
# Browser (macOS app) — clawd-managed Chrome
Status: draft spec · Date: 2025-12-13
Goal: give the **clawd** persona its own browser that is:
- Visually distinct (lobster-orange, profile labeled “clawd”).
- Fully agent-manageable (start/stop, list tabs, focus/close tabs, open URLs, screenshot).
- Non-interfering with the user's own browser (separate profile + dedicated ports).
This doc covers the macOS app/gateway side. It intentionally does not mandate Playwright vs Puppeteer yet; the key is the **contract** and the **separation guarantees**.
## User-facing settings
Add a dedicated settings section (preferably under **Tools** or its own “Browser” tab):
- **Enable clawd browser** (`default: on`)
- When off: no browser is launched, and browser tools return “disabled”.
- **Browser control URL** (`default: http://127.0.0.1:18790`)
- Interpreted as the base URL of the local/remote browser-control server.
- If the URL host is not loopback, Clawdis must **not** attempt to launch a local browser; it only connects.
- **Accent color** (`default: #FF4500`, “lobster-orange”)
- Used to theme the clawd browser profile (best-effort) and to tint UI indicators in Clawdis.
Optional (advanced, can be hidden behind Debug initially):
- **Use headless browser** (`default: off`)
- **Attach to existing only** (`default: off`) — if on, never launch; only connect if already running.
### Port convention
Clawdis already uses:
- Gateway WebSocket: `18789`
- WebChat HTTP: `18788`
For the clawd browser-control server, use “family” ports:
- Browser control HTTP API: `18790` (gateway + 1)
- Browser CDP/debugging port: `18791` (control + 1)
The user usually only configures the **control URL** (port `18790`). CDP is an internal detail.
## Browser isolation guarantees (non-negotiable)
1) **Dedicated user data dir**
- Never attach to or reuse the user's default Chrome profile.
- Store clawd browser state under an app-owned directory, e.g.:
- `~/Library/Application Support/Clawdis/browser/clawd/` (mac app)
- or `~/.clawdis/browser/clawd/` (gateway/CLI)
2) **Dedicated ports**
- Never use `9222` (reserved for ad-hoc dev workflows; avoids colliding with `agent-tools/browser-tools`).
- Default ports are `18790/18791` unless overridden.
3) **Named tab/page management**
- The agent must be able to enumerate and target tabs deterministically (by stable `targetId` or equivalent), not “last tab”.
## Browser selection (macOS)
On startup (when enabled + local URL), Clawdis chooses the browser executable in this order:
1) **Google Chrome Canary** (if installed)
2) **Chromium** (if installed)
3) **Google Chrome** (fallback)
Implementation detail: detection is by existence of the `.app` bundle under `/Applications` (and optionally `~/Applications`), then using the resolved executable path.
Rationale:
- Canary/Chromium are easy to visually distinguish from the user's daily driver.
- Chrome fallback ensures the feature works on a stock machine.
## Visual differentiation (“lobster-orange”)
The clawd browser should be obviously different at a glance:
- Profile name: **clawd**
- Profile color: **#FF4500**
Preferred behavior:
- Seed/patch the profile's preferences on first launch so the color + name persist.
Fallback behavior:
- If preferences patching is not reliable, open with the dedicated profile and let the user set the profile color/name once via Chrome UI; it must persist because the `userDataDir` is persistent.
## Control server contract (proposed)
Expose a small local HTTP API (and/or gateway RPC surface) so the agent can manage state without touching the user's Chrome.
Minimum endpoints/methods (names illustrative):
- `browser.status`
- returns: `{ enabled, url, running, pid?, version?, chosenBrowser?, userDataDir?, ports: { control, cdp } }`
- `browser.start`
- starts the browser-control server + browser (no-op if already running)
- `browser.stop`
- stops the server and closes the clawd browser (best-effort; graceful first, then force if needed)
- `browser.tabs.list`
- returns: array of `{ targetId, title, url, isActive, lastFocusedAt? }`
- `browser.tabs.open`
- params: `{ url, newTab?: true }` → returns `{ targetId }`
- `browser.tabs.focus`
- params: `{ targetId }`
- `browser.tabs.close`
- params: `{ targetId }`
- `browser.screenshot`
- params: `{ targetId?, fullPage?: false }` → returns a `MEDIA:` attachment URL (via the existing Clawdis media host)
Nice-to-have (later):
- `browser.snapshot.aria` (AI-friendly snapshot with stable refs)
- `browser.click` / `browser.type` / `browser.waitFor` helpers built atop snapshot refs
### “Is it open or closed?”
“Open” means:
- the control server is reachable at the configured URL **and**
- it reports a live browser connection.
“Closed” means:
- control server not reachable, or server reports no browser.
Clawdis should treat “open/closed” as a health check (fast path), not by scanning global Chrome processes (avoid false positives).
## Interaction with the agent (clawd)
The agent should use browser tools only when:
- enabled in settings
- control URL is configured
If disabled, tools must fail fast with a friendly error (“Browser disabled in settings”).
The agent should not assume tabs are ephemeral. It should:
- call `browser.tabs.list` to discover existing tabs first
- reuse an existing tab when appropriate (e.g. a persistent “main” tab)
- avoid opening duplicate tabs unless asked
## Security & privacy notes
- The clawd browser profile is app-owned; it may contain logged-in sessions. Treat it as sensitive data.
- The control server must bind to loopback only by default (`127.0.0.1`) unless the user explicitly configures a non-loopback URL.
- Never reuse or copy the user's default Chrome profile.
## Non-goals (for the first cut)
- Cross-device “sync” of tabs between Mac and Pi.
- Sharing the user's logged-in Chrome sessions automatically.
- General-purpose web scraping; this is primarily for “close-the-loop” verification and interaction.

108
src/browser/cdp.ts Normal file
View File

@@ -0,0 +1,108 @@
import WebSocket from "ws";
/**
 * A message received on the CDP socket. Only messages carrying a numeric `id`
 * are matched against an outstanding command; `error.message`, when present,
 * causes that command to reject.
 */
type CdpResponse = {
id: number;
result?: unknown;
error?: { message?: string };
};
/** Settle callbacks for a command that has been sent and is awaiting its reply. */
type Pending = {
resolve: (value: unknown) => void;
reject: (err: Error) => void;
};
/**
 * Capture a PNG screenshot of one CDP target.
 *
 * Opens a WebSocket to `opts.wsUrl` (5 s handshake timeout), sends
 * `Page.enable`, optionally measures the page via `Page.getLayoutMetrics`
 * to build a full-page clip, then sends `Page.captureScreenshot`.
 *
 * @param opts.wsUrl    WebSocket debugger URL of the target.
 * @param opts.fullPage When true, clip to the page's content size if it can be measured.
 * @returns The decoded PNG bytes.
 * @throws Error if the socket fails or closes, a CDP command returns an error,
 *         or the screenshot reply contains no data.
 *
 * The socket is always closed before this function settles, including when a
 * command rejects part-way through.
 */
export async function captureScreenshotPng(opts: {
  wsUrl: string;
  fullPage?: boolean;
}): Promise<Buffer> {
  const ws = new WebSocket(opts.wsUrl, { handshakeTimeout: 5000 });

  let nextId = 1;
  const pending = new Map<number, Pending>();

  const send = (method: string, params?: Record<string, unknown>) => {
    const id = nextId++;
    const msg = { id, method, params };
    ws.send(JSON.stringify(msg));
    return new Promise<unknown>((resolve, reject) => {
      pending.set(id, { resolve, reject });
    });
  };

  // Fail every in-flight command with the same error.
  const rejectPending = (err: Error) => {
    for (const [, p] of pending) p.reject(err);
    pending.clear();
  };

  const closeQuietly = () => {
    try {
      ws.close();
    } catch {
      // ignore
    }
  };

  const openPromise = new Promise<void>((resolve, reject) => {
    ws.once("open", () => resolve());
    ws.once("error", (err) => reject(err));
  });

  // Keep a listener for the socket's whole lifetime: the `once` handler above
  // is gone after the first error, and an "error" event with no listener
  // would be thrown by the emitter.
  ws.on("error", (err) => {
    rejectPending(err instanceof Error ? err : new Error(String(err)));
  });

  ws.on("message", (data) => {
    try {
      const parsed = JSON.parse(String(data)) as CdpResponse;
      if (typeof parsed.id !== "number") return;
      const p = pending.get(parsed.id);
      if (!p) return;
      pending.delete(parsed.id);
      if (parsed.error?.message) {
        p.reject(new Error(parsed.error.message));
        return;
      }
      p.resolve(parsed.result);
    } catch {
      // ignore
    }
  });

  ws.on("close", () => {
    rejectPending(new Error("CDP socket closed"));
  });

  try {
    await openPromise;

    await send("Page.enable");

    let clip:
      | { x: number; y: number; width: number; height: number; scale: number }
      | undefined;
    if (opts.fullPage) {
      const metrics = (await send("Page.getLayoutMetrics")) as {
        cssContentSize?: { width?: number; height?: number };
        contentSize?: { width?: number; height?: number };
      };
      const size = metrics?.cssContentSize ?? metrics?.contentSize;
      const width = Number(size?.width ?? 0);
      const height = Number(size?.height ?? 0);
      // Only clip when the page reported a usable size; otherwise fall back
      // to the default capture area.
      if (width > 0 && height > 0) {
        clip = { x: 0, y: 0, width, height, scale: 1 };
      }
    }

    const result = (await send("Page.captureScreenshot", {
      format: "png",
      fromSurface: true,
      captureBeyondViewport: true,
      ...(clip ? { clip } : {}),
    })) as { data?: string };

    const base64 = result?.data;
    if (!base64) {
      throw new Error("Screenshot failed: missing data");
    }
    return Buffer.from(base64, "base64");
  } finally {
    // Runs on success and on every failure path, so the socket never leaks.
    closeQuietly();
  }
}

346
src/browser/chrome.ts Normal file
View File

@@ -0,0 +1,346 @@
import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process";
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { ensurePortAvailable } from "../infra/ports.js";
import { logInfo, logWarn } from "../logger.js";
import { defaultRuntime, type RuntimeEnv } from "../runtime.js";
import { CONFIG_DIR } from "../utils.js";
import type { ResolvedBrowserConfig } from "./config.js";
import {
DEFAULT_CLAWD_BROWSER_COLOR,
DEFAULT_CLAWD_BROWSER_PROFILE_NAME,
} from "./constants.js";
/** A browser binary found on disk, tagged with which flavour it is. */
export type BrowserExecutable = {
kind: "canary" | "chromium" | "chrome";
path: string;
};
/** Handle describing a browser process started by `launchClawdChrome`. */
export type RunningChrome = {
// Process id; `launchClawdChrome` stores -1 when the spawned process has no pid.
pid: number;
exe: BrowserExecutable;
userDataDir: string;
cdpPort: number;
// `Date.now()` taken just before the first spawn attempt.
startedAt: number;
proc: ChildProcessWithoutNullStreams;
};
/** Reports whether `filePath` exists; any error raised by the check counts as "missing". */
function exists(filePath: string) {
  let present: boolean;
  try {
    present = fs.existsSync(filePath);
  } catch {
    present = false;
  }
  return present;
}
/**
 * Locate a browser binary on macOS.
 *
 * Preference order is Chrome Canary, then Chromium, then Chrome; for each, the
 * system-wide `/Applications` bundle is checked before the per-user
 * `~/Applications` one. Returns the first candidate that exists, or `null`.
 */
export function findChromeExecutableMac(): BrowserExecutable | null {
  const bundles: Array<{ kind: BrowserExecutable["kind"]; relative: string }> =
    [
      {
        kind: "canary",
        relative:
          "Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
      },
      {
        kind: "chromium",
        relative: "Applications/Chromium.app/Contents/MacOS/Chromium",
      },
      {
        kind: "chrome",
        relative: "Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
      },
    ];

  // Expand each bundle into its system-wide and per-user location, in that order.
  const candidates: Array<BrowserExecutable> = bundles.flatMap(
    ({ kind, relative }) => [
      { kind, path: `/${relative}` },
      { kind, path: path.join(os.homedir(), relative) },
    ],
  );

  return candidates.find((candidate) => exists(candidate.path)) ?? null;
}
/** Path of the dedicated user-data directory: `<CONFIG_DIR>/browser/<profile name>/user-data`. */
export function resolveClawdUserDataDir() {
  const profileRoot = path.join(
    CONFIG_DIR,
    "browser",
    DEFAULT_CLAWD_BROWSER_PROFILE_NAME,
  );
  return path.join(profileRoot, "user-data");
}
/** Path of the marker file recording that the profile in `userDataDir` has been decorated. */
function decoratedMarkerPath(userDataDir: string) {
  const markerFileName = ".clawd-profile-decorated";
  return path.join(userDataDir, markerFileName);
}
/**
 * Read and parse a JSON file without throwing.
 *
 * Returns the parsed value when it is a non-null object, and `null` when the
 * file is missing, unreadable, not valid JSON, or parses to a primitive.
 */
function safeReadJson(filePath: string): Record<string, unknown> | null {
  try {
    if (!fs.existsSync(filePath)) return null;
    const parsed: unknown = JSON.parse(fs.readFileSync(filePath, "utf-8"));
    const isObject = typeof parsed === "object" && parsed !== null;
    return isObject ? (parsed as Record<string, unknown>) : null;
  } catch {
    return null;
  }
}
/** Write `data` to `filePath` as 2-space-indented JSON, creating parent directories first. */
function safeWriteJson(filePath: string, data: Record<string, unknown>) {
  const parentDir = path.dirname(filePath);
  fs.mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(data, null, 2);
  fs.writeFileSync(filePath, serialized);
}
/**
 * Assign `value` at the nested path `keys` inside `obj`, mutating it in place.
 *
 * Any intermediate entry that is not a plain object (missing, primitive, null
 * or an array) is replaced with a fresh `{}`. An empty `keys` list writes to
 * the `""` property of `obj`.
 */
function setDeep(obj: Record<string, unknown>, keys: string[], value: unknown) {
  const parent = keys
    .slice(0, -1)
    .reduce<Record<string, unknown>>((cursor, key) => {
      const child = cursor[key];
      const isContainer =
        typeof child === "object" && child !== null && !Array.isArray(child);
      if (!isContainer) {
        cursor[key] = {};
      }
      return cursor[key] as Record<string, unknown>;
    }, obj);
  const leafKey = keys[keys.length - 1] ?? "";
  parent[leafKey] = value;
}
/**
 * Best-effort profile decoration: stamps the clawd profile name and accent
 * colour into Chrome's `Local State` and `Default/Preferences` files, then
 * drops a marker file so the decoration is not repeated. Preference keys vary
 * between Chrome versions, so several likely keys are written.
 *
 * @param userDataDir Chrome user-data directory to decorate.
 * @param opts.color  Accent colour; defaults to the clawd default. Upper-cased before writing.
 */
export function decorateClawdProfile(
  userDataDir: string,
  opts?: { color?: string },
) {
  const profileName = DEFAULT_CLAWD_BROWSER_PROFILE_NAME;
  const profileColor = (
    opts?.color ?? DEFAULT_CLAWD_BROWSER_COLOR
  ).toUpperCase();

  // Local State: common-ish shape is profile.info_cache.Default.<field>
  const localStateFile = path.join(userDataDir, "Local State");
  const localState = safeReadJson(localStateFile) ?? {};
  const cacheEntry = ["profile", "info_cache", "Default"];
  for (const field of ["name", "shortcut_name", "user_name"]) {
    setDeep(localState, [...cacheEntry, field], profileName);
  }
  // Colour keys are best-effort (Chrome changes these frequently).
  for (const field of ["profile_color", "user_color"]) {
    setDeep(localState, [...cacheEntry, field], profileColor);
  }
  safeWriteJson(localStateFile, localState);

  // Per-profile preferences.
  const preferencesFile = path.join(userDataDir, "Default", "Preferences");
  const prefs = safeReadJson(preferencesFile) ?? {};
  setDeep(prefs, ["profile", "name"], profileName);
  for (const field of ["profile_color", "user_color"]) {
    setDeep(prefs, ["profile", field], profileColor);
  }
  safeWriteJson(preferencesFile, prefs);

  // A failure to write the marker is ignored.
  try {
    const stamp = `${Date.now()}\n`;
    fs.writeFileSync(decoratedMarkerPath(userDataDir), stamp, "utf-8");
  } catch {
    // ignore
  }
}
/**
 * Probe the CDP HTTP endpoint on loopback.
 *
 * @param cdpPort   Port to query at `http://127.0.0.1:<port>/json/version`.
 * @param timeoutMs Abort the request after this many milliseconds (default 500).
 * @returns `true` only for an OK HTTP response; any failure or timeout yields `false`.
 */
export async function isChromeReachable(
  cdpPort: number,
  timeoutMs = 500,
): Promise<boolean> {
  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), timeoutMs);
  const versionUrl = `http://127.0.0.1:${cdpPort}/json/version`;
  let reachable = false;
  try {
    const response = await fetch(versionUrl, { signal: abort.signal });
    reachable = response.ok;
  } catch {
    reachable = false;
  } finally {
    clearTimeout(timer);
  }
  return reachable;
}
/**
 * Launch the dedicated clawd browser and wait until its CDP endpoint answers.
 *
 * Steps, in order:
 *  1. `ensurePortAvailable(resolved.cdpPort)` is awaited (presumably rejects
 *     when the port is taken — confirm against `../infra/ports.js`).
 *  2. A browser binary is looked up; only macOS (`darwin`) is supported here.
 *  3. The user-data dir is created and Chrome is spawned against it.
 *  4. If the decoration marker is absent, wait up to 5 s for Chrome to create
 *     its preference files, SIGTERM it, decorate the profile, and spawn again.
 *  5. Poll CDP for up to 15 s; if it never answers, SIGKILL and throw.
 *
 * @param resolved Resolved browser config (`cdpPort`, `headless`, `color` are read).
 * @param runtime  Runtime passed through to the logger helpers.
 * @returns Details of the running process; `pid` is -1 if the process has no pid.
 * @throws Error when no supported browser is found or CDP does not come up.
 */
export async function launchClawdChrome(
resolved: ResolvedBrowserConfig,
runtime: RuntimeEnv = defaultRuntime,
): Promise<RunningChrome> {
await ensurePortAvailable(resolved.cdpPort);
const exe = process.platform === "darwin" ? findChromeExecutableMac() : null;
if (!exe) {
throw new Error(
"No supported browser found (Chrome Canary/Chromium/Chrome on macOS).",
);
}
const userDataDir = resolveClawdUserDataDir();
fs.mkdirSync(userDataDir, { recursive: true });
const marker = decoratedMarkerPath(userDataDir);
const needsDecorate = !exists(marker);
// First launch to create preference files if missing, then decorate and relaunch.
const spawnOnce = () => {
const args: string[] = [
`--remote-debugging-port=${resolved.cdpPort}`,
`--user-data-dir=${userDataDir}`,
"--no-first-run",
"--no-default-browser-check",
"--disable-sync",
"--disable-background-networking",
"--disable-component-update",
"--disable-features=Translate,MediaRouter",
"--password-store=basic",
];
if (resolved.headless) {
// Best-effort; older Chromes may ignore.
args.push("--headless=new");
args.push("--disable-gpu");
}
// Always open a blank tab to ensure a target exists.
args.push("about:blank");
// NOTE(review): stdio is "pipe" but nothing in this function reads
// stdout/stderr — confirm a caller drains them.
return spawn(exe.path, args, {
stdio: "pipe",
env: {
...process.env,
// Reduce accidental sharing with the user's env.
HOME: os.homedir(),
},
});
};
const startedAt = Date.now();
let proc = spawnOnce();
// If this is the first run, let Chrome create prefs, then decorate + restart.
if (needsDecorate) {
const deadline = Date.now() + 5000;
while (Date.now() < deadline) {
const localStatePath = path.join(userDataDir, "Local State");
const preferencesPath = path.join(userDataDir, "Default", "Preferences");
if (exists(localStatePath) && exists(preferencesPath)) break;
await new Promise((r) => setTimeout(r, 100));
}
try {
proc.kill("SIGTERM");
} catch {
// ignore
}
// NOTE(review): fixed 300 ms pause; the first process is not checked for
// exit before the profile files are rewritten and Chrome is respawned.
await new Promise((r) => setTimeout(r, 300));
try {
decorateClawdProfile(userDataDir, { color: resolved.color });
logInfo(
`🦞 clawd browser profile decorated (${resolved.color})`,
runtime,
);
} catch (err) {
// Decoration is cosmetic, so a failure is logged and the launch continues.
logWarn(
`clawd browser profile decoration failed: ${String(err)}`,
runtime,
);
}
proc = spawnOnce();
}
// Wait for CDP to come up.
const readyDeadline = Date.now() + 15_000;
while (Date.now() < readyDeadline) {
if (await isChromeReachable(resolved.cdpPort, 500)) break;
await new Promise((r) => setTimeout(r, 200));
}
// Final probe decides success; on failure the process is force-killed.
if (!(await isChromeReachable(resolved.cdpPort, 500))) {
try {
proc.kill("SIGKILL");
} catch {
// ignore
}
throw new Error(`Failed to start Chrome CDP on port ${resolved.cdpPort}.`);
}
const pid = proc.pid ?? -1;
logInfo(
`🦞 clawd browser started (${exe.kind}) on 127.0.0.1:${resolved.cdpPort} (pid ${pid})`,
runtime,
);
return {
pid,
exe,
userDataDir,
cdpPort: resolved.cdpPort,
startedAt,
proc,
};
}
/**
 * Stop a browser started by `launchClawdChrome`: SIGTERM first, SIGKILL only
 * if it is still alive after the grace period.
 *
 * @param running   Handle returned by `launchClawdChrome`.
 * @param timeoutMs How long to wait after SIGTERM before escalating (default 2500 ms).
 *
 * Returns as soon as the process has exited or its CDP endpoint stops
 * answering. Errors from `kill` are ignored (best-effort shutdown).
 */
export async function stopClawdChrome(
  running: RunningChrome,
  timeoutMs = 2500,
) {
  const proc = running.proc;

  // `proc.killed` only records that a signal was *sent*; it flips to true as
  // soon as SIGTERM is delivered. Exit is reported via exitCode / signalCode.
  const hasExited = () => proc.exitCode !== null || proc.signalCode !== null;

  if (hasExited()) return;

  try {
    proc.kill("SIGTERM");
  } catch {
    // ignore
  }

  const start = Date.now();
  while (Date.now() - start < timeoutMs) {
    if (hasExited()) return;
    if (!(await isChromeReachable(running.cdpPort, 200))) return;
    await new Promise((r) => setTimeout(r, 100));
  }

  // Still alive and still serving CDP after the grace period: force it.
  try {
    proc.kill("SIGKILL");
  } catch {
    // ignore
  }
}

122
src/browser/client.ts Normal file
View File

@@ -0,0 +1,122 @@
import { loadConfig } from "../config/config.js";
import { resolveBrowserConfig } from "./config.js";
/** JSON shape that `browserStatus` expects from the control server's root endpoint. */
export type BrowserStatus = {
enabled: boolean;
controlUrl: string;
running: boolean;
pid: number | null;
cdpPort: number;
chosenBrowser: string | null;
userDataDir: string | null;
color: string;
headless: boolean;
attachOnly: boolean;
};
/** One tab as reported by the control server (`/tabs`, `/tabs/open`). */
export type BrowserTab = {
targetId: string;
title: string;
url: string;
type?: string;
};
/** JSON shape that `browserScreenshot` expects from `/screenshot`. */
export type ScreenshotResult = {
ok: true;
path: string;
targetId: string;
url: string;
};
/**
 * Fetch `url` and parse the response body as JSON, with a timeout.
 *
 * @param url  Absolute URL to request.
 * @param init Standard `fetch` options plus `timeoutMs` (default 5000).
 * @returns The parsed JSON body, cast to `T` (not validated).
 * @throws Error `"<status>: <body>"` or `"HTTP <status>"` for non-OK responses;
 *         fetch/abort errors propagate unchanged.
 *
 * The timeout covers the whole exchange, including reading the body, and the
 * timer is always cleared, even when `fetch` itself rejects.
 */
async function fetchJson<T>(
  url: string,
  init?: RequestInit & { timeoutMs?: number },
): Promise<T> {
  // Split off our own option so only real fetch options are forwarded.
  const { timeoutMs = 5000, ...requestInit } = init ?? {};
  const ctrl = new AbortController();
  const t = setTimeout(() => ctrl.abort(), timeoutMs);
  try {
    const res = await fetch(url, { ...requestInit, signal: ctrl.signal });
    if (!res.ok) {
      const text = await res.text().catch(() => "");
      throw new Error(text ? `${res.status}: ${text}` : `HTTP ${res.status}`);
    }
    return (await res.json()) as T;
  } finally {
    clearTimeout(t);
  }
}
/**
 * Pick the control-server base URL: a non-blank `overrideUrl` (trimmed) wins,
 * otherwise the URL from the loaded config. One trailing slash is removed.
 * The config is loaded and resolved even when an override is supplied.
 */
export function resolveBrowserControlUrl(overrideUrl?: string) {
  const resolved = resolveBrowserConfig(loadConfig().browser);
  const override = overrideUrl?.trim();
  const chosen = override ? override : resolved.controlUrl;
  return chosen.replace(/\/$/, "");
}
/** GET `<baseUrl>/` and return the control server's status (1.5 s timeout). */
export async function browserStatus(baseUrl: string): Promise<BrowserStatus> {
  const endpoint = `${baseUrl}/`;
  return await fetchJson<BrowserStatus>(endpoint, { timeoutMs: 1500 });
}
/** POST `<baseUrl>/start` (15 s timeout); the response body is discarded. */
export async function browserStart(baseUrl: string): Promise<void> {
  const endpoint = `${baseUrl}/start`;
  await fetchJson(endpoint, { method: "POST", timeoutMs: 15000 });
}
/** POST `<baseUrl>/stop` (15 s timeout); the response body is discarded. */
export async function browserStop(baseUrl: string): Promise<void> {
  const endpoint = `${baseUrl}/stop`;
  await fetchJson(endpoint, { method: "POST", timeoutMs: 15000 });
}
/** GET `<baseUrl>/tabs` (3 s timeout) and return its `tabs` array, or `[]` when absent. */
export async function browserTabs(baseUrl: string): Promise<BrowserTab[]> {
  type TabsReply = { running: boolean; tabs: BrowserTab[] };
  const reply = await fetchJson<TabsReply>(`${baseUrl}/tabs`, {
    timeoutMs: 3000,
  });
  return reply.tabs ?? [];
}
/** POST `{ url }` as JSON to `<baseUrl>/tabs/open` (15 s timeout) and return the reply as a tab. */
export async function browserOpenTab(
  baseUrl: string,
  url: string,
): Promise<BrowserTab> {
  const endpoint = `${baseUrl}/tabs/open`;
  const payload = JSON.stringify({ url });
  return await fetchJson<BrowserTab>(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
    timeoutMs: 15000,
  });
}
/** POST `{ targetId }` as JSON to `<baseUrl>/tabs/focus` (5 s timeout); the reply is discarded. */
export async function browserFocusTab(
  baseUrl: string,
  targetId: string,
): Promise<void> {
  const endpoint = `${baseUrl}/tabs/focus`;
  const payload = JSON.stringify({ targetId });
  await fetchJson(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
    timeoutMs: 5000,
  });
}
/** DELETE `<baseUrl>/tabs/<targetId>` (5 s timeout); `targetId` is URL-encoded into the path. */
export async function browserCloseTab(
  baseUrl: string,
  targetId: string,
): Promise<void> {
  const encodedId = encodeURIComponent(targetId);
  const endpoint = `${baseUrl}/tabs/${encodedId}`;
  await fetchJson(endpoint, { method: "DELETE", timeoutMs: 5000 });
}
/**
 * GET `<baseUrl>/screenshot` (20 s timeout).
 *
 * `targetId` is sent as a query parameter when non-empty, and `fullPage=true`
 * when `opts.fullPage` is truthy; with neither, no query string is appended.
 */
export async function browserScreenshot(
  baseUrl: string,
  opts: {
    targetId?: string;
    fullPage?: boolean;
  },
): Promise<ScreenshotResult> {
  const query = new URLSearchParams();
  if (opts.targetId) {
    query.set("targetId", opts.targetId);
  }
  if (opts.fullPage) {
    query.set("fullPage", "true");
  }
  const encoded = query.toString();
  const endpoint = encoded
    ? `${baseUrl}/screenshot?${encoded}`
    : `${baseUrl}/screenshot`;
  return await fetchJson<ScreenshotResult>(endpoint, { timeoutMs: 20000 });
}

View File

@@ -0,0 +1,46 @@
import { describe, expect, it } from "vitest";
import {
resolveBrowserConfig,
shouldStartLocalBrowserServer,
} from "./config.js";
// Specification of resolveBrowserConfig / shouldStartLocalBrowserServer.
describe("browser config", () => {
  const LOBSTER_ORANGE = "#FF4500";
  const LOCALHOST_URL = "http://localhost:18790";

  it("defaults to enabled with loopback control url and lobster-orange color", () => {
    const cfg = resolveBrowserConfig(undefined);

    expect(cfg.enabled).toBe(true);
    expect(cfg.controlPort).toBe(18790);
    expect(cfg.cdpPort).toBe(18791);
    expect(cfg.controlHost).toBe("127.0.0.1");
    expect(cfg.color).toBe(LOBSTER_ORANGE);
    expect(shouldStartLocalBrowserServer(cfg)).toBe(true);
  });

  it("normalizes hex colors", () => {
    // Missing "#" and lower-case digits are both repaired.
    const cfg = resolveBrowserConfig({
      controlUrl: LOCALHOST_URL,
      color: "ff4500",
    });

    expect(cfg.color).toBe(LOBSTER_ORANGE);
  });

  it("falls back to default color for invalid hex", () => {
    const cfg = resolveBrowserConfig({
      controlUrl: LOCALHOST_URL,
      color: "#GGGGGG",
    });

    expect(cfg.color).toBe(LOBSTER_ORANGE);
  });

  it("treats non-loopback control urls as remote", () => {
    const cfg = resolveBrowserConfig({
      controlUrl: "http://example.com:18790",
    });

    expect(shouldStartLocalBrowserServer(cfg)).toBe(false);
  });

  it("rejects unsupported protocols", () => {
    const resolveWithWebSocketUrl = () =>
      resolveBrowserConfig({ controlUrl: "ws://127.0.0.1:18790" });

    expect(resolveWithWebSocketUrl).toThrow(/must be http/i);
  });
});

82
src/browser/config.ts Normal file
View File

@@ -0,0 +1,82 @@
import type { BrowserConfig } from "../config/config.js";
import {
DEFAULT_CLAWD_BROWSER_CDP_PORT,
DEFAULT_CLAWD_BROWSER_COLOR,
DEFAULT_CLAWD_BROWSER_CONTROL_URL,
DEFAULT_CLAWD_BROWSER_ENABLED,
} from "./constants.js";
/** Browser settings after defaults and validation have been applied by `resolveBrowserConfig`. */
export type ResolvedBrowserConfig = {
enabled: boolean;
// Normalised control URL with one trailing slash removed.
controlUrl: string;
// Hostname component of the control URL.
controlHost: string;
// Explicit port from the URL, otherwise 443 for https and 80 for http.
controlPort: number;
cdpPort: number;
// "#RRGGBB" in upper case; the default colour when the input is missing or invalid.
color: string;
// True only when the config value is exactly `true`.
headless: boolean;
// True only when the config value is exactly `true`.
attachOnly: boolean;
};
/**
 * True when `host` (trimmed, case-insensitive) is one of the recognised
 * loopback names: `localhost`, `127.0.0.1`, `::1` or `[::1]`.
 */
function isLoopbackHost(host: string) {
  const normalized = host.trim().toLowerCase();
  const loopbackNames = ["localhost", "127.0.0.1", "[::1]", "::1"];
  return loopbackNames.includes(normalized);
}
/**
 * Normalise a colour to upper-case `#RRGGBB`.
 *
 * A missing `#` is added. Blank input, or anything that is not exactly six
 * hex digits after the `#`, yields the default clawd colour.
 */
function normalizeHexColor(raw: string | undefined) {
  const trimmed = (raw ?? "").trim();
  if (trimmed === "") return DEFAULT_CLAWD_BROWSER_COLOR;

  const withHash = trimmed.startsWith("#") ? trimmed : `#${trimmed}`;
  const isSixDigitHex = /^#[0-9a-fA-F]{6}$/.test(withHash);
  return isSixDigitHex ? withHash.toUpperCase() : DEFAULT_CLAWD_BROWSER_COLOR;
}
/**
 * Apply defaults to the user's `browser` config section and validate it.
 *
 * @param cfg Raw config section, or `undefined` to use all defaults.
 * @returns The resolved settings.
 * @throws TypeError from `new URL` when `controlUrl` cannot be parsed.
 * @throws Error when the protocol is not http(s), the port is invalid, or the
 *         control port equals the CDP port.
 */
export function resolveBrowserConfig(
  cfg: BrowserConfig | undefined,
): ResolvedBrowserConfig {
  const enabled = cfg?.enabled ?? DEFAULT_CLAWD_BROWSER_ENABLED;

  const rawUrl = cfg?.controlUrl ?? DEFAULT_CLAWD_BROWSER_CONTROL_URL;
  const parsed = new URL(rawUrl.trim());

  const isHttps = parsed.protocol === "https:";
  if (parsed.protocol !== "http:" && !isHttps) {
    throw new Error(
      `browser.controlUrl must be http(s), got: ${parsed.protocol.replace(":", "")}`,
    );
  }

  // Use the explicit port when it is a positive number, else the scheme default.
  const explicitPort = parsed.port
    ? Number.parseInt(parsed.port, 10)
    : Number.NaN;
  let port: number;
  if (explicitPort > 0) {
    port = explicitPort;
  } else if (isHttps) {
    port = 443;
  } else {
    port = 80;
  }

  if (Number.isNaN(port) || port <= 0 || port > 65535) {
    throw new Error(`browser.controlUrl has invalid port: ${parsed.port}`);
  }

  // The CDP port is fixed, so the control server must not claim it.
  const cdpPort = DEFAULT_CLAWD_BROWSER_CDP_PORT;
  if (port === cdpPort) {
    throw new Error(
      `browser.controlUrl port (${port}) must not equal CDP port (${cdpPort})`,
    );
  }

  return {
    enabled,
    controlUrl: parsed.toString().replace(/\/$/, ""),
    controlHost: parsed.hostname,
    controlPort: port,
    cdpPort,
    color: normalizeHexColor(cfg?.color),
    headless: cfg?.headless === true,
    attachOnly: cfg?.attachOnly === true,
  };
}
/**
 * Decides whether this process should host the control server itself, which
 * is the case exactly when the resolved control host is a loopback address.
 */
export function shouldStartLocalBrowserServer(resolved: ResolvedBrowserConfig) {
  const { controlHost } = resolved;
  return isLoopbackHost(controlHost);
}

5
src/browser/constants.ts Normal file
View File

@@ -0,0 +1,5 @@
/** Value used for `enabled` when the config does not set `browser.enabled`. */
export const DEFAULT_CLAWD_BROWSER_ENABLED = true;
/** Control-server base URL used when `browser.controlUrl` is not configured. */
export const DEFAULT_CLAWD_BROWSER_CONTROL_URL = "http://127.0.0.1:18790";
/** CDP port; config resolution rejects a control URL that uses this same port. */
export const DEFAULT_CLAWD_BROWSER_CDP_PORT = 18791;
/** Accent color used when `browser.color` is missing or not a 6-digit hex value. */
export const DEFAULT_CLAWD_BROWSER_COLOR = "#FF4500";
/** Profile name. NOTE(review): its consumer is not visible in this diff; confirm usage. */
export const DEFAULT_CLAWD_BROWSER_PROFILE_NAME = "clawd";

311
src/browser/server.ts Normal file
View File

@@ -0,0 +1,311 @@
import type { Server } from "node:http";
import path from "node:path";
import express from "express";
import { loadConfig } from "../config/config.js";
import { logError, logInfo, logWarn } from "../logger.js";
import { ensureMediaDir, saveMediaBuffer } from "../media/store.js";
import { defaultRuntime, type RuntimeEnv } from "../runtime.js";
import { captureScreenshotPng } from "./cdp.js";
import {
isChromeReachable,
launchClawdChrome,
type RunningChrome,
stopClawdChrome,
} from "./chrome.js";
import {
resolveBrowserConfig,
shouldStartLocalBrowserServer,
} from "./config.js";
/** One browser target as exposed by the control server's tab endpoints. */
export type BrowserTab = {
  /** Target identifier, taken from the `id` field of the browser's JSON reply. */
  targetId: string;
  /** Target title; empty string when the browser reported none. */
  title: string;
  /** Target URL; empty string when the browser reported none. */
  url: string;
  /** The target's `webSocketDebuggerUrl`, when the browser reported one. */
  wsUrl?: string;
  /** Target type string as reported by the browser, passed through unchanged. */
  type?: string;
};
/** Everything the running control server needs to serve requests. */
type BrowserServerState = {
  /** The listening HTTP server. */
  server: Server;
  /** Port the control server is bound to. */
  port: number;
  /** Port used for all `/json/*` requests to the browser. */
  cdpPort: number;
  /** Browser process launched by this server, or null if none was launched. */
  running: RunningChrome | null;
  /** Resolved config captured when the server was started. */
  resolved: ReturnType<typeof resolveBrowserConfig>;
};
// Module-level singleton: null until the control server has started, and
// reset to null again when it is stopped.
let state: BrowserServerState | null = null;
/** Sends `{ error: message }` as a JSON body with the given HTTP status. */
function jsonError(res: express.Response, status: number, message: string) {
  const payload = { error: message };
  res.status(status).json(payload);
}
/**
 * GETs `url` and returns the parsed JSON body, cast to `T`.
 *
 * The request is aborted once `timeoutMs` elapses. A non-2xx response is
 * reported as `Error("HTTP <status>")`.
 */
async function fetchJson<T>(url: string, timeoutMs = 1500): Promise<T> {
  const aborter = new AbortController();
  const deadline = setTimeout(() => {
    aborter.abort();
  }, timeoutMs);
  try {
    const response = await fetch(url, { signal: aborter.signal });
    if (response.ok) {
      const payload = await response.json();
      return payload as T;
    }
    throw new Error(`HTTP ${response.status}`);
  } finally {
    // Always cancel the timer so it cannot fire after the request settles.
    clearTimeout(deadline);
  }
}
/**
 * Fetches `/json/list` from the browser on `cdpPort` and converts each entry
 * to a `BrowserTab`. Entries without an id are dropped; a missing title or
 * URL becomes an empty string.
 */
async function listTabs(cdpPort: number): Promise<BrowserTab[]> {
  type RawTarget = {
    id?: string;
    title?: string;
    url?: string;
    webSocketDebuggerUrl?: string;
    type?: string;
  };
  const targets = await fetchJson<RawTarget[]>(
    `http://127.0.0.1:${cdpPort}/json/list`,
  );
  const tabs: BrowserTab[] = [];
  for (const target of targets) {
    // A target that has no id cannot be addressed later, so skip it.
    if (!target.id) continue;
    tabs.push({
      targetId: target.id,
      title: target.title ?? "",
      url: target.url ?? "",
      wsUrl: target.webSocketDebuggerUrl,
      type: target.type,
    });
  }
  return tabs;
}
/**
 * Opens `url` in a new tab through the browser's `/json/new` HTTP endpoint.
 *
 * The request is sent as PUT because current Chrome releases reject GET on
 * `/json/new` with HTTP 405. If a browser instead answers the PUT with 405,
 * the request is retried once with GET.
 *
 * @param cdpPort - Local port of the browser's DevTools HTTP endpoint.
 * @param url - URL to open; it is percent-encoded into the query string.
 * @returns The created tab. `url` falls back to the requested URL when the
 *   browser does not report one.
 * @throws Error on a non-2xx response, on timeout (1500 ms), or when the
 *   reply carries no target id.
 */
async function openTab(cdpPort: number, url: string): Promise<BrowserTab> {
  type CreatedTarget = {
    id?: string;
    title?: string;
    url?: string;
    webSocketDebuggerUrl?: string;
    type?: string;
  };
  const encoded = encodeURIComponent(url);
  const endpoint = `http://127.0.0.1:${cdpPort}/json/new?${encoded}`;

  const ctrl = new AbortController();
  const t = setTimeout(() => ctrl.abort(), 1500);
  let created: CreatedTarget;
  try {
    let res = await fetch(endpoint, { method: "PUT", signal: ctrl.signal });
    if (res.status === 405) {
      // This browser does not accept PUT here; fall back to GET.
      res = await fetch(endpoint, { signal: ctrl.signal });
    }
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    created = (await res.json()) as CreatedTarget;
  } finally {
    clearTimeout(t);
  }

  if (!created.id) throw new Error("Failed to open tab (missing id)");
  return {
    targetId: created.id,
    title: created.title ?? "",
    url: created.url ?? url,
    wsUrl: created.webSocketDebuggerUrl,
    type: created.type,
  };
}
/**
 * GETs `url` and succeeds on any 2xx response without parsing the body.
 *
 * Used for `/json/activate` and `/json/close`, which reply with plain text
 * rather than JSON, so running them through a JSON parser would throw even
 * though the browser performed the action.
 *
 * @throws Error("HTTP <status>") on a non-2xx response, or an abort error
 *   once `timeoutMs` elapses.
 */
async function fetchOk(url: string, timeoutMs = 1500): Promise<void> {
  const ctrl = new AbortController();
  const t = setTimeout(() => ctrl.abort(), timeoutMs);
  try {
    const res = await fetch(url, { signal: ctrl.signal });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    // Consume the text reply so the response is fully read before returning.
    await res.text();
  } finally {
    clearTimeout(t);
  }
}

/**
 * Brings the tab identified by `targetId` to the foreground.
 * The id is percent-encoded so it cannot alter the request path.
 */
async function activateTab(cdpPort: number, targetId: string): Promise<void> {
  await fetchOk(
    `http://127.0.0.1:${cdpPort}/json/activate/${encodeURIComponent(targetId)}`,
  );
}

/**
 * Closes the tab identified by `targetId`.
 * The id is percent-encoded so it cannot alter the request path.
 */
async function closeTab(cdpPort: number, targetId: string): Promise<void> {
  await fetchOk(
    `http://127.0.0.1:${cdpPort}/json/close/${encodeURIComponent(targetId)}`,
  );
}
/**
 * Makes sure a browser is reachable on the configured CDP port, launching one
 * if necessary.
 *
 * Does nothing when a browser already answers. Throws when the control server
 * has not been started, or when `attachOnly` is set and nothing is reachable.
 * A launched process is recorded in `state.running` and cleared again when
 * that same process exits.
 */
async function ensureBrowserAvailable(runtime: RuntimeEnv): Promise<void> {
  if (!state) throw new Error("Browser server not started");

  const alreadyUp = await isChromeReachable(state.cdpPort);
  if (alreadyUp) return;

  if (state.resolved.attachOnly) {
    throw new Error("Browser attachOnly is enabled and no browser is running.");
  }

  const chrome = await launchClawdChrome(state.resolved, runtime);
  state.running = chrome;

  // Only forget the process if it is still the one on record; a newer launch
  // may have replaced it by the time this one exits.
  const forgetIfCurrent = () => {
    if (state?.running?.pid === chrome.pid) {
      state.running = null;
    }
  };
  chrome.proc.on("exit", forgetIfCurrent);
}
/**
 * Starts the local HTTP control server for the clawd browser, configured from
 * the `browser` section of the loaded config.
 *
 * Returns the existing state when the server is already running. Returns
 * `null` (without throwing) when the browser is disabled, when the configured
 * control URL is not a loopback host, or when binding the port fails (the
 * failure is logged). Errors from `resolveBrowserConfig` propagate.
 *
 * Routes registered:
 * - `GET /` status snapshot
 * - `POST /start`, `POST /stop` browser lifecycle
 * - `GET /tabs`, `POST /tabs/open`, `POST /tabs/focus`, `DELETE /tabs/:targetId`
 * - `GET /screenshot` captures a PNG and returns the saved file path
 *
 * Every route answers 503 until `state` has been assigned after a successful
 * bind, and reports handler failures as `{ error }` JSON.
 *
 * @param runtime - Runtime passed to logging and to the browser launcher.
 */
export async function startBrowserControlServerFromConfig(
  runtime: RuntimeEnv = defaultRuntime,
): Promise<BrowserServerState | null> {
  // Idempotent: a second call reuses the running server.
  if (state) return state;
  const cfg = loadConfig();
  const resolved = resolveBrowserConfig(cfg.browser);
  if (!resolved.enabled) return null;
  if (!shouldStartLocalBrowserServer(resolved)) {
    logInfo(
      `browser control URL is non-loopback (${resolved.controlUrl}); skipping local server start`,
      runtime,
    );
    return null;
  }
  const app = express();
  app.use(express.json({ limit: "1mb" }));
  // Status: reports configuration plus whether a browser currently answers.
  app.get("/", async (_req, res) => {
    if (!state) return jsonError(res, 503, "browser server not started");
    // Second argument is presumably a short timeout in ms; confirm in chrome.js.
    const reachable = await isChromeReachable(state.cdpPort, 300);
    res.json({
      enabled: state.resolved.enabled,
      controlUrl: state.resolved.controlUrl,
      running: reachable,
      pid: state.running?.pid ?? null,
      cdpPort: state.cdpPort,
      chosenBrowser: state.running?.exe.kind ?? null,
      userDataDir: state.running?.userDataDir ?? null,
      color: state.resolved.color,
      headless: state.resolved.headless,
      attachOnly: state.resolved.attachOnly,
    });
  });
  // Start: succeeds without launching when a browser is already reachable.
  app.post("/start", async (_req, res) => {
    try {
      await ensureBrowserAvailable(runtime);
      res.json({ ok: true });
    } catch (err) {
      jsonError(res, 500, String(err));
    }
  });
  // Stop: only stops a browser this server launched; otherwise stopped=false.
  app.post("/stop", async (_req, res) => {
    if (!state) return jsonError(res, 503, "browser server not started");
    if (!state.running) return res.json({ ok: true, stopped: false });
    try {
      await stopClawdChrome(state.running);
      state.running = null;
      res.json({ ok: true, stopped: true });
    } catch (err) {
      jsonError(res, 500, String(err));
    }
  });
  // List tabs: an unreachable browser yields an empty list, not an error.
  app.get("/tabs", async (_req, res) => {
    if (!state) return jsonError(res, 503, "browser server not started");
    const reachable = await isChromeReachable(state.cdpPort, 300);
    if (!reachable)
      return res.json({ running: false, tabs: [] as BrowserTab[] });
    try {
      const tabs = await listTabs(state.cdpPort);
      res.json({ running: true, tabs });
    } catch (err) {
      jsonError(res, 500, String(err));
    }
  });
  // Open tab: the only tab route that launches the browser on demand.
  app.post("/tabs/open", async (req, res) => {
    if (!state) return jsonError(res, 503, "browser server not started");
    const url = String((req.body as { url?: unknown })?.url ?? "").trim();
    if (!url) return jsonError(res, 400, "url is required");
    try {
      await ensureBrowserAvailable(runtime);
      const tab = await openTab(state.cdpPort, url);
      res.json(tab);
    } catch (err) {
      jsonError(res, 500, String(err));
    }
  });
  // Focus tab: 409 when no browser is reachable (it is never launched here).
  app.post("/tabs/focus", async (req, res) => {
    if (!state) return jsonError(res, 503, "browser server not started");
    const targetId = String(
      (req.body as { targetId?: unknown })?.targetId ?? "",
    ).trim();
    if (!targetId) return jsonError(res, 400, "targetId is required");
    const reachable = await isChromeReachable(state.cdpPort, 300);
    if (!reachable) return jsonError(res, 409, "browser not running");
    try {
      await activateTab(state.cdpPort, targetId);
      res.json({ ok: true });
    } catch (err) {
      jsonError(res, 500, String(err));
    }
  });
  // Close tab: 409 when no browser is reachable.
  app.delete("/tabs/:targetId", async (req, res) => {
    if (!state) return jsonError(res, 503, "browser server not started");
    const targetId = String(req.params.targetId ?? "").trim();
    if (!targetId) return jsonError(res, 400, "targetId is required");
    const reachable = await isChromeReachable(state.cdpPort, 300);
    if (!reachable) return jsonError(res, 409, "browser not running");
    try {
      await closeTab(state.cdpPort, targetId);
      res.json({ ok: true });
    } catch (err) {
      jsonError(res, 500, String(err));
    }
  });
  // Screenshot: captures the requested target, or the first listed one.
  app.get("/screenshot", async (req, res) => {
    if (!state) return jsonError(res, 503, "browser server not started");
    // A non-string query value (e.g. a repeated parameter) is treated as absent.
    const targetId =
      typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
    const fullPage =
      req.query.fullPage === "true" || req.query.fullPage === "1";
    const reachable = await isChromeReachable(state.cdpPort, 300);
    if (!reachable) return jsonError(res, 409, "browser not running");
    try {
      const tabs = await listTabs(state.cdpPort);
      const chosen = targetId
        ? tabs.find((t) => t.targetId === targetId)
        : tabs.at(0);
      // Also 404 when the target exists but exposes no WebSocket URL.
      if (!chosen?.wsUrl) return jsonError(res, 404, "tab not found");
      const png = await captureScreenshotPng({ wsUrl: chosen.wsUrl, fullPage });
      await ensureMediaDir();
      const saved = await saveMediaBuffer(png, "image/png", "browser");
      // Return an absolute path so the caller does not depend on this cwd.
      const filePath = path.resolve(saved.path);
      res.json({
        ok: true,
        path: filePath,
        targetId: chosen.targetId,
        url: chosen.url,
      });
    } catch (err) {
      jsonError(res, 500, String(err));
    }
  });
  const port = resolved.controlPort;
  // Always bind to 127.0.0.1; a bind failure is logged and mapped to null.
  const server = await new Promise<Server>((resolve, reject) => {
    const s = app.listen(port, "127.0.0.1", () => resolve(s));
    s.once("error", reject);
  }).catch((err) => {
    logError(
      `clawd browser server failed to bind 127.0.0.1:${port}: ${String(err)}`,
    );
    return null;
  });
  if (!server) return null;
  // Publishing `state` is what switches the routes above from 503 to live.
  state = {
    server,
    port,
    cdpPort: resolved.cdpPort,
    running: null,
    resolved,
  };
  logInfo(
    `🦞 clawd browser control listening on http://127.0.0.1:${port}/`,
    runtime,
  );
  return state;
}
/**
 * Shuts down the control server and any browser it launched.
 *
 * Does nothing when the server is not running. The module state is cleared
 * before any awaiting so routes report 503 right away. Stopping the browser
 * is best-effort: a failure is logged as a warning and never propagates. The
 * returned promise settles once the HTTP server's close callback has run.
 *
 * @param runtime - Runtime passed to the warning logger.
 */
export async function stopBrowserControlServer(
  runtime: RuntimeEnv = defaultRuntime,
) {
  const active = state;
  if (!active) return;
  state = null;

  const { running, server } = active;
  try {
    if (running) {
      await stopClawdChrome(running).catch((err) =>
        logWarn(`clawd browser stop failed: ${String(err)}`, runtime),
      );
    }
  } catch {
    // ignore
  }

  await new Promise<void>((done) => {
    server.close(() => done());
  });
}

View File

@@ -1,5 +1,16 @@
import chalk from "chalk";
import { Command } from "commander";
import {
browserCloseTab,
browserFocusTab,
browserOpenTab,
browserScreenshot,
browserStart,
browserStatus,
browserStop,
browserTabs,
resolveBrowserControlUrl,
} from "../browser/client.js";
import { agentCommand } from "../commands/agent.js";
import { healthCommand } from "../commands/health.js";
import { sendCommand } from "../commands/send.js";
@@ -360,5 +371,218 @@ Shows token usage per session when the agent reports it; set inbound.reply.agent
}
});
const browser = program
.command("browser")
.description("Manage clawd's dedicated browser (Chrome/Chromium)")
.option(
"--url <url>",
"Override browser control URL (default from ~/.clawdis/clawdis.json)",
)
.option("--json", "Output machine-readable JSON", false)
.addHelpText(
"after",
`
Examples:
clawdis browser status
clawdis browser start
clawdis browser tabs
clawdis browser open https://example.com
clawdis browser screenshot # emits MEDIA:<path>
clawdis browser screenshot <targetId> --full-page
`,
)
.action(() => {
defaultRuntime.error(
danger('Missing subcommand. Try: "clawdis browser status"'),
);
defaultRuntime.exit(1);
});
const parentOpts = (cmd: Command) =>
cmd.parent?.opts?.() as { url?: string; json?: boolean };
browser
.command("status")
.description("Show browser status")
.action(async (_opts, cmd) => {
const parent = parentOpts(cmd);
const baseUrl = resolveBrowserControlUrl(parent?.url);
try {
const status = await browserStatus(baseUrl);
if (parent?.json) {
defaultRuntime.log(JSON.stringify(status, null, 2));
return;
}
defaultRuntime.log(
[
`enabled: ${status.enabled}`,
`running: ${status.running}`,
`controlUrl: ${status.controlUrl}`,
`cdpPort: ${status.cdpPort}`,
`browser: ${status.chosenBrowser ?? "unknown"}`,
`profileColor: ${status.color}`,
].join("\n"),
);
} catch (err) {
defaultRuntime.error(danger(String(err)));
defaultRuntime.exit(1);
}
});
browser
.command("start")
.description("Start the clawd browser (no-op if already running)")
.action(async (_opts, cmd) => {
const parent = parentOpts(cmd);
const baseUrl = resolveBrowserControlUrl(parent?.url);
try {
await browserStart(baseUrl);
const status = await browserStatus(baseUrl);
if (parent?.json) {
defaultRuntime.log(JSON.stringify(status, null, 2));
return;
}
defaultRuntime.log(info(`🦞 clawd browser running: ${status.running}`));
} catch (err) {
defaultRuntime.error(danger(String(err)));
defaultRuntime.exit(1);
}
});
browser
.command("stop")
.description("Stop the clawd browser (best-effort)")
.action(async (_opts, cmd) => {
const parent = parentOpts(cmd);
const baseUrl = resolveBrowserControlUrl(parent?.url);
try {
await browserStop(baseUrl);
const status = await browserStatus(baseUrl);
if (parent?.json) {
defaultRuntime.log(JSON.stringify(status, null, 2));
return;
}
defaultRuntime.log(info(`🦞 clawd browser running: ${status.running}`));
} catch (err) {
defaultRuntime.error(danger(String(err)));
defaultRuntime.exit(1);
}
});
browser
.command("tabs")
.description("List open tabs")
.action(async (_opts, cmd) => {
const parent = parentOpts(cmd);
const baseUrl = resolveBrowserControlUrl(parent?.url);
try {
const tabs = await browserTabs(baseUrl);
if (parent?.json) {
defaultRuntime.log(JSON.stringify({ tabs }, null, 2));
return;
}
if (tabs.length === 0) {
defaultRuntime.log("No tabs (browser closed or no targets).");
return;
}
defaultRuntime.log(
tabs
.map(
(t, i) =>
`${i + 1}. ${t.title || "(untitled)"}\n ${t.url}\n id: ${t.targetId}`,
)
.join("\n"),
);
} catch (err) {
defaultRuntime.error(danger(String(err)));
defaultRuntime.exit(1);
}
});
// `clawdis browser open <url>`: opens the URL in a new tab via the control server.
browser
  .command("open")
  .description("Open a URL in a new tab")
  .argument("<url>", "URL to open")
  // Commander invokes the handler as (url, options, command). The command
  // object is the third argument, and it is what parentOpts() needs in order
  // to read the parent's --url / --json flags.
  .action(async (url: string, _opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      const tab = await browserOpenTab(baseUrl, url);
      if (parent?.json) {
        defaultRuntime.log(JSON.stringify(tab, null, 2));
        return;
      }
      defaultRuntime.log(`opened: ${tab.url}\nid: ${tab.targetId}`);
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `clawdis browser focus <targetId>`: activates the given tab.
browser
  .command("focus")
  .description("Focus/activate a tab by target id")
  .argument("<targetId>", "CDP target id")
  // Commander invokes the handler as (targetId, options, command). The
  // command object is the third argument, and it is what parentOpts() needs
  // in order to read the parent's --url / --json flags.
  .action(async (targetId: string, _opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      await browserFocusTab(baseUrl, targetId);
      if (parent?.json) {
        defaultRuntime.log(JSON.stringify({ ok: true }, null, 2));
        return;
      }
      defaultRuntime.log("ok");
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
// `clawdis browser close <targetId>`: closes the given tab.
browser
  .command("close")
  .description("Close a tab by target id")
  .argument("<targetId>", "CDP target id")
  // Commander invokes the handler as (targetId, options, command). The
  // command object is the third argument, and it is what parentOpts() needs
  // in order to read the parent's --url / --json flags.
  .action(async (targetId: string, _opts, cmd) => {
    const parent = parentOpts(cmd);
    const baseUrl = resolveBrowserControlUrl(parent?.url);
    try {
      await browserCloseTab(baseUrl, targetId);
      if (parent?.json) {
        defaultRuntime.log(JSON.stringify({ ok: true }, null, 2));
        return;
      }
      defaultRuntime.log("ok");
    } catch (err) {
      defaultRuntime.error(danger(String(err)));
      defaultRuntime.exit(1);
    }
  });
browser
.command("screenshot")
.description("Capture a screenshot (defaults to first tab)")
.argument("[targetId]", "CDP target id")
.option("--full-page", "Capture full page (best-effort)", false)
.action(async (targetId: string | undefined, opts, cmd) => {
const parent = parentOpts(cmd);
const baseUrl = resolveBrowserControlUrl(parent?.url);
try {
const result = await browserScreenshot(baseUrl, {
targetId: targetId?.trim() || undefined,
fullPage: Boolean(opts.fullPage),
});
if (parent?.json) {
defaultRuntime.log(JSON.stringify(result, null, 2));
return;
}
// Print MEDIA: token so the agent can forward the image as an attachment.
defaultRuntime.log(`MEDIA:${result.path}`);
} catch (err) {
defaultRuntime.error(danger(String(err)));
defaultRuntime.exit(1);
}
});
return program;
}

View File

@@ -49,6 +49,18 @@ export type WebChatConfig = {
port?: number;
};
/** User-facing `browser` section of the config; every field is optional. */
export type BrowserConfig = {
  /** Enable clawd browser control. Default: true */
  enabled?: boolean;
  /** Base URL of the clawd browser control server. Default: http://127.0.0.1:18790 */
  controlUrl?: string;
  /** Accent color for the clawd browser profile (hex). Default: #FF4500 */
  color?: string;
  /** Start Chrome headless (best-effort). Default: false */
  headless?: boolean;
  /** If true: never launch; only attach to an existing browser. Default: false */
  attachOnly?: boolean;
};
export type CronConfig = {
enabled?: boolean;
store?: string;
@@ -74,6 +86,7 @@ export type GroupChatConfig = {
export type ClawdisConfig = {
logging?: LoggingConfig;
browser?: BrowserConfig;
inbound?: {
allowFrom?: string[]; // E.164 numbers allowed to trigger auto-reply (without whatsapp:)
messagePrefix?: string; // Prefix added to all inbound messages (default: "[clawdis]" if no allowFrom, else "")
@@ -203,6 +216,15 @@ const ClawdisSchema = z.object({
file: z.string().optional(),
})
.optional(),
browser: z
.object({
enabled: z.boolean().optional(),
controlUrl: z.string().optional(),
color: z.string().optional(),
headless: z.boolean().optional(),
attachOnly: z.boolean().optional(),
})
.optional(),
inbound: z
.object({
allowFrom: z.array(z.string()).optional(),

View File

@@ -8,6 +8,10 @@ import os from "node:os";
import path from "node:path";
import chalk from "chalk";
import { type WebSocket, WebSocketServer } from "ws";
import {
startBrowserControlServerFromConfig,
stopBrowserControlServer,
} from "../browser/server.js";
import { createDefaultDeps } from "../cli/deps.js";
import { agentCommand } from "../commands/agent.js";
import { getHealthSnapshot, type HealthSummary } from "../commands/health.js";
@@ -2109,6 +2113,11 @@ export async function startGatewayServer(
logError(`gateway: webchat failed to start: ${String(err)}`);
});
// Start clawd browser control server (unless disabled via config).
void startBrowserControlServerFromConfig(defaultRuntime).catch((err) => {
logError(`gateway: clawd browser server failed to start: ${String(err)}`);
});
// Launch configured providers (WhatsApp Web, Telegram) so gateway replies via the
// surface the message came from. Tests can opt out via CLAWDIS_SKIP_PROVIDERS.
if (process.env.CLAWDIS_SKIP_PROVIDERS !== "1") {
@@ -2168,6 +2177,7 @@ export async function startGatewayServer(
}
}
clients.clear();
await stopBrowserControlServer().catch(() => {});
await Promise.allSettled(providerTasks);
await new Promise<void>((resolve) => wss.close(() => resolve()));
await new Promise<void>((resolve, reject) =>