diff --git a/README.md b/README.md index 5b4b62e70..4dfc33a6d 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ Because every space lobster needs a time-and-space machine. The Doctor has a TAR - 🤖 **AI Agent Gateway** — Pi only (Pi CLI in RPC mode) - 💬 **Session Management** — Per-sender conversation context - 🔔 **Heartbeats** — Periodic check-ins for proactive AI +- 🧭 **Clawd Browser** — Dedicated Chrome/Chromium profile with tabs + screenshot control (no interference with your daily browser) - 👥 **Group Chat Support** — Mention-based triggering - 📎 **Media Support** — Images, audio, documents, voice notes - 🎤 **Voice Transcription** — Whisper integration @@ -111,6 +112,18 @@ Create `~/.clawdis/clawdis.json`: } ``` +Optional: enable/configure clawd’s dedicated browser control (defaults are already on): + +```json5 +{ + browser: { + enabled: true, + controlUrl: "http://127.0.0.1:18790", + color: "#FF4500" + } +} +``` + ## Documentation - [Configuration Guide](./docs/configuration.md) @@ -148,6 +161,7 @@ Bot-mode support (grammY only) shares the same `main` session as WhatsApp/WebCha | `clawdis login` | Link WhatsApp Web via QR | | `clawdis send` | Send a message (WhatsApp default; `--provider telegram` for bot mode). Always uses the Gateway WS; requires a running gateway. | | `clawdis agent` | Talk directly to the agent (no WhatsApp send) | +| `clawdis browser ...` | Manage clawd’s dedicated browser (status/tabs/open/screenshot). | | `clawdis gateway` | Start the Gateway server (WS control plane). Params: `--port`, `--token`, `--force`, `--verbose`. | | `clawdis gateway health|status|send|agent|call` | Gateway WS clients; assume a running gateway. | | `clawdis wake` | Enqueue a system event and optionally trigger a heartbeat via the Gateway. 
| diff --git a/apps/macos/Sources/Clawdis/ConfigSettings.swift b/apps/macos/Sources/Clawdis/ConfigSettings.swift index d77980d9e..cf6e25332 100644 --- a/apps/macos/Sources/Clawdis/ConfigSettings.swift +++ b/apps/macos/Sources/Clawdis/ConfigSettings.swift @@ -18,6 +18,12 @@ struct ConfigSettings: View { @AppStorage(webChatEnabledKey) private var webChatEnabled: Bool = true @AppStorage(webChatPortKey) private var webChatPort: Int = 18788 + // clawd browser settings (stored in ~/.clawdis/clawdis.json under "browser") + @State private var browserEnabled: Bool = true + @State private var browserControlUrl: String = "http://127.0.0.1:18790" + @State private var browserColorHex: String = "#FF4500" + @State private var browserAttachOnly: Bool = false + var body: some View { VStack(alignment: .leading, spacing: 14) { Text("Clawdis CLI config") @@ -120,6 +126,56 @@ struct ConfigSettings: View { } } + Divider().padding(.vertical, 4) + + LabeledContent("Browser (clawd)") { + VStack(alignment: .leading, spacing: 8) { + Toggle("Enable clawd browser control", isOn: self.$browserEnabled) + .toggleStyle(.switch) + .frame(width: 360, alignment: .leading) + .onChange(of: self.browserEnabled) { _, _ in self.autosaveConfig() } + + HStack(spacing: 8) { + Text("Control URL") + TextField("http://127.0.0.1:18790", text: self.$browserControlUrl) + .textFieldStyle(.roundedBorder) + .frame(width: 320) + .disabled(!self.browserEnabled) + .onChange(of: self.browserControlUrl) { _, _ in self.autosaveConfig() } + } + + HStack(spacing: 8) { + Text("Accent") + TextField("#FF4500", text: self.$browserColorHex) + .textFieldStyle(.roundedBorder) + .frame(width: 120) + .disabled(!self.browserEnabled) + .onChange(of: self.browserColorHex) { _, _ in self.autosaveConfig() } + Circle() + .fill(self.browserColor) + .frame(width: 12, height: 12) + .overlay(Circle().stroke(Color.secondary.opacity(0.25), lineWidth: 1)) + Text("lobster-orange") + .font(.footnote) + .foregroundStyle(.secondary) + } + + 
Toggle("Attach only (never launch)", isOn: self.$browserAttachOnly) + .toggleStyle(.switch) + .frame(width: 360, alignment: .leading) + .disabled(!self.browserEnabled) + .onChange(of: self.browserAttachOnly) { _, _ in self.autosaveConfig() } + .help("When enabled, the browser server will only connect if the clawd browser is already running.") + + Text( + "Clawd uses a separate Chrome profile and ports (default 18790/18791) so it won’t interfere with your daily browser." + ) + .font(.footnote) + .foregroundStyle(.secondary) + .frame(maxWidth: 480, alignment: .leading) + } + } + Spacer() } .frame(maxWidth: .infinity, alignment: .leading) @@ -153,6 +209,7 @@ struct ConfigSettings: View { let agent = reply?["agent"] as? [String: Any] let heartbeatMinutes = reply?["heartbeatMinutes"] as? Int let heartbeatBody = reply?["heartbeatBody"] as? String + let browser = parsed["browser"] as? [String: Any] let loadedModel = (agent?["model"] as? String) ?? "" if !loadedModel.isEmpty { @@ -165,6 +222,13 @@ struct ConfigSettings: View { if let heartbeatMinutes { self.heartbeatMinutes = heartbeatMinutes } if let heartbeatBody, !heartbeatBody.isEmpty { self.heartbeatBody = heartbeatBody } + + if let browser { + if let enabled = browser["enabled"] as? Bool { self.browserEnabled = enabled } + if let url = browser["controlUrl"] as? String, !url.isEmpty { self.browserControlUrl = url } + if let color = browser["color"] as? String, !color.isEmpty { self.browserColorHex = color } + if let attachOnly = browser["attachOnly"] as? Bool { self.browserAttachOnly = attachOnly } + } } private func autosaveConfig() { @@ -181,6 +245,7 @@ struct ConfigSettings: View { var inbound = root["inbound"] as? [String: Any] ?? [:] var reply = inbound["reply"] as? [String: Any] ?? [:] var agent = reply["agent"] as? [String: Any] ?? [:] + var browser = root["browser"] as? [String: Any] ?? [:] let chosenModel = (self.configModel == "__custom__" ? 
self.customModel : self.configModel)
             .trimmingCharacters(in: .whitespacesAndNewlines)
@@ -201,6 +266,14 @@ struct ConfigSettings: View {
         inbound["reply"] = reply
         root["inbound"] = inbound
 
+        browser["enabled"] = self.browserEnabled
+        let trimmedUrl = self.browserControlUrl.trimmingCharacters(in: .whitespacesAndNewlines)
+        if !trimmedUrl.isEmpty { browser["controlUrl"] = trimmedUrl }
+        let trimmedColor = self.browserColorHex.trimmingCharacters(in: .whitespacesAndNewlines)
+        if !trimmedColor.isEmpty { browser["color"] = trimmedColor }
+        browser["attachOnly"] = self.browserAttachOnly
+        root["browser"] = browser
+
         do {
             let data = try JSONSerialization.data(withJSONObject: root, options: [.prettyPrinted, .sortedKeys])
             let url = self.configURL()
@@ -217,6 +290,22 @@ struct ConfigSettings: View {
         return (try? JSONSerialization.jsonObject(with: data) as? [String: Any]) ?? [:]
     }
 
+    /// Preview color for the accent swatch, parsed from `browserColorHex`.
+    ///
+    /// Accepts `RRGGBB` with an optional leading `#`; any other input yields `.orange`.
+    private var browserColor: Color {
+        let raw = self.browserColorHex.trimmingCharacters(in: .whitespacesAndNewlines)
+        let hex = raw.hasPrefix("#") ? String(raw.dropFirst()) : raw
+        // `Int(_:radix:)` tolerates a leading sign ("-FF450"), so insist on six hex digits first.
+        guard hex.count == 6, hex.allSatisfy(\.isHexDigit), let value = Int(hex, radix: 16) else {
+            return .orange
+        }
+        let r = Double((value >> 16) & 0xFF) / 255.0
+        let g = Double((value >> 8) & 0xFF) / 255.0
+        let b = Double(value & 0xFF) / 255.0
+        return Color(red: r, green: g, blue: b)
+    }
+
     private func loadModels() async {
         guard !self.modelsLoading else { return }
         self.modelsLoading = true
diff --git a/docs/AGENTS.default.md b/docs/AGENTS.default.md
index fb8beed20..23c3ccebd 100644
--- a/docs/AGENTS.default.md
+++ b/docs/AGENTS.default.md
@@ -34,3 +34,4 @@ read_when:
 - Run installs from the Tools tab; it hides the button if a tool is already present.
 - For MCPs, mcporter writes to the home-scope config; re-run installs if you rotate tokens.
 - Keep heartbeats enabled so the assistant can schedule reminders, monitor inboxes, and trigger camera captures.
+- For browser-driven verification, use `clawdis browser` (tabs/status/screenshot) with the clawd-managed Chrome profile. diff --git a/docs/agents.md b/docs/agents.md index 76dc1632b..3d6894d9c 100644 --- a/docs/agents.md +++ b/docs/agents.md @@ -86,6 +86,13 @@ RPC mode emits structured tool lifecycle events (start/result) and assistant out - logged to `/tmp/clawdis/…` - streamed over the Gateway WS to clients like WebChat and the macOS app +## Browser helpers + +If you enable the clawd-managed browser (default on), the agent can use: +- `clawdis browser status` / `tabs` / `open <url>` / `screenshot [targetId]` + +This uses a dedicated Chrome/Chromium profile (lobster-orange by default) so it doesn’t interfere with your daily browser. + --- *Next: [Group Chats](./group-messages.md)* 🦞 diff --git a/docs/configuration.md b/docs/configuration.md index 609cda912..69d8844c6 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -103,6 +103,28 @@ Example command-mode config: } ``` +### `browser` (clawd-managed Chrome) + +Clawdis can start a **dedicated, isolated** Chrome/Chromium instance for clawd and expose a small loopback control server. + +Defaults: +- enabled: `true` +- control URL: `http://127.0.0.1:18790` (CDP uses `18791`) +- profile color: `#FF4500` (lobster-orange) + +```json5 +{ + browser: { + enabled: true, + controlUrl: "http://127.0.0.1:18790", + color: "#FF4500", + // Advanced: + // headless: false, + // attachOnly: false, + } +} +``` + ## Template variables Template placeholders are expanded in `inbound.reply.command`, `sessionIntro`, `bodyPrefix`, and other templated strings. 
diff --git a/docs/mac/browser.md b/docs/mac/browser.md new file mode 100644 index 000000000..40a4987a9 --- /dev/null +++ b/docs/mac/browser.md @@ -0,0 +1,150 @@ +--- +summary: "Spec: clawd-managed Chrome/Chromium instance (separate profile, lobster-orange, tab management)" +read_when: + - Adding agent-controlled browser automation + - Debugging why clawd is interfering with your own Chrome + - Implementing browser settings + lifecycle in the macOS app +--- + +# Browser (macOS app) — clawd-managed Chrome + +Status: draft spec · Date: 2025-12-13 + +Goal: give the **clawd** persona its own browser that is: +- Visually distinct (lobster-orange, profile labeled “clawd”). +- Fully agent-manageable (start/stop, list tabs, focus/close tabs, open URLs, screenshot). +- Non-interfering with the user’s own browser (separate profile + dedicated ports). + +This doc covers the macOS app/gateway side. It intentionally does not mandate Playwright vs Puppeteer yet; the key is the **contract** and the **separation guarantees**. + +## User-facing settings + +Add a dedicated settings section (preferably under **Tools** or its own “Browser” tab): + +- **Enable clawd browser** (`default: on`) + - When off: no browser is launched, and browser tools return “disabled”. +- **Browser control URL** (`default: http://127.0.0.1:18790`) + - Interpreted as the base URL of the local/remote browser-control server. + - If the URL host is not loopback, Clawdis must **not** attempt to launch a local browser; it only connects. +- **Accent color** (`default: #FF4500`, “lobster-orange”) + - Used to theme the clawd browser profile (best-effort) and to tint UI indicators in Clawdis. + +Optional (advanced, can be hidden behind Debug initially): +- **Use headless browser** (`default: off`) +- **Attach to existing only** (`default: off`) — if on, never launch; only connect if already running. 
+ +### Port convention + +Clawdis already uses: +- Gateway WebSocket: `18789` +- WebChat HTTP: `18788` + +For the clawd browser-control server, use “family” ports: +- Browser control HTTP API: `18790` (gateway + 1) +- Browser CDP/debugging port: `18791` (control + 1) + +The user usually only configures the **control URL** (port `18790`). CDP is an internal detail. + +## Browser isolation guarantees (non-negotiable) + +1) **Dedicated user data dir** + - Never attach to or reuse the user’s default Chrome profile. + - Store clawd browser state under an app-owned directory, e.g.: + - `~/Library/Application Support/Clawdis/browser/clawd/` (mac app) + - or `~/.clawdis/browser/clawd/` (gateway/CLI) + +2) **Dedicated ports** + - Never use `9222` (reserved for ad-hoc dev workflows; avoids colliding with `agent-tools/browser-tools`). + - Default ports are `18790/18791` unless overridden. + +3) **Named tab/page management** + - The agent must be able to enumerate and target tabs deterministically (by stable `targetId` or equivalent), not “last tab”. + +## Browser selection (macOS) + +On startup (when enabled + local URL), Clawdis chooses the browser executable in this order: +1) **Google Chrome Canary** (if installed) +2) **Chromium** (if installed) +3) **Google Chrome** (fallback) + +Implementation detail: detection is by existence of the `.app` bundle under `/Applications` (and optionally `~/Applications`), then using the resolved executable path. + +Rationale: +- Canary/Chromium are easy to visually distinguish from the user’s daily driver. +- Chrome fallback ensures the feature works on a stock machine. + +## Visual differentiation (“lobster-orange”) + +The clawd browser should be obviously different at a glance: +- Profile name: **clawd** +- Profile color: **#FF4500** + +Preferred behavior: +- Seed/patch the profile’s preferences on first launch so the color + name persist. 
+ +Fallback behavior: +- If preferences patching is not reliable, open with the dedicated profile and let the user set the profile color/name once via Chrome UI; it must persist because the `userDataDir` is persistent. + +## Control server contract (proposed) + +Expose a small local HTTP API (and/or gateway RPC surface) so the agent can manage state without touching the user’s Chrome. + +Minimum endpoints/methods (names illustrative): + +- `browser.status` + - returns: `{ enabled, url, running, pid?, version?, chosenBrowser?, userDataDir?, ports: { control, cdp } }` +- `browser.start` + - starts the browser-control server + browser (no-op if already running) +- `browser.stop` + - stops the server and closes the clawd browser (best-effort; graceful first, then force if needed) +- `browser.tabs.list` + - returns: array of `{ targetId, title, url, isActive, lastFocusedAt? }` +- `browser.tabs.open` + - params: `{ url, newTab?: true }` → returns `{ targetId }` +- `browser.tabs.focus` + - params: `{ targetId }` +- `browser.tabs.close` + - params: `{ targetId }` +- `browser.screenshot` + - params: `{ targetId?, fullPage?: false }` → returns a `MEDIA:` attachment URL (via the existing Clawdis media host) + +Nice-to-have (later): +- `browser.snapshot.aria` (AI-friendly snapshot with stable refs) +- `browser.click` / `browser.type` / `browser.waitFor` helpers built atop snapshot refs + +### “Is it open or closed?” + +“Open” means: +- the control server is reachable at the configured URL **and** +- it reports a live browser connection. + +“Closed” means: +- control server not reachable, or server reports no browser. + +Clawdis should treat “open/closed” as a health check (fast path), not by scanning global Chrome processes (avoid false positives). 
+ +## Interaction with the agent (clawd) + +The agent should use browser tools only when: +- enabled in settings +- control URL is configured + +If disabled, tools must fail fast with a friendly error (“Browser disabled in settings”). + +The agent should not assume tabs are ephemeral. It should: +- call `browser.tabs.list` to discover existing tabs first +- reuse an existing tab when appropriate (e.g. a persistent “main” tab) +- avoid opening duplicate tabs unless asked + +## Security & privacy notes + +- The clawd browser profile is app-owned; it may contain logged-in sessions. Treat it as sensitive data. +- The control server must bind to loopback only by default (`127.0.0.1`) unless the user explicitly configures a non-loopback URL. +- Never reuse or copy the user’s default Chrome profile. + +## Non-goals (for the first cut) + +- Cross-device “sync” of tabs between Mac and Pi. +- Sharing the user’s logged-in Chrome sessions automatically. +- General-purpose web scraping; this is primarily for “close-the-loop” verification and interaction. 
+
diff --git a/src/browser/cdp.ts b/src/browser/cdp.ts
new file mode 100644
index 000000000..6b51cfd20
--- /dev/null
+++ b/src/browser/cdp.ts
@@ -0,0 +1,124 @@
+import WebSocket from "ws";
+
+type CdpResponse = {
+  id: number;
+  result?: unknown;
+  error?: { message?: string };
+};
+
+type Pending = {
+  resolve: (value: unknown) => void;
+  reject: (err: Error) => void;
+};
+
+/**
+ * Capture a PNG screenshot of one CDP target over its debugger WebSocket.
+ *
+ * Issues `Page.enable`, then (only when `fullPage` is set)
+ * `Page.getLayoutMetrics` to build a clip covering the whole content, then
+ * `Page.captureScreenshot`, and decodes the returned base64 payload.
+ *
+ * The socket is closed on every exit path, including CDP errors.
+ *
+ * @param opts.wsUrl WebSocket URL of the target to capture.
+ * @param opts.fullPage Clip to the full content size when layout metrics allow.
+ * @returns The decoded PNG bytes.
+ * @throws If the socket errors or closes early, CDP returns an error, or the
+ *   response has no image data.
+ */
+export async function captureScreenshotPng(opts: {
+  wsUrl: string;
+  fullPage?: boolean;
+}): Promise<Buffer> {
+  const ws = new WebSocket(opts.wsUrl, { handshakeTimeout: 5000 });
+
+  let nextId = 1;
+  const pending = new Map<number, Pending>();
+
+  const send = (method: string, params?: Record<string, unknown>) => {
+    const id = nextId++;
+    const msg = { id, method, params };
+    ws.send(JSON.stringify(msg));
+    return new Promise<unknown>((resolve, reject) => {
+      pending.set(id, { resolve, reject });
+    });
+  };
+
+  const closeWithError = (err: Error) => {
+    for (const [, p] of pending) p.reject(err);
+    pending.clear();
+    try {
+      ws.close();
+    } catch {
+      // ignore
+    }
+  };
+
+  const openPromise = new Promise<void>((resolve, reject) => {
+    ws.once("open", () => resolve());
+    ws.once("error", (err) => reject(err));
+  });
+
+  ws.on("message", (data) => {
+    try {
+      const parsed = JSON.parse(String(data)) as CdpResponse;
+      if (typeof parsed.id !== "number") return;
+      const p = pending.get(parsed.id);
+      if (!p) return;
+      pending.delete(parsed.id);
+      if (parsed.error?.message) {
+        p.reject(new Error(parsed.error.message));
+        return;
+      }
+      p.resolve(parsed.result);
+    } catch {
+      // ignore
+    }
+  });
+
+  ws.on("close", () => {
+    closeWithError(new Error("CDP socket closed"));
+  });
+
+  try {
+    await openPromise;
+
+    await send("Page.enable");
+
+    let clip:
+      | { x: number; y: number; width: number; height: number; scale: number }
+      | undefined;
+    if (opts.fullPage) {
+      const metrics = (await send("Page.getLayoutMetrics")) as {
+        cssContentSize?: { width?: number; height?: number };
+        contentSize?: { width?: number; height?: number };
+      };
+      const size = metrics?.cssContentSize ?? metrics?.contentSize;
+      const width = Number(size?.width ?? 0);
+      const height = Number(size?.height ?? 0);
+      if (width > 0 && height > 0) {
+        clip = { x: 0, y: 0, width, height, scale: 1 };
+      }
+    }
+
+    const result = (await send("Page.captureScreenshot", {
+      format: "png",
+      fromSurface: true,
+      captureBeyondViewport: true,
+      ...(clip ? { clip } : {}),
+    })) as { data?: string };
+
+    const base64 = result?.data;
+    if (!base64) throw new Error("Screenshot failed: missing data");
+
+    return Buffer.from(base64, "base64");
+  } finally {
+    // Runs on success and on every throw above, so a failed CDP call no
+    // longer leaves the debugger socket open.
+    try {
+      ws.close();
+    } catch {
+      // ignore
+    }
+  }
+}
diff --git a/src/browser/chrome.ts b/src/browser/chrome.ts
new file mode 100644
index 000000000..727988dd7
--- /dev/null
+++ b/src/browser/chrome.ts
@@ -0,0 +1,346 @@
+import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+import { ensurePortAvailable } from "../infra/ports.js";
+import { logInfo, logWarn } from "../logger.js";
+import { defaultRuntime, type RuntimeEnv } from "../runtime.js";
+import { CONFIG_DIR } from "../utils.js";
+import type { ResolvedBrowserConfig } from "./config.js";
+import {
+  DEFAULT_CLAWD_BROWSER_COLOR,
+  DEFAULT_CLAWD_BROWSER_PROFILE_NAME,
+} from "./constants.js";
+
+export type BrowserExecutable = {
+  kind: "canary" | "chromium" | "chrome";
+  path: string;
+};
+
+export type RunningChrome = {
+  pid: number;
+  exe: BrowserExecutable;
+  userDataDir: string;
+  cdpPort: number;
+  startedAt: number;
+  proc: ChildProcessWithoutNullStreams;
+};
+
+function exists(filePath: string) {
+  try {
+    return fs.existsSync(filePath);
+  } catch {
+    return false;
+  }
+}
+
+export function findChromeExecutableMac(): BrowserExecutable | null {
+  const candidates: Array<BrowserExecutable> = [
+    {
+      kind: "canary",
+      path: "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome
Canary", + }, + { + kind: "canary", + path: path.join( + os.homedir(), + "Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary", + ), + }, + { + kind: "chromium", + path: "/Applications/Chromium.app/Contents/MacOS/Chromium", + }, + { + kind: "chromium", + path: path.join( + os.homedir(), + "Applications/Chromium.app/Contents/MacOS/Chromium", + ), + }, + { + kind: "chrome", + path: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + }, + { + kind: "chrome", + path: path.join( + os.homedir(), + "Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + ), + }, + ]; + + for (const candidate of candidates) { + if (exists(candidate.path)) return candidate; + } + + return null; +} + +export function resolveClawdUserDataDir() { + return path.join( + CONFIG_DIR, + "browser", + DEFAULT_CLAWD_BROWSER_PROFILE_NAME, + "user-data", + ); +} + +function decoratedMarkerPath(userDataDir: string) { + return path.join(userDataDir, ".clawd-profile-decorated"); +} + +function safeReadJson(filePath: string): Record | null { + try { + if (!exists(filePath)) return null; + const raw = fs.readFileSync(filePath, "utf-8"); + const parsed = JSON.parse(raw) as unknown; + if (typeof parsed !== "object" || parsed === null) return null; + return parsed as Record; + } catch { + return null; + } +} + +function safeWriteJson(filePath: string, data: Record) { + fs.mkdirSync(path.dirname(filePath), { recursive: true }); + fs.writeFileSync(filePath, JSON.stringify(data, null, 2)); +} + +function setDeep(obj: Record, keys: string[], value: unknown) { + let node: Record = obj; + for (const key of keys.slice(0, -1)) { + const next = node[key]; + if (typeof next !== "object" || next === null || Array.isArray(next)) { + node[key] = {}; + } + node = node[key] as Record; + } + node[keys[keys.length - 1] ?? ""] = value; +} + +/** + * Best-effort profile decoration (name + lobster-orange). 
Chrome preference keys + * vary by version; we keep this conservative and idempotent. + */ +export function decorateClawdProfile( + userDataDir: string, + opts?: { color?: string }, +) { + const desiredName = DEFAULT_CLAWD_BROWSER_PROFILE_NAME; + const desiredColor = ( + opts?.color ?? DEFAULT_CLAWD_BROWSER_COLOR + ).toUpperCase(); + + const localStatePath = path.join(userDataDir, "Local State"); + const preferencesPath = path.join(userDataDir, "Default", "Preferences"); + + const localState = safeReadJson(localStatePath) ?? {}; + // Common-ish shape: profile.info_cache.Default + setDeep( + localState, + ["profile", "info_cache", "Default", "name"], + desiredName, + ); + setDeep( + localState, + ["profile", "info_cache", "Default", "shortcut_name"], + desiredName, + ); + setDeep( + localState, + ["profile", "info_cache", "Default", "user_name"], + desiredName, + ); + // Color keys are best-effort (Chrome changes these frequently). + setDeep( + localState, + ["profile", "info_cache", "Default", "profile_color"], + desiredColor, + ); + setDeep( + localState, + ["profile", "info_cache", "Default", "user_color"], + desiredColor, + ); + safeWriteJson(localStatePath, localState); + + const prefs = safeReadJson(preferencesPath) ?? 
{}; + setDeep(prefs, ["profile", "name"], desiredName); + setDeep(prefs, ["profile", "profile_color"], desiredColor); + setDeep(prefs, ["profile", "user_color"], desiredColor); + safeWriteJson(preferencesPath, prefs); + + try { + fs.writeFileSync( + decoratedMarkerPath(userDataDir), + `${Date.now()}\n`, + "utf-8", + ); + } catch { + // ignore + } +} + +export async function isChromeReachable( + cdpPort: number, + timeoutMs = 500, +): Promise { + const ctrl = new AbortController(); + const t = setTimeout(() => ctrl.abort(), timeoutMs); + try { + const res = await fetch(`http://127.0.0.1:${cdpPort}/json/version`, { + signal: ctrl.signal, + }); + return res.ok; + } catch { + return false; + } finally { + clearTimeout(t); + } +} + +export async function launchClawdChrome( + resolved: ResolvedBrowserConfig, + runtime: RuntimeEnv = defaultRuntime, +): Promise { + await ensurePortAvailable(resolved.cdpPort); + + const exe = process.platform === "darwin" ? findChromeExecutableMac() : null; + if (!exe) { + throw new Error( + "No supported browser found (Chrome Canary/Chromium/Chrome on macOS).", + ); + } + + const userDataDir = resolveClawdUserDataDir(); + fs.mkdirSync(userDataDir, { recursive: true }); + + const marker = decoratedMarkerPath(userDataDir); + const needsDecorate = !exists(marker); + + // First launch to create preference files if missing, then decorate and relaunch. + const spawnOnce = () => { + const args: string[] = [ + `--remote-debugging-port=${resolved.cdpPort}`, + `--user-data-dir=${userDataDir}`, + "--no-first-run", + "--no-default-browser-check", + "--disable-sync", + "--disable-background-networking", + "--disable-component-update", + "--disable-features=Translate,MediaRouter", + "--password-store=basic", + ]; + + if (resolved.headless) { + // Best-effort; older Chromes may ignore. + args.push("--headless=new"); + args.push("--disable-gpu"); + } + + // Always open a blank tab to ensure a target exists. 
+ args.push("about:blank"); + + return spawn(exe.path, args, { + stdio: "pipe", + env: { + ...process.env, + // Reduce accidental sharing with the user's env. + HOME: os.homedir(), + }, + }); + }; + + const startedAt = Date.now(); + let proc = spawnOnce(); + + // If this is the first run, let Chrome create prefs, then decorate + restart. + if (needsDecorate) { + const deadline = Date.now() + 5000; + while (Date.now() < deadline) { + const localStatePath = path.join(userDataDir, "Local State"); + const preferencesPath = path.join(userDataDir, "Default", "Preferences"); + if (exists(localStatePath) && exists(preferencesPath)) break; + await new Promise((r) => setTimeout(r, 100)); + } + try { + proc.kill("SIGTERM"); + } catch { + // ignore + } + await new Promise((r) => setTimeout(r, 300)); + try { + decorateClawdProfile(userDataDir, { color: resolved.color }); + logInfo( + `🦞 clawd browser profile decorated (${resolved.color})`, + runtime, + ); + } catch (err) { + logWarn( + `clawd browser profile decoration failed: ${String(err)}`, + runtime, + ); + } + proc = spawnOnce(); + } + + // Wait for CDP to come up. + const readyDeadline = Date.now() + 15_000; + while (Date.now() < readyDeadline) { + if (await isChromeReachable(resolved.cdpPort, 500)) break; + await new Promise((r) => setTimeout(r, 200)); + } + + if (!(await isChromeReachable(resolved.cdpPort, 500))) { + try { + proc.kill("SIGKILL"); + } catch { + // ignore + } + throw new Error(`Failed to start Chrome CDP on port ${resolved.cdpPort}.`); + } + + const pid = proc.pid ?? 
-1;
+  logInfo(
+    `🦞 clawd browser started (${exe.kind}) on 127.0.0.1:${resolved.cdpPort} (pid ${pid})`,
+    runtime,
+  );
+
+  return {
+    pid,
+    exe,
+    userDataDir,
+    cdpPort: resolved.cdpPort,
+    startedAt,
+    proc,
+  };
+}
+
+/** Stop Chrome: SIGTERM, poll up to `timeoutMs` for exit, then SIGKILL. */
+export async function stopClawdChrome(
+  running: RunningChrome,
+  timeoutMs = 2500,
+) {
+  const proc = running.proc;
+  // `proc.killed` only means a signal was sent, so check for an actual exit.
+  const exited = () => proc.exitCode !== null || proc.signalCode !== null;
+  if (exited()) return;
+  try {
+    proc.kill("SIGTERM");
+  } catch {
+    // ignore
+  }
+  const start = Date.now();
+  while (Date.now() - start < timeoutMs) {
+    if (exited()) return;
+    if (!(await isChromeReachable(running.cdpPort, 200))) return;
+    await new Promise((r) => setTimeout(r, 100));
+  }
+  try {
+    proc.kill("SIGKILL");
+  } catch {
+    // ignore
+  }
+}
diff --git a/src/browser/client.ts b/src/browser/client.ts
new file mode 100644
index 000000000..41ffa7b99
--- /dev/null
+++ b/src/browser/client.ts
@@ -0,0 +1,132 @@
+import { loadConfig } from "../config/config.js";
+import { resolveBrowserConfig } from "./config.js";
+
+export type BrowserStatus = {
+  enabled: boolean;
+  controlUrl: string;
+  running: boolean;
+  pid: number | null;
+  cdpPort: number;
+  chosenBrowser: string | null;
+  userDataDir: string | null;
+  color: string;
+  headless: boolean;
+  attachOnly: boolean;
+};
+
+export type BrowserTab = {
+  targetId: string;
+  title: string;
+  url: string;
+  type?: string;
+};
+
+export type ScreenshotResult = {
+  ok: true;
+  path: string;
+  targetId: string;
+  url: string;
+};
+
+/**
+ * Fetch `url` and parse the JSON body. The request is aborted if no response
+ * arrives within `init.timeoutMs` (default 5000 ms); non-2xx responses throw
+ * with the status and, when present, the response text.
+ */
+async function fetchJson<T>(
+  url: string,
+  init?: RequestInit & { timeoutMs?: number },
+): Promise<T> {
+  const timeoutMs = init?.timeoutMs ?? 5000;
+  const ctrl = new AbortController();
+  const t = setTimeout(() => ctrl.abort(), timeoutMs);
+  let res: Response;
+  try {
+    res = await fetch(url, { ...init, signal: ctrl.signal } as RequestInit);
+  } finally {
+    // Also clear on rejection so a failed request leaves no pending timer.
+    clearTimeout(t);
+  }
+  if (!res.ok) {
+    const text = await res.text().catch(() => "");
+    throw new Error(text ?
`${res.status}: ${text}` : `HTTP ${res.status}`); + } + return (await res.json()) as T; +} + +export function resolveBrowserControlUrl(overrideUrl?: string) { + const cfg = loadConfig(); + const resolved = resolveBrowserConfig(cfg.browser); + const url = overrideUrl?.trim() ? overrideUrl.trim() : resolved.controlUrl; + return url.replace(/\/$/, ""); +} + +export async function browserStatus(baseUrl: string): Promise { + return await fetchJson(`${baseUrl}/`, { timeoutMs: 1500 }); +} + +export async function browserStart(baseUrl: string): Promise { + await fetchJson(`${baseUrl}/start`, { method: "POST", timeoutMs: 15000 }); +} + +export async function browserStop(baseUrl: string): Promise { + await fetchJson(`${baseUrl}/stop`, { method: "POST", timeoutMs: 15000 }); +} + +export async function browserTabs(baseUrl: string): Promise { + const res = await fetchJson<{ running: boolean; tabs: BrowserTab[] }>( + `${baseUrl}/tabs`, + { timeoutMs: 3000 }, + ); + return res.tabs ?? []; +} + +export async function browserOpenTab( + baseUrl: string, + url: string, +): Promise { + return await fetchJson(`${baseUrl}/tabs/open`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ url }), + timeoutMs: 15000, + }); +} + +export async function browserFocusTab( + baseUrl: string, + targetId: string, +): Promise { + await fetchJson(`${baseUrl}/tabs/focus`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ targetId }), + timeoutMs: 5000, + }); +} + +export async function browserCloseTab( + baseUrl: string, + targetId: string, +): Promise { + await fetchJson(`${baseUrl}/tabs/${encodeURIComponent(targetId)}`, { + method: "DELETE", + timeoutMs: 5000, + }); +} + +export async function browserScreenshot( + baseUrl: string, + opts: { + targetId?: string; + fullPage?: boolean; + }, +): Promise { + const q = new URLSearchParams(); + if (opts.targetId) q.set("targetId", opts.targetId); + if (opts.fullPage) 
q.set("fullPage", "true"); + const suffix = q.toString() ? `?${q.toString()}` : ""; + return await fetchJson(`${baseUrl}/screenshot${suffix}`, { + timeoutMs: 20000, + }); +} diff --git a/src/browser/config.test.ts b/src/browser/config.test.ts new file mode 100644 index 000000000..1c64cc7d6 --- /dev/null +++ b/src/browser/config.test.ts @@ -0,0 +1,46 @@ +import { describe, expect, it } from "vitest"; +import { + resolveBrowserConfig, + shouldStartLocalBrowserServer, +} from "./config.js"; + +describe("browser config", () => { + it("defaults to enabled with loopback control url and lobster-orange color", () => { + const resolved = resolveBrowserConfig(undefined); + expect(resolved.enabled).toBe(true); + expect(resolved.controlPort).toBe(18790); + expect(resolved.cdpPort).toBe(18791); + expect(resolved.controlHost).toBe("127.0.0.1"); + expect(resolved.color).toBe("#FF4500"); + expect(shouldStartLocalBrowserServer(resolved)).toBe(true); + }); + + it("normalizes hex colors", () => { + const resolved = resolveBrowserConfig({ + controlUrl: "http://localhost:18790", + color: "ff4500", + }); + expect(resolved.color).toBe("#FF4500"); + }); + + it("falls back to default color for invalid hex", () => { + const resolved = resolveBrowserConfig({ + controlUrl: "http://localhost:18790", + color: "#GGGGGG", + }); + expect(resolved.color).toBe("#FF4500"); + }); + + it("treats non-loopback control urls as remote", () => { + const resolved = resolveBrowserConfig({ + controlUrl: "http://example.com:18790", + }); + expect(shouldStartLocalBrowserServer(resolved)).toBe(false); + }); + + it("rejects unsupported protocols", () => { + expect(() => + resolveBrowserConfig({ controlUrl: "ws://127.0.0.1:18790" }), + ).toThrow(/must be http/i); + }); +}); diff --git a/src/browser/config.ts b/src/browser/config.ts new file mode 100644 index 000000000..9307c3278 --- /dev/null +++ b/src/browser/config.ts @@ -0,0 +1,82 @@ +import type { BrowserConfig } from "../config/config.js"; +import { + 
DEFAULT_CLAWD_BROWSER_CDP_PORT, + DEFAULT_CLAWD_BROWSER_COLOR, + DEFAULT_CLAWD_BROWSER_CONTROL_URL, + DEFAULT_CLAWD_BROWSER_ENABLED, +} from "./constants.js"; + +export type ResolvedBrowserConfig = { + enabled: boolean; + controlUrl: string; + controlHost: string; + controlPort: number; + cdpPort: number; + color: string; + headless: boolean; + attachOnly: boolean; +}; + +function isLoopbackHost(host: string) { + const h = host.trim().toLowerCase(); + return h === "localhost" || h === "127.0.0.1" || h === "[::1]" || h === "::1"; +} + +function normalizeHexColor(raw: string | undefined) { + const value = (raw ?? "").trim(); + if (!value) return DEFAULT_CLAWD_BROWSER_COLOR; + const normalized = value.startsWith("#") ? value : `#${value}`; + if (!/^#[0-9a-fA-F]{6}$/.test(normalized)) return DEFAULT_CLAWD_BROWSER_COLOR; + return normalized.toUpperCase(); +} + +export function resolveBrowserConfig( + cfg: BrowserConfig | undefined, +): ResolvedBrowserConfig { + const enabled = cfg?.enabled ?? DEFAULT_CLAWD_BROWSER_ENABLED; + const controlUrl = ( + cfg?.controlUrl ?? DEFAULT_CLAWD_BROWSER_CONTROL_URL + ).trim(); + const parsed = new URL(controlUrl); + if (parsed.protocol !== "http:" && parsed.protocol !== "https:") { + throw new Error( + `browser.controlUrl must be http(s), got: ${parsed.protocol.replace(":", "")}`, + ); + } + + const port = + parsed.port && Number.parseInt(parsed.port, 10) > 0 + ? Number.parseInt(parsed.port, 10) + : parsed.protocol === "https:" + ? 
443 + : 80; + + if (Number.isNaN(port) || port <= 0 || port > 65535) { + throw new Error(`browser.controlUrl has invalid port: ${parsed.port}`); + } + + const cdpPort = DEFAULT_CLAWD_BROWSER_CDP_PORT; + if (port === cdpPort) { + throw new Error( + `browser.controlUrl port (${port}) must not equal CDP port (${cdpPort})`, + ); + } + + const headless = cfg?.headless === true; + const attachOnly = cfg?.attachOnly === true; + + return { + enabled, + controlUrl: parsed.toString().replace(/\/$/, ""), + controlHost: parsed.hostname, + controlPort: port, + cdpPort, + color: normalizeHexColor(cfg?.color), + headless, + attachOnly, + }; +} + +export function shouldStartLocalBrowserServer(resolved: ResolvedBrowserConfig) { + return isLoopbackHost(resolved.controlHost); +} diff --git a/src/browser/constants.ts b/src/browser/constants.ts new file mode 100644 index 000000000..7f7bfee1a --- /dev/null +++ b/src/browser/constants.ts @@ -0,0 +1,5 @@ +export const DEFAULT_CLAWD_BROWSER_ENABLED = true; +export const DEFAULT_CLAWD_BROWSER_CONTROL_URL = "http://127.0.0.1:18790"; +export const DEFAULT_CLAWD_BROWSER_CDP_PORT = 18791; +export const DEFAULT_CLAWD_BROWSER_COLOR = "#FF4500"; +export const DEFAULT_CLAWD_BROWSER_PROFILE_NAME = "clawd"; diff --git a/src/browser/server.ts b/src/browser/server.ts new file mode 100644 index 000000000..d2d8c1438 --- /dev/null +++ b/src/browser/server.ts @@ -0,0 +1,311 @@ +import type { Server } from "node:http"; +import path from "node:path"; +import express from "express"; + +import { loadConfig } from "../config/config.js"; +import { logError, logInfo, logWarn } from "../logger.js"; +import { ensureMediaDir, saveMediaBuffer } from "../media/store.js"; +import { defaultRuntime, type RuntimeEnv } from "../runtime.js"; +import { captureScreenshotPng } from "./cdp.js"; +import { + isChromeReachable, + launchClawdChrome, + type RunningChrome, + stopClawdChrome, +} from "./chrome.js"; +import { + resolveBrowserConfig, + shouldStartLocalBrowserServer, +} 
from "./config.js"; + +export type BrowserTab = { + targetId: string; + title: string; + url: string; + wsUrl?: string; + type?: string; +}; + +type BrowserServerState = { + server: Server; + port: number; + cdpPort: number; + running: RunningChrome | null; + resolved: ReturnType; +}; + +let state: BrowserServerState | null = null; + +function jsonError(res: express.Response, status: number, message: string) { + res.status(status).json({ error: message }); +} + +async function fetchJson(url: string, timeoutMs = 1500): Promise { + const ctrl = new AbortController(); + const t = setTimeout(() => ctrl.abort(), timeoutMs); + try { + const res = await fetch(url, { signal: ctrl.signal }); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + return (await res.json()) as T; + } finally { + clearTimeout(t); + } +} + +async function listTabs(cdpPort: number): Promise { + const raw = await fetchJson< + Array<{ + id?: string; + title?: string; + url?: string; + webSocketDebuggerUrl?: string; + type?: string; + }> + >(`http://127.0.0.1:${cdpPort}/json/list`); + return raw + .map((t) => ({ + targetId: t.id ?? "", + title: t.title ?? "", + url: t.url ?? "", + wsUrl: t.webSocketDebuggerUrl, + type: t.type, + })) + .filter((t) => Boolean(t.targetId)); +} + +async function openTab(cdpPort: number, url: string): Promise { + const encoded = encodeURIComponent(url); + const created = await fetchJson<{ + id?: string; + title?: string; + url?: string; + webSocketDebuggerUrl?: string; + type?: string; + }>(`http://127.0.0.1:${cdpPort}/json/new?${encoded}`); + + if (!created.id) throw new Error("Failed to open tab (missing id)"); + return { + targetId: created.id, + title: created.title ?? "", + url: created.url ?? 
url, + wsUrl: created.webSocketDebuggerUrl, + type: created.type, + }; +} + +async function activateTab(cdpPort: number, targetId: string): Promise { + await fetchJson(`http://127.0.0.1:${cdpPort}/json/activate/${targetId}`); +} + +async function closeTab(cdpPort: number, targetId: string): Promise { + await fetchJson(`http://127.0.0.1:${cdpPort}/json/close/${targetId}`); +} + +async function ensureBrowserAvailable(runtime: RuntimeEnv): Promise { + if (!state) throw new Error("Browser server not started"); + if (await isChromeReachable(state.cdpPort)) return; + if (state.resolved.attachOnly) { + throw new Error("Browser attachOnly is enabled and no browser is running."); + } + + const launched = await launchClawdChrome(state.resolved, runtime); + state.running = launched; + launched.proc.on("exit", () => { + if (state?.running?.pid === launched.pid) { + state.running = null; + } + }); + return; +} + +export async function startBrowserControlServerFromConfig( + runtime: RuntimeEnv = defaultRuntime, +): Promise { + if (state) return state; + + const cfg = loadConfig(); + const resolved = resolveBrowserConfig(cfg.browser); + if (!resolved.enabled) return null; + + if (!shouldStartLocalBrowserServer(resolved)) { + logInfo( + `browser control URL is non-loopback (${resolved.controlUrl}); skipping local server start`, + runtime, + ); + return null; + } + + const app = express(); + app.use(express.json({ limit: "1mb" })); + + app.get("/", async (_req, res) => { + if (!state) return jsonError(res, 503, "browser server not started"); + const reachable = await isChromeReachable(state.cdpPort, 300); + res.json({ + enabled: state.resolved.enabled, + controlUrl: state.resolved.controlUrl, + running: reachable, + pid: state.running?.pid ?? null, + cdpPort: state.cdpPort, + chosenBrowser: state.running?.exe.kind ?? null, + userDataDir: state.running?.userDataDir ?? 
null, + color: state.resolved.color, + headless: state.resolved.headless, + attachOnly: state.resolved.attachOnly, + }); + }); + + app.post("/start", async (_req, res) => { + try { + await ensureBrowserAvailable(runtime); + res.json({ ok: true }); + } catch (err) { + jsonError(res, 500, String(err)); + } + }); + + app.post("/stop", async (_req, res) => { + if (!state) return jsonError(res, 503, "browser server not started"); + if (!state.running) return res.json({ ok: true, stopped: false }); + try { + await stopClawdChrome(state.running); + state.running = null; + res.json({ ok: true, stopped: true }); + } catch (err) { + jsonError(res, 500, String(err)); + } + }); + + app.get("/tabs", async (_req, res) => { + if (!state) return jsonError(res, 503, "browser server not started"); + const reachable = await isChromeReachable(state.cdpPort, 300); + if (!reachable) + return res.json({ running: false, tabs: [] as BrowserTab[] }); + try { + const tabs = await listTabs(state.cdpPort); + res.json({ running: true, tabs }); + } catch (err) { + jsonError(res, 500, String(err)); + } + }); + + app.post("/tabs/open", async (req, res) => { + if (!state) return jsonError(res, 503, "browser server not started"); + const url = String((req.body as { url?: unknown })?.url ?? "").trim(); + if (!url) return jsonError(res, 400, "url is required"); + try { + await ensureBrowserAvailable(runtime); + const tab = await openTab(state.cdpPort, url); + res.json(tab); + } catch (err) { + jsonError(res, 500, String(err)); + } + }); + + app.post("/tabs/focus", async (req, res) => { + if (!state) return jsonError(res, 503, "browser server not started"); + const targetId = String( + (req.body as { targetId?: unknown })?.targetId ?? 
"", + ).trim(); + if (!targetId) return jsonError(res, 400, "targetId is required"); + const reachable = await isChromeReachable(state.cdpPort, 300); + if (!reachable) return jsonError(res, 409, "browser not running"); + try { + await activateTab(state.cdpPort, targetId); + res.json({ ok: true }); + } catch (err) { + jsonError(res, 500, String(err)); + } + }); + + app.delete("/tabs/:targetId", async (req, res) => { + if (!state) return jsonError(res, 503, "browser server not started"); + const targetId = String(req.params.targetId ?? "").trim(); + if (!targetId) return jsonError(res, 400, "targetId is required"); + const reachable = await isChromeReachable(state.cdpPort, 300); + if (!reachable) return jsonError(res, 409, "browser not running"); + try { + await closeTab(state.cdpPort, targetId); + res.json({ ok: true }); + } catch (err) { + jsonError(res, 500, String(err)); + } + }); + + app.get("/screenshot", async (req, res) => { + if (!state) return jsonError(res, 503, "browser server not started"); + const targetId = + typeof req.query.targetId === "string" ? req.query.targetId.trim() : ""; + const fullPage = + req.query.fullPage === "true" || req.query.fullPage === "1"; + + const reachable = await isChromeReachable(state.cdpPort, 300); + if (!reachable) return jsonError(res, 409, "browser not running"); + + try { + const tabs = await listTabs(state.cdpPort); + const chosen = targetId + ? 
tabs.find((t) => t.targetId === targetId) + : tabs.at(0); + if (!chosen?.wsUrl) return jsonError(res, 404, "tab not found"); + + const png = await captureScreenshotPng({ wsUrl: chosen.wsUrl, fullPage }); + await ensureMediaDir(); + const saved = await saveMediaBuffer(png, "image/png", "browser"); + const filePath = path.resolve(saved.path); + res.json({ + ok: true, + path: filePath, + targetId: chosen.targetId, + url: chosen.url, + }); + } catch (err) { + jsonError(res, 500, String(err)); + } + }); + + const port = resolved.controlPort; + const server = await new Promise((resolve, reject) => { + const s = app.listen(port, "127.0.0.1", () => resolve(s)); + s.once("error", reject); + }).catch((err) => { + logError( + `clawd browser server failed to bind 127.0.0.1:${port}: ${String(err)}`, + ); + return null; + }); + + if (!server) return null; + + state = { + server, + port, + cdpPort: resolved.cdpPort, + running: null, + resolved, + }; + + logInfo( + `🦞 clawd browser control listening on http://127.0.0.1:${port}/`, + runtime, + ); + return state; +} + +export async function stopBrowserControlServer( + runtime: RuntimeEnv = defaultRuntime, +) { + if (!state) return; + const current = state; + state = null; + try { + if (current.running) { + await stopClawdChrome(current.running).catch((err) => + logWarn(`clawd browser stop failed: ${String(err)}`, runtime), + ); + } + } catch { + // ignore + } + await new Promise((resolve) => current.server.close(() => resolve())); +} diff --git a/src/cli/program.ts b/src/cli/program.ts index 299c4a3b2..c35e71661 100644 --- a/src/cli/program.ts +++ b/src/cli/program.ts @@ -1,5 +1,16 @@ import chalk from "chalk"; import { Command } from "commander"; +import { + browserCloseTab, + browserFocusTab, + browserOpenTab, + browserScreenshot, + browserStart, + browserStatus, + browserStop, + browserTabs, + resolveBrowserControlUrl, +} from "../browser/client.js"; import { agentCommand } from "../commands/agent.js"; import { healthCommand } 
from "../commands/health.js"; import { sendCommand } from "../commands/send.js"; @@ -360,5 +371,218 @@ Shows token usage per session when the agent reports it; set inbound.reply.agent } }); + const browser = program + .command("browser") + .description("Manage clawd's dedicated browser (Chrome/Chromium)") + .option( + "--url ", + "Override browser control URL (default from ~/.clawdis/clawdis.json)", + ) + .option("--json", "Output machine-readable JSON", false) + .addHelpText( + "after", + ` +Examples: + clawdis browser status + clawdis browser start + clawdis browser tabs + clawdis browser open https://example.com + clawdis browser screenshot # emits MEDIA: + clawdis browser screenshot --full-page +`, + ) + .action(() => { + defaultRuntime.error( + danger('Missing subcommand. Try: "clawdis browser status"'), + ); + defaultRuntime.exit(1); + }); + + const parentOpts = (cmd: Command) => + cmd.parent?.opts?.() as { url?: string; json?: boolean }; + + browser + .command("status") + .description("Show browser status") + .action(async (_opts, cmd) => { + const parent = parentOpts(cmd); + const baseUrl = resolveBrowserControlUrl(parent?.url); + try { + const status = await browserStatus(baseUrl); + if (parent?.json) { + defaultRuntime.log(JSON.stringify(status, null, 2)); + return; + } + defaultRuntime.log( + [ + `enabled: ${status.enabled}`, + `running: ${status.running}`, + `controlUrl: ${status.controlUrl}`, + `cdpPort: ${status.cdpPort}`, + `browser: ${status.chosenBrowser ?? 
"unknown"}`, + `profileColor: ${status.color}`, + ].join("\n"), + ); + } catch (err) { + defaultRuntime.error(danger(String(err))); + defaultRuntime.exit(1); + } + }); + + browser + .command("start") + .description("Start the clawd browser (no-op if already running)") + .action(async (_opts, cmd) => { + const parent = parentOpts(cmd); + const baseUrl = resolveBrowserControlUrl(parent?.url); + try { + await browserStart(baseUrl); + const status = await browserStatus(baseUrl); + if (parent?.json) { + defaultRuntime.log(JSON.stringify(status, null, 2)); + return; + } + defaultRuntime.log(info(`🦞 clawd browser running: ${status.running}`)); + } catch (err) { + defaultRuntime.error(danger(String(err))); + defaultRuntime.exit(1); + } + }); + + browser + .command("stop") + .description("Stop the clawd browser (best-effort)") + .action(async (_opts, cmd) => { + const parent = parentOpts(cmd); + const baseUrl = resolveBrowserControlUrl(parent?.url); + try { + await browserStop(baseUrl); + const status = await browserStatus(baseUrl); + if (parent?.json) { + defaultRuntime.log(JSON.stringify(status, null, 2)); + return; + } + defaultRuntime.log(info(`🦞 clawd browser running: ${status.running}`)); + } catch (err) { + defaultRuntime.error(danger(String(err))); + defaultRuntime.exit(1); + } + }); + + browser + .command("tabs") + .description("List open tabs") + .action(async (_opts, cmd) => { + const parent = parentOpts(cmd); + const baseUrl = resolveBrowserControlUrl(parent?.url); + try { + const tabs = await browserTabs(baseUrl); + if (parent?.json) { + defaultRuntime.log(JSON.stringify({ tabs }, null, 2)); + return; + } + if (tabs.length === 0) { + defaultRuntime.log("No tabs (browser closed or no targets)."); + return; + } + defaultRuntime.log( + tabs + .map( + (t, i) => + `${i + 1}. 
${t.title || "(untitled)"}\n ${t.url}\n id: ${t.targetId}`, + ) + .join("\n"), + ); + } catch (err) { + defaultRuntime.error(danger(String(err))); + defaultRuntime.exit(1); + } + }); + + browser + .command("open") + .description("Open a URL in a new tab") + .argument("", "URL to open") + .action(async (url: string, cmd) => { + const parent = parentOpts(cmd); + const baseUrl = resolveBrowserControlUrl(parent?.url); + try { + const tab = await browserOpenTab(baseUrl, url); + if (parent?.json) { + defaultRuntime.log(JSON.stringify(tab, null, 2)); + return; + } + defaultRuntime.log(`opened: ${tab.url}\nid: ${tab.targetId}`); + } catch (err) { + defaultRuntime.error(danger(String(err))); + defaultRuntime.exit(1); + } + }); + + browser + .command("focus") + .description("Focus/activate a tab by target id") + .argument("", "CDP target id") + .action(async (targetId: string, cmd) => { + const parent = parentOpts(cmd); + const baseUrl = resolveBrowserControlUrl(parent?.url); + try { + await browserFocusTab(baseUrl, targetId); + if (parent?.json) { + defaultRuntime.log(JSON.stringify({ ok: true }, null, 2)); + return; + } + defaultRuntime.log("ok"); + } catch (err) { + defaultRuntime.error(danger(String(err))); + defaultRuntime.exit(1); + } + }); + + browser + .command("close") + .description("Close a tab by target id") + .argument("", "CDP target id") + .action(async (targetId: string, cmd) => { + const parent = parentOpts(cmd); + const baseUrl = resolveBrowserControlUrl(parent?.url); + try { + await browserCloseTab(baseUrl, targetId); + if (parent?.json) { + defaultRuntime.log(JSON.stringify({ ok: true }, null, 2)); + return; + } + defaultRuntime.log("ok"); + } catch (err) { + defaultRuntime.error(danger(String(err))); + defaultRuntime.exit(1); + } + }); + + browser + .command("screenshot") + .description("Capture a screenshot (defaults to first tab)") + .argument("[targetId]", "CDP target id") + .option("--full-page", "Capture full page (best-effort)", false) + 
.action(async (targetId: string | undefined, opts, cmd) => { + const parent = parentOpts(cmd); + const baseUrl = resolveBrowserControlUrl(parent?.url); + try { + const result = await browserScreenshot(baseUrl, { + targetId: targetId?.trim() || undefined, + fullPage: Boolean(opts.fullPage), + }); + if (parent?.json) { + defaultRuntime.log(JSON.stringify(result, null, 2)); + return; + } + // Print MEDIA: token so the agent can forward the image as an attachment. + defaultRuntime.log(`MEDIA:${result.path}`); + } catch (err) { + defaultRuntime.error(danger(String(err))); + defaultRuntime.exit(1); + } + }); + return program; } diff --git a/src/config/config.ts b/src/config/config.ts index 1a7b05348..16c7936bf 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -49,6 +49,18 @@ export type WebChatConfig = { port?: number; }; +export type BrowserConfig = { + enabled?: boolean; + /** Base URL of the clawd browser control server. Default: http://127.0.0.1:18790 */ + controlUrl?: string; + /** Accent color for the clawd browser profile (hex). Default: #FF4500 */ + color?: string; + /** Start Chrome headless (best-effort). Default: false */ + headless?: boolean; + /** If true: never launch; only attach to an existing browser. 
Default: false */ + attachOnly?: boolean; +}; + export type CronConfig = { enabled?: boolean; store?: string; @@ -74,6 +86,7 @@ export type GroupChatConfig = { export type ClawdisConfig = { logging?: LoggingConfig; + browser?: BrowserConfig; inbound?: { allowFrom?: string[]; // E.164 numbers allowed to trigger auto-reply (without whatsapp:) messagePrefix?: string; // Prefix added to all inbound messages (default: "[clawdis]" if no allowFrom, else "") @@ -203,6 +216,15 @@ const ClawdisSchema = z.object({ file: z.string().optional(), }) .optional(), + browser: z + .object({ + enabled: z.boolean().optional(), + controlUrl: z.string().optional(), + color: z.string().optional(), + headless: z.boolean().optional(), + attachOnly: z.boolean().optional(), + }) + .optional(), inbound: z .object({ allowFrom: z.array(z.string()).optional(), diff --git a/src/gateway/server.ts b/src/gateway/server.ts index 25e57a197..12cb8fbf7 100644 --- a/src/gateway/server.ts +++ b/src/gateway/server.ts @@ -8,6 +8,10 @@ import os from "node:os"; import path from "node:path"; import chalk from "chalk"; import { type WebSocket, WebSocketServer } from "ws"; +import { + startBrowserControlServerFromConfig, + stopBrowserControlServer, +} from "../browser/server.js"; import { createDefaultDeps } from "../cli/deps.js"; import { agentCommand } from "../commands/agent.js"; import { getHealthSnapshot, type HealthSummary } from "../commands/health.js"; @@ -2109,6 +2113,11 @@ export async function startGatewayServer( logError(`gateway: webchat failed to start: ${String(err)}`); }); + // Start clawd browser control server (unless disabled via config). + void startBrowserControlServerFromConfig(defaultRuntime).catch((err) => { + logError(`gateway: clawd browser server failed to start: ${String(err)}`); + }); + // Launch configured providers (WhatsApp Web, Telegram) so gateway replies via the // surface the message came from. Tests can opt out via CLAWDIS_SKIP_PROVIDERS. 
if (process.env.CLAWDIS_SKIP_PROVIDERS !== "1") { @@ -2168,6 +2177,7 @@ export async function startGatewayServer( } } clients.clear(); + await stopBrowserControlServer().catch(() => {}); await Promise.allSettled(providerTasks); await new Promise((resolve) => wss.close(() => resolve())); await new Promise((resolve, reject) =>