diff --git a/CHANGELOG.md b/CHANGELOG.md index 10884763c..1234e2ec8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 2026.1.11-1 + +### New Features and Changes +- Agents/Browser: add `browser.target` (sandbox/host/custom) with sandbox host-control gating via `agents.defaults.sandbox.browser.allowHostControl`, and expand browser tool docs (remote control, profiles, internals). + ## 2026.1.10-4 ### Fixes diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index c92c5d327..f87eef231 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -1376,6 +1376,7 @@ Legacy: `perSession` is still supported (`true` → `scope: "session"`, noVncPort: 6080, headless: false, enableNoVnc: true, + allowHostControl: false, autoStart: true, autoStartTimeoutMs: 12000 }, @@ -1418,6 +1419,11 @@ the noVNC URL is injected into the system prompt so the agent can reference it. This does not require `browser.enabled` in the main config; the sandbox control URL is injected per session. +`agents.defaults.sandbox.browser.allowHostControl` (default: false) allows +sandboxed sessions to explicitly target the **host** browser control server +via the browser tool (`target: "host"`). Leave this off if you want strict +sandbox isolation. + ### `models` (custom providers + base URLs) Clawdbot uses the **pi-coding-agent** model catalog. You can add custom providers diff --git a/docs/gateway/sandboxing.md b/docs/gateway/sandboxing.md index 5aec128e2..0dd5eef72 100644 --- a/docs/gateway/sandboxing.md +++ b/docs/gateway/sandboxing.md @@ -21,6 +21,7 @@ and process access when the model does something dumb. - Optional sandboxed browser (`agents.defaults.sandbox.browser`). - By default, the sandbox browser auto-starts (ensures CDP is reachable) when the browser tool needs it. Configure via `agents.defaults.sandbox.browser.autoStart` and `agents.defaults.sandbox.browser.autoStartTimeoutMs`. 
+ - `agents.defaults.sandbox.browser.allowHostControl` lets sandboxed sessions target the host browser explicitly. Not sandboxed: - The Gateway process itself. diff --git a/docs/tools/browser.md b/docs/tools/browser.md index 5c31fc3bb..3b29ab680 100644 --- a/docs/tools/browser.md +++ b/docs/tools/browser.md @@ -12,6 +12,11 @@ Clawdbot can run a **dedicated Chrome/Chromium profile** that the agent controls It is isolated from your personal browser and is managed through a small local control server. +Beginner view: +- Think of it as a **separate, agent-only browser**. +- It does **not** touch your personal Chrome profile. +- The agent can **open tabs, read pages, click, and type** in a safe lane. + ## What you get - A separate browser profile named **clawd** (orange accent by default). @@ -65,6 +70,7 @@ Notes: the default browser ports shift to stay in the same “family” (control = gateway + 2). - `cdpUrl` defaults to `controlUrl + 1` when unset. - `attachOnly: true` means “never launch Chrome; only attach if it is already running.” +- `color` + per-profile `color` tint the browser UI so you can see which profile is active. ## Local vs remote control @@ -75,6 +81,31 @@ Notes: - **Remote CDP:** set `browser.profiles.<name>.cdpUrl` (or `browser.cdpUrl`) to attach to a remote Chrome. In this case, Clawdbot will not launch a local browser. +## Remote browser (control server) + +You can run the **browser control server** on another machine and point your +Gateway at it with a remote `controlUrl`. This lets the agent drive a browser +outside the host (lab box, VM, remote desktop, etc.). + +Key points: +- The **control server** speaks to Chrome/Chromium via **CDP**. +- The **Gateway** only needs the HTTP control URL. +- Profiles are resolved on the **control server** side. 
+ +Example: +```json5 +{ + browser: { + enabled: true, + controlUrl: "http://10.0.0.42:18791", + defaultProfile: "work" + } +} +``` + +Use `profiles.<name>.cdpUrl` for **remote CDP** if you want the Gateway to talk +directly to a Chrome instance without a remote control server. + ## Profiles (multi-browser) Clawdbot supports multiple named profiles. Each profile has its own: @@ -129,6 +160,18 @@ Some features (navigate/act/ai snapshot, element screenshots, PDF) require Playwright. In embedded gateway builds, Playwright may be unavailable; those endpoints return a clear 501 error. ARIA snapshots and basic screenshots still work. +## How it works (internal) + +High-level flow: +- A small **control server** accepts HTTP requests. +- It connects to Chrome/Chromium via **CDP**. +- For advanced actions (click/type/snapshot/PDF), it uses **Playwright** on top + of CDP. +- When Playwright is missing, only non-Playwright operations are available. + +This design keeps the agent on a stable, deterministic interface while letting +you swap local/remote browsers and profiles. + ## CLI quick reference All commands accept `--browser-profile <name>` to target a specific profile. @@ -185,3 +228,21 @@ Notes: For Linux-specific issues (especially snap Chromium), see [Browser troubleshooting](/tools/browser-linux-troubleshooting). + +## Agent tools + how control works + +The agent gets **one tool** for browser automation: +- `browser` — status/start/stop/tabs/open/focus/close/snapshot/screenshot/navigate/act + +How it maps: +- `browser snapshot` returns a stable UI tree (AI or ARIA). +- `browser act` uses the snapshot `ref` IDs to click/type/drag/select. +- `browser screenshot` captures pixels (full page or element). +- `browser` accepts: + - `profile` to choose a named browser profile (host or remote control server). + - `target` (`sandbox` | `host` | `custom`) to select where the browser lives. + - `controlUrl` sets `target: "custom"` implicitly (remote control server). 
+ - In sandboxed sessions, `target: "host"` requires `agents.defaults.sandbox.browser.allowHostControl=true`. + - If `target` is omitted: sandboxed sessions default to `sandbox`, non-sandbox sessions default to `host`. + +This keeps the agent deterministic and avoids brittle selectors. diff --git a/src/agents/clawdbot-tools.ts b/src/agents/clawdbot-tools.ts index d59e8793f..7b5a9714a 100644 --- a/src/agents/clawdbot-tools.ts +++ b/src/agents/clawdbot-tools.ts @@ -17,6 +17,7 @@ import { createSessionsSpawnTool } from "./tools/sessions-spawn-tool.js"; export function createClawdbotTools(options?: { browserControlUrl?: string; + allowHostBrowserControl?: boolean; agentSessionKey?: string; agentProvider?: GatewayMessageProvider; agentAccountId?: string; @@ -37,7 +38,10 @@ export function createClawdbotTools(options?: { agentDir: options?.agentDir, }); return [ - createBrowserTool({ defaultControlUrl: options?.browserControlUrl }), + createBrowserTool({ + defaultControlUrl: options?.browserControlUrl, + allowHostControl: options?.allowHostBrowserControl, + }), createCanvasTool(), createNodesTool(), createCronTool(), diff --git a/src/agents/pi-embedded-runner.test.ts b/src/agents/pi-embedded-runner.test.ts index 705ad8dcf..6c85bff4b 100644 --- a/src/agents/pi-embedded-runner.test.ts +++ b/src/agents/pi-embedded-runner.test.ts @@ -45,6 +45,7 @@ describe("buildEmbeddedSandboxInfo", () => { allow: ["bash"], deny: ["browser"], }, + browserAllowHostControl: true, browser: { controlUrl: "http://localhost:9222", noVncUrl: "http://localhost:6080", @@ -59,6 +60,7 @@ describe("buildEmbeddedSandboxInfo", () => { agentWorkspaceMount: undefined, browserControlUrl: "http://localhost:9222", browserNoVncUrl: "http://localhost:6080", + hostBrowserAllowed: true, }); }); @@ -86,6 +88,7 @@ describe("buildEmbeddedSandboxInfo", () => { allow: ["bash"], deny: ["browser"], }, + browserAllowHostControl: false, } satisfies SandboxContext; expect( @@ -99,6 +102,7 @@ 
describe("buildEmbeddedSandboxInfo", () => { workspaceDir: "/tmp/clawdbot-sandbox", workspaceAccess: "none", agentWorkspaceMount: undefined, + hostBrowserAllowed: false, elevated: { allowed: true, defaultLevel: "on" }, }); }); diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts index 35af105c9..6b52d5e12 100644 --- a/src/agents/pi-embedded-runner.ts +++ b/src/agents/pi-embedded-runner.ts @@ -479,6 +479,7 @@ type EmbeddedSandboxInfo = { agentWorkspaceMount?: string; browserControlUrl?: string; browserNoVncUrl?: string; + hostBrowserAllowed?: boolean; elevated?: { allowed: boolean; defaultLevel: "on" | "off"; @@ -570,6 +571,7 @@ export function buildEmbeddedSandboxInfo( sandbox.workspaceAccess === "ro" ? "/agent" : undefined, browserControlUrl: sandbox.browser?.controlUrl, browserNoVncUrl: sandbox.browser?.noVncUrl, + hostBrowserAllowed: sandbox.browserAllowHostControl, ...(elevatedAllowed ? { elevated: { diff --git a/src/agents/pi-tools-agent-config.test.ts b/src/agents/pi-tools-agent-config.test.ts index 4ed26ff96..485cc0be1 100644 --- a/src/agents/pi-tools-agent-config.test.ts +++ b/src/agents/pi-tools-agent-config.test.ts @@ -240,6 +240,7 @@ describe("Agent-specific tool filtering", () => { allow: ["read", "write", "bash"], deny: [], }, + browserAllowHostControl: false, }, }); diff --git a/src/agents/pi-tools.test.ts b/src/agents/pi-tools.test.ts index d89e2051b..0f2470b50 100644 --- a/src/agents/pi-tools.test.ts +++ b/src/agents/pi-tools.test.ts @@ -25,6 +25,7 @@ describe("createClawdbotCodingTools", () => { required?: string[]; }; expect(parameters.properties?.action).toBeDefined(); + expect(parameters.properties?.target).toBeDefined(); expect(parameters.properties?.controlUrl).toBeDefined(); expect(parameters.properties?.targetUrl).toBeDefined(); expect(parameters.properties?.request).toBeDefined(); @@ -326,6 +327,7 @@ describe("createClawdbotCodingTools", () => { allow: ["bash"], deny: ["browser"], }, + browserAllowHostControl: 
false, }; const tools = createClawdbotCodingTools({ sandbox }); expect(tools.some((tool) => tool.name === "bash")).toBe(true); @@ -357,6 +359,7 @@ describe("createClawdbotCodingTools", () => { allow: ["read", "write", "edit"], deny: [], }, + browserAllowHostControl: false, }; const tools = createClawdbotCodingTools({ sandbox }); expect(tools.some((tool) => tool.name === "read")).toBe(true); diff --git a/src/agents/pi-tools.ts b/src/agents/pi-tools.ts index bfc9af9fc..f8f7bee82 100644 --- a/src/agents/pi-tools.ts +++ b/src/agents/pi-tools.ts @@ -626,6 +626,9 @@ export function createClawdbotCodingTools(options?: { createWhatsAppLoginTool(), ...createClawdbotTools({ browserControlUrl: sandbox?.browser?.controlUrl, + allowHostBrowserControl: sandbox + ? sandbox.browserAllowHostControl + : true, agentSessionKey: options?.sessionKey, agentProvider: resolveGatewayMessageProvider(options?.messageProvider), agentAccountId: options?.agentAccountId, diff --git a/src/agents/pi-tools.workspace-paths.test.ts b/src/agents/pi-tools.workspace-paths.test.ts index e34977843..c27b3df53 100644 --- a/src/agents/pi-tools.workspace-paths.test.ts +++ b/src/agents/pi-tools.workspace-paths.test.ts @@ -186,6 +186,7 @@ describe("sandboxed workspace paths", () => { env: { LANG: "C.UTF-8" }, }, tools: { allow: [], deny: [] }, + browserAllowHostControl: false, }; const testFile = "sandbox.txt"; diff --git a/src/agents/sandbox.ts b/src/agents/sandbox.ts index c6aad1067..f0e49205b 100644 --- a/src/agents/sandbox.ts +++ b/src/agents/sandbox.ts @@ -78,6 +78,7 @@ export type SandboxBrowserConfig = { noVncPort: number; headless: boolean; enableNoVnc: boolean; + allowHostControl: boolean; autoStart: boolean; autoStartTimeoutMs: number; }; @@ -138,6 +139,7 @@ export type SandboxContext = { containerWorkdir: string; docker: SandboxDockerConfig; tools: SandboxToolPolicy; + browserAllowHostControl: boolean; browser?: SandboxBrowserContext; }; @@ -333,6 +335,10 @@ export function 
resolveSandboxBrowserConfig(params: { headless: agentBrowser?.headless ?? globalBrowser?.headless ?? false, enableNoVnc: agentBrowser?.enableNoVnc ?? globalBrowser?.enableNoVnc ?? true, + allowHostControl: + agentBrowser?.allowHostControl ?? + globalBrowser?.allowHostControl ?? + false, autoStart: agentBrowser?.autoStart ?? globalBrowser?.autoStart ?? true, autoStartTimeoutMs: agentBrowser?.autoStartTimeoutMs ?? @@ -1324,6 +1330,7 @@ export async function resolveSandboxContext(params: { containerWorkdir: cfg.docker.workdir, docker: cfg.docker, tools: cfg.tools, + browserAllowHostControl: cfg.browser.allowHostControl, browser: browser ?? undefined, }; } diff --git a/src/agents/system-prompt.ts b/src/agents/system-prompt.ts index 00fcb3075..826dc788e 100644 --- a/src/agents/system-prompt.ts +++ b/src/agents/system-prompt.ts @@ -32,6 +32,7 @@ export function buildAgentSystemPrompt(params: { agentWorkspaceMount?: string; browserControlUrl?: string; browserNoVncUrl?: string; + hostBrowserAllowed?: boolean; elevated?: { allowed: boolean; defaultLevel: "on" | "off"; @@ -243,6 +244,11 @@ export function buildAgentSystemPrompt(params: { params.sandboxInfo.browserNoVncUrl ? `Sandbox browser observer (noVNC): ${params.sandboxInfo.browserNoVncUrl}` : "", + params.sandboxInfo.hostBrowserAllowed === true + ? "Host browser control: allowed." + : params.sandboxInfo.hostBrowserAllowed === false + ? "Host browser control: blocked." + : "", params.sandboxInfo.elevated?.allowed ? "Elevated bash is available for this session." 
: "", diff --git a/src/agents/tools/browser-tool.ts b/src/agents/tools/browser-tool.ts index 6e997a1ae..3cb561416 100644 --- a/src/agents/tools/browser-tool.ts +++ b/src/agents/tools/browser-tool.ts @@ -105,6 +105,13 @@ const BrowserToolSchema = Type.Object({ Type.Literal("dialog"), Type.Literal("act"), ]), + target: Type.Optional( + Type.Union([ + Type.Literal("sandbox"), + Type.Literal("host"), + Type.Literal("custom"), + ]), + ), profile: Type.Optional(Type.String()), controlUrl: Type.Optional(Type.String()), targetUrl: Type.Optional(Type.String()), @@ -124,20 +131,60 @@ const BrowserToolSchema = Type.Object({ request: Type.Optional(BrowserActSchema), }); -function resolveBrowserBaseUrl(controlUrl?: string) { +function resolveBrowserBaseUrl(params: { + target?: "sandbox" | "host" | "custom"; + controlUrl?: string; + defaultControlUrl?: string; + allowHostControl?: boolean; +}) { const cfg = loadConfig(); const resolved = resolveBrowserConfig(cfg.browser); - if (!resolved.enabled && !controlUrl?.trim()) { + const normalizedControlUrl = params.controlUrl?.trim() ?? ""; + const normalizedDefault = params.defaultControlUrl?.trim() ?? ""; + const target = + params.target ?? + (normalizedControlUrl + ? "custom" + : normalizedDefault + ? "sandbox" + : "host"); + + if (target !== "custom" && params.target && normalizedControlUrl) { + throw new Error( + 'controlUrl is only supported with target="custom".', + ); + } + + if (target === "custom") { + if (!normalizedControlUrl) { + throw new Error('Custom browser target requires controlUrl.'); + } + return normalizedControlUrl.replace(/\/$/, ""); + } + + if (target === "sandbox") { + if (!normalizedDefault) { + throw new Error( + 'Sandbox browser is unavailable. 
Enable agents.defaults.sandbox.browser.enabled or use target="host" if allowed.', + ); + } + return normalizedDefault.replace(/\/$/, ""); + } + + if (params.allowHostControl === false) { + throw new Error("Host browser control is disabled by sandbox policy."); + } + if (!resolved.enabled) { throw new Error( "Browser control is disabled. Set browser.enabled=true in ~/.clawdbot/clawdbot.json.", ); } - const url = controlUrl?.trim() ? controlUrl.trim() : resolved.controlUrl; - return url.replace(/\/$/, ""); + return resolved.controlUrl.replace(/\/$/, ""); } export function createBrowserTool(opts?: { defaultControlUrl?: string; + allowHostControl?: boolean; }): AnyAgentTool { return { label: "Browser", @@ -149,10 +196,18 @@ export function createBrowserTool(opts?: { const params = args as Record; const action = readStringParam(params, "action", { required: true }); const controlUrl = readStringParam(params, "controlUrl"); + const target = readStringParam(params, "target") as + | "sandbox" + | "host" + | "custom" + | undefined; const profile = readStringParam(params, "profile"); - const baseUrl = resolveBrowserBaseUrl( - controlUrl ?? opts?.defaultControlUrl, - ); + const baseUrl = resolveBrowserBaseUrl({ + target, + controlUrl, + defaultControlUrl: opts?.defaultControlUrl, + allowHostControl: opts?.allowHostControl, + }); switch (action) { case "status": diff --git a/src/config/types.ts b/src/config/types.ts index 22a317500..aabe7df39 100644 --- a/src/config/types.ts +++ b/src/config/types.ts @@ -860,6 +860,11 @@ export type SandboxBrowserSettings = { noVncPort?: number; headless?: boolean; enableNoVnc?: boolean; + /** + * Allow sandboxed sessions to target the host browser control server. + * Default: false. + */ + allowHostControl?: boolean; /** * When true (default), sandboxed browser control will try to start/reattach to * the sandbox browser container when a tool call needs it. 
diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index dfbd97c43..081c664d0 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -724,6 +724,7 @@ const SandboxBrowserSchema = z noVncPort: z.number().int().positive().optional(), headless: z.boolean().optional(), enableNoVnc: z.boolean().optional(), + allowHostControl: z.boolean().optional(), autoStart: z.boolean().optional(), autoStartTimeoutMs: z.number().int().positive().optional(), })