From f9170c5d029eeeac8557141afcb3979a8027151c Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 15 Jan 2026 09:36:48 +0000 Subject: [PATCH] fix(browser): keep tab stable across snapshot and act --- src/agents/tools/browser-tool.ts | 1 + ...-tab-available.prefers-last-target.test.ts | 100 ++++++++++++++++++ src/browser/server-context.ts | 42 ++++++-- src/browser/server-context.types.ts | 2 + 4 files changed, 134 insertions(+), 11 deletions(-) create mode 100644 src/browser/server-context.ensure-tab-available.prefers-last-target.test.ts diff --git a/src/agents/tools/browser-tool.ts b/src/agents/tools/browser-tool.ts index 4ce72285c..0c5ad095f 100644 --- a/src/agents/tools/browser-tool.ts +++ b/src/agents/tools/browser-tool.ts @@ -127,6 +127,7 @@ export function createBrowserTool(opts?: { "Control the browser via Clawdbot's browser control server (status/start/stop/profiles/tabs/open/snapshot/screenshot/actions).", 'Profiles: use profile="chrome" for Chrome extension relay takeover (your existing Chrome tabs). Use profile="clawd" for the isolated clawd-managed browser.', "Chrome extension relay needs an attached tab: user must click the Clawdbot Browser Relay toolbar icon on the tab (badge ON). If no tab is connected, ask them to attach it.", + "When using refs from snapshot (e.g. e12), keep the same tab: prefer passing targetId from the snapshot response into subsequent actions (act/click/type/etc).", "Use snapshot+act for UI automation. Avoid act:wait by default; use only in exceptional cases when no reliable UI state exists.", `target selects browser location (sandbox|host|custom). Default: ${targetDefault}.`, "controlUrl implies target=custom (remote control server).", diff --git a/src/browser/server-context.ensure-tab-available.prefers-last-target.test.ts b/src/browser/server-context.ensure-tab-available.prefers-last-target.test.ts new file mode 100644 index 000000000..4b83074af --- /dev/null +++ b/src/browser/server-context.ensure-tab-available.prefers-last-target.test.ts @@ -0,0 +1,100 @@ +import { describe, expect, it, vi } from "vitest"; + +import type { BrowserServerState } from "./server-context.js"; +import { createBrowserRouteContext } from "./server-context.js"; + +vi.mock("./chrome.js", () => ({ + isChromeCdpReady: vi.fn(async () => true), + isChromeReachable: vi.fn(async () => true), + launchClawdChrome: vi.fn(async () => { + throw new Error("unexpected launch"); + }), + resolveClawdUserDataDir: vi.fn(() => "/tmp/clawd"), + stopClawdChrome: vi.fn(async () => {}), +})); + +describe("browser server-context ensureTabAvailable", () => { + it("sticks to the last selected target when targetId is omitted", async () => { + const fetchMock = vi.fn(); + // 1st call (snapshot): stable ordering A then B (twice) + // 2nd call (act): reversed ordering B then A (twice) + const responses = [ + [ + { id: "A", type: "page", url: "https://a.example", webSocketDebuggerUrl: "ws://x/a" }, + { id: "B", type: "page", url: "https://b.example", webSocketDebuggerUrl: "ws://x/b" }, + ], + [ + { id: "A", type: "page", url: "https://a.example", webSocketDebuggerUrl: "ws://x/a" }, + { id: "B", type: "page", url: "https://b.example", webSocketDebuggerUrl: "ws://x/b" }, + ], + [ + { id: "B", type: "page", url: "https://b.example", webSocketDebuggerUrl: "ws://x/b" }, + { id: "A", type: "page", url: "https://a.example", webSocketDebuggerUrl: "ws://x/a" }, + ], + [ + { id: "B", type: "page", url: "https://b.example", webSocketDebuggerUrl: "ws://x/b" }, + { id: "A", type: "page", url: "https://a.example", webSocketDebuggerUrl: "ws://x/a" }, + ], + ]; + + fetchMock.mockImplementation(async (url: unknown) => { + const u = String(url); + if (!u.includes("/json/list")) { + throw new Error(`unexpected fetch: ${u}`); + } + const next = responses.shift(); + if (!next) { + throw new Error("no more responses"); + } + return { + ok: true, + json: async () => next, + } as unknown as Response; + }); + + // @ts-expect-error test override + global.fetch = fetchMock; + + const state: BrowserServerState = { + // unused in these tests + // biome-ignore lint/suspicious/noExplicitAny: test stub + server: null as any, + port: 0, + resolved: { + enabled: true, + controlUrl: "http://127.0.0.1:18791", + controlHost: "127.0.0.1", + controlPort: 18791, + cdpProtocol: "http", + cdpHost: "127.0.0.1", + cdpIsLoopback: true, + color: "#FF4500", + headless: true, + noSandbox: false, + attachOnly: false, + defaultProfile: "chrome", + profiles: { + chrome: { + driver: "extension", + cdpUrl: "http://127.0.0.1:18792", + cdpPort: 18792, + color: "#00AA00", + }, + clawd: { cdpPort: 18800, color: "#FF4500" }, + }, + }, + profiles: new Map(), + }; + + const ctx = createBrowserRouteContext({ + getState: () => state, + }); + + const chrome = ctx.forProfile("chrome"); + const first = await chrome.ensureTabAvailable(); + expect(first.targetId).toBe("A"); + const second = await chrome.ensureTabAvailable(); + expect(second.targetId).toBe("A"); + }); +}); + diff --git a/src/browser/server-context.ts b/src/browser/server-context.ts index 9fa6094f9..7ab1389c1 100644 --- a/src/browser/server-context.ts +++ b/src/browser/server-context.ts @@ -86,7 +86,7 @@ function createProfileContext( const current = state(); let profileState = current.profiles.get(profile.name); if (!profileState) { - profileState = { profile, running: null }; + profileState = { profile, running: null, lastTargetId: null }; current.profiles.set(profile.name, profileState); } return profileState; @@ -158,6 +158,8 @@ function createProfileContext( }); if (!created.id) throw new Error("Failed to open tab (missing id)"); + const profileState = getProfileState(); + profileState.lastTargetId = created.id; return { targetId: created.id, title: created.title ?? "", @@ -276,27 +278,43 @@ function createProfileContext( const ensureTabAvailable = async (targetId?: string): Promise => { await ensureBrowserAvailable(); + const profileState = getProfileState(); const tabs1 = await listTabs(); if (tabs1.length === 0) { + if (profile.driver === "extension") { + throw new Error("tab not found"); + } await openTab("about:blank"); } const tabs = await listTabs(); - const chosen = targetId - ? (() => { - const resolved = resolveTargetIdFromTabs(targetId, tabs); - if (!resolved.ok) { - if (resolved.reason === "ambiguous") return "AMBIGUOUS" as const; - return null; - } - return tabs.find((t) => t.targetId === resolved.targetId) ?? null; - })() - : (tabs.at(0) ?? null); + const candidates = tabs.filter((t) => Boolean(t.wsUrl)); + + const resolveById = (raw: string) => { + const resolved = resolveTargetIdFromTabs(raw, candidates); + if (!resolved.ok) { + if (resolved.reason === "ambiguous") return "AMBIGUOUS" as const; + return null; + } + return candidates.find((t) => t.targetId === resolved.targetId) ?? null; + }; + + const pickDefault = () => { + const last = profileState.lastTargetId?.trim() || ""; + const lastResolved = last ? resolveById(last) : null; + if (lastResolved && lastResolved !== "AMBIGUOUS") return lastResolved; + // Prefer a real page tab first (avoid service workers/background targets). + const page = candidates.find((t) => (t.type ?? "page") === "page"); + return page ?? (candidates.at(0) ?? null); + }; + + const chosen = targetId ? resolveById(targetId) : pickDefault(); if (chosen === "AMBIGUOUS") { throw new Error("ambiguous target id prefix"); } if (!chosen?.wsUrl) throw new Error("tab not found"); + profileState.lastTargetId = chosen.targetId; return chosen; }; @@ -311,6 +329,8 @@ function createProfileContext( throw new Error("tab not found"); } await fetchOk(`${base}/json/activate/${resolved.targetId}`); + const profileState = getProfileState(); + profileState.lastTargetId = resolved.targetId; }; const closeTab = async (targetId: string): Promise => { diff --git a/src/browser/server-context.types.ts b/src/browser/server-context.types.ts index 555427afe..6532010e9 100644 --- a/src/browser/server-context.types.ts +++ b/src/browser/server-context.types.ts @@ -12,6 +12,8 @@ export type { BrowserTab }; export type ProfileRuntimeState = { profile: ResolvedBrowserProfile; running: RunningChrome | null; + /** Sticky tab selection when callers omit targetId (keeps snapshot+act consistent). */ + lastTargetId?: string | null; }; export type BrowserServerState = {