feat: add browser snapshot modes
This commit is contained in:
@@ -34,8 +34,7 @@ extension AttributedString {
|
||||
var ranges: [Range<AttributedString.Index>] = []
|
||||
for wordRange in wordRanges {
|
||||
if let lastRange = ranges.last,
|
||||
self[lastRange].characters.count + self[wordRange].characters.count <= maxLength
|
||||
{
|
||||
self[lastRange].characters.count + self[wordRange].characters.count <= maxLength {
|
||||
ranges[ranges.count - 1] = lastRange.lowerBound..<wordRange.upperBound
|
||||
} else {
|
||||
ranges.append(wordRange)
|
||||
|
||||
@@ -13,8 +13,7 @@ public actor TranscriptsStore {
|
||||
try? FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
|
||||
fileURL = dir.appendingPathComponent("transcripts.log")
|
||||
if let data = try? Data(contentsOf: fileURL),
|
||||
let text = String(data: data, encoding: .utf8)
|
||||
{
|
||||
let text = String(data: data, encoding: .utf8) {
|
||||
entries = text.split(separator: "\n").map(String.init).suffix(limit)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,8 +24,7 @@ public struct WakeWordGateConfig: Sendable, Equatable {
|
||||
public init(
|
||||
triggers: [String],
|
||||
minPostTriggerGap: TimeInterval = 0.45,
|
||||
minCommandLength: Int = 1)
|
||||
{
|
||||
minCommandLength: Int = 1) {
|
||||
self.triggers = triggers
|
||||
self.minPostTriggerGap = minPostTriggerGap
|
||||
self.minCommandLength = minCommandLength
|
||||
@@ -57,6 +56,12 @@ public enum WakeWordGate {
|
||||
let tokens: [String]
|
||||
}
|
||||
|
||||
/// Best trigger candidate tracked while `match` scans the tokens.
/// Named replacement for the `(index:triggerEnd:gap:)` tuple previously used for `best`.
private struct MatchCandidate {
|
||||
/// Position of the candidate; `match` skips any candidate whose index is not
/// greater than the current best's index.
let index: Int
|
||||
/// NOTE(review): presumably the time at which the matched trigger ends — confirm in `match`.
let triggerEnd: TimeInterval
|
||||
/// NOTE(review): presumably the pause following the trigger (compare `minPostTriggerGap`) — confirm in `match`.
let gap: TimeInterval
|
||||
}
|
||||
|
||||
public static func match(
|
||||
transcript: String,
|
||||
segments: [WakeWordSegment],
|
||||
@@ -68,7 +73,7 @@ public enum WakeWordGate {
|
||||
let tokens = normalizeSegments(segments)
|
||||
guard !tokens.isEmpty else { return nil }
|
||||
|
||||
var best: (index: Int, triggerEnd: TimeInterval, gap: TimeInterval)?
|
||||
var best: MatchCandidate?
|
||||
|
||||
for trigger in triggerTokens {
|
||||
let count = trigger.tokens.count
|
||||
@@ -84,7 +89,7 @@ public enum WakeWordGate {
|
||||
|
||||
if let best, i <= best.index { continue }
|
||||
|
||||
best = (i, triggerEnd, gap)
|
||||
best = MatchCandidate(index: i, triggerEnd: triggerEnd, gap: gap)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ enum CLIRegistry {
|
||||
subcommands: [
|
||||
descriptor(for: ServiceInstall.self),
|
||||
descriptor(for: ServiceUninstall.self),
|
||||
descriptor(for: ServiceStatus.self),
|
||||
descriptor(for: ServiceStatus.self)
|
||||
])
|
||||
let doctorDesc = descriptor(for: DoctorCommand.self)
|
||||
let setupDesc = descriptor(for: SetupCommand.self)
|
||||
@@ -54,7 +54,7 @@ enum CLIRegistry {
|
||||
startDesc,
|
||||
stopDesc,
|
||||
restartDesc,
|
||||
statusDesc,
|
||||
statusDesc
|
||||
])
|
||||
return [root]
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ private enum LaunchdHelper {
|
||||
"Label": label,
|
||||
"ProgramArguments": [executable, "serve"],
|
||||
"RunAtLoad": true,
|
||||
"KeepAlive": true,
|
||||
"KeepAlive": true
|
||||
]
|
||||
let data = try PropertyListSerialization.data(fromPropertyList: plist, format: .xml, options: 0)
|
||||
try data.write(to: plistURL)
|
||||
|
||||
@@ -25,78 +25,123 @@ private func dispatch(invocation: CommandInvocation) async throws {
|
||||
|
||||
switch first {
|
||||
case "swabble":
|
||||
guard path.count >= 2 else { throw CommanderProgramError.missingSubcommand(command: "swabble") }
|
||||
let sub = path[1]
|
||||
switch sub {
|
||||
case "serve":
|
||||
var cmd = ServeCommand(parsed: parsed)
|
||||
try await cmd.run()
|
||||
case "transcribe":
|
||||
var cmd = TranscribeCommand(parsed: parsed)
|
||||
try await cmd.run()
|
||||
case "test-hook":
|
||||
var cmd = TestHookCommand(parsed: parsed)
|
||||
try await cmd.run()
|
||||
case "mic":
|
||||
guard path.count >= 3 else { throw CommanderProgramError.missingSubcommand(command: "mic") }
|
||||
let micSub = path[2]
|
||||
if micSub == "list" {
|
||||
var cmd = MicList(parsed: parsed)
|
||||
try await cmd.run()
|
||||
} else if micSub == "set" {
|
||||
var cmd = MicSet(parsed: parsed)
|
||||
try await cmd.run()
|
||||
} else {
|
||||
throw CommanderProgramError.unknownSubcommand(command: "mic", name: micSub)
|
||||
}
|
||||
case "service":
|
||||
guard path.count >= 3 else { throw CommanderProgramError.missingSubcommand(command: "service") }
|
||||
let svcSub = path[2]
|
||||
switch svcSub {
|
||||
case "install":
|
||||
var cmd = ServiceInstall()
|
||||
try await cmd.run()
|
||||
case "uninstall":
|
||||
var cmd = ServiceUninstall()
|
||||
try await cmd.run()
|
||||
case "status":
|
||||
var cmd = ServiceStatus()
|
||||
try await cmd.run()
|
||||
default:
|
||||
throw CommanderProgramError.unknownSubcommand(command: "service", name: svcSub)
|
||||
}
|
||||
case "doctor":
|
||||
var cmd = DoctorCommand(parsed: parsed)
|
||||
try await cmd.run()
|
||||
case "setup":
|
||||
var cmd = SetupCommand(parsed: parsed)
|
||||
try await cmd.run()
|
||||
case "health":
|
||||
var cmd = HealthCommand(parsed: parsed)
|
||||
try await cmd.run()
|
||||
case "tail-log":
|
||||
var cmd = TailLogCommand(parsed: parsed)
|
||||
try await cmd.run()
|
||||
case "start":
|
||||
var cmd = StartCommand()
|
||||
try await cmd.run()
|
||||
case "stop":
|
||||
var cmd = StopCommand()
|
||||
try await cmd.run()
|
||||
case "restart":
|
||||
var cmd = RestartCommand()
|
||||
try await cmd.run()
|
||||
case "status":
|
||||
var cmd = StatusCommand()
|
||||
try await cmd.run()
|
||||
default:
|
||||
throw CommanderProgramError.unknownSubcommand(command: "swabble", name: sub)
|
||||
}
|
||||
try await dispatchSwabble(parsed: parsed, path: path)
|
||||
default:
|
||||
throw CommanderProgramError.unknownCommand(first)
|
||||
}
|
||||
}
|
||||
|
||||
/// Routes a `swabble …` invocation to the command named by the second path component.
///
/// `mic` and `service` carry nested subcommands and are forwarded to their own
/// dispatchers; every other name is looked up in the table returned by
/// `swabbleHandlers(parsed:)`.
///
/// - Parameters:
///   - parsed: Parsed values passed on to the selected command.
///   - path: Command path; the element at index 1 is the subcommand name.
/// - Throws: `CommanderProgramError.missingSubcommand` when `path` has no element at
///   index 1, `CommanderProgramError.unknownSubcommand` when no handler matches, or
///   any error thrown by the selected command.
@available(macOS 26.0, *)
@MainActor
private func dispatchSwabble(parsed: ParsedValues, path: [String]) async throws {
    let name = try subcommand(path, index: 1, command: "swabble")

    // Nested command groups are handled first and return early.
    if name == "mic" {
        try await dispatchMic(parsed: parsed, path: path)
        return
    }
    if name == "service" {
        try await dispatchService(path: path)
        return
    }

    // Everything else is a leaf command resolved through the handler table.
    guard let run = swabbleHandlers(parsed: parsed)[name] else {
        throw CommanderProgramError.unknownSubcommand(command: "swabble", name: name)
    }
    try await run()
}
|
||||
|
||||
/// Builds the lookup table of leaf `swabble` subcommands.
///
/// Each entry maps a subcommand name to a closure that constructs the matching
/// command and runs it.
///
/// - Parameter parsed: Parsed values handed to the commands that are initialised
///   with `parsed:`.
/// - Returns: Handlers keyed by subcommand name.
@available(macOS 26.0, *)
@MainActor
private func swabbleHandlers(parsed: ParsedValues) -> [String: () async throws -> Void] {
    var table: [String: () async throws -> Void] = [:]

    // Commands constructed from the parsed values.
    table["serve"] = {
        var command = ServeCommand(parsed: parsed)
        try await command.run()
    }
    table["transcribe"] = {
        var command = TranscribeCommand(parsed: parsed)
        try await command.run()
    }
    table["test-hook"] = {
        var command = TestHookCommand(parsed: parsed)
        try await command.run()
    }
    table["doctor"] = {
        var command = DoctorCommand(parsed: parsed)
        try await command.run()
    }
    table["setup"] = {
        var command = SetupCommand(parsed: parsed)
        try await command.run()
    }
    table["health"] = {
        var command = HealthCommand(parsed: parsed)
        try await command.run()
    }
    table["tail-log"] = {
        var command = TailLogCommand(parsed: parsed)
        try await command.run()
    }

    // Commands constructed without arguments.
    table["start"] = {
        var command = StartCommand()
        try await command.run()
    }
    table["stop"] = {
        var command = StopCommand()
        try await command.run()
    }
    table["restart"] = {
        var command = RestartCommand()
        try await command.run()
    }
    table["status"] = {
        var command = StatusCommand()
        try await command.run()
    }

    return table
}
|
||||
|
||||
/// Runs the `mic` subcommand named by the third path component.
///
/// - Parameters:
///   - parsed: Parsed values passed to the selected command.
///   - path: Command path; the element at index 2 selects `list` or `set`.
/// - Throws: `CommanderProgramError.missingSubcommand` when `path` has no element at
///   index 2, `CommanderProgramError.unknownSubcommand` for any other name, or any
///   error thrown by the selected command.
@available(macOS 26.0, *)
@MainActor
private func dispatchMic(parsed: ParsedValues, path: [String]) async throws {
    let action = try subcommand(path, index: 2, command: "mic")

    if action == "list" {
        var command = MicList(parsed: parsed)
        try await command.run()
    } else if action == "set" {
        var command = MicSet(parsed: parsed)
        try await command.run()
    } else {
        throw CommanderProgramError.unknownSubcommand(command: "mic", name: action)
    }
}
|
||||
|
||||
/// Runs the `service` subcommand named by the third path component.
///
/// - Parameter path: Command path; the element at index 2 selects `install`,
///   `uninstall` or `status`.
/// - Throws: `CommanderProgramError.missingSubcommand` when `path` has no element at
///   index 2, `CommanderProgramError.unknownSubcommand` for any other name, or any
///   error thrown by the selected command.
@available(macOS 26.0, *)
@MainActor
private func dispatchService(path: [String]) async throws {
    let action = try subcommand(path, index: 2, command: "service")

    if action == "install" {
        var command = ServiceInstall()
        try await command.run()
    } else if action == "uninstall" {
        var command = ServiceUninstall()
        try await command.run()
    } else if action == "status" {
        var command = ServiceStatus()
        try await command.run()
    } else {
        throw CommanderProgramError.unknownSubcommand(command: "service", name: action)
    }
}
|
||||
|
||||
/// Returns the path component at `index`.
///
/// - Parameters:
///   - path: Command path components.
///   - index: Position of the wanted component.
///   - command: Command name reported in the error when the component is absent.
/// - Returns: `path[index]`.
/// - Throws: `CommanderProgramError.missingSubcommand` when `path` has `index`
///   elements or fewer.
private func subcommand(_ path: [String], index: Int, command: String) throws -> String {
    if index < path.count {
        return path[index]
    }
    throw CommanderProgramError.missingSubcommand(command: command)
}
|
||||
|
||||
if #available(macOS 26.0, *) {
|
||||
let exitCode = await runCLI()
|
||||
exit(exitCode)
|
||||
|
||||
@@ -4,22 +4,22 @@ import Testing
|
||||
|
||||
@Suite(.serialized)
|
||||
@MainActor
|
||||
struct ConnectionsSettingsSmokeTests {
|
||||
@Test func connectionsSettingsBuildsBodyWithSnapshot() {
|
||||
let store = ConnectionsStore(isPreview: true)
|
||||
store.snapshot = ChannelsStatusSnapshot(
|
||||
ts: 1_700_000_000_000,
|
||||
channelOrder: ["whatsapp", "telegram", "signal", "imessage"],
|
||||
channelLabels: [
|
||||
"whatsapp": "WhatsApp",
|
||||
"telegram": "Telegram",
|
||||
"signal": "Signal",
|
||||
"imessage": "iMessage",
|
||||
],
|
||||
channels: [
|
||||
"whatsapp": AnyCodable([
|
||||
"configured": true,
|
||||
"linked": true,
|
||||
struct ConnectionsSettingsSmokeTests {
|
||||
@Test func connectionsSettingsBuildsBodyWithSnapshot() {
|
||||
let store = ConnectionsStore(isPreview: true)
|
||||
store.snapshot = ChannelsStatusSnapshot(
|
||||
ts: 1_700_000_000_000,
|
||||
channelOrder: ["whatsapp", "telegram", "signal", "imessage"],
|
||||
channelLabels: [
|
||||
"whatsapp": "WhatsApp",
|
||||
"telegram": "Telegram",
|
||||
"signal": "Signal",
|
||||
"imessage": "iMessage",
|
||||
],
|
||||
channels: [
|
||||
"whatsapp": AnyCodable([
|
||||
"configured": true,
|
||||
"linked": true,
|
||||
"authAgeMs": 86_400_000,
|
||||
"self": ["e164": "+15551234567"],
|
||||
"running": true,
|
||||
@@ -70,13 +70,13 @@ import Testing
|
||||
"lastError": "not configured",
|
||||
"probe": ["ok": false, "error": "imsg not found (imsg)"],
|
||||
"lastProbeAt": 1_700_000_050_000,
|
||||
]),
|
||||
],
|
||||
channelAccounts: [:],
|
||||
channelDefaultAccountId: [
|
||||
"whatsapp": "default",
|
||||
"telegram": "default",
|
||||
"signal": "default",
|
||||
]),
|
||||
],
|
||||
channelAccounts: [:],
|
||||
channelDefaultAccountId: [
|
||||
"whatsapp": "default",
|
||||
"telegram": "default",
|
||||
"signal": "default",
|
||||
"imessage": "default",
|
||||
])
|
||||
|
||||
@@ -93,23 +93,23 @@ import Testing
|
||||
|
||||
let view = ConnectionsSettings(store: store)
|
||||
_ = view.body
|
||||
}
|
||||
}
|
||||
|
||||
@Test func connectionsSettingsBuildsBodyWithoutSnapshot() {
|
||||
let store = ConnectionsStore(isPreview: true)
|
||||
store.snapshot = ChannelsStatusSnapshot(
|
||||
ts: 1_700_000_000_000,
|
||||
channelOrder: ["whatsapp", "telegram", "signal", "imessage"],
|
||||
channelLabels: [
|
||||
"whatsapp": "WhatsApp",
|
||||
"telegram": "Telegram",
|
||||
"signal": "Signal",
|
||||
"imessage": "iMessage",
|
||||
],
|
||||
channels: [
|
||||
"whatsapp": AnyCodable([
|
||||
"configured": false,
|
||||
"linked": false,
|
||||
@Test func connectionsSettingsBuildsBodyWithoutSnapshot() {
|
||||
let store = ConnectionsStore(isPreview: true)
|
||||
store.snapshot = ChannelsStatusSnapshot(
|
||||
ts: 1_700_000_000_000,
|
||||
channelOrder: ["whatsapp", "telegram", "signal", "imessage"],
|
||||
channelLabels: [
|
||||
"whatsapp": "WhatsApp",
|
||||
"telegram": "Telegram",
|
||||
"signal": "Signal",
|
||||
"imessage": "iMessage",
|
||||
],
|
||||
channels: [
|
||||
"whatsapp": AnyCodable([
|
||||
"configured": false,
|
||||
"linked": false,
|
||||
"running": false,
|
||||
"connected": false,
|
||||
"reconnectAttempts": 0,
|
||||
@@ -146,13 +146,13 @@ import Testing
|
||||
"cliPath": "imsg",
|
||||
"probe": ["ok": false, "error": "imsg not found (imsg)"],
|
||||
"lastProbeAt": 1_700_000_200_000,
|
||||
]),
|
||||
],
|
||||
channelAccounts: [:],
|
||||
channelDefaultAccountId: [
|
||||
"whatsapp": "default",
|
||||
"telegram": "default",
|
||||
"signal": "default",
|
||||
]),
|
||||
],
|
||||
channelAccounts: [:],
|
||||
channelDefaultAccountId: [
|
||||
"whatsapp": "default",
|
||||
"telegram": "default",
|
||||
"signal": "default",
|
||||
"imessage": "default",
|
||||
])
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@ import Foundation
|
||||
import Testing
|
||||
@testable import Clawdbot
|
||||
|
||||
@Suite struct HealthDecodeTests {
|
||||
private let sampleJSON: String = // minimal but complete payload
|
||||
"""
|
||||
@Suite struct HealthDecodeTests {
|
||||
private let sampleJSON: String = // minimal but complete payload
|
||||
"""
|
||||
{"ts":1733622000,"durationMs":420,"channels":{"whatsapp":{"linked":true,"authAgeMs":120000},"telegram":{"configured":true,"probe":{"ok":true,"elapsedMs":800}}},"channelOrder":["whatsapp","telegram"],"heartbeatSeconds":60,"sessions":{"path":"/tmp/sessions.json","count":1,"recent":[{"key":"abc","updatedAt":1733621900,"age":120000}]}}
|
||||
"""
|
||||
|
||||
|
||||
@@ -204,6 +204,8 @@ Inspection:
|
||||
- `clawdbot browser snapshot`
|
||||
- `clawdbot browser snapshot --format aria --limit 200`
|
||||
- `clawdbot browser snapshot --interactive --compact --depth 6`
|
||||
- `clawdbot browser snapshot --efficient`
|
||||
- `clawdbot browser snapshot --labels`
|
||||
- `clawdbot browser snapshot --selector "#main" --interactive`
|
||||
- `clawdbot browser snapshot --frame "iframe#main" --interactive`
|
||||
- `clawdbot browser console --level error`
|
||||
@@ -260,9 +262,11 @@ Notes:
|
||||
- `snapshot`:
|
||||
- `--format ai` (default when Playwright is installed): returns an AI snapshot with numeric refs (`aria-ref="<n>"`).
|
||||
- `--format aria`: returns the accessibility tree (no refs; inspection only).
|
||||
- `--efficient` (or `--mode efficient`): compact role snapshot preset (interactive + compact + depth + lower maxChars).
|
||||
- Role snapshot options (`--interactive`, `--compact`, `--depth`, `--selector`) force a role-based snapshot with refs like `ref=e12`.
|
||||
- `--frame "<iframe selector>"` scopes role snapshots to an iframe (pairs with role refs like `e12`).
|
||||
- `--interactive` outputs a flat, easy-to-pick list of interactive elements (best for driving actions).
|
||||
- `--labels` adds a viewport-only screenshot with overlaid ref labels (prints `MEDIA:<path>`).
|
||||
- `click`/`type`/etc require a `ref` from `snapshot` (either numeric `12` or role ref `e12`).
|
||||
CSS selectors are intentionally not supported for actions.
|
||||
|
||||
@@ -279,6 +283,7 @@ Clawdbot supports two “snapshot” styles:
|
||||
- Output: a role-based list/tree with `[ref=e12]` (and optional `[nth=1]`).
|
||||
- Actions: `clawdbot browser click e12`, `clawdbot browser highlight e12`.
|
||||
- Internally, the ref is resolved via `getByRole(...)` (plus `nth()` for duplicates).
|
||||
- Add `--labels` to include a viewport screenshot with overlaid `e12` labels.
|
||||
|
||||
Ref behavior:
|
||||
- Refs are **not stable across navigations**; if something fails, re-run `snapshot` and use a fresh ref.
|
||||
|
||||
@@ -37,6 +37,7 @@ const BROWSER_TOOL_ACTIONS = [
|
||||
const BROWSER_TARGETS = ["sandbox", "host", "custom"] as const;
|
||||
|
||||
const BROWSER_SNAPSHOT_FORMATS = ["aria", "ai"] as const;
|
||||
const BROWSER_SNAPSHOT_MODES = ["efficient"] as const;
|
||||
|
||||
const BROWSER_IMAGE_TYPES = ["png", "jpeg"] as const;
|
||||
|
||||
@@ -87,12 +88,14 @@ export const BrowserToolSchema = Type.Object({
|
||||
targetId: Type.Optional(Type.String()),
|
||||
limit: Type.Optional(Type.Number()),
|
||||
maxChars: Type.Optional(Type.Number()),
|
||||
mode: optionalStringEnum(BROWSER_SNAPSHOT_MODES),
|
||||
format: optionalStringEnum(BROWSER_SNAPSHOT_FORMATS),
|
||||
interactive: Type.Optional(Type.Boolean()),
|
||||
compact: Type.Optional(Type.Boolean()),
|
||||
depth: Type.Optional(Type.Number()),
|
||||
selector: Type.Optional(Type.String()),
|
||||
frame: Type.Optional(Type.String()),
|
||||
labels: Type.Optional(Type.Boolean()),
|
||||
fullPage: Type.Optional(Type.Boolean()),
|
||||
ref: Type.Optional(Type.String()),
|
||||
element: Type.Optional(Type.String()),
|
||||
|
||||
@@ -182,6 +182,8 @@ export function createBrowserTool(opts?: {
|
||||
params.format === "ai" || params.format === "aria"
|
||||
? (params.format as "ai" | "aria")
|
||||
: "ai";
|
||||
const mode = params.mode === "efficient" ? "efficient" : undefined;
|
||||
const labels = typeof params.labels === "boolean" ? params.labels : undefined;
|
||||
const hasMaxChars = Object.hasOwn(params, "maxChars");
|
||||
const targetId = typeof params.targetId === "string" ? params.targetId.trim() : undefined;
|
||||
const limit =
|
||||
@@ -195,7 +197,13 @@ export function createBrowserTool(opts?: {
|
||||
? Math.floor(params.maxChars)
|
||||
: undefined;
|
||||
const resolvedMaxChars =
|
||||
format === "ai" ? (hasMaxChars ? maxChars : DEFAULT_AI_SNAPSHOT_MAX_CHARS) : undefined;
|
||||
format === "ai"
|
||||
? hasMaxChars
|
||||
? maxChars
|
||||
: mode === "efficient"
|
||||
? undefined
|
||||
: DEFAULT_AI_SNAPSHOT_MAX_CHARS
|
||||
: undefined;
|
||||
const interactive =
|
||||
typeof params.interactive === "boolean" ? params.interactive : undefined;
|
||||
const compact = typeof params.compact === "boolean" ? params.compact : undefined;
|
||||
@@ -215,9 +223,19 @@ export function createBrowserTool(opts?: {
|
||||
depth,
|
||||
selector,
|
||||
frame,
|
||||
labels,
|
||||
mode,
|
||||
profile,
|
||||
});
|
||||
if (snapshot.format === "ai") {
|
||||
if (labels && snapshot.imagePath) {
|
||||
return await imageResultFromFile({
|
||||
label: "browser:snapshot",
|
||||
path: snapshot.imagePath,
|
||||
extraText: snapshot.snapshot,
|
||||
details: snapshot,
|
||||
});
|
||||
}
|
||||
return {
|
||||
content: [{ type: "text", text: snapshot.snapshot }],
|
||||
details: snapshot,
|
||||
|
||||
@@ -79,6 +79,11 @@ export type SnapshotResult =
|
||||
refs: number;
|
||||
interactive: number;
|
||||
};
|
||||
labels?: boolean;
|
||||
labelsCount?: number;
|
||||
labelsSkipped?: number;
|
||||
imagePath?: string;
|
||||
imageType?: "png" | "jpeg";
|
||||
};
|
||||
|
||||
export function resolveBrowserControlUrl(overrideUrl?: string) {
|
||||
@@ -264,6 +269,8 @@ export async function browserSnapshot(
|
||||
depth?: number;
|
||||
selector?: string;
|
||||
frame?: string;
|
||||
labels?: boolean;
|
||||
mode?: "efficient";
|
||||
profile?: string;
|
||||
},
|
||||
): Promise<SnapshotResult> {
|
||||
@@ -280,6 +287,8 @@ export async function browserSnapshot(
|
||||
q.set("depth", String(opts.depth));
|
||||
if (opts.selector?.trim()) q.set("selector", opts.selector.trim());
|
||||
if (opts.frame?.trim()) q.set("frame", opts.frame.trim());
|
||||
if (opts.labels === true) q.set("labels", "1");
|
||||
if (opts.mode) q.set("mode", opts.mode);
|
||||
if (opts.profile) q.set("profile", opts.profile);
|
||||
return await fetchBrowserJson<SnapshotResult>(`${baseUrl}/snapshot?${q.toString()}`, {
|
||||
timeoutMs: 20000,
|
||||
|
||||
@@ -3,3 +3,5 @@ export const DEFAULT_CLAWD_BROWSER_CONTROL_URL = "http://127.0.0.1:18791";
|
||||
export const DEFAULT_CLAWD_BROWSER_COLOR = "#FF4500";
|
||||
export const DEFAULT_CLAWD_BROWSER_PROFILE_NAME = "clawd";
|
||||
export const DEFAULT_AI_SNAPSHOT_MAX_CHARS = 80_000;
|
||||
export const DEFAULT_AI_SNAPSHOT_EFFICIENT_MAX_CHARS = 10_000;
|
||||
export const DEFAULT_AI_SNAPSHOT_EFFICIENT_DEPTH = 6;
|
||||
|
||||
@@ -42,6 +42,7 @@ export {
|
||||
setTimezoneViaPlaywright,
|
||||
snapshotAiViaPlaywright,
|
||||
snapshotRoleViaPlaywright,
|
||||
screenshotWithLabelsViaPlaywright,
|
||||
storageClearViaPlaywright,
|
||||
storageGetViaPlaywright,
|
||||
storageSetViaPlaywright,
|
||||
|
||||
@@ -347,6 +347,132 @@ export async function takeScreenshotViaPlaywright(opts: {
|
||||
return { buffer };
|
||||
}
|
||||
|
||||
/**
 * Takes a viewport screenshot with a labelled outline drawn over each ref.
 *
 * For every key in `opts.refs` the element is located via `refLocator`, and its
 * bounding box is outlined and tagged with the ref text in a temporary overlay.
 * The overlay nodes carry `data-clawdbot-labels` and are removed in `finally`,
 * so they are cleaned up whether or not the screenshot succeeds.
 *
 * @param opts.cdpUrl    Passed through to `getPageForTargetId`.
 * @param opts.targetId  Passed through to `getPageForTargetId`.
 * @param opts.refs      Map whose keys are the refs to label.
 * @param opts.maxLabels Maximum number of labels drawn (floored, at least 1; default 150).
 * @param opts.type      Screenshot image type (default `"png"`).
 * @returns The screenshot buffer, the number of labels drawn, and the number of
 *          refs skipped (over the limit, no bounding box, outside the viewport,
 *          or lookup failed).
 */
export async function screenshotWithLabelsViaPlaywright(opts: {
  cdpUrl: string;
  targetId?: string;
  refs: Record<string, { role: string; name?: string; nth?: number }>;
  maxLabels?: number;
  type?: "png" | "jpeg";
}): Promise<{ buffer: Buffer; labels: number; skipped: number }> {
  const page = await getPageForTargetId(opts);
  ensurePageState(page);
  const type = opts.type ?? "png";
  const maxLabels =
    typeof opts.maxLabels === "number" && Number.isFinite(opts.maxLabels)
      ? Math.max(1, Math.floor(opts.maxLabels))
      : 150;

  const viewport = await page.evaluate(() => ({
    width: window.innerWidth || 0,
    height: window.innerHeight || 0,
  }));

  const refs = Object.keys(opts.refs ?? {});
  const boxes: Array<{ ref: string; x: number; y: number; w: number; h: number }> = [];
  let skipped = 0;

  for (const ref of refs) {
    if (boxes.length >= maxLabels) {
      skipped += 1;
      continue;
    }
    try {
      const box = await refLocator(page, ref).boundingBox();
      if (!box) {
        skipped += 1;
        continue;
      }
      // Playwright reports boundingBox() relative to the main-frame viewport
      // (scrolling already shifts it, like getBoundingClientRect), so the box is
      // tested against the viewport rectangle [0,width]x[0,height] and used as-is
      // for the position:fixed overlay. Adding or subtracting window.scrollX/Y
      // here would drop visible elements and misplace labels on scrolled pages.
      const x0 = box.x;
      const y0 = box.y;
      const x1 = box.x + box.width;
      const y1 = box.y + box.height;
      if (x1 < 0 || x0 > viewport.width || y1 < 0 || y0 > viewport.height) {
        skipped += 1;
        continue;
      }
      boxes.push({
        ref,
        x: x0,
        y: y0,
        w: Math.max(1, box.width),
        h: Math.max(1, box.height),
      });
    } catch {
      // Best effort: a ref that cannot be resolved is counted as skipped.
      skipped += 1;
    }
  }

  try {
    if (boxes.length > 0) {
      await page.evaluate((labels) => {
        // Drop any overlay left behind by an earlier run.
        const existing = document.querySelectorAll("[data-clawdbot-labels]");
        existing.forEach((el) => el.remove());

        const root = document.createElement("div");
        root.setAttribute("data-clawdbot-labels", "1");
        root.style.position = "fixed";
        root.style.left = "0";
        root.style.top = "0";
        root.style.zIndex = "2147483647";
        root.style.pointerEvents = "none";
        root.style.fontFamily =
          '"SF Mono","SFMono-Regular",Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace';

        const clamp = (value: number, min: number, max: number) =>
          Math.min(max, Math.max(min, value));

        for (const label of labels) {
          const box = document.createElement("div");
          box.setAttribute("data-clawdbot-labels", "1");
          box.style.position = "absolute";
          box.style.left = `${label.x}px`;
          box.style.top = `${label.y}px`;
          box.style.width = `${label.w}px`;
          box.style.height = `${label.h}px`;
          box.style.border = "2px solid #ffb020";
          box.style.boxSizing = "border-box";

          const tag = document.createElement("div");
          tag.setAttribute("data-clawdbot-labels", "1");
          tag.textContent = label.ref;
          tag.style.position = "absolute";
          tag.style.left = `${label.x}px`;
          // Place the tag just above the box, but never above the top edge.
          tag.style.top = `${clamp(label.y - 18, 0, 20000)}px`;
          tag.style.background = "#ffb020";
          tag.style.color = "#1a1a1a";
          tag.style.fontSize = "12px";
          tag.style.lineHeight = "14px";
          tag.style.padding = "1px 4px";
          tag.style.borderRadius = "3px";
          tag.style.boxShadow = "0 1px 2px rgba(0,0,0,0.35)";
          tag.style.whiteSpace = "nowrap";

          root.appendChild(box);
          root.appendChild(tag);
        }

        document.documentElement.appendChild(root);
      }, boxes);
    }

    const buffer = await page.screenshot({ type });
    return { buffer, labels: boxes.length, skipped };
  } finally {
    // Always remove the overlay; a cleanup failure must not mask the result or error.
    await page
      .evaluate(() => {
        const existing = document.querySelectorAll("[data-clawdbot-labels]");
        existing.forEach((el) => el.remove());
      })
      .catch(() => {});
  }
}
|
||||
|
||||
export async function setInputFilesViaPlaywright(opts: {
|
||||
cdpUrl: string;
|
||||
targetId?: string;
|
||||
|
||||
@@ -4,7 +4,11 @@ import type express from "express";
|
||||
|
||||
import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js";
|
||||
import { captureScreenshot, snapshotAria } from "../cdp.js";
|
||||
import { DEFAULT_AI_SNAPSHOT_MAX_CHARS } from "../constants.js";
|
||||
import {
|
||||
DEFAULT_AI_SNAPSHOT_EFFICIENT_DEPTH,
|
||||
DEFAULT_AI_SNAPSHOT_EFFICIENT_MAX_CHARS,
|
||||
DEFAULT_AI_SNAPSHOT_MAX_CHARS,
|
||||
} from "../constants.js";
|
||||
import {
|
||||
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
|
||||
DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
|
||||
@@ -138,14 +142,12 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
|
||||
const profileCtx = resolveProfileContext(req, res, ctx);
|
||||
if (!profileCtx) return;
|
||||
const targetId = typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
|
||||
const mode = req.query.mode === "efficient" ? "efficient" : undefined;
|
||||
const labels = toBoolean(req.query.labels) ?? undefined;
|
||||
const explicitFormat =
|
||||
req.query.format === "aria" ? "aria" : req.query.format === "ai" ? "ai" : undefined;
|
||||
const format =
|
||||
req.query.format === "aria"
|
||||
? "aria"
|
||||
: req.query.format === "ai"
|
||||
? "ai"
|
||||
: (await getPwAiModule())
|
||||
? "ai"
|
||||
: "aria";
|
||||
explicitFormat ?? (mode ? "ai" : (await getPwAiModule()) ? "ai" : "aria");
|
||||
const limitRaw = typeof req.query.limit === "string" ? Number(req.query.limit) : undefined;
|
||||
const hasMaxChars = Object.hasOwn(req.query, "maxChars");
|
||||
const maxCharsRaw =
|
||||
@@ -156,19 +158,34 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
|
||||
? Math.floor(maxCharsRaw)
|
||||
: undefined;
|
||||
const resolvedMaxChars =
|
||||
format === "ai" ? (hasMaxChars ? maxChars : DEFAULT_AI_SNAPSHOT_MAX_CHARS) : undefined;
|
||||
const interactive = toBoolean(req.query.interactive);
|
||||
const compact = toBoolean(req.query.compact);
|
||||
const depth = toNumber(req.query.depth);
|
||||
format === "ai"
|
||||
? hasMaxChars
|
||||
? maxChars
|
||||
: mode === "efficient"
|
||||
? DEFAULT_AI_SNAPSHOT_EFFICIENT_MAX_CHARS
|
||||
: DEFAULT_AI_SNAPSHOT_MAX_CHARS
|
||||
: undefined;
|
||||
const interactiveRaw = toBoolean(req.query.interactive);
|
||||
const compactRaw = toBoolean(req.query.compact);
|
||||
const depthRaw = toNumber(req.query.depth);
|
||||
const interactive = interactiveRaw ?? (mode === "efficient" ? true : undefined);
|
||||
const compact = compactRaw ?? (mode === "efficient" ? true : undefined);
|
||||
const depth =
|
||||
depthRaw ?? (mode === "efficient" ? DEFAULT_AI_SNAPSHOT_EFFICIENT_DEPTH : undefined);
|
||||
const selector = toStringOrEmpty(req.query.selector);
|
||||
const frameSelector = toStringOrEmpty(req.query.frame);
|
||||
|
||||
try {
|
||||
const tab = await profileCtx.ensureTabAvailable(targetId || undefined);
|
||||
if ((labels || mode === "efficient") && format === "aria") {
|
||||
return jsonError(res, 400, "labels/mode=efficient require format=ai");
|
||||
}
|
||||
if (format === "ai") {
|
||||
const pw = await requirePwAi(res, "ai snapshot");
|
||||
if (!pw) return;
|
||||
const wantsRoleSnapshot =
|
||||
labels === true ||
|
||||
mode === "efficient" ||
|
||||
interactive === true ||
|
||||
compact === true ||
|
||||
depth !== undefined ||
|
||||
@@ -210,6 +227,39 @@ export function registerBrowserAgentSnapshotRoutes(app: express.Express, ctx: Br
|
||||
}
|
||||
throw err;
|
||||
});
|
||||
if (labels) {
|
||||
const labeled = await pw.screenshotWithLabelsViaPlaywright({
|
||||
cdpUrl: profileCtx.profile.cdpUrl,
|
||||
targetId: tab.targetId,
|
||||
refs: "refs" in snap ? snap.refs : {},
|
||||
type: "png",
|
||||
});
|
||||
const normalized = await normalizeBrowserScreenshot(labeled.buffer, {
|
||||
maxSide: DEFAULT_BROWSER_SCREENSHOT_MAX_SIDE,
|
||||
maxBytes: DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
|
||||
});
|
||||
await ensureMediaDir();
|
||||
const saved = await saveMediaBuffer(
|
||||
normalized.buffer,
|
||||
normalized.contentType ?? "image/png",
|
||||
"browser",
|
||||
DEFAULT_BROWSER_SCREENSHOT_MAX_BYTES,
|
||||
);
|
||||
const imageType = normalized.contentType?.includes("jpeg") ? "jpeg" : "png";
|
||||
return res.json({
|
||||
ok: true,
|
||||
format,
|
||||
targetId: tab.targetId,
|
||||
url: tab.url,
|
||||
labels: true,
|
||||
labelsCount: labeled.labels,
|
||||
labelsSkipped: labeled.skipped,
|
||||
imagePath: path.resolve(saved.path),
|
||||
imageType,
|
||||
...snap,
|
||||
});
|
||||
}
|
||||
|
||||
return res.json({
|
||||
ok: true,
|
||||
format,
|
||||
|
||||
@@ -11,6 +11,8 @@ export const browserCoreExamples = [
|
||||
"clawdbot browser screenshot --ref 12",
|
||||
"clawdbot browser snapshot",
|
||||
"clawdbot browser snapshot --format aria --limit 200",
|
||||
"clawdbot browser snapshot --efficient",
|
||||
"clawdbot browser snapshot --labels",
|
||||
];
|
||||
|
||||
export const browserActionExamples = [
|
||||
|
||||
@@ -48,17 +48,22 @@ export function registerBrowserInspectCommands(
|
||||
.option("--format <aria|ai>", "Snapshot format (default: ai)", "ai")
|
||||
.option("--target-id <id>", "CDP target id (or unique prefix)")
|
||||
.option("--limit <n>", "Max nodes (default: 500/800)", (v: string) => Number(v))
|
||||
.option("--mode <efficient>", "Snapshot preset (efficient)")
|
||||
.option("--efficient", "Use the efficient snapshot preset", false)
|
||||
.option("--interactive", "Role snapshot: interactive elements only", false)
|
||||
.option("--compact", "Role snapshot: compact output", false)
|
||||
.option("--depth <n>", "Role snapshot: max depth", (v: string) => Number(v))
|
||||
.option("--selector <sel>", "Role snapshot: scope to CSS selector")
|
||||
.option("--frame <sel>", "Role snapshot: scope to an iframe selector")
|
||||
.option("--labels", "Include viewport label overlay screenshot", false)
|
||||
.option("--out <path>", "Write snapshot to a file")
|
||||
.action(async (opts, cmd) => {
|
||||
const parent = parentOpts(cmd);
|
||||
const baseUrl = resolveBrowserControlUrl(parent?.url);
|
||||
const profile = parent?.browserProfile;
|
||||
const format = opts.format === "aria" ? "aria" : "ai";
|
||||
const mode =
|
||||
opts.efficient === true || opts.mode === "efficient" ? "efficient" : undefined;
|
||||
try {
|
||||
const result = await browserSnapshot(baseUrl, {
|
||||
format,
|
||||
@@ -69,6 +74,8 @@ export function registerBrowserInspectCommands(
|
||||
depth: Number.isFinite(opts.depth) ? opts.depth : undefined,
|
||||
selector: opts.selector?.trim() || undefined,
|
||||
frame: opts.frame?.trim() || undefined,
|
||||
labels: Boolean(opts.labels) || undefined,
|
||||
mode,
|
||||
profile,
|
||||
});
|
||||
|
||||
@@ -81,9 +88,24 @@ export function registerBrowserInspectCommands(
|
||||
await fs.writeFile(opts.out, payload, "utf8");
|
||||
}
|
||||
if (parent?.json) {
|
||||
defaultRuntime.log(JSON.stringify({ ok: true, out: opts.out }, null, 2));
|
||||
defaultRuntime.log(
|
||||
JSON.stringify(
|
||||
{
|
||||
ok: true,
|
||||
out: opts.out,
|
||||
...(result.format === "ai" && result.imagePath
|
||||
? { imagePath: result.imagePath }
|
||||
: {}),
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
);
|
||||
} else {
|
||||
defaultRuntime.log(opts.out);
|
||||
if (result.format === "ai" && result.imagePath) {
|
||||
defaultRuntime.log(`MEDIA:${result.imagePath}`);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -95,6 +117,9 @@ export function registerBrowserInspectCommands(
|
||||
|
||||
if (result.format === "ai") {
|
||||
defaultRuntime.log(result.snapshot);
|
||||
if (result.imagePath) {
|
||||
defaultRuntime.log(`MEDIA:${result.imagePath}`);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user