feat(mac): host PeekabooBridge for ui
This commit is contained in:
@@ -1,6 +1,15 @@
|
||||
{
|
||||
"originHash" : "ee7127ff91914397f9991e22a0b06ab0bca0d83582adeed6011198c49167631b",
|
||||
"originHash" : "5de6834e5cb92c45c61a2e6792b780ac231c5741def70f1efa9ec857fa12f8cb",
|
||||
"pins" : [
|
||||
{
|
||||
"identity" : "eventsource",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/mattt/eventsource.git",
|
||||
"state" : {
|
||||
"revision" : "ca2a9d90cbe49e09b92f4b6ebd922c03ebea51d0",
|
||||
"version" : "1.3.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "menubarextraaccess",
|
||||
"kind" : "remoteSourceControl",
|
||||
@@ -19,6 +28,96 @@
|
||||
"version" : "2.8.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-algorithms",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-algorithms",
|
||||
"state" : {
|
||||
"revision" : "87e50f483c54e6efd60e885f7f5aa946cee68023",
|
||||
"version" : "1.2.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-asn1",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-asn1.git",
|
||||
"state" : {
|
||||
"revision" : "810496cf121e525d660cd0ea89a758740476b85f",
|
||||
"version" : "1.5.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-async-algorithms",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-async-algorithms",
|
||||
"state" : {
|
||||
"revision" : "6c050d5ef8e1aa6342528460db614e9770d7f804",
|
||||
"version" : "1.1.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-collections",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-collections",
|
||||
"state" : {
|
||||
"branch" : "main",
|
||||
"revision" : "8e5e4a8f3617283b556064574651fc0869943c9a"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-configuration",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-configuration",
|
||||
"state" : {
|
||||
"branch" : "main",
|
||||
"revision" : "3528deb75256d7dcbb0d71fa75077caae0a8c749"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-crypto",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-crypto.git",
|
||||
"state" : {
|
||||
"revision" : "6f70fa9eab24c1fd982af18c281c4525d05e3095",
|
||||
"version" : "4.2.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-log",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-log.git",
|
||||
"state" : {
|
||||
"revision" : "bc386b95f2a16ccd0150a8235e7c69eab2b866ca",
|
||||
"version" : "1.8.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-numerics",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-numerics.git",
|
||||
"state" : {
|
||||
"revision" : "0c0290ff6b24942dadb83a929ffaaa1481df04a2",
|
||||
"version" : "1.1.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-sdk",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/modelcontextprotocol/swift-sdk.git",
|
||||
"state" : {
|
||||
"revision" : "c0407a0b52677cb395d824cac2879b963075ba8c",
|
||||
"version" : "0.10.2"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-service-lifecycle",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/swift-server/swift-service-lifecycle",
|
||||
"state" : {
|
||||
"revision" : "1de37290c0ab3c5a96028e0f02911b672fd42348",
|
||||
"version" : "2.9.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-subprocess",
|
||||
"kind" : "remoteSourceControl",
|
||||
|
||||
@@ -18,6 +18,9 @@ let package = Package(
|
||||
.package(url: "https://github.com/swiftlang/swift-subprocess.git", from: "0.1.0"),
|
||||
.package(url: "https://github.com/sparkle-project/Sparkle", from: "2.8.1"),
|
||||
.package(path: "../shared/ClawdisKit"),
|
||||
.package(path: "../../Peekaboo/Core/PeekabooCore"),
|
||||
.package(path: "../../Peekaboo/Core/PeekabooAutomationKit"),
|
||||
.package(path: "../../Peekaboo/Core/PeekabooVisualizer"),
|
||||
],
|
||||
targets: [
|
||||
.target(
|
||||
@@ -42,6 +45,9 @@ let package = Package(
|
||||
.product(name: "MenuBarExtraAccess", package: "MenuBarExtraAccess"),
|
||||
.product(name: "Subprocess", package: "swift-subprocess"),
|
||||
.product(name: "Sparkle", package: "Sparkle"),
|
||||
.product(name: "PeekabooBridge", package: "PeekabooCore"),
|
||||
.product(name: "PeekabooAutomationKit", package: "PeekabooAutomationKit"),
|
||||
.product(name: "PeekabooVisualizer", package: "PeekabooVisualizer"),
|
||||
],
|
||||
resources: [
|
||||
.copy("Resources/Clawdis.icns"),
|
||||
@@ -55,6 +61,8 @@ let package = Package(
|
||||
dependencies: [
|
||||
"ClawdisIPC",
|
||||
"ClawdisProtocol",
|
||||
.product(name: "PeekabooBridge", package: "PeekabooCore"),
|
||||
.product(name: "PeekabooAutomationKit", package: "PeekabooAutomationKit"),
|
||||
],
|
||||
swiftSettings: [
|
||||
.enableUpcomingFeature("StrictConcurrency"),
|
||||
|
||||
@@ -155,6 +155,15 @@ final class AppState: ObservableObject {
|
||||
didSet { self.ifNotPreview { UserDefaults.standard.set(self.canvasEnabled, forKey: canvasEnabledKey) } }
|
||||
}
|
||||
|
||||
/// User setting that controls whether the in-app PeekabooBridge host runs.
///
/// Outside of previews, every change is written to `UserDefaults` under
/// `peekabooBridgeEnabledKey` and forwarded to
/// `PeekabooBridgeHostCoordinator.shared.setEnabled(_:)` from an unstructured task.
@Published var peekabooBridgeEnabled: Bool {
|
||||
didSet {
|
||||
self.ifNotPreview {
|
||||
UserDefaults.standard.set(self.peekabooBridgeEnabled, forKey: peekabooBridgeEnabledKey)
|
||||
// The task reads the property when it runs, not when it is created.
Task { await PeekabooBridgeHostCoordinator.shared.setEnabled(self.peekabooBridgeEnabled) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Published var attachExistingGatewayOnly: Bool {
|
||||
didSet {
|
||||
self.ifNotPreview {
|
||||
@@ -231,6 +240,8 @@ final class AppState: ObservableObject {
|
||||
let storedPort = UserDefaults.standard.integer(forKey: webChatPortKey)
|
||||
self.webChatPort = storedPort > 0 ? storedPort : 18788
|
||||
self.canvasEnabled = UserDefaults.standard.object(forKey: canvasEnabledKey) as? Bool ?? true
|
||||
self.peekabooBridgeEnabled = UserDefaults.standard
|
||||
.object(forKey: peekabooBridgeEnabledKey) as? Bool ?? true
|
||||
self.attachExistingGatewayOnly = UserDefaults.standard.bool(forKey: attachExistingGatewayOnlyKey)
|
||||
|
||||
if !self.isPreview {
|
||||
|
||||
@@ -24,6 +24,7 @@ let webChatEnabledKey = "clawdis.webChatEnabled"
|
||||
let webChatSwiftUIEnabledKey = "clawdis.webChatSwiftUIEnabled"
|
||||
let webChatPortKey = "clawdis.webChatPort"
|
||||
let canvasEnabledKey = "clawdis.canvasEnabled"
|
||||
let peekabooBridgeEnabledKey = "clawdis.peekabooBridgeEnabled"
|
||||
let deepLinkAgentEnabledKey = "clawdis.deepLinkAgentEnabled"
|
||||
let deepLinkKeyKey = "clawdis.deepLinkKey"
|
||||
let modelCatalogPathKey = "clawdis.modelCatalogPath"
|
||||
|
||||
@@ -58,53 +58,6 @@ enum ControlRequestHandler {
|
||||
let result = await AgentRPC.shared.status()
|
||||
return Response(ok: result.ok, message: result.error)
|
||||
|
||||
case .uiListScreens:
|
||||
let screens = await MainActor.run { UIScreenService.listScreens() }
|
||||
let payload = try JSONEncoder().encode(screens)
|
||||
return Response(ok: true, payload: payload)
|
||||
|
||||
case let .uiScreenshot(screenIndex, windowID):
|
||||
let authorized = await PermissionManager
|
||||
.ensure([.screenRecording], interactive: false)[.screenRecording] ?? false
|
||||
guard authorized else { return Response(ok: false, message: "screen recording permission missing") }
|
||||
|
||||
let resolution: (screenIndex: Int?, displayID: UInt32?) = await Task { @MainActor in
|
||||
if let screenIndex,
|
||||
let match = UIScreenService.listScreens().first(where: { $0.index == screenIndex })
|
||||
{
|
||||
return (screenIndex, match.displayID)
|
||||
}
|
||||
return (nil, nil)
|
||||
}.value
|
||||
|
||||
let data = await Task { @MainActor in
|
||||
await Screenshotter.capture(displayID: resolution.displayID, windowID: windowID)
|
||||
}.value
|
||||
|
||||
guard let data else {
|
||||
return Response(ok: false, message: "screenshot failed")
|
||||
}
|
||||
|
||||
let dir = FileManager.default.temporaryDirectory.appendingPathComponent("clawdis-ui", isDirectory: true)
|
||||
try? FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
|
||||
let outURL = dir.appendingPathComponent("screenshot-\(Int(Date().timeIntervalSince1970 * 1000)).png")
|
||||
do {
|
||||
try data.write(to: outURL)
|
||||
} catch {
|
||||
return Response(ok: false, message: "failed to write screenshot: \(error.localizedDescription)")
|
||||
}
|
||||
|
||||
let size = ScreenshotSize.readPNGSize(data: data)
|
||||
let result = UIScreenshotResult(
|
||||
path: outURL.path,
|
||||
width: size?.width ?? 0,
|
||||
height: size?.height ?? 0,
|
||||
screenIndex: resolution.screenIndex,
|
||||
displayID: resolution.displayID,
|
||||
windowID: windowID)
|
||||
let payload = try JSONEncoder().encode(result)
|
||||
return Response(ok: true, payload: payload)
|
||||
|
||||
case let .runShell(command, cwd, env, timeoutSec, needsSR):
|
||||
if needsSR {
|
||||
let authorized = await PermissionManager
|
||||
|
||||
@@ -57,6 +57,11 @@ struct GeneralSettings: View {
|
||||
subtitle: "Allow the agent to show and control the Canvas panel.",
|
||||
binding: self.$state.canvasEnabled)
|
||||
|
||||
SettingsToggleRow(
|
||||
title: "Enable Peekaboo Bridge",
|
||||
subtitle: "Allow signed tools to drive UI automation via `clawdis-mac ui …`.",
|
||||
binding: self.$state.peekabooBridgeEnabled)
|
||||
|
||||
SettingsToggleRow(
|
||||
title: "Enable debug tools",
|
||||
subtitle: "Show the Debug tab with development utilities.",
|
||||
|
||||
@@ -183,6 +183,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate {
|
||||
Task { await HealthStore.shared.refresh(onDemand: true) }
|
||||
Task { await PortGuardian.shared.sweep(mode: AppStateStore.shared.connectionMode) }
|
||||
Task { await self.socketServer.start() }
|
||||
Task { await PeekabooBridgeHostCoordinator.shared.setEnabled(AppStateStore.shared.peekabooBridgeEnabled) }
|
||||
self.scheduleFirstRunOnboardingIfNeeded()
|
||||
|
||||
// Developer/testing helper: auto-open WebChat when launched with --webchat
|
||||
@@ -202,6 +203,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate {
|
||||
Task { await AgentRPC.shared.shutdown() }
|
||||
Task { await GatewayConnection.shared.shutdown() }
|
||||
Task { await self.socketServer.stop() }
|
||||
Task { await PeekabooBridgeHostCoordinator.shared.stop() }
|
||||
}
|
||||
|
||||
@MainActor
|
||||
|
||||
254
apps/macos/Sources/Clawdis/PeekabooBridgeHostCoordinator.swift
Normal file
254
apps/macos/Sources/Clawdis/PeekabooBridgeHostCoordinator.swift
Normal file
@@ -0,0 +1,254 @@
|
||||
import Foundation
|
||||
import os
|
||||
import PeekabooAutomationKit
|
||||
import PeekabooBridge
|
||||
import PeekabooFoundation
|
||||
import PeekabooVisualizer
|
||||
|
||||
/// Owns the lifecycle of the PeekabooBridge host that Clawdis exposes on
/// `PeekabooBridgeConstants.clawdisSocketPath`.
///
/// All start/stop transitions are serialized. The methods below suspend while
/// the host starts or stops, and because actor-isolated code is reentrant a
/// second call could otherwise interleave with a half-finished transition
/// (for example an "enable" arriving while a "disable" is still awaiting
/// `host.stop()` would see the old host, do nothing, and leave the bridge off).
@MainActor
final class PeekabooBridgeHostCoordinator {
    static let shared = PeekabooBridgeHostCoordinator()

    private let logger = Logger(subsystem: "com.steipete.clawdis", category: "PeekabooBridge")

    private var host: PeekabooBridgeHost?
    private var services: ClawdisPeekabooBridgeServices?

    /// Most recently scheduled transition; each new transition waits for it first.
    private var pendingTransition: Task<Void, Never>?

    /// Starts the host when `enabled` is true, stops it otherwise.
    ///
    /// Returns once this request (and every request queued before it) has been applied.
    func setEnabled(_ enabled: Bool) async {
        let previous = self.pendingTransition
        let transition = Task { @MainActor in
            // Run strictly after the previous transition so requests apply in call order.
            await previous?.value
            if enabled {
                await self.startIfNeeded()
            } else {
                await self.stopIfRunning()
            }
        }
        self.pendingTransition = transition
        await transition.value
    }

    /// Stops the host if it is running; a no-op otherwise.
    func stop() async {
        await self.setEnabled(false)
    }

    /// Tears down the current host, if any. Only called from the serialized transition chain.
    private func stopIfRunning() async {
        guard let host = self.host else { return }
        await host.stop()
        self.host = nil
        self.services = nil
        self.logger.info("PeekabooBridge host stopped")
    }

    /// Builds the services, server and host and starts listening, unless a host already exists.
    private func startIfNeeded() async {
        guard self.host == nil else { return }

        // Only clients signed by this team may connect; no individual bundles are allowlisted.
        let allowlistedTeamIDs: Set<String> = ["Y5PE65HELJ"]
        let allowlistedBundles: Set<String> = []

        let services = ClawdisPeekabooBridgeServices()
        let server = PeekabooBridgeServer(
            services: services,
            hostKind: .gui,
            allowlistedTeams: allowlistedTeamIDs,
            allowlistedBundles: allowlistedBundles)

        let host = PeekabooBridgeHost(
            socketPath: PeekabooBridgeConstants.clawdisSocketPath,
            server: server,
            allowedTeamIDs: allowlistedTeamIDs,
            requestTimeoutSec: 10)

        self.services = services
        self.host = host

        await host.start()
        self.logger.info("PeekabooBridge host started at \(PeekabooBridgeConstants.clawdisSocketPath, privacy: .public)")
    }
}
|
||||
|
||||
/// Service bundle handed to `PeekabooBridgeServer`.
///
/// Every service that accepts a feedback client receives the same
/// `PeekabooVisualizerFeedbackClient`, and the automation service shares the
/// in-memory snapshot manager exposed through `snapshots`.
@MainActor
private final class ClawdisPeekabooBridgeServices: PeekabooBridgeServiceProviding {
    let permissions: PermissionsService
    let screenCapture: any ScreenCaptureServiceProtocol
    let automation: any UIAutomationServiceProtocol
    let windows: any WindowManagementServiceProtocol
    let applications: any ApplicationServiceProtocol
    let menu: any MenuServiceProtocol
    let dock: any DockServiceProtocol
    let dialogs: any DialogServiceProtocol
    let snapshots: any SnapshotManagerProtocol

    init() {
        let log = LoggingService(subsystem: "com.steipete.clawdis.peekaboo")
        let feedback = PeekabooVisualizerFeedbackClient(client: .shared)

        // Snapshot store options: validity window 600, at most 50 snapshots,
        // artifacts are not deleted on cleanup.
        let snapshotStore = InMemorySnapshotManager(options: .init(
            snapshotValidityWindow: 600,
            maxSnapshots: 50,
            deleteArtifactsOnCleanup: false))
        let appService = ApplicationService(feedbackClient: feedback)

        // Wrap the plain capture service so captures also trigger visual feedback.
        let capture = FeedbackScreenCaptureService(
            base: ScreenCaptureService(loggingService: log),
            feedbackClient: feedback)

        self.permissions = PermissionsService()
        self.snapshots = snapshotStore
        self.applications = appService
        self.screenCapture = capture
        self.automation = UIAutomationService(
            snapshotManager: snapshotStore,
            loggingService: log,
            searchPolicy: .balanced,
            feedbackClient: feedback)
        self.windows = WindowManagementService(applicationService: appService, feedbackClient: feedback)
        self.menu = MenuService(applicationService: appService, feedbackClient: feedback)
        self.dock = DockService(feedbackClient: feedback)
        self.dialogs = DialogService(feedbackClient: feedback)
    }
}
|
||||
|
||||
/// Adapter that satisfies `AutomationFeedbackClient` by forwarding every call
/// to a `VisualizationClient`.
///
/// Most methods pass their arguments straight through; the window-operation and
/// space-switch calls first translate the automation-side enum into the
/// visualizer-side one.
@MainActor
private final class PeekabooVisualizerFeedbackClient: AutomationFeedbackClient {
    private let visualizer: VisualizationClient

    init(client: VisualizationClient) {
        self.visualizer = client
    }

    func connect() {
        self.visualizer.connect()
    }

    func showClickFeedback(at point: CGPoint, type: ClickType) async -> Bool {
        return await self.visualizer.showClickFeedback(at: point, type: type)
    }

    func showTypingFeedback(keys: [String], duration: TimeInterval, cadence: TypingCadence) async -> Bool {
        return await self.visualizer.showTypingFeedback(keys: keys, duration: duration, cadence: cadence)
    }

    func showScrollFeedback(at point: CGPoint, direction: ScrollDirection, amount: Int) async -> Bool {
        return await self.visualizer.showScrollFeedback(at: point, direction: direction, amount: amount)
    }

    func showHotkeyDisplay(keys: [String], duration: TimeInterval) async -> Bool {
        return await self.visualizer.showHotkeyDisplay(keys: keys, duration: duration)
    }

    func showSwipeGesture(from: CGPoint, to: CGPoint, duration: TimeInterval) async -> Bool {
        return await self.visualizer.showSwipeGesture(from: from, to: to, duration: duration)
    }

    func showMouseMovement(from: CGPoint, to: CGPoint, duration: TimeInterval) async -> Bool {
        return await self.visualizer.showMouseMovement(from: from, to: to, duration: duration)
    }

    func showWindowOperation(_ kind: WindowOperationKind, windowRect: CGRect, duration: TimeInterval) async -> Bool {
        // One-to-one translation between the two window-operation enums.
        let operation: WindowOperation
        switch kind {
        case .close:
            operation = .close
        case .minimize:
            operation = .minimize
        case .maximize:
            operation = .maximize
        case .move:
            operation = .move
        case .resize:
            operation = .resize
        case .setBounds:
            operation = .setBounds
        case .focus:
            operation = .focus
        }
        return await self.visualizer.showWindowOperation(operation, windowRect: windowRect, duration: duration)
    }

    func showDialogInteraction(
        element: DialogElementType,
        elementRect: CGRect,
        action: DialogActionType) async -> Bool
    {
        return await self.visualizer.showDialogInteraction(
            element: element,
            elementRect: elementRect,
            action: action)
    }

    func showMenuNavigation(menuPath: [String]) async -> Bool {
        return await self.visualizer.showMenuNavigation(menuPath: menuPath)
    }

    func showSpaceSwitch(from: Int, to: Int, direction: SpaceSwitchDirection) async -> Bool {
        // Anything that is not `.left` is shown as a switch to the right.
        let visualDirection: SpaceDirection
        if direction == .left {
            visualDirection = .left
        } else {
            visualDirection = .right
        }
        return await self.visualizer.showSpaceSwitch(from: from, to: to, direction: visualDirection)
    }

    func showAppLaunch(appName: String, iconPath: String?) async -> Bool {
        return await self.visualizer.showAppLaunch(appName: appName, iconPath: iconPath)
    }

    func showAppQuit(appName: String, iconPath: String?) async -> Bool {
        return await self.visualizer.showAppQuit(appName: appName, iconPath: iconPath)
    }

    func showScreenshotFlash(in rect: CGRect) async -> Bool {
        return await self.visualizer.showScreenshotFlash(in: rect)
    }

    func showWatchCapture(in rect: CGRect) async -> Bool {
        return await self.visualizer.showWatchCapture(in: rect)
    }
}
|
||||
|
||||
/// Decorator around another `ScreenCaptureServiceProtocol` that shows visual
/// feedback after each successful capture.
///
/// The capture itself is delegated unchanged. Feedback is only shown when a
/// rectangle is available for the capture (display bounds, window bounds, or
/// the requested area); if the wrapped capture throws, no feedback is shown.
@MainActor
private final class FeedbackScreenCaptureService: ScreenCaptureServiceProtocol {
    private let wrapped: any ScreenCaptureServiceProtocol
    private let feedback: any AutomationFeedbackClient

    init(base: any ScreenCaptureServiceProtocol, feedbackClient: any AutomationFeedbackClient) {
        self.wrapped = base
        self.feedback = feedbackClient
    }

    func captureScreen(
        displayIndex: Int?,
        visualizerMode: CaptureVisualizerMode,
        scale: CaptureScalePreference) async throws -> CaptureResult
    {
        let capture = try await self.wrapped.captureScreen(
            displayIndex: displayIndex,
            visualizerMode: visualizerMode,
            scale: scale)
        let displayBounds = capture.metadata.displayInfo?.bounds
        await self.emitFeedback(for: visualizerMode, in: displayBounds)
        return capture
    }

    func captureWindow(
        appIdentifier: String,
        windowIndex: Int?,
        visualizerMode: CaptureVisualizerMode,
        scale: CaptureScalePreference) async throws -> CaptureResult
    {
        let capture = try await self.wrapped.captureWindow(
            appIdentifier: appIdentifier,
            windowIndex: windowIndex,
            visualizerMode: visualizerMode,
            scale: scale)
        let windowBounds = capture.metadata.windowInfo?.bounds
        await self.emitFeedback(for: visualizerMode, in: windowBounds)
        return capture
    }

    func captureFrontmost(
        visualizerMode: CaptureVisualizerMode,
        scale: CaptureScalePreference) async throws -> CaptureResult
    {
        let capture = try await self.wrapped.captureFrontmost(visualizerMode: visualizerMode, scale: scale)
        let windowBounds = capture.metadata.windowInfo?.bounds
        await self.emitFeedback(for: visualizerMode, in: windowBounds)
        return capture
    }

    func captureArea(
        _ rect: CGRect,
        visualizerMode: CaptureVisualizerMode,
        scale: CaptureScalePreference) async throws -> CaptureResult
    {
        let capture = try await self.wrapped.captureArea(rect, visualizerMode: visualizerMode, scale: scale)
        await self.emitFeedback(for: visualizerMode, in: rect)
        return capture
    }

    func hasScreenRecordingPermission() async -> Bool {
        return await self.wrapped.hasScreenRecordingPermission()
    }

    /// Shows the feedback matching `mode` over `area`; does nothing when `area` is nil.
    /// The feedback client's Bool result is intentionally discarded.
    private func emitFeedback(for mode: CaptureVisualizerMode, in area: CGRect?) async {
        guard let area else { return }
        switch mode {
        case .screenshotFlash:
            _ = await self.feedback.showScreenshotFlash(in: area)
        case .watchCapture:
            _ = await self.feedback.showWatchCapture(in: area)
        }
    }
}
|
||||
@@ -1,80 +0,0 @@
|
||||
import AppKit
|
||||
import CoreGraphics
|
||||
import Foundation
|
||||
@preconcurrency import ScreenCaptureKit
|
||||
import VideoToolbox
|
||||
|
||||
/// One-shot screenshot helper built on ScreenCaptureKit.
enum Screenshotter {
    /// Captures a single frame and returns it as PNG data.
    ///
    /// - Parameters:
    ///   - displayID: Display to capture; when nil the first shareable display is used.
    ///   - windowID: Window to capture. When it matches a shareable window it takes
    ///     precedence over the display; otherwise the display is captured.
    /// - Returns: PNG data, or nil when shareable content is unavailable, no capture
    ///   target can be resolved, or the stream cannot be set up or started.
    @MainActor
    static func capture(displayID: UInt32?, windowID: UInt32?) async -> Data? {
        guard let content = try? await SCShareableContent.current else { return nil }

        let targetDisplay: SCDisplay? = if let displayID {
            content.displays.first(where: { $0.displayID == displayID })
        } else {
            content.displays.first
        }

        let filter: SCContentFilter
        if let windowID, let win = content.windows.first(where: { $0.windowID == windowID }) {
            filter = SCContentFilter(desktopIndependentWindow: win)
        } else if let display = targetDisplay {
            filter = SCContentFilter(display: display, excludingWindows: [])
        } else {
            return nil
        }

        let config = SCStreamConfiguration()
        if let display = targetDisplay {
            config.width = display.width
            config.height = display.height
        }
        config.scalesToFit = true
        config.colorSpaceName = CGColorSpace.displayP3

        let stream = SCStream(filter: filter, configuration: config, delegate: nil)
        let grabber = FrameGrabber()
        do {
            // Registering the output must succeed: without it no frame is ever
            // delivered to the grabber and awaiting the PNG would never finish.
            try stream.addStreamOutput(
                grabber,
                type: .screen,
                sampleHandlerQueue: DispatchQueue(label: "com.steipete.clawdis.sshot"))
            try await stream.startCapture()
            let data = await grabber.awaitPNG()
            // Best effort: the frame is already in hand, so a failed stop is not reported.
            try? await stream.stopCapture()
            return data
        } catch {
            return nil
        }
    }
}
|
||||
|
||||
/// Stream output that converts the first usable screen frame to PNG and hands
/// it to a single waiter.
///
/// Frames arrive on the stream's sample-handler queue while `awaitPNG()` is
/// called from elsewhere, so all mutable state is guarded by `lock`. A frame
/// that arrives before anyone is waiting is kept in `pendingPNG` instead of
/// being dropped, which would otherwise leave the waiter suspended forever.
final class FrameGrabber: NSObject, SCStreamOutput {
    private let lock = NSLock()
    private var continuation: CheckedContinuation<Data?, Never>?
    private var pendingPNG: Data?
    private var delivered = false

    /// Suspends until the first frame has been encoded and returns its PNG data.
    ///
    /// Returns immediately when a frame was already captured before this call.
    /// NOTE(review): if the stream never produces a usable frame this still
    /// waits indefinitely — callers may want a timeout.
    func awaitPNG() async -> Data? {
        await withCheckedContinuation { cont in
            self.lock.lock()
            if let png = self.pendingPNG {
                self.pendingPNG = nil
                self.lock.unlock()
                cont.resume(returning: png)
            } else {
                self.continuation = cont
                self.lock.unlock()
            }
        }
    }

    nonisolated func stream(
        _ stream: SCStream,
        didOutputSampleBuffer sampleBuffer: CMSampleBuffer,
        of outputType: SCStreamOutputType)
    {
        guard outputType == .screen else { return }
        guard !self.hasDelivered() else { return }
        guard let imageBuffer = sampleBuffer.imageBuffer else { return }

        var cgImage: CGImage?
        let result = VTCreateCGImageFromCVPixelBuffer(imageBuffer, options: nil, imageOut: &cgImage)
        guard result == noErr, let cgImage else { return }
        let rep = NSBitmapImageRep(cgImage: cgImage)
        guard let data = rep.representation(using: .png, properties: [:]) else { return }

        // Decide under the lock who receives the frame; resume outside of it.
        let waiter = self.claimDelivery(of: data)
        waiter?.resume(returning: data)
    }

    /// Whether a frame has already been accepted.
    private func hasDelivered() -> Bool {
        self.lock.lock()
        defer { self.lock.unlock() }
        return self.delivered
    }

    /// Marks `data` as the delivered frame. Returns the waiting continuation to
    /// resume, or nil when nobody is waiting yet (the data is stashed) or a
    /// frame was already delivered (the data is discarded).
    private func claimDelivery(of data: Data) -> CheckedContinuation<Data?, Never>? {
        self.lock.lock()
        defer { self.lock.unlock() }
        guard !self.delivered else { return nil }
        self.delivered = true
        if let waiter = self.continuation {
            self.continuation = nil
            return waiter
        }
        self.pendingPNG = data
        return nil
    }
}
|
||||
@@ -1,44 +0,0 @@
|
||||
import AppKit
|
||||
import ClawdisIPC
|
||||
import CoreGraphics
|
||||
|
||||
/// Enumerates the attached screens for the UI IPC commands.
enum UIScreenService {
    /// Returns one `UIScreenInfo` per entry of `NSScreen.screens`, in the same order.
    ///
    /// `index` is the zero-based position in that array and `isPrimary` marks the
    /// screen equal to `NSScreen.main`.
    static func listScreens() -> [UIScreenInfo] {
        let attached = NSScreen.screens
        let primary = NSScreen.main

        var infos: [UIScreenInfo] = []
        infos.reserveCapacity(attached.count)
        for (position, display) in attached.enumerated() {
            let info = UIScreenInfo(
                index: position,
                name: display.peekabooName,
                frame: display.frame,
                visibleFrame: display.visibleFrame,
                isPrimary: display == primary,
                scaleFactor: display.backingScaleFactor,
                displayID: display.displayID)
            infos.append(info)
        }
        return infos
    }
}
|
||||
|
||||
private extension NSScreen {
    /// The value stored under `NSScreenNumber` in the device description,
    /// or 0 when that entry is missing or not a number.
    var displayID: UInt32 {
        let key = NSDeviceDescriptionKey("NSScreenNumber")
        guard let number = self.deviceDescription[key] as? NSNumber else {
            return 0
        }
        return number.uint32Value
    }

    /// Match Peekaboo's `ScreenService` naming (built-in vs. resolution fallback).
    ///
    /// Resolution order: unknown id, built-in display, pixel size of the current
    /// display mode, and finally a generic external label.
    var peekabooName: String {
        let identifier = self.displayID
        if identifier == 0 {
            return "Display"
        }
        if CGDisplayIsBuiltin(identifier) != 0 {
            return "Built-in Display"
        }
        guard let mode = CGDisplayCopyDisplayMode(identifier) else {
            return "External Display"
        }
        return "\(mode.pixelWidth)×\(mode.pixelHeight) Display"
    }
}
|
||||
|
||||
@@ -15,6 +15,11 @@ struct ClawdisCLI {
|
||||
exit(code)
|
||||
}
|
||||
|
||||
if args.first == "ui" {
|
||||
let code = try await UICLI.run(args: Array(args.dropFirst()), jsonOutput: jsonOutput)
|
||||
exit(code)
|
||||
}
|
||||
|
||||
let parsed = try parseCommandLine(args: args)
|
||||
let response = try await send(request: parsed.request)
|
||||
|
||||
@@ -42,8 +47,6 @@ struct ClawdisCLI {
|
||||
var kind: Kind
|
||||
|
||||
enum Kind {
|
||||
case uiScreens
|
||||
case uiScreenshot
|
||||
case generic
|
||||
}
|
||||
}
|
||||
@@ -100,29 +103,6 @@ struct ClawdisCLI {
|
||||
if caps.isEmpty { caps = Capability.allCases }
|
||||
return ParsedCLIRequest(request: .ensurePermissions(caps, interactive: interactive), kind: .generic)
|
||||
|
||||
case "ui":
|
||||
guard let sub = args.first else { throw CLIError.help }
|
||||
args = Array(args.dropFirst())
|
||||
|
||||
switch sub {
|
||||
case "screens":
|
||||
return ParsedCLIRequest(request: .uiListScreens, kind: .uiScreens)
|
||||
case "screenshot":
|
||||
var screenIndex: Int?
|
||||
var windowID: UInt32?
|
||||
while !args.isEmpty {
|
||||
let arg = args.removeFirst()
|
||||
switch arg {
|
||||
case "--screen-index": screenIndex = args.popFirst().flatMap(Int.init)
|
||||
case "--window-id": windowID = args.popFirst().flatMap(UInt32.init)
|
||||
default: break
|
||||
}
|
||||
}
|
||||
return ParsedCLIRequest(request: .uiScreenshot(screenIndex: screenIndex, windowID: windowID), kind: .uiScreenshot)
|
||||
default:
|
||||
throw CLIError.help
|
||||
}
|
||||
|
||||
case "run":
|
||||
var cwd: String?
|
||||
var env: [String: String] = [:]
|
||||
@@ -333,24 +313,6 @@ struct ClawdisCLI {
|
||||
}
|
||||
|
||||
switch parsed.kind {
|
||||
case .uiScreens:
|
||||
let screens = try self.decodePayload([UIScreenInfo].self, payload: response.payload)
|
||||
if screens.isEmpty {
|
||||
FileHandle.standardOutput.write(Data("No screens\n".utf8))
|
||||
return
|
||||
}
|
||||
for s in screens {
|
||||
let primary = s.isPrimary ? " (primary)" : ""
|
||||
let size = "\(Int(s.frame.width))×\(Int(s.frame.height))"
|
||||
let scale = String(format: "%.1f", Double(s.scaleFactor))
|
||||
let line = "Display \(s.index + 1)\(primary): \(s.name) \(size) @\(scale)x (id \(s.displayID))\n"
|
||||
FileHandle.standardOutput.write(Data(line.utf8))
|
||||
}
|
||||
|
||||
case .uiScreenshot:
|
||||
let result = try self.decodePayload(UIScreenshotResult.self, payload: response.payload)
|
||||
FileHandle.standardOutput.write(Data((result.path + "\n").utf8))
|
||||
|
||||
case .generic:
|
||||
if let payload = response.payload, let text = String(data: payload, encoding: .utf8), !text.isEmpty {
|
||||
FileHandle.standardOutput.write(payload)
|
||||
@@ -370,22 +332,6 @@ struct ClawdisCLI {
|
||||
]
|
||||
|
||||
switch parsed.kind {
|
||||
case .uiScreens:
|
||||
if let payload = response.payload,
|
||||
let obj = try? JSONSerialization.jsonObject(with: payload) {
|
||||
output["result"] = obj
|
||||
} else {
|
||||
output["result"] = []
|
||||
}
|
||||
|
||||
case .uiScreenshot:
|
||||
if let payload = response.payload,
|
||||
let obj = try? JSONSerialization.jsonObject(with: payload) {
|
||||
output["result"] = obj
|
||||
} else {
|
||||
output["result"] = NSNull()
|
||||
}
|
||||
|
||||
case .generic:
|
||||
if let payload = response.payload, !payload.isEmpty {
|
||||
if let obj = try? JSONSerialization.jsonObject(with: payload) {
|
||||
@@ -424,8 +370,12 @@ struct ClawdisCLI {
|
||||
[--interactive]
|
||||
|
||||
UI:
|
||||
clawdis-mac ui screens
|
||||
clawdis-mac ui screenshot [--screen-index <n>] [--window-id <u32>]
|
||||
clawdis-mac ui screenshot [...]
|
||||
clawdis-mac ui see [...]
|
||||
clawdis-mac ui click ...
|
||||
clawdis-mac ui type ...
|
||||
clawdis-mac ui wait ...
|
||||
clawdis-mac ui --help
|
||||
|
||||
Shell:
|
||||
clawdis-mac run [--cwd <path>] [--env KEY=VAL] [--timeout <sec>]
|
||||
|
||||
589
apps/macos/Sources/ClawdisCLI/UICLI.swift
Normal file
589
apps/macos/Sources/ClawdisCLI/UICLI.swift
Normal file
@@ -0,0 +1,589 @@
|
||||
import Foundation
|
||||
import Darwin
|
||||
import PeekabooAutomationKit
|
||||
import PeekabooBridge
|
||||
import PeekabooFoundation
|
||||
|
||||
enum UICLI {
|
||||
/// Entry point for `clawdis-mac ui …`.
///
/// - Parameters:
///   - args: Arguments after the `ui` token; the first one selects the subcommand.
///   - jsonOutput: Passed through to the subcommand to select JSON or text output.
/// - Returns: The process exit code: 0 after printing help for no arguments or a
///   help token, 1 after printing help for an unknown subcommand, otherwise
///   whatever the subcommand returns.
/// - Throws: Errors from resolving the bridge context or from the subcommand.
static func run(args: [String], jsonOutput: Bool) async throws -> Int32 {
    let helpTokens = ["--help", "-h", "help"]
    guard let sub = args.first, !helpTokens.contains(sub) else {
        self.printHelp()
        return 0
    }
    let rest = Array(args.dropFirst())

    // The bridge connection is resolved before dispatching, for every subcommand.
    let context = try await self.resolveContext()

    switch sub {
    case "permissions":
        return try await self.runPermissions(args: rest, jsonOutput: jsonOutput, context: context)
    case "frontmost":
        return try await self.runFrontmost(args: rest, jsonOutput: jsonOutput, context: context)
    case "apps":
        return try await self.runApps(args: rest, jsonOutput: jsonOutput, context: context)
    case "windows":
        return try await self.runWindows(args: rest, jsonOutput: jsonOutput, context: context)
    case "screenshot":
        return try await self.runScreenshot(args: rest, jsonOutput: jsonOutput, context: context)
    case "see":
        return try await self.runSee(args: rest, jsonOutput: jsonOutput, context: context)
    case "click":
        return try await self.runClick(args: rest, jsonOutput: jsonOutput, context: context)
    case "type":
        return try await self.runType(args: rest, jsonOutput: jsonOutput, context: context)
    case "wait":
        return try await self.runWait(args: rest, jsonOutput: jsonOutput, context: context)
    default:
        self.printHelp()
        return 1
    }
}
|
||||
|
||||
// MARK: - Context
|
||||
|
||||
/// A connected bridge client together with a label describing the host it reached.
private struct Context {
    /// Client that completed the handshake.
    let client: PeekabooBridgeClient

    /// Host kind and socket path in the form "<kind> via <path>", used in JSON output.
    let hostDescription: String
}
|
||||
|
||||
/// Finds a reachable PeekabooBridge host and returns a client connected to it.
///
/// When the `PEEKABOO_BRIDGE_SOCKET` environment variable is set and non-empty it
/// is the only socket tried; otherwise the Peekaboo socket is tried first and the
/// Clawdis socket second. The first successful handshake wins.
///
/// - Throws: The `PeekabooBridgeErrorEnvelope` when a host rejects this client as
///   unauthorized, or an `NSError` in the `clawdis.ui` domain when no candidate
///   socket answers. Any other handshake failure just moves on to the next socket.
private static func resolveContext() async throws -> Context {
    var socketPaths = [
        PeekabooBridgeConstants.peekabooSocketPath,
        PeekabooBridgeConstants.clawdisSocketPath,
    ]
    if let override = ProcessInfo.processInfo.environment["PEEKABOO_BRIDGE_SOCKET"], !override.isEmpty {
        socketPaths = [override]
    }

    let identity = PeekabooBridgeClientIdentity(
        bundleIdentifier: Bundle.main.bundleIdentifier,
        teamIdentifier: nil,
        processIdentifier: getpid(),
        hostname: Host.current().name)

    for path in socketPaths {
        let client = PeekabooBridgeClient(socketPath: path, requestTimeoutSec: 10)
        do {
            let handshake = try await client.handshake(client: identity, requestedHost: nil)
            let description = "\(handshake.hostKind.rawValue) via \(path)"
            return Context(client: client, hostDescription: description)
        } catch {
            // An explicit rejection is final; everything else means "try the next socket".
            if let envelope = error as? PeekabooBridgeErrorEnvelope, envelope.code == .unauthorizedClient {
                throw envelope
            }
        }
    }

    let message = "No PeekabooBridge host reachable (run Peekaboo.app or Clawdis.app)."
    throw NSError(domain: "clawdis.ui", code: 1, userInfo: [NSLocalizedDescriptionKey: message])
}
|
||||
|
||||
// MARK: - Commands
|
||||
|
||||
/// Handles `clawdis-mac ui permissions [status]`.
///
/// - Parameters:
///   - args: Remaining arguments; the first selects the action and defaults to `status`.
///   - jsonOutput: When true, writes a JSON object with `ok`, `host` and `result`;
///     otherwise writes the formatted status as a line of text.
///   - context: Connected bridge client used to query the permission status.
/// - Returns: 0 on success or after printing help for a help token, 1 after
///   printing help for an unknown action.
/// - Throws: Errors from the bridge request or from JSON encoding.
private static func runPermissions(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    let sub = args.first ?? "status"

    // Help tokens print usage instead of silently running the status query.
    if sub == "--help" || sub == "-h" || sub == "help" {
        self.printHelp()
        return 0
    }
    guard sub == "status" else {
        self.printHelp()
        return 1
    }

    let status = try await context.client.permissionsStatus()
    if jsonOutput {
        try self.writeJSON([
            "ok": true,
            "host": context.hostDescription,
            "result": try self.toJSONObject(status),
        ])
    } else {
        FileHandle.standardOutput.write(Data((self.formatPermissions(status) + "\n").utf8))
    }
    return 0
}
|
||||
|
||||
/// Handles `ui frontmost`: prints the frontmost application and, if any, the focused window.
///
/// - Parameters:
///   - args: Ignored.
///   - jsonOutput: When `true`, writes `ok`/`host`/`app`/`window` as JSON (`window` is
///     `null` when no window is focused); otherwise writes one or two text lines.
///   - context: Connected bridge client plus host description.
/// - Returns: Always `0` when no error is thrown.
/// - Throws: Errors from the bridge requests or from JSON conversion/serialization.
private static func runFrontmost(args _: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    let frontApp = try await context.client.getFrontmostApplication()
    let focusedWindow = try await context.client.getFocusedWindow()

    guard jsonOutput else {
        let bundleName = frontApp.bundleIdentifier ?? "<unknown>"
        let summary = "\(bundleName) (pid \(frontApp.processIdentifier))"
        FileHandle.standardOutput.write(Data((summary + "\n").utf8))
        if let focusedWindow {
            let windowLine = "window \(focusedWindow.windowID): \(focusedWindow.title)\n"
            FileHandle.standardOutput.write(Data(windowLine.utf8))
        }
        return 0
    }

    // Window first, then app: same conversion order as before.
    let windowPayload: Any
    if let focusedWindow {
        windowPayload = try toJSONObject(focusedWindow)
    } else {
        windowPayload = NSNull()
    }
    let appPayload = try toJSONObject(frontApp)

    try writeJSON([
        "ok": true,
        "host": context.hostDescription,
        "app": appPayload,
        "window": windowPayload,
    ])
    return 0
}
|
||||
|
||||
/// Handles `ui apps`: lists the applications reported by the host.
///
/// - Parameters:
///   - args: Ignored.
///   - jsonOutput: When `true`, writes the list as JSON under `result`; otherwise writes
///     one `bundleId<TAB>name` line per application (`<unknown>` when the id is missing).
///   - context: Connected bridge client plus host description.
/// - Returns: Always `0` when no error is thrown.
/// - Throws: Errors from the bridge request or from JSON conversion/serialization.
private static func runApps(args _: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    let applications = try await context.client.listApplications()

    guard jsonOutput else {
        for entry in applications {
            let identifier = entry.bundleIdentifier ?? "<unknown>"
            let row = "\(identifier)\t\(entry.name)\n"
            FileHandle.standardOutput.write(Data(row.utf8))
        }
        return 0
    }

    let encoded = try toJSONObject(applications)
    try writeJSON([
        "ok": true,
        "host": context.hostDescription,
        "result": encoded,
    ])
    return 0
}
|
||||
|
||||
/// Handles `ui windows [--bundle-id <id>]`: lists windows of one application or of the
/// frontmost target.
///
/// Unknown arguments are ignored. A help flag prints the usage text and returns `0`.
///
/// - Parameters:
///   - args: Arguments following `windows`.
///   - jsonOutput: When `true`, writes the list as JSON under `result`; otherwise writes
///     one `windowID<TAB>title` line per window.
///   - context: Connected bridge client plus host description.
/// - Returns: Always `0` when no error is thrown.
/// - Throws: Errors from the bridge request or from JSON conversion/serialization.
private static func runWindows(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    var remaining = args
    var bundleId: String?

    while !remaining.isEmpty {
        let flag = remaining.removeFirst()
        if flag == "--bundle-id" {
            bundleId = remaining.popFirst()
        } else if flag == "--help" || flag == "-h" || flag == "help" {
            printHelp()
            return 0
        }
    }

    // A missing or empty bundle id selects the frontmost target.
    let target: WindowTarget
    if let bundleId, !bundleId.isEmpty {
        target = .application(bundleId)
    } else {
        target = .frontmost
    }
    let windows = try await context.client.listWindows(target: target)

    guard jsonOutput else {
        for entry in windows {
            let row = "\(entry.windowID)\t\(entry.title)\n"
            FileHandle.standardOutput.write(Data(row.utf8))
        }
        return 0
    }

    let encoded = try toJSONObject(windows)
    try writeJSON([
        "ok": true,
        "host": context.hostDescription,
        "result": encoded,
    ])
    return 0
}
|
||||
|
||||
/// Handles `ui screenshot`: captures a window, a display, or the frontmost target and
/// writes the image to a temporary PNG file.
///
/// Target selection: a non-empty `--bundle-id` captures that application's window
/// (optionally `--window-index`); otherwise a parsable `--screen-index` captures that
/// display; otherwise the frontmost target is captured. `--watch` switches the
/// visualizer mode, `--scale native|1x|logical|logical1x` picks the scale (other
/// values leave it unchanged). Unknown arguments are ignored.
///
/// - Parameters:
///   - args: Arguments following `screenshot`.
///   - jsonOutput: When `true`, writes `path`, `metadata` and `warning` (empty string
///     when absent) as JSON; otherwise writes only the file path.
///   - context: Connected bridge client plus host description.
/// - Returns: Always `0` when no error is thrown.
/// - Throws: Errors from the capture request, the file write, or JSON handling.
private static func runScreenshot(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    var remaining = args
    var displayIndex: Int?
    var bundleId: String?
    var windowIndex: Int?
    var mode: CaptureVisualizerMode = .screenshotFlash
    var scale: CaptureScalePreference = .logical1x

    while !remaining.isEmpty {
        let flag = remaining.removeFirst()
        if flag == "--screen-index" {
            displayIndex = remaining.popFirst().flatMap { Int($0) }
        } else if flag == "--bundle-id" {
            bundleId = remaining.popFirst()
        } else if flag == "--window-index" {
            windowIndex = remaining.popFirst().flatMap { Int($0) }
        } else if flag == "--watch" {
            mode = .watchCapture
        } else if flag == "--scale" {
            if let requested = remaining.popFirst()?.lowercased() {
                switch requested {
                case "native":
                    scale = .native
                case "1x", "logical", "logical1x":
                    scale = .logical1x
                default:
                    break
                }
            }
        } else if flag == "--help" || flag == "-h" || flag == "help" {
            printHelp()
            return 0
        }
    }

    let capture: CaptureResult
    if let bundleId, !bundleId.isEmpty {
        capture = try await context.client.captureWindow(
            appIdentifier: bundleId,
            windowIndex: windowIndex,
            visualizerMode: mode,
            scale: scale)
    } else if displayIndex != nil {
        capture = try await context.client.captureScreen(
            displayIndex: displayIndex,
            visualizerMode: mode,
            scale: scale)
    } else {
        capture = try await context.client.captureFrontmost(visualizerMode: mode, scale: scale)
    }

    let imagePath = try writeTempPNG(capture.imageData)

    guard jsonOutput else {
        FileHandle.standardOutput.write(Data((imagePath + "\n").utf8))
        return 0
    }

    let metadata = try toJSONObject(capture.metadata)
    try writeJSON([
        "ok": true,
        "host": context.hostDescription,
        "path": imagePath,
        "metadata": metadata,
        "warning": capture.warning ?? "",
    ])
    return 0
}
|
||||
|
||||
/// Handles `ui see`: captures a window, runs element detection on it, and stores both
/// the screenshot and the detection result in a snapshot.
///
/// Snapshot selection: an explicit non-empty `--snapshot-id` wins; otherwise the most
/// recent snapshot for the bundle id is reused when that lookup succeeds; otherwise a
/// new snapshot is created. When no `--bundle-id` is given the frontmost target is
/// captured and the bundle id is taken from the capture metadata.
///
/// - Parameters:
///   - args: Arguments following `see`; unknown arguments are ignored.
///   - jsonOutput: When `true`, writes snapshot id, screenshot path and detection
///     result as JSON; otherwise writes the screenshot path followed by one
///     tab-separated line per detected element.
///   - context: Connected bridge client plus host description.
/// - Returns: Always `0` when no error is thrown.
/// - Throws: Errors from bridge requests, the file write, or JSON handling.
private static func runSee(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    var remaining = args
    var bundleId: String?
    var windowIndex: Int?
    var snapshotId: String?

    while !remaining.isEmpty {
        let flag = remaining.removeFirst()
        if flag == "--bundle-id" {
            bundleId = remaining.popFirst()
        } else if flag == "--window-index" {
            windowIndex = remaining.popFirst().flatMap { Int($0) }
        } else if flag == "--snapshot-id" {
            snapshotId = remaining.popFirst()
        } else if flag == "--help" || flag == "-h" || flag == "help" {
            printHelp()
            return 0
        }
    }

    let capture: CaptureResult
    if let bundleId, !bundleId.isEmpty {
        capture = try await context.client.captureWindow(
            appIdentifier: bundleId,
            windowIndex: windowIndex,
            visualizerMode: .screenshotFlash,
            scale: .logical1x)
    } else {
        capture = try await context.client.captureFrontmost(visualizerMode: .screenshotFlash, scale: .logical1x)
        // No usable bundle id was supplied: adopt the one reported by the capture.
        bundleId = capture.metadata.applicationInfo?.bundleIdentifier
    }

    let resolvedSnapshotId: String
    if let snapshotId, !snapshotId.isEmpty {
        resolvedSnapshotId = snapshotId
    } else if let bundleId, !bundleId.isEmpty,
              let existing = try? await context.client.getMostRecentSnapshot(applicationBundleId: bundleId)
    {
        // A failed lookup is treated the same as "no snapshot yet".
        resolvedSnapshotId = existing
    } else {
        resolvedSnapshotId = try await context.client.createSnapshot()
    }

    let screenshotPath = try writeTempPNG(capture.imageData)
    let appInfo = capture.metadata.applicationInfo
    let windowInfo = capture.metadata.windowInfo

    try await context.client.storeScreenshot(
        snapshotId: resolvedSnapshotId,
        screenshotPath: screenshotPath,
        applicationBundleId: bundleId,
        applicationProcessId: appInfo?.processIdentifier,
        applicationName: appInfo?.name,
        windowTitle: windowInfo?.title,
        windowBounds: windowInfo?.bounds)

    let windowContext = WindowContext(
        applicationName: appInfo?.name,
        windowTitle: windowInfo?.title,
        windowBounds: windowInfo?.bounds)

    let detection = try await context.client.detectElements(
        in: capture.imageData,
        snapshotId: resolvedSnapshotId,
        windowContext: windowContext)
    try await context.client.storeDetectionResult(snapshotId: resolvedSnapshotId, result: detection)

    guard jsonOutput else {
        FileHandle.standardOutput.write(Data((screenshotPath + "\n").utf8))
        for el in detection.elements.all {
            let b = el.bounds
            // Labels are flattened to one line so each element stays on its own row.
            let label = (el.label ?? el.value ?? "").replacingOccurrences(of: "\n", with: " ")
            let line =
                "\(el.id)\t\(el.type)\t\(Int(b.origin.x)),\(Int(b.origin.y)) \(Int(b.size.width))x\(Int(b.size.height))\t\(label)\n"
            FileHandle.standardOutput.write(Data(line.utf8))
        }
        return 0
    }

    let encodedDetection = try toJSONObject(detection)
    try writeJSON([
        "ok": true,
        "host": context.hostDescription,
        "snapshotId": resolvedSnapshotId,
        "screenshotPath": screenshotPath,
        "result": encodedDetection,
    ])
    return 0
}
|
||||
|
||||
/// Handles `ui click --on <elementId>`: clicks a previously detected element.
///
/// `--double` and `--right` select the click type (the last one given wins; default is
/// a single click). The snapshot is the explicit `--snapshot-id` or, failing that, the
/// one resolved by `resolveImplicitSnapshotId`. Unknown arguments are ignored.
///
/// - Parameters:
///   - args: Arguments following `click`.
///   - jsonOutput: When `true`, writes an `ok`/`host` JSON object; otherwise nothing is
///     written on success.
///   - context: Connected bridge client plus host description.
/// - Returns: Always `0` when no error is thrown.
/// - Throws: `NSError` (domain `clawdis.ui`, code 2) when `--on` is missing or empty;
///   errors from snapshot resolution, the click request, or JSON serialization.
private static func runClick(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    var remaining = args
    var bundleId: String?
    var snapshotId: String?
    var on: String?
    var clickType: ClickType = .single

    while !remaining.isEmpty {
        let flag = remaining.removeFirst()
        if flag == "--bundle-id" {
            bundleId = remaining.popFirst()
        } else if flag == "--snapshot-id" {
            snapshotId = remaining.popFirst()
        } else if flag == "--on" {
            on = remaining.popFirst()
        } else if flag == "--double" {
            clickType = .double
        } else if flag == "--right" {
            clickType = .right
        } else if flag == "--help" || flag == "-h" || flag == "help" {
            printHelp()
            return 0
        }
    }

    let elementId = on ?? ""
    if elementId.isEmpty {
        throw NSError(domain: "clawdis.ui", code: 2, userInfo: [
            NSLocalizedDescriptionKey: "Missing --on <elementId> (run `clawdis-mac ui see` first).",
        ])
    }

    let effectiveSnapshotId = try await resolveImplicitSnapshotId(
        snapshotId: snapshotId,
        bundleId: bundleId,
        client: context.client)

    try await context.client.click(
        target: .elementId(elementId),
        clickType: clickType,
        snapshotId: effectiveSnapshotId)

    if jsonOutput {
        try writeJSON([
            "ok": true,
            "host": context.hostDescription,
        ])
    }
    return 0
}
|
||||
|
||||
/// Handles `ui type`: types text, optionally into a specific element.
///
/// The text is every `--text` value plus every unrecognized argument, in order, joined
/// with single spaces and trimmed of surrounding whitespace/newlines. `--delay-ms`
/// keeps its previous value (default 20) when the argument is missing or not an integer.
///
/// - Parameters:
///   - args: Arguments following `type`.
///   - jsonOutput: When `true`, writes an `ok`/`host` JSON object; otherwise nothing is
///     written on success.
///   - context: Connected bridge client plus host description.
/// - Returns: Always `0` when no error is thrown.
/// - Throws: `NSError` (domain `clawdis.ui`, code 3) when the resulting text is empty;
///   errors from snapshot resolution, the type request, or JSON serialization.
private static func runType(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    var remaining = args
    var bundleId: String?
    var snapshotId: String?
    var into: String?
    var clearExisting = false
    var delayMs = 20
    var textParts: [String] = []

    while !remaining.isEmpty {
        let flag = remaining.removeFirst()
        if flag == "--bundle-id" {
            bundleId = remaining.popFirst()
        } else if flag == "--snapshot-id" {
            snapshotId = remaining.popFirst()
        } else if flag == "--into" {
            into = remaining.popFirst()
        } else if flag == "--clear" {
            clearExisting = true
        } else if flag == "--delay-ms" {
            if let parsed = remaining.popFirst().flatMap({ Int($0) }) {
                delayMs = parsed
            }
        } else if flag == "--text" {
            if let value = remaining.popFirst() {
                textParts.append(value)
            }
        } else if flag == "--help" || flag == "-h" || flag == "help" {
            printHelp()
            return 0
        } else {
            // Bare arguments are treated as additional text.
            textParts.append(flag)
        }
    }

    let text = textParts.joined(separator: " ").trimmingCharacters(in: .whitespacesAndNewlines)
    if text.isEmpty {
        throw NSError(domain: "clawdis.ui", code: 3, userInfo: [
            NSLocalizedDescriptionKey: "Missing text (use --text <value>).",
        ])
    }

    let effectiveSnapshotId = try await resolveImplicitSnapshotId(
        snapshotId: snapshotId,
        bundleId: bundleId,
        client: context.client)

    try await context.client.type(
        text: text,
        target: into,
        clearExisting: clearExisting,
        typingDelay: delayMs,
        snapshotId: effectiveSnapshotId)

    if jsonOutput {
        try writeJSON([
            "ok": true,
            "host": context.hostDescription,
        ])
    }
    return 0
}
|
||||
|
||||
/// Handles `ui wait --on <elementId>`: waits for an element and reports whether it was found.
///
/// `--timeout` keeps its previous value (default 10) when the argument is missing or
/// not a number. Unknown arguments are ignored.
///
/// - Parameters:
///   - args: Arguments following `wait`.
///   - jsonOutput: When `true`, writes the wait result as JSON under `result`; otherwise
///     writes `found` or `not found`.
///   - context: Connected bridge client plus host description.
/// - Returns: `0` when the element was found, `1` otherwise (`0` for help).
/// - Throws: `NSError` (domain `clawdis.ui`, code 4) when `--on` is missing or empty;
///   errors from snapshot resolution, the wait request, or JSON handling.
private static func runWait(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    var remaining = args
    var bundleId: String?
    var snapshotId: String?
    var on: String?
    var timeoutSec: Double = 10

    while !remaining.isEmpty {
        let flag = remaining.removeFirst()
        if flag == "--bundle-id" {
            bundleId = remaining.popFirst()
        } else if flag == "--snapshot-id" {
            snapshotId = remaining.popFirst()
        } else if flag == "--on" {
            on = remaining.popFirst()
        } else if flag == "--timeout" {
            if let parsed = remaining.popFirst().flatMap({ Double($0) }) {
                timeoutSec = parsed
            }
        } else if flag == "--help" || flag == "-h" || flag == "help" {
            printHelp()
            return 0
        }
    }

    let elementId = on ?? ""
    if elementId.isEmpty {
        throw NSError(domain: "clawdis.ui", code: 4, userInfo: [
            NSLocalizedDescriptionKey: "Missing --on <elementId>.",
        ])
    }

    let effectiveSnapshotId = try await resolveImplicitSnapshotId(
        snapshotId: snapshotId,
        bundleId: bundleId,
        client: context.client)

    let result = try await context.client.waitForElement(
        target: .elementId(elementId),
        timeout: timeoutSec,
        snapshotId: effectiveSnapshotId)

    if jsonOutput {
        let encoded = try toJSONObject(result)
        try writeJSON([
            "ok": true,
            "host": context.hostDescription,
            "result": encoded,
        ])
    } else {
        let verdict = result.found ? "found\n" : "not found\n"
        FileHandle.standardOutput.write(Data(verdict.utf8))
    }

    // The exit status mirrors the outcome so scripts can branch on it.
    return result.found ? 0 : 1
}
|
||||
|
||||
/// Picks the snapshot id an action should run against when none was given explicitly.
///
/// Resolution order:
/// 1. A non-empty `snapshotId` is returned unchanged.
/// 2. Otherwise the bundle id is `bundleId` when non-empty, else the frontmost
///    application's bundle identifier as reported by the host.
/// 3. The most recent snapshot for that bundle id is returned.
///
/// - Parameters:
///   - snapshotId: Explicit snapshot id, if any.
///   - bundleId: Explicit application bundle id, if any.
///   - client: Bridge client used for the frontmost-application and snapshot lookups.
/// - Returns: The snapshot id to use.
/// - Throws: The frontmost-application lookup error; `NSError` (domain `clawdis.ui`)
///   with code 5 when no bundle id can be determined, or code 6 when the snapshot
///   lookup fails. The code-6 error carries the original failure under
///   `NSUnderlyingErrorKey`.
private static func resolveImplicitSnapshotId(
    snapshotId: String?,
    bundleId: String?,
    client: PeekabooBridgeClient) async throws -> String
{
    if let snapshotId, !snapshotId.isEmpty { return snapshotId }

    let resolvedBundle: String? = if let bundleId, !bundleId.isEmpty {
        bundleId
    } else {
        try await client.getFrontmostApplication().bundleIdentifier
    }

    guard let resolvedBundle, !resolvedBundle.isEmpty else {
        throw NSError(domain: "clawdis.ui", code: 5, userInfo: [
            NSLocalizedDescriptionKey: "Could not determine bundle id for implicit snapshot.",
        ])
    }

    do {
        return try await client.getMostRecentSnapshot(applicationBundleId: resolvedBundle)
    } catch {
        // Keep the user-facing hint, but attach the real cause so transport or
        // authorization failures are not hidden behind "no recent snapshot".
        throw NSError(domain: "clawdis.ui", code: 6, userInfo: [
            NSLocalizedDescriptionKey: "No recent snapshot for \(resolvedBundle). Run `clawdis-mac ui see --bundle-id \(resolvedBundle)` first.",
            NSUnderlyingErrorKey: error,
        ])
    }
}
|
||||
|
||||
// MARK: - IO helpers
|
||||
|
||||
/// Writes `data` to a timestamped `clawdis-ui-<stamp>.png` file in the temporary directory.
///
/// The stamp is the current time in ISO 8601 form with fractional seconds, with every
/// `:` replaced by `-`.
///
/// - Parameter data: Bytes to write; they are written as-is.
/// - Returns: The file-system path of the written file.
/// - Throws: Any error raised by the atomic file write.
private static func writeTempPNG(_ data: Data) throws -> String {
    let stampFormatter = ISO8601DateFormatter()
    stampFormatter.formatOptions = [.withInternetDateTime, .withFractionalSeconds]

    let rawStamp = stampFormatter.string(from: Date())
    let fileStamp = rawStamp.replacingOccurrences(of: ":", with: "-")

    let destination = FileManager.default.temporaryDirectory
        .appendingPathComponent("clawdis-ui-\(fileStamp).png")
    try data.write(to: destination, options: [.atomic])
    return destination.path
}
|
||||
|
||||
/// Renders a permission status as one line of space-separated `name=ok|missing` pairs,
/// in the order screen-recording, accessibility, applescript.
///
/// - Parameter status: Permission flags to render.
/// - Returns: The formatted line, without a trailing newline.
private static func formatPermissions(_ status: PermissionsStatus) -> String {
    let parts = [
        status.screenRecording ? "screen-recording=ok" : "screen-recording=missing",
        status.accessibility ? "accessibility=ok" : "accessibility=missing",
        status.appleScript ? "applescript=ok" : "applescript=missing",
    ]
    return parts.joined(separator: " ")
}
|
||||
|
||||
/// Converts an `Encodable` value into a Foundation JSON object by encoding it with
/// `JSONEncoder` (ISO 8601 dates) and parsing the bytes back with `JSONSerialization`.
///
/// - Parameter value: The value to convert.
/// - Returns: The parsed JSON object.
/// - Throws: Encoding errors from `JSONEncoder` or parsing errors from `JSONSerialization`.
private static func toJSONObject<T: Encodable>(_ value: T) throws -> Any {
    let isoEncoder = JSONEncoder()
    isoEncoder.dateEncodingStrategy = .iso8601
    let encodedBytes = try isoEncoder.encode(value)
    let parsed = try JSONSerialization.jsonObject(with: encodedBytes)
    return parsed
}
|
||||
|
||||
/// Writes `obj` to standard output as pretty-printed JSON followed by a newline.
///
/// - Parameter obj: Dictionary to serialize.
/// - Throws: Any error raised by `JSONSerialization` while serializing `obj`.
private static func writeJSON(_ obj: [String: Any]) throws {
    let rendered = try JSONSerialization.data(withJSONObject: obj, options: [.prettyPrinted])
    let stdout = FileHandle.standardOutput
    stdout.write(rendered)
    // 0x0A is the line-feed byte that terminates the output.
    stdout.write(Data([0x0A]))
}
|
||||
|
||||
/// Writes the `clawdis-mac ui` usage text, followed by a newline, to standard error.
private static func printHelp() {
|
||||
let usage = """
|
||||
clawdis-mac ui — UI automation via PeekabooBridge
|
||||
|
||||
Usage:
|
||||
clawdis-mac [--json] ui <command> ...
|
||||
|
||||
Commands:
|
||||
permissions status
|
||||
frontmost
|
||||
apps
|
||||
windows [--bundle-id <id>]
|
||||
screenshot [--screen-index <n>] [--bundle-id <id>] [--window-index <n>] [--watch] [--scale native|1x]
|
||||
see [--bundle-id <id>] [--window-index <n>] [--snapshot-id <id>]
|
||||
click --on <elementId> [--bundle-id <id>] [--snapshot-id <id>] [--double|--right]
|
||||
type --text <value> [--into <elementId>] [--bundle-id <id>] [--snapshot-id <id>] [--clear] [--delay-ms <n>]
|
||||
wait --on <elementId> [--bundle-id <id>] [--snapshot-id <id>] [--timeout <sec>]
|
||||
|
||||
Notes:
|
||||
- Prefers Peekaboo.app’s bridge, then Clawdis.app’s bridge.
|
||||
- Default timeout is 10 seconds per action.
|
||||
"""
|
||||
FileHandle.standardError.write(Data((usage + "\n").utf8))
|
||||
}
|
||||
}
|
||||
@@ -50,64 +50,6 @@ public struct CanvasPlacement: Codable, Sendable {
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - UI (Peekaboo-aligned types)
|
||||
|
||||
/// Display info aligned with Peekaboo's `ScreenService.ScreenInfo`:
/// - `index` is the 0-based position in `NSScreen.screens` at runtime.
/// - `frame`/`visibleFrame` are AppKit screen rectangles (bottom-left origin).
public struct UIScreenInfo: Codable, Sendable {
    public let index: Int
    public let name: String
    public let frame: CGRect
    public let visibleFrame: CGRect
    public let isPrimary: Bool
    public let scaleFactor: CGFloat
    public let displayID: UInt32

    /// Creates a screen description; every field is required and stored unchanged.
    public init(
        index: Int,
        name: String,
        frame: CGRect,
        visibleFrame: CGRect,
        isPrimary: Bool,
        scaleFactor: CGFloat,
        displayID: UInt32)
    {
        // Identification
        self.index = index
        self.name = name
        self.displayID = displayID
        self.isPrimary = isPrimary

        // Geometry
        self.frame = frame
        self.visibleFrame = visibleFrame
        self.scaleFactor = scaleFactor
    }
}
|
||||
|
||||
/// Codable description of a written screenshot: its path and pixel dimensions, plus
/// optional identifiers for the screen, display, or window it came from.
public struct UIScreenshotResult: Codable, Sendable {
    public let path: String
    public let width: Int
    public let height: Int
    public let screenIndex: Int?
    public let displayID: UInt32?
    public let windowID: UInt32?

    /// Creates a result; the three optional identifiers default to `nil`.
    public init(
        path: String,
        width: Int,
        height: Int,
        screenIndex: Int? = nil,
        displayID: UInt32? = nil,
        windowID: UInt32? = nil)
    {
        // Required fields
        self.path = path
        self.width = width
        self.height = height

        // Optional source identifiers
        self.screenIndex = screenIndex
        self.displayID = displayID
        self.windowID = windowID
    }
}
|
||||
|
||||
public enum Request: Sendable {
|
||||
case notify(
|
||||
title: String,
|
||||
@@ -116,8 +58,6 @@ public enum Request: Sendable {
|
||||
priority: NotificationPriority?,
|
||||
delivery: NotificationDelivery?)
|
||||
case ensurePermissions([Capability], interactive: Bool)
|
||||
case uiListScreens
|
||||
case uiScreenshot(screenIndex: Int?, windowID: UInt32?)
|
||||
case runShell(
|
||||
command: [String],
|
||||
cwd: String?,
|
||||
@@ -158,7 +98,6 @@ extension Request: Codable {
|
||||
case type
|
||||
case title, body, sound, priority, delivery
|
||||
case caps, interactive
|
||||
case screenIndex, windowID
|
||||
case command, cwd, env, timeoutSec, needsScreenRecording
|
||||
case message, thinking, session, deliver, to
|
||||
case rpcStatus
|
||||
@@ -174,8 +113,6 @@ extension Request: Codable {
|
||||
private enum Kind: String, Codable {
|
||||
case notify
|
||||
case ensurePermissions
|
||||
case uiListScreens
|
||||
case uiScreenshot
|
||||
case runShell
|
||||
case status
|
||||
case agent
|
||||
@@ -205,14 +142,6 @@ extension Request: Codable {
|
||||
try container.encode(caps, forKey: .caps)
|
||||
try container.encode(interactive, forKey: .interactive)
|
||||
|
||||
case .uiListScreens:
|
||||
try container.encode(Kind.uiListScreens, forKey: .type)
|
||||
|
||||
case let .uiScreenshot(screenIndex, windowID):
|
||||
try container.encode(Kind.uiScreenshot, forKey: .type)
|
||||
try container.encodeIfPresent(screenIndex, forKey: .screenIndex)
|
||||
try container.encodeIfPresent(windowID, forKey: .windowID)
|
||||
|
||||
case let .runShell(command, cwd, env, timeoutSec, needsSR):
|
||||
try container.encode(Kind.runShell, forKey: .type)
|
||||
try container.encode(command, forKey: .command)
|
||||
@@ -289,14 +218,6 @@ extension Request: Codable {
|
||||
let interactive = try container.decode(Bool.self, forKey: .interactive)
|
||||
self = .ensurePermissions(caps, interactive: interactive)
|
||||
|
||||
case .uiListScreens:
|
||||
self = .uiListScreens
|
||||
|
||||
case .uiScreenshot:
|
||||
let screenIndex = try container.decodeIfPresent(Int.self, forKey: .screenIndex)
|
||||
let windowID = try container.decodeIfPresent(UInt32.self, forKey: .windowID)
|
||||
self = .uiScreenshot(screenIndex: screenIndex, windowID: windowID)
|
||||
|
||||
case .runShell:
|
||||
let command = try container.decode([String].self, forKey: .command)
|
||||
let cwd = try container.decodeIfPresent(String.self, forKey: .cwd)
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
---
|
||||
summary: "Spec for the Clawdis macOS companion menu bar app and XPC broker"
|
||||
summary: "Spec for the Clawdis macOS companion menu bar app and local broker (control socket + PeekabooBridge)"
|
||||
read_when:
|
||||
- Implementing macOS app features
|
||||
- Touching XPC/CLI bridging
|
||||
- Touching broker/CLI bridging
|
||||
---
|
||||
# Clawdis macOS Companion (menu bar + XPC broker)
|
||||
# Clawdis macOS Companion (menu bar + local broker)
|
||||
|
||||
Author: steipete · Status: draft spec · Date: 2025-12-05
|
||||
|
||||
@@ -12,21 +12,24 @@ Author: steipete · Status: draft spec · Date: 2025-12-05
|
||||
- Single macOS menu-bar app named **Clawdis** that:
|
||||
- Shows native notifications for Clawdis/clawdis events.
|
||||
- Owns TCC prompts (Notifications, Accessibility, Screen Recording, Automation/AppleScript, Microphone, Speech Recognition).
|
||||
- Brokers privileged actions (screen capture, shell with elevated UI context) via XPC.
|
||||
- Brokers privileged actions via local IPC:
|
||||
- Clawdis control socket (app-specific actions like notify/run)
|
||||
- PeekabooBridge socket (`bridge.sock`) for UI automation (see `docs/mac/peekaboo.md`)
|
||||
- Provides a tiny CLI (`clawdis-mac`) that talks to the app; Node/TS shells out to it.
|
||||
- Replace the separate notifier helper pattern (Oracle) with a built-in notifier.
|
||||
- Offer a first-run experience similar to VibeTunnel’s onboarding (permissions + CLI install).
|
||||
|
||||
## High-level design
|
||||
- SwiftPM package in `apps/macos/` (macOS 15+, Swift 6):
|
||||
- Dependency: `https://github.com/ChimeHQ/AsyncXPCConnection` (>=0.6.0).
|
||||
- Targets:
|
||||
- `ClawdisIPC` (shared Codable types + helpers).
|
||||
- `Clawdis` (LSUIElement MenuBarExtra app; embeds XPC listener and notifier).
|
||||
- `ClawdisCLI` (client that forms requests, talks XPC, prints JSON for scripts).
|
||||
- Bundle ID: `com.steipete.clawdis`; XPC service name: `com.steipete.clawdis.xpc`.
|
||||
- SwiftPM package in `apps/macos/` (macOS 15+, Swift 6).
|
||||
- Targets:
|
||||
- `ClawdisIPC` (shared Codable types + helpers for app-specific commands).
|
||||
- `Clawdis` (LSUIElement MenuBarExtra app; hosts control socket + optional PeekabooBridgeHost).
|
||||
- `ClawdisCLI` (`clawdis-mac`; prints text by default, `--json` for scripts).
|
||||
- Bundle ID: `com.steipete.clawdis`.
|
||||
- The CLI lives in the app bundle `Contents/Helpers/clawdis-mac`; dev symlink `bin/clawdis-mac` points there.
|
||||
- Node/TS layer calls the CLI; no direct XPC from Node.
|
||||
- Node/TS layer calls the CLI; no direct privileged API calls from Node.
|
||||
|
||||
Note: `docs/mac/xpc.md` describes an aspirational long-term Mach/XPC architecture. The current direction for UI automation is PeekabooBridge (socket-based).
|
||||
|
||||
## IPC contract (ClawdisIPC)
|
||||
- Codable enums; small payloads (<1 MB enforced in listener):
|
||||
@@ -36,13 +39,15 @@ enum Capability { notifications, accessibility, screenRecording, appleScript, mi
|
||||
enum Request {
|
||||
notify(title, body, sound?)
|
||||
ensurePermissions([Capability], interactive: Bool)
|
||||
uiScreenshot(screenIndex?, windowID?)
|
||||
runShell(command:[String], cwd?, env?, timeoutSec?, needsScreenRecording: Bool)
|
||||
status
|
||||
}
|
||||
struct Response { ok: Bool; message?: String; payload?: Data }
|
||||
```
|
||||
- Listener rejects oversize/unknown cases and validates the caller by code signature TeamID (with a `DEBUG`-only same-UID escape hatch controlled by `CLAWDIS_ALLOW_UNSIGNED_SOCKET_CLIENTS=1`).
|
||||
- The control-socket server rejects oversize/unknown cases and validates the caller by code signature TeamID (with a `DEBUG`-only same-UID escape hatch controlled by `CLAWDIS_ALLOW_UNSIGNED_SOCKET_CLIENTS=1`).
|
||||
|
||||
UI automation is not part of `ClawdisIPC.Request`:
|
||||
- `clawdis-mac ui …` speaks **PeekabooBridge** (see `docs/mac/peekaboo.md`).
|
||||
|
||||
## App UX (Clawdis)
|
||||
- MenuBarExtra icon only (LSUIElement; no Dock).
|
||||
@@ -52,28 +57,37 @@ struct Response { ok: Bool; message?: String; payload?: Data }
|
||||
- Permissions: live status + “Request” buttons for Notifications/Accessibility/Screen Recording; links to System Settings.
|
||||
- Debug (when enabled): PID/log links, restart/reveal app shortcuts, manual test notification.
|
||||
- About: version, links, license.
|
||||
- Pause behavior: matches Trimmy’s “Auto Trim” toggle. When paused, XPC listener returns `ok=false, message="clawdis paused"` for actions that would touch TCC (notify/run/screenshot). State is persisted (UserDefaults) and surfaced in menu and status view.
|
||||
- Pause behavior: matches Trimmy’s “Auto Trim” toggle. When paused, the broker returns `ok=false, message="clawdis paused"` for actions that would touch TCC. State is persisted (UserDefaults) and surfaced in menu and status view.
|
||||
- Onboarding (VibeTunnel-inspired): Welcome → What it does → Install CLI (shows `ln -s .../clawdis-mac /usr/local/bin`) → Permissions checklist with live status → Test notification → Done. Re-show when `welcomeVersion` bumps or CLI/app version mismatch.
|
||||
|
||||
## Built-in services
|
||||
- NotificationManager: UNUserNotificationCenter primary; AppleScript `display notification` fallback; respects the `--sound` value on each request.
|
||||
- PermissionManager: checks/requests Notifications, Accessibility (AX), Screen Recording (capture probe); publishes changes for UI.
|
||||
- ScreenCaptureManager: window/display PNG capture; gated on permission.
|
||||
- UI automation + capture: provided by **PeekabooBridgeHost** when enabled (see `docs/mac/peekaboo.md`).
|
||||
- ShellExecutor: executes `Process` with timeout; rejects when `needsScreenRecording` and permission missing; returns stdout/stderr in payload.
|
||||
- XPCListener actor: routes Request → managers; logs via OSLog.
|
||||
- ControlSocketServer actor: routes Request → managers; logs via OSLog.
|
||||
|
||||
## CLI (`clawdis-mac`)
|
||||
- Subcommands (text by default; `--json` for machine output; non-zero exit on failure):
|
||||
- `notify --title --body [--sound] [--priority passive|active|timeSensitive] [--delivery system|overlay|auto]`
|
||||
- `ensure-permissions --cap accessibility --cap screenRecording [--interactive]`
|
||||
- `ui screens`
|
||||
- `ui screenshot [--screen-index N] [--window-id N]`
|
||||
- `ui permissions status`
|
||||
- `ui frontmost`
|
||||
- `ui apps`
|
||||
- `ui windows [--bundle-id <id>]`
|
||||
- `ui screenshot [--screen-index <n>] [--bundle-id <id>] [--window-index <n>] [--watch] [--scale native|1x]`
|
||||
- `ui see [--bundle-id <id>] [--window-index <n>] [--snapshot-id <id>]`
|
||||
- `ui click --on <elementId> [--bundle-id <id>] [--snapshot-id <id>] [--double|--right]`
|
||||
- `ui type --text <value> [--into <elementId>] [--bundle-id <id>] [--snapshot-id <id>] [--clear] [--delay-ms <n>]`
|
||||
- `ui wait --on <elementId> [--bundle-id <id>] [--snapshot-id <id>] [--timeout <sec>]`
|
||||
- `run -- cmd args... [--cwd] [--env KEY=VAL] [--timeout 30] [--needs-screen-recording]`
|
||||
- `status`
|
||||
- Sounds: supply any macOS alert name with `--sound` per notification; omit the flag to use the system default. There is no longer a persisted “default sound” in the app UI.
|
||||
- Priority: `timeSensitive` is best-effort and falls back to `active` unless the app is signed with the Time Sensitive Notifications entitlement.
|
||||
- Delivery: `overlay` and `auto` show an in-app toast panel (bypasses Notification Center/Focus).
|
||||
- Internals: builds a `ClawdisIPC.Request`, sends it to the running app over the local control socket, and prints text by default (or JSON with `--json`).
|
||||
- Internals:
|
||||
- For app-specific commands (`notify`, `ensure-permissions`, `run`, `status`): build `ClawdisIPC.Request`, send over the control socket.
|
||||
- For UI automation (`ui …`): connect to PeekabooBridge hosts (Peekaboo.app → Clawdis.app) and send one JSON request per command (see `docs/mac/peekaboo.md`).
|
||||
|
||||
## Integration with clawdis/Clawdis (Node/TS)
|
||||
- Add helper module that shells to `clawdis-mac`:
|
||||
@@ -135,6 +149,6 @@ Notes:
|
||||
|
||||
## Open questions / decisions
|
||||
- Where to place the dev symlink `bin/clawdis-mac` (repo root vs. `apps/macos/bin`)?
|
||||
- Should `runShell` support streaming stdout/stderr (XPC with AsyncSequence) or just buffered? (Start buffered; streaming later.)
|
||||
- Should `runShell` support streaming stdout/stderr (IPC with AsyncSequence) or just buffered? (Start buffered; streaming later.)
|
||||
- Icon: reuse Clawdis lobster or new mac-specific glyph?
|
||||
- Sparkle updates: bundled via Sparkle; release builds point at `https://raw.githubusercontent.com/steipete/clawdis/main/appcast.xml` and enable auto-checks, while debug builds leave the feed blank and disable checks.
|
||||
|
||||
@@ -69,7 +69,7 @@ Implementation notes:
|
||||
|
||||
## Agent API surface (proposed)
|
||||
|
||||
Expose Canvas via the existing `clawdis-mac` → XPC → app routing so the agent can:
|
||||
Expose Canvas via the existing `clawdis-mac` → control socket → app routing so the agent can:
|
||||
- Show/hide the panel.
|
||||
- Navigate to a path (relative to the session root).
|
||||
- Evaluate JavaScript and optionally return results.
|
||||
|
||||
@@ -8,7 +8,7 @@ read_when:
|
||||
Date: 2025-12-06 · Status: draft · Owner: steipete
|
||||
|
||||
## Goal
|
||||
Run the Node-based Clawdis/clawdis gateway as a direct child of the LSUIElement app (instead of a launchd agent) while keeping all TCC-sensitive work inside the Swift app/XPC and wiring the existing “Clawdis Active” toggle to start/stop the child.
|
||||
Run the Node-based Clawdis/clawdis gateway as a direct child of the LSUIElement app (instead of a launchd agent) while keeping all TCC-sensitive work inside the Swift app/broker layer and wiring the existing “Clawdis Active” toggle to start/stop the child.
|
||||
|
||||
## When to prefer the child-process mode
|
||||
- You want gateway lifetime strictly coupled to the menu-bar app (dies when the app quits) and controlled by the “Clawdis Active” toggle without touching launchd.
|
||||
@@ -18,12 +18,13 @@ Run the Node-based Clawdis/clawdis gateway as a direct child of the LSUIElement
|
||||
## Tradeoffs vs. launchd
|
||||
- **Pros:** tighter coupling to UI state; simpler surface (no plist install/bootout); easier to stream stdout/stderr; fewer moving parts for beta users.
|
||||
- **Cons:** no built-in KeepAlive/login auto-start; app crash kills gateway; you must build your own restart/backoff; Activity Monitor will show both processes under the app; still need correct TCC handling (see below).
|
||||
- **TCC:** behaviorally, child processes often inherit the parent app’s “responsible process” for TCC, but this is *not a contract*. Continue to route all protected actions through the Swift app/XPC so prompts stay tied to the signed app bundle.
|
||||
- **TCC:** behaviorally, child processes often inherit the parent app’s “responsible process” for TCC, but this is *not a contract*. Continue to route all protected actions through the Swift app/broker so prompts stay tied to the signed app bundle.
|
||||
|
||||
## TCC guardrails (must keep)
|
||||
- Screen Recording, Accessibility, mic, and speech prompts must originate from the Swift app/XPC. The Node child should never call these APIs directly; use the existing XPC/CLI broker (`clawdis-mac`) for:
|
||||
- Screen Recording, Accessibility, mic, and speech prompts must originate from the signed Swift app/broker. The Node child should never call these APIs directly; use the CLI broker (`clawdis-mac`) for:
|
||||
- `ensure-permissions`
|
||||
- `ui screenshot` / ScreenCaptureKit work
|
||||
- `ui screenshot` (via PeekabooBridge host)
|
||||
- other `ui …` automation (see/click/type/scroll/wait) when implemented
|
||||
- mic/speech permission checks
|
||||
- notifications
|
||||
- shell runs that need `needs-screen-recording`
|
||||
@@ -48,7 +49,7 @@ Run the Node-based Clawdis/clawdis gateway as a direct child of the LSUIElement
|
||||
## Packaging and signing
|
||||
- Bundle the gateway payload (dist + production node_modules) under `Contents/Resources/Gateway/`; rely on host Node ≥22 instead of embedding a runtime.
|
||||
- Codesign native addons and dylibs inside the bundle; no nested runtime binary to sign now.
|
||||
- Host runtime should not call TCC APIs directly; keep privileged work inside the app/XPC.
|
||||
- Host runtime should not call TCC APIs directly; keep privileged work inside the app/broker.
|
||||
|
||||
## Logging and observability
|
||||
- Stream child stdout/stderr to `/tmp/clawdis-gateway.log`; surface the last N lines in the Debug tab.
|
||||
@@ -58,14 +59,14 @@ Run the Node-based Clawdis/clawdis gateway as a direct child of the LSUIElement
|
||||
## Failure/edge cases
|
||||
- App crash/quit kills the gateway. Decide if that is acceptable for the deployment tier; otherwise, stick with launchd for production and keep child-process for dev/experiments.
|
||||
- If the gateway exits repeatedly, back off (e.g., 1s/2s/5s/10s) and give up after N attempts with a menu warning.
|
||||
- Respect the existing pause semantics: when paused, the XPC should return `ok=false, "clawdis paused"`; the gateway should avoid calling privileged routes while paused.
|
||||
- Respect the existing pause semantics: when paused, the broker should return `ok=false, "clawdis paused"`; the gateway should avoid calling privileged routes while paused.
|
||||
|
||||
## Open questions / follow-ups
|
||||
- Do we need dual-mode (launchd for prod, child for dev)? If yes, gate via a setting or build flag.
|
||||
- Embedding a runtime is off the table for now; we rely on host Node for size/simplicity. Revisit only if host PATH drift becomes painful.
|
||||
- Do we want a tiny signed helper for rare TCC actions that cannot be brokered via XPC?
|
||||
- Do we want a tiny signed helper for rare TCC actions that cannot be brokered via the Swift app/broker?
|
||||
|
||||
## Decision snapshot (current recommendation)
|
||||
- Keep all TCC surfaces in the Swift app/XPC.
|
||||
- Keep all TCC surfaces in the Swift app/broker (control socket + PeekabooBridgeHost).
|
||||
- Implement `GatewayProcessManager` with Swift Subprocess to start/stop the gateway on the “Clawdis Active” toggle.
|
||||
- Maintain the launchd path as a fallback for uptime/login persistence until child-mode proves stable.
|
||||
|
||||
@@ -22,5 +22,5 @@ Shapes & sizes
|
||||
- Scurry uses leg wiggle up to ~1.0 with a small horizontal jiggle; it’s additive to any existing idle wiggle.
|
||||
|
||||
Behavioral notes
|
||||
- No external CLI/XPC toggle for ears/working; keep it internal to the app’s own signals to avoid accidental flapping.
|
||||
- No external CLI/broker toggle for ears/working; keep it internal to the app’s own signals to avoid accidental flapping.
|
||||
- Keep TTLs short (<10s) so the icon returns to baseline quickly if a job hangs.
|
||||
|
||||
@@ -1,44 +1,80 @@
|
||||
---
|
||||
summary: "Plan for integrating Peekaboo automation + visualizer into Clawdis macOS app (via clawdis-mac)"
|
||||
summary: "Plan for integrating Peekaboo automation into Clawdis via PeekabooBridge (socket-based TCC broker)"
|
||||
read_when:
|
||||
- Adding UI automation commands
|
||||
- Integrating Peekaboo as a submodule
|
||||
- Changing clawdis-mac IPC/output formats
|
||||
---
|
||||
# Peekaboo in Clawdis (macOS UI automation + visualizer)
|
||||
# Peekaboo Bridge in Clawdis (macOS UI automation broker)
|
||||
|
||||
## Goal
|
||||
Reuse Peekaboo’s mac automation “core” inside **Clawdis.app** so we piggyback on Clawdis’ existing TCC grants (Screen Recording, Accessibility, etc.). The CLI (`clawdis-mac`) stays a thin synchronous trigger surface for **single actions** (no batches), returning errors cleanly.
|
||||
## TL;DR
|
||||
- **Peekaboo removed its XPC helper** and now exposes privileged automation via a **UNIX domain socket bridge** (`PeekabooBridge` / `PeekabooBridgeHost`, socket name `bridge.sock`).
|
||||
- Clawdis integrates by **hosting the same bridge** inside **Clawdis.app** (optional, user-toggleable), and by making `clawdis-mac ui …` act as a **bridge client**.
|
||||
- For **visualizations**, we keep them in **Peekaboo.app** (best UX); Clawdis stays a thin broker host. No visualizer toggle in Clawdis.
|
||||
|
||||
Non-goals:
|
||||
- No AI/agent runtime parts from Peekaboo (no Tachikoma/MCP/Commander entrypoints).
|
||||
- No auto-onboarding or System Settings deep-linking from the automation layer (Clawdis onboarding already handles that).
|
||||
- No auto-launching Peekaboo.app.
|
||||
- No onboarding deep links from the automation endpoint (Clawdis onboarding already handles permissions).
|
||||
- No AI provider/agent runtime dependencies in Clawdis (avoid pulling Tachikoma/MCP into the Clawdis app/CLI).
|
||||
|
||||
## Where code lives
|
||||
- **Clawdis.app (macOS)**: owns all automation + visualization + TCC prompts.
|
||||
- **`clawdis-mac` CLI**: sends one request, waits, prints result, exits non-zero on failure.
|
||||
- **Gateway/Node/TS**: shells out to `clawdis-mac` when it needs TCC-backed actions.
|
||||
## Big refactor (Dec 2025): XPC → Bridge
|
||||
Peekaboo’s privileged execution moved from “CLI → XPC helper” to “CLI → socket bridge host”. For Clawdis this is a win:
|
||||
- It matches the existing “local socket + codesign checks” approach.
|
||||
- It lets us piggyback on **either** Peekaboo.app’s permissions **or** Clawdis.app’s permissions (whichever is running).
|
||||
- It avoids “two apps with two TCC bubbles” unless needed.
|
||||
|
||||
Transport: existing UNIX domain socket (`controlSocketPath`) already used by `clawdis-mac`.
|
||||
Reference (Peekaboo submodule): `docs/bridge-host.md`.
|
||||
|
||||
## Dependencies (submodule strategy)
|
||||
Integrate Peekaboo via git submodule (nested submodules OK).
|
||||
## Architecture
|
||||
### Processes
|
||||
- **Bridge hosts** (provide TCC-backed automation):
|
||||
- **Peekaboo.app** (preferred; also provides visualizations + controls)
|
||||
- **Clawdis.app** (secondary; “thin host” only)
|
||||
- **Bridge clients** (trigger single actions):
|
||||
- `clawdis-mac ui …`
|
||||
- Node/Gateway shells out to `clawdis-mac`
|
||||
|
||||
Consume only:
|
||||
- `PeekabooAutomationKit` (AX automation, element detection, capture helpers; no Tachikoma/MCP).
|
||||
- `AXorcist` (input driving / AX helpers).
|
||||
- `PeekabooVisualizer` (overlay visualizations).
|
||||
### Host discovery (client-side)
|
||||
Order is deliberate:
|
||||
1. Peekaboo.app host (full UX)
|
||||
2. Clawdis.app host (piggyback on Clawdis permissions)
|
||||
|
||||
Important nuance:
|
||||
- `PeekabooAutomationKit` is a standalone SwiftPM package and does **not** require Tachikoma/MCP/Commander.
|
||||
- `PeekabooVisualizer` ships as a product inside `PeekabooCore/Package.swift`. That package declares other dependencies (including a path dependency to Tachikoma). SwiftPM will still need those paths to exist during dependency resolution even if we don’t build those targets.
|
||||
- If this becomes annoying for Clawdis, the follow-up is to extract `PeekabooVisualizer` into its own standalone Swift package that depends only on `PeekabooFoundation`/`PeekabooProtocols`/`PeekabooExternalDependencies`.
|
||||
Socket paths (convention; exact paths must match Peekaboo):
|
||||
- Peekaboo: `~/Library/Application Support/Peekaboo/bridge.sock`
|
||||
- Clawdis: `~/Library/Application Support/clawdis/bridge.sock`
|
||||
|
||||
No auto-launch: if neither host is reachable, the command fails with a clear error telling the user to start Peekaboo.app or Clawdis.app.
|
||||
|
||||
Override (debugging): set `PEEKABOO_BRIDGE_SOCKET=/path/to/bridge.sock`.
|
||||
|
||||
### Protocol shape
|
||||
- **Single request per connection**: connect → write one JSON request → half-close → read one JSON response → close.
|
||||
- **Timeout**: 10 seconds end-to-end per action (enforced by the client; the host should also enforce its own per-operation timeout).
|
||||
- **Errors**: human-readable string by default; structured envelope in `--json`.
|
||||
|
||||
## Dependency strategy (submodule)
|
||||
Integrate Peekaboo via git submodule (nested submodules are OK).
|
||||
|
||||
Path in Clawdis repo:
|
||||
- `./Peekaboo` (Swabble-style; keep stable so SwiftPM path deps don’t churn).
|
||||
|
||||
What Clawdis should use:
|
||||
- **Client side**: `PeekabooBridge` (socket client + protocol models).
|
||||
- **Host side (Clawdis.app)**: `PeekabooBridgeHost` + the minimal Peekaboo services needed to implement operations.
|
||||
|
||||
What Clawdis should *not* embed:
|
||||
- **Visualizer UI**: keep it in Peekaboo.app for now (toggle + controls live there).
|
||||
- **XPC**: don’t reintroduce helper targets; use the bridge.
|
||||
|
||||
## IPC / CLI surface
|
||||
### Namespacing
|
||||
Add new automation commands behind a `ui` prefix:
|
||||
- `clawdis-mac ui …` for UI automation + visualization-related actions.
|
||||
- Keep existing top-level commands (`notify`, `run`, `canvas …`, etc.) for compatibility, but do a clean cutover for screenshots: remove the legacy top-level `screenshot` command and ship only `clawdis-mac ui screenshot`.
|
||||
- Keep existing top-level commands (`notify`, `run`, `canvas …`, etc.) for compatibility.
|
||||
|
||||
Screenshot cutover:
|
||||
- Remove legacy screenshot endpoints/commands.
|
||||
- Ship only `clawdis-mac ui screenshot` (no aliases).
|
||||
|
||||
### Output format
|
||||
Change `clawdis-mac` to default to human text output:
|
||||
@@ -50,14 +86,14 @@ This applies globally, not only `ui` commands.
|
||||
Note (current state as of 2025-12-13): `clawdis-mac` prints text by default; use `--json` for structured output.
|
||||
|
||||
### Timeouts
|
||||
Default timeout for UI actions: **10 seconds** end-to-end (CLI already defaults to 10s).
|
||||
- CLI: keep the fail-fast default at 10s (unless a command explicitly requests longer).
|
||||
- Server: only has a ~5s read/decode timeout today; UI operations must also enforce their own per-action timeout so “wait for element” can fail deterministically.
|
||||
Default timeout for UI actions: **10 seconds** end-to-end.
|
||||
|
||||
## Coordinate model (multi-display)
|
||||
Requirement: coordinates are **per screen**, not global.
|
||||
|
||||
Proposed API shape:
|
||||
Standardize for the CLI (agent-friendly): **top-left origin per screen**.
|
||||
|
||||
Proposed request shape:
|
||||
- Requests accept `screenIndex` + `{x, y}` in that screen’s local coordinate space.
|
||||
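- Clawdis.app converts to global CG coordinates using `NSScreen.screens[screenIndex].frame.origin` (note: `NSScreen` frames use AppKit's bottom-left origin, so the conversion must also flip the y-axis to match the top-left-origin request coordinates).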
|
||||
- Responses should echo both:
|
||||
@@ -68,53 +104,48 @@ Proposed API shape:
|
||||
Ordering: use `NSScreen.screens` ordering consistently (documented in the CLI help + JSON schema).
|
||||
|
||||
## Targeting (per app/window)
|
||||
Expose window/app targeting in the IPC surface (based on Peekaboo’s existing `WindowTarget` model):
|
||||
Expose window/app targeting in the UI surface (align with Peekaboo targeting):
|
||||
- frontmost
|
||||
- by app name / bundle id
|
||||
- by window title substring
|
||||
- by (app, index)
|
||||
- by window id
|
||||
|
||||
Current `clawdis-mac ui …` support:
|
||||
- `--bundle-id <id>` for app targeting
|
||||
- `--window-index <n>` (0-based) for disambiguating within an app when capturing (see/screenshot)
|
||||
|
||||
All “see/click/type/scroll/wait” requests should accept a target (default: frontmost).
|
||||
|
||||
## “See” + click packs (Playwright-style)
|
||||
Peekaboo already has the core ingredients:
|
||||
- element detection yielding stable IDs (e.g., `B1`, `T3`)
|
||||
- bounds + labels/values
|
||||
- snapshot IDs to allow follow-up actions without re-scanning
|
||||
Behavior stays aligned with Peekaboo:
|
||||
- `ui see` returns element IDs (e.g. `B1`, `T3`) with bounds/labels.
|
||||
- Follow-up actions reference those IDs without re-scanning.
|
||||
|
||||
Clawdis’s `ui see` should:
|
||||
`clawdis-mac ui see` should:
|
||||
- capture (optionally targeted) window/screen
|
||||
- return a **snapshot id**
|
||||
- return a list of elements with `{id, type, label/value?, bounds}`
|
||||
- optionally return screenshot path/bytes (pref: path)
|
||||
- return a screenshot **file path** (default: temp directory)
|
||||
- return a list of elements (text or JSON)
|
||||
|
||||
Snapshot lifecycle requirement:
|
||||
- Clawdis runs long-lived in memory, so “snapshot state” should be **in-memory by default** (no disk-backed JSON concept).
|
||||
- Peekaboo already supports this via an `InMemorySnapshotManager` (keep disk-backed snapshots as an optional debug mode later).
|
||||
- Host apps are long-lived, so snapshot state should be **in-memory by default**.
|
||||
- Snapshot scoping: “implicit snapshot” is **per target bundle id** (reuse last snapshot for that app when snapshot id is omitted).
|
||||
|
||||
Practical flow (agent-friendly):
|
||||
- `clawdis-mac ui frontmost` returns the focused app (bundle id) + focused window (title/id) so follow-up calls can pass `--bundle-id …`.
|
||||
- `clawdis-mac ui see --bundle-id X` updates the implicit snapshot for `X`.
|
||||
- `clawdis-mac ui click --bundle-id X --on B1` reuses the most recent snapshot for `X` when `--snapshot-id` is omitted.
|
||||
|
||||
## Visualizer integration
|
||||
Visualizer must be user-toggleable via a Clawdis setting.
|
||||
|
||||
Implementation sketch:
|
||||
- Add a Clawdis UserDefaults-backed setting (e.g. `clawdis.ui.visualizerEnabled`).
|
||||
- Implement Peekaboo’s `VisualizerSettingsProviding` in Clawdis (`visualizerEnabled`, animation speed, and per-effect toggles).
|
||||
- Create a Clawdis-specific `AutomationFeedbackClient` that forwards PeekabooAutomationKit feedback events into a shared `VisualizerCoordinator`.
|
||||
|
||||
Current state:
|
||||
- `PeekabooVisualizer` already includes the visualization implementation (SwiftUI overlay views + coordinator).
|
||||
The visualizer is intentionally display-only (no clickable overlays needed).
|
||||
Keep visualizations in **Peekaboo.app** for now.
|
||||
- Clawdis hosts the bridge, but does not render overlays.
|
||||
- Any “visualizer enabled/disabled” setting is controlled in Peekaboo.app.
|
||||
|
||||
## Screenshots (legacy → Peekaboo takeover)
|
||||
Clawdis uses `clawdis-mac ui screenshot` and returns a file path (default location: temp directory) instead of raw image bytes.
|
||||
|
||||
Migration plan:
|
||||
- Replace capture implementation with PeekabooAutomationKit’s capture service so we share:
|
||||
- per-screen mapping
|
||||
- window/app targeting
|
||||
- visual feedback (flash / watch HUD) when enabled
|
||||
- Keep writing images to a file path on the app side and returning the path (text-friendly), with `--json` providing the structured metadata.
|
||||
- No aliases: remove the old `Request.screenshot` and introduce a new `Request.uiScreenshot` (or similar) so the new behavior is explicit and there’s no “legacy mode” to maintain.
|
||||
- Bridge host performs capture and returns a temp file path.
|
||||
- No legacy aliases; make the old screenshot surface disappear cleanly.
|
||||
|
||||
## Permissions behavior
|
||||
If required permissions are missing:
|
||||
@@ -122,17 +153,32 @@ If required permissions are missing:
|
||||
- do not try to open System Settings from the automation endpoint
|
||||
|
||||
## Security (socket auth)
|
||||
Clawdis’ socket is protected by:
|
||||
Both hosts must enforce:
|
||||
- filesystem perms on the socket path (owner read/write only)
|
||||
- server-side caller check:
|
||||
- requires the caller’s code signature TeamID to be `Y5PE65HELJ`
|
||||
- in `DEBUG` builds only, an explicit escape hatch allows same-UID clients when `CLAWDIS_ALLOW_UNSIGNED_SOCKET_CLIENTS=1` is set (development convenience)
|
||||
- server-side caller validation:
|
||||
- require the caller’s code signature TeamID to be `Y5PE65HELJ`
|
||||
- optional bundle-id allowlist for tighter scoping
|
||||
|
||||
This ensures “any local process” can’t drive the privileged surface just because it runs under the same macOS user.
|
||||
Debug-only escape hatch (development convenience):
|
||||
- “allow same-UID callers” means: *skip codesign checks for clients running under the same Unix user*.
|
||||
- This must be **opt-in**, **DEBUG-only**, and guarded by an env var (Peekaboo uses `PEEKABOO_ALLOW_UNSIGNED_SOCKET_CLIENTS=1`).
|
||||
|
||||
## Current `clawdis-mac ui` commands (Dec 2025)
|
||||
All commands default to text output. Add `--json` right after `clawdis-mac` for a structured envelope.
|
||||
|
||||
- `clawdis-mac ui permissions status`
|
||||
- `clawdis-mac ui frontmost`
|
||||
- `clawdis-mac ui apps`
|
||||
- `clawdis-mac ui windows [--bundle-id <id>]`
|
||||
- `clawdis-mac ui screenshot [--screen-index <n>] [--bundle-id <id>] [--window-index <n>] [--watch] [--scale native|1x]`
|
||||
- `clawdis-mac ui see [--bundle-id <id>] [--window-index <n>] [--snapshot-id <id>]`
|
||||
- `clawdis-mac ui click --on <elementId> [--bundle-id <id>] [--snapshot-id <id>] [--double|--right]`
|
||||
- `clawdis-mac ui type --text <value> [--into <elementId>] [--bundle-id <id>] [--snapshot-id <id>] [--clear] [--delay-ms <n>]`
|
||||
- `clawdis-mac ui wait --on <elementId> [--bundle-id <id>] [--snapshot-id <id>] [--timeout <sec>]`
|
||||
|
||||
## Next integration steps (after this doc)
|
||||
1. Add Peekaboo as a git submodule (and required nested submodules).
|
||||
2. Wire SwiftPM deps in `apps/macos/Package.swift` to import `PeekabooAutomationKit` + `PeekabooVisualizer`.
|
||||
3. Extend `ClawdisIPC.Request` with `ui.*` commands (`see/click/type/scroll/wait/screenshot/windows/screens`).
|
||||
4. Implement handlers in Clawdis.app and route through PeekabooAutomationKit services.
|
||||
5. Update `clawdis-mac` output defaults (text + `--json`), and adjust any internal call sites that relied on JSON-by-default.
|
||||
1. Add Peekaboo as a git submodule (nested submodules OK).
|
||||
2. Add a small `clawdis-mac ui …` surface that speaks PeekabooBridge (text by default, `--json` for structured).
|
||||
3. Host `PeekabooBridgeHost` inside Clawdis.app behind a single setting (“Enable Peekaboo Bridge”, default on).
|
||||
4. Implement the minimum operation set needed for agents (see/click/type/scroll/wait/screenshot, plus list apps/windows/screens).
|
||||
5. Keep all protocol decisions aligned with Peekaboo (coordinate system, element IDs, snapshot scoping, error envelopes).
|
||||
|
||||
@@ -1,19 +1,29 @@
|
||||
---
|
||||
summary: "macOS XPC architecture for Clawdis app, CLI helper, and gateway bridge"
|
||||
summary: "macOS IPC architecture for Clawdis app, CLI helper, and gateway bridge (control socket + XPC + PeekabooBridge)"
|
||||
read_when:
|
||||
- Editing XPC contracts or menu bar app IPC
|
||||
- Editing IPC contracts or menu bar app IPC
|
||||
---
|
||||
# Clawdis macOS XPC architecture (Dec 2025)
|
||||
# Clawdis macOS IPC architecture (Dec 2025)
|
||||
|
||||
Note: the current implementation primarily uses a local UNIX-domain control socket (`controlSocketPath`) between `clawdis-mac` and the app. This doc describes the intended long-term XPC/Mach-service architecture and the security constraints; update it as the implementation converges.
|
||||
Note: the current implementation primarily uses a local UNIX-domain control socket (`controlSocketPath`) between `clawdis-mac` and the app. This doc captures the intended long-term Mach/XPC direction and the security constraints, and also documents the separate PeekabooBridge socket used for UI automation.
|
||||
|
||||
## Goals
|
||||
- Single GUI app instance that owns all TCC-facing work (notifications, screen recording, mic, speech, AppleScript).
|
||||
- A small surface for automation: the `clawdis-mac` CLI and the Node gateway talk to the app via a local XPC channel.
|
||||
- A small surface for automation: the `clawdis-mac` CLI and the Node gateway talk to the app via local IPC.
|
||||
- Predictable permissions: always the same signed bundle ID, launched by launchd, so TCC grants stick.
|
||||
- Limit who can connect: only signed clients from our team (with an explicit DEBUG-only escape hatch for development).
|
||||
|
||||
## How it works
|
||||
### Control socket (current)
|
||||
- `clawdis-mac` talks to the app via a local UNIX socket (`controlSocketPath`) for app-specific requests (notify, status, ensure-permissions, run, etc.).
|
||||
|
||||
### PeekabooBridge (UI automation)
|
||||
- UI automation uses a separate UNIX socket named `bridge.sock` and the PeekabooBridge JSON protocol.
|
||||
- Host preference order (client-side): Peekaboo.app → Clawdis.app → local execution.
|
||||
- Security: bridge hosts require TeamID `Y5PE65HELJ`; DEBUG-only same-UID escape hatch is guarded by `PEEKABOO_ALLOW_UNSIGNED_SOCKET_CLIENTS=1` (Peekaboo convention).
|
||||
- See: `docs/mac/peekaboo.md` for the Clawdis plan and naming.
|
||||
|
||||
### Mach/XPC (future direction)
|
||||
- The app registers a Mach service named `com.steipete.clawdis.xpc` via a user LaunchAgent at `~/Library/LaunchAgents/com.steipete.clawdis.plist`.
|
||||
- The launch agent runs `dist/Clawdis.app/Contents/MacOS/Clawdis` with `RunAtLoad=true`, `KeepAlive=false`, and a `MachServices` entry for the XPC name.
|
||||
- The app hosts the XPC listener (`NSXPCListener(machServiceName:)`) and exports `ClawdisXPCService`.
|
||||
@@ -35,6 +45,8 @@ Note: the current implementation primarily uses a local UNIX-domain control sock
|
||||
- RunAtLoad without KeepAlive means the app starts once; if it crashes it stays down (no unwanted respawn), but CLI calls will respawn it via launchd.
|
||||
|
||||
## Hardening notes
|
||||
- Prefer requiring a TeamID match for all privileged surfaces. The codebase currently has a `DEBUG`-only same-UID escape hatch gated behind `CLAWDIS_ALLOW_UNSIGNED_SOCKET_CLIENTS=1` for local development.
|
||||
- Prefer requiring a TeamID match for all privileged surfaces.
|
||||
- Clawdis control socket: `CLAWDIS_ALLOW_UNSIGNED_SOCKET_CLIENTS=1` (DEBUG-only) may allow same-UID callers for local development.
|
||||
- PeekabooBridge: `PEEKABOO_ALLOW_UNSIGNED_SOCKET_CLIENTS=1` (DEBUG-only) may allow same-UID callers for local development.
|
||||
- All communication remains local-only; no network sockets are exposed.
|
||||
- TCC prompts originate only from the GUI app bundle; run `scripts/package-mac-app.sh` so the signed bundle ID stays stable.
|
||||
|
||||
@@ -2,18 +2,49 @@ import { spawn } from "node:child_process";
|
||||
import net from "node:net";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
|
||||
const waitForText = async (
|
||||
chunks: string[],
|
||||
pattern: RegExp,
|
||||
const waitForPortOpen = async (
|
||||
proc: ReturnType<typeof spawn>,
|
||||
chunksOut: string[],
|
||||
chunksErr: string[],
|
||||
port: number,
|
||||
timeoutMs: number,
|
||||
) => {
|
||||
const startedAt = Date.now();
|
||||
while (Date.now() - startedAt < timeoutMs) {
|
||||
const joined = chunks.join("");
|
||||
if (pattern.test(joined)) return;
|
||||
if (proc.exitCode !== null) {
|
||||
const stdout = chunksOut.join("");
|
||||
const stderr = chunksErr.join("");
|
||||
throw new Error(
|
||||
`gateway exited before listening (code=${String(proc.exitCode)} signal=${String(proc.signalCode)})\n` +
|
||||
`--- stdout ---\n${stdout}\n--- stderr ---\n${stderr}`,
|
||||
);
|
||||
}
|
||||
|
||||
try {
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
const socket = net.connect({ host: "127.0.0.1", port });
|
||||
socket.once("connect", () => {
|
||||
socket.destroy();
|
||||
resolve();
|
||||
});
|
||||
socket.once("error", (err) => {
|
||||
socket.destroy();
|
||||
reject(err);
|
||||
});
|
||||
});
|
||||
return;
|
||||
} catch {
|
||||
// keep polling
|
||||
}
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||
}
|
||||
throw new Error(`timeout waiting for ${String(pattern)}`);
|
||||
const stdout = chunksOut.join("");
|
||||
const stderr = chunksErr.join("");
|
||||
throw new Error(
|
||||
`timeout waiting for gateway to listen on port ${port}\n` +
|
||||
`--- stdout ---\n${stdout}\n--- stderr ---\n${stderr}`,
|
||||
);
|
||||
};
|
||||
|
||||
const getFreePort = async () => {
|
||||
@@ -67,9 +98,11 @@ describe("gateway SIGTERM", () => {
|
||||
child.stdout?.on("data", (d) => out.push(String(d)));
|
||||
child.stderr?.on("data", (d) => err.push(String(d)));
|
||||
|
||||
await waitForText(
|
||||
await waitForPortOpen(
|
||||
proc,
|
||||
out,
|
||||
new RegExp(`gateway listening on ws://127\\.0\\.0\\.1:${port}\\b`),
|
||||
err,
|
||||
port,
|
||||
20_000,
|
||||
);
|
||||
|
||||
|
||||
@@ -1015,7 +1015,7 @@ describe("web auto-reply", () => {
|
||||
|
||||
it(
|
||||
"compresses common formats to jpeg under the cap",
|
||||
{ timeout: 15_000 },
|
||||
{ timeout: 45_000 },
|
||||
async () => {
|
||||
const formats = [
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user