feat!(mac): add ui screens + text clawdis-mac

This commit is contained in:
Peter Steinberger
2025-12-13 11:31:31 +00:00
parent 8d1e73edc7
commit 0152e053e1
6 changed files with 259 additions and 39 deletions

View File

@@ -67,6 +67,11 @@ enum ControlRequestHandler {
}
return Response(ok: false, message: "screenshot failed")
case .uiListScreens:
let screens = await MainActor.run { UIScreenService.listScreens() }
let payload = try JSONEncoder().encode(screens)
return Response(ok: true, payload: payload)
case let .runShell(command, cwd, env, timeoutSec, needsSR):
if needsSR {
let authorized = await PermissionManager

View File

@@ -0,0 +1,44 @@
import AppKit
import ClawdisIPC
import CoreGraphics
/// Enumerates the displays currently known to AppKit for the `uiListScreens` request.
enum UIScreenService {
    /// Builds one `UIScreenInfo` per entry of `NSScreen.screens`, preserving AppKit's order.
    ///
    /// - Returns: The screens in `NSScreen.screens` order; `index` is the 0-based position
    ///   in that array. The array is empty when AppKit reports no screens.
    static func listScreens() -> [UIScreenInfo] {
        NSScreen.screens.enumerated().map { index, screen in
            UIScreenInfo(
                index: index,
                name: screen.peekabooName,
                frame: screen.frame,
                visibleFrame: screen.visibleFrame,
                // `NSScreen.main` is the screen containing the key window, so it follows
                // keyboard focus and is not a stable "primary display" marker. AppKit
                // documents index 0 of `NSScreen.screens` as the primary screen (the one
                // with the menu bar and origin (0, 0)), so use the position instead.
                isPrimary: index == 0,
                scaleFactor: screen.backingScaleFactor,
                displayID: screen.displayID)
        }
    }
}
/// File-private helpers that derive display metadata from an `NSScreen`.
private extension NSScreen {
    /// Display identifier taken from the `NSScreenNumber` entry of `deviceDescription`.
    /// Evaluates to `0` when the entry is missing or is not an `NSNumber`.
    var displayID: UInt32 {
        let screenNumberKey = NSDeviceDescriptionKey("NSScreenNumber")
        guard let number = self.deviceDescription[screenNumberKey] as? NSNumber else {
            return 0
        }
        return number.uint32Value
    }

    /// Match Peekaboo's `ScreenService` naming (built-in vs. resolution fallback).
    ///
    /// Resolution order: no display ID → "Display"; built-in panel → "Built-in Display";
    /// a readable display mode → "<width>×<height> Display"; otherwise "External Display".
    var peekabooName: String {
        let identifier = self.displayID
        if identifier == 0 {
            return "Display"
        }
        if CGDisplayIsBuiltin(identifier) != 0 {
            return "Built-in Display"
        }
        guard let mode = CGDisplayCopyDisplayMode(identifier) else {
            return "External Display"
        }
        return "\(mode.pixelWidth)×\(mode.pixelHeight) Display"
    }
}

View File

@@ -6,24 +6,19 @@ import Foundation
struct ClawdisCLI {
static func main() async {
do {
let request = try parseCommandLine()
let response = try await send(request: request)
let payloadString: String? = if let payload = response.payload, let text = String(
data: payload,
encoding: .utf8)
{
text
var args = Array(CommandLine.arguments.dropFirst())
let jsonOutput = args.contains("--json")
args.removeAll(where: { $0 == "--json" })
let parsed = try parseCommandLine(args: args)
let response = try await send(request: parsed.request)
if jsonOutput {
try self.printJSON(parsed: parsed, response: response)
} else {
nil
try self.printText(parsed: parsed, response: response)
}
let output: [String: Any] = [
"ok": response.ok,
"message": response.message ?? "",
"payload": payloadString ?? "",
]
let json = try JSONSerialization.data(withJSONObject: output, options: [.prettyPrinted])
FileHandle.standardOutput.write(json)
FileHandle.standardOutput.write(Data([0x0A]))
exit(response.ok ? 0 : 1)
} catch CLIError.help {
self.printHelp()
@@ -37,9 +32,20 @@ struct ClawdisCLI {
}
}
/// Result of parsing the command line: the IPC request to send plus a hint that tells
/// the printers how to present the matching response.
private struct ParsedCLIRequest {
    /// Selects the presentation used by `printText` / `printJSON`.
    enum Kind {
        /// Screenshot request; the response payload is written to a file.
        /// `outPath` is the destination given via `--out`, if any.
        case screenshot(outPath: String?)
        /// `ui screens`; the response payload is a JSON array of `UIScreenInfo`.
        case uiScreens
        /// Any other command; payload/message are printed without command-specific handling.
        case generic
    }

    /// The request that is sent to the running app.
    var request: Request
    /// How the response to `request` should be rendered.
    var kind: Kind
}
// swiftlint:disable cyclomatic_complexity
private static func parseCommandLine() throws -> Request {
var args = Array(CommandLine.arguments.dropFirst())
private static func parseCommandLine(args: [String]) throws -> ParsedCLIRequest {
var args = args
guard let command = args.first else { throw CLIError.help }
args = Array(args.dropFirst())
@@ -70,7 +76,9 @@ struct ClawdisCLI {
}
}
guard let t = title, let b = body else { throw CLIError.help }
return .notify(title: t, body: b, sound: sound, priority: priority, delivery: delivery)
return ParsedCLIRequest(
request: .notify(title: t, body: b, sound: sound, priority: priority, delivery: delivery),
kind: .generic)
case "ensure-permissions":
var caps: [Capability] = []
@@ -85,20 +93,35 @@ struct ClawdisCLI {
}
}
if caps.isEmpty { caps = Capability.allCases }
return .ensurePermissions(caps, interactive: interactive)
return ParsedCLIRequest(request: .ensurePermissions(caps, interactive: interactive), kind: .generic)
case "screenshot":
var displayID: UInt32?
var windowID: UInt32?
var outPath: String?
while !args.isEmpty {
let arg = args.removeFirst()
switch arg {
case "--display-id": if let val = args.popFirst(), let num = UInt32(val) { displayID = num }
case "--window-id": if let val = args.popFirst(), let num = UInt32(val) { windowID = num }
case "--out": outPath = args.popFirst()
default: break
}
}
return .screenshot(displayID: displayID, windowID: windowID, format: "png")
return ParsedCLIRequest(
request: .screenshot(displayID: displayID, windowID: windowID, format: "png"),
kind: .screenshot(outPath: outPath))
case "ui":
guard let sub = args.first else { throw CLIError.help }
args = Array(args.dropFirst())
switch sub {
case "screens":
return ParsedCLIRequest(request: .uiListScreens, kind: .uiScreens)
default:
throw CLIError.help
}
case "run":
var cwd: String?
@@ -124,18 +147,18 @@ struct ClawdisCLI {
cmd.append(arg)
}
}
return .runShell(
return ParsedCLIRequest(request: .runShell(
command: cmd,
cwd: cwd,
env: env.isEmpty ? nil : env,
timeoutSec: timeout,
needsScreenRecording: needsSR)
needsScreenRecording: needsSR), kind: .generic)
case "status":
return .status
return ParsedCLIRequest(request: .status, kind: .generic)
case "rpc-status":
return .rpcStatus
return ParsedCLIRequest(request: .rpcStatus, kind: .generic)
case "agent":
var message: String?
@@ -161,7 +184,9 @@ struct ClawdisCLI {
}
guard let message else { throw CLIError.help }
return .agent(message: message, thinking: thinking, session: session, deliver: deliver, to: to)
return ParsedCLIRequest(
request: .agent(message: message, thinking: thinking, session: session, deliver: deliver, to: to),
kind: .generic)
case "node":
guard let sub = args.first else { throw CLIError.help }
@@ -169,7 +194,7 @@ struct ClawdisCLI {
switch sub {
case "list":
return .nodeList
return ParsedCLIRequest(request: .nodeList, kind: .generic)
case "invoke":
var nodeId: String?
@@ -185,7 +210,9 @@ struct ClawdisCLI {
}
}
guard let nodeId, let command else { throw CLIError.help }
return .nodeInvoke(nodeId: nodeId, command: command, paramsJSON: paramsJSON)
return ParsedCLIRequest(
request: .nodeInvoke(nodeId: nodeId, command: command, paramsJSON: paramsJSON),
kind: .generic)
default:
throw CLIError.help
@@ -218,7 +245,9 @@ struct ClawdisCLI {
let placement = (x != nil || y != nil || width != nil || height != nil)
? CanvasPlacement(x: x, y: y, width: width, height: height)
: nil
return .canvasShow(session: session, path: path, placement: placement)
return ParsedCLIRequest(
request: .canvasShow(session: session, path: path, placement: placement),
kind: .generic)
case "hide":
var session = "main"
@@ -229,7 +258,7 @@ struct ClawdisCLI {
default: break
}
}
return .canvasHide(session: session)
return ParsedCLIRequest(request: .canvasHide(session: session), kind: .generic)
case "goto":
var session = "main"
@@ -254,7 +283,9 @@ struct ClawdisCLI {
let placement = (x != nil || y != nil || width != nil || height != nil)
? CanvasPlacement(x: x, y: y, width: width, height: height)
: nil
return .canvasGoto(session: session, path: path, placement: placement)
return ParsedCLIRequest(
request: .canvasGoto(session: session, path: path, placement: placement),
kind: .generic)
case "eval":
var session = "main"
@@ -268,7 +299,7 @@ struct ClawdisCLI {
}
}
guard let js else { throw CLIError.help }
return .canvasEval(session: session, javaScript: js)
return ParsedCLIRequest(request: .canvasEval(session: session, javaScript: js), kind: .generic)
case "snapshot":
var session = "main"
@@ -281,7 +312,7 @@ struct ClawdisCLI {
default: break
}
}
return .canvasSnapshot(session: session, outPath: outPath)
return ParsedCLIRequest(request: .canvasSnapshot(session: session, outPath: outPath), kind: .generic)
default:
throw CLIError.help
@@ -294,16 +325,114 @@ struct ClawdisCLI {
// swiftlint:enable cyclomatic_complexity
/// Renders `response` as human-readable text.
///
/// A failed response prints its message (or "failed" when there is none) to stderr and
/// returns without throwing. Successful responses are written to stdout in a format
/// chosen by `parsed.kind`.
///
/// - Parameters:
///   - parsed: The parsed command; only its `kind` is consulted here.
///   - response: The response received for the request.
/// - Throws: Errors from writing the screenshot file or decoding the screen list.
private static func printText(parsed: ParsedCLIRequest, response: Response) throws {
    if !response.ok {
        fputs("\(response.message ?? "failed")\n", stderr)
        return
    }

    let out = FileHandle.standardOutput
    switch parsed.kind {
    case let .screenshot(outPath):
        // Persist the image and report where it landed.
        let savedPath = try self.writeScreenshotPayloadToFile(payload: response.payload, outPath: outPath)
        out.write(Data((savedPath + "\n").utf8))

    case .uiScreens:
        let screens = try self.decodePayload([UIScreenInfo].self, payload: response.payload)
        guard !screens.isEmpty else {
            out.write(Data("No screens\n".utf8))
            return
        }
        for screen in screens {
            let primaryTag = screen.isPrimary ? " (primary)" : ""
            let dimensions = "\(Int(screen.frame.width))×\(Int(screen.frame.height))"
            let scaleText = String(format: "%.1f", Double(screen.scaleFactor))
            // Displays are numbered from 1 for humans; `index` itself is 0-based.
            let line = "Display \(screen.index + 1)\(primaryTag): \(screen.name) \(dimensions) @\(scaleText)x (id \(screen.displayID))\n"
            out.write(Data(line.utf8))
        }

    case .generic:
        // Prefer a non-empty UTF-8 payload; otherwise fall back to the message.
        if let payload = response.payload,
           let text = String(data: payload, encoding: .utf8),
           !text.isEmpty
        {
            out.write(payload)
            if !text.hasSuffix("\n") {
                out.write(Data([0x0A]))
            }
        } else if let message = response.message, !message.isEmpty {
            out.write(Data((message + "\n").utf8))
        }
    }
}
/// Renders `response` as a pretty-printed JSON object on stdout, followed by a newline.
///
/// The object always carries `ok` and `message` (empty string when absent). Depending on
/// `parsed.kind` it additionally carries `result` (structured data) or `payload`
/// (raw UTF-8 text that was not valid JSON).
///
/// - Parameters:
///   - parsed: The parsed command; only its `kind` is consulted here.
///   - response: The response received for the request.
/// - Throws: Errors from writing the screenshot file or from JSON serialization.
private static func printJSON(parsed: ParsedCLIRequest, response: Response) throws {
    var envelope: [String: Any] = [
        "ok": response.ok,
        "message": response.message ?? "",
    ]

    switch parsed.kind {
    case let .screenshot(outPath):
        if !response.ok {
            envelope["result"] = NSNull()
        } else {
            // The image bytes go to disk; JSON only reports the resulting path.
            let savedPath = try self.writeScreenshotPayloadToFile(payload: response.payload, outPath: outPath)
            envelope["result"] = ["path": savedPath]
        }

    case .uiScreens:
        // Missing or unparsable payloads degrade to an empty array.
        var screens: Any = [Any]()
        if let payload = response.payload,
           let parsedObject = try? JSONSerialization.jsonObject(with: payload)
        {
            screens = parsedObject
        }
        envelope["result"] = screens

    case .generic:
        guard let payload = response.payload, !payload.isEmpty else { break }
        if let parsedObject = try? JSONSerialization.jsonObject(with: payload) {
            envelope["result"] = parsedObject
        } else if let text = String(data: payload, encoding: .utf8) {
            envelope["payload"] = text
        }
    }

    let encoded = try JSONSerialization.data(withJSONObject: envelope, options: [.prettyPrinted])
    let out = FileHandle.standardOutput
    out.write(encoded)
    out.write(Data([0x0A]))
}
/// Decodes a JSON response payload into `type`.
///
/// - Parameters:
///   - type: The `Decodable` type to produce.
///   - payload: Raw JSON bytes from the response, if any.
/// - Returns: The decoded value.
/// - Throws: `POSIXError(.EINVAL)` when `payload` is `nil`, or the `JSONDecoder`
///   error when the bytes do not decode as `type`.
private static func decodePayload<T: Decodable>(_ type: T.Type, payload: Data?) throws -> T {
    if let payload {
        return try JSONDecoder().decode(type, from: payload)
    }
    throw POSIXError(.EINVAL)
}
/// Writes screenshot bytes to disk and returns the path of the written file.
///
/// - Parameters:
///   - payload: Image bytes from the response.
///   - outPath: Destination path. When `nil` or empty, a timestamped
///     `screenshot-<millis>.png` is created under `<temporary directory>/clawdis-mac`.
/// - Returns: The file system path the payload was written to.
/// - Throws: `POSIXError(.EINVAL)` when `payload` is `nil` or empty; otherwise any
///   error from creating the temporary directory or writing the file.
private static func writeScreenshotPayloadToFile(payload: Data?, outPath: String?) throws -> String {
    guard let payload, !payload.isEmpty else { throw POSIXError(.EINVAL) }

    let url: URL
    if let outPath, !outPath.isEmpty {
        url = URL(fileURLWithPath: outPath).resolvingSymlinksInPath()
    } else {
        let dir = FileManager.default.temporaryDirectory.appendingPathComponent("clawdis-mac", isDirectory: true)
        // Surface directory-creation failures directly instead of swallowing them and
        // letting the write below fail with a less specific error. With
        // `withIntermediateDirectories: true` an already existing directory is not an error.
        try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
        let name = "screenshot-\(Int(Date().timeIntervalSince1970 * 1000)).png"
        url = dir.appendingPathComponent(name)
    }

    try payload.write(to: url)
    return url.path
}
private static func printHelp() {
let usage = """
clawdis-mac — talk to the running Clawdis.app XPC service
Usage:
clawdis-mac [--json] <command> ...
clawdis-mac notify --title <t> --body <b> [--sound <name>] [--priority <passive|active|timeSensitive>] [--delivery <system|overlay|auto>]
clawdis-mac ensure-permissions
[--cap <notifications|accessibility|screenRecording|microphone|speechRecognition>]
[--interactive]
clawdis-mac screenshot [--display-id <u32>] [--window-id <u32>]
clawdis-mac screenshot [--display-id <u32>] [--window-id <u32>] [--out <path>]
clawdis-mac ui screens
clawdis-mac run [--cwd <path>] [--env KEY=VAL] [--timeout <sec>] [--needs-screen-recording] <command ...>
clawdis-mac status
clawdis-mac rpc-status
@@ -320,8 +449,7 @@ struct ClawdisCLI {
clawdis-mac canvas snapshot [--out <path>] [--session <key>]
clawdis-mac --help
Returns JSON to stdout:
{"ok":<bool>,"message":"...","payload":"..."}
Default output is text. Use --json for machine-readable output.
"""
print(usage)
}

View File

@@ -1,3 +1,4 @@
import CoreGraphics
import Foundation
// MARK: - Capabilities
@@ -49,6 +50,39 @@ public struct CanvasPlacement: Codable, Sendable {
}
}
// MARK: - UI (Peekaboo-aligned types)
/// Display info aligned with Peekaboo's `ScreenService.ScreenInfo`.
///
/// `frame` and `visibleFrame` are AppKit screen rectangles (bottom-left origin).
public struct UIScreenInfo: Codable, Sendable {
    /// 0-based position in `NSScreen.screens` at runtime.
    public let index: Int
    /// Human-readable display name.
    public let name: String
    /// Full screen rectangle.
    public let frame: CGRect
    /// Visible screen rectangle.
    public let visibleFrame: CGRect
    /// Whether the producer flagged this screen as the primary one.
    public let isPrimary: Bool
    /// Backing scale factor of the screen.
    public let scaleFactor: CGFloat
    /// Numeric display identifier.
    public let displayID: UInt32

    /// Creates a screen description; every argument is stored unchanged.
    public init(
        index: Int,
        name: String,
        frame: CGRect,
        visibleFrame: CGRect,
        isPrimary: Bool,
        scaleFactor: CGFloat,
        displayID: UInt32
    ) {
        self.index = index
        self.name = name
        self.frame = frame
        self.visibleFrame = visibleFrame
        self.isPrimary = isPrimary
        self.scaleFactor = scaleFactor
        self.displayID = displayID
    }
}
public enum Request: Sendable {
case notify(
title: String,
@@ -58,6 +92,7 @@ public enum Request: Sendable {
delivery: NotificationDelivery?)
case ensurePermissions([Capability], interactive: Bool)
case screenshot(displayID: UInt32?, windowID: UInt32?, format: String)
case uiListScreens
case runShell(
command: [String],
cwd: String?,
@@ -115,6 +150,7 @@ extension Request: Codable {
case notify
case ensurePermissions
case screenshot
case uiListScreens
case runShell
case status
case agent
@@ -150,6 +186,9 @@ extension Request: Codable {
try container.encodeIfPresent(windowID, forKey: .windowID)
try container.encode(format, forKey: .format)
case .uiListScreens:
try container.encode(Kind.uiListScreens, forKey: .type)
case let .runShell(command, cwd, env, timeoutSec, needsSR):
try container.encode(Kind.runShell, forKey: .type)
try container.encode(command, forKey: .command)
@@ -232,6 +271,9 @@ extension Request: Codable {
let format = try container.decode(String.self, forKey: .format)
self = .screenshot(displayID: displayID, windowID: windowID, format: format)
case .uiListScreens:
self = .uiListScreens
case .runShell:
let command = try container.decode([String].self, forKey: .command)
let cwd = try container.decodeIfPresent(String.self, forKey: .cwd)

View File

@@ -63,16 +63,17 @@ struct Response { ok: Bool; message?: String; payload?: Data }
- XPCListener actor: routes Request → managers; logs via OSLog.
## CLI (`clawdis-mac`)
- Subcommands (JSON out, non-zero exit on failure):
- Subcommands (text by default; `--json` for machine output; non-zero exit on failure):
- `notify --title --body [--sound] [--priority passive|active|timeSensitive] [--delivery system|overlay|auto]`
- `ensure-permissions --cap accessibility --cap screenRecording [--interactive]`
- `screenshot [--display-id N | --window-id N] [--out path]`
- `ui screens`
- `run -- cmd args... [--cwd] [--env KEY=VAL] [--timeout 30] [--needs-screen-recording]`
- `status`
- Sounds: supply any macOS alert name with `--sound` per notification; omit the flag to use the system default. There is no longer a persisted “default sound” in the app UI.
- Priority: `timeSensitive` is best-effort and falls back to `active` unless the app is signed with the Time Sensitive Notifications entitlement.
- Delivery: `overlay` and `auto` show an in-app toast panel (bypasses Notification Center/Focus).
- Internals: builds Request, connects via AsyncXPCConnection, prints Response as JSON to stdout.
- Internals: builds a `ClawdisIPC.Request`, sends it to the running app over the local control socket, and prints text by default (or JSON with `--json`).
## Integration with clawdis/Clawdis (Node/TS)
- Add helper module that shells to `clawdis-mac`:

View File

@@ -47,7 +47,7 @@ Change `clawdis-mac` to default to human text output:
This applies globally, not only `ui` commands.
Note (current state as of 2025-12-13): `clawdis-mac` prints JSON by default. This is a planned behavior change.
Note (current state as of 2025-12-13): `clawdis-mac` prints text by default; use `--json` for structured output.
### Timeouts
Default timeout for UI actions: **10 seconds** end-to-end (CLI already defaults to 10s).