feat(ios): add ClawdisNode app scaffold

This commit is contained in:
Peter Steinberger
2025-12-12 21:18:54 +00:00
parent 0b532579d8
commit 6d6c3ad2c4
17 changed files with 1348 additions and 0 deletions

6
apps/ios/.swiftlint.yml Normal file
View File

@@ -0,0 +1,6 @@
parent_config: ../../.swiftlint.yml
included:
- Sources
- ../shared/ClawdisNodeKit/Sources

18
apps/ios/README.md Normal file
View File

@@ -0,0 +1,18 @@
# ClawdisNode (iOS)
Internal-only SwiftUI app scaffold.
## Lint/format (required)
```bash
brew install swiftformat swiftlint
```
## Generate the Xcode project
```bash
cd apps/ios
xcodegen generate
open ClawdisNode.xcodeproj
```
## Shared packages
- `../shared/ClawdisNodeKit` — shared types/constants used by iOS (and later macOS bridge + gateway routing).

View File

@@ -0,0 +1,121 @@
import ClawdisNodeKit
import Foundation
import Network
/// One-shot client that performs the bridge hello/pairing handshake over a short-lived TCP connection.
///
/// Frames are newline-delimited JSON objects. The connection opened by `pairAndHello` is always
/// cancelled before that method returns or throws.
actor BridgeClient {
    private let encoder = JSONEncoder()
    private let decoder = JSONDecoder()

    /// Sends a hello frame and, when the bridge answers `NOT_PAIRED` or `UNAUTHORIZED`, a pair request.
    ///
    /// - Parameters:
    ///   - endpoint: Bridge endpoint to connect to.
    ///   - nodeId: Identifier sent in the hello and pair-request frames.
    ///   - displayName: Optional display name sent in both frames.
    ///   - platform: Platform string sent in both frames.
    ///   - version: Version string sent in both frames.
    ///   - existingToken: Previously issued token, sent with the hello frame.
    /// - Returns: `existingToken` (or `""` when it is `nil`) if the bridge answers `hello-ok`,
    ///   otherwise the token carried by the `pair-ok` frame.
    /// - Throws: An `NSError` in the `"Bridge"` domain (code 0: unexpected response, code 1: error
    ///   reply to the hello or stream ended while pairing, code 2: error reply to the pair request),
    ///   or the underlying connection / encoding / decoding error.
    func pairAndHello(
        endpoint: NWEndpoint,
        nodeId: String,
        displayName: String?,
        platform: String,
        version: String,
        existingToken: String?) async throws -> String
    {
        let connection = NWConnection(to: endpoint, using: .tcp)
        // Release the socket on every exit path, including thrown send/receive/decode errors.
        defer { connection.cancel() }
        connection.start(queue: DispatchQueue(label: "com.steipete.clawdis.ios.bridge-client"))

        // Bytes received but not yet consumed as a line. Kept across reads so that frames which
        // arrive in the same chunk as an earlier frame are not lost.
        var pending = Data()

        try await self.send(
            BridgeHello(
                nodeId: nodeId,
                displayName: displayName,
                token: existingToken,
                platform: platform,
                version: version),
            over: connection)

        guard let line = try await self.receiveLine(over: connection, buffer: &pending),
              let data = line.data(using: .utf8),
              let base = try? self.decoder.decode(BridgeBaseFrame.self, from: data)
        else {
            throw Self.bridgeError(code: 0, message: "Unexpected bridge response")
        }

        switch base.type {
        case "hello-ok":
            return existingToken ?? ""

        case "error":
            let helloError = try self.decoder.decode(BridgeErrorFrame.self, from: data)
            let helloErrorText = "\(helloError.code): \(helloError.message)"
            guard helloError.code == "NOT_PAIRED" || helloError.code == "UNAUTHORIZED" else {
                throw Self.bridgeError(code: 1, message: helloErrorText)
            }

            try await self.send(
                BridgePairRequest(
                    nodeId: nodeId,
                    displayName: displayName,
                    platform: platform,
                    version: version),
                over: connection)

            // Wait for the pairing outcome; frames of any other type are skipped.
            while let next = try await self.receiveLine(over: connection, buffer: &pending) {
                guard let nextData = next.data(using: .utf8) else { continue }
                let nextBase = try self.decoder.decode(BridgeBaseFrame.self, from: nextData)
                switch nextBase.type {
                case "pair-ok":
                    return try self.decoder.decode(BridgePairOk.self, from: nextData).token
                case "error":
                    let pairError = try self.decoder.decode(BridgeErrorFrame.self, from: nextData)
                    throw Self.bridgeError(code: 2, message: "\(pairError.code): \(pairError.message)")
                default:
                    continue
                }
            }
            // The stream ended before a pairing outcome arrived: report the original hello error.
            throw Self.bridgeError(code: 1, message: helloErrorText)

        default:
            throw Self.bridgeError(code: 0, message: "Unexpected bridge response")
        }
    }

    /// Builds an `NSError` in the `"Bridge"` domain with `message` as its localized description.
    private static func bridgeError(code: Int, message: String) -> NSError {
        NSError(domain: "Bridge", code: code, userInfo: [
            NSLocalizedDescriptionKey: message,
        ])
    }

    /// JSON-encodes `obj`, appends a newline (0x0A) and sends it, suspending until the content is processed.
    private func send(_ obj: some Encodable, over connection: NWConnection) async throws {
        var line = try self.encoder.encode(obj)
        line.append(0x0A)
        try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
            connection.send(content: line, completion: .contentProcessed { err in
                if let err { cont.resume(throwing: err) } else { cont.resume(returning: ()) }
            })
        }
    }

    /// Returns the next newline-terminated line, reading more chunks as needed.
    ///
    /// Consumed bytes (including the newline) are removed from `buffer`; any remainder stays there
    /// for the next call. Returns `nil` when no more data arrives before a newline is found, or when
    /// the line is not valid UTF-8.
    private func receiveLine(over connection: NWConnection, buffer: inout Data) async throws -> String? {
        while true {
            if let newline = buffer.firstIndex(of: 0x0A) {
                let lineData = buffer.prefix(upTo: newline)
                buffer.removeSubrange(...newline)
                return String(data: lineData, encoding: .utf8)
            }
            let chunk = try await self.receiveChunk(over: connection)
            if chunk.isEmpty { return nil }
            buffer.append(chunk)
        }
    }

    /// Receives up to 64 KiB from the connection; an empty result means no (more) data was delivered.
    private func receiveChunk(over connection: NWConnection) async throws -> Data {
        try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Data, Error>) in
            connection.receive(minimumIncompleteLength: 1, maximumLength: 64 * 1024) { data, _, _, error in
                if let error {
                    cont.resume(throwing: error)
                    return
                }
                // Data delivered together with the completion flag is still handed to the caller;
                // the following receive then yields no data and ends the read loop.
                cont.resume(returning: data ?? Data())
            }
        }
    }
}

View File

@@ -0,0 +1,74 @@
import ClawdisNodeKit
import Foundation
import Network
/// Publishes the bridges found by a Bonjour browser together with a human-readable browser status.
@MainActor
final class BridgeDiscoveryModel: ObservableObject {
    /// A discovered bridge service; `id` is the textual description of its endpoint.
    struct DiscoveredBridge: Identifiable, Equatable {
        var id: String { self.debugID }
        var name: String
        var endpoint: NWEndpoint
        var debugID: String
    }

    @Published var bridges: [DiscoveredBridge] = []
    @Published var statusText: String = "Idle"

    private var browser: NWBrowser?

    /// Starts browsing for bridge services; does nothing if a browser already exists.
    func start() {
        guard self.browser == nil else { return }

        let descriptor = NWBrowser.Descriptor.bonjour(
            type: ClawdisBonjour.bridgeServiceType,
            domain: ClawdisBonjour.bridgeServiceDomain)
        let newBrowser = NWBrowser(for: descriptor, using: NWParameters.tcp)

        // Handlers run on the browser's queue, so hop to the main actor before touching published state.
        newBrowser.stateUpdateHandler = { [weak self] state in
            Task { @MainActor in
                guard let self else { return }
                self.statusText = BridgeDiscoveryModel.statusText(for: state)
            }
        }

        newBrowser.browseResultsChangedHandler = { [weak self] results, _ in
            Task { @MainActor in
                guard let self else { return }
                var found: [DiscoveredBridge] = []
                for result in results {
                    // Only Bonjour service endpoints carry a name; everything else is ignored.
                    guard case let .service(name, _, _, _) = result.endpoint else { continue }
                    found.append(DiscoveredBridge(
                        name: name,
                        endpoint: result.endpoint,
                        debugID: String(describing: result.endpoint)))
                }
                self.bridges = found.sorted {
                    $0.name.localizedCaseInsensitiveCompare($1.name) == .orderedAscending
                }
            }
        }

        self.browser = newBrowser
        newBrowser.start(queue: DispatchQueue(label: "com.steipete.clawdis.ios.bridge-discovery"))
    }

    /// Cancels the browser (if any) and resets the published list and status.
    func stop() {
        if let active = self.browser {
            active.cancel()
        }
        self.browser = nil
        self.bridges = []
        self.statusText = "Stopped"
    }

    /// Maps a browser state to the status string shown to the user.
    private static func statusText(for state: NWBrowser.State) -> String {
        switch state {
        case .setup:
            return "Setup"
        case .ready:
            return "Searching…"
        case let .failed(err):
            return "Failed: \(err)"
        case .cancelled:
            return "Stopped"
        case let .waiting(err):
            return "Waiting: \(err)"
        @unknown default:
            return "Unknown"
        }
    }
}

View File

@@ -0,0 +1,151 @@
import ClawdisNodeKit
import Foundation
import Network
/// Long-lived bridge connection: performs the hello handshake, then answers `ping` and `invoke`
/// frames until the stream ends, the session is disconnected, or an error occurs.
///
/// Frames are newline-delimited JSON objects.
actor BridgeSession {
    /// Lifecycle of the session as tracked by this actor.
    enum State: Sendable, Equatable {
        case idle
        case connecting
        case connected(serverName: String)
        case failed(message: String)
    }

    private let encoder = JSONEncoder()
    private let decoder = JSONDecoder()

    private var connection: NWConnection?
    private var queue: DispatchQueue?
    /// Bytes received but not yet consumed as a complete line.
    private var buffer = Data()

    private(set) var state: State = .idle

    /// Connects to `endpoint`, sends `hello`, and then serves incoming frames until the session ends.
    ///
    /// Any existing connection is dropped first. The call returns normally when the stream ends and
    /// leaves the session `.idle`. When it throws, the connection is closed and the session is left
    /// in `.failed(message:)`, unless the session was disconnected or replaced in the meantime.
    ///
    /// - Parameters:
    ///   - endpoint: Bridge endpoint to connect to.
    ///   - hello: Hello frame sent right after the connection is started.
    ///   - onConnected: Called with the server name once the bridge answers `hello-ok`.
    ///   - onInvoke: Called for every `invoke` frame; its result is sent back to the bridge.
    /// - Throws: An `NSError` in the `"Bridge"` domain for handshake failures (code 1: unreadable
    ///   response, code 2: error frame, code 3: unexpected frame type), `CancellationError` when a
    ///   newer session replaced this one, or the underlying connection / coding error.
    func connect(
        endpoint: NWEndpoint,
        hello: BridgeHello,
        onConnected: (@Sendable (String) async -> Void)? = nil,
        onInvoke: @escaping @Sendable (BridgeInvokeRequest) async -> BridgeInvokeResponse)
        async throws
    {
        await self.disconnect()
        self.state = .connecting

        let connection = NWConnection(to: endpoint, using: .tcp)
        let queue = DispatchQueue(label: "com.steipete.clawdis.ios.bridge-session")
        self.connection = connection
        self.queue = queue
        connection.start(queue: queue)

        do {
            try await self.performHandshake(hello: hello, onConnected: onConnected)
            try await self.serveFrames(on: connection, onInvoke: onInvoke)
        } catch {
            self.finish(connection, finalState: .failed(message: error.localizedDescription))
            throw error
        }
        self.finish(connection, finalState: .idle)
    }

    /// Sends an `event` frame with the given name and optional JSON payload.
    func sendEvent(event: String, payloadJSON: String?) async throws {
        try await self.send(BridgeEventFrame(type: "event", event: event, payloadJSON: payloadJSON))
    }

    /// Cancels the current connection (if any) and resets the session to `.idle`.
    func disconnect() async {
        self.connection?.cancel()
        self.connection = nil
        self.queue = nil
        self.buffer = Data()
        self.state = .idle
    }

    /// Sends `hello`, reads the first reply, and moves to `.connected` on `hello-ok`.
    private func performHandshake(
        hello: BridgeHello,
        onConnected: (@Sendable (String) async -> Void)?) async throws
    {
        try await self.send(hello)

        guard let line = try await self.receiveLine(),
              let data = line.data(using: .utf8),
              let base = try? self.decoder.decode(BridgeBaseFrame.self, from: data)
        else {
            throw Self.bridgeError(code: 1, message: "Unexpected bridge response")
        }

        switch base.type {
        case "hello-ok":
            let ok = try self.decoder.decode(BridgeHelloOk.self, from: data)
            self.state = .connected(serverName: ok.serverName)
            await onConnected?(ok.serverName)
        case "error":
            let err = try self.decoder.decode(BridgeErrorFrame.self, from: data)
            throw Self.bridgeError(code: 2, message: "\(err.code): \(err.message)")
        default:
            throw Self.bridgeError(code: 3, message: "Unexpected bridge response")
        }
    }

    /// Answers `ping` and `invoke` frames until the stream ends; other or undecodable frames are skipped.
    private func serveFrames(
        on connection: NWConnection,
        onInvoke: @Sendable (BridgeInvokeRequest) async -> BridgeInvokeResponse) async throws
    {
        while true {
            guard let next = try await self.receiveLine() else { return }
            // The actor is re-entrant: stop if this session was disconnected or replaced while suspended.
            guard self.connection === connection else { throw CancellationError() }
            guard let nextData = next.data(using: .utf8) else { continue }
            guard let nextBase = try? self.decoder.decode(BridgeBaseFrame.self, from: nextData) else { continue }

            switch nextBase.type {
            case "ping":
                let ping = try self.decoder.decode(BridgePing.self, from: nextData)
                try await self.send(BridgePong(type: "pong", id: ping.id))
            case "invoke":
                let req = try self.decoder.decode(BridgeInvokeRequest.self, from: nextData)
                let res = await onInvoke(req)
                // Never deliver a response on a connection that belongs to a newer session.
                guard self.connection === connection else { throw CancellationError() }
                try await self.send(res)
            default:
                continue
            }
        }
    }

    /// Closes `connection` and, if it is still the session's current connection, clears the
    /// connection state and records `finalState`.
    private func finish(_ connection: NWConnection, finalState: State) {
        connection.cancel()
        // When the session was disconnected or replaced, the shared state is no longer ours to change.
        guard self.connection === connection else { return }
        self.connection = nil
        self.queue = nil
        self.buffer = Data()
        self.state = finalState
    }

    /// Builds an `NSError` in the `"Bridge"` domain with `message` as its localized description.
    private static func bridgeError(code: Int, message: String) -> NSError {
        NSError(domain: "Bridge", code: code, userInfo: [
            NSLocalizedDescriptionKey: message,
        ])
    }

    /// JSON-encodes `obj`, appends a newline (0x0A) and sends it on the current connection.
    ///
    /// - Throws: `NSError` (`"Bridge"`, code 10) when there is no connection, or the send/encoding error.
    private func send(_ obj: some Encodable) async throws {
        guard let connection = self.connection else {
            throw Self.bridgeError(code: 10, message: "not connected")
        }
        var line = try self.encoder.encode(obj)
        line.append(0x0A)
        try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
            connection.send(content: line, completion: .contentProcessed { err in
                if let err { cont.resume(throwing: err) } else { cont.resume(returning: ()) }
            })
        }
    }

    /// Returns the next newline-terminated line, or `nil` when no more data arrives.
    ///
    /// Invalid UTF-8 is decoded with replacement characters, so `nil` always means end of stream;
    /// a malformed line then simply fails JSON decoding in the caller.
    private func receiveLine() async throws -> String? {
        while true {
            if let newline = self.buffer.firstIndex(of: 0x0A) {
                let lineData = self.buffer.prefix(upTo: newline)
                self.buffer.removeSubrange(...newline)
                return String(decoding: lineData, as: UTF8.self)
            }
            let chunk = try await self.receiveChunk()
            if chunk.isEmpty { return nil }
            self.buffer.append(chunk)
        }
    }

    /// Receives up to 64 KiB from the current connection; an empty result means no (more) data.
    private func receiveChunk() async throws -> Data {
        guard let connection = self.connection else { return Data() }
        return try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Data, Error>) in
            connection.receive(minimumIncompleteLength: 1, maximumLength: 64 * 1024) { data, _, _, error in
                if let error {
                    cont.resume(throwing: error)
                    return
                }
                // Data delivered together with the completion flag is still handed to the caller;
                // the following receive then yields no data and ends the read loop.
                cont.resume(returning: data ?? Data())
            }
        }
    }
}

View File

@@ -0,0 +1,49 @@
import Foundation
import Security
/// Minimal wrapper for storing UTF-8 strings as generic-password keychain items.
///
/// Items are identified by service and account only. The accessibility class is an attribute that
/// is written on save, not a search filter, so items stored with another accessibility class are
/// still found and are migrated on the next save.
enum KeychainStore {
    /// Returns the stored string for `service`/`account`, or `nil` when the item is missing,
    /// cannot be read, or is not valid UTF-8.
    static func loadString(service: String, account: String) -> String? {
        var query = Self.itemQuery(service: service, account: account)
        query[kSecReturnData as String] = true
        query[kSecMatchLimit as String] = kSecMatchLimitOne

        var item: CFTypeRef?
        let status = SecItemCopyMatching(query as CFDictionary, &item)
        guard status == errSecSuccess, let data = item as? Data else { return nil }
        return String(data: data, encoding: .utf8)
    }

    /// Stores `value` for `service`/`account`, updating an existing item or adding a new one.
    ///
    /// - Returns: `true` when the update or the insert succeeded.
    static func saveString(_ value: String, service: String, account: String) -> Bool {
        let query = Self.itemQuery(service: service, account: account)
        let attributes: [String: Any] = [
            kSecValueData as String: Data(value.utf8),
            kSecAttrAccessible as String: kSecAttrAccessibleAfterFirstUnlockThisDeviceOnly,
        ]

        let status = SecItemUpdate(query as CFDictionary, attributes as CFDictionary)
        if status == errSecSuccess { return true }
        // Any failure other than "no such item" is final; only a missing item is inserted.
        guard status == errSecItemNotFound else { return false }

        let insert = query.merging(attributes) { _, new in new }
        return SecItemAdd(insert as CFDictionary, nil) == errSecSuccess
    }

    /// Deletes the item for `service`/`account`.
    ///
    /// - Returns: `true` when the item was deleted or did not exist.
    static func delete(service: String, account: String) -> Bool {
        let status = SecItemDelete(Self.itemQuery(service: service, account: account) as CFDictionary)
        return status == errSecSuccess || status == errSecItemNotFound
    }

    /// Base query that identifies one generic-password item by service and account.
    private static func itemQuery(service: String, account: String) -> [String: Any] {
        [
            kSecClass as String: kSecClassGenericPassword,
            kSecAttrService as String: service,
            kSecAttrAccount as String: account,
        ]
    }
}

View File

@@ -0,0 +1,18 @@
import SwiftUI
/// App entry point: creates the shared `NodeAppModel` and shows `RootTabs` in a single window group.
@main
struct ClawdisNodeApp: App {
// Owned by the app for its whole lifetime and injected into the view hierarchy below.
@StateObject private var appModel = NodeAppModel()
@Environment(\.scenePhase) private var scenePhase
var body: some Scene {
WindowGroup {
RootTabs()
// Both the app model and its voice-wake manager are exposed as environment objects.
.environmentObject(self.appModel)
.environmentObject(self.appModel.voiceWake)
// Forward scene-phase changes to the model (only fires when the phase changes).
.onChange(of: self.scenePhase) { _, newValue in
self.appModel.setScenePhase(newValue)
}
}
}
}

View File

@@ -0,0 +1,45 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>$(DEVELOPMENT_LANGUAGE)</string>
<key>CFBundleDisplayName</key>
<string>Clawdis Node</string>
<key>CFBundleExecutable</key>
<string>$(EXECUTABLE_NAME)</string>
<key>CFBundleIdentifier</key>
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>$(PRODUCT_NAME)</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>NSBonjourServices</key>
<array>
<string>_clawdis-bridge._tcp</string>
</array>
<key>NSLocalNetworkUsageDescription</key>
<string>Clawdis Node discovers and connects to your Clawdis bridge on the local network.</string>
<key>NSMicrophoneUsageDescription</key>
<string>Clawdis Node needs microphone access for voice wake.</string>
<key>NSSpeechRecognitionUsageDescription</key>
<string>Clawdis Node uses on-device speech recognition for voice wake.</string>
<key>UIApplicationSceneManifest</key>
<dict>
<key>UIApplicationSupportsMultipleScenes</key>
<false/>
</dict>
<key>UIBackgroundModes</key>
<array>
<string>audio</string>
</array>
<key>UILaunchScreen</key>
<dict/>
</dict>
</plist>

View File

@@ -0,0 +1,185 @@
import ClawdisNodeKit
import Network
import SwiftUI
/// Main-actor app model: owns the screen controller, the voice-wake manager and the bridge
/// session, and publishes the bridge connection status for the UI.
@MainActor
final class NodeAppModel: ObservableObject {
    @Published var isBackgrounded: Bool = false
    let screen = ScreenController()
    @Published var bridgeStatusText: String = "Not connected"
    @Published var bridgeServerName: String?

    private let bridge = BridgeSession()
    private var bridgeTask: Task<Void, Never>?
    let voiceWake = VoiceWakeManager()

    /// Domain and code of the error thrown by `decodeParams` when `paramsJSON` is missing.
    private static let bridgeErrorDomain = "Bridge"
    private static let missingParamsErrorCode = 20

    /// Wires voice-wake commands to the bridge and applies the persisted voice-wake setting.
    init() {
        self.voiceWake.configure { [weak self] cmd in
            guard let self else { return }
            let nodeId = UserDefaults.standard.string(forKey: "node.instanceId") ?? "ios-node"
            let sessionKey = "node-\(nodeId)"
            do {
                try await self.sendVoiceTranscript(text: cmd, sessionKey: sessionKey)
            } catch {
                // Best-effort only.
            }
        }

        let enabled = UserDefaults.standard.bool(forKey: "voiceWake.enabled")
        self.voiceWake.setEnabled(enabled)
    }

    /// Records whether the app is backgrounded; every phase other than `.background` counts as foreground.
    func setScenePhase(_ phase: ScenePhase) {
        switch phase {
        case .background:
            self.isBackgrounded = true
        case .active, .inactive:
            self.isBackgrounded = false
        @unknown default:
            self.isBackgrounded = false
        }
    }

    /// Enables or disables voice wake.
    func setVoiceWakeEnabled(_ enabled: Bool) {
        self.voiceWake.setEnabled(enabled)
    }

    /// Starts a bridge session in a new task, replacing any session started earlier.
    ///
    /// Status text and server name are published as the session progresses. A task that has been
    /// cancelled (by a newer `connectToBridge` call or by `disconnectBridge`) never touches the
    /// published status, so it cannot overwrite the state of its successor.
    func connectToBridge(
        endpoint: NWEndpoint,
        token: String,
        nodeId: String,
        displayName: String?,
        platform: String,
        version: String)
    {
        self.bridgeTask?.cancel()
        self.bridgeStatusText = "Connecting…"
        self.bridgeServerName = nil

        self.bridgeTask = Task {
            do {
                try await self.bridge.connect(
                    endpoint: endpoint,
                    hello: BridgeHello(
                        nodeId: nodeId,
                        displayName: displayName,
                        token: token,
                        platform: platform,
                        version: version),
                    onConnected: { [weak self] serverName in
                        await MainActor.run {
                            self?.bridgeStatusText = "Connected"
                            self?.bridgeServerName = serverName
                        }
                    },
                    onInvoke: { [weak self] req in
                        guard let self else {
                            return BridgeInvokeResponse(
                                id: req.id,
                                ok: false,
                                error: ClawdisNodeError(code: .unavailable, message: "UNAVAILABLE: node not ready"))
                        }
                        return await self.handleInvoke(req)
                    })
                // Superseded or explicitly disconnected: whoever cancelled us owns the status now.
                guard !Task.isCancelled else { return }
                self.bridgeStatusText = "Disconnected"
                self.bridgeServerName = nil
            } catch {
                guard !Task.isCancelled else { return }
                self.bridgeStatusText = "Bridge error: \(error.localizedDescription)"
                self.bridgeServerName = nil
            }
        }
    }

    /// Cancels the running bridge task, disconnects the session and publishes "Disconnected".
    func disconnectBridge() {
        self.bridgeTask?.cancel()
        self.bridgeTask = nil
        Task { await self.bridge.disconnect() }
        self.bridgeStatusText = "Disconnected"
        self.bridgeServerName = nil
    }

    /// Sends a `voice.transcript` event whose payload is the JSON encoding of `text` and `sessionKey`.
    func sendVoiceTranscript(text: String, sessionKey: String?) async throws {
        struct Payload: Codable {
            var text: String
            var sessionKey: String?
        }
        let payload = Payload(text: text, sessionKey: sessionKey)
        let data = try JSONEncoder().encode(payload)
        let json = String(decoding: data, as: UTF8.self)
        try await self.bridge.sendEvent(event: "voice.transcript", payloadJSON: json)
    }

    /// Executes one invoke request from the bridge and builds the response frame.
    ///
    /// `screen.*` commands are rejected with `.backgroundUnavailable` while the app is backgrounded.
    /// Missing or undecodable parameters are reported as `.invalidRequest`; any other failure is
    /// reported as `.unavailable`.
    private func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse {
        if req.command.hasPrefix("screen."), self.isBackgrounded {
            return BridgeInvokeResponse(
                id: req.id,
                ok: false,
                error: ClawdisNodeError(
                    code: .backgroundUnavailable,
                    message: "NODE_BACKGROUND_UNAVAILABLE: screen commands require foreground"))
        }

        do {
            switch req.command {
            case ClawdisScreenCommand.show.rawValue:
                return BridgeInvokeResponse(id: req.id, ok: true)

            case ClawdisScreenCommand.hide.rawValue:
                return BridgeInvokeResponse(id: req.id, ok: true)

            case ClawdisScreenCommand.setMode.rawValue:
                let params = try Self.decodeParams(ClawdisScreenSetModeParams.self, from: req.paramsJSON)
                self.screen.setMode(params.mode)
                return BridgeInvokeResponse(id: req.id, ok: true)

            case ClawdisScreenCommand.navigate.rawValue:
                let params = try Self.decodeParams(ClawdisScreenNavigateParams.self, from: req.paramsJSON)
                self.screen.navigate(to: params.url)
                return BridgeInvokeResponse(id: req.id, ok: true)

            case ClawdisScreenCommand.evalJS.rawValue:
                let params = try Self.decodeParams(ClawdisScreenEvalParams.self, from: req.paramsJSON)
                let result = try await self.screen.eval(javaScript: params.javaScript)
                let payload = try Self.encodePayload(["result": result])
                return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)

            case ClawdisScreenCommand.snapshot.rawValue:
                // Parameters are optional for snapshots, so decoding failures are ignored here.
                let params = try? Self.decodeParams(ClawdisScreenSnapshotParams.self, from: req.paramsJSON)
                let maxWidth = params?.maxWidth.map { CGFloat($0) }
                let base64 = try await self.screen.snapshotPNGBase64(maxWidth: maxWidth)
                let payload = try Self.encodePayload(["format": "png", "base64": base64])
                return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)

            default:
                return BridgeInvokeResponse(
                    id: req.id,
                    ok: false,
                    error: ClawdisNodeError(code: .invalidRequest, message: "INVALID_REQUEST: unknown command"))
            }
        } catch {
            if let message = Self.invalidRequestMessage(for: error) {
                return BridgeInvokeResponse(
                    id: req.id,
                    ok: false,
                    error: ClawdisNodeError(code: .invalidRequest, message: message))
            }
            return BridgeInvokeResponse(
                id: req.id,
                ok: false,
                error: ClawdisNodeError(code: .unavailable, message: error.localizedDescription))
        }
    }

    /// Returns the response message when `error` means the request itself was malformed
    /// (missing `paramsJSON` or parameters that fail to decode), otherwise `nil`.
    private static func invalidRequestMessage(for error: Error) -> String? {
        if error is DecodingError {
            return "INVALID_REQUEST: \(error.localizedDescription)"
        }
        let nsError = error as NSError
        if nsError.domain == Self.bridgeErrorDomain, nsError.code == Self.missingParamsErrorCode {
            return nsError.localizedDescription
        }
        return nil
    }

    /// Decodes `type` from a JSON string.
    ///
    /// - Throws: `NSError` (`"Bridge"`, code 20) when `json` is `nil`, or the `JSONDecoder` error.
    private static func decodeParams<T: Decodable>(_ type: T.Type, from json: String?) throws -> T {
        guard let json, let data = json.data(using: .utf8) else {
            throw NSError(domain: Self.bridgeErrorDomain, code: Self.missingParamsErrorCode, userInfo: [
                NSLocalizedDescriptionKey: "INVALID_REQUEST: paramsJSON required",
            ])
        }
        return try JSONDecoder().decode(type, from: data)
    }

    /// JSON-encodes `obj` and returns the result as a string.
    private static func encodePayload(_ obj: some Encodable) throws -> String {
        let data = try JSONEncoder().encode(obj)
        return String(decoding: data, as: UTF8.self)
    }
}

View File

@@ -0,0 +1,16 @@
import SwiftUI
/// Root view of the app: a tab bar with the Screen, Voice and Settings tabs.
struct RootTabs: View {
var body: some View {
TabView {
// Each tab supplies its own title and SF Symbol through `tabItem`.
ScreenTab()
.tabItem { Label("Screen", systemImage: "rectangle.and.hand.point.up.left") }
VoiceTab()
.tabItem { Label("Voice", systemImage: "mic") }
SettingsTab()
.tabItem { Label("Settings", systemImage: "gearshape") }
}
}
}

View File

@@ -0,0 +1,120 @@
import ClawdisNodeKit
import SwiftUI
import WebKit
/// Owns the web view behind the Screen tab and exposes the operations used by the UI and by
/// bridge commands: switching mode, navigating, evaluating JavaScript and taking snapshots.
@MainActor
final class ScreenController: ObservableObject {
    /// Web view configured with a non-persistent website data store.
    let webView: WKWebView

    @Published var mode: ClawdisScreenMode = .web
    @Published var urlString: String = "https://example.com"
    @Published var errorText: String?

    init() {
        let config = WKWebViewConfiguration()
        config.websiteDataStore = .nonPersistent()
        self.webView = WKWebView(frame: .zero, configuration: config)
    }

    /// Switches the display mode and reloads the content for it.
    func setMode(_ mode: ClawdisScreenMode) {
        self.mode = mode
        self.reload()
    }

    /// Stores `urlString` and reloads; the URL is only loaded while in `.web` mode.
    func navigate(to urlString: String) {
        self.urlString = urlString
        self.reload()
    }

    /// Loads the content for the current mode.
    ///
    /// In `.web` mode the trimmed `urlString` is loaded; if it cannot be parsed as a URL nothing is
    /// loaded and `errorText` is set instead of failing silently. In `.canvas` mode the built-in
    /// scaffold page is loaded.
    func reload() {
        switch self.mode {
        case .web:
            let trimmed = self.urlString.trimmingCharacters(in: .whitespacesAndNewlines)
            guard let url = URL(string: trimmed) else {
                self.errorText = "Invalid URL"
                return
            }
            // A load is starting, so any previously shown error no longer applies.
            self.errorText = nil
            self.webView.load(URLRequest(url: url))
        case .canvas:
            self.webView.loadHTMLString(Self.canvasScaffoldHTML, baseURL: nil)
        }
    }

    /// Evaluates `javaScript` in the web view.
    ///
    /// - Returns: `String(describing:)` of the script result, or `""` when there is no result.
    /// - Throws: The error reported by the web view.
    func eval(javaScript: String) async throws -> String {
        try await withCheckedThrowingContinuation { cont in
            self.webView.evaluateJavaScript(javaScript) { result, error in
                if let error {
                    cont.resume(throwing: error)
                    return
                }
                if let result {
                    cont.resume(returning: String(describing: result))
                } else {
                    cont.resume(returning: "")
                }
            }
        }
    }

    /// Takes a snapshot of the web view and returns it as base64-encoded PNG data.
    ///
    /// - Parameter maxWidth: Optional snapshot width passed to the snapshot configuration.
    /// - Throws: The web view's snapshot error, or an `NSError` in the `"Screen"` domain
    ///   (code 2: no image was produced, code 1: PNG encoding failed).
    func snapshotPNGBase64(maxWidth: CGFloat? = nil) async throws -> String {
        let config = WKSnapshotConfiguration()
        if let maxWidth {
            config.snapshotWidth = NSNumber(value: Double(maxWidth))
        }

        let image: UIImage = try await withCheckedThrowingContinuation { (cont: CheckedContinuation<UIImage, Error>) in
            self.webView.takeSnapshot(with: config) { image, error in
                if let error {
                    cont.resume(throwing: error)
                    return
                }
                guard let image else {
                    cont.resume(throwing: NSError(domain: "Screen", code: 2, userInfo: [
                        NSLocalizedDescriptionKey: "snapshot failed",
                    ]))
                    return
                }
                cont.resume(returning: image)
            }
        }

        guard let data = image.pngData() else {
            throw NSError(domain: "Screen", code: 1, userInfo: [
                NSLocalizedDescriptionKey: "snapshot encode failed",
            ])
        }
        return data.base64EncodedString()
    }

    /// Placeholder page shown in `.canvas` mode.
    private static let canvasScaffoldHTML = """
    <!doctype html>
    <html>
    <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <title>Canvas</title>
    <style>
    :root { color-scheme: dark; }
    html,body { height:100%; margin:0; }
    body {
    font: 13px -apple-system, system-ui;
    display:flex;
    align-items:center;
    justify-content:center;
    background:#0b1020;
    color:#e5e7eb;
    }
    .card {
    max-width: 520px;
    padding: 18px;
    border-radius: 14px;
    border: 1px solid rgba(255,255,255,.10);
    background: rgba(255,255,255,.06);
    box-shadow: 0 18px 60px rgba(0,0,0,.35);
    }
    .muted { color: rgba(229,231,235,.75); margin-top: 8px; }
    </style>
    </head>
    <body>
    <div class="card">
    <div style="font-weight:600; font-size:14px;">Canvas scaffold</div>
    <div class="muted">Next: agent-driven on-disk workspace.</div>
    </div>
    </body>
    </html>
    """
}

View File

@@ -0,0 +1,74 @@
import ClawdisNodeKit
import SwiftUI
/// Screen tab: shows the controlled web view with mode and URL controls underneath.
struct ScreenTab: View {
    @EnvironmentObject private var appModel: NodeAppModel

    var body: some View {
        NavigationStack {
            // `ScreenController` is an observable object nested inside the app model, so its changes
            // do not invalidate this view. The content view observes the controller directly so that
            // the error overlay, the mode picker and the URL field stay in sync with it.
            ScreenTabContent(
                screen: self.appModel.screen,
                isBackgrounded: self.appModel.isBackgrounded)
                .navigationTitle("Screen")
                .navigationBarTitleDisplayMode(.inline)
        }
    }
}

/// Content of the Screen tab, re-rendered whenever the observed `ScreenController` publishes a change.
private struct ScreenTabContent: View {
    @ObservedObject var screen: ScreenController
    /// Whether the app is currently backgrounded, as tracked by the app model.
    let isBackgrounded: Bool

    var body: some View {
        VStack(spacing: 0) {
            ScreenWebView(controller: self.screen)
                .overlay(alignment: .top) {
                    if let errorText = self.screen.errorText {
                        Text(errorText)
                            .font(.footnote)
                            .padding(10)
                            .background(.thinMaterial)
                            .clipShape(RoundedRectangle(cornerRadius: 12, style: .continuous))
                            .padding()
                    }
                }

            Divider()

            VStack(spacing: 10) {
                // A custom binding is used so that changing the mode goes through `setMode`,
                // which also reloads the content.
                Picker(
                    "Mode",
                    selection: Binding(
                        get: { self.screen.mode },
                        set: { self.screen.setMode($0) }))
                {
                    Text("Web").tag(ClawdisScreenMode.web)
                    Text("Canvas").tag(ClawdisScreenMode.canvas)
                }
                .pickerStyle(.segmented)

                HStack(spacing: 10) {
                    TextField("URL", text: self.$screen.urlString)
                        .textInputAutocapitalization(.never)
                        .autocorrectionDisabled()
                        .keyboardType(.URL)
                        .textFieldStyle(.roundedBorder)
                    Button("Go") { self.navigate() }
                        .buttonStyle(.borderedProminent)
                }

                if self.isBackgrounded {
                    Text("Screen commands unavailable while backgrounded.")
                        .font(.footnote)
                        .foregroundStyle(.secondary)
                        .frame(maxWidth: .infinity, alignment: .leading)
                }
            }
            .padding()
        }
    }

    /// Reloads the screen for the current URL, or shows the background error while backgrounded.
    private func navigate() {
        if self.isBackgrounded {
            self.screen.errorText = ClawdisNodeError(
                code: .backgroundUnavailable,
                message: "NODE_BACKGROUND_UNAVAILABLE: screen commands require foreground").message
            return
        }
        self.screen.errorText = nil
        self.screen.reload()
    }
}

View File

@@ -0,0 +1,15 @@
import ClawdisNodeKit
import SwiftUI
import WebKit
/// SwiftUI wrapper that displays the `WKWebView` owned by a `ScreenController`.
struct ScreenWebView: UIViewRepresentable {
@ObservedObject var controller: ScreenController
/// Returns the controller's existing web view instead of creating a new one.
func makeUIView(context: Context) -> WKWebView {
self.controller.webView
}
/// Intentionally empty; nothing is pushed into the web view from SwiftUI updates.
func updateUIView(_ webView: WKWebView, context: Context) {
// State changes are driven by ScreenController.
}
}

View File

@@ -0,0 +1,30 @@
import SwiftUI
/// Voice tab: shows the voice-wake status and forwards changes of the persisted
/// `voiceWake.enabled` setting to the app model.
struct VoiceTab: View {
    @EnvironmentObject private var appModel: NodeAppModel
    @EnvironmentObject private var voiceWake: VoiceWakeManager
    @AppStorage("voiceWake.enabled") private var voiceWakeEnabled: Bool = false

    var body: some View {
        NavigationStack {
            List {
                self.statusSection
                self.notesSection
            }
            .navigationTitle("Voice")
            .onChange(of: self.voiceWakeEnabled) { _, isOn in
                self.appModel.setVoiceWakeEnabled(isOn)
            }
        }
    }

    /// Setting state, listener state and the manager's latest status text.
    private var statusSection: some View {
        Section("Status") {
            LabeledContent("Voice Wake", value: self.voiceWakeEnabled ? "Enabled" : "Disabled")
            LabeledContent("Listener", value: self.voiceWake.isListening ? "Listening" : "Idle")
            Text(self.voiceWake.statusText)
                .font(.footnote)
                .foregroundStyle(.secondary)
        }
    }

    /// Usage hint for the trigger phrase.
    private var notesSection: some View {
        Section("Notes") {
            Text("Say “clawdis …” to trigger.")
                .foregroundStyle(.secondary)
        }
    }
}

View File

@@ -0,0 +1,174 @@
import AVFAudio
import Foundation
import Speech
/// Listens to the microphone with speech recognition and reports the text spoken after the
/// trigger word "clawdis" to a configured command handler.
@MainActor
final class VoiceWakeManager: NSObject, ObservableObject {
    @Published var isEnabled: Bool = false
    @Published var isListening: Bool = false
    @Published var statusText: String = "Off"

    private let audioEngine = AVAudioEngine()
    private var speechRecognizer: SFSpeechRecognizer?
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    /// Last command handed to `onCommand`; used to avoid dispatching the same text twice in a row.
    private var lastDispatched: String?

    private var onCommand: (@Sendable (String) async -> Void)?

    /// Sets the handler that receives each extracted command.
    func configure(onCommand: @escaping @Sendable (String) async -> Void) {
        self.onCommand = onCommand
    }

    /// Enables or disables voice wake; enabling starts listening asynchronously.
    func setEnabled(_ enabled: Bool) {
        self.isEnabled = enabled
        if enabled {
            Task { await self.start() }
        } else {
            self.stop()
        }
    }

    /// Requests microphone and speech permissions and starts recognition.
    ///
    /// Does nothing when voice wake is disabled or already listening. Failures are reported
    /// through `statusText`.
    func start() async {
        guard self.isEnabled else { return }
        if self.isListening { return }

        self.statusText = "Requesting permissions…"

        let micOk = await Self.requestMicrophonePermission()
        guard micOk else {
            self.statusText = "Microphone permission denied"
            self.isListening = false
            return
        }

        let speechOk = await Self.requestSpeechPermission()
        guard speechOk else {
            self.statusText = "Speech recognition permission denied"
            self.isListening = false
            return
        }

        self.speechRecognizer = SFSpeechRecognizer()
        guard self.speechRecognizer != nil else {
            self.statusText = "Speech recognizer unavailable"
            self.isListening = false
            return
        }

        do {
            try Self.configureAudioSession()
            try self.startRecognition()
            self.isListening = true
            self.statusText = "Listening"
        } catch {
            self.isListening = false
            self.statusText = "Start failed: \(error.localizedDescription)"
        }
    }

    /// Disables voice wake, cancels recognition, stops the audio engine and deactivates the audio session.
    func stop() {
        self.isEnabled = false
        self.isListening = false
        self.statusText = "Off"

        self.recognitionTask?.cancel()
        self.recognitionTask = nil
        self.recognitionRequest = nil

        if self.audioEngine.isRunning {
            self.audioEngine.stop()
            self.audioEngine.inputNode.removeTap(onBus: 0)
        }

        try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
    }

    /// Installs the microphone tap, starts the audio engine and begins a recognition task.
    ///
    /// - Throws: An `NSError` in the `"VoiceWake"` domain when the input node reports no usable
    ///   audio format, or the error from starting the audio engine.
    private func startRecognition() throws {
        self.recognitionTask?.cancel()
        self.recognitionTask = nil

        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true
        self.recognitionRequest = request

        let inputNode = self.audioEngine.inputNode
        inputNode.removeTap(onBus: 0)

        let recordingFormat = inputNode.outputFormat(forBus: 0)
        // Installing a tap with an invalid format is not recoverable, so report it as a normal
        // start failure instead (e.g. when no audio input is available).
        guard recordingFormat.sampleRate > 0, recordingFormat.channelCount > 0 else {
            throw NSError(domain: "VoiceWake", code: 1, userInfo: [
                NSLocalizedDescriptionKey: "No audio input available",
            ])
        }
        Self.installTap(on: inputNode, format: recordingFormat, feeding: request)

        self.audioEngine.prepare()
        try self.audioEngine.start()

        // NOTE(review): partial results are enabled, so each longer partial transcript yields a new
        // command string and is dispatched again — confirm whether dispatch should wait for a final result.
        self.recognitionTask = self.speechRecognizer?.recognitionTask(with: request) { [weak self] result, error in
            guard let self else { return }
            if let error {
                self.statusText = "Recognizer error: \(error.localizedDescription)"
                self.isListening = false
                if self.isEnabled {
                    // Retry after a short pause while voice wake is still enabled.
                    Task {
                        try? await Task.sleep(nanoseconds: 700_000_000)
                        await self.start()
                    }
                }
                return
            }
            guard let result else { return }
            let transcript = result.bestTranscription.formattedString
            if let cmd = self.extractCommand(from: transcript) {
                if cmd != self.lastDispatched {
                    self.lastDispatched = cmd
                    self.statusText = "Triggered"
                    Task { [weak self] in
                        guard let self else { return }
                        await self.onCommand?(cmd)
                        if self.isEnabled {
                            await self.start()
                        }
                    }
                }
            }
        }
    }

    /// Installs the tap that feeds captured audio into `request`.
    ///
    /// This is `nonisolated` on purpose: the tap block is invoked off the main thread, and a closure
    /// created in main-actor-isolated code would inherit that isolation and fail the Swift 6
    /// runtime isolation check when called there. The block captures only `request`.
    private nonisolated static func installTap(
        on inputNode: AVAudioInputNode,
        format: AVAudioFormat,
        feeding request: SFSpeechAudioBufferRecognitionRequest)
    {
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: format) { buffer, _ in
            request.append(buffer)
        }
    }

    /// Returns the lowercased, trimmed text after the last occurrence of "clawdis", or `nil` when
    /// the word is absent or nothing follows it.
    private func extractCommand(from transcript: String) -> String? {
        let lower = transcript.lowercased()
        guard let range = lower.range(of: "clawdis", options: .backwards) else { return nil }
        let after = lower[range.upperBound...]
        let trimmed = after.trimmingCharacters(in: .whitespacesAndNewlines)
        if trimmed.isEmpty { return nil }
        return trimmed
    }

    /// Configures and activates the shared audio session for recording.
    private static func configureAudioSession() throws {
        let session = AVAudioSession.sharedInstance()
        try session.setCategory(.playAndRecord, mode: .measurement, options: [
            .duckOthers,
            .mixWithOthers,
            .allowBluetooth,
            .defaultToSpeaker,
        ])
        try session.setActive(true, options: [])
    }

    /// Asks for microphone permission.
    ///
    /// `nonisolated` so that the system callback, which is not guaranteed to arrive on the main
    /// thread, is not treated as main-actor-isolated code.
    private nonisolated static func requestMicrophonePermission() async -> Bool {
        await withCheckedContinuation { cont in
            AVAudioSession.sharedInstance().requestRecordPermission { ok in
                cont.resume(returning: ok)
            }
        }
    }

    /// Asks for speech-recognition permission; only `.authorized` counts as granted.
    ///
    /// `nonisolated` for the same reason as `requestMicrophonePermission()`.
    private nonisolated static func requestSpeechPermission() async -> Bool {
        await withCheckedContinuation { cont in
            SFSpeechRecognizer.requestAuthorization { status in
                cont.resume(returning: status == .authorized)
            }
        }
    }
}

56
apps/ios/project.yml Normal file
View File

@@ -0,0 +1,56 @@
name: ClawdisNode
options:
bundleIdPrefix: com.steipete.clawdis
deploymentTarget:
iOS: "17.0"
xcodeVersion: "16.0"
packages:
ClawdisNodeKit:
path: ../shared/ClawdisNodeKit
targets:
ClawdisNode:
type: application
platform: iOS
sources:
- path: Sources
dependencies:
- package: ClawdisNodeKit
preBuildScripts:
- name: SwiftFormat (lint)
script: |
set -euo pipefail
if ! command -v swiftformat >/dev/null 2>&1; then
echo "error: swiftformat not found (brew install swiftformat)" >&2
exit 1
fi
swiftformat --lint --config "$SRCROOT/../../.swiftformat" \
"$SRCROOT/Sources" \
"$SRCROOT/../shared/ClawdisNodeKit/Sources"
- name: SwiftLint
script: |
set -euo pipefail
if ! command -v swiftlint >/dev/null 2>&1; then
echo "error: swiftlint not found (brew install swiftlint)" >&2
exit 1
fi
swiftlint lint --config "$SRCROOT/.swiftlint.yml"
settings:
base:
PRODUCT_BUNDLE_IDENTIFIER: com.steipete.clawdis.node
SWIFT_VERSION: "6.0"
info:
path: Sources/Info.plist
properties:
CFBundleDisplayName: Clawdis Node
UILaunchScreen: {}
UIApplicationSceneManifest:
UIApplicationSupportsMultipleScenes: false
UIBackgroundModes:
- audio
NSLocalNetworkUsageDescription: Clawdis Node discovers and connects to your Clawdis bridge on the local network.
NSBonjourServices:
- _clawdis-bridge._tcp
NSMicrophoneUsageDescription: Clawdis Node needs microphone access for voice wake.
NSSpeechRecognitionUsageDescription: Clawdis Node uses on-device speech recognition for voice wake.

196
docs/ios/spec.md Normal file
View File

@@ -0,0 +1,196 @@
---
summary: "Plan for an iOS voice + screen (Canvas) node that connects via a secure Bonjour-discovered macOS bridge"
read_when:
- Designing iOS node + gateway integration
- Extending the Gateway protocol for node/screen commands
- Implementing Bonjour pairing or transport security
---
# iOS Node (internal) — Voice Trigger + Screen/Canvas
Status: design plan (internal/TestFlight) · Date: 2025-12-12
## Goals
- Build an **iOS app** that acts as a **remote node** for Clawdis:
- **Voice trigger** (wake-word / always-listening intent) that forwards transcripts to the Gateway `agent` method.
- **Screen/Canvas** surface that the agent can control: navigate, draw/render, evaluate JS, snapshot.
- **Dead-simple setup**:
- Auto-discover the host on the local network via **Bonjour**.
- One-tap pairing with an approval prompt on the Mac.
- iOS is **never** a local gateway; it is always a remote node.
- Operational clarity:
- When iOS is backgrounded, voice may still run; **screen/canvas commands must fail fast** with a structured error.
- Provide **settings**: node display name, enable/disable voice wake, pairing status.
Non-goals (v1):
- Exposing the Node Gateway directly on the LAN.
- Supporting arbitrary third-party “plugins” on iOS.
- Perfect App Store compliance; this is **internal-only** initially.
## Current repo reality (constraints we respect)
- The Gateway WebSocket server binds to `127.0.0.1:18789` (`src/gateway/server.ts`) with an optional `CLAWDIS_GATEWAY_TOKEN`.
- macOS “Canvas” exists today, but is **mac-only** and controlled via mac app IPC (`clawdis-mac canvas ...`) rather than the Gateway protocol (`docs/mac/canvas.md`).
- Voice wake forwards via `GatewayChannel` to Gateway `agent` (mac app: `VoiceWakeForwarder` → `AgentRPC`).
## Recommended topology (B): macOS Bridge + loopback Gateway
Keep the Node gateway loopback-only; expose a dedicated **macOS bridge** to the LAN.
**iOS App** ⇄ (TLS + pairing) ⇄ **macOS Bridge** ⇄ (loopback) ⇄ **Gateway WS** (`ws://127.0.0.1:18789`)
Why:
- Preserves current threat model: Gateway remains local-only.
- Centralizes auth, rate limiting, and allowlisting in the bridge.
- Lets us unify “screen node” semantics across mac + iOS without exposing raw gateway methods.
## Security plan (internal, but still robust)
### Transport
- Bridge listens on LAN and uses **TLS**.
- Prefer **mutual authentication** (mTLS-like) or explicit public key pinning after pairing.
### Pairing
- Bonjour discovery shows a candidate “Clawdis Bridge” on the LAN.
- First connection:
1) iOS generates a keypair (Secure Enclave if available).
2) iOS connects to the bridge and requests pairing.
3) macOS app shows “Approve node” with node name + device metadata.
4) On approve, mac stores the node public key + permissions; iOS stores bridge identity + trust anchor in Keychain.
- Subsequent connections:
- The bridge requires the paired identity. Unpaired clients get a structured “not paired” error and no access.
### Authorization / scope control (bridge-side ACL)
The bridge must not be a raw proxy to every gateway method.
- Allow by default:
- `agent` (with guardrails; idempotency required)
- minimal `system-event` beacons (presence updates for the node)
- node/screen methods defined below (new protocol surface)
- Deny by default:
- anything that widens control without explicit intent (future “shell”, “files”, etc.)
- Rate limit:
- handshake attempts
- voice forwards per minute
- snapshot frequency / payload size
## Protocol unification: add “node/screen” to Gateway protocol
### Principle
Unify mac Canvas + iOS Canvas under a single conceptual surface:
- The agent talks to the Gateway using a stable method set (typed protocol).
- The Gateway routes node-targeted requests to:
- local mac Canvas implementation, or
- remote iOS node via the bridge
### Minimal protocol additions (v1)
Add to `src/gateway/protocol/schema.ts` (and regenerate Swift models):
**Identity**
- Node identity comes from `hello.client.instanceId` (stable), and `hello.client.mode = "node"` (or `"ios-node"`).
**Methods**
- `node.list` → list paired/connected nodes + capabilities
- `node.invoke` → send a command to a specific node
- Params: `{ nodeId, command, params, idempotencyKey }`
**Events**
- `node.event` → async node status/errors
- e.g. background/foreground transitions, voice availability, screen availability
### Node command set (screen-focused)
These are values for `node.invoke.command`:
- `screen.show` / `screen.hide`
- `screen.navigate` with `{ url }` (Canvas URL or https URL)
- `screen.eval` with `{ javaScript }`
- `screen.snapshot` with `{ maxWidth?, quality?, format? }`
- `screen.setMode` with `{ mode: "canvas" | "web" }`
Result pattern:
- Request is a standard `req/res` with `ok` / `error`.
- Long operations (loads, streaming drawing, etc.) may also emit `node.event` progress.
### Background behavior requirement
When iOS is backgrounded:
- Voice may still be active (subject to iOS suspension).
- **All `screen.*` commands must fail** with a stable error code, e.g.:
- `NODE_BACKGROUND_UNAVAILABLE`
- Include `retryable: true` and `retryAfterMs` if we want the agent to wait.
## iOS app architecture (SwiftUI)
### App structure
- Tab bar:
- **Canvas/Screen** (WKWebView + overlay chrome)
- **Voice** (status + last transcript + test)
- **Settings** (node name, voice wake toggle, pairing state, debug)
### Components
- `BridgeDiscovery`: Bonjour browse + resolve (Network.framework `NWBrowser`)
- `BridgeConnection`: TLS session + pairing handshake + reconnect
- `NodeRuntime`:
- Voice pipeline (wake-word + capture + forward)
- Screen pipeline (WKWebView controller + snapshot + eval)
- Background state tracking; enforces “screen unavailable in background”
### Voice in background (internal)
- Enable background audio mode (and required session configuration) so the mic pipeline can keep running when the user switches apps.
- If iOS suspends the app anyway, surface a clear node status (`node.event`) so operators can see voice is unavailable.
## Code sharing (macOS + iOS)
Create/expand SwiftPM targets so both apps share:
- `ClawdisProtocol` (generated models; platform-neutral)
- `ClawdisGatewayClient` (shared WS framing + hello/req/res + seq-gap handling)
- `ClawdisNodeKit` (node.invoke command types + error codes)
macOS continues to own:
- local Canvas implementation details (custom scheme handler serving on-disk HTML, window/panel presentation)
iOS owns:
- iOS-specific audio/speech + WKWebView presentation and lifecycle
## Repo layout
- iOS app: `apps/ios/` (XcodeGen `project.yml`)
- Shared Swift packages: `apps/shared/`
- Lint/format: iOS target runs `swiftformat --lint` + `swiftlint lint` using repo configs (`.swiftformat`, `.swiftlint.yml`).
Generate the Xcode project:
```bash
cd apps/ios
xcodegen generate
open ClawdisNode.xcodeproj
```
## Storage plan (private by default)
### iOS
- Canvas/workspace files (persistent, private):
- `Application Support/Clawdis/canvas/<sessionKey>/...`
- Snapshots / temp exports (evictable):
- `Library/Caches/Clawdis/canvas-snapshots/<sessionKey>/...`
- Credentials:
- Keychain (paired identity + bridge trust anchor)
### macOS
- Keep current Canvas root (already implemented):
- `~/Library/Application Support/Clawdis/canvas/<session>/...`
- Bridge state:
- `~/Library/Application Support/Clawdis/bridge/paired-nodes.json`
- `~/Library/Application Support/Clawdis/bridge/keys/...`
## Rollout plan (phased)
1) **Bridge discovery + pairing (mac + iOS)**
- Bonjour browse + resolve
- Approve prompt on mac
- Persist pairing in Keychain/App Support
2) **Voice-only node**
- iOS voice wake toggle
- Forward transcript to Gateway `agent` via bridge
- Presence beacons via `system-event` (or `node.event`)
3) **Protocol additions for nodes**
- Add `node.list` / `node.invoke` / `node.event` to Gateway
- Implement bridge routing + ACLs
4) **iOS screen/canvas**
- WKWebView screen surface
- `screen.navigate/eval/snapshot`
- Background fast-fail for `screen.*`
5) **Unify mac Canvas under the same `node.invoke`**
- Keep existing implementation, but expose it through the unified protocol path so the agent uses one API.
## Open questions
- Should `hello.client.mode` be `"node"` with `platform="ios ..."` or a distinct mode `"ios-node"`? (Presence filtering currently excludes `"cli"` only.)
- Do we want a “permissions” model per node (voice only vs voice+screen) at pairing time?
- Should “website mode” allow arbitrary https, or enforce an allowlist to reduce risk?