diff --git a/apps/ios/.swiftlint.yml b/apps/ios/.swiftlint.yml
new file mode 100644
index 000000000..7b64147b5
--- /dev/null
+++ b/apps/ios/.swiftlint.yml
@@ -0,0 +1,6 @@
+parent_config: ../../.swiftlint.yml
+
+included:
+  - Sources
+  - ../shared/ClawdisNodeKit/Sources
+
diff --git a/apps/ios/README.md b/apps/ios/README.md
new file mode 100644
index 000000000..466e35f48
--- /dev/null
+++ b/apps/ios/README.md
@@ -0,0 +1,18 @@
+# ClawdisNode (iOS)
+
+Internal-only SwiftUI app scaffold.
+
+## Lint/format (required)
+```bash
+brew install swiftformat swiftlint
+```
+
+## Generate the Xcode project
+```bash
+cd apps/ios
+xcodegen generate
+open ClawdisNode.xcodeproj
+```
+
+## Shared packages
+- `../shared/ClawdisNodeKit` — shared types/constants used by iOS (and later macOS bridge + gateway routing).
diff --git a/apps/ios/Sources/Bridge/BridgeClient.swift b/apps/ios/Sources/Bridge/BridgeClient.swift
new file mode 100644
index 000000000..c23aa0d42
--- /dev/null
+++ b/apps/ios/Sources/Bridge/BridgeClient.swift
@@ -0,0 +1,131 @@
+import ClawdisNodeKit
+import Foundation
+import Network
+
+/// One-shot client for the bridge hello/pairing handshake, spoken as newline-delimited JSON over TCP.
+actor BridgeClient {
+    private let encoder = JSONEncoder()
+    private let decoder = JSONDecoder()
+
+    /// Sends `hello` and, when the bridge answers `NOT_PAIRED` or `UNAUTHORIZED`, requests pairing.
+    ///
+    /// - Returns: `existingToken ?? ""` after `hello-ok`, or the token carried by `pair-ok`.
+    /// - Throws: Transport and decoding errors, or an `NSError` in the `Bridge` domain
+    ///   (0: unexpected response, 1: hello rejected, 2: pairing rejected).
+    func pairAndHello(
+        endpoint: NWEndpoint,
+        nodeId: String,
+        displayName: String?,
+        platform: String,
+        version: String,
+        existingToken: String?) async throws -> String
+    {
+        let connection = NWConnection(to: endpoint, using: .tcp)
+        let queue = DispatchQueue(label: "com.steipete.clawdis.ios.bridge-client")
+        connection.start(queue: queue)
+        // Release the socket on every exit, including errors thrown by send/receive/decode.
+        defer { connection.cancel() }
+
+        // Bytes read past the current line; carried between reads so coalesced frames survive.
+        var pending = Data()
+
+        try await self.send(
+            BridgeHello(
+                nodeId: nodeId,
+                displayName: displayName,
+                token: existingToken,
+                platform: platform,
+                version: version),
+            over: connection)
+
+        guard let line = try await self.receiveLine(over: connection, buffer: &pending),
+              let data = line.data(using: .utf8),
+              let base = try? self.decoder.decode(BridgeBaseFrame.self, from: data)
+        else {
+            throw Self.bridgeError(code: 0, message: "Unexpected bridge response")
+        }
+
+        switch base.type {
+        case "hello-ok":
+            return existingToken ?? ""
+
+        case "error":
+            let err = try self.decoder.decode(BridgeErrorFrame.self, from: data)
+            if err.code == "NOT_PAIRED" || err.code == "UNAUTHORIZED" {
+                try await self.send(
+                    BridgePairRequest(
+                        nodeId: nodeId,
+                        displayName: displayName,
+                        platform: platform,
+                        version: version),
+                    over: connection)
+
+                while let next = try await self.receiveLine(over: connection, buffer: &pending) {
+                    guard let nextData = next.data(using: .utf8) else { continue }
+                    let nextBase = try self.decoder.decode(BridgeBaseFrame.self, from: nextData)
+                    if nextBase.type == "pair-ok" {
+                        return try self.decoder.decode(BridgePairOk.self, from: nextData).token
+                    }
+                    if nextBase.type == "error" {
+                        let e = try self.decoder.decode(BridgeErrorFrame.self, from: nextData)
+                        throw Self.bridgeError(code: 2, message: "\(e.code): \(e.message)")
+                    }
+                }
+            }
+            // Hello was rejected for another reason, or the stream ended before a pairing verdict.
+            throw Self.bridgeError(code: 1, message: "\(err.code): \(err.message)")
+
+        default:
+            throw Self.bridgeError(code: 0, message: "Unexpected bridge response")
+        }
+    }
+
+    /// Builds the `Bridge`-domain error reported to callers.
+    private static func bridgeError(code: Int, message: String) -> NSError {
+        NSError(domain: "Bridge", code: code, userInfo: [NSLocalizedDescriptionKey: message])
+    }
+
+    /// Writes `obj` as one JSON line (payload followed by a newline byte).
+    private func send(_ obj: some Encodable, over connection: NWConnection) async throws {
+        let data = try self.encoder.encode(obj)
+        var line = Data()
+        line.append(data)
+        line.append(0x0A)
+        try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
+            connection.send(content: line, completion: .contentProcessed { err in
+                if let err { cont.resume(throwing: err) } else { cont.resume(returning: ()) }
+            })
+        }
+    }
+
+    /// Returns the next line without its terminator, or `nil` when the stream ends or the line is not
+    /// valid UTF-8. Bytes after the newline stay in `buffer` for the next call.
+    private func receiveLine(over connection: NWConnection, buffer: inout Data) async throws -> String? {
+        while true {
+            if let idx = buffer.firstIndex(of: 0x0A) {
+                let lineData = Data(buffer[..<idx])
+                buffer.removeSubrange(...idx)
+                return String(data: lineData, encoding: .utf8)
+            }
+
+            let chunk = try await self.receiveChunk(over: connection)
+            if chunk.isEmpty { return nil }
+            buffer.append(chunk)
+        }
+    }
+
+    /// Reads up to 64 KiB from the connection; an empty result is treated by callers as end of stream.
+    private func receiveChunk(over connection: NWConnection) async throws -> Data {
+        try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Data, Error>) in
+            connection.receive(minimumIncompleteLength: 1, maximumLength: 64 * 1024) { data, _, _, error in
+                if let error {
+                    cont.resume(throwing: error)
+                    return
+                }
+                // The last read can deliver bytes together with the completion flag; return them so a
+                // frame sent right before the peer closes is not dropped. With no bytes, report the end.
+                cont.resume(returning: data ?? Data())
+            }
+        }
+    }
+}
diff --git a/apps/ios/Sources/Bridge/BridgeDiscoveryModel.swift b/apps/ios/Sources/Bridge/BridgeDiscoveryModel.swift
new file mode 100644
index 000000000..df6d11eee
--- /dev/null
+++ b/apps/ios/Sources/Bridge/BridgeDiscoveryModel.swift
@@ -0,0 +1,74 @@
+import ClawdisNodeKit
+import Foundation
+import Network
+
+@MainActor
+final class BridgeDiscoveryModel: ObservableObject {
+    struct DiscoveredBridge: Identifiable, Equatable {
+        var id: String { self.debugID }
+        var name: String
+        var endpoint: NWEndpoint
+        var debugID: String
+    }
+
+    @Published var bridges: [DiscoveredBridge] = []
+    @Published var statusText: String = "Idle"
+
+    private var browser: NWBrowser?
+ + func start() { + if self.browser != nil { return } + let params = NWParameters.tcp + let browser = NWBrowser( + for: .bonjour(type: ClawdisBonjour.bridgeServiceType, domain: ClawdisBonjour.bridgeServiceDomain), + using: params) + + browser.stateUpdateHandler = { [weak self] state in + Task { @MainActor in + guard let self else { return } + switch state { + case .setup: + self.statusText = "Setup" + case .ready: + self.statusText = "Searching…" + case let .failed(err): + self.statusText = "Failed: \(err)" + case .cancelled: + self.statusText = "Stopped" + case let .waiting(err): + self.statusText = "Waiting: \(err)" + @unknown default: + self.statusText = "Unknown" + } + } + } + + browser.browseResultsChangedHandler = { [weak self] results, _ in + Task { @MainActor in + guard let self else { return } + self.bridges = results.compactMap { result -> DiscoveredBridge? in + switch result.endpoint { + case let .service(name, _, _, _): + return DiscoveredBridge( + name: name, + endpoint: result.endpoint, + debugID: String(describing: result.endpoint)) + default: + return nil + } + } + .sorted { $0.name.localizedCaseInsensitiveCompare($1.name) == .orderedAscending } + } + } + + self.browser = browser + browser.start(queue: DispatchQueue(label: "com.steipete.clawdis.ios.bridge-discovery")) + } + + func stop() { + self.browser?.cancel() + self.browser = nil + self.bridges = [] + self.statusText = "Stopped" + } +} diff --git a/apps/ios/Sources/Bridge/BridgeSession.swift b/apps/ios/Sources/Bridge/BridgeSession.swift new file mode 100644 index 000000000..a5267a197 --- /dev/null +++ b/apps/ios/Sources/Bridge/BridgeSession.swift @@ -0,0 +1,151 @@ +import ClawdisNodeKit +import Foundation +import Network + +actor BridgeSession { + enum State: Sendable, Equatable { + case idle + case connecting + case connected(serverName: String) + case failed(message: String) + } + + private let encoder = JSONEncoder() + private let decoder = JSONDecoder() + + private var connection: 
NWConnection?
+    private var queue: DispatchQueue?
+    private var buffer = Data()
+
+    private(set) var state: State = .idle
+
+    /// Connects to `endpoint`, performs the hello handshake, then serves `ping`/`invoke` frames
+    /// until the peer closes the stream.
+    ///
+    /// Returns normally on a clean end of stream (state goes back to `.idle`). On failure the
+    /// connection is torn down, `state` becomes `.failed`, and the error is rethrown.
+    func connect(
+        endpoint: NWEndpoint,
+        hello: BridgeHello,
+        onConnected: (@Sendable (String) async -> Void)? = nil,
+        onInvoke: @escaping @Sendable (BridgeInvokeRequest) async -> BridgeInvokeResponse)
+        async throws
+    {
+        await self.disconnect()
+        self.state = .connecting
+
+        let connection = NWConnection(to: endpoint, using: .tcp)
+        let queue = DispatchQueue(label: "com.steipete.clawdis.ios.bridge-session")
+        self.connection = connection
+        self.queue = queue
+        connection.start(queue: queue)
+
+        do {
+            try await self.performHandshake(hello: hello, onConnected: onConnected)
+            try await self.serveFrames(onInvoke: onInvoke)
+        } catch {
+            // A later connect()/disconnect() may already own the session state; only report the
+            // failure when the connection created by this call is still the active one.
+            if self.connection === connection {
+                // Tear down first: disconnect() resets `state` to `.idle`, which would otherwise
+                // overwrite the failure before anyone can observe it.
+                await self.disconnect()
+                self.state = .failed(message: error.localizedDescription)
+            } else {
+                connection.cancel()
+            }
+            throw error
+        }
+
+        if self.connection === connection {
+            await self.disconnect()
+        }
+    }
+
+    /// Sends `hello` and waits for the verdict; sets `.connected` and calls `onConnected` on `hello-ok`.
+    ///
+    /// - Throws: An `NSError` in the `Bridge` domain (1: missing/undecodable reply, 2: error frame,
+    ///   3: unexpected frame type), or the underlying transport/decoding error.
+    private func performHandshake(
+        hello: BridgeHello,
+        onConnected: (@Sendable (String) async -> Void)?) async throws
+    {
+        try await self.send(hello)
+
+        guard let line = try await self.receiveLine(),
+              let data = line.data(using: .utf8),
+              let base = try? self.decoder.decode(BridgeBaseFrame.self, from: data)
+        else {
+            throw NSError(domain: "Bridge", code: 1, userInfo: [
+                NSLocalizedDescriptionKey: "Unexpected bridge response",
+            ])
+        }
+
+        switch base.type {
+        case "hello-ok":
+            let ok = try self.decoder.decode(BridgeHelloOk.self, from: data)
+            self.state = .connected(serverName: ok.serverName)
+            await onConnected?(ok.serverName)
+
+        case "error":
+            let err = try self.decoder.decode(BridgeErrorFrame.self, from: data)
+            throw NSError(domain: "Bridge", code: 2, userInfo: [
+                NSLocalizedDescriptionKey: "\(err.code): \(err.message)",
+            ])
+
+        default:
+            throw NSError(domain: "Bridge", code: 3, userInfo: [
+                NSLocalizedDescriptionKey: "Unexpected bridge response",
+            ])
+        }
+    }
+
+    /// Answers `ping` and `invoke` frames until the stream ends. Lines that are not a decodable
+    /// base frame, and unknown frame types, are skipped.
+    private func serveFrames(
+        onInvoke: @Sendable (BridgeInvokeRequest) async -> BridgeInvokeResponse) async throws
+    {
+        while let next = try await self.receiveLine() {
+            guard let nextData = next.data(using: .utf8),
+                  let nextBase = try? self.decoder.decode(BridgeBaseFrame.self, from: nextData)
+            else { continue }
+
+            switch nextBase.type {
+            case "ping":
+                let ping = try self.decoder.decode(BridgePing.self, from: nextData)
+                try await self.send(BridgePong(type: "pong", id: ping.id))
+
+            case "invoke":
+                let req = try self.decoder.decode(BridgeInvokeRequest.self, from: nextData)
+                let res = await onInvoke(req)
+                try await self.send(res)
+
+            default:
+                continue
+            }
+        }
+    }
+
+    /// Sends an `event` frame over the current connection; throws "not connected" when there is none.
+    func sendEvent(event: String, payloadJSON: String?) async throws {
+        try await self.send(BridgeEventFrame(type: "event", event: event, payloadJSON: payloadJSON))
+    }
+
+    /// Cancels the connection, clears buffered bytes, and resets `state` to `.idle`.
+    func disconnect() async {
+        self.connection?.cancel()
+        self.connection = nil
+        self.queue = nil
+        self.buffer = Data()
+        self.state = .idle
+    }
+
+    /// Writes `obj` as one JSON line (payload followed by a newline byte).
+    private func send(_ obj: some Encodable) async throws {
+        guard let connection = self.connection else {
+            throw NSError(domain: "Bridge", code: 10, userInfo: [
+                NSLocalizedDescriptionKey: "not connected",
+            ])
+        }
+        let data = try self.encoder.encode(obj)
+        var line = Data()
+        line.append(data)
+        line.append(0x0A)
+        try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
+            connection.send(content: line, completion: .contentProcessed { err in
+                if let err { cont.resume(throwing: err) } else { cont.resume(returning: ()) }
+            })
+        }
+    }
+
+    /// Returns the next buffered line without its terminator, or `nil` when the stream ends or the
+    /// line is not valid UTF-8.
+    private func receiveLine() async throws -> String? {
+        while true {
+            if let idx = self.buffer.firstIndex(of: 0x0A) {
+                let lineData = Data(self.buffer[..<idx])
+                self.buffer.removeSubrange(...idx)
+                return String(data: lineData, encoding: .utf8)
+            }
+
+            let chunk = try await self.receiveChunk()
+            if chunk.isEmpty { return nil }
+            self.buffer.append(chunk)
+        }
+    }
+
+    /// Reads up to 64 KiB; an empty result (also returned when not connected) means end of stream.
+    private func receiveChunk() async throws -> Data {
+        guard let connection = self.connection else { return Data() }
+        return try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Data, Error>) in
+            connection.receive(minimumIncompleteLength: 1, maximumLength: 64 * 1024) { data, _, _, error in
+                if let error {
+                    cont.resume(throwing: error)
+                    return
+                }
+                // The last read can deliver bytes together with the completion flag; return them so a
+                // frame sent right before the peer closes is not dropped. With no bytes, report the end.
+                cont.resume(returning: data ?? Data())
+            }
+        }
+    }
+}
diff --git a/apps/ios/Sources/Bridge/KeychainStore.swift b/apps/ios/Sources/Bridge/KeychainStore.swift
new file mode 100644
index 000000000..6f6189da3
--- /dev/null
+++ b/apps/ios/Sources/Bridge/KeychainStore.swift
@@ -0,0 +1,49 @@
+import Foundation
+import Security
+
+enum KeychainStore {
+    static func loadString(service: String, account: String) -> String? {
+        var query: [String: Any] = [
+            kSecClass as String: kSecClassGenericPassword,
+            kSecAttrService as String: service,
+            kSecAttrAccount as String: account,
+            kSecReturnData as String: true,
+            kSecMatchLimit as String: kSecMatchLimitOne,
+        ]
+        query[kSecAttrAccessible as String] = kSecAttrAccessibleAfterFirstUnlockThisDeviceOnly
+
+        var item: CFTypeRef?
+        let status = SecItemCopyMatching(query as CFDictionary, &item)
+        guard status == errSecSuccess, let data = item as?
Data else { return nil } + return String(data: data, encoding: .utf8) + } + + static func saveString(_ value: String, service: String, account: String) -> Bool { + let data = Data(value.utf8) + let base: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrService as String: service, + kSecAttrAccount as String: account, + kSecAttrAccessible as String: kSecAttrAccessibleAfterFirstUnlockThisDeviceOnly, + ] + + let update: [String: Any] = [kSecValueData as String: data] + let status = SecItemUpdate(base as CFDictionary, update as CFDictionary) + if status == errSecSuccess { return true } + if status != errSecItemNotFound { return false } + + var insert = base + insert[kSecValueData as String] = data + return SecItemAdd(insert as CFDictionary, nil) == errSecSuccess + } + + static func delete(service: String, account: String) -> Bool { + let query: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrService as String: service, + kSecAttrAccount as String: account, + ] + let status = SecItemDelete(query as CFDictionary) + return status == errSecSuccess || status == errSecItemNotFound + } +} diff --git a/apps/ios/Sources/ClawdisNodeApp.swift b/apps/ios/Sources/ClawdisNodeApp.swift new file mode 100644 index 000000000..d1690f4cf --- /dev/null +++ b/apps/ios/Sources/ClawdisNodeApp.swift @@ -0,0 +1,18 @@ +import SwiftUI + +@main +struct ClawdisNodeApp: App { + @StateObject private var appModel = NodeAppModel() + @Environment(\.scenePhase) private var scenePhase + + var body: some Scene { + WindowGroup { + RootTabs() + .environmentObject(self.appModel) + .environmentObject(self.appModel.voiceWake) + .onChange(of: self.scenePhase) { _, newValue in + self.appModel.setScenePhase(newValue) + } + } + } +} diff --git a/apps/ios/Sources/Info.plist b/apps/ios/Sources/Info.plist new file mode 100644 index 000000000..e525cf7b4 --- /dev/null +++ b/apps/ios/Sources/Info.plist @@ -0,0 +1,45 @@ + + + + + CFBundleDevelopmentRegion + 
$(DEVELOPMENT_LANGUAGE) + CFBundleDisplayName + Clawdis Node + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + NSBonjourServices + + _clawdis-bridge._tcp + + NSLocalNetworkUsageDescription + Clawdis Node discovers and connects to your Clawdis bridge on the local network. + NSMicrophoneUsageDescription + Clawdis Node needs microphone access for voice wake. + NSSpeechRecognitionUsageDescription + Clawdis Node uses on-device speech recognition for voice wake. + UIApplicationSceneManifest + + UIApplicationSupportsMultipleScenes + + + UIBackgroundModes + + audio + + UILaunchScreen + + + diff --git a/apps/ios/Sources/Model/NodeAppModel.swift b/apps/ios/Sources/Model/NodeAppModel.swift new file mode 100644 index 000000000..e1ece7f83 --- /dev/null +++ b/apps/ios/Sources/Model/NodeAppModel.swift @@ -0,0 +1,185 @@ +import ClawdisNodeKit +import Network +import SwiftUI + +@MainActor +final class NodeAppModel: ObservableObject { + @Published var isBackgrounded: Bool = false + let screen = ScreenController() + @Published var bridgeStatusText: String = "Not connected" + @Published var bridgeServerName: String? + + private let bridge = BridgeSession() + private var bridgeTask: Task? + let voiceWake = VoiceWakeManager() + + init() { + self.voiceWake.configure { [weak self] cmd in + guard let self else { return } + let nodeId = UserDefaults.standard.string(forKey: "node.instanceId") ?? "ios-node" + let sessionKey = "node-\(nodeId)" + do { + try await self.sendVoiceTranscript(text: cmd, sessionKey: sessionKey) + } catch { + // Best-effort only. 
+            }
+        }
+
+        let enabled = UserDefaults.standard.bool(forKey: "voiceWake.enabled")
+        self.voiceWake.setEnabled(enabled)
+    }
+
+    func setScenePhase(_ phase: ScenePhase) {
+        switch phase {
+        case .background:
+            self.isBackgrounded = true
+        case .active, .inactive:
+            self.isBackgrounded = false
+        @unknown default:
+            self.isBackgrounded = false
+        }
+    }
+
+    func setVoiceWakeEnabled(_ enabled: Bool) {
+        self.voiceWake.setEnabled(enabled)
+    }
+
+    /// Starts a bridge session in a new task, cancelling any previous one, and mirrors its
+    /// progress into `bridgeStatusText` / `bridgeServerName`.
+    func connectToBridge(
+        endpoint: NWEndpoint,
+        token: String,
+        nodeId: String,
+        displayName: String?,
+        platform: String,
+        version: String)
+    {
+        self.bridgeTask?.cancel()
+        self.bridgeStatusText = "Connecting…"
+        self.bridgeServerName = nil
+
+        self.bridgeTask = Task {
+            do {
+                try await self.bridge.connect(
+                    endpoint: endpoint,
+                    hello: BridgeHello(
+                        nodeId: nodeId,
+                        displayName: displayName,
+                        token: token,
+                        platform: platform,
+                        version: version),
+                    onConnected: { [weak self] serverName in
+                        await MainActor.run {
+                            self?.bridgeStatusText = "Connected"
+                            self?.bridgeServerName = serverName
+                        }
+                    },
+                    onInvoke: { [weak self] req in
+                        guard let self else {
+                            return BridgeInvokeResponse(
+                                id: req.id,
+                                ok: false,
+                                error: ClawdisNodeError(code: .unavailable, message: "UNAVAILABLE: node not ready"))
+                        }
+                        return await self.handleInvoke(req)
+                    })
+
+                // A cancelled task was superseded by disconnectBridge() or a newer connectToBridge(),
+                // which already published its own status; do not overwrite it.
+                guard !Task.isCancelled else { return }
+                await MainActor.run {
+                    self.bridgeStatusText = "Disconnected"
+                    self.bridgeServerName = nil
+                }
+            } catch {
+                // Same as above: tearing the session down on purpose is not a bridge error.
+                guard !Task.isCancelled else { return }
+                await MainActor.run {
+                    self.bridgeStatusText = "Bridge error: \(error.localizedDescription)"
+                    self.bridgeServerName = nil
+                }
+            }
+        }
+    }
+
+    func disconnectBridge() {
+        self.bridgeTask?.cancel()
+        self.bridgeTask = nil
+        Task { await self.bridge.disconnect() }
+        self.bridgeStatusText = "Disconnected"
+        self.bridgeServerName = nil
+    }
+
+    func sendVoiceTranscript(text: String, sessionKey: String?) async throws {
+        struct Payload: Codable {
+            var text: String
+            var sessionKey: String?
+ } + let payload = Payload(text: text, sessionKey: sessionKey) + let data = try JSONEncoder().encode(payload) + let json = String(decoding: data, as: UTF8.self) + try await self.bridge.sendEvent(event: "voice.transcript", payloadJSON: json) + } + + private func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse { + if req.command.hasPrefix("screen."), self.isBackgrounded { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: ClawdisNodeError( + code: .backgroundUnavailable, + message: "NODE_BACKGROUND_UNAVAILABLE: screen commands require foreground")) + } + + do { + switch req.command { + case ClawdisScreenCommand.show.rawValue: + return BridgeInvokeResponse(id: req.id, ok: true) + + case ClawdisScreenCommand.hide.rawValue: + return BridgeInvokeResponse(id: req.id, ok: true) + + case ClawdisScreenCommand.setMode.rawValue: + let params = try Self.decodeParams(ClawdisScreenSetModeParams.self, from: req.paramsJSON) + self.screen.setMode(params.mode) + return BridgeInvokeResponse(id: req.id, ok: true) + + case ClawdisScreenCommand.navigate.rawValue: + let params = try Self.decodeParams(ClawdisScreenNavigateParams.self, from: req.paramsJSON) + self.screen.navigate(to: params.url) + return BridgeInvokeResponse(id: req.id, ok: true) + + case ClawdisScreenCommand.evalJS.rawValue: + let params = try Self.decodeParams(ClawdisScreenEvalParams.self, from: req.paramsJSON) + let result = try await self.screen.eval(javaScript: params.javaScript) + let payload = try Self.encodePayload(["result": result]) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) + + case ClawdisScreenCommand.snapshot.rawValue: + let params = try? 
Self.decodeParams(ClawdisScreenSnapshotParams.self, from: req.paramsJSON)
+                let maxWidth = params?.maxWidth.map { CGFloat($0) }
+                let base64 = try await self.screen.snapshotPNGBase64(maxWidth: maxWidth)
+                let payload = try Self.encodePayload(["format": "png", "base64": base64])
+                return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
+
+            default:
+                return BridgeInvokeResponse(
+                    id: req.id,
+                    ok: false,
+                    error: ClawdisNodeError(code: .invalidRequest, message: "INVALID_REQUEST: unknown command"))
+            }
+        } catch let error as DecodingError {
+            // Malformed params are the caller's mistake, not a node outage.
+            return BridgeInvokeResponse(
+                id: req.id,
+                ok: false,
+                error: ClawdisNodeError(
+                    code: .invalidRequest,
+                    message: "INVALID_REQUEST: \(error.localizedDescription)"))
+        } catch let error as NSError where error.domain == "Bridge" && error.code == 20 {
+            // Thrown by decodeParams when paramsJSON is missing; its message already carries the prefix.
+            return BridgeInvokeResponse(
+                id: req.id,
+                ok: false,
+                error: ClawdisNodeError(code: .invalidRequest, message: error.localizedDescription))
+        } catch {
+            return BridgeInvokeResponse(
+                id: req.id,
+                ok: false,
+                error: ClawdisNodeError(code: .unavailable, message: error.localizedDescription))
+        }
+    }
+
+    /// Decodes `json` as `type`; throws `Bridge` error 20 when `json` is missing.
+    private static func decodeParams<T: Decodable>(_ type: T.Type, from json: String?) throws -> T {
+        guard let json, let data = json.data(using: .utf8) else {
+            throw NSError(domain: "Bridge", code: 20, userInfo: [
+                NSLocalizedDescriptionKey: "INVALID_REQUEST: paramsJSON required",
+            ])
+        }
+        return try JSONDecoder().decode(type, from: data)
+    }
+
+    /// Encodes `obj` as a JSON string.
+    private static func encodePayload(_ obj: some Encodable) throws -> String {
+        let data = try JSONEncoder().encode(obj)
+        return String(decoding: data, as: UTF8.self)
+    }
+}
diff --git a/apps/ios/Sources/RootTabs.swift b/apps/ios/Sources/RootTabs.swift
new file mode 100644
index 000000000..95174f63d
--- /dev/null
+++ b/apps/ios/Sources/RootTabs.swift
@@ -0,0 +1,16 @@
+import SwiftUI
+
+struct RootTabs: View {
+    var body: some View {
+        TabView {
+            ScreenTab()
+                .tabItem { Label("Screen", systemImage: "rectangle.and.hand.point.up.left") }
+
+            VoiceTab()
+                .tabItem { Label("Voice", systemImage: "mic") }
+
+            SettingsTab()
+                .tabItem { Label("Settings", systemImage: "gearshape") }
+        }
+    }
+}
diff --git a/apps/ios/Sources/Screen/ScreenController.swift b/apps/ios/Sources/Screen/ScreenController.swift
new file mode 100644
index 000000000..33143cd87
--- /dev/null
+++ b/apps/ios/Sources/Screen/ScreenController.swift
@@ -0,0 +1,120 @@
+import ClawdisNodeKit
+import
SwiftUI
+import WebKit
+
+@MainActor
+final class ScreenController: ObservableObject {
+    let webView: WKWebView
+
+    @Published var mode: ClawdisScreenMode = .web
+    @Published var urlString: String = "https://example.com"
+    @Published var errorText: String?
+
+    init() {
+        let config = WKWebViewConfiguration()
+        config.websiteDataStore = .nonPersistent()
+        self.webView = WKWebView(frame: .zero, configuration: config)
+    }
+
+    func setMode(_ mode: ClawdisScreenMode) {
+        self.mode = mode
+        self.reload()
+    }
+
+    func navigate(to urlString: String) {
+        self.urlString = urlString
+        self.reload()
+    }
+
+    /// Loads `urlString` in web mode or the canvas scaffold in canvas mode.
+    ///
+    /// In web mode an unparsable or scheme-less URL is reported through `errorText` and nothing is
+    /// loaded; a valid URL clears `errorText` before loading.
+    func reload() {
+        switch self.mode {
+        case .web:
+            let trimmed = self.urlString.trimmingCharacters(in: .whitespacesAndNewlines)
+            guard let url = URL(string: trimmed), url.scheme != nil else {
+                // Surface the problem instead of silently ignoring the request.
+                self.errorText = "Invalid URL: \(trimmed)"
+                return
+            }
+            self.errorText = nil
+            self.webView.load(URLRequest(url: url))
+        case .canvas:
+            self.webView.loadHTMLString(Self.canvasScaffoldHTML, baseURL: nil)
+        }
+    }
+
+    /// Evaluates `javaScript` in the web view and returns the result rendered with
+    /// `String(describing:)`, or an empty string when there is no result.
+    func eval(javaScript: String) async throws -> String {
+        try await withCheckedThrowingContinuation { cont in
+            self.webView.evaluateJavaScript(javaScript) { result, error in
+                if let error {
+                    cont.resume(throwing: error)
+                    return
+                }
+                if let result {
+                    cont.resume(returning: String(describing: result))
+                } else {
+                    cont.resume(returning: "")
+                }
+            }
+        }
+    }
+
+    func snapshotPNGBase64(maxWidth: CGFloat?
= nil) async throws -> String { + let config = WKSnapshotConfiguration() + if let maxWidth { + config.snapshotWidth = NSNumber(value: Double(maxWidth)) + } + let image: UIImage = try await withCheckedThrowingContinuation { (cont: CheckedContinuation) in + self.webView.takeSnapshot(with: config) { image, error in + if let error { + cont.resume(throwing: error) + return + } + guard let image else { + cont.resume(throwing: NSError(domain: "Screen", code: 2, userInfo: [ + NSLocalizedDescriptionKey: "snapshot failed", + ])) + return + } + cont.resume(returning: image) + } + } + guard let data = image.pngData() else { + throw NSError(domain: "Screen", code: 1, userInfo: [ + NSLocalizedDescriptionKey: "snapshot encode failed", + ]) + } + return data.base64EncodedString() + } + + private static let canvasScaffoldHTML = """ + + + + + + Canvas + + + +
+
Canvas scaffold
+
Next: agent-driven on-disk workspace.
+
+ + + """ +} diff --git a/apps/ios/Sources/Screen/ScreenTab.swift b/apps/ios/Sources/Screen/ScreenTab.swift new file mode 100644 index 000000000..68464eb53 --- /dev/null +++ b/apps/ios/Sources/Screen/ScreenTab.swift @@ -0,0 +1,74 @@ +import ClawdisNodeKit +import SwiftUI + +struct ScreenTab: View { + @EnvironmentObject private var appModel: NodeAppModel + + var body: some View { + NavigationStack { + VStack(spacing: 0) { + ScreenWebView(controller: self.appModel.screen) + .overlay(alignment: .top) { + if let errorText = self.appModel.screen.errorText { + Text(errorText) + .font(.footnote) + .padding(10) + .background(.thinMaterial) + .clipShape(RoundedRectangle(cornerRadius: 12, style: .continuous)) + .padding() + } + } + + Divider() + + VStack(spacing: 10) { + Picker( + "Mode", + selection: Binding( + get: { self.appModel.screen.mode }, + set: { self.appModel.screen.setMode($0) })) + { + Text("Web").tag(ClawdisScreenMode.web) + Text("Canvas").tag(ClawdisScreenMode.canvas) + } + .pickerStyle(.segmented) + + HStack(spacing: 10) { + TextField( + "URL", + text: Binding( + get: { self.appModel.screen.urlString }, + set: { self.appModel.screen.urlString = $0 })) + .textInputAutocapitalization(.never) + .autocorrectionDisabled() + .keyboardType(.URL) + .textFieldStyle(.roundedBorder) + Button("Go") { self.navigate() } + .buttonStyle(.borderedProminent) + } + + if self.appModel.isBackgrounded { + Text("Screen commands unavailable while backgrounded.") + .font(.footnote) + .foregroundStyle(.secondary) + .frame(maxWidth: .infinity, alignment: .leading) + } + } + .padding() + } + .navigationTitle("Screen") + .navigationBarTitleDisplayMode(.inline) + } + } + + private func navigate() { + if self.appModel.isBackgrounded { + self.appModel.screen.errorText = ClawdisNodeError( + code: .backgroundUnavailable, + message: "NODE_BACKGROUND_UNAVAILABLE: screen commands require foreground").message + return + } + self.appModel.screen.errorText = nil + self.appModel.screen.reload() + } 
+} diff --git a/apps/ios/Sources/Screen/ScreenWebView.swift b/apps/ios/Sources/Screen/ScreenWebView.swift new file mode 100644 index 000000000..4b4762ce5 --- /dev/null +++ b/apps/ios/Sources/Screen/ScreenWebView.swift @@ -0,0 +1,15 @@ +import ClawdisNodeKit +import SwiftUI +import WebKit + +struct ScreenWebView: UIViewRepresentable { + @ObservedObject var controller: ScreenController + + func makeUIView(context: Context) -> WKWebView { + self.controller.webView + } + + func updateUIView(_ webView: WKWebView, context: Context) { + // State changes are driven by ScreenController. + } +} diff --git a/apps/ios/Sources/Voice/VoiceTab.swift b/apps/ios/Sources/Voice/VoiceTab.swift new file mode 100644 index 000000000..040f3782d --- /dev/null +++ b/apps/ios/Sources/Voice/VoiceTab.swift @@ -0,0 +1,30 @@ +import SwiftUI + +struct VoiceTab: View { + @EnvironmentObject private var appModel: NodeAppModel + @EnvironmentObject private var voiceWake: VoiceWakeManager + @AppStorage("voiceWake.enabled") private var voiceWakeEnabled: Bool = false + + var body: some View { + NavigationStack { + List { + Section("Status") { + LabeledContent("Voice Wake", value: self.voiceWakeEnabled ? "Enabled" : "Disabled") + LabeledContent("Listener", value: self.voiceWake.isListening ? 
"Listening" : "Idle") + Text(self.voiceWake.statusText) + .font(.footnote) + .foregroundStyle(.secondary) + } + + Section("Notes") { + Text("Say “clawdis …” to trigger.") + .foregroundStyle(.secondary) + } + } + .navigationTitle("Voice") + .onChange(of: self.voiceWakeEnabled) { _, newValue in + self.appModel.setVoiceWakeEnabled(newValue) + } + } + } +} diff --git a/apps/ios/Sources/Voice/VoiceWakeManager.swift b/apps/ios/Sources/Voice/VoiceWakeManager.swift new file mode 100644 index 000000000..30d6d8282 --- /dev/null +++ b/apps/ios/Sources/Voice/VoiceWakeManager.swift @@ -0,0 +1,174 @@ +import AVFAudio +import Foundation +import Speech + +@MainActor +final class VoiceWakeManager: NSObject, ObservableObject { + @Published var isEnabled: Bool = false + @Published var isListening: Bool = false + @Published var statusText: String = "Off" + + private let audioEngine = AVAudioEngine() + private var speechRecognizer: SFSpeechRecognizer? + private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest? + private var recognitionTask: SFSpeechRecognitionTask? + + private var lastDispatched: String? + private var onCommand: (@Sendable (String) async -> Void)? 
+
+    func configure(onCommand: @escaping @Sendable (String) async -> Void) {
+        self.onCommand = onCommand
+    }
+
+    func setEnabled(_ enabled: Bool) {
+        self.isEnabled = enabled
+        if enabled {
+            Task { await self.start() }
+        } else {
+            self.stop()
+        }
+    }
+
+    /// Requests microphone and speech permissions, then starts listening.
+    ///
+    /// Does nothing when voice wake is disabled or already listening. Failures are reported
+    /// through `statusText`; nothing is thrown.
+    func start() async {
+        guard self.isEnabled else { return }
+        if self.isListening { return }
+
+        self.statusText = "Requesting permissions…"
+
+        let micOk = await Self.requestMicrophonePermission()
+        guard micOk else {
+            self.statusText = "Microphone permission denied"
+            self.isListening = false
+            return
+        }
+
+        let speechOk = await Self.requestSpeechPermission()
+        guard speechOk else {
+            self.statusText = "Speech recognition permission denied"
+            self.isListening = false
+            return
+        }
+
+        // The awaits above suspend this method. Re-check so that a stop() issued meanwhile is
+        // respected and a concurrent start() does not set up recognition twice.
+        guard self.isEnabled, !self.isListening else { return }
+
+        self.speechRecognizer = SFSpeechRecognizer()
+        guard self.speechRecognizer != nil else {
+            self.statusText = "Speech recognizer unavailable"
+            self.isListening = false
+            return
+        }
+
+        do {
+            try Self.configureAudioSession()
+            try self.startRecognition()
+            self.isListening = true
+            self.statusText = "Listening"
+        } catch {
+            self.isListening = false
+            self.statusText = "Start failed: \(error.localizedDescription)"
+        }
+    }
+
+    func stop() {
+        self.isEnabled = false
+        self.isListening = false
+        self.statusText = "Off"
+
+        self.recognitionTask?.cancel()
+        self.recognitionTask = nil
+        self.recognitionRequest = nil
+
+        if self.audioEngine.isRunning {
+            self.audioEngine.stop()
+            self.audioEngine.inputNode.removeTap(onBus: 0)
+        }
+
+        try?
AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
+    }
+
+    /// Installs the microphone tap, starts the audio engine, and begins a recognition task that
+    /// dispatches the text following the wake word to `onCommand`.
+    private func startRecognition() throws {
+        self.recognitionTask?.cancel()
+        self.recognitionTask = nil
+
+        let request = SFSpeechAudioBufferRecognitionRequest()
+        request.shouldReportPartialResults = true
+        self.recognitionRequest = request
+
+        let inputNode = self.audioEngine.inputNode
+        inputNode.removeTap(onBus: 0)
+
+        let recordingFormat = inputNode.outputFormat(forBus: 0)
+        // The tap block is built outside the main actor and captures only `request`, so the audio
+        // thread never touches main-actor state.
+        inputNode.installTap(
+            onBus: 0,
+            bufferSize: 1024,
+            format: recordingFormat,
+            block: Self.makeTapBlock(appendingTo: request))
+
+        self.audioEngine.prepare()
+        try self.audioEngine.start()
+
+        self.recognitionTask = self.speechRecognizer?.recognitionTask(with: request) { [weak self] result, error in
+            guard let self else { return }
+            if let error {
+                self.statusText = "Recognizer error: \(error.localizedDescription)"
+                self.isListening = false
+                if self.isEnabled {
+                    Task {
+                        try? await Task.sleep(nanoseconds: 700_000_000)
+                        await self.start()
+                    }
+                }
+                return
+            }
+            guard let result else { return }
+
+            let transcript = result.bestTranscription.formattedString
+            if let cmd = self.extractCommand(from: transcript) {
+                if cmd != self.lastDispatched {
+                    self.lastDispatched = cmd
+                    self.statusText = "Triggered"
+                    Task { [weak self] in
+                        guard let self else { return }
+                        await self.onCommand?(cmd)
+                        if self.isEnabled {
+                            await self.start()
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /// Builds the audio tap block in a nonisolated context.
+    ///
+    /// A closure written inside a `@MainActor` method inherits that isolation, but tap blocks are
+    /// invoked off the main thread; creating it here keeps it free of actor isolation.
+    private nonisolated static func makeTapBlock(
+        appendingTo request: SFSpeechAudioBufferRecognitionRequest) -> AVAudioNodeTapBlock
+    {
+        { buffer, _ in
+            request.append(buffer)
+        }
+    }
+
+    private func extractCommand(from transcript: String) -> String? {
+        let lower = transcript.lowercased()
+        guard let range = lower.range(of: "clawdis", options: .backwards) else { return nil }
+        let after = lower[range.upperBound...]
+ let trimmed = after.trimmingCharacters(in: .whitespacesAndNewlines) + if trimmed.isEmpty { return nil } + return trimmed + } + + private static func configureAudioSession() throws { + let session = AVAudioSession.sharedInstance() + try session.setCategory(.playAndRecord, mode: .measurement, options: [ + .duckOthers, + .mixWithOthers, + .allowBluetooth, + .defaultToSpeaker, + ]) + try session.setActive(true, options: []) + } + + private static func requestMicrophonePermission() async -> Bool { + await withCheckedContinuation { cont in + AVAudioSession.sharedInstance().requestRecordPermission { ok in + cont.resume(returning: ok) + } + } + } + + private static func requestSpeechPermission() async -> Bool { + await withCheckedContinuation { cont in + SFSpeechRecognizer.requestAuthorization { status in + cont.resume(returning: status == .authorized) + } + } + } +} diff --git a/apps/ios/project.yml b/apps/ios/project.yml new file mode 100644 index 000000000..f0f946f0c --- /dev/null +++ b/apps/ios/project.yml @@ -0,0 +1,56 @@ +name: ClawdisNode +options: + bundleIdPrefix: com.steipete.clawdis + deploymentTarget: + iOS: "17.0" + xcodeVersion: "16.0" + +packages: + ClawdisNodeKit: + path: ../shared/ClawdisNodeKit + +targets: + ClawdisNode: + type: application + platform: iOS + sources: + - path: Sources + dependencies: + - package: ClawdisNodeKit + preBuildScripts: + - name: SwiftFormat (lint) + script: | + set -euo pipefail + if ! command -v swiftformat >/dev/null 2>&1; then + echo "error: swiftformat not found (brew install swiftformat)" >&2 + exit 1 + fi + swiftformat --lint --config "$SRCROOT/../../.swiftformat" \ + "$SRCROOT/Sources" \ + "$SRCROOT/../shared/ClawdisNodeKit/Sources" + - name: SwiftLint + script: | + set -euo pipefail + if ! 
command -v swiftlint >/dev/null 2>&1; then + echo "error: swiftlint not found (brew install swiftlint)" >&2 + exit 1 + fi + swiftlint lint --config "$SRCROOT/.swiftlint.yml" + settings: + base: + PRODUCT_BUNDLE_IDENTIFIER: com.steipete.clawdis.node + SWIFT_VERSION: "6.0" + info: + path: Sources/Info.plist + properties: + CFBundleDisplayName: Clawdis Node + UILaunchScreen: {} + UIApplicationSceneManifest: + UIApplicationSupportsMultipleScenes: false + UIBackgroundModes: + - audio + NSLocalNetworkUsageDescription: Clawdis Node discovers and connects to your Clawdis bridge on the local network. + NSBonjourServices: + - _clawdis-bridge._tcp + NSMicrophoneUsageDescription: Clawdis Node needs microphone access for voice wake. + NSSpeechRecognitionUsageDescription: Clawdis Node uses on-device speech recognition for voice wake. diff --git a/docs/ios/spec.md b/docs/ios/spec.md new file mode 100644 index 000000000..663e7e07b --- /dev/null +++ b/docs/ios/spec.md @@ -0,0 +1,196 @@ +--- +summary: "Plan for an iOS voice + screen (Canvas) node that connects via a secure Bonjour-discovered macOS bridge" +read_when: + - Designing iOS node + gateway integration + - Extending the Gateway protocol for node/screen commands + - Implementing Bonjour pairing or transport security +--- +# iOS Node (internal) — Voice Trigger + Screen/Canvas + +Status: design plan (internal/TestFlight) · Date: 2025-12-12 + +## Goals +- Build an **iOS app** that acts as a **remote node** for Clawdis: + - **Voice trigger** (wake-word / always-listening intent) that forwards transcripts to the Gateway `agent` method. + - **Screen/Canvas** surface that the agent can control: navigate, draw/render, evaluate JS, snapshot. +- **Dead-simple setup**: + - Auto-discover the host on the local network via **Bonjour**. + - One-tap pairing with an approval prompt on the Mac. + - iOS is **never** a local gateway; it is always a remote node. 
+- Operational clarity: + - When iOS is backgrounded, voice may still run; **screen/canvas commands must fail fast** with a structured error. + - Provide **settings**: node display name, enable/disable voice wake, pairing status. + +Non-goals (v1): +- Exposing the Node Gateway directly on the LAN. +- Supporting arbitrary third-party “plugins” on iOS. +- Perfect App Store compliance; this is **internal-only** initially. + +## Current repo reality (constraints we respect) +- The Gateway WebSocket server binds to `127.0.0.1:18789` (`src/gateway/server.ts`) with an optional `CLAWDIS_GATEWAY_TOKEN`. +- macOS “Canvas” exists today, but is **mac-only** and controlled via mac app IPC (`clawdis-mac canvas ...`) rather than the Gateway protocol (`docs/mac/canvas.md`). +- Voice wake forwards via `GatewayChannel` to Gateway `agent` (mac app: `VoiceWakeForwarder` → `AgentRPC`). + +## Recommended topology (B): macOS Bridge + loopback Gateway +Keep the Node gateway loopback-only; expose a dedicated **macOS bridge** to the LAN. + +**iOS App** ⇄ (TLS + pairing) ⇄ **macOS Bridge** ⇄ (loopback) ⇄ **Gateway WS** (`ws://127.0.0.1:18789`) + +Why: +- Preserves current threat model: Gateway remains local-only. +- Centralizes auth, rate limiting, and allowlisting in the bridge. +- Lets us unify “screen node” semantics across mac + iOS without exposing raw gateway methods. + +## Security plan (internal, but still robust) +### Transport +- Bridge listens on LAN and uses **TLS**. +- Prefer **mutual authentication** (mTLS-like) or explicit public key pinning after pairing. + +### Pairing +- Bonjour discovery shows a candidate “Clawdis Bridge” on the LAN. +- First connection: + 1) iOS generates a keypair (Secure Enclave if available). + 2) iOS connects to the bridge and requests pairing. + 3) macOS app shows “Approve node” with node name + device metadata. + 4) On approve, mac stores the node public key + permissions; iOS stores bridge identity + trust anchor in Keychain. 
+- Subsequent connections: + - The bridge requires the paired identity. Unpaired clients get a structured “not paired” error and no access. + +### Authorization / scope control (bridge-side ACL) +The bridge must not be a raw proxy to every gateway method. + +- Allow by default: + - `agent` (with guardrails; idempotency required) + - minimal `system-event` beacons (presence updates for the node) + - node/screen methods defined below (new protocol surface) +- Deny by default: + - anything that widens control without explicit intent (future “shell”, “files”, etc.) +- Rate limit: + - handshake attempts + - voice forwards per minute + - snapshot frequency / payload size + +## Protocol unification: add “node/screen” to Gateway protocol +### Principle +Unify mac Canvas + iOS Canvas under a single conceptual surface: +- The agent talks to the Gateway using a stable method set (typed protocol). +- The Gateway routes node-targeted requests to: + - local mac Canvas implementation, or + - remote iOS node via the bridge + +### Minimal protocol additions (v1) +Add to `src/gateway/protocol/schema.ts` (and regenerate Swift models): + +**Identity** +- Node identity comes from `hello.client.instanceId` (stable), and `hello.client.mode = "node"` (or `"ios-node"`). + +**Methods** +- `node.list` → list paired/connected nodes + capabilities +- `node.invoke` → send a command to a specific node + - Params: `{ nodeId, command, params, idempotencyKey }` + +**Events** +- `node.event` → async node status/errors + - e.g. background/foreground transitions, voice availability, screen availability + +### Node command set (screen-focused) +These are values for `node.invoke.command`: +- `screen.show` / `screen.hide` +- `screen.navigate` with `{ url }` (Canvas URL or https URL) +- `screen.eval` with `{ javaScript }` +- `screen.snapshot` with `{ maxWidth?, quality?, format? 
}` +- `screen.setMode` with `{ mode: "canvas" | "web" }` + +Result pattern: +- Request is a standard `req/res` with `ok` / `error`. +- Long operations (loads, streaming drawing, etc.) may also emit `node.event` progress. + +### Background behavior requirement +When iOS is backgrounded: +- Voice may still be active (subject to iOS suspension). +- **All `screen.*` commands must fail** with a stable error code, e.g.: + - `NODE_BACKGROUND_UNAVAILABLE` + - Include `retryable: true` and `retryAfterMs` if we want the agent to wait. + +## iOS app architecture (SwiftUI) +### App structure +- Tab bar: + - **Canvas/Screen** (WKWebView + overlay chrome) + - **Voice** (status + last transcript + test) + - **Settings** (node name, voice wake toggle, pairing state, debug) + +### Components +- `BridgeDiscovery`: Bonjour browse + resolve (Network.framework `NWBrowser`) +- `BridgeConnection`: TLS session + pairing handshake + reconnect +- `NodeRuntime`: + - Voice pipeline (wake-word + capture + forward) + - Screen pipeline (WKWebView controller + snapshot + eval) + - Background state tracking; enforces “screen unavailable in background” + +### Voice in background (internal) +- Enable background audio mode (and required session configuration) so the mic pipeline can keep running when the user switches apps. +- If iOS suspends the app anyway, surface a clear node status (`node.event`) so operators can see voice is unavailable. 
+ +## Code sharing (macOS + iOS) +Create/expand SwiftPM targets so both apps share: +- `ClawdisProtocol` (generated models; platform-neutral) +- `ClawdisGatewayClient` (shared WS framing + hello/req/res + seq-gap handling) +- `ClawdisNodeKit` (node.invoke command types + error codes) + +macOS continues to own: +- local Canvas implementation details (custom scheme handler serving on-disk HTML, window/panel presentation) + +iOS owns: +- iOS-specific audio/speech + WKWebView presentation and lifecycle + +## Repo layout +- iOS app: `apps/ios/` (XcodeGen `project.yml`) +- Shared Swift packages: `apps/shared/` +- Lint/format: iOS target runs `swiftformat --lint` + `swiftlint lint` using repo configs (`.swiftformat`, `.swiftlint.yml`). + +Generate the Xcode project: +```bash +cd apps/ios +xcodegen generate +open ClawdisNode.xcodeproj +``` + +## Storage plan (private by default) +### iOS +- Canvas/workspace files (persistent, private): + - `Application Support/Clawdis/canvas/<session>/...` +- Snapshots / temp exports (evictable): + - `Library/Caches/Clawdis/canvas-snapshots/<session>/...` +- Credentials: + - Keychain (paired identity + bridge trust anchor) + +### macOS +- Keep current Canvas root (already implemented): + - `~/Library/Application Support/Clawdis/canvas/<session>/...` +- Bridge state: + - `~/Library/Application Support/Clawdis/bridge/paired-nodes.json` + - `~/Library/Application Support/Clawdis/bridge/keys/...` + +## Rollout plan (phased) +1) **Bridge discovery + pairing (mac + iOS)** + - Bonjour browse + resolve + - Approve prompt on mac + - Persist pairing in Keychain/App Support +2) **Voice-only node** + - iOS voice wake toggle + - Forward transcript to Gateway `agent` via bridge + - Presence beacons via `system-event` (or node.event) +3) **Protocol additions for nodes** + - Add `node.list` / `node.invoke` / `node.event` to Gateway + - Implement bridge routing + ACLs +4) **iOS screen/canvas** + - WKWebView screen surface + - `screen.navigate/eval/snapshot` + - Background fast-fail
for `screen.*` +5) **Unify mac Canvas under the same node.invoke** + - Keep existing implementation, but expose it through the unified protocol path so the agent uses one API. + +## Open questions +- Should `hello.client.mode` be `"node"` with `platform="ios ..."` or a distinct mode `"ios-node"`? (Presence filtering currently excludes `"cli"` only.) +- Do we want a “permissions” model per node (voice only vs voice+screen) at pairing time? +- Should “website mode” allow arbitrary https, or enforce an allowlist to reduce risk?