diff --git a/apps/macos/Sources/Clawdis/VoicePushToTalk.swift b/apps/macos/Sources/Clawdis/VoicePushToTalk.swift index 22948acb6..a044df889 100644 --- a/apps/macos/Sources/Clawdis/VoicePushToTalk.swift +++ b/apps/macos/Sources/Clawdis/VoicePushToTalk.swift @@ -239,10 +239,16 @@ actor VoicePushToTalk { private static func makeAttributed(committed: String, volatile: String, isFinal: Bool) -> NSAttributedString { let full = NSMutableAttributedString() - let committedAttr: [NSAttributedString.Key: Any] = [.foregroundColor: NSColor.labelColor] + let committedAttr: [NSAttributedString.Key: Any] = [ + .foregroundColor: NSColor.labelColor, + .font: NSFont.systemFont(ofSize: 13, weight: .regular), + ] full.append(NSAttributedString(string: committed, attributes: committedAttr)) - let volatileColor: NSColor = isFinal ? .labelColor : .secondaryLabelColor - let volatileAttr: [NSAttributedString.Key: Any] = [.foregroundColor: volatileColor] + let volatileColor: NSColor = isFinal ? .labelColor : NSColor.labelColor.withAlphaComponent(0.55) + let volatileAttr: [NSAttributedString.Key: Any] = [ + .foregroundColor: volatileColor, + .font: NSFont.systemFont(ofSize: 13, weight: .regular), + ] full.append(NSAttributedString(string: volatile, attributes: volatileAttr)) return full } diff --git a/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift b/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift index d9c776ce4..4e9166643 100644 --- a/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift +++ b/apps/macos/Sources/Clawdis/VoiceWakeOverlay.swift @@ -25,6 +25,9 @@ final class VoiceWakeOverlayController: ObservableObject { private let width: CGFloat = 360 private let padding: CGFloat = 10 + private let buttonWidth: CGFloat = 36 + private let spacing: CGFloat = 8 + private let verticalPadding: CGFloat = 8 func showPartial(transcript: String, attributed: NSAttributedString? = nil) { self.autoSendTask?.cancel() @@ -33,7 +36,7 @@ final class VoiceWakeOverlayController: ObservableObject { self.model.isFinal = false self.model.forwardEnabled = false self.model.isSending = false - self.model.attributed = attributed ?? NSAttributedString(string: transcript) + self.model.attributed = attributed ?? self.makeAttributed(from: transcript) self.present() self.updateWindowFrame(animate: true) } @@ -45,7 +48,7 @@ final class VoiceWakeOverlayController: ObservableObject { self.model.isFinal = true self.model.forwardEnabled = forwardConfig.enabled self.model.isSending = false - self.model.attributed = attributed ?? NSAttributedString(string: transcript) + self.model.attributed = attributed ?? self.makeAttributed(from: transcript) self.present() self.scheduleAutoSend(after: delay) } @@ -58,7 +61,7 @@ final class VoiceWakeOverlayController: ObservableObject { func updateText(_ text: String) { self.model.text = text self.model.isSending = false - self.model.attributed = NSAttributedString(string: text) + self.model.attributed = self.makeAttributed(from: text) self.updateWindowFrame(animate: true) } @@ -160,13 +163,8 @@ final class VoiceWakeOverlayController: ObservableObject { } private func targetFrame() -> NSRect { - guard let screen = NSScreen.main, let host = self.hostingView else { - return .zero - } - host.layoutSubtreeIfNeeded() - host.invalidateIntrinsicContentSize() - let fit = host.fittingSize - let height = max(42, min(fit.height, 180)) + guard let screen = NSScreen.main else { return .zero } + let height = self.measuredHeight() let size = NSSize(width: self.width, height: height) let visible = screen.visibleFrame let origin = CGPoint( @@ -189,6 +187,18 @@ final class VoiceWakeOverlayController: ObservableObject { } } + private func measuredHeight() -> CGFloat { + let attributed = self.model.attributed.length > 0 ? self.model.attributed : self.makeAttributed(from: self.model.text) + let maxWidth = self.width - (self.padding * 2) - self.spacing - self.buttonWidth + let rect = attributed.boundingRect( + with: CGSize(width: maxWidth, height: .greatestFiniteMagnitude), + options: [.usesLineFragmentOrigin, .usesFontLeading], + context: nil) + let contentHeight = ceil(rect.height) + let total = contentHeight + self.verticalPadding * 2 + return max(42, min(total, 220)) + } + private func dismissTargetFrame(for frame: NSRect, reason: DismissReason, outcome: SendOutcome) -> NSRect? { switch (reason, outcome) { case (.empty, _): @@ -212,6 +222,15 @@ final class VoiceWakeOverlayController: ObservableObject { self?.sendNow() } } + + private func makeAttributed(from text: String) -> NSAttributedString { + NSAttributedString( + string: text, + attributes: [ + .foregroundColor: NSColor.labelColor, + .font: NSFont.systemFont(ofSize: 13, weight: .regular), + ]) + } } private struct VoiceWakeOverlayView: View { @@ -289,7 +308,7 @@ private struct TranscriptTextView: NSViewRepresentable { let textView = TranscriptNSTextView() textView.delegate = context.coordinator textView.drawsBackground = false - textView.isRichText = false + textView.isRichText = true textView.isAutomaticQuoteSubstitutionEnabled = false textView.isAutomaticTextReplacementEnabled = false textView.font = .systemFont(ofSize: 13, weight: .regular) @@ -299,6 +318,8 @@ private struct TranscriptTextView: NSViewRepresentable { textView.textContainer?.widthTracksTextView = true textView.textContainer?.containerSize = NSSize(width: CGFloat.greatestFiniteMagnitude, height: CGFloat.greatestFiniteMagnitude) textView.string = self.text + textView.textStorage?.setAttributedString(self.attributed) + textView.focusRingType = .none textView.onSend = { [weak textView] in textView?.window?.makeFirstResponder(nil) self.onSend() diff --git a/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift b/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift index 2e63fcc8e..6c21922c4 100644 --- a/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift +++ b/apps/macos/Sources/Clawdis/VoiceWakeRuntime.swift @@ -256,13 +256,14 @@ actor VoiceWakeRuntime { self.capturedTranscript = "" self.captureStartedAt = nil self.lastHeard = nil - let heardBeyondTrigger = self.heardBeyondTrigger self.heardBeyondTrigger = false await MainActor.run { AppStateStore.shared.stopVoiceEars() } let forwardConfig = await MainActor.run { AppStateStore.shared.voiceWakeForwardConfig } - let delay: TimeInterval = (heardBeyondTrigger && !finalTranscript.isEmpty) ? 1.0 : 3.0 + // Auto-send should fire as soon as the silence threshold is satisfied (2s after speech, 5s after trigger-only). + // Keep the overlay visible during capture; once we finalize, we dispatch immediately. + let delay: TimeInterval = 0.0 let finalAttributed = Self.makeAttributed( committed: finalTranscript, volatile: "", @@ -339,10 +340,16 @@ actor VoiceWakeRuntime { private static func makeAttributed(committed: String, volatile: String, isFinal: Bool) -> NSAttributedString { let full = NSMutableAttributedString() - let committedAttr: [NSAttributedString.Key: Any] = [.foregroundColor: NSColor.labelColor] + let committedAttr: [NSAttributedString.Key: Any] = [ + .foregroundColor: NSColor.labelColor, + .font: NSFont.systemFont(ofSize: 13, weight: .regular), + ] full.append(NSAttributedString(string: committed, attributes: committedAttr)) - let volatileColor: NSColor = isFinal ? .labelColor : .secondaryLabelColor - let volatileAttr: [NSAttributedString.Key: Any] = [.foregroundColor: volatileColor] + let volatileColor: NSColor = isFinal ? .labelColor : NSColor.labelColor.withAlphaComponent(0.55) + let volatileAttr: [NSAttributedString.Key: Any] = [ + .foregroundColor: volatileColor, + .font: NSFont.systemFont(ofSize: 13, weight: .regular), + ] full.append(NSAttributedString(string: volatile, attributes: volatileAttr)) return full } diff --git a/apps/macos/Sources/Clawdis/WebChatWindow.swift b/apps/macos/Sources/Clawdis/WebChatWindow.swift index cb1c7fe7b..a2753ee67 100644 --- a/apps/macos/Sources/Clawdis/WebChatWindow.swift +++ b/apps/macos/Sources/Clawdis/WebChatWindow.swift @@ -6,6 +6,7 @@ import WebKit private let webChatLogger = Logger(subsystem: "com.steipete.clawdis", category: "WebChat") +@MainActor final class WebChatWindowController: NSWindowController, WKNavigationDelegate { private let webView: WKWebView private let sessionKey: String @@ -43,7 +44,7 @@ final class WebChatWindowController: NSWindowController, WKNavigationDelegate { @available(*, unavailable) required init?(coder: NSCoder) { fatalError("init(coder:) is not supported") } - deinit { + @MainActor deinit { self.reachabilityTask?.cancel() self.tunnel?.terminate() } @@ -90,7 +91,8 @@ final class WebChatWindowController: NSWindowController, WKNavigationDelegate { if CommandResolver.connectionModeIsRemote() { return try await self.startOrRestartTunnel() } else { - return URL(string: "http://127.0.0.1:\(remotePort)/")! + return URL(string: "http://127.0.0.1:\(remotePort)/")! + } } private func loadWebChat(baseEndpoint: URL) { @@ -120,7 +122,6 @@ final class WebChatWindowController: NSWindowController, WKNavigationDelegate { throw NSError(domain: "WebChat", code: 7, userInfo: [NSLocalizedDescriptionKey: "webchat unreachable: \(error.localizedDescription)"]) } } - } private func startOrRestartTunnel() async throws -> URL { // Kill existing tunnel if any diff --git a/src/webchat/server.ts b/src/webchat/server.ts index 7627bc358..08968c4bd 100644 --- a/src/webchat/server.ts +++ b/src/webchat/server.ts @@ -5,7 +5,7 @@ import os from "node:os"; import path from "node:path"; import { fileURLToPath } from "node:url"; import sharp from "sharp"; -import { WebSocketServer, WebSocket } from "ws"; +import { type WebSocket, WebSocketServer } from "ws"; import { agentCommand } from "../commands/agent.js"; import { loadConfig } from "../config/config.js"; @@ -40,10 +40,16 @@ const wsSessions: Map> = new Map(); function resolveWebRoot() { const here = path.dirname(fileURLToPath(import.meta.url)); - const packagedRoot = path.resolve(path.dirname(process.execPath), "../WebChat"); + const packagedRoot = path.resolve( + path.dirname(process.execPath), + "../WebChat", + ); if (fs.existsSync(packagedRoot)) return packagedRoot; - return path.resolve(here, "../../apps/macos/Sources/Clawdis/Resources/WebChat"); + return path.resolve( + here, + "../../apps/macos/Sources/Clawdis/Resources/WebChat", + ); } function readBody(req: http.IncomingMessage): Promise { @@ -56,17 +62,27 @@ function readBody(req: http.IncomingMessage): Promise { }); } -function pickSessionId(sessionKey: string, store: Record): string | null { +function pickSessionId( + sessionKey: string, + store: Record, +): string | null { if (store[sessionKey]?.sessionId) return store[sessionKey].sessionId; const first = Object.values(store)[0]?.sessionId; return first ?? null; } -function readSessionMessages(sessionId: string, storePath: string): ChatMessage[] { +function readSessionMessages( + sessionId: string, + storePath: string, +): ChatMessage[] { const dir = path.dirname(storePath); const candidates = [ path.join(dir, `${sessionId}.jsonl`), - path.join(os.homedir(), ".tau/agent/sessions/clawdis", `${sessionId}.jsonl`), + path.join( + os.homedir(), + ".tau/agent/sessions/clawdis", + `${sessionId}.jsonl`, + ), ]; let content: string | null = null; for (const p of candidates) { @@ -96,7 +112,7 @@ function readSessionMessages(sessionId: string, storePath: string): ChatMessage[ return messages; } -function broadcastSession(sessionKey: string, payload: any) { +function broadcastSession(sessionKey: string, payload: unknown) { const conns = wsSessions.get(sessionKey); if (!conns || conns.size === 0) return; const msg = JSON.stringify(payload); @@ -116,7 +132,12 @@ async function persistAttachments( const out: { placeholder: string; path: string }[] = []; if (!attachments?.length) return out; - const root = path.join(os.homedir(), ".clawdis", "webchat-uploads", sessionId); + const root = path.join( + os.homedir(), + ".clawdis", + "webchat-uploads", + sessionId, + ); await fs.promises.mkdir(root, { recursive: true }); let idx = 1; @@ -124,9 +145,13 @@ async function persistAttachments( try { if (!att?.content || typeof att.content !== "string") continue; const mime = - typeof att.mimeType === "string" ? att.mimeType : "application/octet-stream"; + typeof att.mimeType === "string" + ? att.mimeType + : "application/octet-stream"; const baseName = att.fileName || `${att.type || "attachment"}-${idx}`; - const ext = mime.startsWith("image/") ? mime.split("/")[1] || "bin" : "bin"; + const ext = mime.startsWith("image/") + ? mime.split("/")[1] || "bin" + : "bin"; const fileName = `${baseName}.${ext}`.replace(/[^a-zA-Z0-9._-]/g, "_"); const buf = Buffer.from(att.content, "base64"); @@ -137,7 +162,8 @@ async function persistAttachments( const image = sharp(buf, { failOn: "none" }); meta = await image.metadata(); const needsResize = - (meta.width && meta.width > 2000) || (meta.height && meta.height > 2000); + (meta.width && meta.width > 2000) || + (meta.height && meta.height > 2000); if (needsResize) { const resized = await image .resize({ width: 2000, height: 2000, fit: "inside" }) @@ -160,7 +186,8 @@ async function persistAttachments( await fs.promises.writeFile(dest, finalBuf); const sizeLabel = `${(finalBuf.length / 1024).toFixed(0)} KB`; - const dimLabel = meta?.width && meta?.height ? `, ${meta.width}x${meta.height}` : ""; + const dimLabel = + meta?.width && meta?.height ? `, ${meta.width}x${meta.height}` : ""; const placeholder = `[Attachment saved: ${dest} (${mime}${dimLabel}, ${sizeLabel})]`; out.push({ placeholder, path: dest }); } catch (err) { @@ -198,7 +225,8 @@ async function handleRpc( const attachments = Array.isArray(payload.attachments) ? (payload.attachments as AttachmentInput[]) : []; - const thinking = typeof payload.thinking === "string" ? payload.thinking : undefined; + const thinking = + typeof payload.thinking === "string" ? payload.thinking : undefined; const to = typeof payload.to === "string" ? payload.to : undefined; const deliver = Boolean(payload.deliver); @@ -262,7 +290,7 @@ async function handleRpc( thinkingLevel: typeof persistedThinking === "string" ? persistedThinking - : cfg.inbound?.reply?.thinkingDefault ?? "off", + : (cfg.inbound?.reply?.thinkingDefault ?? "off"), }); } catch { // best-effort; ignore broadcast errors @@ -297,7 +325,10 @@ export async function startWebChatServer(port = WEBCHAT_DEFAULT_PORT) { const server = http.createServer(async (req, res) => { if (!req.url) return notFound(res); - if (req.socket.remoteAddress && !req.socket.remoteAddress.startsWith("127.")) { + if ( + req.socket.remoteAddress && + !req.socket.remoteAddress.startsWith("127.") + ) { res.statusCode = 403; res.end("loopback only"); return; @@ -336,7 +367,8 @@ export async function startWebChatServer(port = WEBCHAT_DEFAULT_PORT) { } catch { // ignore } - const sessionKey = typeof body.session === "string" ? body.session : "main"; + const sessionKey = + typeof body.session === "string" ? body.session : "main"; const result = await handleRpc(body, sessionKey); res.setHeader("Content-Type", "application/json"); res.end(JSON.stringify(result)); @@ -400,12 +432,19 @@ export async function startWebChatServer(port = WEBCHAT_DEFAULT_PORT) { socket.destroy(); return; } - if (req.socket.remoteAddress && !req.socket.remoteAddress.startsWith("127.")) { + if ( + req.socket.remoteAddress && + !req.socket.remoteAddress.startsWith("127.") + ) { socket.destroy(); return; } const sessionKey = url.searchParams.get("session") ?? "main"; - wss!.handleUpgrade(req, socket, head, (ws: WebSocket) => { + if (!wss) { + socket.destroy(); + return; + } + wss.handleUpgrade(req, socket, head, (ws: WebSocket) => { ws.on("close", () => { const set = wsSessions.get(sessionKey); if (set) { @@ -413,13 +452,18 @@ export async function startWebChatServer(port = WEBCHAT_DEFAULT_PORT) { if (set.size === 0) wsSessions.delete(sessionKey); } }); - wsSessions.set(sessionKey, (wsSessions.get(sessionKey) ?? new Set()).add(ws)); + wsSessions.set( + sessionKey, + (wsSessions.get(sessionKey) ?? new Set()).add(ws), + ); // Send initial snapshot const store = loadSessionStore(storePath); const sessionId = pickSessionId(sessionKey, store); const sessionEntry = sessionKey ? store[sessionKey] : undefined; const persistedThinking = sessionEntry?.thinkingLevel; - const messages = sessionId ? readSessionMessages(sessionId, storePath) : []; + const messages = sessionId + ? readSessionMessages(sessionId, storePath) + : []; ws.send( JSON.stringify({ type: "session", @@ -428,11 +472,11 @@ export async function startWebChatServer(port = WEBCHAT_DEFAULT_PORT) { thinkingLevel: typeof persistedThinking === "string" ? persistedThinking - : cfg.inbound?.reply?.thinkingDefault ?? "off", + : (cfg.inbound?.reply?.thinkingDefault ?? "off"), }), ); }); - } catch (err) { + } catch (_err) { socket.destroy(); } }); @@ -440,7 +484,7 @@ export async function startWebChatServer(port = WEBCHAT_DEFAULT_PORT) { // Watch for session/message file changes and push updates. try { if (fs.existsSync(storeDir)) { - fs.watch(storeDir, { persistent: false }, (event, filename) => { + fs.watch(storeDir, { persistent: false }, (_event, filename) => { if (!filename) return; // On any file change, refresh for active sessions. for (const sessionKey of wsSessions.keys()) { @@ -449,7 +493,9 @@ export async function startWebChatServer(port = WEBCHAT_DEFAULT_PORT) { const sessionId = pickSessionId(sessionKey, store); const sessionEntry = sessionKey ? store[sessionKey] : undefined; const persistedThinking = sessionEntry?.thinkingLevel; - const messages = sessionId ? readSessionMessages(sessionId, storePath) : []; + const messages = sessionId + ? readSessionMessages(sessionId, storePath) + : []; broadcastSession(sessionKey, { type: "session", sessionKey, @@ -457,7 +503,7 @@ export async function startWebChatServer(port = WEBCHAT_DEFAULT_PORT) { thinkingLevel: typeof persistedThinking === "string" ? persistedThinking - : cfg.inbound?.reply?.thinkingDefault ?? "off", + : (cfg.inbound?.reply?.thinkingDefault ?? "off"), }); } catch { // ignore