From c10a386d1724a1d4063c473b64b09d20c37dbdd6 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 26 Dec 2025 22:12:33 +0100 Subject: [PATCH] fix(macos): detect and reset stale SSH tunnels --- CHANGELOG.md | 1 + apps/macos/Sources/Clawdis/DebugActions.swift | 21 +++++++ .../macos/Sources/Clawdis/DebugSettings.swift | 32 ++++++++++ apps/macos/Sources/Clawdis/PortGuardian.swift | 59 +++++++++++++++++-- .../Sources/Clawdis/RemoteTunnelManager.swift | 41 ++++++++++++- 5 files changed, 146 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fccf07642..6a1005cc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,6 +65,7 @@ - Session list polish: sleeping/disconnected/error states, usage bar restored, padding + bar sizing tuned, syncing menu removed, header hidden when disconnected. - Chat UI polish: tool call cards + merged tool results, glass background, tighter composer spacing, visual effect host tweaks. - OAuth storage moved; legacy session syncing metadata removed. +- Remote SSH tunnels now get health checks; Debug → Ports highlights unhealthy tunnels and offers Reset SSH tunnel. ### Nodes & Canvas - Debug status overlay gated and toggleable on macOS/iOS/Android nodes. diff --git a/apps/macos/Sources/Clawdis/DebugActions.swift b/apps/macos/Sources/Clawdis/DebugActions.swift index 781bc68bc..21c640841 100644 --- a/apps/macos/Sources/Clawdis/DebugActions.swift +++ b/apps/macos/Sources/Clawdis/DebugActions.swift @@ -108,6 +108,27 @@ enum DebugActions { } } + static func resetGatewayTunnel() async -> Result { + let mode = CommandResolver.connectionSettings().mode + guard mode == .remote else { + return .failure(.message("Remote mode is not enabled.")) + } + await RemoteTunnelManager.shared.stopAll() + await GatewayConnection.shared.shutdown() + do { + _ = try await RemoteTunnelManager.shared.ensureControlTunnel() + let settings = CommandResolver.connectionSettings() + try await ControlChannel.shared.configure(mode: .remote( + target: settings.target, + identity: settings.identity)) + await HealthStore.shared.refresh(onDemand: true) + return .success("SSH tunnel reset.") + } catch { + Task { await HealthStore.shared.refresh(onDemand: true) } + return .failure(.message(error.localizedDescription)) + } + } + static func pinoLogPath() -> String { LogLocator.bestLogFile()?.path ?? LogLocator.launchdLogPath } diff --git a/apps/macos/Sources/Clawdis/DebugSettings.swift b/apps/macos/Sources/Clawdis/DebugSettings.swift index 66faed99d..a730d5ef1 100644 --- a/apps/macos/Sources/Clawdis/DebugSettings.swift +++ b/apps/macos/Sources/Clawdis/DebugSettings.swift @@ -23,6 +23,8 @@ struct DebugSettings: View { @State private var portCheckInFlight = false @State private var portReports: [DebugActions.PortReport] = [] @State private var portKillStatus: String? + @State private var tunnelResetInFlight = false + @State private var tunnelResetStatus: String? @State private var pendingKill: DebugActions.PortListener? @AppStorage(attachExistingGatewayOnlyKey) private var attachExistingGatewayOnly: Bool = false @AppStorage(debugFileLogEnabledKey) private var diagnosticsFileLogEnabled: Bool = false @@ -264,6 +266,11 @@ struct DebugSettings: View { } .buttonStyle(.borderedProminent) .disabled(self.portCheckInFlight) + Button("Reset SSH tunnel") { + Task { await self.resetGatewayTunnel() } + } + .buttonStyle(.bordered) + .disabled(self.tunnelResetInFlight || !self.isRemoteMode) } if let portKillStatus { @@ -272,6 +279,12 @@ struct DebugSettings: View { .foregroundStyle(.secondary) .fixedSize(horizontal: false, vertical: true) } + if let tunnelResetStatus { + Text(tunnelResetStatus) + .font(.caption2) + .foregroundStyle(.secondary) + .fixedSize(horizontal: false, vertical: true) + } if self.portReports.isEmpty, !self.portCheckInFlight { Text("Check which process owns 18789 and suggest fixes.") @@ -593,6 +606,21 @@ struct DebugSettings: View { self.portCheckInFlight = false } + @MainActor + private func resetGatewayTunnel() async { + self.tunnelResetInFlight = true + self.tunnelResetStatus = nil + let result = await DebugActions.resetGatewayTunnel() + switch result { + case let .success(message): + self.tunnelResetStatus = message + case let .failure(err): + self.tunnelResetStatus = err.localizedDescription + } + await self.runPortCheck() + self.tunnelResetInFlight = false + } + @MainActor private func requestKill(_ listener: DebugActions.PortListener) { if listener.expected { @@ -730,6 +758,10 @@ struct DebugSettings: View { } } + private var isRemoteMode: Bool { + CommandResolver.connectionSettings().mode == .remote + } + private func configURL() -> URL { FileManager.default.homeDirectoryForCurrentUser .appendingPathComponent(".clawdis") diff --git a/apps/macos/Sources/Clawdis/PortGuardian.swift b/apps/macos/Sources/Clawdis/PortGuardian.swift index d84d2c5eb..26535b260 100644 --- a/apps/macos/Sources/Clawdis/PortGuardian.swift +++ b/apps/macos/Sources/Clawdis/PortGuardian.swift @@ -153,12 +153,37 @@ actor PortGuardian { for port in ports { let listeners = await self.listeners(on: port) - reports.append(Self.buildReport(port: port, listeners: listeners, mode: mode)) + let tunnelHealthy = await self.probeGatewayHealthIfNeeded( + port: port, + mode: mode, + listeners: listeners) + reports.append(Self.buildReport( + port: port, + listeners: listeners, + mode: mode, + tunnelHealthy: tunnelHealthy)) } return reports } + func probeGatewayHealth(port: Int, timeout: TimeInterval = 2.0) async -> Bool { + let url = URL(string: "http://127.0.0.1:\(port)/")! + let config = URLSessionConfiguration.ephemeral + config.timeoutIntervalForRequest = timeout + config.timeoutIntervalForResource = timeout + let session = URLSession(configuration: config) + var request = URLRequest(url: url) + request.cachePolicy = .reloadIgnoringLocalCacheData + request.timeoutInterval = timeout + do { + let (_, response) = try await session.data(for: request) + return response is HTTPURLResponse + } catch { + return false + } + } + private func listeners(on port: Int) async -> [Listener] { let res = await ShellExecutor.run( command: ["lsof", "-nP", "-iTCP:\(port)", "-sTCP:LISTEN", "-Fpcn"], @@ -227,7 +252,8 @@ actor PortGuardian { private static func buildReport( port: Int, listeners: [Listener], - mode: AppState.ConnectionMode) -> PortReport + mode: AppState.ConnectionMode, + tunnelHealthy: Bool?) -> PortReport { let expectedDesc: String let okPredicate: (Listener) -> Bool @@ -253,16 +279,28 @@ actor PortGuardian { return .init(port: port, expected: expectedDesc, status: .missing(text), listeners: []) } + let tunnelUnhealthy = mode == .remote && port == 18789 && tunnelHealthy == false let reportListeners = listeners.map { listener in - ReportListener( + var expected = okPredicate(listener) + if tunnelUnhealthy, expected { expected = false } + return ReportListener( pid: listener.pid, command: listener.command, fullCommand: listener.fullCommand, user: listener.user, - expected: okPredicate(listener)) + expected: expected) } let offenders = reportListeners.filter { !$0.expected } + if tunnelUnhealthy { + let list = listeners.map { "\($0.command) (\($0.pid))" }.joined(separator: ", ") + let reason = "Port \(port) is served by \(list), but the SSH tunnel is unhealthy." + return .init( + port: port, + expected: expectedDesc, + status: .interference(reason, offenders: offenders), + listeners: reportListeners) + } if offenders.isEmpty { let list = listeners.map { "\($0.command) (\($0.pid))" }.joined(separator: ", ") let okText = "Port \(port) is served by \(list)." @@ -318,6 +356,17 @@ actor PortGuardian { } } + private func probeGatewayHealthIfNeeded( + port: Int, + mode: AppState.ConnectionMode, + listeners: [Listener]) async -> Bool? + { + guard mode == .remote, port == 18789, !listeners.isEmpty else { return nil } + let hasSsh = listeners.contains { $0.command.lowercased().contains("ssh") } + guard hasSsh else { return nil } + return await self.probeGatewayHealth(port: port) + } + private static func loadRecords(from url: URL) -> [Record] { guard let data = try? Data(contentsOf: url), let decoded = try? JSONDecoder().decode([Record].self, from: data) @@ -352,7 +401,7 @@ extension PortGuardian { command: $0.command, fullCommand: $0.fullCommand, user: $0.user) } - return Self.buildReport(port: port, listeners: mapped, mode: mode) + return Self.buildReport(port: port, listeners: mapped, mode: mode, tunnelHealthy: nil) } } #endif diff --git a/apps/macos/Sources/Clawdis/RemoteTunnelManager.swift b/apps/macos/Sources/Clawdis/RemoteTunnelManager.swift index 40fa9b3dd..137d8e3ca 100644 --- a/apps/macos/Sources/Clawdis/RemoteTunnelManager.swift +++ b/apps/macos/Sources/Clawdis/RemoteTunnelManager.swift @@ -1,9 +1,11 @@ import Foundation +import OSLog /// Manages the SSH tunnel that forwards the remote gateway/control port to localhost. actor RemoteTunnelManager { static let shared = RemoteTunnelManager() + private let logger = Logger(subsystem: "com.steipete.clawdis", category: "remote-tunnel") private var controlTunnel: RemotePortTunnel? func controlTunnelPortIfRunning() async -> UInt16? { @@ -11,15 +13,19 @@ actor RemoteTunnelManager { tunnel.process.isRunning, let local = tunnel.localPort { - return local + if await self.isTunnelHealthy(port: local) { return local } + self.logger.error("active SSH tunnel on port \(local, privacy: .public) is unhealthy; restarting") + tunnel.terminate() + self.controlTunnel = nil } // If a previous Clawdis run already has an SSH listener on the expected port (common after restarts), // reuse it instead of spawning new ssh processes that immediately fail with "Address already in use". let desiredPort = UInt16(GatewayEnvironment.gatewayPort()) if let desc = await PortGuardian.shared.describe(port: Int(desiredPort)), - desc.command.lowercased().contains("ssh") + self.isSshProcess(desc) { - return desiredPort + if await self.isTunnelHealthy(port: desiredPort) { return desiredPort } + await self.cleanupStaleTunnel(desc: desc, port: desiredPort) } return nil } @@ -49,4 +55,33 @@ actor RemoteTunnelManager { self.controlTunnel?.terminate() self.controlTunnel = nil } + + private func isTunnelHealthy(port: UInt16) async -> Bool { + await PortGuardian.shared.probeGatewayHealth(port: Int(port)) + } + + private func isSshProcess(_ desc: PortGuardian.Descriptor) -> Bool { + let cmd = desc.command.lowercased() + if cmd.contains("ssh") { return true } + if let path = desc.executablePath?.lowercased(), path.contains("/ssh") { return true } + return false + } + + private func cleanupStaleTunnel(desc: PortGuardian.Descriptor, port: UInt16) async { + let pid = desc.pid + self.logger.error( + "stale SSH tunnel detected on port \(port, privacy: .public) pid \(pid, privacy: .public)") + let killed = await self.kill(pid: pid) + if !killed { + self.logger.error("failed to terminate stale SSH tunnel pid \(pid, privacy: .public)") + } + await PortGuardian.shared.removeRecord(pid: pid) + } + + private func kill(pid: Int32) async -> Bool { + let term = await ShellExecutor.run(command: ["kill", "-TERM", "\(pid)"], cwd: nil, env: nil, timeout: 2) + if term.ok { return true } + let sigkill = await ShellExecutor.run(command: ["kill", "-KILL", "\(pid)"], cwd: nil, env: nil, timeout: 2) + return sigkill.ok + } }