fix(macos): detect and reset stale SSH tunnels

This commit is contained in:
Peter Steinberger
2025-12-26 22:12:33 +01:00
parent a13db82d28
commit c10a386d17
5 changed files with 146 additions and 8 deletions

View File

@@ -65,6 +65,7 @@
- Session list polish: sleeping/disconnected/error states, usage bar restored, padding + bar sizing tuned, syncing menu removed, header hidden when disconnected.
- Chat UI polish: tool call cards + merged tool results, glass background, tighter composer spacing, visual effect host tweaks.
- OAuth storage moved; legacy session syncing metadata removed.
- Remote SSH tunnels now get health checks; Debug → Ports highlights unhealthy tunnels and offers Reset SSH tunnel.
### Nodes & Canvas
- Debug status overlay gated and toggleable on macOS/iOS/Android nodes.

View File

@@ -108,6 +108,27 @@ enum DebugActions {
}
}
/// Tears down and re-establishes the remote gateway SSH tunnel.
///
/// Steps, in order: stop every tunnel owned by `RemoteTunnelManager`, shut down the
/// gateway connection, ensure a fresh control tunnel, reconfigure the control channel
/// for remote mode from the current connection settings, then refresh the health store.
///
/// - Returns: `.success` with a short confirmation message, or `.failure` carrying either
///   "Remote mode is not enabled." or the localized description of the thrown error.
static func resetGatewayTunnel() async -> Result<String, DebugActionError> {
    // Resetting only applies when the app is configured for remote mode.
    let remoteEnabled = CommandResolver.connectionSettings().mode == .remote
    guard remoteEnabled else {
        return .failure(.message("Remote mode is not enabled."))
    }

    // Drop the existing tunnel(s) and the connection before rebuilding.
    let tunnels = RemoteTunnelManager.shared
    await tunnels.stopAll()
    await GatewayConnection.shared.shutdown()

    do {
        _ = try await tunnels.ensureControlTunnel()
        // Settings are re-read here, after the tunnel is up, rather than reusing the earlier read.
        let current = CommandResolver.connectionSettings()
        try await ControlChannel.shared.configure(
            mode: .remote(target: current.target, identity: current.identity))
        await HealthStore.shared.refresh(onDemand: true)
        return .success("SSH tunnel reset.")
    } catch {
        // On failure the health refresh is started but not awaited before returning.
        Task { await HealthStore.shared.refresh(onDemand: true) }
        return .failure(.message(error.localizedDescription))
    }
}
/// Path of the log file reported by `LogLocator.bestLogFile()`, or
/// `LogLocator.launchdLogPath` when no such file (or path) is available.
static func pinoLogPath() -> String {
    if let bestPath = LogLocator.bestLogFile()?.path {
        return bestPath
    }
    return LogLocator.launchdLogPath
}

View File

@@ -23,6 +23,8 @@ struct DebugSettings: View {
@State private var portCheckInFlight = false
@State private var portReports: [DebugActions.PortReport] = []
@State private var portKillStatus: String?
@State private var tunnelResetInFlight = false
@State private var tunnelResetStatus: String?
@State private var pendingKill: DebugActions.PortListener?
@AppStorage(attachExistingGatewayOnlyKey) private var attachExistingGatewayOnly: Bool = false
@AppStorage(debugFileLogEnabledKey) private var diagnosticsFileLogEnabled: Bool = false
@@ -264,6 +266,11 @@ struct DebugSettings: View {
}
.buttonStyle(.borderedProminent)
.disabled(self.portCheckInFlight)
Button("Reset SSH tunnel") {
Task { await self.resetGatewayTunnel() }
}
.buttonStyle(.bordered)
.disabled(self.tunnelResetInFlight || !self.isRemoteMode)
}
if let portKillStatus {
@@ -272,6 +279,12 @@ struct DebugSettings: View {
.foregroundStyle(.secondary)
.fixedSize(horizontal: false, vertical: true)
}
if let tunnelResetStatus {
Text(tunnelResetStatus)
.font(.caption2)
.foregroundStyle(.secondary)
.fixedSize(horizontal: false, vertical: true)
}
if self.portReports.isEmpty, !self.portCheckInFlight {
Text("Check which process owns 18789 and suggest fixes.")
@@ -593,6 +606,21 @@ struct DebugSettings: View {
self.portCheckInFlight = false
}
/// Runs the SSH tunnel reset from the Debug settings UI.
///
/// Marks the reset as in flight, clears the previous status text, runs
/// `DebugActions.resetGatewayTunnel()`, shows its message (or the error description),
/// and then re-runs the port check before clearing the in-flight flag.
@MainActor
private func resetGatewayTunnel() async {
    self.tunnelResetInFlight = true
    // Cleared on exit, i.e. only after the follow-up port check has completed.
    defer { self.tunnelResetInFlight = false }
    self.tunnelResetStatus = nil

    let outcome = await DebugActions.resetGatewayTunnel()
    let statusText: String
    switch outcome {
    case let .success(message):
        statusText = message
    case let .failure(err):
        statusText = err.localizedDescription
    }
    self.tunnelResetStatus = statusText

    await self.runPortCheck()
}
@MainActor
private func requestKill(_ listener: DebugActions.PortListener) {
if listener.expected {
@@ -730,6 +758,10 @@ struct DebugSettings: View {
}
}
/// `true` when the current connection settings select remote mode.
/// The settings are read from `CommandResolver` on every access.
private var isRemoteMode: Bool {
    let settings = CommandResolver.connectionSettings()
    return settings.mode == .remote
}
private func configURL() -> URL {
FileManager.default.homeDirectoryForCurrentUser
.appendingPathComponent(".clawdis")

View File

@@ -153,12 +153,37 @@ actor PortGuardian {
for port in ports {
let listeners = await self.listeners(on: port)
reports.append(Self.buildReport(port: port, listeners: listeners, mode: mode))
let tunnelHealthy = await self.probeGatewayHealthIfNeeded(
port: port,
mode: mode,
listeners: listeners)
reports.append(Self.buildReport(
port: port,
listeners: listeners,
mode: mode,
tunnelHealthy: tunnelHealthy))
}
return reports
}
/// Probes the local end of a forwarded gateway port with a single HTTP request.
///
/// - Parameters:
///   - port: Local TCP port to probe on `127.0.0.1`.
///   - timeout: Seconds applied as the request timeout, the resource timeout, and the
///     per-request timeout interval. Defaults to 2 seconds.
/// - Returns: `true` when any HTTP response is received (the status code is not inspected);
///   `false` when the request throws (for example on timeout or connection failure) or the
///   probe URL cannot be built.
func probeGatewayHealth(port: Int, timeout: TimeInterval = 2.0) async -> Bool {
    // Avoid force-unwrapping: treat an unbuildable URL as "not healthy" instead of crashing.
    guard let url = URL(string: "http://127.0.0.1:\(port)/") else { return false }

    let config = URLSessionConfiguration.ephemeral
    config.timeoutIntervalForRequest = timeout
    config.timeoutIntervalForResource = timeout

    let session = URLSession(configuration: config)
    // A dedicated session is created per probe; invalidate it on every exit path so
    // repeated health checks do not accumulate live sessions.
    defer { session.finishTasksAndInvalidate() }

    var request = URLRequest(url: url)
    // Always hit the socket; a cached answer would say nothing about the tunnel.
    request.cachePolicy = .reloadIgnoringLocalCacheData
    request.timeoutInterval = timeout

    do {
        let (_, response) = try await session.data(for: request)
        return response is HTTPURLResponse
    } catch {
        return false
    }
}
private func listeners(on port: Int) async -> [Listener] {
let res = await ShellExecutor.run(
command: ["lsof", "-nP", "-iTCP:\(port)", "-sTCP:LISTEN", "-Fpcn"],
@@ -227,7 +252,8 @@ actor PortGuardian {
private static func buildReport(
port: Int,
listeners: [Listener],
mode: AppState.ConnectionMode) -> PortReport
mode: AppState.ConnectionMode,
tunnelHealthy: Bool?) -> PortReport
{
let expectedDesc: String
let okPredicate: (Listener) -> Bool
@@ -253,16 +279,28 @@ actor PortGuardian {
return .init(port: port, expected: expectedDesc, status: .missing(text), listeners: [])
}
let tunnelUnhealthy = mode == .remote && port == 18789 && tunnelHealthy == false
let reportListeners = listeners.map { listener in
ReportListener(
var expected = okPredicate(listener)
if tunnelUnhealthy, expected { expected = false }
return ReportListener(
pid: listener.pid,
command: listener.command,
fullCommand: listener.fullCommand,
user: listener.user,
expected: okPredicate(listener))
expected: expected)
}
let offenders = reportListeners.filter { !$0.expected }
if tunnelUnhealthy {
let list = listeners.map { "\($0.command) (\($0.pid))" }.joined(separator: ", ")
let reason = "Port \(port) is served by \(list), but the SSH tunnel is unhealthy."
return .init(
port: port,
expected: expectedDesc,
status: .interference(reason, offenders: offenders),
listeners: reportListeners)
}
if offenders.isEmpty {
let list = listeners.map { "\($0.command) (\($0.pid))" }.joined(separator: ", ")
let okText = "Port \(port) is served by \(list)."
@@ -318,6 +356,17 @@ actor PortGuardian {
}
}
/// Runs the gateway health probe only where a tunnel verdict applies.
///
/// - Returns: `nil` when no probe is performed (mode is not remote, the port is not 18789,
///   there are no listeners, or no listener's command contains "ssh"); otherwise the
///   result of `probeGatewayHealth(port:)`.
private func probeGatewayHealthIfNeeded(
    port: Int,
    mode: AppState.ConnectionMode,
    listeners: [Listener]) async -> Bool?
{
    let isRemoteGatewayPort = mode == .remote && port == 18789
    if !isRemoteGatewayPort || listeners.isEmpty {
        return nil
    }

    // Only probe when at least one listener's command name mentions ssh.
    let sshListening = listeners.contains(where: { entry in
        entry.command.lowercased().contains("ssh")
    })
    if !sshListening {
        return nil
    }

    return await self.probeGatewayHealth(port: port)
}
private static func loadRecords(from url: URL) -> [Record] {
guard let data = try? Data(contentsOf: url),
let decoded = try? JSONDecoder().decode([Record].self, from: data)
@@ -352,7 +401,7 @@ extension PortGuardian {
command: $0.command,
fullCommand: $0.fullCommand,
user: $0.user) }
return Self.buildReport(port: port, listeners: mapped, mode: mode)
return Self.buildReport(port: port, listeners: mapped, mode: mode, tunnelHealthy: nil)
}
}
#endif

View File

@@ -1,9 +1,11 @@
import Foundation
import OSLog
/// Manages the SSH tunnel that forwards the remote gateway/control port to localhost.
actor RemoteTunnelManager {
static let shared = RemoteTunnelManager()
private let logger = Logger(subsystem: "com.steipete.clawdis", category: "remote-tunnel")
private var controlTunnel: RemotePortTunnel?
func controlTunnelPortIfRunning() async -> UInt16? {
@@ -11,15 +13,19 @@ actor RemoteTunnelManager {
tunnel.process.isRunning,
let local = tunnel.localPort
{
return local
if await self.isTunnelHealthy(port: local) { return local }
self.logger.error("active SSH tunnel on port \(local, privacy: .public) is unhealthy; restarting")
tunnel.terminate()
self.controlTunnel = nil
}
// If a previous Clawdis run already has an SSH listener on the expected port (common after restarts),
// reuse it when it still passes the health probe instead of spawning new ssh processes that immediately
// fail with "Address already in use"; if the probe fails, clean up the stale tunnel.
let desiredPort = UInt16(GatewayEnvironment.gatewayPort())
if let desc = await PortGuardian.shared.describe(port: Int(desiredPort)),
desc.command.lowercased().contains("ssh")
self.isSshProcess(desc)
{
return desiredPort
if await self.isTunnelHealthy(port: desiredPort) { return desiredPort }
await self.cleanupStaleTunnel(desc: desc, port: desiredPort)
}
return nil
}
@@ -49,4 +55,33 @@ actor RemoteTunnelManager {
self.controlTunnel?.terminate()
self.controlTunnel = nil
}
/// Asks the shared `PortGuardian` to probe the given local port and returns its verdict.
private func isTunnelHealthy(port: UInt16) async -> Bool {
    let localPort = Int(port)
    return await PortGuardian.shared.probeGatewayHealth(port: localPort)
}
/// Whether the described listener looks like an ssh process.
///
/// Matches case-insensitively when the command contains "ssh", or when an
/// executable path is present and contains "/ssh".
private func isSshProcess(_ desc: PortGuardian.Descriptor) -> Bool {
    guard !desc.command.lowercased().contains("ssh") else { return true }
    return desc.executablePath?.lowercased().contains("/ssh") ?? false
}
/// Logs, attempts to terminate, and forgets a stale SSH tunnel process.
///
/// The `PortGuardian` record for the pid is removed whether or not the kill attempt
/// reported success; a failed attempt is only logged.
///
/// - Parameters:
///   - desc: Descriptor of the listener; its `pid` is the process to terminate.
///   - port: Port the listener was found on (used for logging only).
private func cleanupStaleTunnel(desc: PortGuardian.Descriptor, port: UInt16) async {
    let pid = desc.pid
    self.logger.error(
        "stale SSH tunnel detected on port \(port, privacy: .public) pid \(pid, privacy: .public)")

    let terminated = await self.kill(pid: pid)
    if terminated == false {
        self.logger.error("failed to terminate stale SSH tunnel pid \(pid, privacy: .public)")
    }

    await PortGuardian.shared.removeRecord(pid: pid)
}
/// Sends termination signals to `pid` via the `kill` command, escalating if needed.
///
/// Tries `-TERM` first and returns `true` as soon as a command reports `ok`; only when
/// `-TERM` does not report `ok` is `-KILL` attempted. Each command runs with a 2-second timeout.
///
/// - Returns: `true` when one of the `kill` invocations reported `ok`, otherwise `false`.
///   NOTE(review): `ok` presumably reflects the command's exit status, not that the
///   process has actually exited — confirm against `ShellExecutor`.
private func kill(pid: Int32) async -> Bool {
    for signalFlag in ["-TERM", "-KILL"] {
        let outcome = await ShellExecutor.run(
            command: ["kill", signalFlag, "\(pid)"],
            cwd: nil,
            env: nil,
            timeout: 2)
        if outcome.ok { return true }
    }
    return false
}
}