fix(macos): clear stale gateway failures

This commit is contained in:
Peter Steinberger
2026-01-22 01:48:37 +00:00
parent 1b947dcdf9
commit 75dd1781b7
3 changed files with 193 additions and 9 deletions

View File

@@ -6,15 +6,20 @@ final class ConnectionModeCoordinator {
static let shared = ConnectionModeCoordinator()
private let logger = Logger(subsystem: "com.clawdbot", category: "connection")
private var lastMode: AppState.ConnectionMode?
/// Apply the requested connection mode by starting/stopping local gateway,
/// managing the control-channel SSH tunnel, and cleaning up chat windows/panels.
func apply(mode: AppState.ConnectionMode, paused: Bool) async {
if let lastMode = self.lastMode, lastMode != mode {
GatewayProcessManager.shared.clearLastFailure()
NodesStore.shared.lastError = nil
}
self.lastMode = mode
switch mode {
case .unconfigured:
if let error = await NodeServiceManager.stop() {
NodesStore.shared.lastError = "Node service stop failed: \(error)"
}
_ = await NodeServiceManager.stop()
NodesStore.shared.lastError = nil
await RemoteTunnelManager.shared.stopAll()
WebChatManager.shared.resetTunnels()
GatewayProcessManager.shared.stop()
@@ -23,9 +28,8 @@ final class ConnectionModeCoordinator {
Task.detached { await PortGuardian.shared.sweep(mode: .unconfigured) }
case .local:
if let error = await NodeServiceManager.stop() {
NodesStore.shared.lastError = "Node service stop failed: \(error)"
}
_ = await NodeServiceManager.stop()
NodesStore.shared.lastError = nil
await RemoteTunnelManager.shared.stopAll()
WebChatManager.shared.resetTunnels()
let shouldStart = GatewayAutostartPolicy.shouldStartGateway(mode: .local, paused: paused)
@@ -56,6 +60,7 @@ final class ConnectionModeCoordinator {
WebChatManager.shared.resetTunnels()
do {
NodesStore.shared.lastError = nil
if let error = await NodeServiceManager.start() {
NodesStore.shared.lastError = "Node service start failed: \(error)"
}

View File

@@ -42,10 +42,20 @@ final class GatewayProcessManager {
private var environmentRefreshTask: Task<Void, Never>?
private var lastEnvironmentRefresh: Date?
private var logRefreshTask: Task<Void, Never>?
#if DEBUG
private var testingConnection: GatewayConnection?
#endif
private let logger = Logger(subsystem: "com.clawdbot", category: "gateway.process")
private let logLimit = 20000 // characters to keep in-memory
private let environmentRefreshMinInterval: TimeInterval = 30
private var connection: GatewayConnection {
#if DEBUG
return self.testingConnection ?? .shared
#else
return .shared
#endif
}
func setActive(_ active: Bool) {
// Remote mode should never spawn a local gateway; treat as stopped.
@@ -126,6 +136,10 @@ final class GatewayProcessManager {
}
}
func clearLastFailure() {
self.lastFailureReason = nil
}
func refreshEnvironmentStatus(force: Bool = false) {
let now = Date()
if !force {
@@ -178,7 +192,7 @@ final class GatewayProcessManager {
let hasListener = instance != nil
let attemptAttach = {
try await GatewayConnection.shared.requestRaw(method: .health, timeoutMs: 2000)
try await self.connection.requestRaw(method: .health, timeoutMs: 2000)
}
for attempt in 0..<(hasListener ? 3 : 1) {
@@ -187,6 +201,7 @@ final class GatewayProcessManager {
let snap = decodeHealthSnapshot(from: data)
let details = self.describe(details: instanceText, port: port, snap: snap)
self.existingGatewayDetails = details
self.clearLastFailure()
self.status = .attachedExisting(details: details)
self.appendLog("[gateway] using existing instance: \(details)\n")
self.logger.info("gateway using existing instance details=\(details)")
@@ -310,9 +325,10 @@ final class GatewayProcessManager {
while Date() < deadline {
if !self.desiredActive { return }
do {
_ = try await GatewayConnection.shared.requestRaw(method: .health, timeoutMs: 1500)
_ = try await self.connection.requestRaw(method: .health, timeoutMs: 1500)
let instance = await PortGuardian.shared.describe(port: port)
let details = instance.map { "pid \($0.pid)" }
self.clearLastFailure()
self.status = .running(details: details)
self.logger.info("gateway started details=\(details ?? "ok")")
self.refreshControlChannelIfNeeded(reason: "gateway started")
@@ -352,7 +368,8 @@ final class GatewayProcessManager {
while Date() < deadline {
if !self.desiredActive { return false }
do {
_ = try await GatewayConnection.shared.requestRaw(method: .health, timeoutMs: 1500)
_ = try await self.connection.requestRaw(method: .health, timeoutMs: 1500)
self.clearLastFailure()
return true
} catch {
try? await Task.sleep(nanoseconds: 300_000_000)
@@ -385,3 +402,19 @@ final class GatewayProcessManager {
return String(text.suffix(limit))
}
}
#if DEBUG
extension GatewayProcessManager {
func setTestingConnection(_ connection: GatewayConnection?) {
self.testingConnection = connection
}
func setTestingDesiredActive(_ active: Bool) {
self.desiredActive = active
}
func setTestingLastFailureReason(_ reason: String?) {
self.lastFailureReason = reason
}
}
#endif