fix(macos): improve onboarding discovery + restart onboarding

This commit is contained in:
Peter Steinberger
2026-01-11 01:13:38 +01:00
parent 318f59ec3e
commit 3dbd6766ab
7 changed files with 122 additions and 45 deletions

View File

@@ -213,40 +213,51 @@ public final class GatewayDiscoveryModel {
private func scheduleWideAreaFallback() {
let domain = ClawdbotBonjour.wideAreaBridgeServiceDomain
if ProcessInfo.processInfo.isRunningTests { return }
guard self.wideAreaFallbackTask == nil else { return }
self.wideAreaFallbackTask = Task.detached(priority: .utility) { [weak self] in
guard let self else { return }
if Task.isCancelled { return }
let hasResults = await MainActor.run {
!(self.gatewaysByDomain[domain]?.isEmpty ?? true)
}
if hasResults { return }
let beacons = WideAreaGatewayDiscovery.discover(timeoutSeconds: 3.0)
if beacons.isEmpty { return }
await MainActor.run { [weak self] in
guard let self else { return }
self.wideAreaFallbackGateways = beacons.map { beacon in
let stableID = "wide-area|\(domain)|\(beacon.instanceName)"
let isLocal = Self.isLocalGateway(
lanHost: beacon.lanHost,
tailnetDns: beacon.tailnetDns,
displayName: beacon.displayName,
serviceName: beacon.instanceName,
local: self.localIdentity)
return DiscoveredGateway(
displayName: beacon.displayName,
lanHost: beacon.lanHost,
tailnetDns: beacon.tailnetDns,
sshPort: beacon.sshPort ?? 22,
gatewayPort: beacon.gatewayPort,
cliPath: beacon.cliPath,
stableID: stableID,
debugID: "\(beacon.instanceName)@\(beacon.host):\(beacon.port)",
isLocal: isLocal)
var attempt = 0
let startedAt = Date()
while !Task.isCancelled, Date().timeIntervalSince(startedAt) < 35.0 {
let hasResults = await MainActor.run {
!(self.gatewaysByDomain[domain]?.isEmpty ?? true)
}
self.recomputeGateways()
if hasResults { return }
// Wide-area discovery can be racy (Tailscale not yet up, DNS zone not
// published yet). Retry with a short backoff while onboarding is open.
let beacons = WideAreaGatewayDiscovery.discover(timeoutSeconds: 2.0)
if !beacons.isEmpty {
await MainActor.run { [weak self] in
guard let self else { return }
self.wideAreaFallbackGateways = beacons.map { beacon in
let stableID = "wide-area|\(domain)|\(beacon.instanceName)"
let isLocal = Self.isLocalGateway(
lanHost: beacon.lanHost,
tailnetDns: beacon.tailnetDns,
displayName: beacon.displayName,
serviceName: beacon.instanceName,
local: self.localIdentity)
return DiscoveredGateway(
displayName: beacon.displayName,
lanHost: beacon.lanHost,
tailnetDns: beacon.tailnetDns,
sshPort: beacon.sshPort ?? 22,
gatewayPort: beacon.gatewayPort,
cliPath: beacon.cliPath,
stableID: stableID,
debugID: "\(beacon.instanceName)@\(beacon.host):\(beacon.port)",
isLocal: isLocal)
}
self.recomputeGateways()
}
return
}
attempt += 1
let backoff = min(8.0, 0.6 + (Double(attempt) * 0.7))
try? await Task.sleep(nanoseconds: UInt64(backoff * 1_000_000_000))
}
}
}

View File

@@ -18,6 +18,7 @@ enum WideAreaGatewayDiscovery {
private static let maxCandidates = 40
private static let digPath = "/usr/bin/dig"
private static let defaultTimeoutSeconds: TimeInterval = 0.2
private static let nameserverProbeConcurrency = 6
struct DiscoveryContext: Sendable {
var tailscaleStatus: @Sendable () -> String?
@@ -153,25 +154,65 @@ enum WideAreaGatewayDiscovery {
private static func findNameserver(
candidates: inout [String],
remaining: () -> TimeInterval,
dig: @Sendable (_ args: [String], _ timeout: TimeInterval) -> String?) -> String?
dig: @escaping @Sendable (_ args: [String], _ timeout: TimeInterval) -> String?) -> String?
{
let domain = ClawdbotBonjour.wideAreaBridgeServiceDomain
let domainTrimmed = domain.trimmingCharacters(in: CharacterSet(charactersIn: "."))
let probeName = "_clawdbot-bridge._tcp.\(domainTrimmed)"
while !candidates.isEmpty {
if remaining() <= 0 { break }
let ip = candidates.removeFirst()
if let stdout = dig(
["+short", "+time=1", "+tries=1", "@\(ip)", probeName, "PTR"],
min(defaultTimeoutSeconds, remaining())),
stdout.split(whereSeparator: \.isNewline).isEmpty == false
{
return ip
let ips = candidates
candidates.removeAll(keepingCapacity: true)
if ips.isEmpty { return nil }
final class ProbeState: @unchecked Sendable {
let lock = NSLock()
var nextIndex = 0
var found: String?
}
let state = ProbeState()
let deadline = Date().addingTimeInterval(max(0, remaining()))
let workerCount = min(self.nameserverProbeConcurrency, ips.count)
let group = DispatchGroup()
for _ in 0..<workerCount {
group.enter()
DispatchQueue.global(qos: .utility).async {
defer { group.leave() }
while Date() < deadline {
state.lock.lock()
if state.found != nil {
state.lock.unlock()
return
}
let i = state.nextIndex
state.nextIndex += 1
state.lock.unlock()
if i >= ips.count { return }
let ip = ips[i]
let budget = deadline.timeIntervalSinceNow
if budget <= 0 { return }
if let stdout = dig(
["+short", "+time=1", "+tries=1", "@\(ip)", probeName, "PTR"],
min(defaultTimeoutSeconds, budget)),
stdout.split(whereSeparator: \.isNewline).isEmpty == false
{
state.lock.lock()
if state.found == nil {
state.found = ip
}
state.lock.unlock()
return
}
}
}
}
return nil
_ = group.wait(timeout: .now() + max(0.0, remaining()))
return state.found
}
private static func runDig(args: [String], timeout: TimeInterval) -> String? {