Files
clawdbot/apps/macos/Sources/Clawdbot/GatewayProcessManager.swift
Peter Steinberger 7acd26a2fc Move provider to a plugin-architecture (#661)
* refactor: introduce provider plugin registry

* refactor: move provider CLI to plugins

* docs: add provider plugin implementation notes

* refactor: shift provider runtime logic into plugins

* refactor: add plugin defaults and summaries

* docs: update provider plugin notes

* feat(commands): add /commands slash list

* Auto-reply: tidy help message

* Auto-reply: fix status command lint

* Tests: align google shared expectations

* Auto-reply: tidy help message

* Auto-reply: fix status command lint

* refactor: move provider routing into plugins

* test: align agent routing expectations

* docs: update provider plugin notes

* refactor: route replies via provider plugins

* docs: note route-reply plugin hooks

* refactor: extend provider plugin contract

* refactor: derive provider status from plugins

* refactor: unify gateway provider control

* refactor: use plugin metadata in auto-reply

* fix: parenthesize cron target selection

* refactor: derive gateway methods from plugins

* refactor: generalize provider logout

* refactor: route provider logout through plugins

* refactor: move WhatsApp web login methods into plugin

* refactor: generalize provider log prefixes

* refactor: centralize default chat provider

* refactor: derive provider lists from registry

* refactor: move provider reload noops into plugins

* refactor: resolve web login provider via alias

* refactor: derive CLI provider options from plugins

* refactor: derive prompt provider list from plugins

* style: apply biome lint fixes

* fix: resolve provider routing edge cases

* docs: update provider plugin refactor notes

* fix(gateway): harden agent provider routing

* refactor: move provider routing into plugins

* refactor: move provider CLI to plugins

* refactor: derive provider lists from registry

* fix: restore slash command parsing

* refactor: align provider ids for schema

* refactor: unify outbound target resolution

* fix: keep outbound labels stable

* feat: add msteams to cron surfaces

* fix: clean up lint build issues

* refactor: localize chat provider alias normalization

* refactor: drive gateway provider lists from plugins

* docs: update provider plugin notes

* style: format message-provider

* fix: avoid provider registry init cycles

* style: sort message-provider imports

* fix: relax provider alias map typing

* refactor: move provider routing into plugins

* refactor: add plugin pairing/config adapters

* refactor: route pairing and provider removal via plugins

* refactor: align auto-reply provider typing

* test: stabilize telegram media mocks

* docs: update provider plugin refactor notes

* refactor: pluginize outbound targets

* refactor: pluginize provider selection

* refactor: generalize text chunk limits

* docs: update provider plugin notes

* refactor: generalize group session/config

* fix: normalize provider id for room detection

* fix: avoid provider init in system prompt

* style: formatting cleanup

* refactor: normalize agent delivery targets

* test: update outbound delivery labels

* chore: fix lint regressions

* refactor: extend provider plugin adapters

* refactor: move elevated/block streaming defaults to plugins

* refactor: defer outbound send deps to plugins

* docs: note plugin-driven streaming/elevated defaults

* refactor: centralize webchat provider constant

* refactor: add provider setup adapters

* refactor: delegate provider add config to plugins

* docs: document plugin-driven provider add

* refactor: add plugin state/binding metadata

* refactor: build agent provider status from plugins

* docs: note plugin-driven agent bindings

* refactor: centralize internal provider constant usage

* fix: normalize WhatsApp targets for groups and E.164 (#631) (thanks @imfing)

* refactor: centralize default chat provider

* refactor: centralize WhatsApp target normalization

* refactor: move provider routing into plugins

* refactor: normalize agent delivery targets

* chore: fix lint regressions

* fix: normalize WhatsApp targets for groups and E.164 (#631) (thanks @imfing)

* feat: expand provider plugin adapters

* refactor: route auto-reply via provider plugins

* fix: align WhatsApp target normalization

* fix: normalize WhatsApp targets for groups and E.164 (#631) (thanks @imfing)

* refactor: centralize WhatsApp target normalization

* feat: add /config chat config updates

* docs: add /config get alias

* feat(commands): add /commands slash list

* refactor: centralize default chat provider

* style: apply biome lint fixes

* chore: fix lint regressions

* fix: clean up whatsapp allowlist typing

* style: format config command helpers

* refactor: pluginize tool threading context

* refactor: normalize session announce targets

* docs: note new plugin threading and announce hooks

* refactor: pluginize message actions

* docs: update provider plugin actions notes

* fix: align provider action adapters

* refactor: centralize webchat checks

* style: format message provider helpers

* refactor: move provider onboarding into adapters

* docs: note onboarding provider adapters

* feat: add msteams onboarding adapter

* style: organize onboarding imports

* fix: normalize msteams allowFrom types

* feat: add plugin text chunk limits

* refactor: use plugin chunk limit fallbacks

* feat: add provider mention stripping hooks

* style: organize provider plugin type imports

* refactor: generalize health snapshots

* refactor: update macOS health snapshot handling

* docs: refresh health snapshot notes

* style: format health snapshot updates

* refactor: drive security warnings via plugins

* docs: note provider security adapter

* style: format provider security adapters

* refactor: centralize provider account defaults

* refactor: type gateway client identity constants

* chore: regen gateway protocol swift

* fix: degrade health on failed provider probe

* refactor: centralize pairing approve hint

* docs: add plugin CLI command references

* refactor: route auth and tool sends through plugins

* docs: expand provider plugin hooks

* refactor: document provider docking touchpoints

* refactor: normalize internal provider defaults

* refactor: streamline outbound delivery wiring

* refactor: make provider onboarding plugin-owned

* refactor: support provider-owned agent tools

* refactor: move telegram draft chunking into telegram module

* refactor: infer provider tool sends via extractToolSend

* fix: repair plugin onboarding imports

* refactor: de-dup outbound target normalization

* style: tidy plugin and agent imports

* refactor: data-drive provider selection line

* fix: satisfy lint after provider plugin rebase

* test: deflake gateway-cli coverage

* style: format gateway-cli coverage test

* refactor(provider-plugins): simplify provider ids

* test(pairing-cli): avoid provider-specific ternary

* style(macos): swiftformat HealthStore

* refactor(sandbox): derive provider tool denylist

* fix(sandbox): avoid plugin init in defaults

* refactor(provider-plugins): centralize provider aliases

* style(test): satisfy biome

* refactor(protocol): v3 providers.status maps

* refactor(ui): adapt to protocol v3

* refactor(macos): adapt to protocol v3

* test: update providers.status v3 fixtures

* refactor(gateway): map provider runtime snapshot

* test(gateway): update reload runtime snapshot

* refactor(whatsapp): normalize heartbeat provider id

* docs(refactor): update provider plugin notes

* style: satisfy biome after rebase

* fix: describe sandboxed elevated in prompt

* feat(gateway): add agent image attachments + live probe

* refactor: derive CLI provider options from plugins

* fix(gateway): harden agent provider routing

* fix(gateway): harden agent provider routing

* refactor: align provider ids for schema

* fix(protocol): keep agent provider string

* fix(gateway): harden agent provider routing

* fix(protocol): keep agent provider string

* refactor: normalize agent delivery targets

* refactor: support provider-owned agent tools

* refactor(config): provider-keyed elevated allowFrom

* style: satisfy biome

* fix(gateway): appease provider narrowing

* style: satisfy biome

* refactor(reply): move group intro hints into plugin

* fix(reply): avoid plugin registry init cycle

* refactor(providers): add lightweight provider dock

* refactor(gateway): use typed client id in connect

* refactor(providers): document docks and avoid init cycles

* refactor(providers): make media limit helper generic

* fix(providers): break plugin registry import cycles

* style: satisfy biome

* refactor(status-all): build providers table from plugins

* refactor(gateway): delegate web login to provider plugin

* refactor(provider): drop web alias

* refactor(provider): lazy-load monitors

* style: satisfy lint/format

* style: format status-all providers table

* style: swiftformat gateway discovery model

* test: make reload plan plugin-driven

* fix: avoid token stringification in status-all

* refactor: make provider IDs explicit in status

* feat: warn on signal/imessage provider runtime errors

* test: cover gateway provider runtime warnings in status

* fix: add runtime kind to provider status issues

* test: cover health degradation on probe failure

* fix: keep routeReply lightweight

* style: organize routeReply imports

* refactor(web): extract auth-store helpers

* refactor(whatsapp): lazy login imports

* refactor(outbound): route replies via plugin outbound

* docs: update provider plugin notes

* style: format provider status issues

* fix: make sandbox scope warning wrap-safe

* refactor: load outbound adapters from provider plugins

* docs: update provider plugin outbound notes

* style(macos): fix swiftformat lint

* docs: changelog for provider plugins

* fix(macos): satisfy swiftformat

* fix(macos): open settings via menu action

* style: format after rebase

* fix(macos): open Settings via menu action

---------

Co-authored-by: LK <luke@kyohere.com>
Co-authored-by: Luke K (pr-0f3t) <2609441+lc0rp@users.noreply.github.com>
Co-authored-by: Xin <xin@imfing.com>
2026-01-11 11:45:25 +00:00

387 lines
15 KiB
Swift

import Foundation
import Observation
@MainActor
@Observable
final class GatewayProcessManager {
static let shared = GatewayProcessManager()
enum Status: Equatable {
case stopped
case starting
case running(details: String?)
case attachedExisting(details: String?)
case failed(String)
var label: String {
switch self {
case .stopped: return "Stopped"
case .starting: return "Starting…"
case let .running(details):
if let details, !details.isEmpty { return "Running (\(details))" }
return "Running"
case let .attachedExisting(details):
if let details, !details.isEmpty {
return "Using existing gateway (\(details))"
}
return "Using existing gateway"
case let .failed(reason): return "Failed: \(reason)"
}
}
}
private(set) var status: Status = .stopped {
didSet { CanvasManager.shared.refreshDebugStatus() }
}
private(set) var log: String = ""
private(set) var environmentStatus: GatewayEnvironmentStatus = .checking
private(set) var existingGatewayDetails: String?
private(set) var lastFailureReason: String?
private var desiredActive = false
private var environmentRefreshTask: Task<Void, Never>?
private var lastEnvironmentRefresh: Date?
private var logRefreshTask: Task<Void, Never>?
private let logger = Logger(subsystem: "com.clawdbot", category: "gateway.process")
private let logLimit = 20000 // characters to keep in-memory
private let environmentRefreshMinInterval: TimeInterval = 30
func setActive(_ active: Bool) {
// Remote mode should never spawn a local gateway; treat as stopped.
if CommandResolver.connectionModeIsRemote() {
self.desiredActive = false
self.stop()
self.status = .stopped
self.appendLog("[gateway] remote mode active; skipping local gateway\n")
self.logger.info("gateway process skipped: remote mode active")
return
}
self.logger.debug("gateway active requested active=\(active)")
self.desiredActive = active
self.refreshEnvironmentStatus()
if active {
self.startIfNeeded()
} else {
self.stop()
}
}
func ensureLaunchAgentEnabledIfNeeded() async {
guard !CommandResolver.connectionModeIsRemote() else { return }
guard !AppStateStore.attachExistingGatewayOnly else { return }
let enabled = await GatewayLaunchAgentManager.isLoaded()
guard !enabled else { return }
let bundlePath = Bundle.main.bundleURL.path
let port = GatewayEnvironment.gatewayPort()
self.appendLog("[gateway] auto-enabling launchd job (\(gatewayLaunchdLabel)) on port \(port)\n")
let err = await GatewayLaunchAgentManager.set(enabled: true, bundlePath: bundlePath, port: port)
if let err {
self.appendLog("[gateway] launchd auto-enable failed: \(err)\n")
}
}
func startIfNeeded() {
guard self.desiredActive else { return }
// Do not spawn in remote mode (the gateway should run on the remote host).
guard !CommandResolver.connectionModeIsRemote() else {
self.status = .stopped
return
}
self.status = .starting
self.logger.debug("gateway start requested")
// First try to latch onto an already-running gateway to avoid spawning a duplicate.
Task { [weak self] in
guard let self else { return }
if await self.attachExistingGatewayIfAvailable() {
return
}
// Respect debug toggle: only attach, never spawn, when enabled.
if AppStateStore.attachExistingGatewayOnly {
await MainActor.run {
self.status = .failed("Attach-only enabled; no gateway to attach")
self.appendLog("[gateway] attach-only enabled; not spawning local gateway\n")
self.logger.warning("gateway attach-only enabled; not spawning")
}
return
}
await self.enableLaunchdGateway()
}
}
func stop() {
self.desiredActive = false
self.existingGatewayDetails = nil
self.lastFailureReason = nil
self.status = .stopped
self.logger.info("gateway stop requested")
let bundlePath = Bundle.main.bundleURL.path
Task {
_ = await GatewayLaunchAgentManager.set(
enabled: false,
bundlePath: bundlePath,
port: GatewayEnvironment.gatewayPort())
}
}
func refreshEnvironmentStatus(force: Bool = false) {
let now = Date()
if !force {
if self.environmentRefreshTask != nil { return }
if let last = self.lastEnvironmentRefresh,
now.timeIntervalSince(last) < self.environmentRefreshMinInterval
{
return
}
}
self.lastEnvironmentRefresh = now
self.environmentRefreshTask = Task { [weak self] in
let status = await Task.detached(priority: .utility) {
GatewayEnvironment.check()
}.value
await MainActor.run {
guard let self else { return }
self.environmentStatus = status
self.environmentRefreshTask = nil
}
}
}
func refreshLog() {
guard self.logRefreshTask == nil else { return }
let path = LogLocator.launchdGatewayLogPath
let limit = self.logLimit
self.logRefreshTask = Task { [weak self] in
let log = await Task.detached(priority: .utility) {
Self.readGatewayLog(path: path, limit: limit)
}.value
await MainActor.run {
guard let self else { return }
if !log.isEmpty {
self.log = log
}
self.logRefreshTask = nil
}
}
}
// MARK: - Internals
/// Attempt to connect to an already-running gateway on the configured port.
/// If successful, mark status as attached and skip spawning a new process.
private func attachExistingGatewayIfAvailable() async -> Bool {
let port = GatewayEnvironment.gatewayPort()
let instance = await PortGuardian.shared.describe(port: port)
let instanceText = instance.map { self.describe(instance: $0) }
let hasListener = instance != nil
let attemptAttach = {
try await GatewayConnection.shared.requestRaw(method: .health, timeoutMs: 2000)
}
for attempt in 0..<(hasListener ? 3 : 1) {
do {
let data = try await attemptAttach()
let snap = decodeHealthSnapshot(from: data)
let details = self.describe(details: instanceText, port: port, snap: snap)
self.existingGatewayDetails = details
self.status = .attachedExisting(details: details)
self.appendLog("[gateway] using existing instance: \(details)\n")
self.logger.info("gateway using existing instance details=\(details)")
self.refreshControlChannelIfNeeded(reason: "attach existing")
self.refreshLog()
return true
} catch {
if attempt < 2, hasListener {
try? await Task.sleep(nanoseconds: 250_000_000)
continue
}
if hasListener {
let reason = self.describeAttachFailure(error, port: port, instance: instance)
self.existingGatewayDetails = instanceText
self.status = .failed(reason)
self.lastFailureReason = reason
self.appendLog("[gateway] existing listener on port \(port) but attach failed: \(reason)\n")
self.logger.warning("gateway attach failed reason=\(reason)")
return true
}
// No reachable gateway (and no listener) fall through to spawn.
self.existingGatewayDetails = nil
return false
}
}
self.existingGatewayDetails = nil
return false
}
private func describe(details instance: String?, port: Int, snap: HealthSnapshot?) -> String {
let instanceText = instance ?? "pid unknown"
if let snap {
let linkId = snap.providerOrder?.first(where: {
if let summary = snap.providers[$0] { return summary.linked != nil }
return false
}) ?? snap.providers.keys.first(where: {
if let summary = snap.providers[$0] { return summary.linked != nil }
return false
})
let linked = linkId.flatMap { snap.providers[$0]?.linked } ?? false
let authAge = linkId.flatMap { snap.providers[$0]?.authAgeMs }.flatMap(msToAge) ?? "unknown age"
let label =
linkId.flatMap { snap.providerLabels?[$0] } ??
linkId?.capitalized ??
"provider"
let linkText = linked ? "linked" : "not linked"
return "port \(port), \(label) \(linkText), auth \(authAge), \(instanceText)"
}
return "port \(port), health probe succeeded, \(instanceText)"
}
private func describe(instance: PortGuardian.Descriptor) -> String {
let path = instance.executablePath ?? "path unknown"
return "pid \(instance.pid) \(instance.command) @ \(path)"
}
private func describeAttachFailure(_ error: Error, port: Int, instance: PortGuardian.Descriptor?) -> String {
let ns = error as NSError
let message = ns.localizedDescription.isEmpty ? "unknown error" : ns.localizedDescription
let lower = message.lowercased()
if self.isGatewayAuthFailure(error) {
return """
Gateway on port \(port) rejected auth. Set gateway.auth.token (or CLAWDBOT_GATEWAY_TOKEN) \
to match the running gateway (or clear it on the gateway) and retry.
"""
}
if lower.contains("protocol mismatch") {
return "Gateway on port \(port) is incompatible (protocol mismatch). Update the app/gateway."
}
if lower.contains("unexpected response") || lower.contains("invalid response") {
return "Port \(port) returned non-gateway data; another process is using it."
}
if let instance {
let instanceText = self.describe(instance: instance)
return "Gateway listener found on port \(port) (\(instanceText)) but health check failed: \(message)"
}
return "Gateway listener found on port \(port) but health check failed: \(message)"
}
private func isGatewayAuthFailure(_ error: Error) -> Bool {
if let urlError = error as? URLError, urlError.code == .dataNotAllowed {
return true
}
let ns = error as NSError
if ns.domain == "Gateway", ns.code == 1008 { return true }
let lower = ns.localizedDescription.lowercased()
return lower.contains("unauthorized") || lower.contains("auth")
}
private func enableLaunchdGateway() async {
self.existingGatewayDetails = nil
let resolution = await Task.detached(priority: .utility) {
GatewayEnvironment.resolveGatewayCommand()
}.value
await MainActor.run { self.environmentStatus = resolution.status }
guard resolution.command != nil else {
await MainActor.run {
self.status = .failed(resolution.status.message)
}
self.logger.error("gateway command resolve failed: \(resolution.status.message)")
return
}
let bundlePath = Bundle.main.bundleURL.path
let port = GatewayEnvironment.gatewayPort()
self.appendLog("[gateway] enabling launchd job (\(gatewayLaunchdLabel)) on port \(port)\n")
self.logger.info("gateway enabling launchd port=\(port)")
let err = await GatewayLaunchAgentManager.set(enabled: true, bundlePath: bundlePath, port: port)
if let err {
self.status = .failed(err)
self.lastFailureReason = err
self.logger.error("gateway launchd enable failed: \(err)")
return
}
// Best-effort: wait for the gateway to accept connections.
let deadline = Date().addingTimeInterval(6)
while Date() < deadline {
if !self.desiredActive { return }
do {
_ = try await GatewayConnection.shared.requestRaw(method: .health, timeoutMs: 1500)
let instance = await PortGuardian.shared.describe(port: port)
let details = instance.map { "pid \($0.pid)" }
self.status = .running(details: details)
self.logger.info("gateway started details=\(details ?? "ok")")
self.refreshControlChannelIfNeeded(reason: "gateway started")
self.refreshLog()
return
} catch {
try? await Task.sleep(nanoseconds: 400_000_000)
}
}
self.status = .failed("Gateway did not start in time")
self.lastFailureReason = "launchd start timeout"
self.logger.warning("gateway start timed out")
}
private func appendLog(_ chunk: String) {
self.log.append(chunk)
if self.log.count > self.logLimit {
self.log = String(self.log.suffix(self.logLimit))
}
}
private func refreshControlChannelIfNeeded(reason: String) {
switch ControlChannel.shared.state {
case .connected, .connecting:
return
case .disconnected, .degraded:
break
}
self.appendLog("[gateway] refreshing control channel (\(reason))\n")
self.logger.debug("gateway control channel refresh reason=\(reason)")
Task { await ControlChannel.shared.configure() }
}
func waitForGatewayReady(timeout: TimeInterval = 6) async -> Bool {
let deadline = Date().addingTimeInterval(timeout)
while Date() < deadline {
if !self.desiredActive { return false }
do {
_ = try await GatewayConnection.shared.requestRaw(method: .health, timeoutMs: 1500)
return true
} catch {
try? await Task.sleep(nanoseconds: 300_000_000)
}
}
self.appendLog("[gateway] readiness wait timed out\n")
self.logger.warning("gateway readiness wait timed out")
return false
}
func clearLog() {
self.log = ""
try? FileManager.default.removeItem(atPath: LogLocator.launchdGatewayLogPath)
self.logger.debug("gateway log cleared")
}
func setProjectRoot(path: String) {
CommandResolver.setProjectRoot(path)
}
func projectRootPath() -> String {
CommandResolver.projectRootPath()
}
private nonisolated static func readGatewayLog(path: String, limit: Int) -> String {
guard FileManager.default.fileExists(atPath: path) else { return "" }
guard let data = try? Data(contentsOf: URL(fileURLWithPath: path)) else { return "" }
let text = String(data: data, encoding: .utf8) ?? ""
if text.count <= limit { return text }
return String(text.suffix(limit))
}
}