Files
clawdbot/apps/macos/Sources/Clawdbot/HealthStore.swift
Peter Steinberger 7acd26a2fc Move provider to a plugin-architecture (#661)
* refactor: introduce provider plugin registry

* refactor: move provider CLI to plugins

* docs: add provider plugin implementation notes

* refactor: shift provider runtime logic into plugins

* refactor: add plugin defaults and summaries

* docs: update provider plugin notes

* feat(commands): add /commands slash list

* Auto-reply: tidy help message

* Auto-reply: fix status command lint

* Tests: align google shared expectations

* Auto-reply: tidy help message

* Auto-reply: fix status command lint

* refactor: move provider routing into plugins

* test: align agent routing expectations

* docs: update provider plugin notes

* refactor: route replies via provider plugins

* docs: note route-reply plugin hooks

* refactor: extend provider plugin contract

* refactor: derive provider status from plugins

* refactor: unify gateway provider control

* refactor: use plugin metadata in auto-reply

* fix: parenthesize cron target selection

* refactor: derive gateway methods from plugins

* refactor: generalize provider logout

* refactor: route provider logout through plugins

* refactor: move WhatsApp web login methods into plugin

* refactor: generalize provider log prefixes

* refactor: centralize default chat provider

* refactor: derive provider lists from registry

* refactor: move provider reload noops into plugins

* refactor: resolve web login provider via alias

* refactor: derive CLI provider options from plugins

* refactor: derive prompt provider list from plugins

* style: apply biome lint fixes

* fix: resolve provider routing edge cases

* docs: update provider plugin refactor notes

* fix(gateway): harden agent provider routing

* refactor: move provider routing into plugins

* refactor: move provider CLI to plugins

* refactor: derive provider lists from registry

* fix: restore slash command parsing

* refactor: align provider ids for schema

* refactor: unify outbound target resolution

* fix: keep outbound labels stable

* feat: add msteams to cron surfaces

* fix: clean up lint build issues

* refactor: localize chat provider alias normalization

* refactor: drive gateway provider lists from plugins

* docs: update provider plugin notes

* style: format message-provider

* fix: avoid provider registry init cycles

* style: sort message-provider imports

* fix: relax provider alias map typing

* refactor: move provider routing into plugins

* refactor: add plugin pairing/config adapters

* refactor: route pairing and provider removal via plugins

* refactor: align auto-reply provider typing

* test: stabilize telegram media mocks

* docs: update provider plugin refactor notes

* refactor: pluginize outbound targets

* refactor: pluginize provider selection

* refactor: generalize text chunk limits

* docs: update provider plugin notes

* refactor: generalize group session/config

* fix: normalize provider id for room detection

* fix: avoid provider init in system prompt

* style: formatting cleanup

* refactor: normalize agent delivery targets

* test: update outbound delivery labels

* chore: fix lint regressions

* refactor: extend provider plugin adapters

* refactor: move elevated/block streaming defaults to plugins

* refactor: defer outbound send deps to plugins

* docs: note plugin-driven streaming/elevated defaults

* refactor: centralize webchat provider constant

* refactor: add provider setup adapters

* refactor: delegate provider add config to plugins

* docs: document plugin-driven provider add

* refactor: add plugin state/binding metadata

* refactor: build agent provider status from plugins

* docs: note plugin-driven agent bindings

* refactor: centralize internal provider constant usage

* fix: normalize WhatsApp targets for groups and E.164 (#631) (thanks @imfing)

* refactor: centralize default chat provider

* refactor: centralize WhatsApp target normalization

* refactor: move provider routing into plugins

* refactor: normalize agent delivery targets

* chore: fix lint regressions

* fix: normalize WhatsApp targets for groups and E.164 (#631) (thanks @imfing)

* feat: expand provider plugin adapters

* refactor: route auto-reply via provider plugins

* fix: align WhatsApp target normalization

* fix: normalize WhatsApp targets for groups and E.164 (#631) (thanks @imfing)

* refactor: centralize WhatsApp target normalization

* feat: add /config chat config updates

* docs: add /config get alias

* feat(commands): add /commands slash list

* refactor: centralize default chat provider

* style: apply biome lint fixes

* chore: fix lint regressions

* fix: clean up whatsapp allowlist typing

* style: format config command helpers

* refactor: pluginize tool threading context

* refactor: normalize session announce targets

* docs: note new plugin threading and announce hooks

* refactor: pluginize message actions

* docs: update provider plugin actions notes

* fix: align provider action adapters

* refactor: centralize webchat checks

* style: format message provider helpers

* refactor: move provider onboarding into adapters

* docs: note onboarding provider adapters

* feat: add msteams onboarding adapter

* style: organize onboarding imports

* fix: normalize msteams allowFrom types

* feat: add plugin text chunk limits

* refactor: use plugin chunk limit fallbacks

* feat: add provider mention stripping hooks

* style: organize provider plugin type imports

* refactor: generalize health snapshots

* refactor: update macOS health snapshot handling

* docs: refresh health snapshot notes

* style: format health snapshot updates

* refactor: drive security warnings via plugins

* docs: note provider security adapter

* style: format provider security adapters

* refactor: centralize provider account defaults

* refactor: type gateway client identity constants

* chore: regen gateway protocol swift

* fix: degrade health on failed provider probe

* refactor: centralize pairing approve hint

* docs: add plugin CLI command references

* refactor: route auth and tool sends through plugins

* docs: expand provider plugin hooks

* refactor: document provider docking touchpoints

* refactor: normalize internal provider defaults

* refactor: streamline outbound delivery wiring

* refactor: make provider onboarding plugin-owned

* refactor: support provider-owned agent tools

* refactor: move telegram draft chunking into telegram module

* refactor: infer provider tool sends via extractToolSend

* fix: repair plugin onboarding imports

* refactor: de-dup outbound target normalization

* style: tidy plugin and agent imports

* refactor: data-drive provider selection line

* fix: satisfy lint after provider plugin rebase

* test: deflake gateway-cli coverage

* style: format gateway-cli coverage test

* refactor(provider-plugins): simplify provider ids

* test(pairing-cli): avoid provider-specific ternary

* style(macos): swiftformat HealthStore

* refactor(sandbox): derive provider tool denylist

* fix(sandbox): avoid plugin init in defaults

* refactor(provider-plugins): centralize provider aliases

* style(test): satisfy biome

* refactor(protocol): v3 providers.status maps

* refactor(ui): adapt to protocol v3

* refactor(macos): adapt to protocol v3

* test: update providers.status v3 fixtures

* refactor(gateway): map provider runtime snapshot

* test(gateway): update reload runtime snapshot

* refactor(whatsapp): normalize heartbeat provider id

* docs(refactor): update provider plugin notes

* style: satisfy biome after rebase

* fix: describe sandboxed elevated in prompt

* feat(gateway): add agent image attachments + live probe

* refactor: derive CLI provider options from plugins

* fix(gateway): harden agent provider routing

* fix(gateway): harden agent provider routing

* refactor: align provider ids for schema

* fix(protocol): keep agent provider string

* fix(gateway): harden agent provider routing

* fix(protocol): keep agent provider string

* refactor: normalize agent delivery targets

* refactor: support provider-owned agent tools

* refactor(config): provider-keyed elevated allowFrom

* style: satisfy biome

* fix(gateway): appease provider narrowing

* style: satisfy biome

* refactor(reply): move group intro hints into plugin

* fix(reply): avoid plugin registry init cycle

* refactor(providers): add lightweight provider dock

* refactor(gateway): use typed client id in connect

* refactor(providers): document docks and avoid init cycles

* refactor(providers): make media limit helper generic

* fix(providers): break plugin registry import cycles

* style: satisfy biome

* refactor(status-all): build providers table from plugins

* refactor(gateway): delegate web login to provider plugin

* refactor(provider): drop web alias

* refactor(provider): lazy-load monitors

* style: satisfy lint/format

* style: format status-all providers table

* style: swiftformat gateway discovery model

* test: make reload plan plugin-driven

* fix: avoid token stringification in status-all

* refactor: make provider IDs explicit in status

* feat: warn on signal/imessage provider runtime errors

* test: cover gateway provider runtime warnings in status

* fix: add runtime kind to provider status issues

* test: cover health degradation on probe failure

* fix: keep routeReply lightweight

* style: organize routeReply imports

* refactor(web): extract auth-store helpers

* refactor(whatsapp): lazy login imports

* refactor(outbound): route replies via plugin outbound

* docs: update provider plugin notes

* style: format provider status issues

* fix: make sandbox scope warning wrap-safe

* refactor: load outbound adapters from provider plugins

* docs: update provider plugin outbound notes

* style(macos): fix swiftformat lint

* docs: changelog for provider plugins

* fix(macos): satisfy swiftformat

* fix(macos): open settings via menu action

* style: format after rebase

* fix(macos): open Settings via menu action

---------

Co-authored-by: LK <luke@kyohere.com>
Co-authored-by: Luke K (pr-0f3t) <2609441+lc0rp@users.noreply.github.com>
Co-authored-by: Xin <xin@imfing.com>
2026-01-11 11:45:25 +00:00

297 lines
10 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import Foundation
import Network
import Observation
import SwiftUI
struct HealthSnapshot: Codable, Sendable {
struct ProviderSummary: Codable, Sendable {
struct Probe: Codable, Sendable {
struct Bot: Codable, Sendable {
let username: String?
}
struct Webhook: Codable, Sendable {
let url: String?
}
let ok: Bool?
let status: Int?
let error: String?
let elapsedMs: Double?
let bot: Bot?
let webhook: Webhook?
}
let configured: Bool?
let linked: Bool?
let authAgeMs: Double?
let probe: Probe?
let lastProbeAt: Double?
}
struct SessionInfo: Codable, Sendable {
let key: String
let updatedAt: Double?
let age: Double?
}
struct Sessions: Codable, Sendable {
let path: String
let count: Int
let recent: [SessionInfo]
}
let ok: Bool?
let ts: Double
let durationMs: Double
let providers: [String: ProviderSummary]
let providerOrder: [String]?
let providerLabels: [String: String]?
let heartbeatSeconds: Int?
let sessions: Sessions
}
enum HealthState: Equatable {
case unknown
case ok
case linkingNeeded
case degraded(String)
var tint: Color {
switch self {
case .ok: .green
case .linkingNeeded: .red
case .degraded: .orange
case .unknown: .secondary
}
}
}
@MainActor
@Observable
final class HealthStore {
static let shared = HealthStore()
private static let logger = Logger(subsystem: "com.clawdbot", category: "health")
private(set) var snapshot: HealthSnapshot?
private(set) var lastSuccess: Date?
private(set) var lastError: String?
private(set) var isRefreshing = false
private var loopTask: Task<Void, Never>?
private let refreshInterval: TimeInterval = 60
private init() {
// Avoid background health polling in SwiftUI previews and tests.
if !ProcessInfo.processInfo.isPreview, !ProcessInfo.processInfo.isRunningTests {
self.start()
}
}
// Test-only escape hatch: the HealthStore is a process-wide singleton but
// state derivation is pure from `snapshot` + `lastError`.
func __setSnapshotForTest(_ snapshot: HealthSnapshot?, lastError: String? = nil) {
self.snapshot = snapshot
self.lastError = lastError
}
func start() {
guard self.loopTask == nil else { return }
self.loopTask = Task { [weak self] in
guard let self else { return }
while !Task.isCancelled {
await self.refresh()
try? await Task.sleep(nanoseconds: UInt64(self.refreshInterval * 1_000_000_000))
}
}
}
func stop() {
self.loopTask?.cancel()
self.loopTask = nil
}
func refresh(onDemand: Bool = false) async {
guard !self.isRefreshing else { return }
self.isRefreshing = true
defer { self.isRefreshing = false }
let previousError = self.lastError
do {
let data = try await ControlChannel.shared.health(timeout: 15)
if let decoded = decodeHealthSnapshot(from: data) {
self.snapshot = decoded
self.lastSuccess = Date()
self.lastError = nil
if previousError != nil {
Self.logger.info("health refresh recovered")
}
} else {
self.lastError = "health output not JSON"
if onDemand { self.snapshot = nil }
if previousError != self.lastError {
Self.logger.warning("health refresh failed: output not JSON")
}
}
} catch {
let desc = error.localizedDescription
self.lastError = desc
if onDemand { self.snapshot = nil }
if previousError != desc {
Self.logger.error("health refresh failed \(desc, privacy: .public)")
}
}
}
private static func isProviderHealthy(_ summary: HealthSnapshot.ProviderSummary) -> Bool {
guard summary.configured == true else { return false }
// If probe is missing, treat it as "configured but unknown health" (not a hard fail).
return summary.probe?.ok ?? true
}
private static func describeProbeFailure(_ probe: HealthSnapshot.ProviderSummary.Probe) -> String {
let elapsed = probe.elapsedMs.map { "\(Int($0))ms" }
if let error = probe.error, error.lowercased().contains("timeout") || probe.status == nil {
if let elapsed { return "Health check timed out (\(elapsed))" }
return "Health check timed out"
}
let code = probe.status.map { "status \($0)" } ?? "status unknown"
let reason = probe.error?.isEmpty == false ? probe.error! : "health probe failed"
if let elapsed { return "\(reason) (\(code), \(elapsed))" }
return "\(reason) (\(code))"
}
private func resolveLinkProvider(
_ snap: HealthSnapshot) -> (id: String, summary: HealthSnapshot.ProviderSummary)?
{
let order = snap.providerOrder ?? Array(snap.providers.keys)
for id in order {
if let summary = snap.providers[id], summary.linked != nil {
return (id: id, summary: summary)
}
}
return nil
}
private func resolveFallbackProvider(
_ snap: HealthSnapshot,
excluding id: String?) -> (id: String, summary: HealthSnapshot.ProviderSummary)?
{
let order = snap.providerOrder ?? Array(snap.providers.keys)
for providerId in order {
if providerId == id { continue }
guard let summary = snap.providers[providerId] else { continue }
if Self.isProviderHealthy(summary) {
return (id: providerId, summary: summary)
}
}
return nil
}
var state: HealthState {
if let error = self.lastError, !error.isEmpty {
return .degraded(error)
}
guard let snap = self.snapshot else { return .unknown }
guard let link = self.resolveLinkProvider(snap) else { return .unknown }
if link.summary.linked != true {
// Linking is optional if any other provider is healthy; don't paint the whole app red.
let fallback = self.resolveFallbackProvider(snap, excluding: link.id)
return fallback != nil ? .degraded("Not linked") : .linkingNeeded
}
// A provider can be "linked" but still unhealthy (failed probe / cannot connect).
if let probe = link.summary.probe, probe.ok == false {
return .degraded(Self.describeProbeFailure(probe))
}
return .ok
}
var summaryLine: String {
if self.isRefreshing { return "Health check running…" }
if let error = self.lastError { return "Health check failed: \(error)" }
guard let snap = self.snapshot else { return "Health check pending" }
guard let link = self.resolveLinkProvider(snap) else { return "Health check pending" }
if link.summary.linked != true {
if let fallback = self.resolveFallbackProvider(snap, excluding: link.id) {
let fallbackLabel = snap.providerLabels?[fallback.id] ?? fallback.id.capitalized
let fallbackState = (fallback.summary.probe?.ok ?? true) ? "ok" : "degraded"
return "\(fallbackLabel) \(fallbackState) · Not linked — run clawdbot login"
}
return "Not linked — run clawdbot login"
}
let auth = link.summary.authAgeMs.map { msToAge($0) } ?? "unknown"
if let probe = link.summary.probe, probe.ok == false {
let status = probe.status.map(String.init) ?? "?"
let suffix = probe.status == nil ? "probe degraded" : "probe degraded · status \(status)"
return "linked · auth \(auth) · \(suffix)"
}
return "linked · auth \(auth)"
}
/// Short, human-friendly detail for the last failure, used in the UI.
var detailLine: String? {
if let error = self.lastError, !error.isEmpty {
let lower = error.lowercased()
if lower.contains("connection refused") {
let port = GatewayEnvironment.gatewayPort()
return "The gateway control port (127.0.0.1:\(port)) isnt listening — " +
"restart Clawdbot to bring it back."
}
if lower.contains("timeout") {
return "Timed out waiting for the control server; the gateway may be crashed or still starting."
}
return error
}
return nil
}
func describeFailure(from snap: HealthSnapshot, fallback: String?) -> String {
if let link = self.resolveLinkProvider(snap), link.summary.linked != true {
return "Not linked — run clawdbot login"
}
if let link = self.resolveLinkProvider(snap), let probe = link.summary.probe, probe.ok == false {
return Self.describeProbeFailure(probe)
}
if let fallback, !fallback.isEmpty {
return fallback
}
return "health probe failed"
}
var degradedSummary: String? {
guard case let .degraded(reason) = self.state else { return nil }
if reason == "[object Object]" || reason.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty,
let snap = self.snapshot
{
return self.describeFailure(from: snap, fallback: reason)
}
return reason
}
}
func msToAge(_ ms: Double) -> String {
let minutes = Int(round(ms / 60000))
if minutes < 1 { return "just now" }
if minutes < 60 { return "\(minutes)m" }
let hours = Int(round(Double(minutes) / 60))
if hours < 48 { return "\(hours)h" }
let days = Int(round(Double(hours) / 24))
return "\(days)d"
}
/// Decode a health snapshot, tolerating stray log lines before/after the JSON blob.
func decodeHealthSnapshot(from data: Data) -> HealthSnapshot? {
let decoder = JSONDecoder()
if let snap = try? decoder.decode(HealthSnapshot.self, from: data) {
return snap
}
guard let text = String(data: data, encoding: .utf8) else { return nil }
guard let firstBrace = text.firstIndex(of: "{"), let lastBrace = text.lastIndex(of: "}") else {
return nil
}
let slice = text[firstBrace...lastBrace]
let cleaned = Data(slice.utf8)
return try? decoder.decode(HealthSnapshot.self, from: cleaned)
}