From b0b4b33b6b02f9a106f130e2a1e53b947e4503d7 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 11 Jan 2026 01:51:07 +0100 Subject: [PATCH] fix: update gateway auth docs and clients --- CHANGELOG.md | 1 + .../Sources/Clawdbot/ControlChannel.swift | 2 +- .../Clawdbot/GatewayEndpointStore.swift | 42 +++++++++- .../Clawdbot/GatewayLaunchAgentManager.swift | 15 +++- .../Clawdbot/GatewayProcessManager.swift | 2 +- docs/gateway/configuration-examples.md | 14 +++- docs/gateway/configuration.md | 5 +- docs/gateway/doctor.md | 20 +++-- docs/gateway/index.md | 3 +- docs/gateway/security.md | 25 ++++++ docs/index.md | 1 + docs/nodes/audio.md | 30 +++---- docs/nodes/images.md | 2 +- docs/start/faq.md | 6 ++ docs/start/getting-started.md | 2 + docs/start/onboarding.md | 3 +- docs/start/wizard.md | 9 ++- docs/web/control-ui.md | 5 +- docs/web/index.md | 9 ++- src/auto-reply/reply.ts | 8 +- src/auto-reply/transcription.test.ts | 10 ++- src/cli/program.ts | 6 ++ src/commands/configure.ts | 9 ++- src/commands/doctor-prompter.ts | 1 + src/commands/onboard-non-interactive.ts | 2 +- src/config/legacy.ts | 78 +++++++++++++++++-- src/config/types.ts | 8 ++ src/config/zod-schema.ts | 32 +++++++- 28 files changed, 283 insertions(+), 67 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b6d2f875..3de14f517 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ ### Fixes - CLI/Status: expand tables to full terminal width; improve update + daemon summary lines; keep `status --all` gateway log tail pasteable. - WhatsApp: detect @lid mentions in groups using authDir reverse mapping + resolve self JID E.164 for mention gating. (#692) — thanks @peschee. +- Gateway/Auth: default to token auth on loopback during onboarding, add doctor token generation flow, and tighten audio transcription config to Whisper-only. 
## 2026.1.10 diff --git a/apps/macos/Sources/Clawdbot/ControlChannel.swift b/apps/macos/Sources/Clawdbot/ControlChannel.swift index 9634a2d31..33e403726 100644 --- a/apps/macos/Sources/Clawdbot/ControlChannel.swift +++ b/apps/macos/Sources/Clawdbot/ControlChannel.swift @@ -182,7 +182,7 @@ final class ControlChannel { { let reason = urlErr.failureURLString ?? urlErr.localizedDescription return - "Gateway rejected token; set CLAWDBOT_GATEWAY_TOKEN in the mac app environment " + + "Gateway rejected token; set gateway.auth.token (or CLAWDBOT_GATEWAY_TOKEN) " + "or clear it on the gateway. " + "Reason: \(reason)" } diff --git a/apps/macos/Sources/Clawdbot/GatewayEndpointStore.swift b/apps/macos/Sources/Clawdbot/GatewayEndpointStore.swift index d88f108e7..2f3fd3da1 100644 --- a/apps/macos/Sources/Clawdbot/GatewayEndpointStore.swift +++ b/apps/macos/Sources/Clawdbot/GatewayEndpointStore.swift @@ -26,7 +26,13 @@ actor GatewayEndpointStore { static let live = Deps( mode: { await MainActor.run { AppStateStore.shared.connectionMode } }, - token: { ProcessInfo.processInfo.environment["CLAWDBOT_GATEWAY_TOKEN"] }, + token: { + let root = ClawdbotConfigFile.loadDict() + return GatewayEndpointStore.resolveGatewayToken( + isRemote: CommandResolver.connectionModeIsRemote(), + root: root, + env: ProcessInfo.processInfo.environment) + }, password: { let root = ClawdbotConfigFile.loadDict() return GatewayEndpointStore.resolveGatewayPassword( @@ -83,6 +89,40 @@ actor GatewayEndpointStore { return nil } + private static func resolveGatewayToken( + isRemote: Bool, + root: [String: Any], + env: [String: String]) -> String? + { + let raw = env["CLAWDBOT_GATEWAY_TOKEN"] ?? "" + let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) + if !trimmed.isEmpty { + return trimmed + } + if isRemote { + if let gateway = root["gateway"] as? [String: Any], + let remote = gateway["remote"] as? [String: Any], + let token = remote["token"] as? 
String + { + let value = token.trimmingCharacters(in: .whitespacesAndNewlines) + if !value.isEmpty { + return value + } + } + return nil + } + if let gateway = root["gateway"] as? [String: Any], + let auth = gateway["auth"] as? [String: Any], + let token = auth["token"] as? String + { + let value = token.trimmingCharacters(in: .whitespacesAndNewlines) + if !value.isEmpty { + return value + } + } + return nil + } + private let deps: Deps private let logger = Logger(subsystem: "com.clawdbot", category: "gateway-endpoint") diff --git a/apps/macos/Sources/Clawdbot/GatewayLaunchAgentManager.swift b/apps/macos/Sources/Clawdbot/GatewayLaunchAgentManager.swift index a4b718f35..a7b2c6003 100644 --- a/apps/macos/Sources/Clawdbot/GatewayLaunchAgentManager.swift +++ b/apps/macos/Sources/Clawdbot/GatewayLaunchAgentManager.swift @@ -204,7 +204,20 @@ enum GatewayLaunchAgentManager { private static func preferredGatewayToken() -> String? { let raw = ProcessInfo.processInfo.environment["CLAWDBOT_GATEWAY_TOKEN"] ?? "" let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) - return trimmed.isEmpty ? nil : trimmed + if !trimmed.isEmpty { + return trimmed + } + let root = ClawdbotConfigFile.loadDict() + if let gateway = root["gateway"] as? [String: Any], + let auth = gateway["auth"] as? [String: Any], + let token = auth["token"] as? String + { + let value = token.trimmingCharacters(in: .whitespacesAndNewlines) + if !value.isEmpty { + return value + } + } + return nil } private static func preferredGatewayPassword() -> String? 
{ diff --git a/apps/macos/Sources/Clawdbot/GatewayProcessManager.swift b/apps/macos/Sources/Clawdbot/GatewayProcessManager.swift index 3d046d855..afb7802c0 100644 --- a/apps/macos/Sources/Clawdbot/GatewayProcessManager.swift +++ b/apps/macos/Sources/Clawdbot/GatewayProcessManager.swift @@ -239,7 +239,7 @@ final class GatewayProcessManager { let lower = message.lowercased() if self.isGatewayAuthFailure(error) { return """ - Gateway on port \(port) rejected auth. Set CLAWDBOT_GATEWAY_TOKEN in the app \ + Gateway on port \(port) rejected auth. Set gateway.auth.token (or CLAWDBOT_GATEWAY_TOKEN) \ to match the running gateway (or clear it on the gateway) and retry. """ } diff --git a/docs/gateway/configuration-examples.md b/docs/gateway/configuration-examples.md index 76149cb5a..976fb20a1 100644 --- a/docs/gateway/configuration-examples.md +++ b/docs/gateway/configuration-examples.md @@ -117,10 +117,16 @@ Save to `~/.clawdbot/clawdbot.json` and you can DM the bot from that number. imessage: "collect", webchat: "collect" } - }, - transcribeAudio: { - command: ["whisper", "--model", "base"], - timeoutSeconds: 120 + } + }, + + // Tooling + tools: { + audio: { + transcription: { + args: ["--model", "base", "{{MediaPath}}"], + timeoutSeconds: 120 + } } }, diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index f87eef231..a2d7ebddd 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -1790,7 +1790,7 @@ Defaults: port: 18789, // WS + HTTP multiplex bind: "loopback", // controlUi: { enabled: true, basePath: "/clawdbot" } - // auth: { mode: "token", token: "your-token" } // token is for multi-machine CLI access + // auth: { mode: "token", token: "your-token" } // token gates WS + Control UI access // tailscale: { mode: "off" | "serve" | "funnel" } } } @@ -1813,6 +1813,7 @@ Notes: - OpenAI Chat Completions endpoint: **disabled by default**; enable with `gateway.http.endpoints.chatCompletions.enabled: true`. 
- Precedence: `--port` > `CLAWDBOT_GATEWAY_PORT` > `gateway.port` > default `18789`. - Non-loopback binds (`lan`/`tailnet`/`auto`) require auth. Use `gateway.auth.token` (or `CLAWDBOT_GATEWAY_TOKEN`). +- The onboarding wizard generates a gateway token by default (even on loopback). - `gateway.remote.token` is **only** for remote CLI calls; it does not enable local gateway auth. `gateway.token` is ignored. Auth and Tailscale: @@ -2096,7 +2097,7 @@ clawdbot dns setup --apply ## Template variables -Template placeholders are expanded in `audio.transcription.command` (and any future templated command fields). +Template placeholders are expanded in `tools.audio.transcription.args` (and any future templated argument fields). | Variable | Description | |----------|-------------| diff --git a/docs/gateway/doctor.md b/docs/gateway/doctor.md index ee77da83d..22ec0ea31 100644 --- a/docs/gateway/doctor.md +++ b/docs/gateway/doctor.md @@ -71,6 +71,7 @@ cat ~/.clawdbot/clawdbot.json - Gateway runtime best-practice checks (Node vs Bun, version-manager paths). - Gateway port collision diagnostics (default `18789`). - Security warnings for open DM policies. +- Gateway auth warnings when no `gateway.auth.token` is set (offers token generation). - systemd linger check on Linux. - Writes updated config + wizard metadata. @@ -102,7 +103,7 @@ Current migrations: - `routing.bindings` → top-level `bindings` - `routing.agents`/`routing.defaultAgentId` → `agents.list` + `agents.list[].default` - `routing.agentToAgent` → `tools.agentToAgent` -- `routing.transcribeAudio` → `audio.transcription` +- `routing.transcribeAudio` → `tools.audio.transcription` - `identity` → `agents.list[].identity` - `agent.*` → `agents.defaults` + `tools.*` (tools/elevated/bash/sandbox/subagents) - `agent.model`/`allowedModels`/`modelAliases`/`modelFallbacks`/`imageModelFallbacks` @@ -186,11 +187,16 @@ gateway stays alive after logout. 
Doctor prints a quick summary of eligible/missing/blocked skills for the current workspace. -### 11) Gateway health check + restart +### 11) Gateway auth checks (local token) +Doctor warns when `gateway.auth` is missing on a local gateway and offers to +generate a token. Use `clawdbot doctor --generate-gateway-token` to force token +creation in automation. + +### 12) Gateway health check + restart Doctor runs a health check and offers to restart the gateway when it looks unhealthy. -### 12) Supervisor config audit + repair +### 13) Supervisor config audit + repair Doctor checks the installed supervisor config (launchd/systemd/schtasks) for missing or outdated defaults (e.g., systemd network-online dependencies and restart delay). When it finds a mismatch, it recommends an update and can @@ -203,24 +209,24 @@ Notes: - `clawdbot doctor --repair --force` overwrites custom supervisor configs. - You can always force a full rewrite via `clawdbot daemon install --force`. -### 13) Gateway runtime + port diagnostics +### 14) Gateway runtime + port diagnostics Doctor inspects the daemon runtime (PID, last exit status) and warns when the service is installed but not actually running. It also checks for port collisions on the gateway port (default `18789`) and reports likely causes (gateway already running, SSH tunnel). -### 14) Gateway runtime best practices +### 15) Gateway runtime best practices Doctor warns when the gateway service runs on Bun or a version-managed Node path (`nvm`, `fnm`, `volta`, `asdf`, etc.). WhatsApp + Telegram providers require Node, and version-manager paths can break after upgrades because the daemon does not load your shell init. Doctor offers to migrate to a system Node install when available (Homebrew/apt/choco). -### 15) Config write + wizard metadata +### 16) Config write + wizard metadata Doctor persists any config changes and stamps wizard metadata to record the doctor run. 
-### 16) Workspace tips (backup + memory system) +### 17) Workspace tips (backup + memory system) Doctor suggests a workspace memory system when missing and prints a backup tip if the workspace is not already under git. diff --git a/docs/gateway/index.md b/docs/gateway/index.md index b337135da..9643e9692 100644 --- a/docs/gateway/index.md +++ b/docs/gateway/index.md @@ -35,7 +35,8 @@ pnpm gateway:watch - `--force` uses `lsof` to find listeners on the chosen port, sends SIGTERM, logs what it killed, then starts the gateway (fails fast if `lsof` is missing). - If you run under a supervisor (launchd/systemd/mac app child-process mode), a stop/restart typically sends **SIGTERM**; older builds may surface this as `pnpm` `ELIFECYCLE` exit code **143** (SIGTERM), which is a normal shutdown, not a crash. - **SIGUSR1** triggers an in-process restart (no external supervisor required). This is what the `gateway` agent tool uses. -- Optional shared secret: pass `--token <token>` or set `CLAWDBOT_GATEWAY_TOKEN` to require clients to send `connect.params.auth.token`. +- Gateway auth: set `gateway.auth.mode=token` + `gateway.auth.token` (or pass `--token <token>` / `CLAWDBOT_GATEWAY_TOKEN`) to require clients to send `connect.params.auth.token`. +- The wizard now generates a token by default, even on loopback. - Port precedence: `--port` > `CLAWDBOT_GATEWAY_PORT` > `gateway.port` > default `18789`. ## Remote access diff --git a/docs/gateway/security.md b/docs/gateway/security.md index c8ab737b5..ec5813fac 100644 --- a/docs/gateway/security.md +++ b/docs/gateway/security.md @@ -110,6 +110,31 @@ Keep config + state private on the gateway host: `clawdbot doctor` can warn and offer to tighten these permissions. +### 0.5) Lock down the Gateway WebSocket (local auth) + +Gateway auth is **only** enforced when you set `gateway.auth`. If it’s unset, +loopback WS clients are unauthenticated — any local process can connect and call +`config.apply`.
+ +The onboarding wizard now generates a token by default (even for loopback) so +local clients must authenticate. If you skip the wizard or remove auth, you’re +back to open loopback. + +Set a token so **all** WS clients must authenticate: + +```json5 +{ + gateway: { + auth: { mode: "token", token: "your-token" } + } +} +``` + +Doctor can generate one for you: `clawdbot doctor --generate-gateway-token`. + +Note: `gateway.remote.token` is **only** for remote CLI calls; it does not +protect local WS access. + ### 1) DMs: pairing by default ```json5 diff --git a/docs/index.md b/docs/index.md index ef93a3a44..9914de66b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -68,6 +68,7 @@ Most operations flow through the **Gateway** (`clawdbot gateway`), a single long - **One Gateway per host**: it is the only process allowed to own the WhatsApp Web session. - **Loopback-first**: Gateway WS defaults to `ws://127.0.0.1:18789`. + - The wizard now generates a gateway token by default (even for loopback). - For Tailnet access, run `clawdbot gateway --bind tailnet --token ...` (token is required for non-loopback binds). - **Bridge for nodes**: optional LAN/tailnet-facing bridge on `tcp://0.0.0.0:18790` for paired nodes (Bonjour-discoverable). - **Canvas host**: HTTP file server on `canvasHost.port` (default `18793`), serving `/__clawdbot__/canvas/` for node WebViews; see [Gateway configuration](/gateway/configuration) (`canvasHost`). diff --git a/docs/nodes/audio.md b/docs/nodes/audio.md index a1cd694f1..349a8f212 100644 --- a/docs/nodes/audio.md +++ b/docs/nodes/audio.md @@ -6,38 +6,30 @@ read_when: # Audio / Voice Notes — 2025-12-05 ## What works -- **Optional transcription**: If `audio.transcription.command` is set in `~/.clawdbot/clawdbot.json`, Clawdbot will: +- **Optional transcription**: If `tools.audio.transcription` is set in `~/.clawdbot/clawdbot.json`, Clawdbot will: 1) Download inbound audio to a temp path when WhatsApp only provides a URL. 
- 2) Run the configured CLI (templated with `{{MediaPath}}`), expecting transcript on stdout. + 2) Run the configured CLI args (templated with `{{MediaPath}}`), expecting transcript on stdout. 3) Replace `Body` with the transcript, set `{{Transcript}}`, and prepend the original media path plus a `Transcript:` section in the command prompt so models see both. 4) Continue through the normal auto-reply pipeline (templating, sessions, Pi command). - **Verbose logging**: In `--verbose`, we log when transcription runs and when the transcript replaces the body. -## Config example (OpenAI Whisper CLI) -Requires `OPENAI_API_KEY` in env and `openai` CLI installed: +## Config example (Whisper CLI) +Requires `whisper` CLI installed: ```json5 { - audio: { - transcription: { - command: [ - "openai", - "api", - "audio.transcriptions.create", - "-m", - "whisper-1", - "-f", - "{{MediaPath}}", - "--response-format", - "text" - ], - timeoutSeconds: 45 + tools: { + audio: { + transcription: { + args: ["--model", "base", "{{MediaPath}}"], + timeoutSeconds: 45 + } } } } ``` ## Notes & limits -- We don’t ship a transcriber; you opt in with any CLI that prints text to stdout (Whisper cloud, whisper.cpp, vosk, Deepgram, etc.). +- We don’t ship a transcriber; you opt in with the Whisper CLI on your PATH. - Size guard: inbound audio must be ≤5 MB (matches the temp media store and transcript pipeline). - Outbound caps: web send supports audio/voice up to 16 MB (sent as a voice note with `ptt: true`). - If transcription fails, we fall back to the original body/media note; replies still go through. diff --git a/docs/nodes/images.md b/docs/nodes/images.md index 245c3d8c1..b58b3c500 100644 --- a/docs/nodes/images.md +++ b/docs/nodes/images.md @@ -38,7 +38,7 @@ Clawdbot is now **web-only** (Baileys). This document captures the current media - `{{MediaUrl}}` pseudo-URL for the inbound media. - `{{MediaPath}}` local temp path written before running the command. 
- When a per-session Docker sandbox is enabled, inbound media is copied into the sandbox workspace and `MediaPath`/`MediaUrl` are rewritten to a relative path like `media/inbound/`. -- Audio transcription (if configured) runs before templating and can replace `Body` with the transcript. +- Audio transcription (if configured via `tools.audio.transcription`) runs before templating and can replace `Body` with the transcript. ## Limits & Errors - Images: ~6 MB cap after recompression. diff --git a/docs/start/faq.md b/docs/start/faq.md index f3192ee54..ad762c412 100644 --- a/docs/start/faq.md +++ b/docs/start/faq.md @@ -198,6 +198,12 @@ Notes: - `gateway.remote.token` is for **remote CLI calls** only; it does not enable local gateway auth. - The Control UI authenticates via `connect.params.auth.token` (stored in app/UI settings). Avoid putting tokens in URLs. +### Why do I need a token on localhost now? + +The wizard generates a gateway token by default (even on loopback) so **local WS clients must authenticate**. This blocks other local processes from calling the Gateway. Paste the token into the Control UI settings (or your client config) to connect. + +If you **really** want open loopback, remove `gateway.auth` from your config. Doctor can generate a token for you any time: `clawdbot doctor --generate-gateway-token`. + ### Do I have to restart after changing config? The Gateway watches the config and supports hot‑reload: diff --git a/docs/start/getting-started.md b/docs/start/getting-started.md index 7b59da57a..7e5039f7e 100644 --- a/docs/start/getting-started.md +++ b/docs/start/getting-started.md @@ -68,6 +68,7 @@ What you’ll choose: - **Providers**: WhatsApp QR login, Telegram/Discord bot tokens, etc. 
- **Daemon**: background install (launchd/systemd; WSL2 uses systemd) - **Runtime**: Node (recommended; required for WhatsApp) or Bun (faster, but incompatible with WhatsApp) +- **Gateway token**: the wizard generates one by default (even on loopback) and stores it in `gateway.auth.token`. Wizard doc: [Wizard](/start/wizard) @@ -95,6 +96,7 @@ clawdbot gateway --port 18789 --verbose ``` Dashboard (local loopback): `http://127.0.0.1:18789/` +If a token is configured, paste it into the Control UI settings (stored as `connect.params.auth.token`). ⚠️ **WhatsApp + Bun warning:** Baileys (WhatsApp Web library) uses a WebSocket path that is currently incompatible with Bun and can cause memory corruption on diff --git a/docs/start/onboarding.md b/docs/start/onboarding.md index b996b4036..933cd94b3 100644 --- a/docs/start/onboarding.md +++ b/docs/start/onboarding.md @@ -32,7 +32,8 @@ Where does the **Gateway** run? - **Configure later:** skip setup and leave the app unconfigured. Gateway auth tip: -- If you only use Clawdbot locally (loopback), auth can be **Off**. +- The wizard now generates a **token** even for loopback, so local WS clients must authenticate. +- If you disable auth, any local process can connect; use that only on fully trusted machines. - Use a **token** for multi‑machine access or non‑loopback binds. ## 2) Local-only auth (Anthropic OAuth) diff --git a/docs/start/wizard.md b/docs/start/wizard.md index c0868be3a..4e69123c9 100644 --- a/docs/start/wizard.md +++ b/docs/start/wizard.md @@ -32,7 +32,7 @@ The wizard starts with **QuickStart** (defaults) vs **Advanced** (full control). 
- Local gateway (loopback) - Workspace default (or existing workspace) - Gateway port **18789** -- Gateway auth **Off** (loopback only) +- Gateway auth **Token** (auto‑generated, even on loopback) - Tailscale exposure **Off** - Telegram + WhatsApp DMs default to **allowlist** (you’ll be prompted for your phone number) @@ -96,8 +96,9 @@ Tip: `--json` does **not** imply non-interactive mode. Use `--non-interactive` ( 4) **Gateway** - Port, bind, auth mode, tailscale exposure. - - Auth recommendation: keep **Off** for single-machine loopback setups. Use **Token** for multi-machine access or non-loopback binds. - - Non‑loopback binds require auth. + - Auth recommendation: keep **Token** even for loopback so local WS clients must authenticate. + - Disable auth only if you fully trust every local process. + - Non‑loopback binds still require auth. 5) **Providers** - WhatsApp: optional QR login. @@ -135,7 +136,7 @@ Remote mode configures a local client to connect to a Gateway elsewhere. What you’ll set: - Remote Gateway URL (`ws://...`) -- Optional token +- Token if the remote Gateway requires auth (recommended) Notes: - No remote installs or daemon changes are performed. diff --git a/docs/web/control-ui.md b/docs/web/control-ui.md index 0b4e6e8e4..b6f689b55 100644 --- a/docs/web/control-ui.md +++ b/docs/web/control-ui.md @@ -25,6 +25,7 @@ Auth is supplied during the WebSocket handshake via: - `connect.params.auth.token` - `connect.params.auth.password` The dashboard settings panel lets you store a token; passwords are not persisted. +The onboarding wizard generates a gateway token by default, so paste it here on first connect. ## What it can do (today) - Chat with the model via Gateway WS (`chat.history`, `chat.send`, `chat.abort`) @@ -65,9 +66,9 @@ Open: - `https://<magicdns>/` (or your configured `gateway.controlUi.basePath`) By default, the gateway trusts Tailscale identity headers in serve mode.
You can still set -`CLAWDBOT_GATEWAY_TOKEN` or `gateway.auth` if you want a shared secret instead. +`gateway.auth` (or `CLAWDBOT_GATEWAY_TOKEN`) if you want a shared secret instead. -### Bind to tailnet + token (legacy) +### Bind to tailnet + token ```bash clawdbot gateway --bind tailnet --token "$(openssl rand -hex 32)" diff --git a/docs/web/index.md b/docs/web/index.md index 8af5673f1..daf9bfd91 100644 --- a/docs/web/index.md +++ b/docs/web/index.md @@ -56,13 +56,14 @@ clawdbot gateway Open: - `https://<magicdns>/` (or your configured `gateway.controlUi.basePath`) -### Tailnet bind + token (legacy) +### Tailnet bind + token ```json5 { gateway: { bind: "tailnet", - controlUi: { enabled: true } + controlUi: { enabled: true }, + auth: { mode: "token", token: "your-token" } } } ``` @@ -70,7 +71,6 @@ Open: Then start the gateway (token required for non-loopback binds): ```bash -export CLAWDBOT_GATEWAY_TOKEN="…your token…" clawdbot gateway ``` @@ -91,7 +91,8 @@ Open: ## Security notes -- Binding the Gateway to a non-loopback address **requires** auth (`CLAWDBOT_GATEWAY_TOKEN` or `gateway.auth`). +- Binding the Gateway to a non-loopback address **requires** auth (`gateway.auth` or `CLAWDBOT_GATEWAY_TOKEN`). +- The wizard generates a gateway token by default (even on loopback). - The UI sends `connect.params.auth.token` or `connect.params.auth.password`. - Use `gateway.auth.allowTailscale: false` to require explicit credentials even in Serve mode. - `gateway.tailscale.mode: "funnel"` requires `gateway.auth.mode: "password"` (shared password).
diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index 6f9be3111..b5d53cf77 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -92,7 +92,11 @@ import { type VerboseLevel, } from "./thinking.js"; import { SILENT_REPLY_TOKEN } from "./tokens.js"; -import { isAudio, transcribeInboundAudio } from "./transcription.js"; +import { + hasAudioTranscriptionConfig, + isAudio, + transcribeInboundAudio, +} from "./transcription.js"; import type { GetReplyOptions, ReplyPayload } from "./types.js"; export { @@ -367,7 +371,7 @@ export async function getReplyFromConfig( opts?.onTypingController?.(typing); let transcribedText: string | undefined; - if (cfg.audio?.transcription && isAudio(ctx.MediaType)) { + if (hasAudioTranscriptionConfig(cfg) && isAudio(ctx.MediaType)) { const transcribed = await transcribeInboundAudio(cfg, ctx, defaultRuntime); if (transcribed?.text) { transcribedText = transcribed.text; diff --git a/src/auto-reply/transcription.test.ts b/src/auto-reply/transcription.test.ts index 7347a9f86..c47d76daf 100644 --- a/src/auto-reply/transcription.test.ts +++ b/src/auto-reply/transcription.test.ts @@ -37,10 +37,12 @@ describe("transcribeInboundAudio", () => { vi.stubGlobal("fetch", fetchMock); const cfg = { - audio: { - transcription: { - command: ["echo", "{{MediaPath}}"], - timeoutSeconds: 5, + tools: { + audio: { + transcription: { + args: ["echo", "{{MediaPath}}"], + timeoutSeconds: 5, + }, }, }, }; diff --git a/src/cli/program.ts b/src/cli/program.ts index d4de1f6b5..502bfc7bd 100644 --- a/src/cli/program.ts +++ b/src/cli/program.ts @@ -438,6 +438,11 @@ export function buildProgram() { "Run without prompts (safe migrations only)", false, ) + .option( + "--generate-gateway-token", + "Generate and configure a gateway token", + false, + ) .option("--deep", "Scan system services for extra gateway installs", false) .action(async (opts) => { try { @@ -447,6 +452,7 @@ export function buildProgram() { repair: Boolean(opts.repair), force: 
Boolean(opts.force), nonInteractive: Boolean(opts.nonInteractive), + generateGatewayToken: Boolean(opts.generateGatewayToken), deep: Boolean(opts.deep), }); } catch (err) { diff --git a/src/commands/configure.ts b/src/commands/configure.ts index b81abcd7e..a41193d86 100644 --- a/src/commands/configure.ts +++ b/src/commands/configure.ts @@ -159,10 +159,15 @@ async function promptGatewayConfig( await select({ message: "Gateway auth", options: [ - { value: "off", label: "Off (loopback only)" }, - { value: "token", label: "Token" }, + { + value: "off", + label: "Off (loopback only)", + hint: "Not recommended unless you fully trust local processes", + }, + { value: "token", label: "Token", hint: "Recommended default" }, { value: "password", label: "Password" }, ], + initialValue: "token", }), runtime, ) as "off" | "token" | "password"; diff --git a/src/commands/doctor-prompter.ts b/src/commands/doctor-prompter.ts index 99f7d5a4f..770230548 100644 --- a/src/commands/doctor-prompter.ts +++ b/src/commands/doctor-prompter.ts @@ -14,6 +14,7 @@ export type DoctorOptions = { deep?: boolean; repair?: boolean; force?: boolean; + generateGatewayToken?: boolean; }; export type DoctorPrompter = { diff --git a/src/commands/onboard-non-interactive.ts b/src/commands/onboard-non-interactive.ts index c37f35cd7..a600edd6a 100644 --- a/src/commands/onboard-non-interactive.ts +++ b/src/commands/onboard-non-interactive.ts @@ -384,7 +384,7 @@ export async function runNonInteractiveOnboarding( ? (opts.gatewayPort as number) : resolveGatewayPort(baseConfig); let bind = opts.gatewayBind ?? "loopback"; - let authMode = opts.gatewayAuth ?? "off"; + let authMode = opts.gatewayAuth ?? "token"; const tailscaleMode = opts.tailscale ?? 
"off"; const tailscaleResetOnExit = Boolean(opts.tailscaleResetOnExit); diff --git a/src/config/legacy.ts b/src/config/legacy.ts index d90de4a09..3f895079b 100644 --- a/src/config/legacy.ts +++ b/src/config/legacy.ts @@ -46,6 +46,33 @@ const mergeMissing = ( } }; +const AUDIO_TRANSCRIPTION_CLI_ALLOWLIST = new Set(["whisper"]); + +const mapLegacyAudioTranscription = ( + value: unknown, +): Record | null => { + const transcriber = getRecord(value); + const command = Array.isArray(transcriber?.command) + ? transcriber?.command + : null; + if (!command || command.length === 0) return null; + const rawExecutable = String(command[0] ?? "").trim(); + if (!rawExecutable) return null; + const executableName = rawExecutable.split(/[\\/]/).pop() ?? rawExecutable; + if (!AUDIO_TRANSCRIPTION_CLI_ALLOWLIST.has(executableName)) return null; + + const args = command.slice(1).map((part) => String(part)); + const timeoutSeconds = + typeof transcriber?.timeoutSeconds === "number" + ? transcriber?.timeoutSeconds + : undefined; + + const result: Record = {}; + if (args.length > 0) result.args = args; + if (timeoutSeconds !== undefined) result.timeoutSeconds = timeoutSeconds; + return result; +}; + const getAgentsList = (agents: Record | null) => { const list = agents?.list; return Array.isArray(list) ? 
list : []; @@ -137,7 +164,7 @@ const LEGACY_CONFIG_RULES: LegacyConfigRule[] = [ { path: ["routing", "transcribeAudio"], message: - "routing.transcribeAudio was moved; use audio.transcription instead (run `clawdbot doctor` to migrate).", + "routing.transcribeAudio was moved; use tools.audio.transcription instead (run `clawdbot doctor` to migrate).", }, { path: ["telegram", "requireMention"], @@ -701,18 +728,57 @@ const LEGACY_CONFIG_MIGRATIONS: LegacyConfigMigration[] = [ } if (routing.transcribeAudio !== undefined) { - const audio = ensureRecord(raw, "audio"); - if (audio.transcription === undefined) { - audio.transcription = routing.transcribeAudio; - changes.push("Moved routing.transcribeAudio → audio.transcription."); + const mapped = mapLegacyAudioTranscription(routing.transcribeAudio); + if (mapped) { + const tools = ensureRecord(raw, "tools"); + const toolsAudio = ensureRecord(tools, "audio"); + if (toolsAudio.transcription === undefined) { + toolsAudio.transcription = mapped; + changes.push( + "Moved routing.transcribeAudio → tools.audio.transcription.", + ); + } else { + changes.push( + "Removed routing.transcribeAudio (tools.audio.transcription already set).", + ); + } } else { changes.push( - "Removed routing.transcribeAudio (audio.transcription already set).", + "Removed routing.transcribeAudio (unsupported transcription CLI).", ); } delete routing.transcribeAudio; } + const audio = getRecord(raw.audio); + if (audio?.transcription !== undefined) { + const mapped = mapLegacyAudioTranscription(audio.transcription); + if (mapped) { + const tools = ensureRecord(raw, "tools"); + const toolsAudio = ensureRecord(tools, "audio"); + if (toolsAudio.transcription === undefined) { + toolsAudio.transcription = mapped; + changes.push( + "Moved audio.transcription → tools.audio.transcription.", + ); + } else { + changes.push( + "Removed audio.transcription (tools.audio.transcription already set).", + ); + } + delete audio.transcription; + if (Object.keys(audio).length 
=== 0) delete raw.audio; + else raw.audio = audio; + } else { + delete audio.transcription; + changes.push( + "Removed audio.transcription (unsupported transcription CLI).", + ); + if (Object.keys(audio).length === 0) delete raw.audio; + else raw.audio = audio; + } + } + if (Object.keys(routing).length === 0) { delete raw.routing; } diff --git a/src/config/types.ts b/src/config/types.ts index aabe7df39..e223853aa 100644 --- a/src/config/types.ts +++ b/src/config/types.ts @@ -915,6 +915,13 @@ export type AgentToolsConfig = { export type ToolsConfig = { allow?: string[]; deny?: string[]; + audio?: { + transcription?: { + /** CLI args (template-enabled). */ + args?: string[]; + timeoutSeconds?: number; + }; + }; agentToAgent?: { /** Enable agent-to-agent messaging tools. Default: false. */ enabled?: boolean; @@ -1023,6 +1030,7 @@ export type BroadcastConfig = { }; export type AudioConfig = { + /** @deprecated Use tools.audio.transcription instead. */ transcription?: { // Optional CLI to turn inbound audio into text; templated args, must output transcript to stdout. 
command: string[]; diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index 081c664d0..5fac4433d 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -1,6 +1,7 @@ import { z } from "zod"; import { parseDurationMs } from "../cli/parse-duration.js"; +import { isSafeExecutableValue } from "../infra/exec-safety.js"; const ModelApiSchema = z.union([ z.literal("openai-completions"), @@ -179,7 +180,16 @@ const QueueSchema = z const TranscribeAudioSchema = z .object({ - command: z.array(z.string()), + command: z.array(z.string()).superRefine((value, ctx) => { + const executable = value[0]; + if (!isSafeExecutableValue(executable)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: [0], + message: "expected safe executable name or path", + }); + } + }), timeoutSeconds: z.number().int().positive().optional(), }) .optional(); @@ -188,6 +198,17 @@ const HexColorSchema = z .string() .regex(/^#?[0-9a-fA-F]{6}$/, "expected hex color (RRGGBB)"); +const ExecutableTokenSchema = z + .string() + .refine(isSafeExecutableValue, "expected safe executable name or path"); + +const ToolsAudioTranscriptionSchema = z + .object({ + args: z.array(z.string()).optional(), + timeoutSeconds: z.number().int().positive().optional(), + }) + .optional(); + const TelegramTopicSchema = z.object({ requireMention: z.boolean().optional(), skills: z.array(z.string()).optional(), @@ -422,7 +443,7 @@ const SignalAccountSchemaBase = z.object({ httpUrl: z.string().optional(), httpHost: z.string().optional(), httpPort: z.number().int().positive().optional(), - cliPath: z.string().optional(), + cliPath: ExecutableTokenSchema.optional(), autoStart: z.boolean().optional(), receiveMode: z.union([z.literal("on-start"), z.literal("manual")]).optional(), ignoreAttachments: z.boolean().optional(), @@ -470,7 +491,7 @@ const IMessageAccountSchemaBase = z.object({ name: z.string().optional(), capabilities: z.array(z.string()).optional(), enabled: z.boolean().optional(), - cliPath: 
z.string().optional(), + cliPath: ExecutableTokenSchema.optional(), dbPath: z.string().optional(), service: z .union([z.literal("imessage"), z.literal("sms"), z.literal("auto")]) @@ -819,6 +840,11 @@ const ToolsSchema = z .object({ allow: z.array(z.string()).optional(), deny: z.array(z.string()).optional(), + audio: z + .object({ + transcription: ToolsAudioTranscriptionSchema, + }) + .optional(), agentToAgent: z .object({ enabled: z.boolean().optional(),