fix: update gateway auth docs and clients

This commit is contained in:
Peter Steinberger
2026-01-11 01:51:07 +01:00
parent d33285a9cd
commit b0b4b33b6b
28 changed files with 283 additions and 67 deletions

View File

@@ -41,6 +41,7 @@
### Fixes
- CLI/Status: expand tables to full terminal width; improve update + daemon summary lines; keep `status --all` gateway log tail pasteable.
- WhatsApp: detect @lid mentions in groups using authDir reverse mapping + resolve self JID E.164 for mention gating. (#692) — thanks @peschee.
- Gateway/Auth: default to token auth on loopback during onboarding, add doctor token generation flow, and tighten audio transcription config to Whisper-only.
## 2026.1.10

View File

@@ -182,7 +182,7 @@ final class ControlChannel {
{
let reason = urlErr.failureURLString ?? urlErr.localizedDescription
return
"Gateway rejected token; set CLAWDBOT_GATEWAY_TOKEN in the mac app environment " +
"Gateway rejected token; set gateway.auth.token (or CLAWDBOT_GATEWAY_TOKEN) " +
"or clear it on the gateway. " +
"Reason: \(reason)"
}

View File

@@ -26,7 +26,13 @@ actor GatewayEndpointStore {
static let live = Deps(
mode: { await MainActor.run { AppStateStore.shared.connectionMode } },
token: { ProcessInfo.processInfo.environment["CLAWDBOT_GATEWAY_TOKEN"] },
token: {
let root = ClawdbotConfigFile.loadDict()
return GatewayEndpointStore.resolveGatewayToken(
isRemote: CommandResolver.connectionModeIsRemote(),
root: root,
env: ProcessInfo.processInfo.environment)
},
password: {
let root = ClawdbotConfigFile.loadDict()
return GatewayEndpointStore.resolveGatewayPassword(
@@ -83,6 +89,40 @@ actor GatewayEndpointStore {
return nil
}
/// Resolves the gateway token to use for a connection.
///
/// Lookup order:
/// 1. `CLAWDBOT_GATEWAY_TOKEN` from `env`.
/// 2. `gateway.remote.token` from `root` when `isRemote` is true,
///    otherwise `gateway.auth.token` from `root`.
///
/// Every candidate is trimmed of whitespace and newlines; a blank value is
/// treated as missing. Remote mode never falls back to `gateway.auth.token`.
///
/// - Parameters:
///   - isRemote: Selects `gateway.remote.token` instead of `gateway.auth.token`.
///   - root: Config dictionary to read the `gateway` section from.
///   - env: Environment variables to consult first.
/// - Returns: The trimmed token, or `nil` when no non-empty token is found.
private static func resolveGatewayToken(
    isRemote: Bool,
    root: [String: Any],
    env: [String: String]) -> String?
{
    // Trim a candidate and collapse blank strings into nil.
    func normalized(_ candidate: String?) -> String? {
        guard let candidate = candidate else { return nil }
        let cleaned = candidate.trimmingCharacters(in: .whitespacesAndNewlines)
        return cleaned.isEmpty ? nil : cleaned
    }

    // The environment variable wins regardless of connection mode.
    if let fromEnvironment = normalized(env["CLAWDBOT_GATEWAY_TOKEN"]) {
        return fromEnvironment
    }

    // Only one config section is consulted, chosen by the connection mode.
    let sectionKey = isRemote ? "remote" : "auth"
    guard let gatewayConfig = root["gateway"] as? [String: Any],
          let section = gatewayConfig[sectionKey] as? [String: Any]
    else {
        return nil
    }
    return normalized(section["token"] as? String)
}
private let deps: Deps
private let logger = Logger(subsystem: "com.clawdbot", category: "gateway-endpoint")

View File

@@ -204,7 +204,20 @@ enum GatewayLaunchAgentManager {
/// Returns the gateway token to use, or `nil` when none is configured.
///
/// The `CLAWDBOT_GATEWAY_TOKEN` environment variable is checked first; when it
/// is unset or blank, `gateway.auth.token` from the config file is used.
/// Both values are trimmed of whitespace and newlines, and blank values are
/// treated as missing.
private static func preferredGatewayToken() -> String? {
    let raw = ProcessInfo.processInfo.environment["CLAWDBOT_GATEWAY_TOKEN"] ?? ""
    let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines)
    if !trimmed.isEmpty {
        return trimmed
    }
    // No usable environment override: fall back to gateway.auth.token in the config.
    // (A leftover unconditional `return` used to make this fallback unreachable.)
    let root = ClawdbotConfigFile.loadDict()
    if let gateway = root["gateway"] as? [String: Any],
       let auth = gateway["auth"] as? [String: Any],
       let token = auth["token"] as? String
    {
        let value = token.trimmingCharacters(in: .whitespacesAndNewlines)
        if !value.isEmpty {
            return value
        }
    }
    return nil
}
private static func preferredGatewayPassword() -> String? {

View File

@@ -239,7 +239,7 @@ final class GatewayProcessManager {
let lower = message.lowercased()
if self.isGatewayAuthFailure(error) {
return """
Gateway on port \(port) rejected auth. Set CLAWDBOT_GATEWAY_TOKEN in the app \
Gateway on port \(port) rejected auth. Set gateway.auth.token (or CLAWDBOT_GATEWAY_TOKEN) \
to match the running gateway (or clear it on the gateway) and retry.
"""
}

View File

@@ -117,10 +117,16 @@ Save to `~/.clawdbot/clawdbot.json` and you can DM the bot from that number.
imessage: "collect",
webchat: "collect"
}
},
transcribeAudio: {
command: ["whisper", "--model", "base"],
timeoutSeconds: 120
}
},
// Tooling
tools: {
audio: {
transcription: {
args: ["--model", "base", "{{MediaPath}}"],
timeoutSeconds: 120
}
}
},

View File

@@ -1790,7 +1790,7 @@ Defaults:
port: 18789, // WS + HTTP multiplex
bind: "loopback",
// controlUi: { enabled: true, basePath: "/clawdbot" }
// auth: { mode: "token", token: "your-token" } // token is for multi-machine CLI access
// auth: { mode: "token", token: "your-token" } // token gates WS + Control UI access
// tailscale: { mode: "off" | "serve" | "funnel" }
}
}
@@ -1813,6 +1813,7 @@ Notes:
- OpenAI Chat Completions endpoint: **disabled by default**; enable with `gateway.http.endpoints.chatCompletions.enabled: true`.
- Precedence: `--port` > `CLAWDBOT_GATEWAY_PORT` > `gateway.port` > default `18789`.
- Non-loopback binds (`lan`/`tailnet`/`auto`) require auth. Use `gateway.auth.token` (or `CLAWDBOT_GATEWAY_TOKEN`).
- The onboarding wizard generates a gateway token by default (even on loopback).
- `gateway.remote.token` is **only** for remote CLI calls; it does not enable local gateway auth. `gateway.token` is ignored.
Auth and Tailscale:
@@ -2096,7 +2097,7 @@ clawdbot dns setup --apply
## Template variables
Template placeholders are expanded in `audio.transcription.command` (and any future templated command fields).
Template placeholders are expanded in `tools.audio.transcription.args` (and any future templated argument fields).
| Variable | Description |
|----------|-------------|

View File

@@ -71,6 +71,7 @@ cat ~/.clawdbot/clawdbot.json
- Gateway runtime best-practice checks (Node vs Bun, version-manager paths).
- Gateway port collision diagnostics (default `18789`).
- Security warnings for open DM policies.
- Gateway auth warnings when no `gateway.auth.token` is set (offers token generation).
- systemd linger check on Linux.
- Writes updated config + wizard metadata.
@@ -102,7 +103,7 @@ Current migrations:
- `routing.bindings` → top-level `bindings`
- `routing.agents`/`routing.defaultAgentId` → `agents.list` + `agents.list[].default`
- `routing.agentToAgent` → `tools.agentToAgent`
- `routing.transcribeAudio` → `audio.transcription`
- `routing.transcribeAudio` → `tools.audio.transcription`
- `identity` → `agents.list[].identity`
- `agent.*` → `agents.defaults` + `tools.*` (tools/elevated/bash/sandbox/subagents)
- `agent.model`/`allowedModels`/`modelAliases`/`modelFallbacks`/`imageModelFallbacks`
@@ -186,11 +187,16 @@ gateway stays alive after logout.
Doctor prints a quick summary of eligible/missing/blocked skills for the current
workspace.
### 11) Gateway health check + restart
### 11) Gateway auth checks (local token)
Doctor warns when `gateway.auth` is missing on a local gateway and offers to
generate a token. Use `clawdbot doctor --generate-gateway-token` to force token
creation in automation.
### 12) Gateway health check + restart
Doctor runs a health check and offers to restart the gateway when it looks
unhealthy.
### 12) Supervisor config audit + repair
### 13) Supervisor config audit + repair
Doctor checks the installed supervisor config (launchd/systemd/schtasks) for
missing or outdated defaults (e.g., systemd network-online dependencies and
restart delay). When it finds a mismatch, it recommends an update and can
@@ -203,24 +209,24 @@ Notes:
- `clawdbot doctor --repair --force` overwrites custom supervisor configs.
- You can always force a full rewrite via `clawdbot daemon install --force`.
### 13) Gateway runtime + port diagnostics
### 14) Gateway runtime + port diagnostics
Doctor inspects the daemon runtime (PID, last exit status) and warns when the
service is installed but not actually running. It also checks for port collisions
on the gateway port (default `18789`) and reports likely causes (gateway already
running, SSH tunnel).
### 14) Gateway runtime best practices
### 15) Gateway runtime best practices
Doctor warns when the gateway service runs on Bun or a version-managed Node path
(`nvm`, `fnm`, `volta`, `asdf`, etc.). WhatsApp + Telegram providers require Node,
and version-manager paths can break after upgrades because the daemon does not
load your shell init. Doctor offers to migrate to a system Node install when
available (Homebrew/apt/choco).
### 15) Config write + wizard metadata
### 16) Config write + wizard metadata
Doctor persists any config changes and stamps wizard metadata to record the
doctor run.
### 16) Workspace tips (backup + memory system)
### 17) Workspace tips (backup + memory system)
Doctor suggests a workspace memory system when missing and prints a backup tip
if the workspace is not already under git.

View File

@@ -35,7 +35,8 @@ pnpm gateway:watch
- `--force` uses `lsof` to find listeners on the chosen port, sends SIGTERM, logs what it killed, then starts the gateway (fails fast if `lsof` is missing).
- If you run under a supervisor (launchd/systemd/mac app child-process mode), a stop/restart typically sends **SIGTERM**; older builds may surface this as `pnpm` `ELIFECYCLE` exit code **143** (SIGTERM), which is a normal shutdown, not a crash.
- **SIGUSR1** triggers an in-process restart (no external supervisor required). This is what the `gateway` agent tool uses.
- Optional shared secret: pass `--token <value>` or set `CLAWDBOT_GATEWAY_TOKEN` to require clients to send `connect.params.auth.token`.
- Gateway auth: set `gateway.auth.mode=token` + `gateway.auth.token` (or pass `--token <value>` / `CLAWDBOT_GATEWAY_TOKEN`) to require clients to send `connect.params.auth.token`.
- The wizard now generates a token by default, even on loopback.
- Port precedence: `--port` > `CLAWDBOT_GATEWAY_PORT` > `gateway.port` > default `18789`.
## Remote access

View File

@@ -110,6 +110,31 @@ Keep config + state private on the gateway host:
`clawdbot doctor` can warn and offer to tighten these permissions.
### 0.5) Lock down the Gateway WebSocket (local auth)
Gateway auth is **only** enforced when you set `gateway.auth`. If it's unset,
loopback WS clients are unauthenticated — any local process can connect and call
`config.apply`.
The onboarding wizard now generates a token by default (even for loopback) so
local clients must authenticate. If you skip the wizard or remove auth, you're
back to open loopback.
Set a token so **all** WS clients must authenticate:
```json5
{
gateway: {
auth: { mode: "token", token: "your-token" }
}
}
```
Doctor can generate one for you: `clawdbot doctor --generate-gateway-token`.
Note: `gateway.remote.token` is **only** for remote CLI calls; it does not
protect local WS access.
### 1) DMs: pairing by default
```json5

View File

@@ -68,6 +68,7 @@ Most operations flow through the **Gateway** (`clawdbot gateway`), a single long
- **One Gateway per host**: it is the only process allowed to own the WhatsApp Web session.
- **Loopback-first**: Gateway WS defaults to `ws://127.0.0.1:18789`.
- The wizard now generates a gateway token by default (even for loopback).
- For Tailnet access, run `clawdbot gateway --bind tailnet --token ...` (token is required for non-loopback binds).
- **Bridge for nodes**: optional LAN/tailnet-facing bridge on `tcp://0.0.0.0:18790` for paired nodes (Bonjour-discoverable).
- **Canvas host**: HTTP file server on `canvasHost.port` (default `18793`), serving `/__clawdbot__/canvas/` for node WebViews; see [Gateway configuration](/gateway/configuration) (`canvasHost`).

View File

@@ -6,38 +6,30 @@ read_when:
# Audio / Voice Notes — 2025-12-05
## What works
- **Optional transcription**: If `audio.transcription.command` is set in `~/.clawdbot/clawdbot.json`, Clawdbot will:
- **Optional transcription**: If `tools.audio.transcription` is set in `~/.clawdbot/clawdbot.json`, Clawdbot will:
1) Download inbound audio to a temp path when WhatsApp only provides a URL.
2) Run the configured CLI (templated with `{{MediaPath}}`), expecting transcript on stdout.
2) Run the configured CLI args (templated with `{{MediaPath}}`), expecting transcript on stdout.
3) Replace `Body` with the transcript, set `{{Transcript}}`, and prepend the original media path plus a `Transcript:` section in the command prompt so models see both.
4) Continue through the normal auto-reply pipeline (templating, sessions, Pi command).
- **Verbose logging**: In `--verbose`, we log when transcription runs and when the transcript replaces the body.
## Config example (OpenAI Whisper CLI)
Requires `OPENAI_API_KEY` in env and `openai` CLI installed:
## Config example (Whisper CLI)
Requires `whisper` CLI installed:
```json5
{
audio: {
transcription: {
command: [
"openai",
"api",
"audio.transcriptions.create",
"-m",
"whisper-1",
"-f",
"{{MediaPath}}",
"--response-format",
"text"
],
timeoutSeconds: 45
tools: {
audio: {
transcription: {
args: ["--model", "base", "{{MediaPath}}"],
timeoutSeconds: 45
}
}
}
}
```
## Notes & limits
- We don't ship a transcriber; you opt in with any CLI that prints text to stdout (Whisper cloud, whisper.cpp, vosk, Deepgram, etc.).
- We don't ship a transcriber; you opt in with the Whisper CLI on your PATH.
- Size guard: inbound audio must be ≤5MB (matches the temp media store and transcript pipeline).
- Outbound caps: web send supports audio/voice up to 16MB (sent as a voice note with `ptt: true`).
- If transcription fails, we fall back to the original body/media note; replies still go through.

View File

@@ -38,7 +38,7 @@ Clawdbot is now **web-only** (Baileys). This document captures the current media
- `{{MediaUrl}}` pseudo-URL for the inbound media.
- `{{MediaPath}}` local temp path written before running the command.
- When a per-session Docker sandbox is enabled, inbound media is copied into the sandbox workspace and `MediaPath`/`MediaUrl` are rewritten to a relative path like `media/inbound/<filename>`.
- Audio transcription (if configured) runs before templating and can replace `Body` with the transcript.
- Audio transcription (if configured via `tools.audio.transcription`) runs before templating and can replace `Body` with the transcript.
## Limits & Errors
- Images: ~6MB cap after recompression.

View File

@@ -198,6 +198,12 @@ Notes:
- `gateway.remote.token` is for **remote CLI calls** only; it does not enable local gateway auth.
- The Control UI authenticates via `connect.params.auth.token` (stored in app/UI settings). Avoid putting tokens in URLs.
### Why do I need a token on localhost now?
The wizard generates a gateway token by default (even on loopback) so **local WS clients must authenticate**. This blocks other local processes from calling the Gateway. Paste the token into the Control UI settings (or your client config) to connect.
If you **really** want open loopback, remove `gateway.auth` from your config. Doctor can generate a token for you any time: `clawdbot doctor --generate-gateway-token`.
### Do I have to restart after changing config?
The Gateway watches the config and supports hot-reload:

View File

@@ -68,6 +68,7 @@ What youll choose:
- **Providers**: WhatsApp QR login, Telegram/Discord bot tokens, etc.
- **Daemon**: background install (launchd/systemd; WSL2 uses systemd)
- **Runtime**: Node (recommended; required for WhatsApp) or Bun (faster, but incompatible with WhatsApp)
- **Gateway token**: the wizard generates one by default (even on loopback) and stores it in `gateway.auth.token`.
Wizard doc: [Wizard](/start/wizard)
@@ -95,6 +96,7 @@ clawdbot gateway --port 18789 --verbose
```
Dashboard (local loopback): `http://127.0.0.1:18789/`
If a token is configured, paste it into the Control UI settings (stored as `connect.params.auth.token`).
⚠️ **WhatsApp + Bun warning:** Baileys (WhatsApp Web library) uses a WebSocket
path that is currently incompatible with Bun and can cause memory corruption on

View File

@@ -32,7 +32,8 @@ Where does the **Gateway** run?
- **Configure later:** skip setup and leave the app unconfigured.
Gateway auth tip:
- If you only use Clawdbot locally (loopback), auth can be **Off**.
- The wizard now generates a **token** even for loopback, so local WS clients must authenticate.
- If you disable auth, any local process can connect; use that only on fully trusted machines.
- Use a **token** for multi-machine access or non-loopback binds.
## 2) Local-only auth (Anthropic OAuth)

View File

@@ -32,7 +32,7 @@ The wizard starts with **QuickStart** (defaults) vs **Advanced** (full control).
- Local gateway (loopback)
- Workspace default (or existing workspace)
- Gateway port **18789**
- Gateway auth **Off** (loopback only)
- Gateway auth **Token** (auto-generated, even on loopback)
- Tailscale exposure **Off**
- Telegram + WhatsApp DMs default to **allowlist** (you'll be prompted for your phone number)
@@ -96,8 +96,9 @@ Tip: `--json` does **not** imply non-interactive mode. Use `--non-interactive` (
4) **Gateway**
- Port, bind, auth mode, tailscale exposure.
- Auth recommendation: keep **Off** for single-machine loopback setups. Use **Token** for multi-machine access or non-loopback binds.
- Non-loopback binds require auth.
- Auth recommendation: keep **Token** even for loopback so local WS clients must authenticate.
- Disable auth only if you fully trust every local process.
- Non-loopback binds still require auth.
5) **Providers**
- WhatsApp: optional QR login.
@@ -135,7 +136,7 @@ Remote mode configures a local client to connect to a Gateway elsewhere.
What you'll set:
- Remote Gateway URL (`ws://...`)
- Optional token
- Token if the remote Gateway requires auth (recommended)
Notes:
- No remote installs or daemon changes are performed.

View File

@@ -25,6 +25,7 @@ Auth is supplied during the WebSocket handshake via:
- `connect.params.auth.token`
- `connect.params.auth.password`
The dashboard settings panel lets you store a token; passwords are not persisted.
The onboarding wizard generates a gateway token by default, so paste it here on first connect.
## What it can do (today)
- Chat with the model via Gateway WS (`chat.history`, `chat.send`, `chat.abort`)
@@ -65,9 +66,9 @@ Open:
- `https://<magicdns>/` (or your configured `gateway.controlUi.basePath`)
By default, the gateway trusts Tailscale identity headers in serve mode. You can still set
`CLAWDBOT_GATEWAY_TOKEN` or `gateway.auth` if you want a shared secret instead.
`gateway.auth` (or `CLAWDBOT_GATEWAY_TOKEN`) if you want a shared secret instead.
### Bind to tailnet + token (legacy)
### Bind to tailnet + token
```bash
clawdbot gateway --bind tailnet --token "$(openssl rand -hex 32)"

View File

@@ -56,13 +56,14 @@ clawdbot gateway
Open:
- `https://<magicdns>/` (or your configured `gateway.controlUi.basePath`)
### Tailnet bind + token (legacy)
### Tailnet bind + token
```json5
{
gateway: {
bind: "tailnet",
controlUi: { enabled: true }
controlUi: { enabled: true },
auth: { mode: "token", token: "your-token" }
}
}
```
@@ -70,7 +71,6 @@ Open:
Then start the gateway (token required for non-loopback binds):
```bash
export CLAWDBOT_GATEWAY_TOKEN="…your token…"
clawdbot gateway
```
@@ -91,7 +91,8 @@ Open:
## Security notes
- Binding the Gateway to a non-loopback address **requires** auth (`CLAWDBOT_GATEWAY_TOKEN` or `gateway.auth`).
- Binding the Gateway to a non-loopback address **requires** auth (`gateway.auth` or `CLAWDBOT_GATEWAY_TOKEN`).
- The wizard generates a gateway token by default (even on loopback).
- The UI sends `connect.params.auth.token` or `connect.params.auth.password`.
- Use `gateway.auth.allowTailscale: false` to require explicit credentials even in Serve mode.
- `gateway.tailscale.mode: "funnel"` requires `gateway.auth.mode: "password"` (shared password).

View File

@@ -92,7 +92,11 @@ import {
type VerboseLevel,
} from "./thinking.js";
import { SILENT_REPLY_TOKEN } from "./tokens.js";
import { isAudio, transcribeInboundAudio } from "./transcription.js";
import {
hasAudioTranscriptionConfig,
isAudio,
transcribeInboundAudio,
} from "./transcription.js";
import type { GetReplyOptions, ReplyPayload } from "./types.js";
export {
@@ -367,7 +371,7 @@ export async function getReplyFromConfig(
opts?.onTypingController?.(typing);
let transcribedText: string | undefined;
if (cfg.audio?.transcription && isAudio(ctx.MediaType)) {
if (hasAudioTranscriptionConfig(cfg) && isAudio(ctx.MediaType)) {
const transcribed = await transcribeInboundAudio(cfg, ctx, defaultRuntime);
if (transcribed?.text) {
transcribedText = transcribed.text;

View File

@@ -37,10 +37,12 @@ describe("transcribeInboundAudio", () => {
vi.stubGlobal("fetch", fetchMock);
const cfg = {
audio: {
transcription: {
command: ["echo", "{{MediaPath}}"],
timeoutSeconds: 5,
tools: {
audio: {
transcription: {
args: ["echo", "{{MediaPath}}"],
timeoutSeconds: 5,
},
},
},
};

View File

@@ -438,6 +438,11 @@ export function buildProgram() {
"Run without prompts (safe migrations only)",
false,
)
.option(
"--generate-gateway-token",
"Generate and configure a gateway token",
false,
)
.option("--deep", "Scan system services for extra gateway installs", false)
.action(async (opts) => {
try {
@@ -447,6 +452,7 @@ export function buildProgram() {
repair: Boolean(opts.repair),
force: Boolean(opts.force),
nonInteractive: Boolean(opts.nonInteractive),
generateGatewayToken: Boolean(opts.generateGatewayToken),
deep: Boolean(opts.deep),
});
} catch (err) {

View File

@@ -159,10 +159,15 @@ async function promptGatewayConfig(
await select({
message: "Gateway auth",
options: [
{ value: "off", label: "Off (loopback only)" },
{ value: "token", label: "Token" },
{
value: "off",
label: "Off (loopback only)",
hint: "Not recommended unless you fully trust local processes",
},
{ value: "token", label: "Token", hint: "Recommended default" },
{ value: "password", label: "Password" },
],
initialValue: "token",
}),
runtime,
) as "off" | "token" | "password";

View File

@@ -14,6 +14,7 @@ export type DoctorOptions = {
deep?: boolean;
repair?: boolean;
force?: boolean;
generateGatewayToken?: boolean;
};
export type DoctorPrompter = {

View File

@@ -384,7 +384,7 @@ export async function runNonInteractiveOnboarding(
? (opts.gatewayPort as number)
: resolveGatewayPort(baseConfig);
let bind = opts.gatewayBind ?? "loopback";
let authMode = opts.gatewayAuth ?? "off";
let authMode = opts.gatewayAuth ?? "token";
const tailscaleMode = opts.tailscale ?? "off";
const tailscaleResetOnExit = Boolean(opts.tailscaleResetOnExit);

View File

@@ -46,6 +46,33 @@ const mergeMissing = (
}
};
const AUDIO_TRANSCRIPTION_CLI_ALLOWLIST = new Set(["whisper"]);
/**
 * Converts a legacy transcription entry (`{ command, timeoutSeconds }`) into
 * the `{ args, timeoutSeconds }` shape.
 *
 * Returns `null` when the entry has no non-empty `command` array, when the
 * first command element is blank, or when the executable's basename is not in
 * `AUDIO_TRANSCRIPTION_CLI_ALLOWLIST`. Otherwise the remaining command
 * elements become `args` (omitted when empty) and a numeric `timeoutSeconds`
 * is carried over.
 */
const mapLegacyAudioTranscription = (
  value: unknown,
): Record<string, unknown> | null => {
  const legacy = getRecord(value);
  const argv = legacy?.command;
  if (!Array.isArray(argv) || argv.length === 0) return null;

  const executable = String(argv[0] ?? "").trim();
  if (executable === "") return null;

  // Match on the last path segment so a bare name and a full path are treated alike.
  const segments = executable.split(/[\\/]/);
  const basename = segments[segments.length - 1] ?? executable;
  if (!AUDIO_TRANSCRIPTION_CLI_ALLOWLIST.has(basename)) return null;

  const mapped: Record<string, unknown> = {};

  // Everything after the executable is kept as stringified arguments.
  const remaining = argv.slice(1).map((entry) => String(entry));
  if (remaining.length > 0) mapped.args = remaining;

  const timeout = legacy?.timeoutSeconds;
  if (typeof timeout === "number") mapped.timeoutSeconds = timeout;

  return mapped;
};
const getAgentsList = (agents: Record<string, unknown> | null) => {
const list = agents?.list;
return Array.isArray(list) ? list : [];
@@ -137,7 +164,7 @@ const LEGACY_CONFIG_RULES: LegacyConfigRule[] = [
{
path: ["routing", "transcribeAudio"],
message:
"routing.transcribeAudio was moved; use audio.transcription instead (run `clawdbot doctor` to migrate).",
"routing.transcribeAudio was moved; use tools.audio.transcription instead (run `clawdbot doctor` to migrate).",
},
{
path: ["telegram", "requireMention"],
@@ -701,18 +728,57 @@ const LEGACY_CONFIG_MIGRATIONS: LegacyConfigMigration[] = [
}
if (routing.transcribeAudio !== undefined) {
const audio = ensureRecord(raw, "audio");
if (audio.transcription === undefined) {
audio.transcription = routing.transcribeAudio;
changes.push("Moved routing.transcribeAudio → audio.transcription.");
const mapped = mapLegacyAudioTranscription(routing.transcribeAudio);
if (mapped) {
const tools = ensureRecord(raw, "tools");
const toolsAudio = ensureRecord(tools, "audio");
if (toolsAudio.transcription === undefined) {
toolsAudio.transcription = mapped;
changes.push(
"Moved routing.transcribeAudio → tools.audio.transcription.",
);
} else {
changes.push(
"Removed routing.transcribeAudio (tools.audio.transcription already set).",
);
}
} else {
changes.push(
"Removed routing.transcribeAudio (audio.transcription already set).",
"Removed routing.transcribeAudio (unsupported transcription CLI).",
);
}
delete routing.transcribeAudio;
}
const audio = getRecord(raw.audio);
if (audio?.transcription !== undefined) {
const mapped = mapLegacyAudioTranscription(audio.transcription);
if (mapped) {
const tools = ensureRecord(raw, "tools");
const toolsAudio = ensureRecord(tools, "audio");
if (toolsAudio.transcription === undefined) {
toolsAudio.transcription = mapped;
changes.push(
"Moved audio.transcription → tools.audio.transcription.",
);
} else {
changes.push(
"Removed audio.transcription (tools.audio.transcription already set).",
);
}
delete audio.transcription;
if (Object.keys(audio).length === 0) delete raw.audio;
else raw.audio = audio;
} else {
delete audio.transcription;
changes.push(
"Removed audio.transcription (unsupported transcription CLI).",
);
if (Object.keys(audio).length === 0) delete raw.audio;
else raw.audio = audio;
}
}
if (Object.keys(routing).length === 0) {
delete raw.routing;
}

View File

@@ -915,6 +915,13 @@ export type AgentToolsConfig = {
export type ToolsConfig = {
allow?: string[];
deny?: string[];
audio?: {
transcription?: {
/** CLI args (template-enabled). */
args?: string[];
timeoutSeconds?: number;
};
};
agentToAgent?: {
/** Enable agent-to-agent messaging tools. Default: false. */
enabled?: boolean;
@@ -1023,6 +1030,7 @@ export type BroadcastConfig = {
};
export type AudioConfig = {
/** @deprecated Use tools.audio.transcription instead. */
transcription?: {
// Optional CLI to turn inbound audio into text; templated args, must output transcript to stdout.
command: string[];

View File

@@ -1,6 +1,7 @@
import { z } from "zod";
import { parseDurationMs } from "../cli/parse-duration.js";
import { isSafeExecutableValue } from "../infra/exec-safety.js";
const ModelApiSchema = z.union([
z.literal("openai-completions"),
@@ -179,7 +180,16 @@ const QueueSchema = z
// Optional schema for a transcription command entry.
// `command` is an array of strings whose first element (the executable) must
// pass `isSafeExecutableValue`; `timeoutSeconds` is an optional positive integer.
const TranscribeAudioSchema = z
  .object({
    // Single `command` key: the earlier unvalidated duplicate was removed, since a
    // repeated key is a TypeScript error and was shadowed by this one anyway.
    command: z.array(z.string()).superRefine((value, ctx) => {
      const executable = value[0];
      if (!isSafeExecutableValue(executable)) {
        ctx.addIssue({
          code: z.ZodIssueCode.custom,
          // Point the issue at the executable slot rather than the whole array.
          path: [0],
          message: "expected safe executable name or path",
        });
      }
    }),
    timeoutSeconds: z.number().int().positive().optional(),
  })
  .optional();
@@ -188,6 +198,17 @@ const HexColorSchema = z
.string()
.regex(/^#?[0-9a-fA-F]{6}$/, "expected hex color (RRGGBB)");
// String schema that accepts only values for which `isSafeExecutableValue`
// returns true; anything else fails with "expected safe executable name or path".
const ExecutableTokenSchema = z
.string()
.refine(isSafeExecutableValue, "expected safe executable name or path");
// Optional object schema for the transcription settings under `tools.audio`:
// - `args`: optional array of strings.
// - `timeoutSeconds`: optional positive integer.
const ToolsAudioTranscriptionSchema = z
.object({
args: z.array(z.string()).optional(),
timeoutSeconds: z.number().int().positive().optional(),
})
.optional();
const TelegramTopicSchema = z.object({
requireMention: z.boolean().optional(),
skills: z.array(z.string()).optional(),
@@ -422,7 +443,7 @@ const SignalAccountSchemaBase = z.object({
httpUrl: z.string().optional(),
httpHost: z.string().optional(),
httpPort: z.number().int().positive().optional(),
cliPath: z.string().optional(),
cliPath: ExecutableTokenSchema.optional(),
autoStart: z.boolean().optional(),
receiveMode: z.union([z.literal("on-start"), z.literal("manual")]).optional(),
ignoreAttachments: z.boolean().optional(),
@@ -470,7 +491,7 @@ const IMessageAccountSchemaBase = z.object({
name: z.string().optional(),
capabilities: z.array(z.string()).optional(),
enabled: z.boolean().optional(),
cliPath: z.string().optional(),
cliPath: ExecutableTokenSchema.optional(),
dbPath: z.string().optional(),
service: z
.union([z.literal("imessage"), z.literal("sms"), z.literal("auto")])
@@ -819,6 +840,11 @@ const ToolsSchema = z
.object({
allow: z.array(z.string()).optional(),
deny: z.array(z.string()).optional(),
audio: z
.object({
transcription: ToolsAudioTranscriptionSchema,
})
.optional(),
agentToAgent: z
.object({
enabled: z.boolean().optional(),