diff --git a/src/auto-reply/transcription.ts b/src/auto-reply/transcription.ts index 462a07171..b303ed1be 100644 --- a/src/auto-reply/transcription.ts +++ b/src/auto-reply/transcription.ts @@ -9,19 +9,35 @@ import { runExec } from "../process/exec.js"; import type { RuntimeEnv } from "../runtime.js"; import { applyTemplate, type MsgContext } from "./templating.js"; +const AUDIO_TRANSCRIPTION_BINARY = "whisper"; + export function isAudio(mediaType?: string | null) { return Boolean(mediaType?.startsWith("audio")); } +export function hasAudioTranscriptionConfig(cfg: ClawdbotConfig): boolean { + if (cfg.tools?.audio?.transcription?.args?.length) return true; + return Boolean(cfg.audio?.transcription?.command?.length); +} + export async function transcribeInboundAudio( cfg: ClawdbotConfig, ctx: MsgContext, runtime: RuntimeEnv, ): Promise<{ text: string } | undefined> { - const transcriber = cfg.audio?.transcription; - if (!transcriber?.command?.length) return undefined; + const toolTranscriber = cfg.tools?.audio?.transcription; + const legacyTranscriber = cfg.audio?.transcription; + const hasToolTranscriber = Boolean(toolTranscriber?.args?.length); + if (!hasToolTranscriber && !legacyTranscriber?.command?.length) { + return undefined; + } - const timeoutMs = Math.max((transcriber.timeoutSeconds ?? 45) * 1000, 1_000); + const timeoutMs = Math.max( + (toolTranscriber?.timeoutSeconds ?? + legacyTranscriber?.timeoutSeconds ?? + 45) * 1000, + 1_000, + ); let tmpPath: string | undefined; let mediaPath = ctx.MediaPath; try { @@ -45,9 +61,13 @@ export async function transcribeInboundAudio( if (!mediaPath) return undefined; const templCtx: MsgContext = { ...ctx, MediaPath: mediaPath }; - const argv = transcriber.command.map((part) => - applyTemplate(part, templCtx), - ); + const argv = hasToolTranscriber + ? [AUDIO_TRANSCRIPTION_BINARY, ...(toolTranscriber?.args ?? [])].map( + (part, index) => (index === 0 ? part : applyTemplate(part, templCtx)), + ) + : (legacyTranscriber?.command ?? []).map((part) => + applyTemplate(part, templCtx), + ); if (shouldLogVerbose()) { logVerbose(`Transcribing audio via command: ${argv.join(" ")}`); } diff --git a/src/commands/doctor.test.ts b/src/commands/doctor.test.ts index a3c6a1312..dc2e7581d 100644 --- a/src/commands/doctor.test.ts +++ b/src/commands/doctor.test.ts @@ -288,6 +288,7 @@ vi.mock("./onboard-helpers.js", () => ({ DEFAULT_WORKSPACE: "/tmp", guardCancel: (value: unknown) => value, printWizardHeader: vi.fn(), + randomToken: vi.fn(() => "test-gateway-token"), })); vi.mock("./doctor-state-migrations.js", () => ({ @@ -749,7 +750,10 @@ describe("doctor", () => { return Promise.resolve({ stdout: "", stderr: "" }); }); - confirm.mockResolvedValueOnce(false).mockResolvedValueOnce(true); + confirm + .mockResolvedValueOnce(false) // skip gateway token prompt + .mockResolvedValueOnce(false) // skip build + .mockResolvedValueOnce(true); // accept legacy fallback const { doctorCommand } = await import("./doctor.js"); const runtime = { diff --git a/src/commands/doctor.ts b/src/commands/doctor.ts index 4d3d985e8..de895de9a 100644 --- a/src/commands/doctor.ts +++ b/src/commands/doctor.ts @@ -84,7 +84,11 @@ import { } from "./doctor-workspace.js"; import { healthCommand } from "./health.js"; import { formatHealthCheckFailure } from "./health-format.js"; -import { applyWizardMetadata, printWizardHeader } from "./onboard-helpers.js"; +import { + applyWizardMetadata, + printWizardHeader, + randomToken, +} from "./onboard-helpers.js"; import { ensureSystemdUserLingerInteractive } from "./systemd-linger.js"; const intro = (message: string) => @@ -279,6 +283,45 @@ export async function doctorCommand( if (gatewayDetails.remoteFallbackNote) { note(gatewayDetails.remoteFallbackNote, "Gateway"); } + if (resolveMode(cfg) === "local") { + const authMode = cfg.gateway?.auth?.mode; + const token = + typeof cfg.gateway?.auth?.token === "string" + ? cfg.gateway?.auth?.token.trim() + : ""; + const needsToken = + authMode !== "password" && (authMode !== "token" || !token); + if (needsToken) { + note( + "Gateway auth is off or missing a token. Token auth is now the recommended default (including loopback).", + "Gateway auth", + ); + const shouldSetToken = + options.generateGatewayToken === true + ? true + : options.nonInteractive === true + ? false + : await prompter.confirmRepair({ + message: "Generate and configure a gateway token now?", + initialValue: true, + }); + if (shouldSetToken) { + const nextToken = randomToken(); + cfg = { + ...cfg, + gateway: { + ...cfg.gateway, + auth: { + ...cfg.gateway?.auth, + mode: "token", + token: nextToken, + }, + }, + }; + note("Gateway token configured.", "Gateway auth"); + } + } + } const legacyState = await detectLegacyStateMigrations({ cfg }); if (legacyState.preview.length > 0) { diff --git a/src/commands/onboard-helpers.ts b/src/commands/onboard-helpers.ts index fea853718..7b7ef066d 100644 --- a/src/commands/onboard-helpers.ts +++ b/src/commands/onboard-helpers.ts @@ -14,6 +14,7 @@ import { CONFIG_PATH_CLAWDBOT } from "../config/config.js"; import { resolveSessionTranscriptsDirForAgent } from "../config/sessions.js"; import { callGateway } from "../gateway/call.js"; import { normalizeControlUiBasePath } from "../gateway/control-ui.js"; +import { isSafeExecutableValue } from "../infra/exec-safety.js"; import { pickPrimaryTailnetIPv4 } from "../infra/tailnet.js"; import { runCommandWithTimeout } from "../process/exec.js"; import type { RuntimeEnv } from "../runtime.js"; @@ -288,8 +289,14 @@ export async function handleReset( export async function detectBinary(name: string): Promise { if (!name?.trim()) return false; + if (!isSafeExecutableValue(name)) return false; const resolved = name.startsWith("~") ? resolveUserPath(name) : name; - if (path.isAbsolute(resolved) || resolved.startsWith(".")) { + if ( + path.isAbsolute(resolved) || + resolved.startsWith(".") || + resolved.includes("/") || + resolved.includes("\\") + ) { try { await fs.access(resolved); return true; @@ -301,7 +308,7 @@ export async function detectBinary(name: string): Promise { const command = process.platform === "win32" ? ["where", name] - : ["/usr/bin/env", "sh", "-lc", `command -v ${name}`]; + : ["/usr/bin/env", "which", name]; try { const result = await runCommandWithTimeout(command, { timeoutMs: 2000 }); return result.code === 0 && result.stdout.trim().length > 0; diff --git a/src/config/config.test.ts b/src/config/config.test.ts index 13c114451..fc337bcca 100644 --- a/src/config/config.test.ts +++ b/src/config/config.test.ts @@ -975,7 +975,10 @@ describe("legacy config detection", () => { routing: { agentToAgent: { enabled: true, allow: ["main"] }, queue: { mode: "queue", cap: 3 }, - transcribeAudio: { command: ["echo", "hi"], timeoutSeconds: 2 }, + transcribeAudio: { + command: ["whisper", "--model", "base"], + timeoutSeconds: 2, + }, }, }); expect(res.changes).toContain( @@ -983,7 +986,7 @@ describe("legacy config detection", () => { ); expect(res.changes).toContain("Moved routing.queue → messages.queue."); expect(res.changes).toContain( - "Moved routing.transcribeAudio → audio.transcription.", + "Moved routing.transcribeAudio → tools.audio.transcription.", ); expect(res.config?.tools?.agentToAgent).toEqual({ enabled: true, @@ -993,8 +996,8 @@ describe("legacy config detection", () => { mode: "queue", cap: 3, }); - expect(res.config?.audio?.transcription).toEqual({ - command: ["echo", "hi"], + expect(res.config?.tools?.audio?.transcription).toEqual({ + args: ["--model", "base"], timeoutSeconds: 2, }); expect(res.config?.routing).toBeUndefined(); @@ -1287,6 +1290,44 @@ describe("legacy config detection", () => { } }); + it("rejects unsafe executable config values", async () => { + vi.resetModules(); + const { validateConfigObject } = await import("./config.js"); + const res = validateConfigObject({ + imessage: { cliPath: "imsg; rm -rf /" }, + tools: { audio: { transcription: { args: ["--model", "base"] } } }, + }); + expect(res.ok).toBe(false); + if (!res.ok) { + expect(res.issues.some((i) => i.path === "imessage.cliPath")).toBe(true); + } + }); + + it("accepts tools audio transcription without cli", async () => { + vi.resetModules(); + const { validateConfigObject } = await import("./config.js"); + const res = validateConfigObject({ + tools: { audio: { transcription: { args: ["--model", "base"] } } }, + }); + expect(res.ok).toBe(true); + }); + + it("accepts path-like executable values with spaces", async () => { + vi.resetModules(); + const { validateConfigObject } = await import("./config.js"); + const res = validateConfigObject({ + imessage: { cliPath: "/Applications/Imsg Tools/imsg" }, + tools: { + audio: { + transcription: { + args: ["--model"], + }, + }, + }, + }); + expect(res.ok).toBe(true); + }); + it('rejects discord.dm.policy="open" without allowFrom "*"', async () => { vi.resetModules(); const { validateConfigObject } = await import("./config.js"); diff --git a/src/config/schema.ts b/src/config/schema.ts index 05840ac7f..94aad9b3c 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -89,6 +89,9 @@ const FIELD_LABELS: Record = { "gateway.remote.password": "Remote Gateway Password", "gateway.auth.token": "Gateway Token", "gateway.auth.password": "Gateway Password", + "tools.audio.transcription.args": "Audio Transcription Args", + "tools.audio.transcription.timeoutSeconds": + "Audio Transcription Timeout (sec)", "gateway.controlUi.basePath": "Control UI Base Path", "gateway.http.endpoints.chatCompletions.enabled": "OpenAI Chat Completions Endpoint", @@ -156,7 +159,7 @@ const FIELD_HELP: Record = { "gateway.remote.sshIdentity": "Optional SSH identity file path (passed to ssh -i).", "gateway.auth.token": - "Required for multi-machine access or non-loopback binds.", + "Recommended for all gateways; required for non-loopback binds.", "gateway.auth.password": "Required for Tailscale funnel.", "gateway.controlUi.basePath": "Optional URL prefix where the Control UI is served (e.g. /clawdbot).", diff --git a/src/infra/exec-safety.ts b/src/infra/exec-safety.ts new file mode 100644 index 000000000..c565255bd --- /dev/null +++ b/src/infra/exec-safety.ts @@ -0,0 +1,26 @@ +const SHELL_METACHARS = /[;&|`$<>]/; +const CONTROL_CHARS = /[\r\n]/; +const QUOTE_CHARS = /["']/; +const BARE_NAME_PATTERN = /^[A-Za-z0-9._+-]+$/; + +function isLikelyPath(value: string): boolean { + if (value.startsWith(".") || value.startsWith("~")) return true; + if (value.includes("/") || value.includes("\\")) return true; + return /^[A-Za-z]:[\\/]/.test(value); +} + +export function isSafeExecutableValue( + value: string | null | undefined, +): boolean { + if (!value) return false; + const trimmed = value.trim(); + if (!trimmed) return false; + if (trimmed.includes("\0")) return false; + if (CONTROL_CHARS.test(trimmed)) return false; + if (SHELL_METACHARS.test(trimmed)) return false; + if (QUOTE_CHARS.test(trimmed)) return false; + + if (isLikelyPath(trimmed)) return true; + if (trimmed.startsWith("-")) return false; + return BARE_NAME_PATTERN.test(trimmed); +} diff --git a/src/wizard/onboarding.ts b/src/wizard/onboarding.ts index ed5e6d2bb..6a1d7d5f3 100644 --- a/src/wizard/onboarding.ts +++ b/src/wizard/onboarding.ts @@ -174,7 +174,7 @@ export async function runOnboardingWizard( ? bindRaw : "loopback"; - let authMode: GatewayAuthChoice = "off"; + let authMode: GatewayAuthChoice = "token"; if ( baseConfig.gateway?.auth?.mode === "token" || baseConfig.gateway?.auth?.mode === "password" @@ -215,7 +215,7 @@ export async function runOnboardingWizard( }; const formatAuth = (value: GatewayAuthChoice) => { if (value === "off") return "Off (loopback only)"; - if (value === "token") return "Token"; + if (value === "token") return "Token (default)"; return "Password"; }; const formatTailscale = (value: "off" | "serve" | "funnel") => { @@ -237,7 +237,7 @@ export async function runOnboardingWizard( : [ `Gateway port: ${DEFAULT_GATEWAY_PORT}`, "Gateway bind: Loopback (127.0.0.1)", - "Gateway auth: Off (loopback only)", + "Gateway auth: Token (default)", "Tailscale exposure: Off", "Direct to chat providers.", ]; @@ -248,7 +248,8 @@ export async function runOnboardingWizard( const localUrl = `ws://127.0.0.1:${localPort}`; const localProbe = await probeGatewayReachable({ url: localUrl, - token: process.env.CLAWDBOT_GATEWAY_TOKEN, + token: + baseConfig.gateway?.auth?.token ?? process.env.CLAWDBOT_GATEWAY_TOKEN, password: baseConfig.gateway?.auth?.password ?? process.env.CLAWDBOT_GATEWAY_PASSWORD, @@ -402,15 +403,16 @@ export async function runOnboardingWizard( { value: "off", label: "Off (loopback only)", - hint: "Recommended for single-machine setups", + hint: "Not recommended unless you fully trust local processes", }, { value: "token", label: "Token", - hint: "Use for multi-machine access or non-loopback binds", + hint: "Recommended default (local + remote)", }, { value: "password", label: "Password" }, ], + initialValue: "token", })) as GatewayAuthChoice) ) as GatewayAuthChoice; @@ -477,8 +479,8 @@ export async function runOnboardingWizard( let gatewayToken: string | undefined; if (authMode === "token") { - if (flow === "quickstart" && quickstartGateway.token) { - gatewayToken = quickstartGateway.token; + if (flow === "quickstart") { + gatewayToken = quickstartGateway.token ?? randomToken(); } else { const tokenInput = await prompter.text({ message: "Gateway token (blank to generate)", @@ -815,5 +817,10 @@ export async function runOnboardingWizard( "Workspace backup", ); + await prompter.note( + "Running agents on your computer is risky — harden your setup: https://docs.clawd.bot/security", + "Security", + ); + await prompter.outro("Onboarding complete."); } diff --git a/test/gateway.multi.e2e.test.ts b/test/gateway.multi.e2e.test.ts index 7e15e96c0..0022e71db 100644 --- a/test/gateway.multi.e2e.test.ts +++ b/test/gateway.multi.e2e.test.ts @@ -16,7 +16,9 @@ type GatewayInstance = { port: number; bridgePort: number; hookToken: string; + gatewayToken: string; homeDir: string; + stateDir: string; configPath: string; child: ChildProcessWithoutNullStreams; stdout: string[]; @@ -99,14 +101,16 @@ const spawnGatewayInstance = async (name: string): Promise => { const port = await getFreePort(); const bridgePort = await getFreePort(); const hookToken = `token-${name}-${randomUUID()}`; + const gatewayToken = `gateway-${name}-${randomUUID()}`; const homeDir = await fs.mkdtemp( path.join(os.tmpdir(), `clawdbot-e2e-${name}-`), ); const configDir = path.join(homeDir, ".clawdbot"); await fs.mkdir(configDir, { recursive: true }); const configPath = path.join(configDir, "clawdbot.json"); + const stateDir = path.join(configDir, "state"); const config = { - gateway: { port }, + gateway: { port, auth: { mode: "token", token: gatewayToken } }, hooks: { enabled: true, token: hookToken, path: "/hooks" }, bridge: { bind: "loopback", port: bridgePort }, }; @@ -134,7 +138,7 @@ const spawnGatewayInstance = async (name: string): Promise => { ...process.env, HOME: homeDir, CLAWDBOT_CONFIG_PATH: configPath, - CLAWDBOT_STATE_DIR: path.join(homeDir, ".clawdbot", "state"), + CLAWDBOT_STATE_DIR: stateDir, CLAWDBOT_GATEWAY_TOKEN: "", CLAWDBOT_GATEWAY_PASSWORD: "", CLAWDBOT_SKIP_PROVIDERS: "1", @@ -166,7 +170,9 @@ const spawnGatewayInstance = async (name: string): Promise => { port, bridgePort, hookToken, + gatewayToken, homeDir, + stateDir, configPath, child, stdout, @@ -369,7 +375,7 @@ const pairNode = async (inst: GatewayInstance, nodeId: string) => { version: "1.0.0", }); - const baseDir = path.join(inst.homeDir, ".clawdbot"); + const baseDir = inst.stateDir; const requestId = await waitForPairRequest(baseDir, nodeId); const approved = await approveNodePairing(requestId, baseDir); expect(approved).toBeTruthy(); @@ -409,12 +415,12 @@ describe("gateway multi-instance e2e", () => { const [healthA, healthB] = (await Promise.all([ runCliJson(["health", "--json", "--timeout", "10000"], { CLAWDBOT_GATEWAY_PORT: String(gwA.port), - CLAWDBOT_GATEWAY_TOKEN: "", + CLAWDBOT_GATEWAY_TOKEN: gwA.gatewayToken, CLAWDBOT_GATEWAY_PASSWORD: "", }), runCliJson(["health", "--json", "--timeout", "10000"], { CLAWDBOT_GATEWAY_PORT: String(gwB.port), - CLAWDBOT_GATEWAY_TOKEN: "", + CLAWDBOT_GATEWAY_TOKEN: gwB.gatewayToken, CLAWDBOT_GATEWAY_PASSWORD: "", }), ])) as [HealthPayload, HealthPayload]; @@ -443,14 +449,14 @@ describe("gateway multi-instance e2e", () => { runCliJson( ["nodes", "status", "--json", "--url", `ws://127.0.0.1:${gwA.port}`], { - CLAWDBOT_GATEWAY_TOKEN: "", + CLAWDBOT_GATEWAY_TOKEN: gwA.gatewayToken, CLAWDBOT_GATEWAY_PASSWORD: "", }, ), runCliJson( ["nodes", "status", "--json", "--url", `ws://127.0.0.1:${gwB.port}`], { - CLAWDBOT_GATEWAY_TOKEN: "", + CLAWDBOT_GATEWAY_TOKEN: gwB.gatewayToken, CLAWDBOT_GATEWAY_PASSWORD: "", }, ),