fix: update gateway auth docs and clients

Peter Steinberger
2026-01-11 01:51:07 +01:00
parent d33285a9cd
commit b0b4b33b6b
28 changed files with 283 additions and 67 deletions

View File

@@ -46,6 +46,33 @@ const mergeMissing = (
  }
};
+const AUDIO_TRANSCRIPTION_CLI_ALLOWLIST = new Set(["whisper"]);
+const mapLegacyAudioTranscription = (
+  value: unknown,
+): Record<string, unknown> | null => {
+  const transcriber = getRecord(value);
+  const command = Array.isArray(transcriber?.command)
+    ? transcriber?.command
+    : null;
+  if (!command || command.length === 0) return null;
+  const rawExecutable = String(command[0] ?? "").trim();
+  if (!rawExecutable) return null;
+  const executableName = rawExecutable.split(/[\\/]/).pop() ?? rawExecutable;
+  if (!AUDIO_TRANSCRIPTION_CLI_ALLOWLIST.has(executableName)) return null;
+  const args = command.slice(1).map((part) => String(part));
+  const timeoutSeconds =
+    typeof transcriber?.timeoutSeconds === "number"
+      ? transcriber?.timeoutSeconds
+      : undefined;
+  const result: Record<string, unknown> = {};
+  if (args.length > 0) result.args = args;
+  if (timeoutSeconds !== undefined) result.timeoutSeconds = timeoutSeconds;
+  return result;
+};
const getAgentsList = (agents: Record<string, unknown> | null) => {
  const list = agents?.list;
  return Array.isArray(list) ? list : [];
@@ -137,7 +164,7 @@ const LEGACY_CONFIG_RULES: LegacyConfigRule[] = [
  {
    path: ["routing", "transcribeAudio"],
    message:
-      "routing.transcribeAudio was moved; use audio.transcription instead (run `clawdbot doctor` to migrate).",
+      "routing.transcribeAudio was moved; use tools.audio.transcription instead (run `clawdbot doctor` to migrate).",
  },
  {
    path: ["telegram", "requireMention"],
@@ -701,18 +728,57 @@ const LEGACY_CONFIG_MIGRATIONS: LegacyConfigMigration[] = [
  }
  if (routing.transcribeAudio !== undefined) {
-    const audio = ensureRecord(raw, "audio");
-    if (audio.transcription === undefined) {
-      audio.transcription = routing.transcribeAudio;
-      changes.push("Moved routing.transcribeAudio → audio.transcription.");
+    const mapped = mapLegacyAudioTranscription(routing.transcribeAudio);
+    if (mapped) {
+      const tools = ensureRecord(raw, "tools");
+      const toolsAudio = ensureRecord(tools, "audio");
+      if (toolsAudio.transcription === undefined) {
+        toolsAudio.transcription = mapped;
+        changes.push(
+          "Moved routing.transcribeAudio → tools.audio.transcription.",
+        );
+      } else {
+        changes.push(
+          "Removed routing.transcribeAudio (tools.audio.transcription already set).",
+        );
+      }
    } else {
      changes.push(
-        "Removed routing.transcribeAudio (audio.transcription already set).",
+        "Removed routing.transcribeAudio (unsupported transcription CLI).",
      );
    }
    delete routing.transcribeAudio;
  }
+  const audio = getRecord(raw.audio);
+  if (audio?.transcription !== undefined) {
+    const mapped = mapLegacyAudioTranscription(audio.transcription);
+    if (mapped) {
+      const tools = ensureRecord(raw, "tools");
+      const toolsAudio = ensureRecord(tools, "audio");
+      if (toolsAudio.transcription === undefined) {
+        toolsAudio.transcription = mapped;
+        changes.push(
+          "Moved audio.transcription → tools.audio.transcription.",
+        );
+      } else {
+        changes.push(
+          "Removed audio.transcription (tools.audio.transcription already set).",
+        );
+      }
+      delete audio.transcription;
+      if (Object.keys(audio).length === 0) delete raw.audio;
+      else raw.audio = audio;
+    } else {
+      delete audio.transcription;
+      changes.push(
+        "Removed audio.transcription (unsupported transcription CLI).",
+      );
+      if (Object.keys(audio).length === 0) delete raw.audio;
+      else raw.audio = audio;
+    }
+  }
  if (Object.keys(routing).length === 0) {
    delete raw.routing;
  }
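
For illustration (not part of this commit): what the migration above does to a legacy config, assuming a whisper-based setup. The key names come from the diff; the sample values are invented.

// Before (legacy, hypothetical values):
//   { routing: { transcribeAudio: { command: ["/usr/local/bin/whisper", "--model", "base"], timeoutSeconds: 120 } } }
// After running the migration (e.g. via `clawdbot doctor`), with nothing already set under tools.audio:
//   { tools: { audio: { transcription: { args: ["--model", "base"], timeoutSeconds: 120 } } } }
// A command whose executable is not in AUDIO_TRANSCRIPTION_CLI_ALLOWLIST is dropped rather than moved.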

View File

@@ -915,6 +915,13 @@ export type AgentToolsConfig = {
export type ToolsConfig = {
  allow?: string[];
  deny?: string[];
+  audio?: {
+    transcription?: {
+      /** CLI args (template-enabled). */
+      args?: string[];
+      timeoutSeconds?: number;
+    };
+  };
  agentToAgent?: {
    /** Enable agent-to-agent messaging tools. Default: false. */
    enabled?: boolean;
@@ -1023,6 +1030,7 @@ export type BroadcastConfig = {
};
export type AudioConfig = {
+  /** @deprecated Use tools.audio.transcription instead. */
  transcription?: {
    // Optional CLI to turn inbound audio into text; templated args, must output transcript to stdout.
    command: string[];
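
For illustration (not part of this commit), a minimal sketch of the replacement shape under ToolsConfig; the field names come from the type above, the values are invented.

const tools: ToolsConfig = {
  audio: {
    transcription: {
      // Args handed to the transcription CLI (template-enabled, per the comment above).
      args: ["--model", "base"],
      timeoutSeconds: 120,
    },
  },
};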

View File

@@ -1,6 +1,7 @@
import { z } from "zod";
import { parseDurationMs } from "../cli/parse-duration.js";
+import { isSafeExecutableValue } from "../infra/exec-safety.js";
const ModelApiSchema = z.union([
  z.literal("openai-completions"),
@@ -179,7 +180,16 @@ const QueueSchema = z
const TranscribeAudioSchema = z
  .object({
-    command: z.array(z.string()),
+    command: z.array(z.string()).superRefine((value, ctx) => {
+      const executable = value[0];
+      if (!isSafeExecutableValue(executable)) {
+        ctx.addIssue({
+          code: z.ZodIssueCode.custom,
+          path: [0],
+          message: "expected safe executable name or path",
+        });
+      }
+    }),
    timeoutSeconds: z.number().int().positive().optional(),
  })
  .optional();
@@ -188,6 +198,17 @@ const HexColorSchema = z
  .string()
  .regex(/^#?[0-9a-fA-F]{6}$/, "expected hex color (RRGGBB)");
+const ExecutableTokenSchema = z
+  .string()
+  .refine(isSafeExecutableValue, "expected safe executable name or path");
+const ToolsAudioTranscriptionSchema = z
+  .object({
+    args: z.array(z.string()).optional(),
+    timeoutSeconds: z.number().int().positive().optional(),
+  })
+  .optional();
const TelegramTopicSchema = z.object({
  requireMention: z.boolean().optional(),
  skills: z.array(z.string()).optional(),
@@ -422,7 +443,7 @@ const SignalAccountSchemaBase = z.object({
  httpUrl: z.string().optional(),
  httpHost: z.string().optional(),
  httpPort: z.number().int().positive().optional(),
-  cliPath: z.string().optional(),
+  cliPath: ExecutableTokenSchema.optional(),
  autoStart: z.boolean().optional(),
  receiveMode: z.union([z.literal("on-start"), z.literal("manual")]).optional(),
  ignoreAttachments: z.boolean().optional(),
@@ -470,7 +491,7 @@ const IMessageAccountSchemaBase = z.object({
  name: z.string().optional(),
  capabilities: z.array(z.string()).optional(),
  enabled: z.boolean().optional(),
-  cliPath: z.string().optional(),
+  cliPath: ExecutableTokenSchema.optional(),
  dbPath: z.string().optional(),
  service: z
    .union([z.literal("imessage"), z.literal("sms"), z.literal("auto")])
@@ -819,6 +840,11 @@ const ToolsSchema = z
  .object({
    allow: z.array(z.string()).optional(),
    deny: z.array(z.string()).optional(),
+    audio: z
+      .object({
+        transcription: ToolsAudioTranscriptionSchema,
+      })
+      .optional(),
    agentToAgent: z
      .object({
        enabled: z.boolean().optional(),
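
For illustration (not part of this commit), a rough sketch of how the tightened cliPath validation would surface; whether a given string passes is decided by isSafeExecutableValue, which this diff only imports.

// Hypothetical input; presumably rejected if isSafeExecutableValue disallows shell metacharacters.
const parsed = ExecutableTokenSchema.safeParse("signal-cli && rm -rf /");
if (!parsed.success) {
  console.log(parsed.error.issues[0]?.message); // "expected safe executable name or path"
}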