diff --git a/docs/refactor/2025-11-26-auto-reply-split.md b/docs/refactor/2025-11-26-auto-reply-split.md
new file mode 100644
index 000000000..b329d4fbe
--- /dev/null
+++ b/docs/refactor/2025-11-26-auto-reply-split.md
@@ -0,0 +1,11 @@
+# Auto-reply refactor notes (2025-11-26)
+
+- Split `src/auto-reply/reply.ts` into smaller helpers:
+  - Command handling lives in `src/auto-reply/command-reply.ts`.
+  - Audio transcription helpers live in `src/auto-reply/transcription.ts`.
+  - Shared reply types live in `src/auto-reply/types.ts` (re-exported from `reply.ts`).
+- `runCommandReply` now returns `{ payload, meta }`, supports injected enqueue runners for tests, logs structured metadata, and respects `mediaMaxMb` for local media paths.
+- Added focused tests:
+  - `src/auto-reply/command-reply.test.ts` exercises Claude flag injection, session args, timeout messaging, media token handling, and Claude metadata reporting.
+  - `src/auto-reply/transcription.test.ts` covers media download + transcription command invocation.
+- Existing public surface (`getReplyFromConfig`, `autoReplyIfConfigured`, `ReplyPayload`) remains unchanged; integration tests still pass (`pnpm test`).
diff --git a/src/auto-reply/command-reply.test.ts b/src/auto-reply/command-reply.test.ts
new file mode 100644
index 000000000..0713184d1
--- /dev/null
+++ b/src/auto-reply/command-reply.test.ts
@@ -0,0 +1,186 @@
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+
+import { describe, expect, it, vi } from "vitest";
+
+import { runCommandReply, summarizeClaudeMetadata } from "./command-reply.js";
+import type { ReplyPayload } from "./types.js";
+
+const noopTemplateCtx = {
+  Body: "hello",
+  BodyStripped: "hello",
+  SessionId: "sess",
+  IsNewSession: "true",
+};
+
+type RunnerResult = {
+  stdout?: string;
+  stderr?: string;
+  code?: number;
+  signal?: string | null;
+  killed?: boolean;
+};
+
+function makeRunner(result: RunnerResult, capture: ReplyPayload[] = []) {
+  return vi.fn(async (argv: string[]) => {
+    capture.push({ text: argv.join(" "), argv });
+    return {
+      stdout: result.stdout ?? "",
+      stderr: result.stderr ?? "",
+      code: result.code ?? 0,
+      signal: result.signal ?? null,
+      killed: result.killed ?? false,
+    };
+  });
+}
+
+const enqueueImmediate = vi.fn(
+  async <T>(task: () => Promise<T>, opts?: { onWait?: (ms: number, ahead: number) => void }) => {
+    opts?.onWait?.(25, 2);
+    return task();
+  },
+);
+
+describe("summarizeClaudeMetadata", () => {
+  it("builds concise meta string", () => {
+    const meta = summarizeClaudeMetadata({
+      duration_ms: 1200,
+      num_turns: 3,
+      total_cost_usd: 0.012345,
+      usage: { server_tool_use: { a: 1, b: 2 } },
+      modelUsage: { "claude-3": 2, "haiku": 1 },
+    });
+    expect(meta).toContain("duration=1200ms");
+    expect(meta).toContain("turns=3");
+    expect(meta).toContain("cost=$0.0123");
+    expect(meta).toContain("tool_calls=3");
+    expect(meta).toContain("models=claude-3,haiku");
+  });
+});
+
+describe("runCommandReply", () => {
+  it("injects claude flags and identity prefix", async () => {
+    const captures: ReplyPayload[] = [];
+    const runner = makeRunner({ stdout: "ok" }, captures);
+    const { payload } = await runCommandReply({
+      reply: {
+        mode: "command",
+        command: ["claude", "{{Body}}"],
+        claudeOutputFormat: "json",
+      },
+      templatingCtx: noopTemplateCtx,
+      sendSystemOnce: false,
+      isNewSession: true,
+      isFirstTurnInSession: true,
+      systemSent: false,
+      timeoutMs: 1000,
+      timeoutSeconds: 1,
+      commandRunner: runner,
+      enqueue: enqueueImmediate,
+    });
+
+    expect(payload?.text).toBe("ok");
+    const finalArgv = captures[0].argv as string[];
+    expect(finalArgv).toContain("--output-format");
+    expect(finalArgv).toContain("json");
+    expect(finalArgv).toContain("-p");
+    expect(finalArgv.at(-1)).toContain("You are Clawd (Claude)");
+  });
+
+  it("picks session resume args when not new", async () => {
+    const captures: ReplyPayload[] = [];
+    const runner = makeRunner({ stdout: "hi" }, captures);
+    await runCommandReply({
+      reply: {
+        mode: "command",
+        command: ["cli", "{{Body}}"],
+        session: {
+          sessionArgNew: ["--new", "{{SessionId}}"],
+          sessionArgResume: ["--resume", "{{SessionId}}"],
+        },
+      },
+      templatingCtx: { ...noopTemplateCtx, SessionId: "abc" },
+      sendSystemOnce: true,
+      isNewSession: false,
+      isFirstTurnInSession: false,
+      systemSent: true,
+      timeoutMs: 1000,
+      timeoutSeconds: 1,
+      commandRunner: runner,
+      enqueue: enqueueImmediate,
+    });
+    const argv = captures[0].argv as string[];
+    expect(argv).toContain("--resume");
+    expect(argv).toContain("abc");
+  });
+
+  it("returns timeout text with partial snippet", async () => {
+    const runner = vi.fn(async () => {
+      throw { stdout: "partial output here", killed: true, signal: "SIGKILL" };
+    });
+    const { payload, meta } = await runCommandReply({
+      reply: { mode: "command", command: ["echo", "hi"] },
+      templatingCtx: noopTemplateCtx,
+      sendSystemOnce: false,
+      isNewSession: true,
+      isFirstTurnInSession: true,
+      systemSent: false,
+      timeoutMs: 10,
+      timeoutSeconds: 1,
+      commandRunner: runner,
+      enqueue: enqueueImmediate,
+    });
+    expect(payload?.text).toContain("Command timed out after 1s");
+    expect(payload?.text).toContain("partial output");
+    expect(meta.killed).toBe(true);
+  });
+
+  it("parses MEDIA tokens and respects mediaMaxMb for local files", async () => {
+    const tmp = path.join(os.tmpdir(), `warelay-test-${Date.now()}.bin`);
+    const bigBuffer = Buffer.alloc(2 * 1024 * 1024, 1);
+    await fs.writeFile(tmp, bigBuffer);
+    const runner = makeRunner({
+      stdout: `hi\nMEDIA:${tmp}\nMEDIA:https://example.com/img.jpg`,
+    });
+    const { payload } = await runCommandReply({
+      reply: { mode: "command", command: ["echo", "hi"], mediaMaxMb: 1 },
+      templatingCtx: noopTemplateCtx,
+      sendSystemOnce: false,
+      isNewSession: true,
+      isFirstTurnInSession: true,
+      systemSent: false,
+      timeoutMs: 1000,
+      timeoutSeconds: 1,
+      commandRunner: runner,
+      enqueue: enqueueImmediate,
+    });
+    expect(payload?.mediaUrls).toEqual(["https://example.com/img.jpg"]);
+    await fs.unlink(tmp);
+  });
+
+  it("emits Claude metadata", async () => {
+    const runner = makeRunner({
+      stdout:
+        '{"text":"hi","duration_ms":50,"total_cost_usd":0.0001,"usage":{"server_tool_use":{"a":1}}}',
+    });
+    const { meta } = await runCommandReply({
+      reply: {
+        mode: "command",
+        command: ["claude", "{{Body}}"],
+        claudeOutputFormat: "json",
+      },
+      templatingCtx: noopTemplateCtx,
+      sendSystemOnce: false,
+      isNewSession: true,
+      isFirstTurnInSession: true,
+      systemSent: false,
+      timeoutMs: 1000,
+      timeoutSeconds: 1,
+      commandRunner: runner,
+      enqueue: enqueueImmediate,
+    });
+    expect(meta.claudeMeta).toContain("duration=50ms");
+    expect(meta.claudeMeta).toContain("tool_calls=1");
+  });
+});
diff --git a/src/auto-reply/command-reply.ts b/src/auto-reply/command-reply.ts
index 2ca71606d..af8b00c17 100644
--- a/src/auto-reply/command-reply.ts
+++ b/src/auto-reply/command-reply.ts
@@ -1,3 +1,4 @@
+import fs from "node:fs/promises";
 import path from "node:path";
 
 import type { WarelayConfig } from "../config/config.js";
@@ -19,6 +20,8 @@ type CommandReplyConfig = NonNullable<WarelayConfig["inbound"]>["reply"] & {
   mode: "command";
 };
 
+type EnqueueRunner = typeof enqueueCommand;
+
 type CommandReplyParams = {
   reply: CommandReplyConfig;
   templatingCtx: TemplateContext;
@@ -29,9 +32,25 @@ type CommandReplyParams = {
   timeoutMs: number;
   timeoutSeconds: number;
   commandRunner: typeof runCommandWithTimeout;
+  enqueue?: EnqueueRunner;
 };
 
-function summarizeClaudeMetadata(payload: unknown): string | undefined {
+export type CommandReplyMeta = {
+  durationMs: number;
+  queuedMs?: number;
+  queuedAhead?: number;
+  exitCode?: number | null;
+  signal?: string | null;
+  killed?: boolean;
+  claudeMeta?: string;
+};
+
+export type CommandReplyResult = {
+  payload?: ReplyPayload;
+  meta: CommandReplyMeta;
+};
+
+export function summarizeClaudeMetadata(payload: unknown): string | undefined {
   if (!payload || typeof payload !== "object") return undefined;
   const obj = payload as Record<string, unknown>;
   const parts: string[] = [];
@@ -83,7 +102,7 @@ function summarizeClaudeMetadata(payload: unknown): string | undefined {
 
 export async function runCommandReply(
   params: CommandReplyParams,
-): Promise<ReplyPayload | undefined> {
+): Promise<CommandReplyResult> {
   const {
     reply,
     templatingCtx,
@@ -94,6 +113,7 @@ export async function runCommandReply(
     timeoutMs,
     timeoutSeconds,
     commandRunner,
+    enqueue = enqueueCommand,
   } = params;
 
   let argv = reply.command.map((part) => applyTemplate(part, templatingCtx));
@@ -167,11 +187,15 @@ export async function runCommandReply(
   );
 
   const started = Date.now();
+  let queuedMs: number | undefined;
+  let queuedAhead: number | undefined;
   try {
-    const { stdout, stderr, code, signal, killed } = await enqueueCommand(
+    const { stdout, stderr, code, signal, killed } = await enqueue(
       () => commandRunner(finalArgv, { timeoutMs, cwd: reply.cwd }),
       {
-        onWait: (waitMs, queuedAhead) => {
+        onWait: (waitMs, ahead) => {
+          queuedMs = waitMs;
+          queuedAhead = ahead;
           if (isVerbose()) {
             logVerbose(
               `Command auto-reply queued for ${waitMs}ms (${queuedAhead} ahead)`,
@@ -223,23 +247,86 @@ export async function runCommandReply(
       console.error(
         `Command auto-reply exited with code ${code ?? "unknown"} (signal: ${signal ?? "none"})`,
       );
-      return undefined;
+      return {
+        payload: undefined,
+        meta: {
+          durationMs: Date.now() - started,
+          queuedMs,
+          queuedAhead,
+          exitCode: code,
+          signal,
+          killed,
+          claudeMeta: parsed ? summarizeClaudeMetadata(parsed.parsed) : undefined,
+        },
+      };
     }
     if (killed && !signal) {
       console.error(
         `Command auto-reply process killed before completion (exit code ${code ?? "unknown"})`,
       );
-      return undefined;
+      return {
+        payload: undefined,
+        meta: {
+          durationMs: Date.now() - started,
+          queuedMs,
+          queuedAhead,
+          exitCode: code,
+          signal,
+          killed,
+          claudeMeta: parsed ? summarizeClaudeMetadata(parsed.parsed) : undefined,
+        },
+      };
     }
-    const mediaUrls =
+    let mediaUrls =
       mediaFromCommand ?? (reply.mediaUrl ? [reply.mediaUrl] : undefined);
-    return trimmed || mediaUrls?.length
-      ? {
-          text: trimmed || undefined,
-          mediaUrl: mediaUrls?.[0],
-          mediaUrls,
+
+    // If mediaMaxMb is set, skip local media paths larger than the cap.
+    if (mediaUrls?.length && reply.mediaMaxMb) {
+      const maxBytes = reply.mediaMaxMb * 1024 * 1024;
+      const filtered: string[] = [];
+      for (const url of mediaUrls) {
+        if (/^https?:\/\//i.test(url)) {
+          filtered.push(url);
+          continue;
         }
-      : undefined;
+        const abs = path.isAbsolute(url) ? url : path.resolve(url);
+        try {
+          const stats = await fs.stat(abs);
+          if (stats.size <= maxBytes) {
+            filtered.push(url);
+          } else if (isVerbose()) {
+            logVerbose(
+              `Skipping media ${url} (${(stats.size / (1024 * 1024)).toFixed(2)}MB) over cap ${reply.mediaMaxMb}MB`,
+            );
+          }
+        } catch {
+          filtered.push(url);
+        }
+      }
+      mediaUrls = filtered;
+    }
+
+    const payload =
+      trimmed || mediaUrls?.length
+        ? {
+            text: trimmed || undefined,
+            mediaUrl: mediaUrls?.[0],
+            mediaUrls,
+          }
+        : undefined;
+    const meta: CommandReplyMeta = {
+      durationMs: Date.now() - started,
+      queuedMs,
+      queuedAhead,
+      exitCode: code,
+      signal,
+      killed,
+      claudeMeta: parsed ? summarizeClaudeMetadata(parsed.parsed) : undefined,
+    };
+    if (isVerbose()) {
+      logVerbose(`Command auto-reply meta: ${JSON.stringify(meta)}`);
+    }
+    return { payload, meta };
   } catch (err) {
     const elapsed = Date.now() - started;
     const anyErr = err as { killed?: boolean; signal?: string };
@@ -261,9 +348,29 @@ export async function runCommandReply(
       const text = partialSnippet
         ? `${baseMsg}\n\nPartial output before timeout:\n${partialSnippet}`
         : baseMsg;
-      return { text };
+      return {
+        payload: { text },
+        meta: {
+          durationMs: elapsed,
+          queuedMs,
+          queuedAhead,
+          exitCode: undefined,
+          signal: anyErr.signal,
+          killed: anyErr.killed,
+        },
+      };
     }
     logError(`Command auto-reply failed after ${elapsed}ms: ${String(err)}`);
-    return undefined;
+    return {
+      payload: undefined,
+      meta: {
+        durationMs: elapsed,
+        queuedMs,
+        queuedAhead,
+        exitCode: undefined,
+        signal: anyErr.signal,
+        killed: anyErr.killed,
+      },
+    };
   }
 }
diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts
index fef2efb38..d204d987c 100644
--- a/src/auto-reply/reply.ts
+++ b/src/auto-reply/reply.ts
@@ -241,7 +241,7 @@ export async function getReplyFromConfig(
   if (reply.mode === "command" && reply.command?.length) {
     await onReplyStart();
     try {
-      const result = await runCommandReply({
+      const { payload, meta } = await runCommandReply({
         reply,
         templatingCtx,
         sendSystemOnce,
@@ -252,7 +252,10 @@ export async function getReplyFromConfig(
         timeoutSeconds,
         commandRunner,
       });
-      return result;
+      if (meta.claudeMeta && isVerbose()) {
+        logVerbose(`Claude JSON meta: ${meta.claudeMeta}`);
+      }
+      return payload;
     } finally {
       cleanupTyping();
     }
diff --git a/src/auto-reply/transcription.test.ts b/src/auto-reply/transcription.test.ts
new file mode 100644
index 000000000..7d0e12702
--- /dev/null
+++ b/src/auto-reply/transcription.test.ts
@@ -0,0 +1,63 @@
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+import { transcribeInboundAudio } from "./transcription.js";
+
+vi.mock("../globals.js", () => ({
+  isVerbose: () => false,
+  logVerbose: vi.fn(),
+}));
+
+vi.mock("../process/exec.js", () => ({
+  runExec: vi.fn(async () => ({ stdout: "transcribed text\n" })),
+}));
+
+const runtime = {
+  error: vi.fn(),
+};
+
+describe("transcribeInboundAudio", () => {
+  afterEach(() => {
+    vi.resetAllMocks();
+  });
+
+  it("downloads mediaUrl to temp file and returns transcript", async () => {
+    const tmpBuf = Buffer.from("audio-bytes");
+    const tmpFile = path.join(os.tmpdir(), `warelay-audio-${Date.now()}.ogg`);
+    await fs.writeFile(tmpFile, tmpBuf);
+
+    const fetchMock = vi.fn(async () => ({
+      ok: true,
+      status: 200,
+      arrayBuffer: async () => tmpBuf,
+    })) as unknown as typeof fetch;
+    // @ts-expect-error override global fetch for test
+    global.fetch = fetchMock;
+
+    const cfg = {
+      inbound: {
+        transcribeAudio: {
+          command: ["echo", "{{MediaPath}}"],
+          timeoutSeconds: 5,
+        },
+      },
+    };
+    const ctx = { MediaUrl: "https://example.com/audio.ogg" };
+
+    const result = await transcribeInboundAudio(
+      cfg as never,
+      ctx as never,
+      runtime as never,
+    );
+    expect(result?.text).toBe("transcribed text");
+    expect(fetchMock).toHaveBeenCalled();
+  });
+
+  it("returns undefined when no transcription command", async () => {
+    const res = await transcribeInboundAudio({ inbound: {} } as never, {} as never, runtime as never);
+    expect(res).toBeUndefined();
+  });
+});