diff --git a/CHANGELOG.md b/CHANGELOG.md
index 239240014..e72e9b7ef 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ Docs: https://docs.clawd.bot
 ### Changes
 - TTS: add Edge TTS provider fallback, defaulting to keyless Edge with MP3 retry on format failures. (#1668) Thanks @steipete. https://docs.clawd.bot/tts
 - Web search: add Brave freshness filter parameter for time-scoped results. (#1688) Thanks @JonUleis. https://docs.clawd.bot/tools/web
+- TTS: add auto mode enum (off/always/inbound/tagged) with per-session `/tts` override. (#1667) Thanks @sebslight. https://docs.clawd.bot/tts
 - Docs: expand FAQ (migration, scheduling, concurrency, model recommendations, OpenAI subscription auth, Pi sizing, hackable install, docs SSL workaround).
 - Docs: add verbose installer troubleshooting guidance.
 - Docs: update Fly.io guide notes.
diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md
index 67701f946..12226e1f3 100644
--- a/docs/gateway/configuration.md
+++ b/docs/gateway/configuration.md
@@ -1509,7 +1509,7 @@ voice notes; other channels send MP3 audio.
 {
   messages: {
     tts: {
-      enabled: true,
+      auto: "always", // off | always | inbound | tagged
       mode: "final", // final | all (include tool/block replies)
       provider: "elevenlabs",
       summaryModel: "openai/gpt-4.1-mini",
@@ -1546,8 +1546,10 @@ voice notes; other channels send MP3 audio.
 ```
 
 Notes:
-- `messages.tts.enabled` can be overridden by local user prefs (see `/tts on`, `/tts off`).
-- `prefsPath` stores local overrides (enabled/provider/limit/summarize).
+- `messages.tts.auto` controls auto-TTS (`off`, `always`, `inbound`, `tagged`).
+- `/tts off|always|inbound|tagged` sets the per-session auto mode (overrides config).
+- `messages.tts.enabled` is legacy; doctor migrates it to `messages.tts.auto`.
+- `prefsPath` stores local overrides (provider/limit/summarize).
 - `maxTextLength` is a hard cap for TTS input; summaries are truncated to fit.
 - `summaryModel` overrides `agents.defaults.model.primary` for auto-summary.
   - Accepts `provider/model` or an alias from `agents.defaults.models`.
diff --git a/docs/tools/slash-commands.md b/docs/tools/slash-commands.md
index 1c45fe95b..84a087dba 100644
--- a/docs/tools/slash-commands.md
+++ b/docs/tools/slash-commands.md
@@ -68,7 +68,7 @@ Text + native (when enabled):
 - `/config show|get|set|unset` (persist config to disk, owner-only; requires `commands.config: true`)
 - `/debug show|set|unset|reset` (runtime overrides, owner-only; requires `commands.debug: true`)
 - `/usage off|tokens|full|cost` (per-response usage footer or local cost summary)
-- `/tts on|off|status|provider|limit|summary|audio` (control TTS; see [/tts](/tts))
+- `/tts off|always|inbound|tagged|status|provider|limit|summary|audio` (control TTS; see [/tts](/tts))
   - Discord: native command is `/voice` (Discord reserves `/tts`); text `/tts` still works.
 - `/stop`
 - `/restart`
diff --git a/docs/tts.md b/docs/tts.md
index 61da1f0dc..22dacd611 100644
--- a/docs/tts.md
+++ b/docs/tts.md
@@ -53,8 +53,8 @@ so that provider must also be authenticated if you enable summaries.
 
 ## Is it enabled by default?
 
-No. TTS is **disabled** by default. Enable it in config or with `/tts on`,
-which writes a local preference override.
+No. Auto-TTS is **off** by default. Enable it in config with
+`messages.tts.auto` or per session with `/tts always` (alias: `/tts on`).
 
 Edge TTS **is** enabled by default once TTS is on, and is used automatically
 when no OpenAI or ElevenLabs API keys are available.
@@ -70,7 +70,7 @@ Full schema is in [Gateway configuration](/gateway/configuration).
 {
   messages: {
     tts: {
-      enabled: true,
+      auto: "always",
       provider: "elevenlabs"
     }
   }
@@ -83,7 +83,7 @@ Full schema is in [Gateway configuration](/gateway/configuration).
 {
   messages: {
     tts: {
-      enabled: true,
+      auto: "always",
       provider: "openai",
       summaryModel: "openai/gpt-4.1-mini",
       modelOverrides: {
@@ -121,7 +121,7 @@ Full schema is in [Gateway configuration](/gateway/configuration).
 {
   messages: {
     tts: {
-      enabled: true,
+      auto: "always",
       provider: "edge",
       edge: {
         enabled: true,
@@ -156,7 +156,7 @@ Full schema is in [Gateway configuration](/gateway/configuration).
 {
   messages: {
     tts: {
-      enabled: true,
+      auto: "always",
       maxTextLength: 4000,
       timeoutMs: 30000,
       prefsPath: "~/.clawdbot/settings/tts.json"
@@ -165,13 +165,25 @@ Full schema is in [Gateway configuration](/gateway/configuration).
 }
 ```
 
+### Only reply with audio after an inbound voice note
+
+```json5
+{
+  messages: {
+    tts: {
+      auto: "inbound"
+    }
+  }
+}
+```
+
 ### Disable auto-summary for long replies
 
 ```json5
 {
   messages: {
     tts: {
-      enabled: true
+      auto: "always"
     }
   }
 }
@@ -185,7 +197,10 @@ Then run:
 
 ### Notes on fields
 
-- `enabled`: master toggle (default `false`; local prefs can override).
+- `auto`: auto-TTS mode (`off`, `always`, `inbound`, `tagged`).
+  - `inbound` only sends audio after an inbound voice note.
+  - `tagged` only sends audio when the reply includes `[[tts]]` tags.
+- `enabled`: legacy toggle (doctor migrates this to `auto`).
 - `mode`: `"final"` (default) or `"all"` (includes tool/block replies).
 - `provider`: `"elevenlabs"`, `"openai"`, or `"edge"` (fallback is automatic).
 - If `provider` is **unset**, Clawdbot prefers `openai` (if key), then `elevenlabs` (if key),
@@ -195,7 +210,7 @@ Then run:
 - `modelOverrides`: allow the model to emit TTS directives (on by default).
 - `maxTextLength`: hard cap for TTS input (chars). `/tts audio` fails if exceeded.
 - `timeoutMs`: request timeout (ms).
-- `prefsPath`: override the local prefs JSON path.
+- `prefsPath`: override the local prefs JSON path (provider/limit/summary).
 - `apiKey` values fall back to env vars (`ELEVENLABS_API_KEY`/`XI_API_KEY`, `OPENAI_API_KEY`).
 - `elevenlabs.baseUrl`: override ElevenLabs API base URL.
 - `elevenlabs.voiceSettings`:
@@ -218,6 +233,7 @@ Then run:
 ## Model-driven overrides (default on)
 
 By default, the model **can** emit TTS directives for a single reply.
+When `messages.tts.auto` is `tagged`, these directives are required to trigger audio.
 
 When enabled, the model can emit `[[tts:...]]` directives to override the voice
 for a single reply, plus an optional `[[tts:text]]...[[/tts:text]]` block to
@@ -338,8 +354,10 @@ Discord note: `/tts` is a built-in Discord command, so Clawdbot registers
 `/voice` as the native command there. Text `/tts ...` still works.
 
 ```
-/tts on
 /tts off
+/tts always
+/tts inbound
+/tts tagged
 /tts status
 /tts provider openai
 /tts limit 2000
@@ -350,6 +368,7 @@ Discord note: `/tts` is a built-in Discord command, so Clawdbot registers
 Notes:
 - Commands require an authorized sender (allowlist/owner rules still apply).
 - `commands.text` or native command registration must be enabled.
+- `off|always|inbound|tagged` are per-session toggles (`/tts on` is an alias for `/tts always`).
 - `limit` and `summary` are stored in local prefs, not the main config.
 - `/tts audio` generates a one-off audio reply (does not toggle TTS on).
 
diff --git a/src/auto-reply/reply/commands-tts.ts b/src/auto-reply/reply/commands-tts.ts
index 3e8c71288..5c65fb94c 100644
--- a/src/auto-reply/reply/commands-tts.ts
+++ b/src/auto-reply/reply/commands-tts.ts
@@ -6,19 +6,20 @@ import {
   getTtsMaxLength,
   getTtsProvider,
   isSummarizationEnabled,
-  isTtsEnabled,
   isTtsProviderConfigured,
+  normalizeTtsAutoMode,
+  resolveTtsAutoMode,
   resolveTtsApiKey,
   resolveTtsConfig,
   resolveTtsPrefsPath,
   resolveTtsProviderOrder,
   setLastTtsAttempt,
   setSummarizationEnabled,
-  setTtsEnabled,
   setTtsMaxLength,
   setTtsProvider,
   textToSpeech,
 } from "../../tts/tts.js";
+import { updateSessionStore } from "../../config/sessions.js";
 
 type ParsedTtsCommand = {
   action: string;
@@ -39,9 +40,9 @@ function ttsUsage(): ReplyPayload {
   // Keep usage in one place so help/validation stays consistent.
   return {
     text:
-      "⚙️ Usage: /tts <on|off|status|provider|limit|summary|audio> [value]" +
+      "⚙️ Usage: /tts <off|always|inbound|tagged|status|provider|limit|summary|audio> [value]" +
       "\nExamples:\n" +
-      "/tts on\n" +
+      "/tts always\n" +
       "/tts provider openai\n" +
       "/tts provider edge\n" +
       "/tts limit 2000\n" +
@@ -71,14 +72,30 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
     return { shouldContinue: false, reply: ttsUsage() };
   }
 
-  if (action === "on") {
-    setTtsEnabled(prefsPath, true);
-    return { shouldContinue: false, reply: { text: "🔊 TTS enabled." } };
-  }
-
-  if (action === "off") {
-    setTtsEnabled(prefsPath, false);
-    return { shouldContinue: false, reply: { text: "🔇 TTS disabled." } };
+  const requestedAuto = normalizeTtsAutoMode(
+    action === "on" ? "always" : action === "off" ? "off" : action,
+  );
+  if (requestedAuto) {
+    const entry = params.sessionEntry;
+    const sessionKey = params.sessionKey;
+    const store = params.sessionStore;
+    if (entry && store && sessionKey) {
+      entry.ttsAuto = requestedAuto;
+      entry.updatedAt = Date.now();
+      store[sessionKey] = entry;
+      if (params.storePath) {
+        await updateSessionStore(params.storePath, (store) => {
+          store[sessionKey] = entry;
+        });
+      }
+    }
+    const label = requestedAuto === "always" ? "enabled (always)" : requestedAuto;
+    return {
+      shouldContinue: false,
+      reply: {
+        text: requestedAuto === "off" ? "🔇 TTS disabled." : `🔊 TTS ${label}.`,
+      },
+    };
   }
 
   if (action === "audio") {
@@ -212,7 +229,9 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
   }
 
   if (action === "status") {
-    const enabled = isTtsEnabled(config, prefsPath);
+    const sessionAuto = params.sessionEntry?.ttsAuto;
+    const autoMode = resolveTtsAutoMode({ config, prefsPath, sessionAuto });
+    const enabled = autoMode !== "off";
     const provider = getTtsProvider(config, prefsPath);
     const hasKey = isTtsProviderConfigured(config, provider);
     const providerStatus =
@@ -226,9 +245,10 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
     const maxLength = getTtsMaxLength(prefsPath);
     const summarize = isSummarizationEnabled(prefsPath);
     const last = getLastTtsAttempt();
+    const autoLabel = sessionAuto ? `${autoMode} (session)` : autoMode;
     const lines = [
       "📊 TTS status",
-      `State: ${enabled ? "✅ enabled" : "❌ disabled"}`,
+      `Auto: ${enabled ? autoLabel : "off"}`,
       `Provider: ${provider} (${providerStatus})`,
       `Text limit: ${maxLength} chars`,
       `Auto-summary: ${summarize ? "on" : "off"}`,
diff --git a/src/auto-reply/reply/dispatch-from-config.ts b/src/auto-reply/reply/dispatch-from-config.ts
index 5885d729e..16c83bf30 100644
--- a/src/auto-reply/reply/dispatch-from-config.ts
+++ b/src/auto-reply/reply/dispatch-from-config.ts
@@ -1,4 +1,6 @@
 import type { ClawdbotConfig } from "../../config/config.js";
+import { resolveSessionAgentId } from "../../agents/agent-scope.js";
+import { loadSessionStore, resolveStorePath } from "../../config/sessions.js";
 import { logVerbose } from "../../globals.js";
 import { isDiagnosticsEnabled } from "../../infra/diagnostic-events.js";
 import {
@@ -14,7 +16,55 @@ import { formatAbortReplyText, tryFastAbortFromMessage } from "./abort.js";
 import { shouldSkipDuplicateInbound } from "./inbound-dedupe.js";
 import type { ReplyDispatcher, ReplyDispatchKind } from "./reply-dispatcher.js";
 import { isRoutableChannel, routeReply } from "./route-reply.js";
-import { maybeApplyTtsToPayload } from "../../tts/tts.js";
+import { maybeApplyTtsToPayload, normalizeTtsAutoMode } from "../../tts/tts.js";
+
+const AUDIO_PLACEHOLDER_RE = /^<media:audio>(\s*\([^)]*\))?$/i;
+const AUDIO_HEADER_RE = /^\[Audio\b/i;
+// Drops MIME parameters and letter case, e.g. "Audio/OGG; codecs=opus" -> "audio/ogg".
+const normalizeMediaType = (raw: string): string => (raw.split(";")[0] ?? "").trim().toLowerCase();
+
+/**
+ * Reports whether the inbound message looks like audio: either a declared
+ * `audio` / `audio/*` media type, or a body that is an audio placeholder or
+ * starts with an `[Audio` header.
+ */
+const isInboundAudioContext = (ctx: FinalizedMsgContext): boolean => {
+  // Structured media metadata is checked first.
+  const declaredTypes = [
+    typeof ctx.MediaType === "string" ? ctx.MediaType : undefined,
+    ...(Array.isArray(ctx.MediaTypes) ? ctx.MediaTypes : []),
+  ].filter(Boolean) as string[];
+  const mediaTypes = declaredTypes.map((declared) => normalizeMediaType(declared));
+  for (const mediaType of mediaTypes) {
+    if (mediaType === "audio" || mediaType.startsWith("audio/")) return true;
+  }
+
+  // Otherwise inspect the first string-valued body field, in priority order.
+  const bodyFields = [ctx.BodyForCommands, ctx.CommandBody, ctx.RawBody, ctx.Body];
+  const firstBody = bodyFields.find((field) => typeof field === "string");
+  const text = typeof firstBody === "string" ? firstBody.trim() : "";
+  if (!text) return false;
+  return AUDIO_PLACEHOLDER_RE.test(text) || AUDIO_HEADER_RE.test(text);
+};
+
+const resolveSessionTtsAuto = (
+  ctx: FinalizedMsgContext,
+  cfg: ClawdbotConfig,
+): string | undefined => {
+  // For native commands prefer CommandTargetSessionKey; a blank value falls back to SessionKey.
+  const targetSessionKey =
+    ctx.CommandSource === "native" ? ctx.CommandTargetSessionKey?.trim() : undefined;
+  const sessionKey = (targetSessionKey || ctx.SessionKey)?.trim();
+  if (!sessionKey) return undefined;
+  const agentId = resolveSessionAgentId({ sessionKey, config: cfg });
+  const storePath = resolveStorePath(cfg.session?.store, { agentId });
+  try {
+    const store = loadSessionStore(storePath);
+    return normalizeTtsAutoMode((store[sessionKey.toLowerCase()] ?? store[sessionKey])?.ttsAuto);
+  } catch {
+    return undefined; // best-effort: an unreadable store means "no session override"
+  }
+};
 
 export type DispatchFromConfigResult = {
   queuedFinal: boolean;
@@ -81,6 +131,8 @@ export async function dispatchReplyFromConfig(params: {
     return { queuedFinal: false, counts: dispatcher.getQueuedCounts() };
   }
 
+  const inboundAudio = isInboundAudioContext(ctx);
+  const sessionTtsAuto = resolveSessionTtsAuto(ctx, cfg);
   const hookRunner = getGlobalHookRunner();
   if (hookRunner?.hasHooks("message_received")) {
     const timestamp =
@@ -223,6 +275,8 @@ export async function dispatchReplyFromConfig(params: {
               cfg,
               channel: ttsChannel,
               kind: "tool",
+              inboundAudio,
+              ttsAuto: sessionTtsAuto,
             });
             if (shouldRouteToOriginating) {
               await sendPayloadAsync(ttsPayload);
@@ -239,6 +293,8 @@ export async function dispatchReplyFromConfig(params: {
               cfg,
               channel: ttsChannel,
               kind: "block",
+              inboundAudio,
+              ttsAuto: sessionTtsAuto,
             });
             if (shouldRouteToOriginating) {
               await sendPayloadAsync(ttsPayload, context?.abortSignal);
@@ -262,6 +318,8 @@ export async function dispatchReplyFromConfig(params: {
         cfg,
         channel: ttsChannel,
         kind: "final",
+        inboundAudio,
+        ttsAuto: sessionTtsAuto,
       });
       if (shouldRouteToOriginating && originatingChannel && originatingTo) {
         // Route final reply to originating channel.
diff --git a/src/auto-reply/reply/session.ts b/src/auto-reply/reply/session.ts
index da8ca8acf..45f37afdb 100644
--- a/src/auto-reply/reply/session.ts
+++ b/src/auto-reply/reply/session.ts
@@ -5,6 +5,7 @@ import path from "node:path";
 import { CURRENT_SESSION_VERSION, SessionManager } from "@mariozechner/pi-coding-agent";
 import { resolveSessionAgentId } from "../../agents/agent-scope.js";
 import type { ClawdbotConfig } from "../../config/config.js";
+import type { TtsAutoMode } from "../../config/types.tts.js";
 import {
   DEFAULT_RESET_TRIGGERS,
   deriveSessionMetaPatch,
@@ -128,6 +129,7 @@ export async function initSessionState(params: {
   let persistedThinking: string | undefined;
   let persistedVerbose: string | undefined;
   let persistedReasoning: string | undefined;
+  let persistedTtsAuto: TtsAutoMode | undefined;
   let persistedModelOverride: string | undefined;
   let persistedProviderOverride: string | undefined;
 
@@ -220,6 +222,7 @@ export async function initSessionState(params: {
     persistedThinking = entry.thinkingLevel;
     persistedVerbose = entry.verboseLevel;
     persistedReasoning = entry.reasoningLevel;
+    persistedTtsAuto = entry.ttsAuto;
     persistedModelOverride = entry.modelOverride;
     persistedProviderOverride = entry.providerOverride;
   } else {
@@ -258,6 +261,7 @@ export async function initSessionState(params: {
     thinkingLevel: persistedThinking ?? baseEntry?.thinkingLevel,
     verboseLevel: persistedVerbose ?? baseEntry?.verboseLevel,
     reasoningLevel: persistedReasoning ?? baseEntry?.reasoningLevel,
+    ttsAuto: persistedTtsAuto ?? baseEntry?.ttsAuto,
     responseUsage: baseEntry?.responseUsage,
     modelOverride: persistedModelOverride ?? baseEntry?.modelOverride,
     providerOverride: persistedProviderOverride ?? baseEntry?.providerOverride,
diff --git a/src/auto-reply/status.ts b/src/auto-reply/status.ts
index 410e2f38c..b6f0f44db 100644
--- a/src/auto-reply/status.ts
+++ b/src/auto-reply/status.ts
@@ -17,7 +17,7 @@ import {
   getTtsMaxLength,
   getTtsProvider,
   isSummarizationEnabled,
-  isTtsEnabled,
+  resolveTtsAutoMode,
   resolveTtsConfig,
   resolveTtsPrefsPath,
 } from "../tts/tts.js";
@@ -252,15 +252,23 @@ const formatMediaUnderstandingLine = (decisions?: MediaUnderstandingDecision[])
   return `📎 Media: ${parts.join(" · ")}`;
 };
 
-const formatVoiceModeLine = (config?: ClawdbotConfig): string | null => {
+const formatVoiceModeLine = (
+  config?: ClawdbotConfig,
+  sessionEntry?: SessionEntry,
+): string | null => {
   if (!config) return null;
   const ttsConfig = resolveTtsConfig(config);
   const prefsPath = resolveTtsPrefsPath(ttsConfig);
-  if (!isTtsEnabled(ttsConfig, prefsPath)) return null;
+  const autoMode = resolveTtsAutoMode({
+    config: ttsConfig,
+    prefsPath,
+    sessionAuto: sessionEntry?.ttsAuto,
+  });
+  if (autoMode === "off") return null;
   const provider = getTtsProvider(ttsConfig, prefsPath);
   const maxLength = getTtsMaxLength(prefsPath);
   const summarize = isSummarizationEnabled(prefsPath) ? "on" : "off";
-  return `🔊 Voice: on · provider=${provider} · limit=${maxLength} · summary=${summarize}`;
+  return `🔊 Voice: ${autoMode} · provider=${provider} · limit=${maxLength} · summary=${summarize}`;
 };
 
 export function buildStatusMessage(args: StatusArgs): string {
@@ -398,7 +406,7 @@ export function buildStatusMessage(args: StatusArgs): string {
   const usageCostLine =
     usagePair && costLine ? `${usagePair} · ${costLine}` : (usagePair ?? costLine);
   const mediaLine = formatMediaUnderstandingLine(args.mediaDecisions);
-  const voiceLine = formatVoiceModeLine(args.config);
+  const voiceLine = formatVoiceModeLine(args.config, args.sessionEntry);
 
   return [
     versionLine,
diff --git a/src/config/config.legacy-config-detection.accepts-imessage-dmpolicy.test.ts b/src/config/config.legacy-config-detection.accepts-imessage-dmpolicy.test.ts
index 58bf62425..8abd285ee 100644
--- a/src/config/config.legacy-config-detection.accepts-imessage-dmpolicy.test.ts
+++ b/src/config/config.legacy-config-detection.accepts-imessage-dmpolicy.test.ts
@@ -138,6 +138,16 @@ describe("legacy config detection", () => {
     expect(res.config?.channels?.telegram?.groups?.["*"]?.requireMention).toBe(false);
     expect(res.config?.channels?.telegram?.requireMention).toBeUndefined();
   });
+  it("migrates messages.tts.enabled to messages.tts.auto", async () => {
+    vi.resetModules();
+    const { migrateLegacyConfig } = await import("./config.js");
+    const legacyConfig = { messages: { tts: { enabled: true } } };
+    const { changes, config: migrated } = migrateLegacyConfig(legacyConfig);
+    const migratedTts = migrated?.messages?.tts;
+    expect(changes).toContain("Moved messages.tts.enabled → messages.tts.auto (always).");
+    expect(migratedTts?.auto).toBe("always");
+    expect(migratedTts?.enabled).toBeUndefined();
+  });
   it("migrates legacy model config to agent.models + model lists", async () => {
     vi.resetModules();
     const { migrateLegacyConfig } = await import("./config.js");
diff --git a/src/config/legacy.migrations.part-3.ts b/src/config/legacy.migrations.part-3.ts
index fc34b1768..9db9e3ede 100644
--- a/src/config/legacy.migrations.part-3.ts
+++ b/src/config/legacy.migrations.part-3.ts
@@ -40,6 +40,26 @@ export const LEGACY_CONFIG_MIGRATIONS_PART_3: LegacyConfigMigration[] = [
       delete tools.bash;
     },
   },
+  {
+    id: "messages.tts.enabled->auto",
+    describe: "Move messages.tts.enabled to messages.tts.auto",
+    // Folds the legacy boolean into the `auto` enum; an already-set `auto` is left untouched.
+    apply: (raw, changes) => {
+      const ttsSection = getRecord(getRecord(raw.messages)?.tts);
+      if (!ttsSection) return;
+      const legacyValue = ttsSection.enabled;
+      if (ttsSection.auto === undefined) {
+        if (typeof legacyValue !== "boolean") return;
+        const migrated = legacyValue ? "always" : "off";
+        ttsSection.auto = migrated;
+        delete ttsSection.enabled;
+        changes.push(`Moved messages.tts.enabled → messages.tts.auto (${migrated}).`);
+      } else if ("enabled" in ttsSection) {
+        delete ttsSection.enabled;
+        changes.push("Removed messages.tts.enabled (messages.tts.auto already set).");
+      }
+    },
+  },
   {
     id: "agent.defaults-v2",
     describe: "Move agent config to agents.defaults and tools",
diff --git a/src/config/legacy.rules.ts b/src/config/legacy.rules.ts
index 1ec76bc79..4de788a69 100644
--- a/src/config/legacy.rules.ts
+++ b/src/config/legacy.rules.ts
@@ -120,6 +120,10 @@ export const LEGACY_CONFIG_RULES: LegacyConfigRule[] = [
     message:
       "agent.imageModelFallbacks was replaced by agents.defaults.imageModel.fallbacks (auto-migrated on load).",
   },
+  {
+    path: ["messages", "tts", "enabled"],
+    message: "messages.tts.enabled was replaced by messages.tts.auto (auto-migrated on load).",
+  },
   {
     path: ["gateway", "token"],
     message: "gateway.token is ignored; use gateway.auth.token instead (auto-migrated on load).",
diff --git a/src/config/sessions/types.ts b/src/config/sessions/types.ts
index f7ed268ec..48ce428c1 100644
--- a/src/config/sessions/types.ts
+++ b/src/config/sessions/types.ts
@@ -4,6 +4,7 @@ import type { Skill } from "@mariozechner/pi-coding-agent";
 import type { NormalizedChatType } from "../../channels/chat-type.js";
 import type { ChannelId } from "../../channels/plugins/types.js";
 import type { DeliveryContext } from "../../utils/delivery-context.js";
+import type { TtsAutoMode } from "../types.tts.js";
 
 export type SessionScope = "per-sender" | "global";
 
@@ -42,6 +43,7 @@ export type SessionEntry = {
   verboseLevel?: string;
   reasoningLevel?: string;
   elevatedLevel?: string;
+  ttsAuto?: TtsAutoMode;
   execHost?: string;
   execSecurity?: string;
   execAsk?: string;
diff --git a/src/config/types.tts.ts b/src/config/types.tts.ts
index 28b65c96d..4eb4989b9 100644
--- a/src/config/types.tts.ts
+++ b/src/config/types.tts.ts
@@ -2,6 +2,8 @@ export type TtsProvider = "elevenlabs" | "openai" | "edge";
 
 export type TtsMode = "final" | "all";
 
+export type TtsAutoMode = "off" | "always" | "inbound" | "tagged";
+
 export type TtsModelOverrideConfig = {
   /** Enable model-provided overrides for TTS. */
   enabled?: boolean;
@@ -22,7 +24,9 @@ export type TtsModelOverrideConfig = {
 };
 
 export type TtsConfig = {
-  /** Enable auto-TTS (can be overridden by local prefs). */
+  /** Auto-TTS mode (preferred). */
+  auto?: TtsAutoMode;
+  /** Legacy: enable auto-TTS when `auto` is not set. */
   enabled?: boolean;
   /** Apply TTS to final replies only or to all replies (tool/block/final). */
   mode?: TtsMode;
diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts
index bcf769b67..4a8c80bcc 100644
--- a/src/config/zod-schema.core.ts
+++ b/src/config/zod-schema.core.ts
@@ -158,8 +158,10 @@ export const MarkdownConfigSchema = z
 
 export const TtsProviderSchema = z.enum(["elevenlabs", "openai", "edge"]);
 export const TtsModeSchema = z.enum(["final", "all"]);
+export const TtsAutoSchema = z.enum(["off", "always", "inbound", "tagged"]);
 export const TtsConfigSchema = z
   .object({
+    auto: TtsAutoSchema.optional(),
     enabled: z.boolean().optional(),
     mode: TtsModeSchema.optional(),
     provider: TtsProviderSchema.optional(),
diff --git a/src/discord/monitor/message-handler.process.ts b/src/discord/monitor/message-handler.process.ts
index 0be4b6d84..6d502be21 100644
--- a/src/discord/monitor/message-handler.process.ts
+++ b/src/discord/monitor/message-handler.process.ts
@@ -136,9 +136,8 @@ export async function processDiscordMessage(ctx: DiscordMessagePreflightContext)
   const forumParentSlug =
     isForumParent && threadParentName ? normalizeDiscordSlug(threadParentName) : "";
   const threadChannelId = threadChannel?.id;
-  const isForumStarter = Boolean(
-    threadChannelId && isForumParent && forumParentSlug && message.id === threadChannelId,
-  );
+  const isForumStarter =
+    Boolean(threadChannelId && isForumParent && forumParentSlug) && message.id === threadChannelId;
   const forumContextLine = isForumStarter ? `[Forum parent: #${forumParentSlug}]` : null;
   const groupChannel = isGuildMessage && displayChannelSlug ? `#${displayChannelSlug}` : undefined;
   const groupSubject = isDirectMessage ? undefined : groupChannel;
diff --git a/src/gateway/server-methods/tts.ts b/src/gateway/server-methods/tts.ts
index e70fb112f..5e4e8254e 100644
--- a/src/gateway/server-methods/tts.ts
+++ b/src/gateway/server-methods/tts.ts
@@ -5,6 +5,7 @@ import {
   getTtsProvider,
   isTtsEnabled,
   isTtsProviderConfigured,
+  resolveTtsAutoMode,
   resolveTtsApiKey,
   resolveTtsConfig,
   resolveTtsPrefsPath,
@@ -24,11 +25,13 @@ export const ttsHandlers: GatewayRequestHandlers = {
       const config = resolveTtsConfig(cfg);
       const prefsPath = resolveTtsPrefsPath(config);
       const provider = getTtsProvider(config, prefsPath);
+      const autoMode = resolveTtsAutoMode({ config, prefsPath });
       const fallbackProviders = resolveTtsProviderOrder(provider)
         .slice(1)
         .filter((candidate) => isTtsProviderConfigured(config, candidate));
       respond(true, {
         enabled: isTtsEnabled(config, prefsPath),
+        auto: autoMode,
         provider,
         fallbackProvider: fallbackProviders[0] ?? null,
         fallbackProviders,
diff --git a/src/tts/tts.test.ts b/src/tts/tts.test.ts
index fafe3bbdf..a8c9dce9c 100644
--- a/src/tts/tts.test.ts
+++ b/src/tts/tts.test.ts
@@ -4,7 +4,7 @@ import { completeSimple } from "@mariozechner/pi-ai";
 
 import { getApiKeyForModel } from "../agents/model-auth.js";
 import { resolveModel } from "../agents/pi-embedded-runner/model.js";
-import { _test, getTtsProvider, resolveTtsConfig } from "./tts.js";
+import * as tts from "./tts.js";
 
 vi.mock("@mariozechner/pi-ai", () => ({
   completeSimple: vi.fn(),
@@ -37,6 +37,8 @@ vi.mock("../agents/model-auth.js", () => ({
   requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? ""),
 }));
 
+const { _test, resolveTtsConfig, maybeApplyTtsToPayload, getTtsProvider } = tts;
+
 const {
   isValidVoiceId,
   isValidOpenAIVoice,
@@ -431,4 +433,109 @@ describe("tts", () => {
       );
     });
   });
+
+  describe("maybeApplyTtsToPayload", () => {
+    const baseCfg = {
+      agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } },
+      messages: {
+        tts: {
+          auto: "inbound",
+          provider: "openai",
+          openai: { apiKey: "test-key", model: "gpt-4o-mini-tts", voice: "alloy" },
+        },
+      },
+    };
+    const taggedCfg = {
+      ...baseCfg,
+      messages: {
+        ...baseCfg.messages,
+        tts: { ...baseCfg.messages.tts, auto: "tagged" },
+      },
+    };
+
+    const createFetchMock = () =>
+      vi.fn(async () => ({
+        ok: true,
+        arrayBuffer: async () => new ArrayBuffer(1),
+      }));
+
+    // Runs `run` with an isolated prefs path and a stubbed fetch, restoring both even when an
+    // assertion throws. The env var is deleted (not assigned) when it was unset, because
+    // assigning `undefined` to process.env stores the string "undefined".
+    const withStubbedTts = async (
+      run: (fetchMock: ReturnType<typeof createFetchMock>) => Promise<void>,
+    ): Promise<void> => {
+      const prevPrefs = process.env.CLAWDBOT_TTS_PREFS;
+      const originalFetch = globalThis.fetch;
+      const fetchMock = createFetchMock();
+      process.env.CLAWDBOT_TTS_PREFS = `/tmp/tts-test-${Date.now()}.json`;
+      globalThis.fetch = fetchMock as unknown as typeof fetch;
+      try {
+        await run(fetchMock);
+      } finally {
+        globalThis.fetch = originalFetch;
+        if (prevPrefs === undefined) {
+          delete process.env.CLAWDBOT_TTS_PREFS;
+        } else {
+          process.env.CLAWDBOT_TTS_PREFS = prevPrefs;
+        }
+      }
+    };
+
+    it("skips auto-TTS when inbound audio gating is on and the message is not audio", async () => {
+      await withStubbedTts(async (fetchMock) => {
+        const payload = { text: "Hello world" };
+        const result = await maybeApplyTtsToPayload({
+          payload,
+          cfg: baseCfg,
+          kind: "final",
+          inboundAudio: false,
+        });
+
+        expect(result).toBe(payload);
+        expect(fetchMock).not.toHaveBeenCalled();
+      });
+    });
+
+    it("attempts auto-TTS when inbound audio gating is on and the message is audio", async () => {
+      await withStubbedTts(async (fetchMock) => {
+        const result = await maybeApplyTtsToPayload({
+          payload: { text: "Hello world" },
+          cfg: baseCfg,
+          kind: "final",
+          inboundAudio: true,
+        });
+
+        expect(result.mediaUrl).toBeDefined();
+        expect(fetchMock).toHaveBeenCalledTimes(1);
+      });
+    });
+
+    it("skips auto-TTS in tagged mode unless a tts tag is present", async () => {
+      await withStubbedTts(async (fetchMock) => {
+        const payload = { text: "Hello world" };
+        const result = await maybeApplyTtsToPayload({
+          payload,
+          cfg: taggedCfg,
+          kind: "final",
+        });
+
+        expect(result).toBe(payload);
+        expect(fetchMock).not.toHaveBeenCalled();
+      });
+    });
+
+    it("runs auto-TTS in tagged mode when tags are present", async () => {
+      await withStubbedTts(async (fetchMock) => {
+        const result = await maybeApplyTtsToPayload({
+          payload: { text: "[[tts:text]]Hello world[[/tts:text]]" },
+          cfg: taggedCfg,
+          kind: "final",
+        });
+
+        expect(result.mediaUrl).toBeDefined();
+        expect(fetchMock).toHaveBeenCalledTimes(1);
+      });
+    });
+  });
 });
diff --git a/src/tts/tts.ts b/src/tts/tts.ts
index cf2823f95..5fa06f8d4 100644
--- a/src/tts/tts.ts
+++ b/src/tts/tts.ts
@@ -20,6 +20,7 @@ import type { ChannelId } from "../channels/plugins/types.js";
 import type { ClawdbotConfig } from "../config/config.js";
 import type {
   TtsConfig,
+  TtsAutoMode,
   TtsMode,
   TtsProvider,
   TtsModelOverrideConfig,
@@ -75,8 +76,10 @@ const DEFAULT_OUTPUT = {
   voiceCompatible: false,
 };
 
+const TTS_AUTO_MODES = new Set<TtsAutoMode>(["off", "always", "inbound", "tagged"]);
+
 export type ResolvedTtsConfig = {
-  enabled: boolean;
+  auto: TtsAutoMode;
   mode: TtsMode;
   provider: TtsProvider;
   providerSource: "config" | "default";
@@ -123,6 +126,7 @@ export type ResolvedTtsConfig = {
 
 type TtsUserPrefs = {
   tts?: {
+    auto?: TtsAutoMode;
     enabled?: boolean;
     provider?: TtsProvider;
     maxLength?: number;
@@ -161,6 +165,7 @@ type TtsDirectiveOverrides = {
 type TtsDirectiveParseResult = {
   cleanedText: string;
   ttsText?: string;
+  hasDirective: boolean;
   overrides: TtsDirectiveOverrides;
   warnings: string[];
 };
@@ -187,6 +192,15 @@ type TtsStatusEntry = {
 
 let lastTtsAttempt: TtsStatusEntry | undefined;
 
+/**
+ * Maps a raw value to a known auto mode (trimmed, lowercased); undefined for anything else.
+ */
+export function normalizeTtsAutoMode(value: unknown): TtsAutoMode | undefined {
+  if (typeof value !== "string") return undefined;
+  const candidate = value.trim().toLowerCase() as TtsAutoMode;
+  return TTS_AUTO_MODES.has(candidate) ? candidate : undefined;
+}
+
 function resolveModelOverridePolicy(
   overrides: TtsModelOverrideConfig | undefined,
 ): ResolvedTtsModelOverrides {
@@ -220,8 +234,9 @@ export function resolveTtsConfig(cfg: ClawdbotConfig): ResolvedTtsConfig {
   const raw: TtsConfig = cfg.messages?.tts ?? {};
   const providerSource = raw.provider ? "config" : "default";
   const edgeOutputFormat = raw.edge?.outputFormat?.trim();
+  const auto = normalizeTtsAutoMode(raw.auto) ?? (raw.enabled ? "always" : "off");
   return {
-    enabled: raw.enabled ?? false,
+    auto,
     mode: raw.mode ?? "final",
     provider: raw.provider ?? "edge",
     providerSource,
@@ -279,17 +294,48 @@ export function resolveTtsPrefsPath(config: ResolvedTtsConfig): string {
   return path.join(CONFIG_DIR, "settings", "tts.json");
 }
 
+/** Reads the auto mode from stored prefs; falls back to the boolean `enabled` pref. */
+function resolveTtsAutoModeFromPrefs(prefs: TtsUserPrefs): TtsAutoMode | undefined {
+  const stored = prefs.tts;
+  const explicit = normalizeTtsAutoMode(stored?.auto);
+  if (explicit !== undefined) return explicit;
+  if (typeof stored?.enabled === "boolean") return stored.enabled ? "always" : "off";
+  return undefined;
+}
+
+/** Picks the effective auto mode: valid session override, then stored prefs, then config. */
+export function resolveTtsAutoMode(params: {
+  config: ResolvedTtsConfig;
+  prefsPath: string;
+  sessionAuto?: string;
+}): TtsAutoMode {
+  const fromSession = normalizeTtsAutoMode(params.sessionAuto);
+  if (fromSession !== undefined) return fromSession;
+  const fromPrefs = resolveTtsAutoModeFromPrefs(readPrefs(params.prefsPath));
+  return fromPrefs ?? params.config.auto;
+}
+
 export function buildTtsSystemPromptHint(cfg: ClawdbotConfig): string | undefined {
   const config = resolveTtsConfig(cfg);
   const prefsPath = resolveTtsPrefsPath(config);
-  if (!isTtsEnabled(config, prefsPath)) return undefined;
+  const autoMode = resolveTtsAutoMode({ config, prefsPath });
+  if (autoMode === "off") return undefined;
   const maxLength = getTtsMaxLength(prefsPath);
   const summarize = isSummarizationEnabled(prefsPath) ? "on" : "off";
+  const autoHint =
+    autoMode === "inbound"
+      ? "Only use TTS when the user's last message includes audio/voice."
+      : autoMode === "tagged"
+        ? "Only use TTS when you include [[tts]] or [[tts:text]] tags."
+        : undefined;
   return [
     "Voice (TTS) is enabled.",
+    autoHint,
     `Keep spoken text ≤${maxLength} chars to avoid auto-summary (summary ${summarize}).`,
     "Use [[tts:...]] and optional [[tts:text]]...[[/tts:text]] to control voice/expressiveness.",
-  ].join("\n");
+  ]
+    .filter(Boolean)
+    .join("\n");
 }
 
 function readPrefs(prefsPath: string): TtsUserPrefs {
@@ -323,16 +369,25 @@ function updatePrefs(prefsPath: string, update: (prefs: TtsUserPrefs) => void):
   atomicWriteFileSync(prefsPath, JSON.stringify(prefs, null, 2));
 }
 
-export function isTtsEnabled(config: ResolvedTtsConfig, prefsPath: string): boolean {
-  const prefs = readPrefs(prefsPath);
-  if (prefs.tts?.enabled !== undefined) return prefs.tts.enabled === true;
-  return config.enabled;
+export function isTtsEnabled(
+  config: ResolvedTtsConfig,
+  prefsPath: string,
+  sessionAuto?: string,
+): boolean {
+  return resolveTtsAutoMode({ config, prefsPath, sessionAuto }) !== "off";
+}
+
+/** Stores `mode` as the persisted auto mode and removes the boolean `enabled` pref. */
+export function setTtsAutoMode(prefsPath: string, mode: TtsAutoMode): void {
+  updatePrefs(prefsPath, (prefs) => {
+    const updated = { ...prefs.tts, auto: mode };
+    delete updated.enabled;
+    prefs.tts = updated;
+  });
 }
 
 export function setTtsEnabled(prefsPath: string, enabled: boolean): void {
-  updatePrefs(prefsPath, (prefs) => {
-    prefs.tts = { ...prefs.tts, enabled };
-  });
+  setTtsAutoMode(prefsPath, enabled ? "always" : "off");
 }
 
 export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): TtsProvider {
@@ -485,15 +540,17 @@ function parseTtsDirectives(
   policy: ResolvedTtsModelOverrides,
 ): TtsDirectiveParseResult {
   if (!policy.enabled) {
-    return { cleanedText: text, overrides: {}, warnings: [] };
+    return { cleanedText: text, overrides: {}, warnings: [], hasDirective: false };
   }
 
   const overrides: TtsDirectiveOverrides = {};
   const warnings: string[] = [];
   let cleanedText = text;
+  let hasDirective = false;
 
   const blockRegex = /\[\[tts:text\]\]([\s\S]*?)\[\[\/tts:text\]\]/gi;
   cleanedText = cleanedText.replace(blockRegex, (_match, inner: string) => {
+    hasDirective = true;
     if (policy.allowText && overrides.ttsText == null) {
       overrides.ttsText = inner.trim();
     }
@@ -502,6 +559,7 @@ function parseTtsDirectives(
 
   const directiveRegex = /\[\[tts:([^\]]+)\]\]/gi;
   cleanedText = cleanedText.replace(directiveRegex, (_match, body: string) => {
+    hasDirective = true;
     const tokens = body.split(/\s+/).filter(Boolean);
     for (const token of tokens) {
       const eqIndex = token.indexOf("=");
@@ -672,6 +730,7 @@ function parseTtsDirectives(
   return {
     cleanedText,
     ttsText: overrides.ttsText,
+    hasDirective,
     overrides,
     warnings,
   };
@@ -1156,13 +1215,17 @@ export async function maybeApplyTtsToPayload(params: {
   cfg: ClawdbotConfig;
   channel?: string;
   kind?: "tool" | "block" | "final";
+  inboundAudio?: boolean;
+  ttsAuto?: string;
 }): Promise<ReplyPayload> {
   const config = resolveTtsConfig(params.cfg);
   const prefsPath = resolveTtsPrefsPath(config);
-  if (!isTtsEnabled(config, prefsPath)) return params.payload;
-
-  const mode = config.mode ?? "final";
-  if (mode === "final" && params.kind && params.kind !== "final") return params.payload;
+  const autoMode = resolveTtsAutoMode({
+    config,
+    prefsPath,
+    sessionAuto: params.ttsAuto,
+  });
+  if (autoMode === "off") return params.payload;
 
   const text = params.payload.text ?? "";
   const directives = parseTtsDirectives(text, config.modelOverrides);
@@ -1183,6 +1246,12 @@ export async function maybeApplyTtsToPayload(params: {
           text: visibleText.length > 0 ? visibleText : undefined,
         };
 
+  if (autoMode === "tagged" && !directives.hasDirective) return nextPayload;
+  if (autoMode === "inbound" && params.inboundAudio !== true) return nextPayload;
+
+  const mode = config.mode ?? "final";
+  if (mode === "final" && params.kind && params.kind !== "final") return nextPayload;
+
   if (!ttsText.trim()) return nextPayload;
   if (params.payload.mediaUrl || (params.payload.mediaUrls?.length ?? 0) > 0) return nextPayload;
   if (text.includes("MEDIA:")) return nextPayload;
@@ -1197,7 +1266,7 @@ export async function maybeApplyTtsToPayload(params: {
       logVerbose(
         `TTS: skipping long text (${textForAudio.length} > ${maxLength}), summarization disabled.`,
       );
-      return params.payload;
+      return nextPayload;
     }
 
     try {
@@ -1219,7 +1288,7 @@ export async function maybeApplyTtsToPayload(params: {
     } catch (err) {
       const error = err as Error;
       logVerbose(`TTS: summarization failed: ${error.message}`);
-      return params.payload;
+      return nextPayload;
     }
   }
 
diff --git a/src/types/node-edge-tts.d.ts b/src/types/node-edge-tts.d.ts
new file mode 100644
index 000000000..eaaaa9cdf
--- /dev/null
+++ b/src/types/node-edge-tts.d.ts
@@ -0,0 +1,18 @@
+declare module "node-edge-tts" { // Ambient typings for the parts of "node-edge-tts" declared here.
+  export type EdgeTTSOptions = { // Options bag for the EdgeTTS constructor; every field is optional.
+    voice?: string;
+    lang?: string;
+    outputFormat?: string;
+    saveSubtitles?: boolean;
+    proxy?: string;
+    rate?: string;
+    pitch?: string;
+    volume?: string;
+    timeout?: number; // NOTE(review): unit is not stated here (presumably ms) — confirm.
+  };
+
+  export class EdgeTTS {
+    constructor(options?: EdgeTTSOptions);
+    ttsPromise(text: string, outputPath: string): Promise<void>; // presumably writes audio to outputPath — confirm
+  }
+}
diff --git a/src/web/inbound.media.test.ts b/src/web/inbound.media.test.ts
index fcd53a68b..de23f10a9 100644
--- a/src/web/inbound.media.test.ts
+++ b/src/web/inbound.media.test.ts
@@ -127,9 +127,9 @@ describe("web inbound media saves with extension", () => {
     realSock.ev.emit("messages.upsert", upsert);
 
     // Allow a brief window for the async handler to fire on slower hosts.
-    for (let i = 0; i < 10; i++) {
+    for (let i = 0; i < 50; i++) {
       if (onMessage.mock.calls.length > 0) break;
-      await new Promise((resolve) => setTimeout(resolve, 5));
+      await new Promise((resolve) => setTimeout(resolve, 10));
     }
 
     expect(onMessage).toHaveBeenCalledTimes(1);
@@ -178,9 +178,9 @@ describe("web inbound media saves with extension", () => {
 
     realSock.ev.emit("messages.upsert", upsert);
 
-    for (let i = 0; i < 10; i++) {
+    for (let i = 0; i < 50; i++) {
       if (onMessage.mock.calls.length > 0) break;
-      await new Promise((resolve) => setTimeout(resolve, 5));
+      await new Promise((resolve) => setTimeout(resolve, 10));
     }
 
     expect(onMessage).toHaveBeenCalledTimes(1);
@@ -218,9 +218,9 @@ describe("web inbound media saves with extension", () => {
 
     realSock.ev.emit("messages.upsert", upsert);
 
-    for (let i = 0; i < 10; i++) {
+    for (let i = 0; i < 50; i++) {
       if (onMessage.mock.calls.length > 0) break;
-      await new Promise((resolve) => setTimeout(resolve, 5));
+      await new Promise((resolve) => setTimeout(resolve, 10));
     }
 
     expect(onMessage).toHaveBeenCalledTimes(1);