From d9a467fe3b315e76a355491b1d3cfb58ead2145b Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 24 Jan 2026 07:57:46 +0000 Subject: [PATCH] feat: move TTS into core (#1559) (thanks @Glucksberg) --- CHANGELOG.md | 1 + docs/gateway/configuration.md | 38 + docs/tools/slash-commands.md | 7 + extensions/telegram-tts/README.md | 146 --- extensions/telegram-tts/clawdbot.plugin.json | 117 -- extensions/telegram-tts/index.test.ts | 218 ---- extensions/telegram-tts/index.ts | 1042 ------------------ extensions/telegram-tts/package.json | 8 - src/agents/clawdbot-tools.ts | 5 + src/agents/tools/tts-tool.ts | 60 + src/auto-reply/commands-registry.data.ts | 75 ++ src/auto-reply/reply/commands-core.ts | 2 + src/auto-reply/reply/commands-tts.ts | 214 ++++ src/auto-reply/reply/dispatch-from-config.ts | 54 +- src/auto-reply/reply/route-reply.ts | 43 - src/config/types.messages.ts | 3 + src/config/types.ts | 1 + src/config/types.tts.ts | 30 + src/config/zod-schema.core.ts | 30 + src/config/zod-schema.session.ts | 2 + src/gateway/server-methods-list.ts | 6 + src/gateway/server-methods.ts | 8 + src/gateway/server-methods/tts.ts | 138 +++ src/telegram/bot/delivery.ts | 59 - src/tts/tts.test.ts | 234 ++++ src/tts/tts.ts | 630 +++++++++++ 26 files changed, 1522 insertions(+), 1649 deletions(-) delete mode 100644 extensions/telegram-tts/README.md delete mode 100644 extensions/telegram-tts/clawdbot.plugin.json delete mode 100644 extensions/telegram-tts/index.test.ts delete mode 100644 extensions/telegram-tts/index.ts delete mode 100644 extensions/telegram-tts/package.json create mode 100644 src/agents/tools/tts-tool.ts create mode 100644 src/auto-reply/reply/commands-tts.ts create mode 100644 src/config/types.tts.ts create mode 100644 src/gateway/server-methods/tts.ts create mode 100644 src/tts/tts.test.ts create mode 100644 src/tts/tts.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 965d5ea07..a0c945eea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Docs: https://docs.clawd.bot - Markdown: add per-channel table conversion (bullets for Signal/WhatsApp, code blocks elsewhere). (#1495) Thanks @odysseus0. - Tlon: add Urbit channel plugin (DMs, group mentions, thread replies). (#1544) Thanks @wca4a. - Channels: allow per-group tool allow/deny policies across built-in + plugin channels. (#1546) Thanks @adam91holt. +- TTS: move Telegram TTS into core with auto-replies, commands, and gateway methods. (#1559) Thanks @Glucksberg. ### Fixes - Skills: gate bird Homebrew install to macOS. (#1569) Thanks @bradleypriest. diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index ab41221a7..59d332190 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -1446,6 +1446,44 @@ active agent’s `identity.emoji` when set, otherwise `"πŸ‘€"`. Set it to `""` t `removeAckAfterReply` removes the bot’s ack reaction after a reply is sent (Slack/Discord/Telegram only). Default: `false`. +#### `messages.tts` + +Enable text-to-speech for outbound replies. When on, Clawdbot generates audio +using ElevenLabs or OpenAI and attaches it to responses. Telegram uses Opus +voice notes; other channels send MP3 audio. 
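+
+Once enabled, you can sanity-check TTS over the gateway. The method names
+below are carried over from the old `telegram-tts` plugin's RPC surface and
+are assumed unchanged by this move into core; treat them as illustrative:
+
+```bash
+clawdbot gateway call tts.status
+clawdbot gateway call tts.convert '{"text": "Hello world"}'
+```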
+
+```json5
+{
+  messages: {
+    tts: {
+      enabled: true,
+      mode: "final", // final | all (include tool/block replies)
+      provider: "elevenlabs",
+      maxTextLength: 4000,
+      timeoutMs: 30000,
+      prefsPath: "~/.clawdbot/settings/tts.json",
+      elevenlabs: {
+        apiKey: "elevenlabs_api_key",
+        voiceId: "voice_id",
+        modelId: "eleven_multilingual_v2"
+      },
+      openai: {
+        apiKey: "openai_api_key",
+        model: "gpt-4o-mini-tts",
+        voice: "alloy"
+      }
+    }
+  }
+}
+```
+
+Notes:
+- `messages.tts.enabled` can be overridden by local user prefs (see `/tts_on`, `/tts_off`).
+- `prefsPath` stores local overrides (enabled/provider/limit/summarize).
+- `maxTextLength` is a hard cap for TTS input; summaries are truncated to fit.
+- `/tts_limit` and `/tts_summary` control per-user summarization settings.
+- `apiKey` values fall back to `ELEVENLABS_API_KEY`/`XI_API_KEY` and `OPENAI_API_KEY`.
+
 ### `talk`
 
 Defaults for Talk mode (macOS/iOS/Android). Voice IDs fall back to `ELEVENLABS_VOICE_ID` or `SAG_VOICE_ID` when unset.
diff --git a/docs/tools/slash-commands.md b/docs/tools/slash-commands.md
index 6ab3c87aa..b8ccb7c83 100644
--- a/docs/tools/slash-commands.md
+++ b/docs/tools/slash-commands.md
@@ -67,6 +67,13 @@ Text + native (when enabled):
 - `/config show|get|set|unset` (persist config to disk, owner-only; requires `commands.config: true`)
 - `/debug show|set|unset|reset` (runtime overrides, owner-only; requires `commands.debug: true`)
 - `/usage off|tokens|full|cost` (per-response usage footer or local cost summary)
+- `/tts_on` (enable TTS replies)
+- `/tts_off` (disable TTS replies)
+- `/tts_provider [openai|elevenlabs]` (set or show TTS provider)
+- `/tts_limit <chars>` (max chars before TTS summarization)
+- `/tts_summary on|off` (toggle TTS auto-summary)
+- `/tts_status` (show TTS status)
+- `/audio <text>` (convert text to a TTS audio reply)
 - `/stop`
 - `/restart`
 - `/dock-telegram` (alias: `/dock_telegram`) (switch replies to Telegram)
diff --git a/extensions/telegram-tts/README.md b/extensions/telegram-tts/README.md
deleted file mode 100644
index 0ea774bab..000000000
--- a/extensions/telegram-tts/README.md
+++ /dev/null
@@ -1,146 +0,0 @@
-# Telegram TTS Extension
-
-Automatic text-to-speech for chat responses using ElevenLabs or OpenAI.
-
-## Features
-
-- **Auto-TTS**: Automatically converts all text responses to voice when enabled
-- **`speak` Tool**: Converts text to speech and sends as voice message
-- **RPC Methods**: Control TTS via Gateway (`tts.status`, `tts.enable`, `tts.disable`, `tts.convert`, `tts.providers`)
-- **User Commands**: `/tts_on`, `/tts_off`, `/tts_provider`, `/tts_limit`, `/tts_summary`, `/tts_status`
-- **Auto-Summarization**: Long texts are automatically summarized before TTS conversion
-- **Multi-provider**: ElevenLabs and OpenAI TTS with automatic fallback
-- **Self-contained**: No external CLI dependencies - calls APIs directly
-
-## Requirements
-
-- **For TTS**: ElevenLabs API key OR OpenAI API key
-- **For Auto-Summarization**: OpenAI API key (uses gpt-4o-mini to summarize long texts)
-
-## Installation
-
-The extension is bundled with Clawdbot.
Enable it in your config: - -```json -{ - "plugins": { - "entries": { - "telegram-tts": { - "enabled": true, - "provider": "elevenlabs", - "elevenlabs": { - "apiKey": "your-api-key" - } - } - } - } -} -``` - -Or use OpenAI: - -```json -{ - "plugins": { - "entries": { - "telegram-tts": { - "enabled": true, - "provider": "openai", - "openai": { - "apiKey": "your-api-key", - "voice": "nova" - } - } - } - } -} -``` - -Or set API keys via environment variables: - -```bash -# For ElevenLabs -export ELEVENLABS_API_KEY=your-api-key -# or -export XI_API_KEY=your-api-key - -# For OpenAI -export OPENAI_API_KEY=your-api-key -``` - -## Configuration - -| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `enabled` | boolean | `false` | Enable the plugin | -| `provider` | string | `"openai"` | TTS provider (`elevenlabs` or `openai`) | -| `elevenlabs.apiKey` | string | - | ElevenLabs API key | -| `elevenlabs.voiceId` | string | `"pMsXgVXv3BLzUgSXRplE"` | ElevenLabs Voice ID | -| `elevenlabs.modelId` | string | `"eleven_multilingual_v2"` | ElevenLabs Model ID | -| `openai.apiKey` | string | - | OpenAI API key | -| `openai.model` | string | `"gpt-4o-mini-tts"` | OpenAI model (`gpt-4o-mini-tts`, `tts-1`, or `tts-1-hd`) | -| `openai.voice` | string | `"alloy"` | OpenAI voice | -| `prefsPath` | string | `~/clawd/.user-preferences.json` | User preferences file | -| `maxTextLength` | number | `4000` | Max characters for TTS | -| `timeoutMs` | number | `30000` | API request timeout in milliseconds | - -### OpenAI Voices - -Available voices: `alloy`, `ash`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer` - -## Usage - -### Agent Tool - -The agent can use the `speak` tool to send voice messages: - -``` -User: Send me a voice message saying hello -Agent: [calls speak({ text: "Hello! How can I help you today?" })] -``` - -### RPC Methods - -```bash -# Check TTS status -clawdbot gateway call tts.status - -# Enable/disable TTS -clawdbot gateway call tts.enable -clawdbot gateway call tts.disable - -# Convert text to audio -clawdbot gateway call tts.convert '{"text": "Hello world"}' - -# List available providers -clawdbot gateway call tts.providers -``` - -### Telegram Commands - -The plugin registers the following commands automatically: - -| Command | Description | -|---------|-------------| -| `/tts_on` | Enable auto-TTS for all responses | -| `/tts_off` | Disable auto-TTS | -| `/tts_provider [openai\|elevenlabs]` | Switch TTS provider (with fallback) | -| `/tts_limit [chars]` | Set max text length before summarization (default: 1500) | -| `/tts_summary [on\|off]` | Enable/disable auto-summarization for long texts | -| `/tts_status` | Show TTS status, config, and last attempt result | - -## Auto-Summarization - -When enabled (default), texts exceeding the configured limit are automatically summarized using OpenAI's gpt-4o-mini before TTS conversion. This ensures long responses can still be converted to audio. - -**Requirements**: OpenAI API key must be configured for summarization to work, even if using ElevenLabs for TTS. 
- -**Behavior**: -- Texts under the limit are converted directly -- Texts over the limit are summarized first, then converted -- If summarization is disabled (`/tts_summary off`), long texts are skipped (no audio) -- After summarization, a hard limit is applied to prevent oversized TTS requests - -## License - -MIT diff --git a/extensions/telegram-tts/clawdbot.plugin.json b/extensions/telegram-tts/clawdbot.plugin.json deleted file mode 100644 index c92258cd0..000000000 --- a/extensions/telegram-tts/clawdbot.plugin.json +++ /dev/null @@ -1,117 +0,0 @@ -{ - "id": "telegram-tts", - "uiHints": { - "enabled": { - "label": "Enable TTS", - "help": "Automatically convert text responses to voice messages" - }, - "provider": { - "label": "TTS Provider", - "help": "Choose between ElevenLabs or OpenAI for voice synthesis" - }, - "elevenlabs.apiKey": { - "label": "ElevenLabs API Key", - "sensitive": true - }, - "elevenlabs.voiceId": { - "label": "ElevenLabs Voice ID", - "help": "Default: pMsXgVXv3BLzUgSXRplE (Borislav)" - }, - "elevenlabs.modelId": { - "label": "ElevenLabs Model ID", - "help": "Default: eleven_multilingual_v2" - }, - "openai.apiKey": { - "label": "OpenAI API Key", - "sensitive": true - }, - "openai.model": { - "label": "OpenAI TTS Model", - "help": "gpt-4o-mini-tts (recommended)" - }, - "openai.voice": { - "label": "OpenAI Voice", - "help": "alloy, echo, fable, onyx, nova, or shimmer" - }, - "prefsPath": { - "label": "User Preferences File", - "help": "Path to JSON file storing TTS state", - "advanced": true - }, - "maxTextLength": { - "label": "Max Text Length", - "help": "Maximum characters to convert to speech", - "advanced": true - }, - "timeoutMs": { - "label": "Request Timeout (ms)", - "help": "Maximum time to wait for TTS API response (default: 30000)", - "advanced": true - } - }, - "configSchema": { - "type": "object", - "additionalProperties": false, - "properties": { - "enabled": { - "type": "boolean", - "default": false - }, - "provider": { - "type": "string", - "enum": ["elevenlabs", "openai"], - "default": "elevenlabs" - }, - "elevenlabs": { - "type": "object", - "additionalProperties": false, - "properties": { - "apiKey": { - "type": "string" - }, - "voiceId": { - "type": "string", - "default": "pMsXgVXv3BLzUgSXRplE" - }, - "modelId": { - "type": "string", - "default": "eleven_multilingual_v2" - } - } - }, - "openai": { - "type": "object", - "additionalProperties": false, - "properties": { - "apiKey": { - "type": "string" - }, - "model": { - "type": "string", - "enum": ["gpt-4o-mini-tts"], - "default": "gpt-4o-mini-tts" - }, - "voice": { - "type": "string", - "enum": ["alloy", "echo", "fable", "onyx", "nova", "shimmer"], - "default": "alloy" - } - } - }, - "prefsPath": { - "type": "string" - }, - "maxTextLength": { - "type": "integer", - "minimum": 1, - "default": 4000 - }, - "timeoutMs": { - "type": "integer", - "minimum": 1000, - "maximum": 120000, - "default": 30000 - } - } - } -} diff --git a/extensions/telegram-tts/index.test.ts b/extensions/telegram-tts/index.test.ts deleted file mode 100644 index add0d38c1..000000000 --- a/extensions/telegram-tts/index.test.ts +++ /dev/null @@ -1,218 +0,0 @@ -/** - * Unit tests for telegram-tts extension - */ - -import { describe, expect, it, vi, beforeEach, afterEach } from "vitest"; -import { _test, meta } from "./index.js"; - -const { isValidVoiceId, isValidOpenAIVoice, isValidOpenAIModel, OPENAI_TTS_MODELS, summarizeText } = _test; - -describe("telegram-tts", () => { - describe("meta", () => { - it("should have correct plugin 
metadata", () => { - expect(meta.id).toBe("telegram-tts"); - expect(meta.name).toBe("Telegram TTS"); - expect(meta.version).toMatch(/^\d+\.\d+\.\d+$/); - }); - }); - - describe("isValidVoiceId", () => { - it("should accept valid ElevenLabs voice IDs", () => { - // Real ElevenLabs voice ID format (20 alphanumeric chars) - expect(isValidVoiceId("pMsXgVXv3BLzUgSXRplE")).toBe(true); - expect(isValidVoiceId("21m00Tcm4TlvDq8ikWAM")).toBe(true); - expect(isValidVoiceId("EXAVITQu4vr4xnSDxMaL")).toBe(true); - }); - - it("should accept voice IDs of varying valid lengths", () => { - expect(isValidVoiceId("a1b2c3d4e5")).toBe(true); // 10 chars (min) - expect(isValidVoiceId("a".repeat(40))).toBe(true); // 40 chars (max) - }); - - it("should reject too short voice IDs", () => { - expect(isValidVoiceId("")).toBe(false); - expect(isValidVoiceId("abc")).toBe(false); - expect(isValidVoiceId("123456789")).toBe(false); // 9 chars - }); - - it("should reject too long voice IDs", () => { - expect(isValidVoiceId("a".repeat(41))).toBe(false); - expect(isValidVoiceId("a".repeat(100))).toBe(false); - }); - - it("should reject voice IDs with invalid characters", () => { - expect(isValidVoiceId("pMsXgVXv3BLz-gSXRplE")).toBe(false); // hyphen - expect(isValidVoiceId("pMsXgVXv3BLz_gSXRplE")).toBe(false); // underscore - expect(isValidVoiceId("pMsXgVXv3BLz gSXRplE")).toBe(false); // space - expect(isValidVoiceId("../../../etc/passwd")).toBe(false); // path traversal - expect(isValidVoiceId("voice?param=value")).toBe(false); // query string - }); - }); - - describe("isValidOpenAIVoice", () => { - it("should accept all valid OpenAI voices", () => { - const validVoices = ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]; - for (const voice of validVoices) { - expect(isValidOpenAIVoice(voice)).toBe(true); - } - }); - - it("should reject invalid voice names", () => { - expect(isValidOpenAIVoice("invalid")).toBe(false); - expect(isValidOpenAIVoice("")).toBe(false); - expect(isValidOpenAIVoice("ALLOY")).toBe(false); // case sensitive - expect(isValidOpenAIVoice("alloy ")).toBe(false); // trailing space - expect(isValidOpenAIVoice(" alloy")).toBe(false); // leading space - }); - }); - - describe("isValidOpenAIModel", () => { - it("should accept gpt-4o-mini-tts model", () => { - expect(isValidOpenAIModel("gpt-4o-mini-tts")).toBe(true); - }); - - it("should reject other models", () => { - expect(isValidOpenAIModel("tts-1")).toBe(false); - expect(isValidOpenAIModel("tts-1-hd")).toBe(false); - expect(isValidOpenAIModel("invalid")).toBe(false); - expect(isValidOpenAIModel("")).toBe(false); - expect(isValidOpenAIModel("gpt-4")).toBe(false); - }); - }); - - describe("OPENAI_TTS_MODELS", () => { - it("should contain only gpt-4o-mini-tts", () => { - expect(OPENAI_TTS_MODELS).toContain("gpt-4o-mini-tts"); - expect(OPENAI_TTS_MODELS).toHaveLength(1); - }); - - it("should be a non-empty array", () => { - expect(Array.isArray(OPENAI_TTS_MODELS)).toBe(true); - expect(OPENAI_TTS_MODELS.length).toBeGreaterThan(0); - }); - }); - - describe("summarizeText", () => { - const mockApiKey = "test-api-key"; - const originalFetch = globalThis.fetch; - - beforeEach(() => { - vi.useFakeTimers({ shouldAdvanceTime: true }); - }); - - afterEach(() => { - globalThis.fetch = originalFetch; - vi.useRealTimers(); - }); - - it("should summarize text and return result with metrics", async () => { - const mockSummary = "This is a summarized version of the text."; - globalThis.fetch = vi.fn().mockResolvedValue({ - ok: true, - json: () => 
Promise.resolve({ - choices: [{ message: { content: mockSummary } }], - }), - }); - - const longText = "A".repeat(2000); // Text longer than default limit - const result = await summarizeText(longText, 1500, mockApiKey); - - expect(result.summary).toBe(mockSummary); - expect(result.inputLength).toBe(2000); - expect(result.outputLength).toBe(mockSummary.length); - expect(result.latencyMs).toBeGreaterThanOrEqual(0); - expect(globalThis.fetch).toHaveBeenCalledTimes(1); - }); - - it("should call OpenAI API with correct parameters", async () => { - globalThis.fetch = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ - choices: [{ message: { content: "Summary" } }], - }), - }); - - await summarizeText("Long text to summarize", 500, mockApiKey); - - expect(globalThis.fetch).toHaveBeenCalledWith( - "https://api.openai.com/v1/chat/completions", - expect.objectContaining({ - method: "POST", - headers: { - Authorization: `Bearer ${mockApiKey}`, - "Content-Type": "application/json", - }, - }) - ); - - const callArgs = (globalThis.fetch as ReturnType).mock.calls[0]; - const body = JSON.parse(callArgs[1].body); - expect(body.model).toBe("gpt-4o-mini"); - expect(body.temperature).toBe(0.3); - expect(body.max_tokens).toBe(250); // Math.ceil(500 / 2) - }); - - it("should reject targetLength below minimum (100)", async () => { - await expect(summarizeText("text", 99, mockApiKey)).rejects.toThrow( - "Invalid targetLength: 99" - ); - }); - - it("should reject targetLength above maximum (10000)", async () => { - await expect(summarizeText("text", 10001, mockApiKey)).rejects.toThrow( - "Invalid targetLength: 10001" - ); - }); - - it("should accept targetLength at boundaries", async () => { - globalThis.fetch = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ - choices: [{ message: { content: "Summary" } }], - }), - }); - - // Min boundary - await expect(summarizeText("text", 100, mockApiKey)).resolves.toBeDefined(); - // Max boundary - await expect(summarizeText("text", 10000, mockApiKey)).resolves.toBeDefined(); - }); - - it("should throw error when API returns non-ok response", async () => { - globalThis.fetch = vi.fn().mockResolvedValue({ - ok: false, - status: 500, - }); - - await expect(summarizeText("text", 500, mockApiKey)).rejects.toThrow( - "Summarization service unavailable" - ); - }); - - it("should throw error when no summary is returned", async () => { - globalThis.fetch = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ - choices: [], - }), - }); - - await expect(summarizeText("text", 500, mockApiKey)).rejects.toThrow( - "No summary returned" - ); - }); - - it("should throw error when summary content is empty", async () => { - globalThis.fetch = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ - choices: [{ message: { content: " " } }], // whitespace only - }), - }); - - await expect(summarizeText("text", 500, mockApiKey)).rejects.toThrow( - "No summary returned" - ); - }); - }); -}); diff --git a/extensions/telegram-tts/index.ts b/extensions/telegram-tts/index.ts deleted file mode 100644 index 984bb1abd..000000000 --- a/extensions/telegram-tts/index.ts +++ /dev/null @@ -1,1042 +0,0 @@ -/** - * telegram-tts - Automatic TTS for chat responses - * - * Self-contained TTS extension that calls ElevenLabs/OpenAI APIs directly. - * No external CLI dependencies. 
- * - * Features: - * - speak tool for programmatic TTS - * - Multi-provider support (ElevenLabs, OpenAI) - * - RPC methods for status and control - * - * Note: Slash commands (/tts_on, /tts_off, /audio) should be configured - * via Telegram customCommands and handled by the agent workspace. - */ - -import { existsSync, readFileSync, writeFileSync, mkdtempSync, rmSync, renameSync, unlinkSync } from "fs"; -import { join } from "path"; -import { tmpdir } from "os"; -import type { PluginApi } from "clawdbot"; - -const PLUGIN_ID = "telegram-tts"; -const DEFAULT_TIMEOUT_MS = 30000; -const TEMP_FILE_CLEANUP_DELAY_MS = 5 * 60 * 1000; // 5 minutes - -// ============================================================================= -// Types -// ============================================================================= - -interface TtsConfig { - enabled?: boolean; - provider?: "elevenlabs" | "openai"; - elevenlabs?: { - apiKey?: string; - voiceId?: string; - modelId?: string; - }; - openai?: { - apiKey?: string; - model?: string; - voice?: string; - }; - prefsPath?: string; - maxTextLength?: number; - timeoutMs?: number; -} - -interface UserPreferences { - tts?: { - enabled?: boolean; - provider?: "openai" | "elevenlabs"; - maxLength?: number; // Max chars before summarizing (default 1500) - summarize?: boolean; // Enable auto-summarization (default true) - }; -} - -const DEFAULT_TTS_MAX_LENGTH = 1500; -const DEFAULT_TTS_SUMMARIZE = true; - -interface TtsResult { - success: boolean; - audioPath?: string; - error?: string; - latencyMs?: number; - provider?: string; -} - -interface TtsStatusEntry { - timestamp: number; - success: boolean; - textLength: number; - summarized: boolean; - provider?: string; - latencyMs?: number; - error?: string; -} - -// Track last TTS attempt for diagnostics (global, not per-user) -// Note: This shows the most recent TTS attempt system-wide, not user-specific -let lastTtsAttempt: TtsStatusEntry | undefined; - -// ============================================================================= -// Validation -// ============================================================================= - -/** - * Validates ElevenLabs voiceId format to prevent URL injection. - * Voice IDs are alphanumeric strings, typically 20 characters. - */ -function isValidVoiceId(voiceId: string): boolean { - return /^[a-zA-Z0-9]{10,40}$/.test(voiceId); -} - -/** - * Validates OpenAI voice name. - */ -function isValidOpenAIVoice(voice: string): boolean { - const validVoices = ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]; - return validVoices.includes(voice); -} - -/** - * Available OpenAI TTS models. - */ -const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts"]; - -/** - * Validates OpenAI TTS model name. 
- */ -function isValidOpenAIModel(model: string): boolean { - return OPENAI_TTS_MODELS.includes(model); -} - -// ============================================================================= -// Configuration & Preferences -// ============================================================================= - -function getPrefsPath(config: TtsConfig): string { - return ( - config.prefsPath || - process.env.CLAWDBOT_TTS_PREFS || - join(process.env.HOME || "/home/dev", "clawd", ".user-preferences.json") - ); -} - -function isTtsEnabled(prefsPath: string): boolean { - try { - if (!existsSync(prefsPath)) return false; - const prefs: UserPreferences = JSON.parse(readFileSync(prefsPath, "utf8")); - return prefs?.tts?.enabled === true; - } catch { - return false; - } -} - -/** - * Atomically writes to a file using temp file + rename pattern. - * Prevents race conditions when multiple processes write simultaneously. - */ -function atomicWriteFileSync(filePath: string, content: string): void { - const tmpPath = `${filePath}.tmp.${Date.now()}.${Math.random().toString(36).slice(2)}`; - writeFileSync(tmpPath, content); - try { - renameSync(tmpPath, filePath); - } catch (err) { - // Clean up temp file on rename failure - try { - unlinkSync(tmpPath); - } catch { - // Ignore cleanup errors - } - throw err; - } -} - -function updatePrefs(prefsPath: string, update: (prefs: UserPreferences) => void): void { - let prefs: UserPreferences = {}; - try { - if (existsSync(prefsPath)) { - prefs = JSON.parse(readFileSync(prefsPath, "utf8")); - } - } catch { - // ignore - } - update(prefs); - atomicWriteFileSync(prefsPath, JSON.stringify(prefs, null, 2)); -} - -function setTtsEnabled(prefsPath: string, enabled: boolean): void { - updatePrefs(prefsPath, (prefs) => { - prefs.tts = { ...prefs.tts, enabled }; - }); -} - -function getTtsProvider(prefsPath: string): "openai" | "elevenlabs" | undefined { - try { - if (!existsSync(prefsPath)) return undefined; - const prefs: UserPreferences = JSON.parse(readFileSync(prefsPath, "utf8")); - return prefs?.tts?.provider; - } catch { - return undefined; - } -} - -function setTtsProvider(prefsPath: string, provider: "openai" | "elevenlabs"): void { - updatePrefs(prefsPath, (prefs) => { - prefs.tts = { ...prefs.tts, provider }; - }); -} - -function getTtsMaxLength(prefsPath: string): number { - try { - if (!existsSync(prefsPath)) return DEFAULT_TTS_MAX_LENGTH; - const prefs: UserPreferences = JSON.parse(readFileSync(prefsPath, "utf8")); - return prefs?.tts?.maxLength ?? DEFAULT_TTS_MAX_LENGTH; - } catch { - return DEFAULT_TTS_MAX_LENGTH; - } -} - -function setTtsMaxLength(prefsPath: string, maxLength: number): void { - updatePrefs(prefsPath, (prefs) => { - prefs.tts = { ...prefs.tts, maxLength }; - }); -} - -function isSummarizationEnabled(prefsPath: string): boolean { - try { - if (!existsSync(prefsPath)) return DEFAULT_TTS_SUMMARIZE; - const prefs: UserPreferences = JSON.parse(readFileSync(prefsPath, "utf8")); - return prefs?.tts?.summarize ?? 
DEFAULT_TTS_SUMMARIZE; - } catch { - return DEFAULT_TTS_SUMMARIZE; - } -} - -function setSummarizationEnabled(prefsPath: string, enabled: boolean): void { - updatePrefs(prefsPath, (prefs) => { - prefs.tts = { ...prefs.tts, summarize: enabled }; - }); -} - -// ============================================================================= -// Text Summarization (for long texts) -// ============================================================================= - -interface SummarizeResult { - summary: string; - latencyMs: number; - inputLength: number; - outputLength: number; -} - -async function summarizeText( - text: string, - targetLength: number, - apiKey: string, - timeoutMs: number = 30000 -): Promise { - // Validate targetLength - if (targetLength < 100 || targetLength > 10000) { - throw new Error(`Invalid targetLength: ${targetLength}`); - } - - const startTime = Date.now(); - const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), timeoutMs); - - try { - const response = await fetch("https://api.openai.com/v1/chat/completions", { - method: "POST", - headers: { - Authorization: `Bearer ${apiKey}`, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - model: "gpt-4o-mini", - messages: [ - { - role: "system", - content: `You are an assistant that summarizes texts concisely while keeping the most important information. Summarize the text to approximately ${targetLength} characters. Maintain the original tone and style. Reply only with the summary, without additional explanations.`, - }, - { - role: "user", - content: `\n${text}\n`, - }, - ], - max_tokens: Math.ceil(targetLength / 2), // Conservative estimate for multilingual text - temperature: 0.3, - }), - signal: controller.signal, - }); - - if (!response.ok) { - throw new Error("Summarization service unavailable"); - } - - const data = await response.json() as { - choices?: Array<{ message?: { content?: string } }>; - }; - const summary = data.choices?.[0]?.message?.content?.trim(); - - if (!summary) { - throw new Error("No summary returned"); - } - - const latencyMs = Date.now() - startTime; - return { - summary, - latencyMs, - inputLength: text.length, - outputLength: summary.length, - }; - } finally { - clearTimeout(timeout); - } -} - -function getApiKey(config: TtsConfig, provider: string): string | undefined { - if (provider === "elevenlabs") { - return ( - config.elevenlabs?.apiKey || - process.env.ELEVENLABS_API_KEY || - process.env.XI_API_KEY - ); - } - if (provider === "openai") { - return config.openai?.apiKey || process.env.OPENAI_API_KEY; - } - return undefined; -} - -// ============================================================================= -// Temp File Cleanup -// ============================================================================= - -/** - * Schedules cleanup of a temp directory after a delay. - * This ensures the file is consumed before deletion. 
- */ -function scheduleCleanup(tempDir: string, delayMs: number = TEMP_FILE_CLEANUP_DELAY_MS): void { - const timer = setTimeout(() => { - try { - rmSync(tempDir, { recursive: true, force: true }); - } catch { - // Ignore cleanup errors - } - }, delayMs); - timer.unref(); // Allow process to exit without waiting for cleanup -} - -// ============================================================================= -// TTS Providers -// ============================================================================= - -async function elevenLabsTTS( - text: string, - apiKey: string, - voiceId: string = "pMsXgVXv3BLzUgSXRplE", - modelId: string = "eleven_multilingual_v2", - timeoutMs: number = DEFAULT_TIMEOUT_MS -): Promise { - // Validate voiceId to prevent URL injection - if (!isValidVoiceId(voiceId)) { - throw new Error(`Invalid voiceId format`); - } - - const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), timeoutMs); - - try { - const response = await fetch( - `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`, - { - method: "POST", - headers: { - "xi-api-key": apiKey, - "Content-Type": "application/json", - Accept: "audio/mpeg", - }, - body: JSON.stringify({ - text, - model_id: modelId, - voice_settings: { - stability: 0.5, - similarity_boost: 0.75, - style: 0.0, - use_speaker_boost: true, - }, - }), - signal: controller.signal, - } - ); - - if (!response.ok) { - // Don't leak API error details to users - throw new Error(`ElevenLabs API error (${response.status})`); - } - - return Buffer.from(await response.arrayBuffer()); - } finally { - clearTimeout(timeout); - } -} - -async function openaiTTS( - text: string, - apiKey: string, - model: string = "gpt-4o-mini-tts", - voice: string = "alloy", - timeoutMs: number = DEFAULT_TIMEOUT_MS -): Promise { - // Validate model - if (!isValidOpenAIModel(model)) { - throw new Error(`Invalid model: ${model}`); - } - // Validate voice - if (!isValidOpenAIVoice(voice)) { - throw new Error(`Invalid voice: ${voice}`); - } - - const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), timeoutMs); - - try { - const response = await fetch("https://api.openai.com/v1/audio/speech", { - method: "POST", - headers: { - Authorization: `Bearer ${apiKey}`, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - model, - input: text, - voice, - response_format: "mp3", - }), - signal: controller.signal, - }); - - if (!response.ok) { - // Don't leak API error details to users - throw new Error(`OpenAI TTS API error (${response.status})`); - } - - return Buffer.from(await response.arrayBuffer()); - } finally { - clearTimeout(timeout); - } -} - -// ============================================================================= -// Core TTS Function -// ============================================================================= - -async function textToSpeech(text: string, config: TtsConfig, prefsPath?: string): Promise { - // Get user's preferred provider (from prefs) or fall back to config - const userProvider = prefsPath ? getTtsProvider(prefsPath) : undefined; - const primaryProvider = userProvider || config.provider || "elevenlabs"; - const fallbackProvider = primaryProvider === "openai" ? 
"elevenlabs" : "openai"; - const timeoutMs = config.timeoutMs || DEFAULT_TIMEOUT_MS; - - const maxLen = config.maxTextLength || 4000; - if (text.length > maxLen) { - return { - success: false, - error: `Text too long (${text.length} chars, max ${maxLen})`, - }; - } - - // Try primary provider first, then fallback - const providers = [primaryProvider, fallbackProvider]; - let lastError: string | undefined; - - for (const provider of providers) { - const apiKey = getApiKey(config, provider); - if (!apiKey) { - lastError = `No API key for ${provider}`; - continue; - } - - const providerStartTime = Date.now(); - try { - let audioBuffer: Buffer; - - if (provider === "elevenlabs") { - audioBuffer = await elevenLabsTTS( - text, - apiKey, - config.elevenlabs?.voiceId, - config.elevenlabs?.modelId, - timeoutMs - ); - } else if (provider === "openai") { - audioBuffer = await openaiTTS( - text, - apiKey, - config.openai?.model || "gpt-4o-mini-tts", - config.openai?.voice, - timeoutMs - ); - } else { - lastError = `Unknown provider: ${provider}`; - continue; - } - - const latencyMs = Date.now() - providerStartTime; - - // Save to temp file - const tempDir = mkdtempSync(join(tmpdir(), "tts-")); - const audioPath = join(tempDir, `voice-${Date.now()}.mp3`); - writeFileSync(audioPath, audioBuffer); - - // Schedule cleanup after delay (file should be consumed by then) - scheduleCleanup(tempDir); - - return { success: true, audioPath, latencyMs, provider }; - } catch (err) { - const error = err as Error; - if (error.name === "AbortError") { - lastError = `${provider}: request timed out`; - } else { - lastError = `${provider}: ${error.message}`; - } - // Continue to try fallback provider - } - } - - return { - success: false, - error: `TTS conversion failed: ${lastError || "no providers available"}`, - }; -} - -// ============================================================================= -// Plugin Registration -// ============================================================================= - -export default function register(api: PluginApi) { - const log = api.logger; - const config: TtsConfig = { - enabled: false, - provider: "elevenlabs", - maxTextLength: 4000, - timeoutMs: DEFAULT_TIMEOUT_MS, - ...(api.pluginConfig || {}), - }; - const prefsPath = getPrefsPath(config); - - log.info(`[${PLUGIN_ID}] Registering plugin...`); - log.info(`[${PLUGIN_ID}] Provider: ${config.provider}`); - log.info(`[${PLUGIN_ID}] Preferences: ${prefsPath}`); - - // =========================================================================== - // Tool: speak - // =========================================================================== - api.registerTool({ - name: "speak", - description: `Convert text to speech and generate voice message. -Use this tool when TTS mode is enabled or user requests audio. - -IMPORTANT: After calling this tool, you MUST output the result exactly as returned. -The tool returns "MEDIA:/path/to/audio.mp3" - copy this EXACTLY to your response. -This MEDIA: directive tells the system to send the audio file. - -Example flow: -1. User asks a question with TTS enabled -2. You call speak({text: "Your answer here"}) -3. Tool returns: MEDIA:/tmp/tts-xxx/voice-123.mp3 -4. 
You output: MEDIA:/tmp/tts-xxx/voice-123.mp3 - -Do NOT add extra text around the MEDIA directive.`, - parameters: { - type: "object", - properties: { - text: { - type: "string", - description: "The text to convert to speech", - }, - }, - required: ["text"], - }, - execute: async (_id: string, params: { text?: unknown }) => { - // Validate text parameter - if (typeof params?.text !== "string" || params.text.length === 0) { - return { content: [{ type: "text", text: "Error: Invalid or missing text parameter" }] }; - } - - const text = params.text; - log.info(`[${PLUGIN_ID}] speak() called, length: ${text.length}`); - - const result = await textToSpeech(text, config, prefsPath); - - if (result.success && result.audioPath) { - log.info(`[${PLUGIN_ID}] Audio generated: ${result.audioPath}`); - // Return with MEDIA directive for clawdbot to send - return { - content: [ - { - type: "text", - text: `MEDIA:${result.audioPath}`, - }, - ], - }; - } - - log.error(`[${PLUGIN_ID}] TTS failed: ${result.error}`); - return { - content: [ - { - type: "text", - text: result.error || "TTS conversion failed", - }, - ], - }; - }, - }); - - // =========================================================================== - // RPC Methods - // =========================================================================== - - // tts.status - Check if TTS is enabled - api.registerGatewayMethod("tts.status", async () => { - const userProvider = getTtsProvider(prefsPath); - const activeProvider = userProvider || config.provider || "elevenlabs"; - return { - enabled: isTtsEnabled(prefsPath), - provider: activeProvider, - fallbackProvider: activeProvider === "openai" ? "elevenlabs" : "openai", - prefsPath, - hasOpenAIKey: !!getApiKey(config, "openai"), - hasElevenLabsKey: !!getApiKey(config, "elevenlabs"), - }; - }); - - // tts.enable - Enable TTS mode - api.registerGatewayMethod("tts.enable", async () => { - setTtsEnabled(prefsPath, true); - log.info(`[${PLUGIN_ID}] TTS enabled via RPC`); - return { ok: true, enabled: true }; - }); - - // tts.disable - Disable TTS mode - api.registerGatewayMethod("tts.disable", async () => { - setTtsEnabled(prefsPath, false); - log.info(`[${PLUGIN_ID}] TTS disabled via RPC`); - return { ok: true, enabled: false }; - }); - - // tts.convert - Convert text to audio (returns path) - api.registerGatewayMethod("tts.convert", async (params: { text?: unknown }) => { - // Validate text parameter - if (typeof params?.text !== "string" || params.text.length === 0) { - return { ok: false, error: "Invalid or missing 'text' parameter" }; - } - const result = await textToSpeech(params.text, config, prefsPath); - if (result.success) { - return { ok: true, audioPath: result.audioPath }; - } - return { ok: false, error: result.error }; - }); - - // tts.setProvider - Set primary TTS provider - api.registerGatewayMethod("tts.setProvider", async (params: { provider?: unknown }) => { - if (params?.provider !== "openai" && params?.provider !== "elevenlabs") { - return { ok: false, error: "Invalid provider. 
Use 'openai' or 'elevenlabs'" }; - } - setTtsProvider(prefsPath, params.provider); - log.info(`[${PLUGIN_ID}] Provider set to ${params.provider} via RPC`); - return { ok: true, provider: params.provider }; - }); - - // tts.providers - List available providers and their status - api.registerGatewayMethod("tts.providers", async () => { - const userProvider = getTtsProvider(prefsPath); - return { - providers: [ - { - id: "openai", - name: "OpenAI", - configured: !!getApiKey(config, "openai"), - models: ["gpt-4o-mini-tts"], - voices: ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"], - }, - { - id: "elevenlabs", - name: "ElevenLabs", - configured: !!getApiKey(config, "elevenlabs"), - models: ["eleven_multilingual_v2", "eleven_turbo_v2_5", "eleven_monolingual_v1"], - }, - ], - active: userProvider || config.provider || "elevenlabs", - }; - }); - - // =========================================================================== - // Plugin Commands (LLM-free, intercepted automatically) - // =========================================================================== - - // /tts_on - Enable TTS mode - api.registerCommand({ - name: "tts_on", - description: "Enable text-to-speech for responses", - handler: () => { - setTtsEnabled(prefsPath, true); - log.info(`[${PLUGIN_ID}] TTS enabled via /tts_on command`); - return { text: "πŸ”Š TTS enabled! I'll now respond with audio." }; - }, - }); - - // /tts_off - Disable TTS mode - api.registerCommand({ - name: "tts_off", - description: "Disable text-to-speech for responses", - handler: () => { - setTtsEnabled(prefsPath, false); - log.info(`[${PLUGIN_ID}] TTS disabled via /tts_off command`); - return { text: "πŸ”‡ TTS disabled. Back to text mode." }; - }, - }); - - // /audio - Convert text to audio immediately - api.registerCommand({ - name: "audio", - description: "Convert text to audio message", - acceptsArgs: true, - handler: async (ctx) => { - const text = ctx.args?.trim(); - if (!text) { - return { text: "❌ Usage: /audio " }; - } - - log.info(`[${PLUGIN_ID}] /audio command, text length: ${text.length}`); - const result = await textToSpeech(text, config, prefsPath); - - if (result.success && result.audioPath) { - log.info(`[${PLUGIN_ID}] Audio generated: ${result.audioPath}`); - return { text: `MEDIA:${result.audioPath}` }; - } - - log.error(`[${PLUGIN_ID}] /audio failed: ${result.error}`); - return { text: `❌ Error generating audio: ${result.error}` }; - }, - }); - - // /tts_provider [openai|elevenlabs] - Set or show TTS provider - api.registerCommand({ - name: "tts_provider", - description: "Set or show TTS provider (openai or elevenlabs)", - acceptsArgs: true, - handler: (ctx) => { - const arg = ctx.args?.trim().toLowerCase(); - const currentProvider = getTtsProvider(prefsPath) || config.provider || "elevenlabs"; - - if (!arg) { - // Show current provider - const fallback = currentProvider === "openai" ? "elevenlabs" : "openai"; - const hasOpenAI = !!getApiKey(config, "openai"); - const hasElevenLabs = !!getApiKey(config, "elevenlabs"); - return { - text: `πŸŽ™οΈ **TTS Provider**\n\n` + - `Primary: **${currentProvider}** ${currentProvider === "openai" ? "(gpt-4o-mini-tts)" : "(eleven_multilingual_v2)"}\n` + - `Fallback: ${fallback}\n\n` + - `OpenAI: ${hasOpenAI ? "βœ… configured" : "❌ no API key"}\n` + - `ElevenLabs: ${hasElevenLabs ? 
"βœ… configured" : "❌ no API key"}\n\n` + - `Usage: /tts_provider openai or /tts_provider elevenlabs`, - }; - } - - if (arg !== "openai" && arg !== "elevenlabs") { - return { text: "❌ Invalid provider. Use: /tts_provider openai or /tts_provider elevenlabs" }; - } - - setTtsProvider(prefsPath, arg); - const fallback = arg === "openai" ? "elevenlabs" : "openai"; - log.info(`[${PLUGIN_ID}] Provider set to ${arg} via /tts_provider command`); - return { - text: `βœ… TTS provider changed!\n\n` + - `Primary: **${arg}** ${arg === "openai" ? "(gpt-4o-mini-tts)" : "(eleven_multilingual_v2)"}\n` + - `Fallback: ${fallback}`, - }; - }, - }); - - // /tts_limit [number] - Set or show max text length before summarizing - api.registerCommand({ - name: "tts_limit", - description: "Set or show max text length for TTS (longer texts are summarized)", - acceptsArgs: true, - handler: (ctx) => { - const arg = ctx.args?.trim(); - const currentLimit = getTtsMaxLength(prefsPath); - - if (!arg) { - // Show current limit - return { - text: `πŸ“ **TTS Limit**\n\n` + - `Current limit: **${currentLimit}** characters\n\n` + - `Texts longer than ${currentLimit} chars will be automatically summarized with gpt-4o-mini before converting to audio.\n\n` + - `Usage: /tts_limit 2000 (sets new limit)`, - }; - } - - const newLimit = parseInt(arg, 10); - if (isNaN(newLimit) || newLimit < 100 || newLimit > 10000) { - return { text: "❌ Invalid limit. Use a number between 100 and 10000." }; - } - - setTtsMaxLength(prefsPath, newLimit); - log.info(`[${PLUGIN_ID}] Max length set to ${newLimit} via /tts_limit command`); - return { - text: `βœ… TTS limit changed to **${newLimit}** characters!\n\n` + - `Longer texts will be automatically summarized before converting to audio.`, - }; - }, - }); - - // /tts_summary [on|off] - Enable/disable auto-summarization - api.registerCommand({ - name: "tts_summary", - description: "Enable or disable auto-summarization for long texts", - acceptsArgs: true, - handler: (ctx) => { - const arg = ctx.args?.trim().toLowerCase(); - const currentEnabled = isSummarizationEnabled(prefsPath); - const maxLength = getTtsMaxLength(prefsPath); - - if (!arg) { - // Show current status - return { - text: `πŸ“ **TTS Auto-Summary**\n\n` + - `Status: ${currentEnabled ? "βœ… Enabled" : "❌ Disabled"}\n` + - `Limit: ${maxLength} characters\n\n` + - `When enabled, texts longer than ${maxLength} chars are summarized with gpt-4o-mini before converting to audio.\n\n` + - `Usage: /tts_summary on or /tts_summary off`, - }; - } - - if (arg !== "on" && arg !== "off") { - return { text: "❌ Use: /tts_summary on or /tts_summary off" }; - } - - const newEnabled = arg === "on"; - setSummarizationEnabled(prefsPath, newEnabled); - log.info(`[${PLUGIN_ID}] Summarization ${newEnabled ? "enabled" : "disabled"} via /tts_summary command`); - return { - text: newEnabled - ? 
`βœ… Auto-summary **enabled**!\n\nLong texts will be summarized before converting to audio.` - : `❌ Auto-summary **disabled**!\n\nLong texts will be skipped (no audio).`, - }; - }, - }); - - // /tts_status - Show TTS status and last attempt result - api.registerCommand({ - name: "tts_status", - description: "Show TTS status, configuration, and last attempt result", - acceptsArgs: false, - handler: () => { - const enabled = isTtsEnabled(prefsPath); - const userProvider = getTtsProvider(prefsPath); - const activeProvider = userProvider || config.provider || "elevenlabs"; - const maxLength = getTtsMaxLength(prefsPath); - const summarizationEnabled = isSummarizationEnabled(prefsPath); - const hasKey = !!getApiKey(config, activeProvider); - - let statusLines = [ - `πŸ“Š **TTS Status**\n`, - `State: ${enabled ? "βœ… Enabled" : "❌ Disabled"}`, - `Provider: ${activeProvider} (API Key: ${hasKey ? "βœ…" : "❌"})`, - `Text limit: ${maxLength} characters`, - `Auto-summary: ${summarizationEnabled ? "βœ… Enabled" : "❌ Disabled"}`, - ]; - - if (lastTtsAttempt) { - const timeAgo = Math.round((Date.now() - lastTtsAttempt.timestamp) / 1000); - statusLines.push(``); - statusLines.push(`**Last attempt** (${timeAgo}s ago):`); - statusLines.push(`Result: ${lastTtsAttempt.success ? "βœ… Success" : "❌ Failed"}`); - statusLines.push(`Text: ${lastTtsAttempt.textLength} chars${lastTtsAttempt.summarized ? " (summarized)" : ""}`); - if (lastTtsAttempt.success) { - statusLines.push(`Provider: ${lastTtsAttempt.provider}`); - statusLines.push(`Latency: ${lastTtsAttempt.latencyMs}ms`); - } else if (lastTtsAttempt.error) { - statusLines.push(`Error: ${lastTtsAttempt.error}`); - } - } else { - statusLines.push(``); - statusLines.push(`_No TTS attempts recorded in this session._`); - } - - return { text: statusLines.join("\n") }; - }, - }); - - // =========================================================================== - // Auto-TTS Hook (message_sending) - // =========================================================================== - - // Automatically convert text responses to audio when TTS is enabled - api.on("message_sending", async (event) => { - // Check if TTS is enabled - if (!isTtsEnabled(prefsPath)) { - return; // TTS disabled, don't modify message - } - - const content = event.content?.trim(); - if (!content) { - return; // Empty content, skip - } - - // Skip if already contains MEDIA directive (avoid double conversion) - if (content.includes("MEDIA:")) { - return; - } - - // Skip very short messages (likely errors or status) - if (content.length < 10) { - return; - } - - const maxLength = getTtsMaxLength(prefsPath); - let textForAudio = content; - const summarizationEnabled = isSummarizationEnabled(prefsPath); - - // If text exceeds limit, summarize it first (if enabled) - if (content.length > maxLength) { - if (!summarizationEnabled) { - log.info(`[${PLUGIN_ID}] Auto-TTS: Text too long (${content.length} > ${maxLength}), summarization disabled, skipping audio`); - return; // User disabled summarization, skip audio for long texts - } - - log.info(`[${PLUGIN_ID}] Auto-TTS: Text too long (${content.length} > ${maxLength}), summarizing...`); - - const openaiKey = getApiKey(config, "openai"); - if (!openaiKey) { - log.warn(`[${PLUGIN_ID}] Auto-TTS: No OpenAI key for summarization, skipping audio`); - return; // Can't summarize without OpenAI key - } - - try { - const summarizeResult = await summarizeText(content, maxLength, openaiKey, config.timeoutMs); - textForAudio = summarizeResult.summary; - log.info( - 
`[${PLUGIN_ID}] Auto-TTS: Summarized ${summarizeResult.inputLength} β†’ ${summarizeResult.outputLength} chars in ${summarizeResult.latencyMs}ms` - ); - - // Safeguard: if summary still exceeds hard limit, truncate - const hardLimit = config.maxTextLength || 4000; - if (textForAudio.length > hardLimit) { - log.warn(`[${PLUGIN_ID}] Auto-TTS: Summary exceeded hard limit (${textForAudio.length} > ${hardLimit}), truncating`); - textForAudio = textForAudio.slice(0, hardLimit - 3) + "..."; - } - } catch (err) { - const error = err as Error; - log.error(`[${PLUGIN_ID}] Auto-TTS: Summarization failed: ${error.message}`); - return; // On summarization failure, skip audio - } - } else { - log.info(`[${PLUGIN_ID}] Auto-TTS: Converting ${content.length} chars`); - } - - const wasSummarized = textForAudio !== content; - - try { - const ttsStartTime = Date.now(); - const result = await textToSpeech(textForAudio, config, prefsPath); - - if (result.success && result.audioPath) { - const totalLatency = Date.now() - ttsStartTime; - log.info( - `[${PLUGIN_ID}] Auto-TTS: Generated via ${result.provider} in ${result.latencyMs}ms (total: ${totalLatency}ms)` - ); - - // Track successful attempt - lastTtsAttempt = { - timestamp: Date.now(), - success: true, - textLength: content.length, - summarized: wasSummarized, - provider: result.provider, - latencyMs: result.latencyMs, - }; - - // Return modified content with MEDIA directive - // The text is kept for accessibility, audio is appended - return { - content: `MEDIA:${result.audioPath}`, - }; - } else { - log.warn(`[${PLUGIN_ID}] Auto-TTS: TTS conversion failed - ${result.error}`); - - // Track failed attempt - lastTtsAttempt = { - timestamp: Date.now(), - success: false, - textLength: content.length, - summarized: wasSummarized, - error: result.error, - }; - - // On failure, send original text without audio - return; - } - } catch (err) { - const error = err as Error; - log.error(`[${PLUGIN_ID}] Auto-TTS: Unexpected error - ${error.message}`); - - // Track error - lastTtsAttempt = { - timestamp: Date.now(), - success: false, - textLength: content.length, - summarized: wasSummarized, - error: error.message, - }; - - // On error, send original text - return; - } - }); - - // =========================================================================== - // Startup - // =========================================================================== - - const ttsEnabled = isTtsEnabled(prefsPath); - const userProvider = getTtsProvider(prefsPath); - const activeProvider = userProvider || config.provider || "elevenlabs"; - const hasKey = !!getApiKey(config, activeProvider); - - log.info(`[${PLUGIN_ID}] Ready. TTS: ${ttsEnabled ? "ON" : "OFF"}, Provider: ${activeProvider}, API Key: ${hasKey ? "OK" : "MISSING"}`); - - if (!hasKey) { - log.warn( - `[${PLUGIN_ID}] No API key configured. 
Set ELEVENLABS_API_KEY or OPENAI_API_KEY.` - ); - } -} - -// ============================================================================= -// Plugin Metadata -// ============================================================================= - -export const meta = { - id: PLUGIN_ID, - name: "Telegram TTS", - description: "Text-to-speech for chat responses using ElevenLabs or OpenAI", - version: "0.3.0", -}; - -// ============================================================================= -// Test Exports (for unit testing) -// ============================================================================= - -export const _test = { - isValidVoiceId, - isValidOpenAIVoice, - isValidOpenAIModel, - OPENAI_TTS_MODELS, - summarizeText, -}; diff --git a/extensions/telegram-tts/package.json b/extensions/telegram-tts/package.json deleted file mode 100644 index a3cbc51b7..000000000 --- a/extensions/telegram-tts/package.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "name": "@clawdbot/telegram-tts", - "version": "0.3.0", - "private": true, - "description": "Text-to-speech for chat responses using ElevenLabs or OpenAI", - "main": "index.ts", - "keywords": ["clawdbot", "tts", "elevenlabs", "openai", "telegram", "voice"] -} diff --git a/src/agents/clawdbot-tools.ts b/src/agents/clawdbot-tools.ts index 60fde06fb..91de31937 100644 --- a/src/agents/clawdbot-tools.ts +++ b/src/agents/clawdbot-tools.ts @@ -17,6 +17,7 @@ import { createSessionsListTool } from "./tools/sessions-list-tool.js"; import { createSessionsSendTool } from "./tools/sessions-send-tool.js"; import { createSessionsSpawnTool } from "./tools/sessions-spawn-tool.js"; import { createWebFetchTool, createWebSearchTool } from "./tools/web-tools.js"; +import { createTtsTool } from "./tools/tts-tool.js"; export function createClawdbotTools(options?: { browserControlUrl?: string; @@ -96,6 +97,10 @@ export function createClawdbotTools(options?: { replyToMode: options?.replyToMode, hasRepliedRef: options?.hasRepliedRef, }), + createTtsTool({ + agentChannel: options?.agentChannel, + config: options?.config, + }), createGatewayTool({ agentSessionKey: options?.agentSessionKey, config: options?.config, diff --git a/src/agents/tools/tts-tool.ts b/src/agents/tools/tts-tool.ts new file mode 100644 index 000000000..e0a49cf16 --- /dev/null +++ b/src/agents/tools/tts-tool.ts @@ -0,0 +1,60 @@ +import { Type } from "@sinclair/typebox"; + +import { loadConfig } from "../../config/config.js"; +import type { ClawdbotConfig } from "../../config/config.js"; +import type { GatewayMessageChannel } from "../../utils/message-channel.js"; +import { textToSpeech } from "../../tts/tts.js"; +import type { AnyAgentTool } from "./common.js"; +import { readStringParam } from "./common.js"; + +const TtsToolSchema = Type.Object({ + text: Type.String({ description: "Text to convert to speech." }), + channel: Type.Optional( + Type.String({ description: "Optional channel id to pick output format (e.g. telegram)." }), + ), +}); + +export function createTtsTool(opts?: { + config?: ClawdbotConfig; + agentChannel?: GatewayMessageChannel; +}): AnyAgentTool { + return { + label: "TTS", + name: "tts", + description: + "Convert text to speech and return a MEDIA: path. Use when the user requests audio or TTS is enabled. 
Copy the MEDIA line exactly.",
+    parameters: TtsToolSchema,
+    execute: async (_toolCallId, args) => {
+      const params = args as Record<string, unknown>;
+      const text = readStringParam(params, "text", { required: true });
+      const channel = readStringParam(params, "channel");
+      const cfg = opts?.config ?? loadConfig();
+      const result = await textToSpeech({
+        text,
+        cfg,
+        channel: channel ?? opts?.agentChannel,
+      });
+
+      if (result.success && result.audioPath) {
+        const lines: string[] = [];
+        // Tag Telegram Opus output as a voice bubble instead of a file attachment.
+        if (result.voiceCompatible) lines.push("[[audio_as_voice]]");
+        lines.push(`MEDIA:${result.audioPath}`);
+        return {
+          content: [{ type: "text", text: lines.join("\n") }],
+          details: { audioPath: result.audioPath, provider: result.provider },
+        };
+      }
+
+      return {
+        content: [
+          {
+            type: "text",
+            text: result.error ?? "TTS conversion failed",
+          },
+        ],
+        details: { error: result.error },
+      };
+    },
+  };
+}
diff --git a/src/auto-reply/commands-registry.data.ts b/src/auto-reply/commands-registry.data.ts
index 7e6d76399..3e2ad8775 100644
--- a/src/auto-reply/commands-registry.data.ts
+++ b/src/auto-reply/commands-registry.data.ts
@@ -272,6 +272,81 @@ function buildChatCommands(): ChatCommandDefinition[] {
       ],
       argsMenu: "auto",
     }),
+    defineChatCommand({
+      key: "audio",
+      nativeName: "audio",
+      description: "Convert text to a TTS audio reply.",
+      textAlias: "/audio",
+      args: [
+        {
+          name: "text",
+          description: "Text to speak",
+          type: "string",
+          captureRemaining: true,
+        },
+      ],
+    }),
+    defineChatCommand({
+      key: "tts_on",
+      nativeName: "tts_on",
+      description: "Enable text-to-speech for replies.",
+      textAlias: "/tts_on",
+    }),
+    defineChatCommand({
+      key: "tts_off",
+      nativeName: "tts_off",
+      description: "Disable text-to-speech for replies.",
+      textAlias: "/tts_off",
+    }),
+    defineChatCommand({
+      key: "tts_provider",
+      nativeName: "tts_provider",
+      description: "Set or show the TTS provider.",
+      textAlias: "/tts_provider",
+      args: [
+        {
+          name: "provider",
+          description: "openai or elevenlabs",
+          type: "string",
+          choices: ["openai", "elevenlabs"],
+        },
+      ],
+      argsMenu: "auto",
+    }),
+    defineChatCommand({
+      key: "tts_limit",
+      nativeName: "tts_limit",
+      description: "Set or show the max TTS text length.",
+      textAlias: "/tts_limit",
+      args: [
+        {
+          name: "maxLength",
+          description: "Max chars before summarizing",
+          type: "number",
+        },
+      ],
+    }),
+    defineChatCommand({
+      key: "tts_summary",
+      nativeName: "tts_summary",
+      description: "Enable or disable TTS auto-summary.",
+      textAlias: "/tts_summary",
+      args: [
+        {
+          name: "mode",
+          description: "on or off",
+          type: "string",
+          choices: ["on", "off"],
+        },
+      ],
+      argsMenu: "auto",
+    }),
+    defineChatCommand({
+      key: "tts_status",
+      nativeName: "tts_status",
+      description: "Show TTS status and last attempt.",
+      textAlias: "/tts_status",
+    }),
     defineChatCommand({
       key: "stop",
       nativeName: "stop",
diff --git a/src/auto-reply/reply/commands-core.ts b/src/auto-reply/reply/commands-core.ts
index ad39e198c..5cf40dfb2 100644
--- a/src/auto-reply/reply/commands-core.ts
+++ b/src/auto-reply/reply/commands-core.ts
@@ -16,6 +16,7 @@ import {
 import { handleAllowlistCommand } from "./commands-allowlist.js";
 import { handleSubagentsCommand } from "./commands-subagents.js";
 import { handleModelsCommand } from "./commands-models.js";
+import { handleTtsCommands } from "./commands-tts.js";
 import {
   handleAbortTrigger,
   handleActivationCommand,
@@ -39,6 +40,7 @@ const HANDLERS: CommandHandler[] = [
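+  // Assumption, inferred from this registry's shape rather than stated in the
+  // patch: handlers run top to bottom and the first non-null result wins, so
+  // handleTtsCommands below must stay ahead of the catch-all help/status handlers.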
   handleSendPolicyCommand,
   handleUsageCommand,
   handleRestartCommand,
+  handleTtsCommands,
   handleHelpCommand,
   handleCommandsListCommand,
   handleStatusCommand,
diff --git a/src/auto-reply/reply/commands-tts.ts b/src/auto-reply/reply/commands-tts.ts
new file mode 100644
index 000000000..9582143af
--- /dev/null
+++ b/src/auto-reply/reply/commands-tts.ts
@@ -0,0 +1,214 @@
+import { logVerbose } from "../../globals.js";
+import type { ReplyPayload } from "../types.js";
+import type { CommandHandler } from "./commands-types.js";
+import {
+  getLastTtsAttempt,
+  getTtsMaxLength,
+  getTtsProvider,
+  isSummarizationEnabled,
+  isTtsEnabled,
+  resolveTtsApiKey,
+  resolveTtsConfig,
+  resolveTtsPrefsPath,
+  setLastTtsAttempt,
+  setSummarizationEnabled,
+  setTtsEnabled,
+  setTtsMaxLength,
+  setTtsProvider,
+  textToSpeech,
+} from "../../tts/tts.js";
+
+function parseCommandArg(normalized: string, command: string): string | null {
+  if (normalized === command) return "";
+  if (normalized.startsWith(`${command} `)) return normalized.slice(command.length).trim();
+  return null;
+}
+
+export const handleTtsCommands: CommandHandler = async (params, allowTextCommands) => {
+  if (!allowTextCommands) return null;
+  const normalized = params.command.commandBodyNormalized;
+  if (
+    !normalized.startsWith("/tts_") &&
+    normalized !== "/audio" &&
+    !normalized.startsWith("/audio ")
+  ) {
+    return null;
+  }
+
+  if (!params.command.isAuthorizedSender) {
+    logVerbose(
+      `Ignoring TTS command from unauthorized sender: ${params.command.senderId || ""}`,
+    );
+    return { shouldContinue: false };
+  }
+
+  const config = resolveTtsConfig(params.cfg);
+  const prefsPath = resolveTtsPrefsPath(config);
+
+  if (normalized === "/tts_on") {
+    setTtsEnabled(prefsPath, true);
+    return { shouldContinue: false, reply: { text: "πŸ”Š TTS enabled." } };
+  }
+
+  if (normalized === "/tts_off") {
+    setTtsEnabled(prefsPath, false);
+    return { shouldContinue: false, reply: { text: "πŸ”‡ TTS disabled." } };
+  }
+
+  const audioArg = parseCommandArg(normalized, "/audio");
+  if (audioArg !== null) {
+    if (!audioArg.trim()) {
+      return { shouldContinue: false, reply: { text: "βš™οΈ Usage: /audio <text>" } };
+    }
+
+    const start = Date.now();
+    const result = await textToSpeech({
+      text: audioArg,
+      cfg: params.cfg,
+      channel: params.command.channel,
+      prefsPath,
+    });
+
+    if (result.success && result.audioPath) {
+      setLastTtsAttempt({
+        timestamp: Date.now(),
+        success: true,
+        textLength: audioArg.length,
+        summarized: false,
+        provider: result.provider,
+        latencyMs: result.latencyMs,
+      });
+      const payload: ReplyPayload = {
+        mediaUrl: result.audioPath,
+        audioAsVoice: result.voiceCompatible === true,
+      };
+      return { shouldContinue: false, reply: payload };
+    }
+
+    setLastTtsAttempt({
+      timestamp: Date.now(),
+      success: false,
+      textLength: audioArg.length,
+      summarized: false,
+      error: result.error,
+      latencyMs: Date.now() - start,
+    });
+    return {
+      shouldContinue: false,
+      reply: { text: `❌ Error generating audio: ${result.error ?? "unknown error"}` },
+    };
+  }
+
+  const providerArg = parseCommandArg(normalized, "/tts_provider");
+  if (providerArg !== null) {
+    const currentProvider = getTtsProvider(config, prefsPath);
+    if (!providerArg.trim()) {
+      const fallback = currentProvider === "openai" ?
"elevenlabs" : "openai"; + const hasOpenAI = Boolean(resolveTtsApiKey(config, "openai")); + const hasElevenLabs = Boolean(resolveTtsApiKey(config, "elevenlabs")); + return { + shouldContinue: false, + reply: { + text: + `πŸŽ™οΈ TTS provider\n` + + `Primary: ${currentProvider}\n` + + `Fallback: ${fallback}\n` + + `OpenAI key: ${hasOpenAI ? "βœ…" : "❌"}\n` + + `ElevenLabs key: ${hasElevenLabs ? "βœ…" : "❌"}\n` + + `Usage: /tts_provider openai | elevenlabs`, + }, + }; + } + + const requested = providerArg.trim().toLowerCase(); + if (requested !== "openai" && requested !== "elevenlabs") { + return { + shouldContinue: false, + reply: { text: "βš™οΈ Usage: /tts_provider openai | elevenlabs" }, + }; + } + + setTtsProvider(prefsPath, requested); + const fallback = requested === "openai" ? "elevenlabs" : "openai"; + return { + shouldContinue: false, + reply: { text: `βœ… TTS provider set to ${requested} (fallback: ${fallback}).` }, + }; + } + + const limitArg = parseCommandArg(normalized, "/tts_limit"); + if (limitArg !== null) { + if (!limitArg.trim()) { + const currentLimit = getTtsMaxLength(prefsPath); + return { + shouldContinue: false, + reply: { text: `πŸ“ TTS limit: ${currentLimit} characters.` }, + }; + } + const next = Number.parseInt(limitArg.trim(), 10); + if (!Number.isFinite(next) || next < 100 || next > 10_000) { + return { + shouldContinue: false, + reply: { text: "βš™οΈ Usage: /tts_limit <100-10000>" }, + }; + } + setTtsMaxLength(prefsPath, next); + return { + shouldContinue: false, + reply: { text: `βœ… TTS limit set to ${next} characters.` }, + }; + } + + const summaryArg = parseCommandArg(normalized, "/tts_summary"); + if (summaryArg !== null) { + if (!summaryArg.trim()) { + const enabled = isSummarizationEnabled(prefsPath); + return { + shouldContinue: false, + reply: { text: `πŸ“ TTS auto-summary: ${enabled ? "on" : "off"}.` }, + }; + } + const requested = summaryArg.trim().toLowerCase(); + if (requested !== "on" && requested !== "off") { + return { shouldContinue: false, reply: { text: "βš™οΈ Usage: /tts_summary on|off" } }; + } + setSummarizationEnabled(prefsPath, requested === "on"); + return { + shouldContinue: false, + reply: { + text: requested === "on" ? "βœ… TTS auto-summary enabled." : "❌ TTS auto-summary disabled.", + }, + }; + } + + if (normalized === "/tts_status") { + const enabled = isTtsEnabled(config, prefsPath); + const provider = getTtsProvider(config, prefsPath); + const hasKey = Boolean(resolveTtsApiKey(config, provider)); + const maxLength = getTtsMaxLength(prefsPath); + const summarize = isSummarizationEnabled(prefsPath); + const last = getLastTtsAttempt(); + const lines = [ + "πŸ“Š TTS status", + `State: ${enabled ? "βœ… enabled" : "❌ disabled"}`, + `Provider: ${provider} (${hasKey ? "βœ… key" : "❌ no key"})`, + `Text limit: ${maxLength} chars`, + `Auto-summary: ${summarize ? "on" : "off"}`, + ]; + if (last) { + const timeAgo = Math.round((Date.now() - last.timestamp) / 1000); + lines.push(""); + lines.push(`Last attempt (${timeAgo}s ago): ${last.success ? "βœ…" : "❌"}`); + lines.push(`Text: ${last.textLength} chars${last.summarized ? " (summarized)" : ""}`); + if (last.success) { + lines.push(`Provider: ${last.provider ?? "unknown"}`); + lines.push(`Latency: ${last.latencyMs ?? 
0}ms`); + } else if (last.error) { + lines.push(`Error: ${last.error}`); + } + } + return { shouldContinue: false, reply: { text: lines.join("\n") } }; + } + + return null; +}; diff --git a/src/auto-reply/reply/dispatch-from-config.ts b/src/auto-reply/reply/dispatch-from-config.ts index 47989026c..eb8d303b7 100644 --- a/src/auto-reply/reply/dispatch-from-config.ts +++ b/src/auto-reply/reply/dispatch-from-config.ts @@ -13,6 +13,7 @@ import { formatAbortReplyText, tryFastAbortFromMessage } from "./abort.js"; import { shouldSkipDuplicateInbound } from "./inbound-dedupe.js"; import type { ReplyDispatcher, ReplyDispatchKind } from "./reply-dispatcher.js"; import { isRoutableChannel, routeReply } from "./route-reply.js"; +import { maybeApplyTtsToPayload } from "../../tts/tts.js"; export type DispatchFromConfigResult = { queuedFinal: boolean; @@ -91,6 +92,7 @@ export async function dispatchReplyFromConfig(params: { const currentSurface = (ctx.Surface ?? ctx.Provider)?.toLowerCase(); const shouldRouteToOriginating = isRoutableChannel(originatingChannel) && originatingTo && originatingChannel !== currentSurface; + const ttsChannel = shouldRouteToOriginating ? originatingChannel : currentSurface; /** * Helper to send a payload via route-reply (async). @@ -164,22 +166,36 @@ export async function dispatchReplyFromConfig(params: { { ...params.replyOptions, onToolResult: (payload: ReplyPayload) => { - if (shouldRouteToOriginating) { - // Fire-and-forget for streaming tool results when routing. - void sendPayloadAsync(payload); - } else { - // Synchronous dispatch to preserve callback timing. - dispatcher.sendToolResult(payload); - } + const run = async () => { + const ttsPayload = await maybeApplyTtsToPayload({ + payload, + cfg, + channel: ttsChannel, + kind: "tool", + }); + if (shouldRouteToOriginating) { + await sendPayloadAsync(ttsPayload); + } else { + dispatcher.sendToolResult(ttsPayload); + } + }; + return run(); }, onBlockReply: (payload: ReplyPayload, context) => { - if (shouldRouteToOriginating) { - // Await routed sends so upstream can enforce ordering/timeouts. - return sendPayloadAsync(payload, context?.abortSignal); - } else { - // Synchronous dispatch to preserve callback timing. - dispatcher.sendBlockReply(payload); - } + const run = async () => { + const ttsPayload = await maybeApplyTtsToPayload({ + payload, + cfg, + channel: ttsChannel, + kind: "block", + }); + if (shouldRouteToOriginating) { + await sendPayloadAsync(ttsPayload, context?.abortSignal); + } else { + dispatcher.sendBlockReply(ttsPayload); + } + }; + return run(); }, }, cfg, @@ -190,10 +206,16 @@ export async function dispatchReplyFromConfig(params: { let queuedFinal = false; let routedFinalCount = 0; for (const reply of replies) { + const ttsReply = await maybeApplyTtsToPayload({ + payload: reply, + cfg, + channel: ttsChannel, + kind: "final", + }); if (shouldRouteToOriginating && originatingChannel && originatingTo) { // Route final reply to originating channel. 
const result = await routeReply({ - payload: reply, + payload: ttsReply, channel: originatingChannel, to: originatingTo, sessionKey: ctx.SessionKey, @@ -209,7 +231,7 @@ export async function dispatchReplyFromConfig(params: { queuedFinal = result.ok || queuedFinal; if (result.ok) routedFinalCount += 1; } else { - queuedFinal = dispatcher.sendFinalReply(reply) || queuedFinal; + queuedFinal = dispatcher.sendFinalReply(ttsReply) || queuedFinal; } } await dispatcher.waitForIdle(); diff --git a/src/auto-reply/reply/route-reply.ts b/src/auto-reply/reply/route-reply.ts index c874d1c04..bbc7efa7d 100644 --- a/src/auto-reply/reply/route-reply.ts +++ b/src/auto-reply/reply/route-reply.ts @@ -10,7 +10,6 @@ import { resolveSessionAgentId } from "../../agents/agent-scope.js"; import { resolveEffectiveMessagesConfig } from "../../agents/identity.js"; import { normalizeChannelId } from "../../channels/plugins/index.js"; -import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js"; import type { ClawdbotConfig } from "../../config/config.js"; import { INTERNAL_MESSAGE_CHANNEL } from "../../utils/message-channel.js"; import type { OriginatingChannelType } from "../templating.js"; @@ -81,48 +80,6 @@ export async function routeReply(params: RouteReplyParams): Promise { + try { + const cfg = loadConfig(); + const config = resolveTtsConfig(cfg); + const prefsPath = resolveTtsPrefsPath(config); + const provider = getTtsProvider(config, prefsPath); + respond(true, { + enabled: isTtsEnabled(config, prefsPath), + provider, + fallbackProvider: provider === "openai" ? "elevenlabs" : "openai", + prefsPath, + hasOpenAIKey: Boolean(resolveTtsApiKey(config, "openai")), + hasElevenLabsKey: Boolean(resolveTtsApiKey(config, "elevenlabs")), + }); + } catch (err) { + respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err))); + } + }, + "tts.enable": async ({ respond }) => { + try { + const cfg = loadConfig(); + const config = resolveTtsConfig(cfg); + const prefsPath = resolveTtsPrefsPath(config); + setTtsEnabled(prefsPath, true); + respond(true, { enabled: true }); + } catch (err) { + respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err))); + } + }, + "tts.disable": async ({ respond }) => { + try { + const cfg = loadConfig(); + const config = resolveTtsConfig(cfg); + const prefsPath = resolveTtsPrefsPath(config); + setTtsEnabled(prefsPath, false); + respond(true, { enabled: false }); + } catch (err) { + respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err))); + } + }, + "tts.convert": async ({ params, respond }) => { + const text = typeof params.text === "string" ? params.text.trim() : ""; + if (!text) { + respond( + false, + undefined, + errorShape(ErrorCodes.INVALID_REQUEST, "tts.convert requires text"), + ); + return; + } + try { + const cfg = loadConfig(); + const channel = typeof params.channel === "string" ? params.channel.trim() : undefined; + const result = await textToSpeech({ text, cfg, channel }); + if (result.success && result.audioPath) { + respond(true, { + audioPath: result.audioPath, + provider: result.provider, + outputFormat: result.outputFormat, + voiceCompatible: result.voiceCompatible, + }); + return; + } + respond( + false, + undefined, + errorShape(ErrorCodes.UNAVAILABLE, result.error ?? 
"TTS conversion failed"), + ); + } catch (err) { + respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err))); + } + }, + "tts.setProvider": async ({ params, respond }) => { + const provider = typeof params.provider === "string" ? params.provider.trim() : ""; + if (provider !== "openai" && provider !== "elevenlabs") { + respond( + false, + undefined, + errorShape(ErrorCodes.INVALID_REQUEST, "Invalid provider. Use openai or elevenlabs."), + ); + return; + } + try { + const cfg = loadConfig(); + const config = resolveTtsConfig(cfg); + const prefsPath = resolveTtsPrefsPath(config); + setTtsProvider(prefsPath, provider); + respond(true, { provider }); + } catch (err) { + respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err))); + } + }, + "tts.providers": async ({ respond }) => { + try { + const cfg = loadConfig(); + const config = resolveTtsConfig(cfg); + const prefsPath = resolveTtsPrefsPath(config); + respond(true, { + providers: [ + { + id: "openai", + name: "OpenAI", + configured: Boolean(resolveTtsApiKey(config, "openai")), + models: [...OPENAI_TTS_MODELS], + voices: [...OPENAI_TTS_VOICES], + }, + { + id: "elevenlabs", + name: "ElevenLabs", + configured: Boolean(resolveTtsApiKey(config, "elevenlabs")), + models: ["eleven_multilingual_v2", "eleven_turbo_v2_5", "eleven_monolingual_v1"], + }, + ], + active: getTtsProvider(config, prefsPath), + }); + } catch (err) { + respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err))); + } + }, +}; diff --git a/src/telegram/bot/delivery.ts b/src/telegram/bot/delivery.ts index 7839f9ced..653474d50 100644 --- a/src/telegram/bot/delivery.ts +++ b/src/telegram/bot/delivery.ts @@ -14,7 +14,6 @@ import { mediaKindFromMime } from "../../media/constants.js"; import { fetchRemoteMedia } from "../../media/fetch.js"; import { isGifMedia } from "../../media/mime.js"; import { saveMediaBuffer } from "../../media/store.js"; -import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js"; import type { RuntimeEnv } from "../../runtime.js"; import { loadWebMedia } from "../../web/media.js"; import { resolveTelegramVoiceSend } from "../voice.js"; @@ -40,45 +39,6 @@ export async function deliverReplies(params: { const threadParams = buildTelegramThreadParams(messageThreadId); let hasReplied = false; for (const reply of replies) { - // Track if hook wants to send audio after text - let audioToSendAfter: string | undefined; - - // Run message_sending hook (allows plugins like TTS to generate audio) - const hookRunner = getGlobalHookRunner(); - if (hookRunner && reply?.text?.trim()) { - try { - const hookResult = await hookRunner.runMessageSending( - { - to: chatId, - content: reply.text, - metadata: { channel: "telegram", threadId: messageThreadId }, - }, - { - channelId: "telegram", - accountId: undefined, - conversationId: chatId, - }, - ); - - // Check if hook wants to cancel the message - if (hookResult?.cancel) { - continue; // Skip this reply - } - - // Check if hook returned a MEDIA directive (TTS audio) - if (hookResult?.content !== undefined) { - const mediaMatch = hookResult.content.match(/^MEDIA:(.+)$/m); - if (mediaMatch) { - // Save audio path to send AFTER the text message - audioToSendAfter = mediaMatch[1].trim(); - } - } - } catch (err) { - // Hook errors shouldn't block message sending - logVerbose(`[telegram delivery] hook error: ${String(err)}`); - } - } - const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 
0) > 0;
     if (!reply?.text && !hasMedia) {
       if (reply?.audioAsVoice) {
@@ -110,25 +70,6 @@
         hasReplied = true;
       }
     }
-
-    // Send TTS audio after text (if hook generated one)
-    if (audioToSendAfter) {
-      try {
-        const audioMedia = await loadWebMedia(audioToSendAfter);
-        const audioFile = new InputFile(audioMedia.buffer, "voice.mp3");
-        // Switch typing indicator to record_voice before sending
-        await params.onVoiceRecording?.();
-        const audioParams: Record<string, unknown> = {};
-        if (threadParams) {
-          audioParams.message_thread_id = threadParams.message_thread_id;
-        }
-        await bot.api.sendVoice(chatId, audioFile, audioParams);
-        logVerbose(`[telegram delivery] TTS audio sent: ${audioToSendAfter}`);
-      } catch (err) {
-        logVerbose(`[telegram delivery] TTS audio send failed: ${String(err)}`);
-      }
-    }
-
     continue;
   }
   // media with optional caption on first item
diff --git a/src/tts/tts.test.ts b/src/tts/tts.test.ts
new file mode 100644
index 000000000..c4725a723
--- /dev/null
+++ b/src/tts/tts.test.ts
@@ -0,0 +1,234 @@
+import { describe, expect, it, vi, beforeEach, afterEach } from "vitest";
+
+import { _test } from "./tts.js";
+
+const {
+  isValidVoiceId,
+  isValidOpenAIVoice,
+  isValidOpenAIModel,
+  OPENAI_TTS_MODELS,
+  OPENAI_TTS_VOICES,
+  summarizeText,
+  resolveOutputFormat,
+} = _test;
+
+describe("tts", () => {
+  describe("isValidVoiceId", () => {
+    it("accepts valid ElevenLabs voice IDs", () => {
+      expect(isValidVoiceId("pMsXgVXv3BLzUgSXRplE")).toBe(true);
+      expect(isValidVoiceId("21m00Tcm4TlvDq8ikWAM")).toBe(true);
+      expect(isValidVoiceId("EXAVITQu4vr4xnSDxMaL")).toBe(true);
+    });
+
+    it("accepts voice IDs of varying valid lengths", () => {
+      expect(isValidVoiceId("a1b2c3d4e5")).toBe(true);
+      expect(isValidVoiceId("a".repeat(40))).toBe(true);
+    });
+
+    it("rejects too short voice IDs", () => {
+      expect(isValidVoiceId("")).toBe(false);
+      expect(isValidVoiceId("abc")).toBe(false);
+      expect(isValidVoiceId("123456789")).toBe(false);
+    });
+
+    it("rejects too long voice IDs", () => {
+      expect(isValidVoiceId("a".repeat(41))).toBe(false);
+      expect(isValidVoiceId("a".repeat(100))).toBe(false);
+    });
+
+    it("rejects voice IDs with invalid characters", () => {
+      expect(isValidVoiceId("pMsXgVXv3BLz-gSXRplE")).toBe(false);
+      expect(isValidVoiceId("pMsXgVXv3BLz_gSXRplE")).toBe(false);
+      expect(isValidVoiceId("pMsXgVXv3BLz gSXRplE")).toBe(false);
+      expect(isValidVoiceId("../../../etc/passwd")).toBe(false);
+      expect(isValidVoiceId("voice?param=value")).toBe(false);
+    });
+  });
+
+  describe("isValidOpenAIVoice", () => {
+    it("accepts all valid OpenAI voices", () => {
+      for (const voice of OPENAI_TTS_VOICES) {
+        expect(isValidOpenAIVoice(voice)).toBe(true);
+      }
+    });
+
+    it("rejects invalid voice names", () => {
+      expect(isValidOpenAIVoice("invalid")).toBe(false);
+      expect(isValidOpenAIVoice("")).toBe(false);
+      expect(isValidOpenAIVoice("ALLOY")).toBe(false);
+      expect(isValidOpenAIVoice("alloy ")).toBe(false);
+      expect(isValidOpenAIVoice(" alloy")).toBe(false);
+    });
+  });
+
+  describe("isValidOpenAIModel", () => {
+    it("accepts gpt-4o-mini-tts model", () => {
+      expect(isValidOpenAIModel("gpt-4o-mini-tts")).toBe(true);
+    });
+
+    it("rejects other models", () => {
+      expect(isValidOpenAIModel("tts-1")).toBe(false);
+      expect(isValidOpenAIModel("tts-1-hd")).toBe(false);
+      expect(isValidOpenAIModel("invalid")).toBe(false);
+      expect(isValidOpenAIModel("")).toBe(false);
+      expect(isValidOpenAIModel("gpt-4")).toBe(false);
+    });
+  });
+
+  describe("OPENAI_TTS_MODELS", () => {
+    it("contains
only gpt-4o-mini-tts", () => {
+      expect(OPENAI_TTS_MODELS).toContain("gpt-4o-mini-tts");
+      expect(OPENAI_TTS_MODELS).toHaveLength(1);
+    });
+
+    it("is a non-empty array", () => {
+      expect(Array.isArray(OPENAI_TTS_MODELS)).toBe(true);
+      expect(OPENAI_TTS_MODELS.length).toBeGreaterThan(0);
+    });
+  });
+
+  describe("resolveOutputFormat", () => {
+    it("uses Opus for Telegram", () => {
+      const output = resolveOutputFormat("telegram");
+      expect(output.openai).toBe("opus");
+      expect(output.elevenlabs).toBe("opus_48000_64");
+      expect(output.extension).toBe(".opus");
+      expect(output.voiceCompatible).toBe(true);
+    });
+
+    it("uses MP3 for other channels", () => {
+      const output = resolveOutputFormat("discord");
+      expect(output.openai).toBe("mp3");
+      expect(output.elevenlabs).toBe("mp3_44100_128");
+      expect(output.extension).toBe(".mp3");
+      expect(output.voiceCompatible).toBe(false);
+    });
+  });
+
+  describe("summarizeText", () => {
+    const mockApiKey = "test-api-key";
+    const originalFetch = globalThis.fetch;
+
+    beforeEach(() => {
+      vi.useFakeTimers({ shouldAdvanceTime: true });
+    });
+
+    afterEach(() => {
+      globalThis.fetch = originalFetch;
+      vi.useRealTimers();
+    });
+
+    it("summarizes text and returns result with metrics", async () => {
+      const mockSummary = "This is a summarized version of the text.";
+      globalThis.fetch = vi.fn().mockResolvedValue({
+        ok: true,
+        json: () =>
+          Promise.resolve({
+            choices: [{ message: { content: mockSummary } }],
+          }),
+      });
+
+      const longText = "A".repeat(2000);
+      const result = await summarizeText(longText, 1500, mockApiKey, 30_000);
+
+      expect(result.summary).toBe(mockSummary);
+      expect(result.inputLength).toBe(2000);
+      expect(result.outputLength).toBe(mockSummary.length);
+      expect(result.latencyMs).toBeGreaterThanOrEqual(0);
+      expect(globalThis.fetch).toHaveBeenCalledTimes(1);
+    });
+
+    it("calls OpenAI API with correct parameters", async () => {
+      globalThis.fetch = vi.fn().mockResolvedValue({
+        ok: true,
+        json: () =>
+          Promise.resolve({
+            choices: [{ message: { content: "Summary" } }],
+          }),
+      });
+
+      await summarizeText("Long text to summarize", 500, mockApiKey, 30_000);
+
+      expect(globalThis.fetch).toHaveBeenCalledWith(
+        "https://api.openai.com/v1/chat/completions",
+        expect.objectContaining({
+          method: "POST",
+          headers: {
+            Authorization: `Bearer ${mockApiKey}`,
+            "Content-Type": "application/json",
+          },
+        }),
+      );
+
+      const callArgs = (globalThis.fetch as ReturnType<typeof vi.fn>).mock.calls[0];
+      const body = JSON.parse(callArgs[1].body);
+      expect(body.model).toBe("gpt-4o-mini");
+      expect(body.temperature).toBe(0.3);
+      expect(body.max_tokens).toBe(250);
+    });
+
+    it("rejects targetLength below minimum (100)", async () => {
+      await expect(summarizeText("text", 99, mockApiKey, 30_000)).rejects.toThrow(
+        "Invalid targetLength: 99",
+      );
+    });
+
+    it("rejects targetLength above maximum (10000)", async () => {
+      await expect(summarizeText("text", 10001, mockApiKey, 30_000)).rejects.toThrow(
+        "Invalid targetLength: 10001",
+      );
+    });
+
+    it("accepts targetLength at boundaries", async () => {
+      globalThis.fetch = vi.fn().mockResolvedValue({
+        ok: true,
+        json: () =>
+          Promise.resolve({
+            choices: [{ message: { content: "Summary" } }],
+          }),
+      });
+
+      await expect(summarizeText("text", 100, mockApiKey, 30_000)).resolves.toBeDefined();
+      await expect(summarizeText("text", 10000, mockApiKey, 30_000)).resolves.toBeDefined();
+    });
+
+    it("throws error when API returns non-ok response", async () => {
+      globalThis.fetch = vi.fn().mockResolvedValue({
+        ok: false,
+        status:
500, + }); + + await expect(summarizeText("text", 500, mockApiKey, 30_000)).rejects.toThrow( + "Summarization service unavailable", + ); + }); + + it("throws error when no summary is returned", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [], + }), + }); + + await expect(summarizeText("text", 500, mockApiKey, 30_000)).rejects.toThrow( + "No summary returned", + ); + }); + + it("throws error when summary content is empty", async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + choices: [{ message: { content: " " } }], + }), + }); + + await expect(summarizeText("text", 500, mockApiKey, 30_000)).rejects.toThrow( + "No summary returned", + ); + }); + }); +}); diff --git a/src/tts/tts.ts b/src/tts/tts.ts new file mode 100644 index 000000000..0a03063a9 --- /dev/null +++ b/src/tts/tts.ts @@ -0,0 +1,630 @@ +import { + existsSync, + mkdirSync, + readFileSync, + writeFileSync, + mkdtempSync, + rmSync, + renameSync, + unlinkSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import path from "node:path"; + +import type { ReplyPayload } from "../auto-reply/types.js"; +import { normalizeChannelId } from "../channels/plugins/index.js"; +import type { ChannelId } from "../channels/plugins/types.js"; +import type { ClawdbotConfig } from "../config/config.js"; +import type { TtsConfig, TtsMode, TtsProvider } from "../config/types.tts.js"; +import { logVerbose } from "../globals.js"; +import { CONFIG_DIR, resolveUserPath } from "../utils.js"; + +const DEFAULT_TIMEOUT_MS = 30_000; +const DEFAULT_TTS_MAX_LENGTH = 1500; +const DEFAULT_TTS_SUMMARIZE = true; +const DEFAULT_MAX_TEXT_LENGTH = 4000; +const TEMP_FILE_CLEANUP_DELAY_MS = 5 * 60 * 1000; // 5 minutes + +const DEFAULT_ELEVENLABS_VOICE_ID = "pMsXgVXv3BLzUgSXRplE"; +const DEFAULT_ELEVENLABS_MODEL_ID = "eleven_multilingual_v2"; +const DEFAULT_OPENAI_MODEL = "gpt-4o-mini-tts"; +const DEFAULT_OPENAI_VOICE = "alloy"; + +const TELEGRAM_OUTPUT = { + openai: "opus" as const, + // ElevenLabs output formats use codec_sample_rate_bitrate naming. + // Opus @ 48kHz/64kbps is a good voice-note tradeoff for Telegram. + elevenlabs: "opus_48000_64", + extension: ".opus", + voiceCompatible: true, +}; + +const DEFAULT_OUTPUT = { + openai: "mp3" as const, + elevenlabs: "mp3_44100_128", + extension: ".mp3", + voiceCompatible: false, +}; + +export type ResolvedTtsConfig = { + enabled: boolean; + mode: TtsMode; + provider: TtsProvider; + elevenlabs: { + apiKey?: string; + voiceId: string; + modelId: string; + }; + openai: { + apiKey?: string; + model: string; + voice: string; + }; + prefsPath?: string; + maxTextLength: number; + timeoutMs: number; +}; + +type TtsUserPrefs = { + tts?: { + enabled?: boolean; + provider?: TtsProvider; + maxLength?: number; + summarize?: boolean; + }; +}; + +export type TtsResult = { + success: boolean; + audioPath?: string; + error?: string; + latencyMs?: number; + provider?: string; + outputFormat?: string; + voiceCompatible?: boolean; +}; + +type TtsStatusEntry = { + timestamp: number; + success: boolean; + textLength: number; + summarized: boolean; + provider?: string; + latencyMs?: number; + error?: string; +}; + +let lastTtsAttempt: TtsStatusEntry | undefined; + +export function resolveTtsConfig(cfg: ClawdbotConfig): ResolvedTtsConfig { + const raw: TtsConfig = cfg.messages?.tts ?? {}; + return { + enabled: raw.enabled ?? false, + mode: raw.mode ?? "final", + provider: raw.provider ?? 
"elevenlabs", + elevenlabs: { + apiKey: raw.elevenlabs?.apiKey, + voiceId: raw.elevenlabs?.voiceId ?? DEFAULT_ELEVENLABS_VOICE_ID, + modelId: raw.elevenlabs?.modelId ?? DEFAULT_ELEVENLABS_MODEL_ID, + }, + openai: { + apiKey: raw.openai?.apiKey, + model: raw.openai?.model ?? DEFAULT_OPENAI_MODEL, + voice: raw.openai?.voice ?? DEFAULT_OPENAI_VOICE, + }, + prefsPath: raw.prefsPath, + maxTextLength: raw.maxTextLength ?? DEFAULT_MAX_TEXT_LENGTH, + timeoutMs: raw.timeoutMs ?? DEFAULT_TIMEOUT_MS, + }; +} + +export function resolveTtsPrefsPath(config: ResolvedTtsConfig): string { + if (config.prefsPath?.trim()) return resolveUserPath(config.prefsPath.trim()); + const envPath = process.env.CLAWDBOT_TTS_PREFS?.trim(); + if (envPath) return resolveUserPath(envPath); + return path.join(CONFIG_DIR, "settings", "tts.json"); +} + +function readPrefs(prefsPath: string): TtsUserPrefs { + try { + if (!existsSync(prefsPath)) return {}; + return JSON.parse(readFileSync(prefsPath, "utf8")) as TtsUserPrefs; + } catch { + return {}; + } +} + +function atomicWriteFileSync(filePath: string, content: string): void { + const tmpPath = `${filePath}.tmp.${Date.now()}.${Math.random().toString(36).slice(2)}`; + writeFileSync(tmpPath, content); + try { + renameSync(tmpPath, filePath); + } catch (err) { + try { + unlinkSync(tmpPath); + } catch { + // ignore + } + throw err; + } +} + +function updatePrefs(prefsPath: string, update: (prefs: TtsUserPrefs) => void): void { + const prefs = readPrefs(prefsPath); + update(prefs); + mkdirSync(path.dirname(prefsPath), { recursive: true }); + atomicWriteFileSync(prefsPath, JSON.stringify(prefs, null, 2)); +} + +export function isTtsEnabled(config: ResolvedTtsConfig, prefsPath: string): boolean { + const prefs = readPrefs(prefsPath); + if (prefs.tts?.enabled !== undefined) return prefs.tts.enabled === true; + return config.enabled; +} + +export function setTtsEnabled(prefsPath: string, enabled: boolean): void { + updatePrefs(prefsPath, (prefs) => { + prefs.tts = { ...prefs.tts, enabled }; + }); +} + +export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): TtsProvider { + const prefs = readPrefs(prefsPath); + return prefs.tts?.provider ?? config.provider; +} + +export function setTtsProvider(prefsPath: string, provider: TtsProvider): void { + updatePrefs(prefsPath, (prefs) => { + prefs.tts = { ...prefs.tts, provider }; + }); +} + +export function getTtsMaxLength(prefsPath: string): number { + const prefs = readPrefs(prefsPath); + return prefs.tts?.maxLength ?? DEFAULT_TTS_MAX_LENGTH; +} + +export function setTtsMaxLength(prefsPath: string, maxLength: number): void { + updatePrefs(prefsPath, (prefs) => { + prefs.tts = { ...prefs.tts, maxLength }; + }); +} + +export function isSummarizationEnabled(prefsPath: string): boolean { + const prefs = readPrefs(prefsPath); + return prefs.tts?.summarize ?? DEFAULT_TTS_SUMMARIZE; +} + +export function setSummarizationEnabled(prefsPath: string, enabled: boolean): void { + updatePrefs(prefsPath, (prefs) => { + prefs.tts = { ...prefs.tts, summarize: enabled }; + }); +} + +export function getLastTtsAttempt(): TtsStatusEntry | undefined { + return lastTtsAttempt; +} + +export function setLastTtsAttempt(entry: TtsStatusEntry | undefined): void { + lastTtsAttempt = entry; +} + +function resolveOutputFormat(channelId?: string | null) { + if (channelId === "telegram") return TELEGRAM_OUTPUT; + return DEFAULT_OUTPUT; +} + +function resolveChannelId(channel: string | undefined): ChannelId | null { + return channel ? 
normalizeChannelId(channel) : null;
+}
+
+export function resolveTtsApiKey(
+  config: ResolvedTtsConfig,
+  provider: TtsProvider,
+): string | undefined {
+  if (provider === "elevenlabs") {
+    return config.elevenlabs.apiKey || process.env.ELEVENLABS_API_KEY || process.env.XI_API_KEY;
+  }
+  if (provider === "openai") {
+    return config.openai.apiKey || process.env.OPENAI_API_KEY;
+  }
+  return undefined;
+}
+
+function isValidVoiceId(voiceId: string): boolean {
+  return /^[a-zA-Z0-9]{10,40}$/.test(voiceId);
+}
+
+export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts"] as const;
+export const OPENAI_TTS_VOICES = [
+  "alloy",
+  "ash",
+  "coral",
+  "echo",
+  "fable",
+  "onyx",
+  "nova",
+  "sage",
+  "shimmer",
+] as const;
+
+type OpenAiTtsVoice = (typeof OPENAI_TTS_VOICES)[number];
+
+function isValidOpenAIModel(model: string): boolean {
+  return OPENAI_TTS_MODELS.includes(model as (typeof OPENAI_TTS_MODELS)[number]);
+}
+
+function isValidOpenAIVoice(voice: string): voice is OpenAiTtsVoice {
+  return OPENAI_TTS_VOICES.includes(voice as OpenAiTtsVoice);
+}
+
+type SummarizeResult = {
+  summary: string;
+  latencyMs: number;
+  inputLength: number;
+  outputLength: number;
+};
+
+async function summarizeText(
+  text: string,
+  targetLength: number,
+  apiKey: string,
+  timeoutMs: number,
+): Promise<SummarizeResult> {
+  if (targetLength < 100 || targetLength > 10_000) {
+    throw new Error(`Invalid targetLength: ${targetLength}`);
+  }
+
+  const startTime = Date.now();
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), timeoutMs);
+
+  try {
+    const response = await fetch("https://api.openai.com/v1/chat/completions", {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        model: "gpt-4o-mini",
+        messages: [
+          {
+            role: "system",
+            content: `You are an assistant that summarizes texts concisely while keeping the most important information. Summarize the text to approximately ${targetLength} characters. Maintain the original tone and style.
Reply only with the summary, without additional explanations.`,
+          },
+          {
+            role: "user",
+            content: `\n${text}\n`,
+          },
+        ],
+        max_tokens: Math.ceil(targetLength / 2),
+        temperature: 0.3,
+      }),
+      signal: controller.signal,
+    });
+
+    if (!response.ok) {
+      throw new Error("Summarization service unavailable");
+    }
+
+    const data = (await response.json()) as {
+      choices?: Array<{ message?: { content?: string } }>;
+    };
+    const summary = data.choices?.[0]?.message?.content?.trim();
+
+    if (!summary) {
+      throw new Error("No summary returned");
+    }
+
+    return {
+      summary,
+      latencyMs: Date.now() - startTime,
+      inputLength: text.length,
+      outputLength: summary.length,
+    };
+  } finally {
+    clearTimeout(timeout);
+  }
+}
+
+function scheduleCleanup(tempDir: string, delayMs: number = TEMP_FILE_CLEANUP_DELAY_MS): void {
+  const timer = setTimeout(() => {
+    try {
+      rmSync(tempDir, { recursive: true, force: true });
+    } catch {
+      // ignore cleanup errors
+    }
+  }, delayMs);
+  timer.unref();
+}
+
+async function elevenLabsTTS(params: {
+  text: string;
+  apiKey: string;
+  voiceId: string;
+  modelId: string;
+  outputFormat: string;
+  timeoutMs: number;
+}): Promise<Buffer> {
+  const { text, apiKey, voiceId, modelId, outputFormat, timeoutMs } = params;
+  if (!isValidVoiceId(voiceId)) {
+    throw new Error("Invalid voiceId format");
+  }
+
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), timeoutMs);
+
+  try {
+    const url = new URL(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`);
+    if (outputFormat) {
+      url.searchParams.set("output_format", outputFormat);
+    }
+
+    const response = await fetch(url.toString(), {
+      method: "POST",
+      headers: {
+        "xi-api-key": apiKey,
+        "Content-Type": "application/json",
+        Accept: "audio/mpeg",
+      },
+      body: JSON.stringify({
+        text,
+        model_id: modelId,
+        voice_settings: {
+          stability: 0.5,
+          similarity_boost: 0.75,
+          style: 0.0,
+          use_speaker_boost: true,
+        },
+      }),
+      signal: controller.signal,
+    });
+
+    if (!response.ok) {
+      throw new Error(`ElevenLabs API error (${response.status})`);
+    }
+
+    return Buffer.from(await response.arrayBuffer());
+  } finally {
+    clearTimeout(timeout);
+  }
+}
+
+async function openaiTTS(params: {
+  text: string;
+  apiKey: string;
+  model: string;
+  voice: string;
+  responseFormat: "mp3" | "opus";
+  timeoutMs: number;
+}): Promise<Buffer> {
+  const { text, apiKey, model, voice, responseFormat, timeoutMs } = params;
+
+  if (!isValidOpenAIModel(model)) {
+    throw new Error(`Invalid model: ${model}`);
+  }
+  if (!isValidOpenAIVoice(voice)) {
+    throw new Error(`Invalid voice: ${voice}`);
+  }
+
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), timeoutMs);
+
+  try {
+    const response = await fetch("https://api.openai.com/v1/audio/speech", {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        model,
+        input: text,
+        voice,
+        response_format: responseFormat,
+      }),
+      signal: controller.signal,
+    });
+
+    if (!response.ok) {
+      throw new Error(`OpenAI TTS API error (${response.status})`);
+    }
+
+    return Buffer.from(await response.arrayBuffer());
+  } finally {
+    clearTimeout(timeout);
+  }
+}
+
+export async function textToSpeech(params: {
+  text: string;
+  cfg: ClawdbotConfig;
+  prefsPath?: string;
+  channel?: string;
+}): Promise<TtsResult> {
+  const config = resolveTtsConfig(params.cfg);
+  const prefsPath = params.prefsPath ??
resolveTtsPrefsPath(config);
+  const channelId = resolveChannelId(params.channel);
+  const output = resolveOutputFormat(channelId);
+
+  if (params.text.length > config.maxTextLength) {
+    return {
+      success: false,
+      error: `Text too long (${params.text.length} chars, max ${config.maxTextLength})`,
+    };
+  }
+
+  const userProvider = getTtsProvider(config, prefsPath);
+  const providers: TtsProvider[] = [
+    userProvider,
+    userProvider === "openai" ? "elevenlabs" : "openai",
+  ];
+
+  let lastError: string | undefined;
+
+  for (const provider of providers) {
+    const apiKey = resolveTtsApiKey(config, provider);
+    if (!apiKey) {
+      lastError = `No API key for ${provider}`;
+      continue;
+    }
+
+    const providerStart = Date.now();
+    try {
+      let audioBuffer: Buffer;
+      if (provider === "elevenlabs") {
+        audioBuffer = await elevenLabsTTS({
+          text: params.text,
+          apiKey,
+          voiceId: config.elevenlabs.voiceId,
+          modelId: config.elevenlabs.modelId,
+          outputFormat: output.elevenlabs,
+          timeoutMs: config.timeoutMs,
+        });
+      } else {
+        audioBuffer = await openaiTTS({
+          text: params.text,
+          apiKey,
+          model: config.openai.model,
+          voice: config.openai.voice,
+          responseFormat: output.openai,
+          timeoutMs: config.timeoutMs,
+        });
+      }
+
+      const latencyMs = Date.now() - providerStart;
+
+      const tempDir = mkdtempSync(path.join(tmpdir(), "tts-"));
+      const audioPath = path.join(tempDir, `voice-${Date.now()}${output.extension}`);
+      writeFileSync(audioPath, audioBuffer);
+      scheduleCleanup(tempDir);
+
+      return {
+        success: true,
+        audioPath,
+        latencyMs,
+        provider,
+        outputFormat: provider === "openai" ? output.openai : output.elevenlabs,
+        voiceCompatible: output.voiceCompatible,
+      };
+    } catch (err) {
+      const error = err as Error;
+      if (error.name === "AbortError") {
+        lastError = `${provider}: request timed out`;
+      } else {
+        lastError = `${provider}: ${error.message}`;
+      }
+    }
+  }
+
+  return {
+    success: false,
+    error: `TTS conversion failed: ${lastError || "no providers available"}`,
+  };
+}
+
+export async function maybeApplyTtsToPayload(params: {
+  payload: ReplyPayload;
+  cfg: ClawdbotConfig;
+  channel?: string;
+  kind?: "tool" | "block" | "final";
+}): Promise<ReplyPayload> {
+  const config = resolveTtsConfig(params.cfg);
+  const prefsPath = resolveTtsPrefsPath(config);
+  if (!isTtsEnabled(config, prefsPath)) return params.payload;
+
+  const mode = config.mode ?? "final";
+  if (mode === "final" && params.kind && params.kind !== "final") return params.payload;
+
+  const text = params.payload.text ?? "";
+  if (!text.trim()) return params.payload;
+  if (params.payload.mediaUrl || (params.payload.mediaUrls?.length ??
0) > 0) return params.payload; + if (text.includes("MEDIA:")) return params.payload; + if (text.trim().length < 10) return params.payload; + + const maxLength = getTtsMaxLength(prefsPath); + let textForAudio = text.trim(); + let wasSummarized = false; + + if (textForAudio.length > maxLength) { + if (!isSummarizationEnabled(prefsPath)) { + logVerbose( + `TTS: skipping long text (${textForAudio.length} > ${maxLength}), summarization disabled.`, + ); + return params.payload; + } + + const openaiKey = resolveTtsApiKey(config, "openai"); + if (!openaiKey) { + logVerbose("TTS: skipping summarization - OpenAI key missing."); + return params.payload; + } + + try { + const summary = await summarizeText(textForAudio, maxLength, openaiKey, config.timeoutMs); + textForAudio = summary.summary; + wasSummarized = true; + if (textForAudio.length > config.maxTextLength) { + logVerbose( + `TTS: summary exceeded hard limit (${textForAudio.length} > ${config.maxTextLength}); truncating.`, + ); + textForAudio = `${textForAudio.slice(0, config.maxTextLength - 3)}...`; + } + } catch (err) { + const error = err as Error; + logVerbose(`TTS: summarization failed: ${error.message}`); + return params.payload; + } + } + + const ttsStart = Date.now(); + const result = await textToSpeech({ + text: textForAudio, + cfg: params.cfg, + prefsPath, + channel: params.channel, + }); + + if (result.success && result.audioPath) { + lastTtsAttempt = { + timestamp: Date.now(), + success: true, + textLength: text.length, + summarized: wasSummarized, + provider: result.provider, + latencyMs: result.latencyMs, + }; + + const channelId = resolveChannelId(params.channel); + const shouldVoice = channelId === "telegram" && result.voiceCompatible === true; + + return { + ...params.payload, + mediaUrl: result.audioPath, + audioAsVoice: shouldVoice || params.payload.audioAsVoice, + }; + } + + lastTtsAttempt = { + timestamp: Date.now(), + success: false, + textLength: text.length, + summarized: wasSummarized, + error: result.error, + }; + + const latency = Date.now() - ttsStart; + logVerbose(`TTS: conversion failed after ${latency}ms (${result.error ?? "unknown"}).`); + return params.payload; +} + +export const _test = { + isValidVoiceId, + isValidOpenAIVoice, + isValidOpenAIModel, + OPENAI_TTS_MODELS, + OPENAI_TTS_VOICES, + summarizeText, + resolveOutputFormat, +};
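
Reviewer note: a minimal sketch of driving the new core module directly, using only exports visible in the hunks above (`textToSpeech`, `maybeApplyTtsToPayload`, and `loadConfig` from src/config/config.js). The sample text, the "telegram" channel id, and the bare `{ text }` payload shape are illustrative assumptions, not part of the patch.

// sketch.ts (ESM, top-level await) — hypothetical file, not added by this patch.
import { loadConfig } from "./src/config/config.js";
import { maybeApplyTtsToPayload, textToSpeech } from "./src/tts/tts.js";

const cfg = loadConfig();

// Direct conversion: tries the preferred provider, falls back to the other,
// and writes a temp file that scheduleCleanup() removes after ~5 minutes.
const result = await textToSpeech({ text: "Build finished.", cfg, channel: "telegram" });
if (result.success && result.audioPath) {
  // "telegram" resolves to .opus with voiceCompatible=true; other channels get .mp3.
  console.log(result.provider, result.outputFormat, result.audioPath);
}

// Payload decoration: a no-op unless TTS is enabled via config or user prefs;
// text over the user's /tts_limit is summarized first when auto-summary is on.
const decorated = await maybeApplyTtsToPayload({
  payload: { text: "Deploy complete. All checks passed." },
  cfg,
  channel: "telegram",
  kind: "final",
});
console.log(decorated.mediaUrl, decorated.audioAsVoice);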