diff --git a/api/routers/tts.py b/api/routers/tts.py
index 9660468..4ad747e 100644
--- a/api/routers/tts.py
+++ b/api/routers/tts.py
@@ -23,7 +23,7 @@ async def tts_synthesize(
     Convert text to speech audio.
     
     - **text**: Text to synthesize
-    - **voice_id**: Voice ID (e.g., 'zh-CN-YunjianNeural', 'en-US-AriaNeural')
+    - **voice_id**: Voice ID (e.g., '[Chinese] zh-CN Yunjian', '[English] en-US Aria')
     
     Returns path to generated audio file and duration.
     """
diff --git a/api/schemas/tts.py b/api/schemas/tts.py
index 92bf98d..de41df8 100644
--- a/api/schemas/tts.py
+++ b/api/schemas/tts.py
@@ -8,13 +8,13 @@ from pydantic import BaseModel, Field
 class TTSSynthesizeRequest(BaseModel):
     """TTS synthesis request"""
     text: str = Field(..., description="Text to synthesize")
-    voice_id: str = Field("zh-CN-YunjianNeural", description="Voice ID")
+    voice_id: str = Field("[Chinese] zh-CN Yunjian", description="Voice ID")
     
     class Config:
         json_schema_extra = {
             "example": {
                 "text": "Hello, welcome to ReelForge!",
-                "voice_id": "zh-CN-YunjianNeural"
+                "voice_id": "[Chinese] zh-CN Yunjian"
             }
         }
 
diff --git a/api/schemas/video.py b/api/schemas/video.py
index d98e66c..39ad8e3 100644
--- a/api/schemas/video.py
+++ b/api/schemas/video.py
@@ -23,7 +23,7 @@ class VideoGenerateRequest(BaseModel):
     
     # === Basic Config ===
     n_scenes: int = Field(5, ge=1, le=20, description="Number of scenes (generate mode only)")
-    voice_id: str = Field("zh-CN-YunjianNeural", description="TTS voice ID")
+    voice_id: str = Field("[Chinese] zh-CN Yunjian", description="TTS voice ID")
     
     # === LLM Parameters ===
     min_narration_words: int = Field(5, ge=1, le=100, description="Min narration words")
@@ -57,7 +57,7 @@ class VideoGenerateRequest(BaseModel):
                 "text": "Atomic Habits teaches us that small changes compound over time to produce remarkable results.",
                 "mode": "generate",
                 "n_scenes": 5,
-                "voice_id": "zh-CN-YunjianNeural",
+                "voice_id": "[Chinese] zh-CN Yunjian",
                 "title": "The Power of Atomic Habits"
             }
         }
diff --git a/config.example.yaml b/config.example.yaml
index 0826440..b8661b6 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -25,7 +25,7 @@ comfyui:
   
   # TTS-specific configuration
   tts:
-    default: selfhost/tts_edge.json  # TTS workflow to use
+    default_workflow: selfhost/tts_edge.json  # TTS workflow to use
   
   # Image-specific configuration
   image:
diff --git a/reelforge/config/schema.py b/reelforge/config/schema.py
index 7a61900..c0f6683 100644
--- a/reelforge/config/schema.py
+++ b/reelforge/config/schema.py
@@ -15,16 +15,12 @@ class LLMConfig(BaseModel):
 
 class TTSSubConfig(BaseModel):
     """TTS-specific configuration (under comfyui.tts)"""
-    model_config = {"populate_by_name": True}  # Allow both field name and alias
-    
-    default_workflow: str = Field(default=None, description="Default TTS workflow (required, no fallback)", alias="default")
+    default_workflow: str = Field(default=None, description="Default TTS workflow (required, no fallback)")
 
 
 class ImageSubConfig(BaseModel):
     """Image-specific configuration (under comfyui.image)"""
-    model_config = {"populate_by_name": True}  # Allow both field name and alias
-    
-    default_workflow: str = Field(default=None, description="Default image workflow (required, no fallback)", alias="default")
+    default_workflow: str = Field(default=None, description="Default image workflow (required, no fallback)")
     prompt_prefix: str = Field(
         default="Pure white background, minimalist illustration, matchstick figure style, black and white line drawing, simple clean lines",
         description="Prompt prefix for all image generation"
diff --git a/reelforge/models/storyboard.py b/reelforge/models/storyboard.py
index 0420683..e483769 100644
--- a/reelforge/models/storyboard.py
+++ b/reelforge/models/storyboard.py
@@ -26,7 +26,9 @@ class StoryboardConfig:
     video_fps: int = 30                        # Frame rate
     
     # Audio parameters
-    voice_id: str = "zh-CN-YunjianNeural"     # Default voice
+    voice_id: str = "[Chinese] zh-CN Yunjian"     # Default voice
+    tts_workflow: Optional[str] = None         # TTS workflow filename (None = use default)
+    tts_speed: float = 1.2                     # TTS speed multiplier (1.0 = normal, >1.0 = faster)
     
     # Image parameters
     image_width: int = 1024
diff --git a/reelforge/services/frame_processor.py b/reelforge/services/frame_processor.py
index c61786e..ed249f0 100644
--- a/reelforge/services/frame_processor.py
+++ b/reelforge/services/frame_processor.py
@@ -124,11 +124,12 @@ class FrameProcessor:
         from reelforge.utils.os_util import get_task_frame_path
         output_path = get_task_frame_path(config.task_id, frame.index, "audio")
         
-        # Call TTS with specific output path
+        # Call TTS with specific output path and workflow
         audio_path = await self.core.tts(
             text=frame.narration,
+            workflow=config.tts_workflow,  # Use workflow from config
             voice=config.voice_id,
-            rate="+20%",
+            speed=config.tts_speed,  # Use speed (not rate) from config
             output_path=output_path,
         )
         
diff --git a/reelforge/services/image_prompt_generator.py b/reelforge/services/image_prompt_generator.py
index 786d0e3..d8a8b02 100644
--- a/reelforge/services/image_prompt_generator.py
+++ b/reelforge/services/image_prompt_generator.py
@@ -116,8 +116,8 @@ class ImagePromptGeneratorService:
         # 5. Apply prompt prefix to each prompt
         from reelforge.utils.prompt_helper import build_image_prompt
         
-        # Get prompt prefix from config
-        image_config = self.core.config.get("image", {})
+        # Prompt prefix is read from the nested config path comfyui.image.prompt_prefix
+        image_config = self.core.config.get("comfyui", {}).get("image", {})
         prompt_prefix = image_config.get("prompt_prefix", "")
         
         # Apply prefix to each base prompt
diff --git a/reelforge/services/tts_service.py b/reelforge/services/tts_service.py
index 6f4d11f..2e21993 100644
--- a/reelforge/services/tts_service.py
+++ b/reelforge/services/tts_service.py
@@ -52,8 +52,8 @@ class TTSService(ComfyBaseService):
         comfyui_url: Optional[str] = None,
         runninghub_api_key: Optional[str] = None,
         # TTS parameters
-        voice: Optional[str] = None,
-        speed: float = 1.0,
+        voice: str = "[Chinese] zh-CN Yunjian",
+        speed: float = 1.2,
         # Output path
         output_path: Optional[str] = None,
         **params
@@ -88,7 +88,7 @@ class TTSService(ComfyBaseService):
             audio_path = await reelforge.tts(
                 text="Hello",
                 workflow="tts_edge.json",
-                voice="zh-CN-XiaoxiaoNeural",
+                voice="[Chinese] zh-CN Xiaoxiao",
                 speed=1.2
             )
             
diff --git a/reelforge/services/video_generator.py b/reelforge/services/video_generator.py
index f017b30..74e1a0c 100644
--- a/reelforge/services/video_generator.py
+++ b/reelforge/services/video_generator.py
@@ -54,7 +54,9 @@ class VideoGeneratorService:
         
         # === Basic Config ===
         n_scenes: int = 5,  # Only used in generate mode; ignored in fixed mode
-        voice_id: str = "zh-CN-YunjianNeural",
+        voice_id: str = "[Chinese] zh-CN Yunjian",
+        tts_workflow: Optional[str] = None,
+        tts_speed: float = 1.2,
         output_path: Optional[str] = None,
         
         # === LLM Parameters ===
@@ -111,7 +113,9 @@ class VideoGeneratorService:
             n_scenes: Number of storyboard scenes (default 5)
                       Only effective in generate mode; ignored in fixed mode
             
-            voice_id: TTS voice ID (default "zh-CN-YunjianNeural")
+            voice_id: TTS voice ID (default "[Chinese] zh-CN Yunjian")
+            tts_workflow: TTS workflow filename, e.g. "tts_edge.json" (None = use default)
+            tts_speed: TTS speed multiplier (1.0 = normal, 1.2 = 20% faster, default 1.2)
             output_path: Output video path (auto-generated if None)
             
             min_narration_words: Min narration length (generate mode only)
@@ -219,6 +223,8 @@ class VideoGeneratorService:
             video_height=video_height,
             video_fps=video_fps,
             voice_id=voice_id,
+            tts_workflow=tts_workflow,
+            tts_speed=tts_speed,
             image_width=image_width,
             image_height=image_height,
             image_workflow=image_workflow,
@@ -259,7 +265,8 @@ class VideoGeneratorService:
             # Override prompt_prefix if provided (temporarily modify config)
             original_prefix = None
             if prompt_prefix is not None:
-                image_config = self.core.config.get("image", {})
+                # Image settings are nested under comfyui.image, not at the top level of config
+                image_config = self.core.config.get("comfyui", {}).get("image", {})
                 original_prefix = image_config.get("prompt_prefix")
                 image_config["prompt_prefix"] = prompt_prefix
                 logger.info(f"Using custom prompt_prefix: '{prompt_prefix}'")
diff --git a/reelforge/utils/tts_util.py b/reelforge/utils/tts_util.py
index 8280b57..f69ca71 100644
--- a/reelforge/utils/tts_util.py
+++ b/reelforge/utils/tts_util.py
@@ -31,7 +31,7 @@ _request_semaphore = asyncio.Semaphore(_MAX_CONCURRENT_REQUESTS)
 
 async def edge_tts(
     text: str,
-    voice: str = "zh-CN-YunjianNeural",
+    voice: str = "[Chinese] zh-CN Yunjian",
     rate: str = "+0%",
     volume: str = "+0%",
     pitch: str = "+0Hz",
@@ -53,7 +53,7 @@ async def edge_tts(
     
     Args:
         text: Text to convert to speech
-        voice: Voice ID (e.g., zh-CN-YunjianNeural, en-US-JennyNeural)
+        voice: Voice ID (e.g., [Chinese] zh-CN Yunjian, [English] en-US Jenny)
         rate: Speech rate (e.g., +0%, +50%, -20%)
         volume: Speech volume (e.g., +0%, +50%, -20%)
         pitch: Speech pitch (e.g., +0Hz, +10Hz, -5Hz)
@@ -65,20 +65,20 @@ async def edge_tts(
         Audio data as bytes (MP3 format)
     
     Popular Chinese voices:
-    - zh-CN-YunjianNeural (male, default)
-    - zh-CN-XiaoxiaoNeural (female)
-    - zh-CN-YunxiNeural (male)
-    - zh-CN-XiaoyiNeural (female)
+    - [Chinese] zh-CN Yunjian (male, default)
+    - [Chinese] zh-CN Xiaoxiao (female)
+    - [Chinese] zh-CN Yunxi (male)
+    - [Chinese] zh-CN Xiaoyi (female)
     
     Popular English voices:
-    - en-US-JennyNeural (female)
-    - en-US-GuyNeural (male)
-    - en-GB-SoniaNeural (female, British)
+    - [English] en-US Jenny (female)
+    - [English] en-US Guy (male)
+    - [English] en-GB Sonia (female, British)
     
     Example:
         audio_bytes = await edge_tts(
             text="你好，世界！",
-            voice="zh-CN-YunjianNeural",
+            voice="[Chinese] zh-CN Yunjian",
             rate="+20%"
         )
     """
@@ -235,11 +235,11 @@ async def list_voices(locale: str = None, retry_count: int = _RETRY_COUNT, retry
     Example:
         # List all voices
         voices = await list_voices()
-        # Returns: ['zh-CN-YunjianNeural', 'zh-CN-XiaoxiaoNeural', ...]
+        # Returns: ['[Chinese] zh-CN Yunjian', '[Chinese] zh-CN Xiaoxiao', ...]
         
         # List Chinese voices only
         voices = await list_voices(locale="zh-CN")
-        # Returns: ['zh-CN-YunjianNeural', 'zh-CN-XiaoxiaoNeural', ...]
+        # Returns: ['[Chinese] zh-CN Yunjian', '[Chinese] zh-CN Xiaoxiao', ...]
     """
     logger.debug(f"Fetching Edge TTS voices, locale filter: {locale}, retry_count: {retry_count}")
     
diff --git a/web/app.py b/web/app.py
index 23bfc3e..9c1bbb8 100644
--- a/web/app.py
+++ b/web/app.py
@@ -459,7 +459,7 @@ def main():
             else:
                 tts_workflow_key = "selfhost/tts_edge.json"  # fallback
             
-            # TTS preview expander (similar to image preview)
+            # TTS preview expander (simplified, uses default voice and speed)
             with st.expander(tr("tts.preview_title"), expanded=False):
                 # Preview text input
                 preview_text = st.text_input(
@@ -473,7 +473,7 @@ def main():
                 if st.button(tr("tts.preview_button"), key="preview_tts", use_container_width=True):
                     with st.spinner(tr("tts.previewing")):
                         try:
-                            # Generate preview audio using selected workflow
+                            # Generate preview audio using selected workflow (use default voice and speed)
                             audio_path = run_async(reelforge.tts(
                                 text=preview_text,
                                 workflow=tts_workflow_key