From ec395196cdea7f4914de1af9c7728b45ce815cd1 Mon Sep 17 00:00:00 2001
From: puke <1129090915@qq.com>
Date: Mon, 3 Nov 2025 17:44:33 +0800
Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E7=A4=BA=E4=BE=8B=E5=BA=93?=
 =?UTF-8?q?=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                                |  57 ++--
 docs/capabilities-guide.md               | 343 -----------------------
 docs/en/development/architecture.md      |  54 ++++
 docs/en/development/contributing.md      |  50 ++++
 docs/en/faq.md                           |  78 ++++++
 docs/en/gallery/index.md                 |  45 +++
 docs/en/getting-started/configuration.md |  60 ++++
 docs/en/getting-started/installation.md  | 115 ++++++++
 docs/en/getting-started/quick-start.md   | 107 +++++++
 docs/en/index.md                         |  97 +++++++
 docs/en/reference/api-overview.md        |  52 ++++
 docs/en/reference/config-schema.md       |  60 ++++
 docs/en/troubleshooting.md               | 108 +++++++
 docs/en/tutorials/custom-style.md        |  36 +++
 docs/en/tutorials/voice-cloning.md       |  35 +++
 docs/en/tutorials/your-first-video.md    |  33 +++
 docs/en/user-guide/api.md                |  42 +++
 docs/en/user-guide/templates.md          |  48 ++++
 docs/en/user-guide/web-ui.md             |  77 +++++
 docs/en/user-guide/workflows.md          |  37 +++
 docs/gallery/index.md                    |  78 ++++++
 docs/gallery/reading-habit/prompts.txt   |   1 +
 docs/stylesheets/extra.css               |  17 ++
 docs/zh/development/architecture.md      |  54 ++++
 docs/zh/development/contributing.md      |  50 ++++
 docs/zh/faq.md                           |  78 ++++++
 docs/zh/gallery/index.md                 |  45 +++
 docs/zh/getting-started/configuration.md |  60 ++++
 docs/zh/getting-started/installation.md  | 115 ++++++++
 docs/zh/getting-started/quick-start.md   | 107 +++++++
 docs/zh/index.md                         |  97 +++++++
 docs/zh/reference/api-overview.md        |  52 ++++
 docs/zh/reference/config-schema.md       |  60 ++++
 docs/zh/troubleshooting.md               | 108 +++++++
 docs/zh/tutorials/custom-style.md        |  36 +++
 docs/zh/tutorials/voice-cloning.md       |  35 +++
 docs/zh/tutorials/your-first-video.md    |  33 +++
 docs/zh/user-guide/api.md                |  42 +++
 docs/zh/user-guide/templates.md          |  48 ++++
 docs/zh/user-guide/web-ui.md             |  77 +++++
 docs/zh/user-guide/workflows.md          |  37 +++
 mkdocs.yml                               | 171 +++++++++++
 pixelle_video/cli.py                     |  38 ---
 43 files changed, 2567 insertions(+), 406 deletions(-)
 delete mode 100644 docs/capabilities-guide.md
 create mode 100644 docs/en/development/architecture.md
 create mode 100644 docs/en/development/contributing.md
 create mode 100644 docs/en/faq.md
 create mode 100644 docs/en/gallery/index.md
 create mode 100644 docs/en/getting-started/configuration.md
 create mode 100644 docs/en/getting-started/installation.md
 create mode 100644 docs/en/getting-started/quick-start.md
 create mode 100644 docs/en/index.md
 create mode 100644 docs/en/reference/api-overview.md
 create mode 100644 docs/en/reference/config-schema.md
 create mode 100644 docs/en/troubleshooting.md
 create mode 100644 docs/en/tutorials/custom-style.md
 create mode 100644 docs/en/tutorials/voice-cloning.md
 create mode 100644 docs/en/tutorials/your-first-video.md
 create mode 100644 docs/en/user-guide/api.md
 create mode 100644 docs/en/user-guide/templates.md
 create mode 100644 docs/en/user-guide/web-ui.md
 create mode 100644 docs/en/user-guide/workflows.md
 create mode 100644 docs/gallery/index.md
 create mode 100644 docs/gallery/reading-habit/prompts.txt
 create mode 100644 docs/stylesheets/extra.css
 create mode 100644 docs/zh/development/architecture.md
 create mode 100644 docs/zh/development/contributing.md
 create mode 100644 docs/zh/faq.md
 create mode 100644 docs/zh/gallery/index.md
 create mode 100644 docs/zh/getting-started/configuration.md
 create mode 100644 docs/zh/getting-started/installation.md
 create mode 100644 docs/zh/getting-started/quick-start.md
 create mode 100644 docs/zh/index.md
 create mode 100644 docs/zh/reference/api-overview.md
 create mode 100644 docs/zh/reference/config-schema.md
 create mode 100644 docs/zh/troubleshooting.md
 create mode 100644 docs/zh/tutorials/custom-style.md
 create mode 100644 docs/zh/tutorials/voice-cloning.md
 create mode 100644 docs/zh/tutorials/your-first-video.md
 create mode 100644 docs/zh/user-guide/api.md
 create mode 100644 docs/zh/user-guide/templates.md
 create mode 100644 docs/zh/user-guide/web-ui.md
 create mode 100644 docs/zh/user-guide/workflows.md
 create mode 100644 mkdocs.yml
 delete mode 100644 pixelle_video/cli.py

diff --git a/README.md b/README.md
index cc9a0bd..8aa0607 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@
 - ✅ **全自动生成** - 输入主题，3 分钟自动生成完整视频
 - ✅ **AI 智能文案** - 根据主题智能创作解说词，无需自己写脚本
 - ✅ **AI 生成配图** - 每句话都配上精美的 AI 插图
-- ✅ **真人语音** - 100+ 种真人声音可选，告别机械音
+- ✅ **AI 生成语音** - 支持 Edge-TTS、Index-TTS 等众多主流 TTS 方案
 - ✅ **背景音乐** - 支持添加 BGM，让视频更有氛围
 - ✅ **视觉风格** - 多种模板可选，打造独特视频风格
 - ✅ **灵活尺寸** - 支持竖屏、横屏等多种视频尺寸
@@ -123,16 +123,7 @@ uv run streamlit run web/app.py
 - **固定文案内容**: 直接输入完整文案，跳过 AI 创作
   - 适合：已有现成文案，直接生成视频
 
----
-
-### 🎵 音频设置（左侧栏）
-
-#### 语音选择
-- 从下拉菜单选择解说声音
-- 提供 4 种精选声音（男声/女声、专业/年轻）
-- 点击「试听语音」可以预览效果
-
-#### 背景音乐
+#### 背景音乐（BGM）
 - **无 BGM**: 纯人声解说
 - **内置音乐**: 选择预置的背景音乐（如 default.mp3）
 - **自定义音乐**: 将你的音乐文件（MP3/WAV 等）放到 `bgm/` 文件夹
@@ -140,16 +131,40 @@ uv run streamlit run web/app.py
 
 ---
 
+### 🎤 语音设置（中间栏）
+
+#### TTS 工作流
+- 从下拉菜单选择 TTS 工作流（支持 Edge-TTS、Index-TTS 等）
+- 系统会自动扫描 `workflows/` 文件夹中的 TTS 工作流
+- 如果懂 ComfyUI，可以自定义 TTS 工作流
+
+#### 参考音频（可选）
+- 上传参考音频文件用于声音克隆（支持 MP3/WAV/FLAC 等格式）
+- 适用于支持声音克隆的 TTS 工作流（如 Index-TTS）
+- 上传后可以直接试听
+
+#### 预览功能
+- 输入测试文本，点击「预览语音」即可试听效果
+- 支持使用参考音频进行预览
+
+---
+
 ### 🎨 视觉设置（中间栏）
 
-#### 视觉风格
+#### 图像生成
 决定 AI 生成什么风格的配图。
 
 **ComfyUI 工作流**  
-- 选择图像生成的工作流文件
+- 从下拉菜单选择图像生成工作流
+- 支持本地部署（selfhost）和云端（RunningHub）工作流
 - 默认使用 `image_flux.json`
 - 如果懂 ComfyUI，可以放自己的工作流到 `workflows/` 文件夹
 
+**图像尺寸**  
+- 设置生成图像的宽度和高度（单位：像素）
+- 默认 1024x1024，可根据需要调整
+- 注意：不同的模型对尺寸有不同的限制
+
 **提示词前缀（Prompt Prefix）**  
 - 控制图像的整体风格（语言需要是英文的）
 - 例如：Minimalist black-and-white matchstick figure style illustration, clean lines, simple sketch style
@@ -158,7 +173,7 @@ uv run streamlit run web/app.py
 #### 视频模板
 决定视频画面的布局和设计。
 
-- 从下拉菜单选择模板（default.html、modern.html、classic.html 等）
+- 从下拉菜单选择模板，按尺寸分组显示（竖屏/横屏/方形）
 - 点击「预览模板」可以自定义参数测试效果
 - 如果懂 HTML，可以在 `templates/` 文件夹创建自己的模板
 
@@ -190,9 +205,9 @@ A: 生成一个 3 段视频大约需要 2-5 分钟，取决于你的网络和 AI
 **Q: 视频效果不满意怎么办？**  
 A: 可以尝试：
 1. 更换 LLM 模型（不同模型文案风格不同）
-2. 调整提示词前缀（改变配图风格）
-3. 更换语音（不同声音适合不同内容）
-4. 尝试不同的视频模板
+2. 调整图像尺寸和提示词前缀（改变配图风格）
+3. 更换 TTS 工作流或上传参考音频（改变语音效果）
+4. 尝试不同的视频模板和尺寸
 
 **Q: 费用大概多少？**  
 A: **本项目完全支持免费运行！**
@@ -237,11 +252,3 @@ Pixelle-Video 的设计受到以下优秀开源项目的启发：
 
 [![Star History Chart](https://api.star-history.com/svg?repos=PixelleLab/Pixelle-Video&type=Date)](https://star-history.com/#PixelleLab/Pixelle-Video&Date)
 
----
-
-<div align="center">
-  <p>Made with ❤️ by PixelleLab</p>
-  <p>
-    <a href="#top">回到顶部 ⬆️</a>
-  </p>
-</div>
diff --git a/docs/capabilities-guide.md b/docs/capabilities-guide.md
deleted file mode 100644
index ba6f4c5..0000000
--- a/docs/capabilities-guide.md
+++ /dev/null
@@ -1,343 +0,0 @@
-# Pixelle-Video Capabilities Guide
-
-> Complete guide to using LLM, TTS, and Image generation capabilities
-
-## Overview
-
-Pixelle-Video provides three core AI capabilities:
-- **LLM**: Text generation using LiteLLM (supports 100+ models)
-- **TTS**: Text-to-speech using Edge TTS (free, 400+ voices)
-- **Image**: Image generation using ComfyKit (local or cloud)
-
-## Quick Start
-
-```python
-from pixelle_video.service import pixelle_video
-
-# LLM - Generate text
-answer = await pixelle_video.llm("Summarize 'Atomic Habits' in 3 sentences")
-
-# TTS - Generate speech
-audio_path = await pixelle_video.tts("Hello, world!")
-
-# Image - Generate images
-image_url = await pixelle_video.image(
-    workflow="workflows/book_cover_simple.json",
-    prompt="minimalist book cover design"
-)
-```
-
----
-
-## 1. LLM (Large Language Model)
-
-### Configuration
-
-Edit `config.yaml`:
-
-```yaml
-llm:
-  default: qwen  # Choose: qwen, openai, deepseek, ollama
-  
-  qwen:
-    api_key: "your-dashscope-api-key"
-    base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1"
-    model: "openai/qwen-max"
-  
-  openai:
-    api_key: "your-openai-api-key"
-    model: "gpt-4"
-  
-  deepseek:
-    api_key: "your-deepseek-api-key"
-    base_url: "https://api.deepseek.com"
-    model: "openai/deepseek-chat"
-  
-  ollama:
-    base_url: "http://localhost:11434"
-    model: "ollama/llama3.2"
-```
-
-### Usage
-
-```python
-# Basic usage
-answer = await pixelle_video.llm("What is machine learning?")
-
-# With parameters
-answer = await pixelle_video.llm(
-    prompt="Explain atomic habits",
-    temperature=0.7,  # 0.0-2.0 (lower = more deterministic)
-    max_tokens=2000
-)
-```
-
-### Environment Variables (Alternative)
-
-Instead of `config.yaml`, you can use environment variables:
-
-```bash
-# Qwen
-export DASHSCOPE_API_KEY="your-key"
-
-# OpenAI
-export OPENAI_API_KEY="your-key"
-
-# DeepSeek
-export DEEPSEEK_API_KEY="your-key"
-```
-
----
-
-## 2. TTS (Text-to-Speech)
-
-### Configuration
-
-Edit `config.yaml`:
-
-```yaml
-tts:
-  default: edge
-  
-  edge:
-    # No configuration needed - free to use!
-```
-
-### Usage
-
-```python
-# Basic usage (auto-generates temp path)
-audio_path = await pixelle_video.tts("Hello, world!")
-# Returns: "temp/abc123def456.mp3"
-
-# With Chinese text
-audio_path = await pixelle_video.tts(
-    text="你好，世界！",
-    voice="zh-CN-YunjianNeural"
-)
-
-# With custom parameters
-audio_path = await pixelle_video.tts(
-    text="Welcome to Pixelle-Video",
-    voice="en-US-JennyNeural",
-    rate="+20%",  # Speed: +50% = faster, -20% = slower
-    volume="+0%",
-    pitch="+0Hz"
-)
-
-# Specify output path
-audio_path = await pixelle_video.tts(
-    text="Hello",
-    output_path="output/greeting.mp3"
-)
-```
-
-### Popular Voices
-
-**Chinese:**
-- `zh-CN-YunjianNeural` (male, default)
-- `zh-CN-XiaoxiaoNeural` (female)
-- `zh-CN-YunxiNeural` (male)
-- `zh-CN-XiaoyiNeural` (female)
-
-**English:**
-- `en-US-JennyNeural` (female)
-- `en-US-GuyNeural` (male)
-- `en-GB-SoniaNeural` (female, British)
-
-### List All Voices
-
-```python
-# Get all available voices
-voices = await pixelle_video.tts.list_voices()
-
-# Get Chinese voices only
-voices = await pixelle_video.tts.list_voices(locale="zh-CN")
-
-# Get English voices only
-voices = await pixelle_video.tts.list_voices(locale="en-US")
-```
-
----
-
-## 3. Image Generation
-
-### Configuration
-
-Edit `config.yaml`:
-
-```yaml
-image:
-  default: comfykit
-  
-  comfykit:
-    # Local ComfyUI (optional, default: http://127.0.0.1:8188)
-    comfyui_url: "http://127.0.0.1:8188"
-    
-    # RunningHub cloud (optional)
-    runninghub_api_key: "rh-key-xxx"
-```
-
-### Usage
-
-```python
-# Basic usage (local ComfyUI)
-image_url = await pixelle_video.image(
-    workflow="workflows/book_cover_simple.json",
-    prompt="minimalist book cover design, blue and white"
-)
-
-# With full parameters
-image_url = await pixelle_video.image(
-    workflow="workflows/book_cover_simple.json",
-    prompt="book cover for 'Atomic Habits', professional, minimalist",
-    negative_prompt="ugly, blurry, low quality",
-    width=1024,
-    height=1536,
-    steps=20,
-    seed=42
-)
-
-# Using RunningHub cloud
-image_url = await pixelle_video.image(
-    workflow="12345",  # RunningHub workflow ID
-    prompt="a beautiful landscape"
-)
-
-# Check available workflows
-workflows = pixelle_video.image.list_workflows()
-print(f"Available workflows: {workflows}")
-```
-
-### Environment Variables (Alternative)
-
-```bash
-# Local ComfyUI
-export COMFYUI_BASE_URL="http://127.0.0.1:8188"
-
-# RunningHub cloud
-export RUNNINGHUB_API_KEY="rh-key-xxx"
-```
-
-### Workflow DSL
-
-Pixelle-Video uses ComfyKit's DSL for workflow parameters:
-
-```json
-{
-  "6": {
-    "class_type": "CLIPTextEncode",
-    "_meta": {
-      "title": "$prompt!"
-    },
-    "inputs": {
-      "text": "default prompt",
-      "clip": ["4", 1]
-    }
-  }
-}
-```
-
-**DSL Markers:**
-- `$param!` - Required parameter
-- `$param` - Optional parameter
-- `$param~` - Upload parameter (for images/audio/video)
-- `$output.name` - Output variable
-
----
-
-## Combined Workflow Example
-
-Generate a complete book cover with narration:
-
-```python
-import asyncio
-from pixelle_video.service import pixelle_video
-
-async def create_book_content(book_title, author):
-    """Generate book summary, audio, and cover image"""
-    
-    # 1. Generate book summary with LLM
-    summary = await pixelle_video.llm(
-        prompt=f"Write a compelling 2-sentence summary for a book titled '{book_title}' by {author}",
-        max_tokens=100
-    )
-    print(f"Summary: {summary}")
-    
-    # 2. Generate audio narration with TTS
-    audio_path = await pixelle_video.tts(
-        text=summary,
-        voice="en-US-JennyNeural"
-    )
-    print(f"Audio: {audio_path}")
-    
-    # 3. Generate book cover image
-    image_url = await pixelle_video.image(
-        workflow="workflows/book_cover_simple.json",
-        prompt=f"book cover for '{book_title}' by {author}, professional, modern design",
-        width=1024,
-        height=1536
-    )
-    print(f"Cover: {image_url}")
-    
-    return {
-        "summary": summary,
-        "audio": audio_path,
-        "cover": image_url
-    }
-
-# Run
-result = asyncio.run(create_book_content("Atomic Habits", "James Clear"))
-```
-
----
-
-## Troubleshooting
-
-### LLM Issues
-
-**"API key not found"**
-- Make sure you've set the API key in `config.yaml` or environment variables
-- For Qwen: `DASHSCOPE_API_KEY`
-- For OpenAI: `OPENAI_API_KEY`
-- For DeepSeek: `DEEPSEEK_API_KEY`
-
-**"Connection error"**
-- Check `base_url` in config
-- Verify API endpoint is accessible
-- For Ollama, make sure server is running (`ollama serve`)
-
-### TTS Issues
-
-**"SSL error"**
-- Edge TTS is free but requires internet connection
-- SSL verification is disabled by default for development
-
-### Image Issues
-
-**"ComfyUI connection refused"**
-- Make sure ComfyUI is running at http://127.0.0.1:8188
-- Or configure RunningHub API key for cloud execution
-
-**"Workflow file not found"**
-- Check workflow path is correct
-- Use relative path from project root: `workflows/your_workflow.json`
-
-**"No images generated"**
-- Check workflow has `SaveImage` node
-- Verify workflow parameters are correct
-- Check ComfyUI logs for errors
-
----
-
-## Next Steps
-
-- See `/examples/` directory for complete examples
-- Run `python test_integration.py` to test all capabilities
-- Create custom workflows in `/workflows/` directory
-- Check ComfyKit documentation: https://puke3615.github.io/ComfyKit
-
----
-
-**Happy creating with Pixelle-Video!** 📚🎬
-
diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md
new file mode 100644
index 0000000..9dc2cd6
--- /dev/null
+++ b/docs/en/development/architecture.md
@@ -0,0 +1,54 @@
+# Architecture
+
+Technical architecture overview of Pixelle-Video.
+
+---
+
+## Core Architecture
+
+Pixelle-Video uses a layered architecture design:
+
+- **Web Layer**: Streamlit Web interface
+- **Service Layer**: Core business logic
+- **ComfyUI Layer**: Image and TTS generation
+
+---
+
+## Main Components
+
+### PixelleVideoCore
+
+Core service class coordinating all sub-services.
+
+### LLM Service
+
+Responsible for calling large language models to generate scripts.
+
+### Image Service
+
+Responsible for calling ComfyUI to generate images.
+
+### TTS Service
+
+Responsible for calling ComfyUI to generate speech.
+
+### Video Generator
+
+Responsible for composing the final video.
+
+---
+
+## Tech Stack
+
+- **Backend**: Python 3.10+, AsyncIO
+- **Web**: Streamlit
+- **AI**: OpenAI API, ComfyUI
+- **Configuration**: YAML
+- **Tools**: uv (package management)
+
+---
+
+## More Information
+
+Detailed architecture documentation coming soon.
+
diff --git a/docs/en/development/contributing.md b/docs/en/development/contributing.md
new file mode 100644
index 0000000..bef2f18
--- /dev/null
+++ b/docs/en/development/contributing.md
@@ -0,0 +1,50 @@
+# Contributing
+
+Thank you for your interest in contributing to Pixelle-Video!
+
+---
+
+## How to Contribute
+
+1. Fork the repository
+2. Create a feature branch (`git checkout -b feature/AmazingFeature`)
+3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
+4. Push to the branch (`git push origin feature/AmazingFeature`)
+5. Open a Pull Request
+
+---
+
+## Development Setup
+
+```bash
+# Clone your fork
+git clone https://github.com/your-username/Pixelle-Video.git
+cd Pixelle-Video
+
+# Install development dependencies
+uv sync
+
+# Run tests
+pytest
+```
+
+---
+
+## Code Standards
+
+- All code and comments in English
+- Follow PEP 8 standards
+- Add appropriate tests
+
+---
+
+## Submit Issues
+
+Have a problem or a feature suggestion? Please open an issue on [GitHub Issues](https://github.com/PixelleLab/Pixelle-Video/issues).
+
+---
+
+## Code of Conduct
+
+Please be friendly and respectful. We are committed to fostering an inclusive community environment.
+
diff --git a/docs/en/faq.md b/docs/en/faq.md
new file mode 100644
index 0000000..9c8a722
--- /dev/null
+++ b/docs/en/faq.md
@@ -0,0 +1,78 @@
+# FAQ
+
+Frequently Asked Questions.
+
+---
+
+## Installation
+
+### Q: How to install uv?
+
+```bash
+curl -LsSf https://astral.sh/uv/install.sh | sh
+```
+
+### Q: Can I use something other than uv?
+
+Yes, you can use the traditional pip + venv approach.
+
+---
+
+## Configuration
+
+### Q: Do I need to configure ComfyUI?
+
+Not necessarily. You can use the RunningHub cloud service instead of deploying ComfyUI locally.
+
+### Q: Which LLMs are supported?
+
+All OpenAI-compatible LLMs, including:
+- Qianwen
+- GPT-4o
+- DeepSeek
+- Ollama (local)
+
+---
+
+## Usage
+
+### Q: How long does it take to generate a video?
+
+Generating a 3-5 scene video takes approximately 2-5 minutes.
+
+### Q: What if I'm not satisfied with the video?
+
+Try:
+1. Change LLM model
+2. Adjust image dimensions and prompt prefix
+3. Change TTS workflow
+4. Try different video templates
+
+### Q: What are the costs?
+
+- **Completely Free**: Ollama + Local ComfyUI = $0
+- **Recommended**: Qianwen + Local ComfyUI ≈ $0.01-0.05/video
+- **Cloud Solution**: OpenAI + RunningHub (higher cost)
+
+---
+
+## Troubleshooting
+
+### Q: ComfyUI connection failed
+
+1. Confirm ComfyUI is running
+2. Check if URL is correct
+3. Click "Test Connection" in Web interface
+
+### Q: LLM API call failed
+
+1. Check if API Key is correct
+2. Check network connection
+3. Review error messages
+
+---
+
+## Other Questions
+
+Have other questions? Check [Troubleshooting](troubleshooting.md) or submit an [Issue](https://github.com/PixelleLab/Pixelle-Video/issues).
+
diff --git a/docs/en/gallery/index.md b/docs/en/gallery/index.md
new file mode 100644
index 0000000..36e4c04
--- /dev/null
+++ b/docs/en/gallery/index.md
@@ -0,0 +1,45 @@
+# 🎬 Video Gallery
+
+Showcase of videos created with Pixelle-Video. Click on cards to view complete workflows and configuration files.
+
+---
+
+<div class="grid cards" markdown>
+
+-   **Reading Habit**
+
+    ---
+
+    <video controls width="100%" style="border-radius: 8px;">
+      <source src="https://your-oss-bucket.oss-cn-hangzhou.aliyuncs.com/pixelle-video/reading-habit/video.mp4" type="video/mp4">
+    </video>
+    
+    [:octicons-mark-github-16: View Workflows & Config](https://github.com/PixelleLab/Pixelle-Video/tree/main/docs/gallery/reading-habit)
+
+-   **Work Efficiency**
+
+    ---
+
+    <video controls width="100%" style="border-radius: 8px;">
+      <source src="https://your-oss-bucket.oss-cn-hangzhou.aliyuncs.com/pixelle-video/work-efficiency/video.mp4" type="video/mp4">
+    </video>
+    
+    [:octicons-mark-github-16: View Workflows & Config](https://github.com/PixelleLab/Pixelle-Video/tree/main/docs/gallery/work-efficiency)
+
+-   **Healthy Diet**
+
+    ---
+
+    <video controls width="100%" style="border-radius: 8px;">
+      <source src="https://your-oss-bucket.oss-cn-hangzhou.aliyuncs.com/pixelle-video/healthy-diet/video.mp4" type="video/mp4">
+    </video>
+    
+    [:octicons-mark-github-16: View Workflows & Config](https://github.com/PixelleLab/Pixelle-Video/tree/main/docs/gallery/healthy-diet)
+
+</div>
+
+---
+
+!!! tip "How to Use"
+    Click on a case card to jump to GitHub, download workflow files and configuration, and reproduce the video effect with one click.
+
diff --git a/docs/en/getting-started/configuration.md b/docs/en/getting-started/configuration.md
new file mode 100644
index 0000000..bf37a14
--- /dev/null
+++ b/docs/en/getting-started/configuration.md
@@ -0,0 +1,60 @@
+# Configuration
+
+After installation, you need to configure services to use Pixelle-Video.
+
+---
+
+## LLM Configuration
+
+LLM (Large Language Model) is used to generate video scripts.
+
+### Quick Preset Selection
+
+1. Select a preset model from the dropdown:
+   - Qianwen (recommended, great value)
+   - GPT-4o
+   - DeepSeek
+   - Ollama (local, completely free)
+
+2. The system will auto-fill `base_url` and `model`
+
+3. Click「🔑 Get API Key」to register and obtain credentials
+
+4. Enter your API Key
+
+---
+
+## Image Configuration
+
+Two options available:
+
+### Local Deployment (Recommended)
+
+Using local ComfyUI service:
+
+1. Install and start ComfyUI
+2. Enter ComfyUI URL (default `http://127.0.0.1:8188`)
+3. Click "Test Connection" to verify
+
+### Cloud Deployment
+
+Using RunningHub cloud service:
+
+1. Register for a RunningHub account
+2. Obtain API Key
+3. Enter API Key in configuration
+
+---
+
+## Save Configuration
+
+After filling in all required configuration, click the "Save Configuration" button.
+
+The configuration is saved to the `config.yaml` file.
+
+---
+
+## Next Steps
+
+- [Quick Start](quick-start.md) - Create your first video
+
diff --git a/docs/en/getting-started/installation.md b/docs/en/getting-started/installation.md
new file mode 100644
index 0000000..8c38d5f
--- /dev/null
+++ b/docs/en/getting-started/installation.md
@@ -0,0 +1,115 @@
+# Installation
+
+This page will guide you through installing Pixelle-Video.
+
+---
+
+## System Requirements
+
+### Required
+
+- **Python**: 3.10 or higher
+- **Operating System**: Windows, macOS, or Linux
+- **Package Manager**: uv (recommended) or pip
+
+### Optional
+
+- **GPU**: NVIDIA GPU with 6GB+ VRAM recommended for local ComfyUI
+- **Network**: Stable internet connection for LLM API and image generation services
+
+---
+
+## Installation Steps
+
+### Step 1: Clone the Repository
+
+```bash
+git clone https://github.com/PixelleLab/Pixelle-Video.git
+cd Pixelle-Video
+```
+
+### Step 2: Install Dependencies
+
+!!! tip "Recommended: Use uv"
+    This project uses `uv` as the package manager, which is faster and more reliable than traditional pip.
+
+#### Using uv (Recommended)
+
+```bash
+# Install uv if you haven't already
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Install project dependencies (uv will create a virtual environment automatically)
+uv sync
+```
+
+#### Using pip
+
+```bash
+# Create virtual environment
+python -m venv venv
+
+# Activate virtual environment
+# Windows:
+venv\Scripts\activate
+# macOS/Linux:
+source venv/bin/activate
+
+# Install dependencies
+pip install -e .
+```
+
+---
+
+## Verify Installation
+
+Run the following command to verify the installation:
+
+```bash
+# Using uv
+uv run streamlit run web/app.py
+
+# Or using pip (activate virtual environment first)
+streamlit run web/app.py
+```
+
+Your browser should automatically open `http://localhost:8501` and display the Pixelle-Video web interface.
+
+!!! success "Installation Successful!"
+    If you can see the web interface, the installation was successful! Next, check out the [Configuration Guide](configuration.md) to set up your services.
+
+---
+
+## Optional: Install ComfyUI (Local Deployment)
+
+If you want to run image generation locally, you'll need to install ComfyUI:
+
+### Quick Install
+
+```bash
+# Clone ComfyUI
+git clone https://github.com/comfyanonymous/ComfyUI.git
+cd ComfyUI
+
+# Install dependencies
+pip install -r requirements.txt
+```
+
+### Start ComfyUI
+
+```bash
+python main.py
+```
+
+ComfyUI runs on `http://127.0.0.1:8188` by default.
+
+!!! info "ComfyUI Models"
+    ComfyUI requires downloading model files to work. Please refer to the [ComfyUI documentation](https://github.com/comfyanonymous/ComfyUI) for information on downloading and configuring models.
+
+---
+
+## Next Steps
+
+- [Configuration](configuration.md) - Configure LLM and image generation services
+- [Quick Start](quick-start.md) - Create your first video
+
diff --git a/docs/en/getting-started/quick-start.md b/docs/en/getting-started/quick-start.md
new file mode 100644
index 0000000..7ba8a52
--- /dev/null
+++ b/docs/en/getting-started/quick-start.md
@@ -0,0 +1,107 @@
+# Quick Start
+
+Already installed and configured? Let's create your first video!
+
+---
+
+## Start the Web Interface
+
+```bash
+# Using uv
+uv run streamlit run web/app.py
+```
+
+Your browser will automatically open `http://localhost:8501`
+
+---
+
+## Create Your First Video
+
+### Step 1: Check Configuration
+
+On first use, expand the「⚙️ System Configuration」panel and confirm:
+
+- **LLM Configuration**: Select an AI model (e.g., Qianwen, GPT) and enter API Key
+- **Image Configuration**: Configure ComfyUI address or RunningHub API Key
+
+If not yet configured, see the [Configuration Guide](configuration.md).
+
+Click "Save Configuration" when done.
+
+---
+
+### Step 2: Enter a Topic
+
+In the left panel's「📝 Content Input」section:
+
+1. Select「**AI Generate Content**」mode
+2. Enter a topic in the text box, for example:
+   ```
+   Why develop a reading habit
+   ```
+3. (Optional) Set the number of scenes (default is 5)
+
+!!! tip "Topic Examples"
+    - Why develop a reading habit
+    - How to improve work efficiency
+    - The importance of healthy eating
+    - The meaning of travel
+
+---
+
+### Step 3: Configure Voice and Visuals
+
+In the middle panel:
+
+**Voice Settings**
+- Select TTS workflow (default Edge-TTS works well)
+- For voice cloning, upload a reference audio file
+
+**Visual Settings**
+- Select image generation workflow (default works well)
+- Set image dimensions (default 1024x1024)
+- Choose video template (recommend portrait 1080x1920)
+
+---
+
+### Step 4: Generate Video
+
+Click the「🎬 Generate Video」button in the right panel!
+
+The system will show real-time progress:
+- Generate script
+- Generate images (for each scene)
+- Synthesize voice
+- Compose video
+
+!!! info "Generation Time"
+    Generating a 5-scene video takes about 2-5 minutes, depending on LLM API response speed, image generation speed, TTS workflow type, and network conditions.
+
+---
+
+### Step 5: Preview Video
+
+Once complete, the video will automatically play in the right panel!
+
+You'll see:
+- 📹 Video preview player
+- ⏱️ Video duration
+- 📦 File size
+- 🎬 Number of scenes
+- 📐 Video dimensions
+
+The video file is saved in the `output/` folder.
+
+---
+
+## Next Steps
+
+Congratulations! You've successfully created your first video 🎉
+
+Next, you can:
+
+- **Adjust Styles** - See the [Custom Visual Style](../tutorials/custom-style.md) tutorial
+- **Clone Voices** - See the [Voice Cloning with Reference Audio](../tutorials/voice-cloning.md) tutorial
+- **Use API** - See the [API Usage Guide](../user-guide/api.md)
+- **Develop Templates** - See the [Template Development Guide](../user-guide/templates.md)
+
diff --git a/docs/en/index.md b/docs/en/index.md
new file mode 100644
index 0000000..6f12a61
--- /dev/null
+++ b/docs/en/index.md
@@ -0,0 +1,97 @@
+# Pixelle-Video 🎬
+
+<div align="center" markdown="1">
+
+**AI Video Creator - Generate a short video in 3 minutes**
+
+[![Stars](https://img.shields.io/github/stars/PixelleLab/Pixelle-Video.svg?style=flat-square)](https://github.com/PixelleLab/Pixelle-Video/stargazers)
+[![Issues](https://img.shields.io/github/issues/PixelleLab/Pixelle-Video.svg?style=flat-square)](https://github.com/PixelleLab/Pixelle-Video/issues)
+[![License](https://img.shields.io/github/license/PixelleLab/Pixelle-Video.svg?style=flat-square)](https://github.com/PixelleLab/Pixelle-Video/blob/main/LICENSE)
+
+</div>
+
+---
+
+## 🎯 Overview
+
+Simply input a **topic**, and Pixelle-Video will automatically:
+
+- ✍️ Write video scripts
+- 🎨 Generate AI images  
+- 🗣️ Synthesize voice narration
+- 🎵 Add background music
+- 🎬 Create the final video
+
+**No barriers, no video editing experience required** - turn video creation into a one-line task!
+
+---
+
+## ✨ Features
+
+- ✅ **Fully Automated** - Input a topic, get a complete video in 3 minutes
+- ✅ **AI-Powered Scripts** - Intelligently create narration based on your topic
+- ✅ **AI-Generated Images** - Each sentence comes with beautiful AI illustrations
+- ✅ **AI Voice Synthesis** - Support for Edge-TTS, Index-TTS and more mainstream TTS solutions
+- ✅ **Background Music** - Add BGM for enhanced atmosphere
+- ✅ **Visual Styles** - Multiple templates to create unique video styles
+- ✅ **Flexible Dimensions** - Support for portrait, landscape and more video sizes
+- ✅ **Multiple AI Models** - Support for GPT, Qianwen, DeepSeek, Ollama, etc.
+- ✅ **Flexible Composition** - Based on ComfyUI architecture, use preset workflows or customize any capability
+
+---
+
+## 🎬 Video Examples
+
+!!! info "Sample Videos"
+    Coming soon: Video examples will be added here
+
+---
+
+## 🚀 Quick Start
+
+Ready to get started? Just three steps:
+
+1. **[Install Pixelle-Video](getting-started/installation.md)** - Download and install the project
+2. **[Configure Services](getting-started/configuration.md)** - Set up LLM and image generation services
+3. **[Create Your First Video](getting-started/quick-start.md)** - Start creating your first video
+
+---
+
+## 💰 Pricing
+
+!!! success "Completely free to run!"
+    
+    - **Completely Free**: Use Ollama (local) + Local ComfyUI = $0
+    - **Recommended**: Use Qianwen LLM (≈$0.01-0.05 per 3-scene video) + Local ComfyUI
+    - **Cloud Solution**: Use OpenAI + RunningHub (higher cost but no local setup required)
+    
+    **Recommendation**: If you have a local GPU, go with the completely free solution. Otherwise, we recommend Qianwen for best value.
+
+---
+
+## 🤝 Acknowledgments
+
+Pixelle-Video was inspired by the following excellent open source projects:
+
+- [Pixelle-MCP](https://github.com/AIDC-AI/Pixelle-MCP) - ComfyUI MCP server
+- [MoneyPrinterTurbo](https://github.com/harry0703/MoneyPrinterTurbo) - Excellent video generation tool
+- [NarratoAI](https://github.com/linyqh/NarratoAI) - Video narration automation tool
+- [MoneyPrinterPlus](https://github.com/ddean2009/MoneyPrinterPlus) - Video creation platform
+- [ComfyKit](https://github.com/puke3615/ComfyKit) - ComfyUI workflow wrapper library
+
+Thanks to these projects for their open source spirit! 🙏
+
+---
+
+## 📢 Feedback & Support
+
+- 🐛 **Found a bug**: Submit an [Issue](https://github.com/PixelleLab/Pixelle-Video/issues)
+- 💡 **Feature request**: Submit a [Feature Request](https://github.com/PixelleLab/Pixelle-Video/issues)
+- ⭐ **Give us a Star**: If this project helps you, please give us a star!
+
+---
+
+## 📝 License
+
+This project is licensed under the MIT License. See the [LICENSE](https://github.com/PixelleLab/Pixelle-Video/blob/main/LICENSE) file for details.
+
diff --git a/docs/en/reference/api-overview.md b/docs/en/reference/api-overview.md
new file mode 100644
index 0000000..057bb2a
--- /dev/null
+++ b/docs/en/reference/api-overview.md
@@ -0,0 +1,52 @@
+# API Overview
+
+Pixelle-Video Python API reference documentation.
+
+---
+
+## Core Classes
+
+### PixelleVideoCore
+
+Main service class providing video generation functionality.
+
+```python
+from pixelle_video.service import PixelleVideoCore
+
+pixelle = PixelleVideoCore()
+await pixelle.initialize()
+```
+
+---
+
+## Main Methods
+
+### generate_video()
+
+Primary method for generating videos.
+
+**Parameters**:
+
+- `text` (str): Topic or complete script
+- `mode` (str): Generation mode ("generate" or "fixed")
+- `n_scenes` (int): Number of scenes
+- `title` (str, optional): Video title
+- `tts_workflow` (str): TTS workflow
+- `image_workflow` (str): Image generation workflow
+- `frame_template` (str): Video template
+- `bgm_path` (str, optional): BGM file path
+
+**Returns**: `VideoResult` object
+
+---
+
+## Examples
+
+Check the `examples/` directory for more examples.
+
+---
+
+## More Information
+
+Detailed API documentation coming soon.
+
diff --git a/docs/en/reference/config-schema.md b/docs/en/reference/config-schema.md
new file mode 100644
index 0000000..32ac8a3
--- /dev/null
+++ b/docs/en/reference/config-schema.md
@@ -0,0 +1,60 @@
+# Config Schema
+
+Detailed explanation of the `config.yaml` configuration file.
+
+---
+
+## Configuration Structure
+
+```yaml
+llm:
+  provider: openai
+  api_key: "your-api-key"
+  base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1"
+  model: "qwen-plus"
+
+comfyui:
+  comfyui_url: "http://127.0.0.1:8188"
+  runninghub_api_key: ""
+  
+  image:
+    default_workflow: "runninghub/image_flux.json"
+    prompt_prefix: "Minimalist illustration style"
+  
+  tts:
+    default_workflow: "selfhost/tts_edge.json"
+```
+
+---
+
+## LLM Configuration
+
+- `provider`: Provider (currently only supports openai-compatible interfaces)
+- `api_key`: API key
+- `base_url`: API service address
+- `model`: Model name
+
+---
+
+## ComfyUI Configuration
+
+### Basic Configuration
+
+- `comfyui_url`: Local ComfyUI address
+- `runninghub_api_key`: RunningHub API key (optional)
+
+### Image Configuration
+
+- `default_workflow`: Default image generation workflow
+- `prompt_prefix`: Prompt prefix
+
+### TTS Configuration
+
+- `default_workflow`: Default TTS workflow
+
+---
+
+## More Information
+
+The configuration file is automatically created on first run.
+
diff --git a/docs/en/troubleshooting.md b/docs/en/troubleshooting.md
new file mode 100644
index 0000000..1d74164
--- /dev/null
+++ b/docs/en/troubleshooting.md
@@ -0,0 +1,108 @@
+# Troubleshooting
+
+Having issues? Here are solutions to common problems.
+
+---
+
+## Installation Issues
+
+### Dependency installation failed
+
+```bash
+# Clean cache
+uv cache clean
+
+# Reinstall
+uv sync
+```
+
+---
+
+## Configuration Issues
+
+### ComfyUI connection failed
+
+**Possible Causes**:
+- ComfyUI not running
+- Incorrect URL configuration
+- Firewall blocking
+
+**Solutions**:
+1. Confirm ComfyUI is running
+2. Check URL configuration (default `http://127.0.0.1:8188`)
+3. Test by accessing ComfyUI address in browser
+4. Check firewall settings
+
+### LLM API call failed
+
+**Possible Causes**:
+- Incorrect API Key
+- Network issues
+- Insufficient balance
+
+**Solutions**:
+1. Verify API Key is correct
+2. Check network connection
+3. Review error message details
+4. Check account balance
+
+---
+
+## Generation Issues
+
+### Video generation failed
+
+**Possible Causes**:
+- Corrupted workflow file
+- Models not downloaded
+- Insufficient resources
+
+**Solutions**:
+1. Check if workflow file exists
+2. Confirm ComfyUI has downloaded required models
+3. Check disk space and memory
+
+### Image generation failed
+
+**Solutions**:
+1. Check if ComfyUI is running properly
+2. Try manually testing workflow in ComfyUI
+3. Check workflow configuration
+
+### TTS generation failed
+
+**Solutions**:
+1. Check if TTS workflow is correct
+2. If using voice cloning, check reference audio format
+3. Review error logs
+
+---
+
+## Performance Issues
+
+### Slow generation speed
+
+**Optimization Tips**:
+1. Use local ComfyUI (faster than cloud)
+2. Reduce number of scenes
+3. Use a faster LLM (e.g., Qianwen)
+4. Check network connection
+
+---
+
+## Other Issues
+
+Still having problems?
+
+1. Check project [GitHub Issues](https://github.com/PixelleLab/Pixelle-Video/issues)
+2. Submit a new Issue describing your problem
+3. Include error logs and configuration details for quick diagnosis
+
+---
+
+## View Logs
+
+Log files are located in the project root:
+- `api_server.log` - API service logs
+- `test_output.log` - Test logs
+
diff --git a/docs/en/tutorials/custom-style.md b/docs/en/tutorials/custom-style.md
new file mode 100644
index 0000000..ac1fdd5
--- /dev/null
+++ b/docs/en/tutorials/custom-style.md
@@ -0,0 +1,36 @@
+# Custom Visual Style
+
+Learn how to adjust image generation parameters to create unique visual styles.
+
+---
+
+## Adjust Prompt Prefix
+
+The prompt prefix controls overall visual style:
+
+```
+Minimalist black-and-white illustration, clean lines, simple style
+```
+
+---
+
+## Adjust Image Dimensions
+
+Different dimensions for different scenarios:
+
+- **1024x1024**: Square, suitable for Xiaohongshu
+- **1080x1920**: Portrait, suitable for TikTok, Kuaishou
+- **1920x1080**: Landscape, suitable for Bilibili, YouTube
+
+---
+
+## Preview Effects
+
+Use the "Preview Style" feature to test different configurations.
+
+---
+
+## More Information
+
+More style customization tips coming soon.
+
diff --git a/docs/en/tutorials/voice-cloning.md b/docs/en/tutorials/voice-cloning.md
new file mode 100644
index 0000000..9e2d2e9
--- /dev/null
+++ b/docs/en/tutorials/voice-cloning.md
@@ -0,0 +1,35 @@
+# Voice Cloning
+
+Clone a voice by supplying a short reference audio clip.
+
+---
+
+## Prepare Reference Audio
+
+1. Prepare a clear audio file (MP3/WAV/FLAC)
+2. Recommended duration: 10-30 seconds
+3. Avoid background noise
+
+---
+
+## Usage Steps
+
+1. Select a TTS workflow that supports voice cloning (e.g., Index-TTS) in voice settings
+2. Upload reference audio file
+3. Test effects with "Preview Voice"
+4. Generate video
+
+---
+
+## Notes
+
+- Not all TTS workflows support voice cloning
+- Reference audio quality affects cloning results
+- Edge-TTS does not support voice cloning
+
+---
+
+## More Information
+
+Detailed voice cloning tutorial coming soon.
+
diff --git a/docs/en/tutorials/your-first-video.md b/docs/en/tutorials/your-first-video.md
new file mode 100644
index 0000000..53d39ac
--- /dev/null
+++ b/docs/en/tutorials/your-first-video.md
@@ -0,0 +1,33 @@
+# Your First Video
+
+Step-by-step guide to creating your first video with Pixelle-Video.
+
+---
+
+## Prerequisites
+
+Make sure you've completed:
+
+- ✅ [Installation](../getting-started/installation.md)
+- ✅ [Configuration](../getting-started/configuration.md)
+
+---
+
+## Tutorial Steps
+
+For detailed steps, see [Quick Start](../getting-started/quick-start.md).
+
+---
+
+## Tips
+
+- Choose an appropriate topic for better results
+- Start with 3-5 scenes for first generation
+- Preview voice and image effects before generating
+
+---
+
+## Troubleshooting
+
+Having issues? Check out [FAQ](../faq.md) or [Troubleshooting](../troubleshooting.md).
+
diff --git a/docs/en/user-guide/api.md b/docs/en/user-guide/api.md
new file mode 100644
index 0000000..e39a0a8
--- /dev/null
+++ b/docs/en/user-guide/api.md
@@ -0,0 +1,42 @@
+# API Usage
+
+Pixelle-Video provides a complete Python API for easy integration into your projects.
+
+---
+
+## Quick Start
+
+```python
+from pixelle_video.service import PixelleVideoCore
+import asyncio
+
+async def main():
+    # Initialize
+    pixelle = PixelleVideoCore()
+    await pixelle.initialize()
+    
+    # Generate video
+    result = await pixelle.generate_video(
+        text="Why develop a reading habit",
+        mode="generate",
+        n_scenes=5
+    )
+    
+    print(f"Video generated: {result.video_path}")
+
+# Run
+asyncio.run(main())
+```
+
+---
+
+## API Reference
+
+For detailed API documentation, see [API Overview](../reference/api-overview.md).
+
+---
+
+## Examples
+
+For more usage examples, check the `examples/` directory in the project.
+
diff --git a/docs/en/user-guide/templates.md b/docs/en/user-guide/templates.md
new file mode 100644
index 0000000..346749c
--- /dev/null
+++ b/docs/en/user-guide/templates.md
@@ -0,0 +1,48 @@
+# Template Development
+
+How to create custom video templates.
+
+---
+
+## Template Introduction
+
+Video templates use HTML to define the layout and style of video frames.
+
+---
+
+## Template Structure
+
+Templates are located in the `templates/` directory, grouped by size:
+
+```
+templates/
+├── 1080x1920/  # Portrait
+├── 1920x1080/  # Landscape
+└── 1080x1080/  # Square
+```
+
+---
+
+## Creating Templates
+
+1. Copy an existing template file
+2. Modify HTML and CSS
+3. Save to the corresponding size directory
+4. Select and use in Web interface
+
+---
+
+## Template Variables
+
+Templates support the following variables:
+
+- `{{ title }}` - Video title
+- `{{ text }}` - Scene text
+- `{{ image }}` - Scene image
+
+---
+
+## More Information
+
+Detailed template development guide coming soon.
+
diff --git a/docs/en/user-guide/web-ui.md b/docs/en/user-guide/web-ui.md
new file mode 100644
index 0000000..76846af
--- /dev/null
+++ b/docs/en/user-guide/web-ui.md
@@ -0,0 +1,77 @@
+# Web UI Guide
+
+Detailed introduction to the Pixelle-Video Web interface features.
+
+---
+
+## Interface Layout
+
+The Web interface uses a three-column layout:
+
+- **Left Panel**: Content input and audio settings
+- **Middle Panel**: Voice and visual settings  
+- **Right Panel**: Video generation and preview
+
+---
+
+## System Configuration
+
+First-time use requires configuring LLM and image generation services. See [Configuration Guide](../getting-started/configuration.md).
+
+---
+
+## Content Input
+
+### Generation Mode
+
+- **AI Generate Content**: Enter a topic, AI creates script automatically
+- **Fixed Script Content**: Enter complete script directly
+
+### Background Music
+
+- Built-in music supported
+- Custom music files supported
+
+---
+
+## Voice Settings
+
+### TTS Workflow
+
+- Select TTS workflow
+- Supports Edge-TTS, Index-TTS, etc.
+
+### Reference Audio
+
+- Upload reference audio for voice cloning
+- Supports MP3/WAV/FLAC formats
+
+---
+
+## Visual Settings
+
+### Image Generation
+
+- Select image generation workflow
+- Set image dimensions
+- Adjust prompt prefix to control style
+
+### Video Template
+
+- Choose video template
+- Supports portrait/landscape/square
+- Preview template effects
+
+---
+
+## Generate Video
+
+After clicking "Generate Video", the system will:
+
+1. Generate video script
+2. Generate images for each scene
+3. Synthesize voice narration
+4. Compose final video
+
+The video is previewed automatically once generation completes.
+
diff --git a/docs/en/user-guide/workflows.md b/docs/en/user-guide/workflows.md
new file mode 100644
index 0000000..0538376
--- /dev/null
+++ b/docs/en/user-guide/workflows.md
@@ -0,0 +1,37 @@
+# Workflow Customization
+
+How to customize ComfyUI workflows to achieve specific functionality.
+
+---
+
+## Workflow Introduction
+
+Pixelle-Video is built on the ComfyUI architecture and supports custom workflows.
+
+---
+
+## Workflow Types
+
+### TTS Workflows
+
+Located in `workflows/selfhost/` or `workflows/runninghub/`
+
+### Image Generation Workflows
+
+Located in `workflows/selfhost/` or `workflows/runninghub/`
+
+---
+
+## Custom Workflows
+
+1. Design your workflow in ComfyUI
+2. Export as JSON file
+3. Place in `workflows/` directory
+4. Select and use in Web interface
+
+---
+
+## More Information
+
+Detailed workflow customization guide coming soon.
+
diff --git a/docs/gallery/index.md b/docs/gallery/index.md
new file mode 100644
index 0000000..015c9af
--- /dev/null
+++ b/docs/gallery/index.md
@@ -0,0 +1,78 @@
+# 🎬 视频示例库 / Video Gallery
+
+展示使用 Pixelle-Video 制作的各类视频案例，包含完整的制作参数和资源下载。
+
+Showcase of videos created with Pixelle-Video, including complete production parameters and downloadable resources.
+
+---
+
+## 📚 案例列表 / Cases
+
+<div class="grid cards" markdown>
+
+-   :material-book-open-variant:{ .lg .middle } **阅读习惯养成**
+
+    ---
+
+    ![视频缩略图](https://via.placeholder.com/400x225?text=Reading+Habit)
+    
+    **时长 Duration**: 45s | **分镜 Scenes**: 5 | **尺寸 Size**: 1080x1920
+    
+    一个关于为什么要养成阅读习惯的教育科普视频。
+    
+    An educational video about why we should develop reading habits.
+    
+    [:octicons-arrow-right-24: 查看详情 View Details](reading-habit/)
+
+-   :material-chart-line:{ .lg .middle } **提高工作效率**
+
+    ---
+
+    ![视频缩略图](https://via.placeholder.com/400x225?text=Work+Efficiency)
+    
+    **时长 Duration**: 30s | **分镜 Scenes**: 3 | **尺寸 Size**: 1920x1080
+    
+    关于如何提高工作效率的实用技巧分享。
+    
+    Practical tips on improving work efficiency.
+    
+    [:octicons-arrow-right-24: 查看详情 View Details](#) *(即将推出 Coming soon)*
+
+-   :material-food-apple:{ .lg .middle } **健康饮食**
+
+    ---
+
+    ![视频缩略图](https://via.placeholder.com/400x225?text=Healthy+Diet)
+    
+    **时长 Duration**: 60s | **分镜 Scenes**: 6 | **尺寸 Size**: 1080x1080
+    
+    健康饮食的重要性和实用建议。
+    
+    The importance of healthy eating and practical advice.
+    
+    [:octicons-arrow-right-24: 查看详情 View Details](#) *(即将推出 Coming soon)*
+
+</div>
+
+---
+
+## 🎯 如何使用这些案例 / How to Use
+
+每个案例都包含：/ Each case includes:
+
+- **📹 成品视频 Video**: OSS 托管的完整视频 / Complete video hosted on OSS
+- **⚙️ 工作流文件 Workflows**: ComfyUI 工作流 JSON / ComfyUI workflow JSON files
+- **📝 配置文件 Config**: 完整的生成配置 / Complete generation configuration
+- **🎨 提示词 Prompts**: 所有使用的提示词 / All prompts used
+- **📥 一键复现 Reproduce**: 可直接导入使用 / Can be imported directly
+
+---
+
+## 💡 贡献你的案例 / Contribute Your Case
+
+制作了优秀的视频？欢迎分享！/ Created an awesome video? Share it with us!
+
+查看 [贡献指南](../zh/development/contributing.md) 了解如何提交你的案例。
+
+See [Contributing Guide](../en/development/contributing.md) to learn how to submit your case.
+
diff --git a/docs/gallery/reading-habit/prompts.txt b/docs/gallery/reading-habit/prompts.txt
new file mode 100644
index 0000000..e4cf9c6
--- /dev/null
+++ b/docs/gallery/reading-habit/prompts.txt
@@ -0,0 +1 @@
+为什么要养成阅读习惯
\ No newline at end of file
diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css
new file mode 100644
index 0000000..7055315
--- /dev/null
+++ b/docs/stylesheets/extra.css
@@ -0,0 +1,17 @@
+/* Custom styles for Pixelle-Video documentation */
+
+:root {
+  --md-primary-fg-color: #5C6BC0;
+  --md-accent-fg-color: #FF4081;
+}
+
+/* Better code block styling */
+.highlight pre {
+  border-radius: 4px;
+}
+
+/* Admonition custom styling */
+.md-typeset .admonition {
+  border-radius: 4px;
+}
+
diff --git a/docs/zh/development/architecture.md b/docs/zh/development/architecture.md
new file mode 100644
index 0000000..c318796
--- /dev/null
+++ b/docs/zh/development/architecture.md
@@ -0,0 +1,54 @@
+# 架构设计
+
+Pixelle-Video 的技术架构概览。
+
+---
+
+## 核心架构
+
+Pixelle-Video 采用分层架构设计：
+
+- **Web 层**: Streamlit Web 界面
+- **服务层**: 核心业务逻辑
+- **ComfyUI 层**: 图像和 TTS 生成
+
+---
+
+## 主要组件
+
+### PixelleVideoCore
+
+核心服务类，协调各个子服务。
+
+### LLM Service
+
+负责调用大语言模型生成文案。
+
+### Image Service
+
+负责调用 ComfyUI 生成图像。
+
+### TTS Service
+
+负责调用 ComfyUI 生成语音。
+
+### Video Generator
+
+负责合成最终视频。
+
+---
+
+## 技术栈
+
+- **后端**: Python 3.10+, AsyncIO
+- **Web**: Streamlit
+- **AI**: OpenAI API, ComfyUI
+- **配置**: YAML
+- **工具**: uv (包管理)
+
+---
+
+## 更多信息
+
+详细的架构文档即将推出。
+
diff --git a/docs/zh/development/contributing.md b/docs/zh/development/contributing.md
new file mode 100644
index 0000000..5030270
--- /dev/null
+++ b/docs/zh/development/contributing.md
@@ -0,0 +1,50 @@
+# 贡献指南
+
+感谢你对 Pixelle-Video 的贡献兴趣！
+
+---
+
+## 如何贡献
+
+1. Fork 项目仓库
+2. 创建功能分支 (`git checkout -b feature/AmazingFeature`)
+3. 提交更改 (`git commit -m 'Add some AmazingFeature'`)
+4. 推送到分支 (`git push origin feature/AmazingFeature`)
+5. 开启 Pull Request
+
+---
+
+## 开发设置
+
+```bash
+# 克隆你的 fork
+git clone https://github.com/your-username/Pixelle-Video.git
+cd Pixelle-Video
+
+# 安装开发依赖
+uv sync
+
+# 运行测试
+pytest
+```
+
+---
+
+## 代码规范
+
+- 所有代码和注释使用英文
+- 遵循 PEP 8 规范
+- 添加适当的测试
+
+---
+
+## 提交 Issue
+
+遇到问题或有功能建议？请在 [GitHub Issues](https://github.com/PixelleLab/Pixelle-Video/issues) 提交。
+
+---
+
+## 行为准则
+
+请保持友好和尊重，我们致力于营造包容的社区环境。
+
diff --git a/docs/zh/faq.md b/docs/zh/faq.md
new file mode 100644
index 0000000..7253900
--- /dev/null
+++ b/docs/zh/faq.md
@@ -0,0 +1,78 @@
+# 常见问题
+
+常见问题解答。
+
+---
+
+## 安装相关
+
+### Q: 如何安装 uv？
+
+```bash
+curl -LsSf https://astral.sh/uv/install.sh | sh
+```
+
+### Q: 可以不用 uv 吗？
+
+可以，你也可以使用传统的 pip + venv 方式。
+
+---
+
+## 配置相关
+
+### Q: 必须要配置 ComfyUI 吗？
+
+不一定。你可以使用 RunningHub 云端服务，无需本地部署。
+
+### Q: 支持哪些 LLM？
+
+支持所有 OpenAI 兼容接口的 LLM，包括：
+- 通义千问
+- GPT-4o
+- DeepSeek
+- Ollama（本地）
+
+---
+
+## 使用相关
+
+### Q: 第一次使用需要多久？
+
+生成一个 3-5 分镜的视频大约需要 2-5 分钟。
+
+### Q: 视频效果不满意怎么办？
+
+可以尝试：
+1. 更换 LLM 模型
+2. 调整图像尺寸和提示词前缀
+3. 更换 TTS 工作流
+4. 尝试不同的视频模板
+
+### Q: 费用大概多少？
+
+- **完全免费**: Ollama + 本地 ComfyUI = 0 元
+- **推荐方案**: 通义千问 + 本地 ComfyUI ≈ 0.01-0.05 元/视频
+- **云端方案**: OpenAI + RunningHub（费用较高）
+
+---
+
+## 故障排查
+
+### Q: ComfyUI 连接失败
+
+1. 确认 ComfyUI 正在运行
+2. 检查 URL 是否正确
+3. 在 Web 界面点击「测试连接」
+
+### Q: LLM API 调用失败
+
+1. 检查 API Key 是否正确
+2. 检查网络连接
+3. 查看错误提示
+
+---
+
+## 其他问题
+
+有其他问题？请查看 [故障排查](troubleshooting.md) 或提交 [Issue](https://github.com/PixelleLab/Pixelle-Video/issues)。
+
diff --git a/docs/zh/gallery/index.md b/docs/zh/gallery/index.md
new file mode 100644
index 0000000..04d8d12
--- /dev/null
+++ b/docs/zh/gallery/index.md
@@ -0,0 +1,45 @@
+# 🎬 视频示例库
+
+展示使用 Pixelle-Video 制作的视频案例。点击卡片查看完整的工作流和配置文件。
+
+---
+
+<div class="grid cards" markdown>
+
+-   **阅读习惯养成**
+
+    ---
+
+    <video controls width="100%" style="border-radius: 8px;">
+      <source src="https://your-oss-bucket.oss-cn-hangzhou.aliyuncs.com/pixelle-video/reading-habit/video.mp4" type="video/mp4">
+    </video>
+    
+    [:octicons-mark-github-16: 查看工作流和配置](https://github.com/PixelleLab/Pixelle-Video/tree/main/docs/gallery/reading-habit)
+
+-   **工作效率提升**
+
+    ---
+
+    <video controls width="100%" style="border-radius: 8px;">
+      <source src="https://your-oss-bucket.oss-cn-hangzhou.aliyuncs.com/pixelle-video/work-efficiency/video.mp4" type="video/mp4">
+    </video>
+    
+    [:octicons-mark-github-16: 查看工作流和配置](https://github.com/PixelleLab/Pixelle-Video/tree/main/docs/gallery/work-efficiency)
+
+-   **健康饮食**
+
+    ---
+
+    <video controls width="100%" style="border-radius: 8px;">
+      <source src="https://your-oss-bucket.oss-cn-hangzhou.aliyuncs.com/pixelle-video/healthy-diet/video.mp4" type="video/mp4">
+    </video>
+    
+    [:octicons-mark-github-16: 查看工作流和配置](https://github.com/PixelleLab/Pixelle-Video/tree/main/docs/gallery/healthy-diet)
+
+</div>
+
+---
+
+!!! tip "如何使用"
+    点击案例卡片跳转到 GitHub，下载工作流文件和配置，即可一键复现视频效果。
+
diff --git a/docs/zh/getting-started/configuration.md b/docs/zh/getting-started/configuration.md
new file mode 100644
index 0000000..76a4875
--- /dev/null
+++ b/docs/zh/getting-started/configuration.md
@@ -0,0 +1,60 @@
+# 配置说明
+
+完成安装后，需要配置服务才能使用 Pixelle-Video。
+
+---
+
+## LLM 配置
+
+LLM（大语言模型）用于生成视频文案。
+
+### 快速选择预设
+
+1. 从下拉菜单选择预设模型：
+   - 通义千问（推荐，性价比高）
+   - GPT-4o
+   - DeepSeek
+   - Ollama（本地运行，完全免费）
+
+2. 系统会自动填充 `base_url` 和 `model`
+
+3. 点击「🔑 获取 API Key」链接，注册并获取密钥
+
+4. 填入 API Key
+
+---
+
+## 图像配置
+
+支持两种方式：
+
+### 本地部署（推荐）
+
+使用本地 ComfyUI 服务：
+
+1. 安装并启动 ComfyUI
+2. 填写 ComfyUI URL（默认 `http://127.0.0.1:8188`）
+3. 点击「测试连接」确认服务可用
+
+### 云端部署
+
+使用 RunningHub 云端服务：
+
+1. 注册 RunningHub 账号
+2. 获取 API Key
+3. 在配置中填写 API Key
+
+---
+
+## 保存配置
+
+填写完所有必需的配置后，点击「保存配置」按钮。
+
+配置会保存到 `config.yaml` 文件中。
+
+---
+
+## 下一步
+
+- [快速开始](quick-start.md) - 生成你的第一个视频
+
diff --git a/docs/zh/getting-started/installation.md b/docs/zh/getting-started/installation.md
new file mode 100644
index 0000000..c177237
--- /dev/null
+++ b/docs/zh/getting-started/installation.md
@@ -0,0 +1,115 @@
+# 安装
+
+本页面将指导你完成 Pixelle-Video 的安装。
+
+---
+
+## 系统要求
+
+### 必需条件
+
+- **Python**: 3.10 或更高版本
+- **操作系统**: Windows、macOS 或 Linux
+- **包管理器**: uv（推荐）或 pip
+
+### 可选条件
+
+- **GPU**: 如需本地运行 ComfyUI，建议配备 NVIDIA 显卡（6GB+ 显存）
+- **网络**: 稳定的网络连接（用于调用 LLM API 和图像生成服务）
+
+---
+
+## 安装步骤
+
+### 第一步：克隆项目
+
+```bash
+git clone https://github.com/PixelleLab/Pixelle-Video.git
+cd Pixelle-Video
+```
+
+### 第二步：安装依赖
+
+!!! tip "推荐使用 uv"
+    本项目使用 `uv` 作为包管理器，它比传统的 pip 更快、更可靠。
+
+#### 使用 uv（推荐）
+
+```bash
+# 如果还没有安装 uv，先安装它
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# 安装项目依赖（uv 会自动创建虚拟环境）
+uv sync
+```
+
+#### 使用 pip
+
+```bash
+# 创建虚拟环境
+python -m venv venv
+
+# 激活虚拟环境
+# Windows:
+venv\Scripts\activate
+# macOS/Linux:
+source venv/bin/activate
+
+# 安装依赖
+pip install -e .
+```
+
+---
+
+## 验证安装
+
+运行以下命令验证安装是否成功：
+
+```bash
+# 使用 uv
+uv run streamlit run web/app.py
+
+# 或使用 pip（需先激活虚拟环境）
+streamlit run web/app.py
+```
+
+浏览器应该会自动打开 `http://localhost:8501`，显示 Pixelle-Video 的 Web 界面。
+
+!!! success "安装成功！"
+    如果能看到 Web 界面，说明安装成功了！接下来请查看 [配置说明](configuration.md) 来设置服务。
+
+---
+
+## 可选：安装 ComfyUI（本地部署）
+
+如果希望本地运行图像生成服务，需要安装 ComfyUI：
+
+### 快速安装
+
+```bash
+# 克隆 ComfyUI
+git clone https://github.com/comfyanonymous/ComfyUI.git
+cd ComfyUI
+
+# 安装依赖
+pip install -r requirements.txt
+```
+
+### 启动 ComfyUI
+
+```bash
+python main.py
+```
+
+ComfyUI 默认运行在 `http://127.0.0.1:8188`
+
+!!! info "ComfyUI 模型"
+    ComfyUI 需要下载对应的模型文件才能工作。请参考 [ComfyUI 官方文档](https://github.com/comfyanonymous/ComfyUI) 了解如何下载和配置模型。
+
+---
+
+## 下一步
+
+- [配置服务](configuration.md) - 配置 LLM 和图像生成服务
+- [快速开始](quick-start.md) - 生成第一个视频
+
diff --git a/docs/zh/getting-started/quick-start.md b/docs/zh/getting-started/quick-start.md
new file mode 100644
index 0000000..83c4fcb
--- /dev/null
+++ b/docs/zh/getting-started/quick-start.md
@@ -0,0 +1,107 @@
+# 快速开始
+
+已经完成安装和配置？让我们生成第一个视频吧！
+
+---
+
+## 启动 Web 界面
+
+```bash
+# 使用 uv 运行
+uv run streamlit run web/app.py
+```
+
+浏览器会自动打开 `http://localhost:8501`
+
+---
+
+## 生成你的第一个视频
+
+### 步骤一：检查配置
+
+首次使用时，展开「⚙️ 系统配置」面板，确认已配置：
+
+- **LLM 配置**: 选择 AI 模型（如通义千问、GPT 等）并填入 API Key
+- **图像配置**: 配置 ComfyUI 地址或 RunningHub API Key
+
+如果还没有配置，请查看 [配置说明](configuration.md)。
+
+配置好后点击「保存配置」。
+
+---
+
+### 步骤二：输入主题
+
+在左侧栏的「📝 内容输入」区域：
+
+1. 选择「**AI 生成内容**」模式
+2. 在文本框中输入一个主题，例如：
+   ```
+   为什么要养成阅读习惯
+   ```
+3. （可选）设置场景数量，默认 5 个分镜
+
+!!! tip "主题示例"
+    - 为什么要养成阅读习惯
+    - 如何提高工作效率
+    - 健康饮食的重要性
+    - 旅行的意义
+
+---
+
+### 步骤三：配置语音和视觉
+
+在中间栏：
+
+**语音设置**
+- 选择 TTS 工作流（默认 Edge-TTS 即可）
+- 如需声音克隆，可上传参考音频
+
+**视觉设置**
+- 选择图像生成工作流（默认即可）
+- 设置图像尺寸（默认 1024x1024）
+- 选择视频模板（推荐竖屏 1080x1920）
+
+---
+
+### 步骤四：生成视频
+
+点击右侧栏的「🎬 生成视频」按钮！
+
+系统会显示实时进度：
+- 生成文案
+- 生成配图（每个分镜）
+- 合成语音
+- 合成视频
+
+!!! info "生成时间"
+    生成一个 5 分镜的视频大约需要 2-5 分钟，具体时间取决于：LLM API 响应速度、图像生成速度、TTS 工作流类型、网络状况
+
+---
+
+### 步骤五：预览视频
+
+生成完成后，视频会自动在右侧栏播放！
+
+你可以看到：
+- 📹 视频预览播放器
+- ⏱️ 视频时长
+- 📦 文件大小
+- 🎬 分镜数量
+- 📐 视频尺寸
+
+视频文件保存在 `output/` 文件夹中。
+
+---
+
+## 下一步探索
+
+恭喜！你已经成功生成了第一个视频 🎉
+
+接下来你可以：
+
+- **调整风格** - 查看 [自定义视觉风格](../tutorials/custom-style.md) 教程
+- **克隆声音** - 查看 [使用参考音频克隆声音](../tutorials/voice-cloning.md) 教程
+- **使用 API** - 查看 [API 使用指南](../user-guide/api.md)
+- **开发模板** - 查看 [模板开发指南](../user-guide/templates.md)
+
diff --git a/docs/zh/index.md b/docs/zh/index.md
new file mode 100644
index 0000000..a092cca
--- /dev/null
+++ b/docs/zh/index.md
@@ -0,0 +1,97 @@
+# Pixelle-Video 🎬
+
+<div align="center" markdown="1">
+
+**AI 视频创作工具 - 3 分钟生成一个短视频**
+
+[![Stars](https://img.shields.io/github/stars/PixelleLab/Pixelle-Video.svg?style=flat-square)](https://github.com/PixelleLab/Pixelle-Video/stargazers)
+[![Issues](https://img.shields.io/github/issues/PixelleLab/Pixelle-Video.svg?style=flat-square)](https://github.com/PixelleLab/Pixelle-Video/issues)
+[![License](https://img.shields.io/github/license/PixelleLab/Pixelle-Video.svg?style=flat-square)](https://github.com/PixelleLab/Pixelle-Video/blob/main/LICENSE)
+
+</div>
+
+---
+
+## 🎯 项目简介
+
+只需输入一个 **主题**，Pixelle-Video 就能自动完成：
+
+- ✍️ 撰写视频文案
+- 🎨 生成 AI 配图  
+- 🗣️ 合成语音解说
+- 🎵 添加背景音乐
+- 🎬 一键合成视频
+
+**零门槛，零剪辑经验**，让视频创作成为一句话的事！
+
+---
+
+## ✨ 功能亮点
+
+- ✅ **全自动生成** - 输入主题，3 分钟自动生成完整视频
+- ✅ **AI 智能文案** - 根据主题智能创作解说词，无需自己写脚本
+- ✅ **AI 生成配图** - 每句话都配上精美的 AI 插图
+- ✅ **AI 生成语音** - 支持 Edge-TTS、Index-TTS 等众多主流 TTS 方案
+- ✅ **背景音乐** - 支持添加 BGM，让视频更有氛围
+- ✅ **视觉风格** - 多种模板可选，打造独特视频风格
+- ✅ **灵活尺寸** - 支持竖屏、横屏等多种视频尺寸
+- ✅ **多种 AI 模型** - 支持 GPT、通义千问、DeepSeek、Ollama 等
+- ✅ **原子能力灵活组合** - 基于 ComfyUI 架构，可使用预置工作流，也可自定义任意能力
+
+---
+
+## 🎬 视频示例
+
+!!! info "示例视频"
+    待补充：这里可以添加一些生成的视频示例
+
+---
+
+## 🚀 快速开始
+
+想马上体验？只需三步：
+
+1. **[安装 Pixelle-Video](getting-started/installation.md)** - 下载并安装项目
+2. **[配置服务](getting-started/configuration.md)** - 配置 LLM 和图像生成服务
+3. **[生成第一个视频](getting-started/quick-start.md)** - 开始创作你的第一个视频
+
+---
+
+## 💰 费用说明
+
+!!! success "完全支持免费运行！"
+    
+    - **完全免费方案**: LLM 使用 Ollama（本地运行）+ ComfyUI 本地部署 = 0 元
+    - **推荐方案**: LLM 使用通义千问（生成一个 3 段视频约 0.01-0.05 元）+ ComfyUI 本地部署
+    - **云端方案**: LLM 使用 OpenAI + 图像使用 RunningHub（费用较高但无需本地环境）
+    
+    **选择建议**：本地有显卡建议完全免费方案，否则推荐使用通义千问（性价比高）
+
+---
+
+## 🤝 参考项目
+
+Pixelle-Video 的设计受到以下优秀开源项目的启发：
+
+- [Pixelle-MCP](https://github.com/AIDC-AI/Pixelle-MCP) - ComfyUI MCP 服务器，让 AI 助手直接调用 ComfyUI
+- [MoneyPrinterTurbo](https://github.com/harry0703/MoneyPrinterTurbo) - 优秀的视频生成工具
+- [NarratoAI](https://github.com/linyqh/NarratoAI) - 影视解说自动化工具
+- [MoneyPrinterPlus](https://github.com/ddean2009/MoneyPrinterPlus) - 视频创作平台
+- [ComfyKit](https://github.com/puke3615/ComfyKit) - ComfyUI 工作流封装库
+
+感谢这些项目的开源精神！🙏
+
+---
+
+## 📢 反馈与支持
+
+- 🐛 **遇到问题**: 提交 [Issue](https://github.com/PixelleLab/Pixelle-Video/issues)
+- 💡 **功能建议**: 提交 [Feature Request](https://github.com/PixelleLab/Pixelle-Video/issues)
+- ⭐ **给个 Star**: 如果这个项目对你有帮助，欢迎给个 Star 支持一下！
+
+---
+
+## 📝 许可证
+
+本项目采用 MIT 许可证，详情请查看 [LICENSE](https://github.com/PixelleLab/Pixelle-Video/blob/main/LICENSE) 文件。
+
diff --git a/docs/zh/reference/api-overview.md b/docs/zh/reference/api-overview.md
new file mode 100644
index 0000000..2939719
--- /dev/null
+++ b/docs/zh/reference/api-overview.md
@@ -0,0 +1,52 @@
+# API 概览
+
+Pixelle-Video Python API 参考文档。
+
+---
+
+## 核心类
+
+### PixelleVideoCore
+
+主要服务类，提供视频生成功能。
+
+```python
+from pixelle_video.service import PixelleVideoCore
+
+pixelle = PixelleVideoCore()
+await pixelle.initialize()
+```
+
+---
+
+## 主要方法
+
+### generate_video()
+
+生成视频的主要方法。
+
+**参数**:
+
+- `text` (str): 主题或完整文案
+- `mode` (str): 生成模式 ("generate" 或 "fixed")
+- `n_scenes` (int): 分镜数量
+- `title` (str, optional): 视频标题
+- `tts_workflow` (str): TTS 工作流
+- `image_workflow` (str): 图像生成工作流
+- `frame_template` (str): 视频模板
+- `bgm_path` (str, optional): BGM 文件路径
+
+**返回**: `VideoResult` 对象
+
+---
+
+## 示例
+
+查看 `examples/` 目录获取更多示例。
+
+---
+
+## 更多信息
+
+详细的 API 文档即将推出。
+
diff --git a/docs/zh/reference/config-schema.md b/docs/zh/reference/config-schema.md
new file mode 100644
index 0000000..b917771
--- /dev/null
+++ b/docs/zh/reference/config-schema.md
@@ -0,0 +1,60 @@
+# 配置文件详解
+
+`config.yaml` 配置文件的详细说明。
+
+---
+
+## 配置结构
+
+```yaml
+llm:
+  provider: openai
+  api_key: "your-api-key"
+  base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1"
+  model: "qwen-plus"
+
+comfyui:
+  comfyui_url: "http://127.0.0.1:8188"
+  runninghub_api_key: ""
+  
+  image:
+    default_workflow: "runninghub/image_flux.json"
+    prompt_prefix: "Minimalist illustration style"
+  
+  tts:
+    default_workflow: "selfhost/tts_edge.json"
+```
+
+---
+
+## LLM 配置
+
+- `provider`: 提供商（目前仅支持 openai 兼容接口）
+- `api_key`: API 密钥
+- `base_url`: API 服务地址
+- `model`: 模型名称
+
+---
+
+## ComfyUI 配置
+
+### 基础配置
+
+- `comfyui_url`: 本地 ComfyUI 地址
+- `runninghub_api_key`: RunningHub API 密钥（可选）
+
+### 图像配置
+
+- `default_workflow`: 默认图像生成工作流
+- `prompt_prefix`: 提示词前缀
+
+### TTS 配置
+
+- `default_workflow`: 默认 TTS 工作流
+
+---
+
+## 更多信息
+
+配置文件会自动在首次运行时创建。
+
diff --git a/docs/zh/troubleshooting.md b/docs/zh/troubleshooting.md
new file mode 100644
index 0000000..bbe164c
--- /dev/null
+++ b/docs/zh/troubleshooting.md
@@ -0,0 +1,108 @@
+# 故障排查
+
+遇到问题？这里有一些常见问题的解决方案。
+
+---
+
+## 安装问题
+
+### 依赖安装失败
+
+```bash
+# 清理缓存
+uv cache clean
+
+# 重新安装
+uv sync
+```
+
+---
+
+## 配置问题
+
+### ComfyUI 连接失败
+
+**可能原因**:
+- ComfyUI 未运行
+- URL 配置错误
+- 防火墙阻止
+
+**解决方案**:
+1. 确认 ComfyUI 正在运行
+2. 检查 URL 配置（默认 `http://127.0.0.1:8188`）
+3. 在浏览器中访问 ComfyUI 地址测试
+4. 检查防火墙设置
+
+### LLM API 调用失败
+
+**可能原因**:
+- API Key 错误
+- 网络问题
+- 余额不足
+
+**解决方案**:
+1. 检查 API Key 是否正确
+2. 检查网络连接
+3. 查看错误提示中的具体原因
+4. 检查账户余额
+
+---
+
+## 生成问题
+
+### 视频生成失败
+
+**可能原因**:
+- 工作流文件损坏
+- 模型未下载
+- 资源不足
+
+**解决方案**:
+1. 检查工作流文件是否存在
+2. 确认 ComfyUI 已下载所需模型
+3. 检查磁盘空间和内存
+
+### 图像生成失败
+
+**解决方案**:
+1. 检查 ComfyUI 是否正常运行
+2. 尝试在 ComfyUI 中手动测试工作流
+3. 检查工作流配置
+
+### TTS 生成失败
+
+**解决方案**:
+1. 检查 TTS 工作流是否正确
+2. 如使用声音克隆，检查参考音频格式
+3. 查看错误日志
+
+---
+
+## 性能问题
+
+### 生成速度慢
+
+**优化建议**:
+1. 使用本地 ComfyUI（比云端快）
+2. 减少分镜数量
+3. 使用更快的 LLM（如 Qianwen）
+4. 检查网络连接
+
+---
+
+## 其他问题
+
+仍有问题？
+
+1. 查看项目 [GitHub Issues](https://github.com/PixelleLab/Pixelle-Video/issues)
+2. 提交新的 Issue 描述你的问题
+3. 包含错误日志和配置信息以便快速定位
+
+---
+
+## 日志查看
+
+日志文件位于项目根目录：
+- `api_server.log` - API 服务日志
+- `test_output.log` - 测试日志
+
diff --git a/docs/zh/tutorials/custom-style.md b/docs/zh/tutorials/custom-style.md
new file mode 100644
index 0000000..e579547
--- /dev/null
+++ b/docs/zh/tutorials/custom-style.md
@@ -0,0 +1,36 @@
+# 自定义视觉风格
+
+学习如何调整图像生成参数以创建独特的视觉风格。
+
+---
+
+## 调整提示词前缀
+
+提示词前缀控制整体视觉风格：
+
+```
+Minimalist black-and-white illustration, clean lines, simple style
+```
+
+---
+
+## 调整图像尺寸
+
+不同尺寸适用于不同场景：
+
+- **1024x1024**: 方形，适合小红书
+- **1080x1920**: 竖屏，适合抖音、快手
+- **1920x1080**: 横屏，适合 B 站、YouTube
+
+---
+
+## 预览效果
+
+使用「预览风格」功能测试不同配置的效果。
+
+---
+
+## 更多信息
+
+即将推出更多风格定制技巧。
+
diff --git a/docs/zh/tutorials/voice-cloning.md b/docs/zh/tutorials/voice-cloning.md
new file mode 100644
index 0000000..b7b5b1c
--- /dev/null
+++ b/docs/zh/tutorials/voice-cloning.md
@@ -0,0 +1,35 @@
+# 声音克隆
+
+使用参考音频实现声音克隆功能。
+
+---
+
+## 准备参考音频
+
+1. 准备一段清晰的音频文件（MP3/WAV/FLAC）
+2. 建议时长 10-30 秒
+3. 避免背景噪音
+
+---
+
+## 使用步骤
+
+1. 在语音设置中选择支持声音克隆的 TTS 工作流（如 Index-TTS）
+2. 上传参考音频文件
+3. 使用「预览语音」测试效果
+4. 生成视频
+
+---
+
+## 注意事项
+
+- 不是所有 TTS 工作流都支持声音克隆
+- 参考音频质量会影响克隆效果
+- Edge-TTS 不支持声音克隆
+
+---
+
+## 更多信息
+
+即将推出更详细的声音克隆教程。
+
diff --git a/docs/zh/tutorials/your-first-video.md b/docs/zh/tutorials/your-first-video.md
new file mode 100644
index 0000000..735bf28
--- /dev/null
+++ b/docs/zh/tutorials/your-first-video.md
@@ -0,0 +1,33 @@
+# 生成你的第一个视频
+
+手把手教你使用 Pixelle-Video 生成第一个视频。
+
+---
+
+## 前置准备
+
+确保已完成：
+
+- ✅ [安装](../getting-started/installation.md)
+- ✅ [配置](../getting-started/configuration.md)
+
+---
+
+## 教程步骤
+
+详细步骤请查看 [快速开始](../getting-started/quick-start.md)。
+
+---
+
+## 小贴士
+
+- 选择合适的主题可以获得更好的效果
+- 首次生成建议使用 3-5 个分镜
+- 可以先预览语音和图像效果
+
+---
+
+## 常见问题
+
+遇到问题？查看 [FAQ](../faq.md) 或 [故障排查](../troubleshooting.md)。
+
diff --git a/docs/zh/user-guide/api.md b/docs/zh/user-guide/api.md
new file mode 100644
index 0000000..44416c0
--- /dev/null
+++ b/docs/zh/user-guide/api.md
@@ -0,0 +1,42 @@
+# API 使用
+
+Pixelle-Video 提供完整的 Python API，方便集成到你的项目中。
+
+---
+
+## 快速开始
+
+```python
+from pixelle_video.service import PixelleVideoCore
+import asyncio
+
+async def main():
+    # 初始化
+    pixelle = PixelleVideoCore()
+    await pixelle.initialize()
+    
+    # 生成视频
+    result = await pixelle.generate_video(
+        text="为什么要养成阅读习惯",
+        mode="generate",
+        n_scenes=5
+    )
+    
+    print(f"视频已生成: {result.video_path}")
+
+# 运行
+asyncio.run(main())
+```
+
+---
+
+## API 参考
+
+详细 API 文档请查看 [API 概览](../reference/api-overview.md)。
+
+---
+
+## 示例
+
+更多使用示例请参考项目的 `examples/` 目录。
+
diff --git a/docs/zh/user-guide/templates.md b/docs/zh/user-guide/templates.md
new file mode 100644
index 0000000..3965bcf
--- /dev/null
+++ b/docs/zh/user-guide/templates.md
@@ -0,0 +1,48 @@
+# 模板开发
+
+如何创建自定义视频模板。
+
+---
+
+## 模板简介
+
+视频模板使用 HTML 定义视频画面的布局和样式。
+
+---
+
+## 模板结构
+
+模板位于 `templates/` 目录，按尺寸分组：
+
+```
+templates/
+├── 1080x1920/  # 竖屏
+├── 1920x1080/  # 横屏
+└── 1080x1080/  # 方形
+```
+
+---
+
+## 创建模板
+
+1. 复制现有模板文件
+2. 修改 HTML 和 CSS
+3. 保存到对应尺寸目录
+4. 在 Web 界面中选择使用
+
+---
+
+## 模板变量
+
+模板支持以下变量：
+
+- `{{ title }}` - 视频标题
+- `{{ text }}` - 分镜文本
+- `{{ image }}` - 分镜图片
+
+---
+
+## 更多信息
+
+详细的模板开发指南即将推出。
+
diff --git a/docs/zh/user-guide/web-ui.md b/docs/zh/user-guide/web-ui.md
new file mode 100644
index 0000000..a0cad90
--- /dev/null
+++ b/docs/zh/user-guide/web-ui.md
@@ -0,0 +1,77 @@
+# Web 界面使用指南
+
+详细介绍 Pixelle-Video Web 界面的各项功能。
+
+---
+
+## 界面布局
+
+Web 界面采用三栏布局：
+
+- **左侧栏**: 内容输入与背景音乐设置
+- **中间栏**: 语音与视觉设置
+- **右侧栏**: 视频生成与预览
+
+---
+
+## 系统配置
+
+首次使用需要配置 LLM 和图像生成服务。详见 [配置说明](../getting-started/configuration.md)。
+
+---
+
+## 内容输入
+
+### 生成模式
+
+- **AI 生成内容**: 输入主题，AI 自动创作文案
+- **固定文案内容**: 直接输入完整文案
+
+### 背景音乐
+
+- 支持内置音乐
+- 支持自定义音乐文件
+
+---
+
+## 语音设置
+
+### TTS 工作流
+
+- 选择 TTS 工作流
+- 支持 Edge-TTS、Index-TTS 等
+
+### 参考音频
+
+- 上传参考音频进行声音克隆
+- 支持 MP3/WAV/FLAC 等格式
+
+---
+
+## 视觉设置
+
+### 图像生成
+
+- 选择图像生成工作流
+- 设置图像尺寸
+- 调整提示词前缀控制风格
+
+### 视频模板
+
+- 选择视频模板
+- 支持竖屏/横屏/方形
+- 可预览模板效果
+
+---
+
+## 生成视频
+
+点击「生成视频」按钮后，系统会：
+
+1. 生成视频文案
+2. 为每个分镜生成配图
+3. 合成语音解说
+4. 合成最终视频
+
+生成完成后自动预览。
+
diff --git a/docs/zh/user-guide/workflows.md b/docs/zh/user-guide/workflows.md
new file mode 100644
index 0000000..a63e324
--- /dev/null
+++ b/docs/zh/user-guide/workflows.md
@@ -0,0 +1,37 @@
+# 工作流定制
+
+如何自定义 ComfyUI 工作流以实现特定功能。
+
+---
+
+## 工作流简介
+
+Pixelle-Video 基于 ComfyUI 架构，支持自定义工作流。
+
+---
+
+## 工作流类型
+
+### TTS 工作流
+
+位于 `workflows/selfhost/` 或 `workflows/runninghub/`
+
+### 图像生成工作流
+
+位于 `workflows/selfhost/` 或 `workflows/runninghub/`
+
+---
+
+## 自定义工作流
+
+1. 在 ComfyUI 中设计你的工作流
+2. 导出为 JSON 文件
+3. 放置到 `workflows/` 下对应的子目录（`selfhost/` 或 `runninghub/`）
+4. 在 Web 界面中选择使用
+
+---
+
+## 更多信息
+
+即将推出更详细的工作流定制指南。
+
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..3847531
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,171 @@
+site_name: Pixelle-Video
+site_description: AI Video Creator - Generate a short video in 3 minutes
+site_author: PixelleLab
+site_url: https://pixellelab.github.io/Pixelle-Video/
+
+repo_name: PixelleLab/Pixelle-Video
+repo_url: https://github.com/PixelleLab/Pixelle-Video
+edit_uri: edit/main/docs/
+
+copyright: Copyright &copy; 2025 PixelleLab
+
+theme:
+  name: material
+  language: en
+  palette:
+    # Light mode
+    - media: "(prefers-color-scheme: light)"
+      scheme: default
+      primary: indigo
+      accent: indigo
+      toggle:
+        icon: material/brightness-7
+        name: Switch to dark mode
+    # Dark mode
+    - media: "(prefers-color-scheme: dark)"
+      scheme: slate
+      primary: indigo
+      accent: indigo
+      toggle:
+        icon: material/brightness-4
+        name: Switch to light mode
+  
+  font:
+    text: Roboto
+    code: Roboto Mono
+  
+  features:
+    - navigation.instant       # Instant loading (NOTE: may break the i18n language switcher's contextual links; verify)
+    - navigation.tracking      # Anchor tracking
+    - navigation.tabs          # Top-level tabs
+    - navigation.tabs.sticky   # Sticky tabs
+    - navigation.sections      # Sidebar sections
+    - navigation.expand        # Expand sections
+    - navigation.top           # Back to top button
+    - navigation.footer        # Footer navigation
+    - search.suggest           # Search suggestions
+    - search.highlight         # Search highlighting
+    - search.share             # Share search results
+    - content.code.copy        # Copy button for code blocks
+    - content.code.annotate    # Code annotations
+    - content.tabs.link        # Link content tabs
+  
+  icon:
+    repo: fontawesome/brands/github
+
+plugins:
+  - search:
+      lang:
+        - en
+        - zh
+  - i18n:
+      docs_structure: folder
+      languages:
+        - locale: en
+          default: true
+          name: English
+          build: true
+        - locale: zh
+          name: 中文
+          build: true
+          nav_translations:
+            Home: 首页
+            Getting Started: 快速开始
+            Installation: 安装
+            Quick Start: 快速入门
+            Configuration: 配置
+            User Guide: 用户指南
+            Web UI: Web 界面
+            API Usage: API 使用
+            Workflows: 工作流定制
+            Templates: 模板开发
+            Gallery: 示例库
+            Tutorials: 教程
+            Your First Video: 生成你的第一个视频
+            Custom Style: 自定义视觉风格
+            Voice Cloning: 声音克隆
+            Reference: 参考
+            API Overview: API 概览
+            Config Schema: 配置文件详解
+            Development: 开发指南
+            Architecture: 架构设计
+            Contributing: 贡献指南
+            FAQ: 常见问题
+            Troubleshooting: 故障排查
+  - git-revision-date-localized:
+      enable_creation_date: true
+      type: datetime
+
+markdown_extensions:
+  # Python Markdown
+  - abbr
+  - admonition
+  - attr_list
+  - def_list
+  - footnotes
+  - md_in_html
+  - toc:
+      permalink: true
+  
+  # Python Markdown Extensions
+  - pymdownx.arithmatex:
+      generic: true
+  - pymdownx.betterem:
+      smart_enable: all
+  - pymdownx.caret
+  - pymdownx.details
+  - pymdownx.emoji:
+      emoji_index: !!python/name:material.extensions.emoji.twemoji
+      emoji_generator: !!python/name:material.extensions.emoji.to_svg
+  - pymdownx.highlight:
+      anchor_linenums: true
+      line_spans: __span
+      pygments_lang_class: true
+  - pymdownx.inlinehilite
+  - pymdownx.keys
+  - pymdownx.mark
+  - pymdownx.smartsymbols
+  - pymdownx.superfences:
+      custom_fences:
+        - name: mermaid
+          class: mermaid
+          format: !!python/name:pymdownx.superfences.fence_code_format
+  - pymdownx.tabbed:
+      alternate_style: true
+  - pymdownx.tasklist:
+      custom_checkbox: true
+  - pymdownx.tilde
+
+nav:
+  - Home: index.md
+  - Getting Started:
+    - Installation: getting-started/installation.md
+    - Quick Start: getting-started/quick-start.md
+    - Configuration: getting-started/configuration.md
+  - User Guide:
+    - Web UI: user-guide/web-ui.md
+    - API Usage: user-guide/api.md
+    - Workflows: user-guide/workflows.md
+    - Templates: user-guide/templates.md
+  - Gallery: gallery/index.md
+  - Tutorials:
+    - Your First Video: tutorials/your-first-video.md
+    - Custom Style: tutorials/custom-style.md
+    - Voice Cloning: tutorials/voice-cloning.md
+  - Reference:
+    - API Overview: reference/api-overview.md
+    - Config Schema: reference/config-schema.md
+  - Development:
+    - Architecture: development/architecture.md
+    - Contributing: development/contributing.md
+  - FAQ: faq.md
+  - Troubleshooting: troubleshooting.md
+
+extra:
+  social:
+    - icon: fontawesome/brands/github
+      link: https://github.com/PixelleLab/Pixelle-Video
+      name: GitHub Repository
+
+extra_css:
+  - stylesheets/extra.css
diff --git a/pixelle_video/cli.py b/pixelle_video/cli.py
deleted file mode 100644
index 6640eca..0000000
--- a/pixelle_video/cli.py
+++ /dev/null
@@ -1,38 +0,0 @@
-"""
-Pixelle-Video CLI
-"""
-
-import asyncio
-
-from loguru import logger
-
-from pixelle_video.service import pixelle_video
-
-
-async def test_llm():
-    """Test LLM capability"""
-    # Initialize pixelle_video
-    await pixelle_video.initialize()
-    
-    # Test prompt
-    prompt = "Explain the concept of atomic habits in 3 sentences."
-    
-    logger.info(f"\n📝 Test Prompt: {prompt}\n")
-    
-    # Call LLM
-    result = await pixelle_video.llm(prompt)
-    
-    logger.info(f"\n✨ Result:\n{result}\n")
-
-
-def main():
-    """Main CLI entry point"""
-    logger.info("🚀 Pixelle-Video CLI\n")
-    
-    # Run test
-    asyncio.run(test_llm())
-
-
-if __name__ == "__main__":
-    main()
-