From ec395196cdea7f4914de1af9c7728b45ce815cd1 Mon Sep 17 00:00:00 2001 From: puke <1129090915@qq.com> Date: Mon, 3 Nov 2025 17:44:33 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E7=A4=BA=E4=BE=8B=E5=BA=93?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 57 ++-- docs/capabilities-guide.md | 343 ----------------------- docs/en/development/architecture.md | 54 ++++ docs/en/development/contributing.md | 50 ++++ docs/en/faq.md | 78 ++++++ docs/en/gallery/index.md | 45 +++ docs/en/getting-started/configuration.md | 60 ++++ docs/en/getting-started/installation.md | 115 ++++++++ docs/en/getting-started/quick-start.md | 107 +++++++ docs/en/index.md | 97 +++++++ docs/en/reference/api-overview.md | 52 ++++ docs/en/reference/config-schema.md | 60 ++++ docs/en/troubleshooting.md | 108 +++++++ docs/en/tutorials/custom-style.md | 36 +++ docs/en/tutorials/voice-cloning.md | 35 +++ docs/en/tutorials/your-first-video.md | 33 +++ docs/en/user-guide/api.md | 42 +++ docs/en/user-guide/templates.md | 48 ++++ docs/en/user-guide/web-ui.md | 77 +++++ docs/en/user-guide/workflows.md | 37 +++ docs/gallery/index.md | 78 ++++++ docs/gallery/reading-habit/prompts.txt | 1 + docs/stylesheets/extra.css | 17 ++ docs/zh/development/architecture.md | 54 ++++ docs/zh/development/contributing.md | 50 ++++ docs/zh/faq.md | 78 ++++++ docs/zh/gallery/index.md | 45 +++ docs/zh/getting-started/configuration.md | 60 ++++ docs/zh/getting-started/installation.md | 115 ++++++++ docs/zh/getting-started/quick-start.md | 107 +++++++ docs/zh/index.md | 97 +++++++ docs/zh/reference/api-overview.md | 52 ++++ docs/zh/reference/config-schema.md | 60 ++++ docs/zh/troubleshooting.md | 108 +++++++ docs/zh/tutorials/custom-style.md | 36 +++ docs/zh/tutorials/voice-cloning.md | 35 +++ docs/zh/tutorials/your-first-video.md | 33 +++ docs/zh/user-guide/api.md | 42 +++ docs/zh/user-guide/templates.md | 48 ++++ 
docs/zh/user-guide/web-ui.md | 77 +++++ docs/zh/user-guide/workflows.md | 37 +++ mkdocs.yml | 171 +++++++++++ pixelle_video/cli.py | 38 --- 43 files changed, 2567 insertions(+), 406 deletions(-) delete mode 100644 docs/capabilities-guide.md create mode 100644 docs/en/development/architecture.md create mode 100644 docs/en/development/contributing.md create mode 100644 docs/en/faq.md create mode 100644 docs/en/gallery/index.md create mode 100644 docs/en/getting-started/configuration.md create mode 100644 docs/en/getting-started/installation.md create mode 100644 docs/en/getting-started/quick-start.md create mode 100644 docs/en/index.md create mode 100644 docs/en/reference/api-overview.md create mode 100644 docs/en/reference/config-schema.md create mode 100644 docs/en/troubleshooting.md create mode 100644 docs/en/tutorials/custom-style.md create mode 100644 docs/en/tutorials/voice-cloning.md create mode 100644 docs/en/tutorials/your-first-video.md create mode 100644 docs/en/user-guide/api.md create mode 100644 docs/en/user-guide/templates.md create mode 100644 docs/en/user-guide/web-ui.md create mode 100644 docs/en/user-guide/workflows.md create mode 100644 docs/gallery/index.md create mode 100644 docs/gallery/reading-habit/prompts.txt create mode 100644 docs/stylesheets/extra.css create mode 100644 docs/zh/development/architecture.md create mode 100644 docs/zh/development/contributing.md create mode 100644 docs/zh/faq.md create mode 100644 docs/zh/gallery/index.md create mode 100644 docs/zh/getting-started/configuration.md create mode 100644 docs/zh/getting-started/installation.md create mode 100644 docs/zh/getting-started/quick-start.md create mode 100644 docs/zh/index.md create mode 100644 docs/zh/reference/api-overview.md create mode 100644 docs/zh/reference/config-schema.md create mode 100644 docs/zh/troubleshooting.md create mode 100644 docs/zh/tutorials/custom-style.md create mode 100644 docs/zh/tutorials/voice-cloning.md create mode 100644 
docs/zh/tutorials/your-first-video.md create mode 100644 docs/zh/user-guide/api.md create mode 100644 docs/zh/user-guide/templates.md create mode 100644 docs/zh/user-guide/web-ui.md create mode 100644 docs/zh/user-guide/workflows.md create mode 100644 mkdocs.yml delete mode 100644 pixelle_video/cli.py diff --git a/README.md b/README.md index cc9a0bd..8aa0607 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ - ✅ **全自动生成** - 输入主题,3 分钟自动生成完整视频 - ✅ **AI 智能文案** - 根据主题智能创作解说词,无需自己写脚本 - ✅ **AI 生成配图** - 每句话都配上精美的 AI 插图 -- ✅ **真人语音** - 100+ 种真人声音可选,告别机械音 +- ✅ **AI 生成语音** - 支持 Edge-TTS、Index-TTS 等众多主流 TTS 方案 - ✅ **背景音乐** - 支持添加 BGM,让视频更有氛围 - ✅ **视觉风格** - 多种模板可选,打造独特视频风格 - ✅ **灵活尺寸** - 支持竖屏、横屏等多种视频尺寸 @@ -123,16 +123,7 @@ uv run streamlit run web/app.py - **固定文案内容**: 直接输入完整文案,跳过 AI 创作 - 适合:已有现成文案,直接生成视频 ---- - -### 🎵 音频设置(左侧栏) - -#### 语音选择 -- 从下拉菜单选择解说声音 -- 提供 4 种精选声音(男声/女声、专业/年轻) -- 点击「试听语音」可以预览效果 - -#### 背景音乐 +#### 背景音乐(BGM) - **无 BGM**: 纯人声解说 - **内置音乐**: 选择预置的背景音乐(如 default.mp3) - **自定义音乐**: 将你的音乐文件(MP3/WAV 等)放到 `bgm/` 文件夹 @@ -140,16 +131,40 @@ uv run streamlit run web/app.py --- +### 🎤 语音设置(中间栏) + +#### TTS 工作流 +- 从下拉菜单选择 TTS 工作流(支持 Edge-TTS、Index-TTS 等) +- 系统会自动扫描 `workflows/` 文件夹中的 TTS 工作流 +- 如果懂 ComfyUI,可以自定义 TTS 工作流 + +#### 参考音频(可选) +- 上传参考音频文件用于声音克隆(支持 MP3/WAV/FLAC 等格式) +- 适用于支持声音克隆的 TTS 工作流(如 Index-TTS) +- 上传后可以直接试听 + +#### 预览功能 +- 输入测试文本,点击「预览语音」即可试听效果 +- 支持使用参考音频进行预览 + +--- + ### 🎨 视觉设置(中间栏) -#### 视觉风格 +#### 图像生成 决定 AI 生成什么风格的配图。 **ComfyUI 工作流** -- 选择图像生成的工作流文件 +- 从下拉菜单选择图像生成工作流 +- 支持本地部署(selfhost)和云端(RunningHub)工作流 - 默认使用 `image_flux.json` - 如果懂 ComfyUI,可以放自己的工作流到 `workflows/` 文件夹 +**图像尺寸** +- 设置生成图像的宽度和高度(单位:像素) +- 默认 1024x1024,可根据需要调整 +- 注意:不同的模型对尺寸有不同的限制 + **提示词前缀(Prompt Prefix)** - 控制图像的整体风格(语言需要是英文的) - 例如:Minimalist black-and-white matchstick figure style illustration, clean lines, simple sketch style @@ -158,7 +173,7 @@ uv run streamlit run web/app.py #### 视频模板 决定视频画面的布局和设计。 -- 从下拉菜单选择模板(default.html、modern.html、classic.html 等) +- 
从下拉菜单选择模板,按尺寸分组显示(竖屏/横屏/方形) - 点击「预览模板」可以自定义参数测试效果 - 如果懂 HTML,可以在 `templates/` 文件夹创建自己的模板 @@ -190,9 +205,9 @@ A: 生成一个 3 段视频大约需要 2-5 分钟,取决于你的网络和 AI **Q: 视频效果不满意怎么办?** A: 可以尝试: 1. 更换 LLM 模型(不同模型文案风格不同) -2. 调整提示词前缀(改变配图风格) -3. 更换语音(不同声音适合不同内容) -4. 尝试不同的视频模板 +2. 调整图像尺寸和提示词前缀(改变配图风格) +3. 更换 TTS 工作流或上传参考音频(改变语音效果) +4. 尝试不同的视频模板和尺寸 **Q: 费用大概多少?** A: **本项目完全支持免费运行!** @@ -237,11 +252,3 @@ Pixelle-Video 的设计受到以下优秀开源项目的启发: [![Star History Chart](https://api.star-history.com/svg?repos=PixelleLab/Pixelle-Video&type=Date)](https://star-history.com/#PixelleLab/Pixelle-Video&Date) ---- - -
-

Made with ❤️ by PixelleLab

-

- 回到顶部 ⬆️ -

-
diff --git a/docs/capabilities-guide.md b/docs/capabilities-guide.md deleted file mode 100644 index ba6f4c5..0000000 --- a/docs/capabilities-guide.md +++ /dev/null @@ -1,343 +0,0 @@ -# Pixelle-Video Capabilities Guide - -> Complete guide to using LLM, TTS, and Image generation capabilities - -## Overview - -Pixelle-Video provides three core AI capabilities: -- **LLM**: Text generation using LiteLLM (supports 100+ models) -- **TTS**: Text-to-speech using Edge TTS (free, 400+ voices) -- **Image**: Image generation using ComfyKit (local or cloud) - -## Quick Start - -```python -from pixelle_video.service import pixelle_video - -# LLM - Generate text -answer = await pixelle_video.llm("Summarize 'Atomic Habits' in 3 sentences") - -# TTS - Generate speech -audio_path = await pixelle_video.tts("Hello, world!") - -# Image - Generate images -image_url = await pixelle_video.image( - workflow="workflows/book_cover_simple.json", - prompt="minimalist book cover design" -) -``` - ---- - -## 1. LLM (Large Language Model) - -### Configuration - -Edit `config.yaml`: - -```yaml -llm: - default: qwen # Choose: qwen, openai, deepseek, ollama - - qwen: - api_key: "your-dashscope-api-key" - base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1" - model: "openai/qwen-max" - - openai: - api_key: "your-openai-api-key" - model: "gpt-4" - - deepseek: - api_key: "your-deepseek-api-key" - base_url: "https://api.deepseek.com" - model: "openai/deepseek-chat" - - ollama: - base_url: "http://localhost:11434" - model: "ollama/llama3.2" -``` - -### Usage - -```python -# Basic usage -answer = await pixelle_video.llm("What is machine learning?") - -# With parameters -answer = await pixelle_video.llm( - prompt="Explain atomic habits", - temperature=0.7, # 0.0-2.0 (lower = more deterministic) - max_tokens=2000 -) -``` - -### Environment Variables (Alternative) - -Instead of `config.yaml`, you can use environment variables: - -```bash -# Qwen -export DASHSCOPE_API_KEY="your-key" - -# OpenAI 
-export OPENAI_API_KEY="your-key" - -# DeepSeek -export DEEPSEEK_API_KEY="your-key" -``` - ---- - -## 2. TTS (Text-to-Speech) - -### Configuration - -Edit `config.yaml`: - -```yaml -tts: - default: edge - - edge: - # No configuration needed - free to use! -``` - -### Usage - -```python -# Basic usage (auto-generates temp path) -audio_path = await pixelle_video.tts("Hello, world!") -# Returns: "temp/abc123def456.mp3" - -# With Chinese text -audio_path = await pixelle_video.tts( - text="你好,世界!", - voice="zh-CN-YunjianNeural" -) - -# With custom parameters -audio_path = await pixelle_video.tts( - text="Welcome to Pixelle-Video", - voice="en-US-JennyNeural", - rate="+20%", # Speed: +50% = faster, -20% = slower - volume="+0%", - pitch="+0Hz" -) - -# Specify output path -audio_path = await pixelle_video.tts( - text="Hello", - output_path="output/greeting.mp3" -) -``` - -### Popular Voices - -**Chinese:** -- `zh-CN-YunjianNeural` (male, default) -- `zh-CN-XiaoxiaoNeural` (female) -- `zh-CN-YunxiNeural` (male) -- `zh-CN-XiaoyiNeural` (female) - -**English:** -- `en-US-JennyNeural` (female) -- `en-US-GuyNeural` (male) -- `en-GB-SoniaNeural` (female, British) - -### List All Voices - -```python -# Get all available voices -voices = await pixelle_video.tts.list_voices() - -# Get Chinese voices only -voices = await pixelle_video.tts.list_voices(locale="zh-CN") - -# Get English voices only -voices = await pixelle_video.tts.list_voices(locale="en-US") -``` - ---- - -## 3. 
Image Generation - -### Configuration - -Edit `config.yaml`: - -```yaml -image: - default: comfykit - - comfykit: - # Local ComfyUI (optional, default: http://127.0.0.1:8188) - comfyui_url: "http://127.0.0.1:8188" - - # RunningHub cloud (optional) - runninghub_api_key: "rh-key-xxx" -``` - -### Usage - -```python -# Basic usage (local ComfyUI) -image_url = await pixelle_video.image( - workflow="workflows/book_cover_simple.json", - prompt="minimalist book cover design, blue and white" -) - -# With full parameters -image_url = await pixelle_video.image( - workflow="workflows/book_cover_simple.json", - prompt="book cover for 'Atomic Habits', professional, minimalist", - negative_prompt="ugly, blurry, low quality", - width=1024, - height=1536, - steps=20, - seed=42 -) - -# Using RunningHub cloud -image_url = await pixelle_video.image( - workflow="12345", # RunningHub workflow ID - prompt="a beautiful landscape" -) - -# Check available workflows -workflows = pixelle_video.image.list_workflows() -print(f"Available workflows: {workflows}") -``` - -### Environment Variables (Alternative) - -```bash -# Local ComfyUI -export COMFYUI_BASE_URL="http://127.0.0.1:8188" - -# RunningHub cloud -export RUNNINGHUB_API_KEY="rh-key-xxx" -``` - -### Workflow DSL - -Pixelle-Video uses ComfyKit's DSL for workflow parameters: - -```json -{ - "6": { - "class_type": "CLIPTextEncode", - "_meta": { - "title": "$prompt!" - }, - "inputs": { - "text": "default prompt", - "clip": ["4", 1] - } - } -} -``` - -**DSL Markers:** -- `$param!` - Required parameter -- `$param` - Optional parameter -- `$param~` - Upload parameter (for images/audio/video) -- `$output.name` - Output variable - ---- - -## Combined Workflow Example - -Generate a complete book cover with narration: - -```python -import asyncio -from pixelle_video.service import pixelle_video - -async def create_book_content(book_title, author): - """Generate book summary, audio, and cover image""" - - # 1. 
Generate book summary with LLM - summary = await pixelle_video.llm( - prompt=f"Write a compelling 2-sentence summary for a book titled '{book_title}' by {author}", - max_tokens=100 - ) - print(f"Summary: {summary}") - - # 2. Generate audio narration with TTS - audio_path = await pixelle_video.tts( - text=summary, - voice="en-US-JennyNeural" - ) - print(f"Audio: {audio_path}") - - # 3. Generate book cover image - image_url = await pixelle_video.image( - workflow="workflows/book_cover_simple.json", - prompt=f"book cover for '{book_title}' by {author}, professional, modern design", - width=1024, - height=1536 - ) - print(f"Cover: {image_url}") - - return { - "summary": summary, - "audio": audio_path, - "cover": image_url - } - -# Run -result = asyncio.run(create_book_content("Atomic Habits", "James Clear")) -``` - ---- - -## Troubleshooting - -### LLM Issues - -**"API key not found"** -- Make sure you've set the API key in `config.yaml` or environment variables -- For Qwen: `DASHSCOPE_API_KEY` -- For OpenAI: `OPENAI_API_KEY` -- For DeepSeek: `DEEPSEEK_API_KEY` - -**"Connection error"** -- Check `base_url` in config -- Verify API endpoint is accessible -- For Ollama, make sure server is running (`ollama serve`) - -### TTS Issues - -**"SSL error"** -- Edge TTS is free but requires internet connection -- SSL verification is disabled by default for development - -### Image Issues - -**"ComfyUI connection refused"** -- Make sure ComfyUI is running at http://127.0.0.1:8188 -- Or configure RunningHub API key for cloud execution - -**"Workflow file not found"** -- Check workflow path is correct -- Use relative path from project root: `workflows/your_workflow.json` - -**"No images generated"** -- Check workflow has `SaveImage` node -- Verify workflow parameters are correct -- Check ComfyUI logs for errors - ---- - -## Next Steps - -- See `/examples/` directory for complete examples -- Run `python test_integration.py` to test all capabilities -- Create custom workflows in 
`/workflows/` directory -- Check ComfyKit documentation: https://puke3615.github.io/ComfyKit - ---- - -**Happy creating with Pixelle-Video!** 📚🎬 - diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md new file mode 100644 index 0000000..9dc2cd6 --- /dev/null +++ b/docs/en/development/architecture.md @@ -0,0 +1,54 @@ +# Architecture + +Technical architecture overview of Pixelle-Video. + +--- + +## Core Architecture + +Pixelle-Video uses a layered architecture design: + +- **Web Layer**: Streamlit Web interface +- **Service Layer**: Core business logic +- **ComfyUI Layer**: Image and TTS generation + +--- + +## Main Components + +### PixelleVideoCore + +Core service class coordinating all sub-services. + +### LLM Service + +Responsible for calling large language models to generate scripts. + +### Image Service + +Responsible for calling ComfyUI to generate images. + +### TTS Service + +Responsible for calling ComfyUI to generate speech. + +### Video Generator + +Responsible for composing the final video. + +--- + +## Tech Stack + +- **Backend**: Python 3.10+, AsyncIO +- **Web**: Streamlit +- **AI**: OpenAI API, ComfyUI +- **Configuration**: YAML +- **Tools**: uv (package management) + +--- + +## More Information + +Detailed architecture documentation coming soon. + diff --git a/docs/en/development/contributing.md b/docs/en/development/contributing.md new file mode 100644 index 0000000..bef2f18 --- /dev/null +++ b/docs/en/development/contributing.md @@ -0,0 +1,50 @@ +# Contributing + +Thank you for your interest in contributing to Pixelle-Video! + +--- + +## How to Contribute + +1. Fork the repository +2. Create a feature branch (`git checkout -b feature/AmazingFeature`) +3. Commit your changes (`git commit -m 'Add some AmazingFeature'`) +4. Push to the branch (`git push origin feature/AmazingFeature`) +5. 
Open a Pull Request + +--- + +## Development Setup + +```bash +# Clone your fork +git clone https://github.com/your-username/Pixelle-Video.git +cd Pixelle-Video + +# Install development dependencies +uv sync + +# Run tests +pytest +``` + +--- + +## Code Standards + +- All code and comments in English +- Follow PEP 8 standards +- Add appropriate tests + +--- + +## Submit Issues + +Having problems or feature suggestions? Please submit at [GitHub Issues](https://github.com/PixelleLab/Pixelle-Video/issues). + +--- + +## Code of Conduct + +Please be friendly and respectful. We are committed to fostering an inclusive community environment. + diff --git a/docs/en/faq.md b/docs/en/faq.md new file mode 100644 index 0000000..9c8a722 --- /dev/null +++ b/docs/en/faq.md @@ -0,0 +1,78 @@ +# FAQ + +Frequently Asked Questions. + +--- + +## Installation + +### Q: How to install uv? + +```bash +curl -LsSf https://astral.sh/uv/install.sh | sh +``` + +### Q: Can I use something other than uv? + +Yes, you can use traditional pip + venv approach. + +--- + +## Configuration + +### Q: Do I need to configure ComfyUI? + +Not necessarily. You can use RunningHub cloud service without local deployment. + +### Q: Which LLMs are supported? + +All OpenAI-compatible LLMs, including: +- Qianwen +- GPT-4o +- DeepSeek +- Ollama (local) + +--- + +## Usage + +### Q: How long does first-time usage take? + +Generating a 3-5 scene video takes approximately 2-5 minutes. + +### Q: What if I'm not satisfied with the video? + +Try: +1. Change LLM model +2. Adjust image dimensions and prompt prefix +3. Change TTS workflow +4. Try different video templates + +### Q: What are the costs? + +- **Completely Free**: Ollama + Local ComfyUI = $0 +- **Recommended**: Qianwen + Local ComfyUI ≈ $0.01-0.05/video +- **Cloud Solution**: OpenAI + RunningHub (higher cost) + +--- + +## Troubleshooting + +### Q: ComfyUI connection failed + +1. Confirm ComfyUI is running +2. Check if URL is correct +3. 
Click "Test Connection" in Web interface + +### Q: LLM API call failed + +1. Check if API Key is correct +2. Check network connection +3. Review error messages + +--- + +## Other Questions + +Have other questions? Check [Troubleshooting](troubleshooting.md) or submit an [Issue](https://github.com/PixelleLab/Pixelle-Video/issues). + diff --git a/docs/en/gallery/index.md b/docs/en/gallery/index.md new file mode 100644 index 0000000..36e4c04 --- /dev/null +++ b/docs/en/gallery/index.md @@ -0,0 +1,45 @@ +# 🎬 Video Gallery + +Showcase of videos created with Pixelle-Video. Click on cards to view complete workflows and configuration files. + +--- + +
+ +- **Reading Habit** + + --- + + + + [:octicons-mark-github-16: View Workflows & Config](https://github.com/PixelleLab/Pixelle-Video/tree/main/docs/gallery/reading-habit) + +- **Work Efficiency** + + --- + + + + [:octicons-mark-github-16: View Workflows & Config](https://github.com/PixelleLab/Pixelle-Video/tree/main/docs/gallery/work-efficiency) + +- **Healthy Diet** + + --- + + + + [:octicons-mark-github-16: View Workflows & Config](https://github.com/PixelleLab/Pixelle-Video/tree/main/docs/gallery/healthy-diet) + +
+ +--- + +!!! tip "How to Use" + Click on a case card to jump to GitHub, download workflow files and configuration, and reproduce the video effect with one click. + diff --git a/docs/en/getting-started/configuration.md b/docs/en/getting-started/configuration.md new file mode 100644 index 0000000..bf37a14 --- /dev/null +++ b/docs/en/getting-started/configuration.md @@ -0,0 +1,60 @@ +# Configuration + +After installation, you need to configure services to use Pixelle-Video. + +--- + +## LLM Configuration + +LLM (Large Language Model) is used to generate video scripts. + +### Quick Preset Selection + +1. Select a preset model from the dropdown: + - Qianwen (recommended, great value) + - GPT-4o + - DeepSeek + - Ollama (local, completely free) + +2. The system will auto-fill `base_url` and `model` + +3. Click「🔑 Get API Key」to register and obtain credentials + +4. Enter your API Key + +--- + +## Image Configuration + +Two options available: + +### Local Deployment (Recommended) + +Using local ComfyUI service: + +1. Install and start ComfyUI +2. Enter ComfyUI URL (default `http://127.0.0.1:8188`) +3. Click "Test Connection" to verify + +### Cloud Deployment + +Using RunningHub cloud service: + +1. Register for a RunningHub account +2. Obtain API Key +3. Enter API Key in configuration + +--- + +## Save Configuration + +After filling in all required configuration, click the "Save Configuration" button. + +Configuration will be saved to `config.yaml` file. + +--- + +## Next Steps + +- [Quick Start](quick-start.md) - Create your first video + diff --git a/docs/en/getting-started/installation.md b/docs/en/getting-started/installation.md new file mode 100644 index 0000000..8c38d5f --- /dev/null +++ b/docs/en/getting-started/installation.md @@ -0,0 +1,115 @@ +# Installation + +This page will guide you through installing Pixelle-Video. 
+ +--- + +## System Requirements + +### Required + +- **Python**: 3.10 or higher +- **Operating System**: Windows, macOS, or Linux +- **Package Manager**: uv (recommended) or pip + +### Optional + +- **GPU**: NVIDIA GPU with 6GB+ VRAM recommended for local ComfyUI +- **Network**: Stable internet connection for LLM API and image generation services + +--- + +## Installation Steps + +### Step 1: Clone the Repository + +```bash +git clone https://github.com/PixelleLab/Pixelle-Video.git +cd Pixelle-Video +``` + +### Step 2: Install Dependencies + +!!! tip "Recommended: Use uv" + This project uses `uv` as the package manager, which is faster and more reliable than traditional pip. + +#### Using uv (Recommended) + +```bash +# Install uv if you haven't already +curl -LsSf https://astral.sh/uv/install.sh | sh + +# Install project dependencies (uv will create a virtual environment automatically) +uv sync +``` + +#### Using pip + +```bash +# Create virtual environment +python -m venv venv + +# Activate virtual environment +# Windows: +venv\Scripts\activate +# macOS/Linux: +source venv/bin/activate + +# Install dependencies +pip install -e . +``` + +--- + +## Verify Installation + +Run the following command to verify the installation: + +```bash +# Using uv +uv run streamlit run web/app.py + +# Or using pip (activate virtual environment first) +streamlit run web/app.py +``` + +Your browser should automatically open `http://localhost:8501` and display the Pixelle-Video web interface. + +!!! success "Installation Successful!" + If you can see the web interface, the installation was successful! Next, check out the [Configuration Guide](configuration.md) to set up your services. 
+ +--- + +## Optional: Install ComfyUI (Local Deployment) + +If you want to run image generation locally, you'll need to install ComfyUI: + +### Quick Install + +```bash +# Clone ComfyUI +git clone https://github.com/comfyanonymous/ComfyUI.git +cd ComfyUI + +# Install dependencies +pip install -r requirements.txt +``` + +### Start ComfyUI + +```bash +python main.py +``` + +ComfyUI runs on `http://127.0.0.1:8188` by default. + +!!! info "ComfyUI Models" + ComfyUI requires downloading model files to work. Please refer to the [ComfyUI documentation](https://github.com/comfyanonymous/ComfyUI) for information on downloading and configuring models. + +--- + +## Next Steps + +- [Configuration](configuration.md) - Configure LLM and image generation services +- [Quick Start](quick-start.md) - Create your first video + diff --git a/docs/en/getting-started/quick-start.md b/docs/en/getting-started/quick-start.md new file mode 100644 index 0000000..7ba8a52 --- /dev/null +++ b/docs/en/getting-started/quick-start.md @@ -0,0 +1,107 @@ +# Quick Start + +Already installed and configured? Let's create your first video! + +--- + +## Start the Web Interface + +```bash +# Using uv +uv run streamlit run web/app.py +``` + +Your browser will automatically open `http://localhost:8501` + +--- + +## Create Your First Video + +### Step 1: Check Configuration + +On first use, expand the「⚙️ System Configuration」panel and confirm: + +- **LLM Configuration**: Select an AI model (e.g., Qianwen, GPT) and enter API Key +- **Image Configuration**: Configure ComfyUI address or RunningHub API Key + +If not yet configured, see the [Configuration Guide](configuration.md). + +Click "Save Configuration" when done. + +--- + +### Step 2: Enter a Topic + +In the left panel's「📝 Content Input」section: + +1. Select「**AI Generate Content**」mode +2. Enter a topic in the text box, for example: + ``` + Why develop a reading habit + ``` +3. (Optional) Set the number of scenes (default: 5 scenes) + +!!! 
tip "Topic Examples" + - Why develop a reading habit + - How to improve work efficiency + - The importance of healthy eating + - The meaning of travel + +--- + +### Step 3: Configure Voice and Visuals + +In the middle panel: + +**Voice Settings** +- Select TTS workflow (default Edge-TTS works well) +- For voice cloning, upload a reference audio file + +**Visual Settings** +- Select image generation workflow (default works well) +- Set image dimensions (default 1024x1024) +- Choose video template (recommend portrait 1080x1920) + +--- + +### Step 4: Generate Video + +Click the「🎬 Generate Video」button in the right panel! + +The system will show real-time progress: +- Generate script +- Generate images (for each scene) +- Synthesize voice +- Compose video + +!!! info "Generation Time" + Generating a 5-scene video takes about 2-5 minutes, depending on: LLM API response speed, image generation speed, TTS workflow type, and network conditions + +--- + +### Step 5: Preview Video + +Once complete, the video will automatically play in the right panel! + +You'll see: +- 📹 Video preview player +- ⏱️ Video duration +- 📦 File size +- 🎬 Number of scenes +- 📐 Video dimensions + +The video file is saved in the `output/` folder. + +--- + +## Next Steps + +Congratulations! You've successfully created your first video 🎉 + +Next, you can: + +- **Adjust Styles** - See the [Custom Visual Style](../tutorials/custom-style.md) tutorial +- **Clone Voices** - See the [Voice Cloning with Reference Audio](../tutorials/voice-cloning.md) tutorial +- **Use API** - See the [API Usage Guide](../user-guide/api.md) +- **Develop Templates** - See the [Template Development Guide](../user-guide/templates.md) + diff --git a/docs/en/index.md b/docs/en/index.md new file mode 100644 index 0000000..6f12a61 --- /dev/null +++ b/docs/en/index.md @@ -0,0 +1,97 @@ +# Pixelle-Video 🎬 + +
+ +**AI Video Creator - Generate a short video in 3 minutes** + +[![Stars](https://img.shields.io/github/stars/PixelleLab/Pixelle-Video.svg?style=flat-square)](https://github.com/PixelleLab/Pixelle-Video/stargazers) +[![Issues](https://img.shields.io/github/issues/PixelleLab/Pixelle-Video.svg?style=flat-square)](https://github.com/PixelleLab/Pixelle-Video/issues) +[![License](https://img.shields.io/github/license/PixelleLab/Pixelle-Video.svg?style=flat-square)](https://github.com/PixelleLab/Pixelle-Video/blob/main/LICENSE) + +
+ +--- + +## 🎯 Overview + +Simply input a **topic**, and Pixelle-Video will automatically: + +- ✍️ Write video scripts +- 🎨 Generate AI images +- 🗣️ Synthesize voice narration +- 🎵 Add background music +- 🎬 Create the final video + +**No barriers, no video editing experience required** - turn video creation into a one-line task! + +--- + +## ✨ Features + +- ✅ **Fully Automated** - Input a topic, get a complete video in 3 minutes +- ✅ **AI-Powered Scripts** - Intelligently create narration based on your topic +- ✅ **AI-Generated Images** - Each sentence comes with beautiful AI illustrations +- ✅ **AI Voice Synthesis** - Support for Edge-TTS, Index-TTS and more mainstream TTS solutions +- ✅ **Background Music** - Add BGM for enhanced atmosphere +- ✅ **Visual Styles** - Multiple templates to create unique video styles +- ✅ **Flexible Dimensions** - Support for portrait, landscape and more video sizes +- ✅ **Multiple AI Models** - Support for GPT, Qianwen, DeepSeek, Ollama, etc. +- ✅ **Flexible Composition** - Based on ComfyUI architecture, use preset workflows or customize any capability + +--- + +## 🎬 Video Examples + +!!! info "Sample Videos" + Coming soon: Video examples will be added here + +--- + +## 🚀 Quick Start + +Ready to get started? Just three steps: + +1. **[Install Pixelle-Video](getting-started/installation.md)** - Download and install the project +2. **[Configure Services](getting-started/configuration.md)** - Set up LLM and image generation services +3. **[Create Your First Video](getting-started/quick-start.md)** - Start creating your first video + +--- + +## 💰 Pricing + +!!! success "Completely free to run!" + + - **Completely Free**: Use Ollama (local) + Local ComfyUI = $0 + - **Recommended**: Use Qianwen LLM (≈$0.01-0.05 per 3-scene video) + Local ComfyUI + - **Cloud Solution**: Use OpenAI + RunningHub (higher cost but no local setup required) + + **Recommendation**: If you have a local GPU, go with the completely free solution. 
Otherwise, we recommend Qianwen for best value. + +--- + +## 🤝 Acknowledgments + +Pixelle-Video was inspired by the following excellent open source projects: + +- [Pixelle-MCP](https://github.com/AIDC-AI/Pixelle-MCP) - ComfyUI MCP server +- [MoneyPrinterTurbo](https://github.com/harry0703/MoneyPrinterTurbo) - Excellent video generation tool +- [NarratoAI](https://github.com/linyqh/NarratoAI) - Video narration automation tool +- [MoneyPrinterPlus](https://github.com/ddean2009/MoneyPrinterPlus) - Video creation platform +- [ComfyKit](https://github.com/puke3615/ComfyKit) - ComfyUI workflow wrapper library + +Thanks to these projects for their open source spirit! 🙏 + +--- + +## 📢 Feedback & Support + +- 🐛 **Found a bug**: Submit an [Issue](https://github.com/PixelleLab/Pixelle-Video/issues) +- 💡 **Feature request**: Submit a [Feature Request](https://github.com/PixelleLab/Pixelle-Video/issues) +- ⭐ **Give us a Star**: If this project helps you, please give us a star! + +--- + +## 📝 License + +This project is licensed under the MIT License. See the [LICENSE](https://github.com/PixelleLab/Pixelle-Video/blob/main/LICENSE) file for details. + diff --git a/docs/en/reference/api-overview.md b/docs/en/reference/api-overview.md new file mode 100644 index 0000000..057bb2a --- /dev/null +++ b/docs/en/reference/api-overview.md @@ -0,0 +1,52 @@ +# API Overview + +Pixelle-Video Python API reference documentation. + +--- + +## Core Classes + +### PixelleVideoCore + +Main service class providing video generation functionality. + +```python +from pixelle_video.service import PixelleVideoCore + +pixelle = PixelleVideoCore() +await pixelle.initialize() +``` + +--- + +## Main Methods + +### generate_video() + +Primary method for generating videos. 
+ +**Parameters**: + +- `text` (str): Topic or complete script +- `mode` (str): Generation mode ("generate" or "fixed") +- `n_scenes` (int): Number of scenes +- `title` (str, optional): Video title +- `tts_workflow` (str): TTS workflow +- `image_workflow` (str): Image generation workflow +- `frame_template` (str): Video template +- `bgm_path` (str, optional): BGM file path + +**Returns**: `VideoResult` object + +--- + +## Examples + +Check the `examples/` directory for more examples. + +--- + +## More Information + +Detailed API documentation coming soon. + diff --git a/docs/en/reference/config-schema.md b/docs/en/reference/config-schema.md new file mode 100644 index 0000000..32ac8a3 --- /dev/null +++ b/docs/en/reference/config-schema.md @@ -0,0 +1,60 @@ +# Config Schema + +Detailed explanation of the `config.yaml` configuration file. + +--- + +## Configuration Structure + +```yaml +llm: + provider: openai + api_key: "your-api-key" + base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1" + model: "qwen-plus" + +comfyui: + comfyui_url: "http://127.0.0.1:8188" + runninghub_api_key: "" + + image: + default_workflow: "runninghub/image_flux.json" + prompt_prefix: "Minimalist illustration style" + + tts: + default_workflow: "selfhost/tts_edge.json" +``` + +--- + +## LLM Configuration + +- `provider`: Provider (currently only supports openai-compatible interfaces) +- `api_key`: API key +- `base_url`: API service address +- `model`: Model name + +--- + +## ComfyUI Configuration + +### Basic Configuration + +- `comfyui_url`: Local ComfyUI address +- `runninghub_api_key`: RunningHub API key (optional) + +### Image Configuration + +- `default_workflow`: Default image generation workflow +- `prompt_prefix`: Prompt prefix + +### TTS Configuration + +- `default_workflow`: Default TTS workflow + +--- + +## More Information + +The configuration file is automatically created on first run. 
+ diff --git a/docs/en/troubleshooting.md b/docs/en/troubleshooting.md new file mode 100644 index 0000000..1d74164 --- /dev/null +++ b/docs/en/troubleshooting.md @@ -0,0 +1,108 @@ +# Troubleshooting + +Having issues? Here are solutions to common problems. + +--- + +## Installation Issues + +### Dependency installation failed + +```bash +# Clean cache +uv cache clean + +# Reinstall +uv sync +``` + +--- + +## Configuration Issues + +### ComfyUI connection failed + +**Possible Causes**: +- ComfyUI not running +- Incorrect URL configuration +- Firewall blocking + +**Solutions**: +1. Confirm ComfyUI is running +2. Check URL configuration (default `http://127.0.0.1:8188`) +3. Test by accessing ComfyUI address in browser +4. Check firewall settings + +### LLM API call failed + +**Possible Causes**: +- Incorrect API Key +- Network issues +- Insufficient balance + +**Solutions**: +1. Verify API Key is correct +2. Check network connection +3. Review error message details +4. Check account balance + +--- + +## Generation Issues + +### Video generation failed + +**Possible Causes**: +- Corrupted workflow file +- Models not downloaded +- Insufficient resources + +**Solutions**: +1. Check if workflow file exists +2. Confirm ComfyUI has downloaded required models +3. Check disk space and memory + +### Image generation failed + +**Solutions**: +1. Check if ComfyUI is running properly +2. Try manually testing workflow in ComfyUI +3. Check workflow configuration + +### TTS generation failed + +**Solutions**: +1. Check if TTS workflow is correct +2. If using voice cloning, check reference audio format +3. Review error logs + +--- + +## Performance Issues + +### Slow generation speed + +**Optimization Tips**: +1. Use local ComfyUI (faster than cloud) +2. Reduce number of scenes +3. Use faster LLM (e.g., Qianwen) +4. Check network connection + +--- + +## Other Issues + +Still having problems? + +1. Check project [GitHub Issues](https://github.com/PixelleLab/Pixelle-Video/issues) +2. 
Submit a new Issue describing your problem +3. Include error logs and configuration details for quick diagnosis + +--- + +## View Logs + +Log files are located in project root: +- `api_server.log` - API service logs +- `test_output.log` - Test logs + diff --git a/docs/en/tutorials/custom-style.md b/docs/en/tutorials/custom-style.md new file mode 100644 index 0000000..ac1fdd5 --- /dev/null +++ b/docs/en/tutorials/custom-style.md @@ -0,0 +1,36 @@ +# Custom Visual Style + +Learn how to adjust image generation parameters to create unique visual styles. + +--- + +## Adjust Prompt Prefix + +The prompt prefix controls overall visual style: + +``` +Minimalist black-and-white illustration, clean lines, simple style +``` + +--- + +## Adjust Image Dimensions + +Different dimensions for different scenarios: + +- **1024x1024**: Square, suitable for Xiaohongshu +- **1080x1920**: Portrait, suitable for TikTok, Kuaishou +- **1920x1080**: Landscape, suitable for Bilibili, YouTube + +--- + +## Preview Effects + +Use the "Preview Style" feature to test different configurations. + +--- + +## More Information + +More style customization tips coming soon. + diff --git a/docs/en/tutorials/voice-cloning.md b/docs/en/tutorials/voice-cloning.md new file mode 100644 index 0000000..9e2d2e9 --- /dev/null +++ b/docs/en/tutorials/voice-cloning.md @@ -0,0 +1,35 @@ +# Voice Cloning + +Use reference audio to implement voice cloning functionality. + +--- + +## Prepare Reference Audio + +1. Prepare a clear audio file (MP3/WAV/FLAC) +2. Recommended duration: 10-30 seconds +3. Avoid background noise + +--- + +## Usage Steps + +1. Select a TTS workflow that supports voice cloning (e.g., Index-TTS) in voice settings +2. Upload reference audio file +3. Test effects with "Preview Voice" +4. 
Generate video + +--- + +## Notes + +- Not all TTS workflows support voice cloning +- Reference audio quality affects cloning results +- Edge-TTS does not support voice cloning + +--- + +## More Information + +Detailed voice cloning tutorial coming soon. + diff --git a/docs/en/tutorials/your-first-video.md b/docs/en/tutorials/your-first-video.md new file mode 100644 index 0000000..53d39ac --- /dev/null +++ b/docs/en/tutorials/your-first-video.md @@ -0,0 +1,33 @@ +# Your First Video + +Step-by-step guide to creating your first video with Pixelle-Video. + +--- + +## Prerequisites + +Make sure you've completed: + +- ✅ [Installation](../getting-started/installation.md) +- ✅ [Configuration](../getting-started/configuration.md) + +--- + +## Tutorial Steps + +For detailed steps, see [Quick Start](../getting-started/quick-start.md). + +--- + +## Tips + +- Choose an appropriate topic for better results +- Start with 3-5 scenes for first generation +- Preview voice and image effects before generating + +--- + +## Troubleshooting + +Having issues? Check out [FAQ](../faq.md) or [Troubleshooting](../troubleshooting.md). + diff --git a/docs/en/user-guide/api.md b/docs/en/user-guide/api.md new file mode 100644 index 0000000..e39a0a8 --- /dev/null +++ b/docs/en/user-guide/api.md @@ -0,0 +1,42 @@ +# API Usage + +Pixelle-Video provides a complete Python API for easy integration into your projects. + +--- + +## Quick Start + +```python +from pixelle_video.service import PixelleVideoCore +import asyncio + +async def main(): + # Initialize + pixelle = PixelleVideoCore() + await pixelle.initialize() + + # Generate video + result = await pixelle.generate_video( + text="Why develop a reading habit", + mode="generate", + n_scenes=5 + ) + + print(f"Video generated: {result.video_path}") + +# Run +asyncio.run(main()) +``` + +--- + +## API Reference + +For detailed API documentation, see [API Overview](../reference/api-overview.md). 
+ +--- + +## Examples + +For more usage examples, check the `examples/` directory in the project. + diff --git a/docs/en/user-guide/templates.md b/docs/en/user-guide/templates.md new file mode 100644 index 0000000..346749c --- /dev/null +++ b/docs/en/user-guide/templates.md @@ -0,0 +1,48 @@ +# Template Development + +How to create custom video templates. + +--- + +## Template Introduction + +Video templates use HTML to define the layout and style of video frames. + +--- + +## Template Structure + +Templates are located in the `templates/` directory, grouped by size: + +``` +templates/ +├── 1080x1920/ # Portrait +├── 1920x1080/ # Landscape +└── 1080x1080/ # Square +``` + +--- + +## Creating Templates + +1. Copy an existing template file +2. Modify HTML and CSS +3. Save to the corresponding size directory +4. Select and use in Web interface + +--- + +## Template Variables + +Templates support the following variables: + +- `{{ title }}` - Video title +- `{{ text }}` - Scene text +- `{{ image }}` - Scene image + +--- + +## More Information + +Detailed template development guide coming soon. + diff --git a/docs/en/user-guide/web-ui.md b/docs/en/user-guide/web-ui.md new file mode 100644 index 0000000..76846af --- /dev/null +++ b/docs/en/user-guide/web-ui.md @@ -0,0 +1,77 @@ +# Web UI Guide + +Detailed introduction to the Pixelle-Video Web interface features. + +--- + +## Interface Layout + +The Web interface uses a three-column layout: + +- **Left Panel**: Content input and audio settings +- **Middle Panel**: Voice and visual settings +- **Right Panel**: Video generation and preview + +--- + +## System Configuration + +First-time use requires configuring LLM and image generation services. See [Configuration Guide](../getting-started/configuration.md). 
+ +--- + +## Content Input + +### Generation Mode + +- **AI Generate Content**: Enter a topic, AI creates script automatically +- **Fixed Script Content**: Enter complete script directly + +### Background Music + +- Built-in music supported +- Custom music files supported + +--- + +## Voice Settings + +### TTS Workflow + +- Select TTS workflow +- Supports Edge-TTS, Index-TTS, etc. + +### Reference Audio + +- Upload reference audio for voice cloning +- Supports MP3/WAV/FLAC formats + +--- + +## Visual Settings + +### Image Generation + +- Select image generation workflow +- Set image dimensions +- Adjust prompt prefix to control style + +### Video Template + +- Choose video template +- Supports portrait/landscape/square +- Preview template effects + +--- + +## Generate Video + +After clicking "Generate Video", the system will: + +1. Generate video script +2. Generate images for each scene +3. Synthesize voice narration +4. Compose final video + +The video is previewed automatically when generation completes. + diff --git a/docs/en/user-guide/workflows.md b/docs/en/user-guide/workflows.md new file mode 100644 index 0000000..0538376 --- /dev/null +++ b/docs/en/user-guide/workflows.md @@ -0,0 +1,37 @@ +# Workflow Customization + +How to customize ComfyUI workflows to achieve specific functionality. + +--- + +## Workflow Introduction + +Pixelle-Video is built on the ComfyUI architecture and supports custom workflows. + +--- + +## Workflow Types + +### TTS Workflows + +Located in `workflows/selfhost/` or `workflows/runninghub/` + +### Image Generation Workflows + +Located in `workflows/selfhost/` or `workflows/runninghub/` + +--- + +## Custom Workflows + +1. Design your workflow in ComfyUI +2. Export as JSON file +3. Place in `workflows/` directory +4. Select and use in Web interface + +--- + +## More Information + +Detailed workflow customization guide coming soon. 
+ diff --git a/docs/gallery/index.md b/docs/gallery/index.md new file mode 100644 index 0000000..015c9af --- /dev/null +++ b/docs/gallery/index.md @@ -0,0 +1,78 @@ +# 🎬 视频示例库 / Video Gallery + +展示使用 Pixelle-Video 制作的各类视频案例,包含完整的制作参数和资源下载。 + +Showcase of videos created with Pixelle-Video, including complete production parameters and downloadable resources. + +--- + +## 📚 案例列表 / Cases + +
+ +- :material-book-open-variant:{ .lg .middle } **阅读习惯养成** + + --- + + ![视频缩略图](https://via.placeholder.com/400x225?text=Reading+Habit) + + **时长 Duration**: 45s | **分镜 Scenes**: 5 | **尺寸 Size**: 1080x1920 + + 一个关于为什么要养成阅读习惯的教育科普视频。 + + An educational video about why we should develop reading habits. + + [:octicons-arrow-right-24: 查看详情 View Details](reading-habit/) + +- :material-chart-line:{ .lg .middle } **提高工作效率** + + --- + + ![视频缩略图](https://via.placeholder.com/400x225?text=Work+Efficiency) + + **时长 Duration**: 30s | **分镜 Scenes**: 3 | **尺寸 Size**: 1920x1080 + + 关于如何提高工作效率的实用技巧分享。 + + Practical tips on improving work efficiency. + + [:octicons-arrow-right-24: 查看详情 View Details](#) *(即将推出 Coming soon)* + +- :material-food-apple:{ .lg .middle } **健康饮食** + + --- + + ![视频缩略图](https://via.placeholder.com/400x225?text=Healthy+Diet) + + **时长 Duration**: 60s | **分镜 Scenes**: 6 | **尺寸 Size**: 1080x1080 + + 健康饮食的重要性和实用建议。 + + The importance of healthy eating and practical advice. + + [:octicons-arrow-right-24: 查看详情 View Details](#) *(即将推出 Coming soon)* + +
+ +--- + +## 🎯 如何使用这些案例 / How to Use + +每个案例都包含:/ Each case includes: + +- **📹 成品视频 Video**: OSS 托管的完整视频 / Complete video hosted on OSS +- **⚙️ 工作流文件 Workflows**: ComfyUI 工作流 JSON / ComfyUI workflow JSON files +- **📝 配置文件 Config**: 完整的生成配置 / Complete generation configuration +- **🎨 提示词 Prompts**: 所有使用的提示词 / All prompts used +- **📥 一键复现 Reproduce**: 可直接导入使用 / Can be imported directly + +--- + +## 💡 贡献你的案例 / Contribute Your Case + +制作了优秀的视频?欢迎分享!/ Created an awesome video? Share it with us! + +查看 [贡献指南](../zh/development/contributing.md) 了解如何提交你的案例。 + +See [Contributing Guide](../en/development/contributing.md) to learn how to submit your case. + diff --git a/docs/gallery/reading-habit/prompts.txt b/docs/gallery/reading-habit/prompts.txt new file mode 100644 index 0000000..e4cf9c6 --- /dev/null +++ b/docs/gallery/reading-habit/prompts.txt @@ -0,0 +1 @@ +为什么要养成阅读习惯 \ No newline at end of file diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 0000000..7055315 --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,17 @@ +/* Custom styles for Pixelle-Video documentation */ + +:root { + --md-primary-fg-color: #5C6BC0; + --md-accent-fg-color: #FF4081; +} + +/* Better code block styling */ +.highlight pre { + border-radius: 4px; +} + +/* Admonition custom styling */ +.md-typeset .admonition { + border-radius: 4px; +} + diff --git a/docs/zh/development/architecture.md b/docs/zh/development/architecture.md new file mode 100644 index 0000000..c318796 --- /dev/null +++ b/docs/zh/development/architecture.md @@ -0,0 +1,54 @@ +# 架构设计 + +Pixelle-Video 的技术架构概览。 + +--- + +## 核心架构 + +Pixelle-Video 采用分层架构设计: + +- **Web 层**: Streamlit Web 界面 +- **服务层**: 核心业务逻辑 +- **ComfyUI 层**: 图像和TTS生成 + +--- + +## 主要组件 + +### PixelleVideoCore + +核心服务类,协调各个子服务。 + +### LLM Service + +负责调用大语言模型生成文案。 + +### Image Service + +负责调用 ComfyUI 生成图像。 + +### TTS Service + +负责调用 ComfyUI 生成语音。 + +### Video Generator + +负责合成最终视频。 + +--- + +## 技术栈 + +- **后端**: Python 
3.10+, AsyncIO +- **Web**: Streamlit +- **AI**: OpenAI API, ComfyUI +- **配置**: YAML +- **工具**: uv (包管理) + +--- + +## 更多信息 + +详细的架构文档即将推出。 + diff --git a/docs/zh/development/contributing.md b/docs/zh/development/contributing.md new file mode 100644 index 0000000..5030270 --- /dev/null +++ b/docs/zh/development/contributing.md @@ -0,0 +1,50 @@ +# 贡献指南 + +感谢你对 Pixelle-Video 的贡献兴趣! + +--- + +## 如何贡献 + +1. Fork 项目仓库 +2. 创建功能分支 (`git checkout -b feature/AmazingFeature`) +3. 提交更改 (`git commit -m 'Add some AmazingFeature'`) +4. 推送到分支 (`git push origin feature/AmazingFeature`) +5. 开启 Pull Request + +--- + +## 开发设置 + +```bash +# 克隆你的 fork +git clone https://github.com/your-username/Pixelle-Video.git +cd Pixelle-Video + +# 安装开发依赖 +uv sync + +# 运行测试 +pytest +``` + +--- + +## 代码规范 + +- 所有代码和注释使用英文 +- 遵循 PEP 8 规范 +- 添加适当的测试 + +--- + +## 提交 Issue + +遇到问题或有功能建议?请在 [GitHub Issues](https://github.com/PixelleLab/Pixelle-Video/issues) 提交。 + +--- + +## 行为准则 + +请保持友好和尊重,我们致力于营造包容的社区环境。 + diff --git a/docs/zh/faq.md b/docs/zh/faq.md new file mode 100644 index 0000000..7253900 --- /dev/null +++ b/docs/zh/faq.md @@ -0,0 +1,78 @@ +# 常见问题 + +常见问题解答。 + +--- + +## 安装相关 + +### Q: 如何安装 uv? + +```bash +curl -LsSf https://astral.sh/uv/install.sh | sh +``` + +### Q: 可以不用 uv 吗? + +可以,你也可以使用传统的 pip + venv 方式。 + +--- + +## 配置相关 + +### Q: 必须要配置 ComfyUI 吗? + +不一定。你可以使用 RunningHub 云端服务,无需本地部署。 + +### Q: 支持哪些 LLM? + +支持所有 OpenAI 兼容接口的 LLM,包括: +- 通义千问 +- GPT-4o +- DeepSeek +- Ollama(本地) + +--- + +## 使用相关 + +### Q: 第一次使用需要多久? + +生成一个 3-5 分镜的视频大约需要 2-5 分钟。 + +### Q: 视频效果不满意怎么办? + +可以尝试: +1. 更换 LLM 模型 +2. 调整图像尺寸和提示词前缀 +3. 更换 TTS 工作流 +4. 尝试不同的视频模板 + +### Q: 费用大概多少? + +- **完全免费**: Ollama + 本地 ComfyUI = 0 元 +- **推荐方案**: 通义千问 + 本地 ComfyUI ≈ 0.01-0.05 元/视频 +- **云端方案**: OpenAI + RunningHub(费用较高) + +--- + +## 故障排查 + +### Q: ComfyUI 连接失败 + +1. 确认 ComfyUI 正在运行 +2. 检查 URL 是否正确 +3. 在 Web 界面点击「测试连接」 + +### Q: LLM API 调用失败 + +1. 检查 API Key 是否正确 +2. 检查网络连接 +3. 
查看错误提示 + +--- + +## 其他问题 + +有其他问题?请查看 [故障排查](troubleshooting.md) 或提交 [Issue](https://github.com/PixelleLab/Pixelle-Video/issues)。 + diff --git a/docs/zh/gallery/index.md b/docs/zh/gallery/index.md new file mode 100644 index 0000000..04d8d12 --- /dev/null +++ b/docs/zh/gallery/index.md @@ -0,0 +1,45 @@ +# 🎬 视频示例库 + +展示使用 Pixelle-Video 制作的视频案例。点击卡片查看完整的工作流和配置文件。 + +--- + +
+ +- **阅读习惯养成** + + --- + + + + [:octicons-mark-github-16: 查看工作流和配置](https://github.com/PixelleLab/Pixelle-Video/tree/main/docs/gallery/reading-habit) + +- **工作效率提升** + + --- + + + + [:octicons-mark-github-16: 查看工作流和配置](https://github.com/PixelleLab/Pixelle-Video/tree/main/docs/gallery/work-efficiency) + +- **健康饮食** + + --- + + + + [:octicons-mark-github-16: 查看工作流和配置](https://github.com/PixelleLab/Pixelle-Video/tree/main/docs/gallery/healthy-diet) + +
+ +--- + +!!! tip "如何使用" + 点击案例卡片跳转到 GitHub,下载工作流文件和配置,即可一键复现视频效果。 + diff --git a/docs/zh/getting-started/configuration.md b/docs/zh/getting-started/configuration.md new file mode 100644 index 0000000..76a4875 --- /dev/null +++ b/docs/zh/getting-started/configuration.md @@ -0,0 +1,60 @@ +# 配置说明 + +完成安装后,需要配置服务才能使用 Pixelle-Video。 + +--- + +## LLM 配置 + +LLM(大语言模型)用于生成视频文案。 + +### 快速选择预设 + +1. 从下拉菜单选择预设模型: + - 通义千问(推荐,性价比高) + - GPT-4o + - DeepSeek + - Ollama(本地运行,完全免费) + +2. 系统会自动填充 `base_url` 和 `model` + +3. 点击「🔑 获取 API Key」链接,注册并获取密钥 + +4. 填入 API Key + +--- + +## 图像配置 + +支持两种方式: + +### 本地部署(推荐) + +使用本地 ComfyUI 服务: + +1. 安装并启动 ComfyUI +2. 填写 ComfyUI URL(默认 `http://127.0.0.1:8188`) +3. 点击「测试连接」确认服务可用 + +### 云端部署 + +使用 RunningHub 云端服务: + +1. 注册 RunningHub 账号 +2. 获取 API Key +3. 在配置中填写 API Key + +--- + +## 保存配置 + +填写完所有必需的配置后,点击「保存配置」按钮。 + +配置会保存到 `config.yaml` 文件中。 + +--- + +## 下一步 + +- [快速开始](quick-start.md) - 生成你的第一个视频 + diff --git a/docs/zh/getting-started/installation.md b/docs/zh/getting-started/installation.md new file mode 100644 index 0000000..c177237 --- /dev/null +++ b/docs/zh/getting-started/installation.md @@ -0,0 +1,115 @@ +# 安装 + +本页面将指导你完成 Pixelle-Video 的安装。 + +--- + +## 系统要求 + +### 必需条件 + +- **Python**: 3.10 或更高版本 +- **操作系统**: Windows、macOS 或 Linux +- **包管理器**: uv(推荐)或 pip + +### 可选条件 + +- **GPU**: 如需本地运行 ComfyUI,建议配备 NVIDIA 显卡(6GB+ 显存) +- **网络**: 稳定的网络连接(用于调用 LLM API 和图像生成服务) + +--- + +## 安装步骤 + +### 第一步:克隆项目 + +```bash +git clone https://github.com/PixelleLab/Pixelle-Video.git +cd Pixelle-Video +``` + +### 第二步:安装依赖 + +!!! tip "推荐使用 uv" + 本项目使用 `uv` 作为包管理器,它比传统的 pip 更快、更可靠。 + +#### 使用 uv(推荐) + +```bash +# 如果还没有安装 uv,先安装它 +curl -LsSf https://astral.sh/uv/install.sh | sh + +# 安装项目依赖(uv 会自动创建虚拟环境) +uv sync +``` + +#### 使用 pip + +```bash +# 创建虚拟环境 +python -m venv venv + +# 激活虚拟环境 +# Windows: +venv\Scripts\activate +# macOS/Linux: +source venv/bin/activate + +# 安装依赖 +pip install -e . 
+``` + +--- + +## 验证安装 + +运行以下命令验证安装是否成功: + +```bash +# 使用 uv +uv run streamlit run web/app.py + +# 或使用 pip(需先激活虚拟环境) +streamlit run web/app.py +``` + +浏览器应该会自动打开 `http://localhost:8501`,显示 Pixelle-Video 的 Web 界面。 + +!!! success "安装成功!" + 如果能看到 Web 界面,说明安装成功了!接下来请查看 [配置说明](configuration.md) 来设置服务。 + +--- + +## 可选:安装 ComfyUI(本地部署) + +如果希望本地运行图像生成服务,需要安装 ComfyUI: + +### 快速安装 + +```bash +# 克隆 ComfyUI +git clone https://github.com/comfyanonymous/ComfyUI.git +cd ComfyUI + +# 安装依赖 +pip install -r requirements.txt +``` + +### 启动 ComfyUI + +```bash +python main.py +``` + +ComfyUI 默认运行在 `http://127.0.0.1:8188` + +!!! info "ComfyUI 模型" + ComfyUI 需要下载对应的模型文件才能工作。请参考 [ComfyUI 官方文档](https://github.com/comfyanonymous/ComfyUI) 了解如何下载和配置模型。 + +--- + +## 下一步 + +- [配置服务](configuration.md) - 配置 LLM 和图像生成服务 +- [快速开始](quick-start.md) - 生成第一个视频 + diff --git a/docs/zh/getting-started/quick-start.md b/docs/zh/getting-started/quick-start.md new file mode 100644 index 0000000..83c4fcb --- /dev/null +++ b/docs/zh/getting-started/quick-start.md @@ -0,0 +1,107 @@ +# 快速开始 + +已经完成安装和配置?让我们生成第一个视频吧! + +--- + +## 启动 Web 界面 + +```bash +# 使用 uv 运行 +uv run streamlit run web/app.py +``` + +浏览器会自动打开 `http://localhost:8501` + +--- + +## 生成你的第一个视频 + +### 步骤一:检查配置 + +首次使用时,展开「⚙️ 系统配置」面板,确认已配置: + +- **LLM 配置**: 选择 AI 模型(如通义千问、GPT 等)并填入 API Key +- **图像配置**: 配置 ComfyUI 地址或 RunningHub API Key + +如果还没有配置,请查看 [配置说明](configuration.md)。 + +配置好后点击「保存配置」。 + +--- + +### 步骤二:输入主题 + +在左侧栏的「📝 内容输入」区域: + +1. 选择「**AI 生成内容**」模式 +2. 在文本框中输入一个主题,例如: + ``` + 为什么要养成阅读习惯 + ``` +3. (可选)设置场景数量,默认 5 个分镜 + +!!! tip "主题示例" + - 为什么要养成阅读习惯 + - 如何提高工作效率 + - 健康饮食的重要性 + - 旅行的意义 + +--- + +### 步骤三:配置语音和视觉 + +在中间栏: + +**语音设置** +- 选择 TTS 工作流(默认 Edge-TTS 即可) +- 如需声音克隆,可上传参考音频 + +**视觉设置** +- 选择图像生成工作流(默认即可) +- 设置图像尺寸(默认 1024x1024) +- 选择视频模板(推荐竖屏 1080x1920) + +--- + +### 步骤四:生成视频 + +点击右侧栏的「🎬 生成视频」按钮! + +系统会显示实时进度: +- 生成文案 +- 生成配图(每个分镜) +- 合成语音 +- 合成视频 + +!!! 
info "生成时间" + 生成一个 5 分镜的视频大约需要 2-5 分钟,具体时间取决于:LLM API 响应速度、图像生成速度、TTS 工作流类型、网络状况 + +--- + +### 步骤五:预览视频 + +生成完成后,视频会自动在右侧栏播放! + +你可以看到: +- 📹 视频预览播放器 +- ⏱️ 视频时长 +- 📦 文件大小 +- 🎬 分镜数量 +- 📐 视频尺寸 + +视频文件保存在 `output/` 文件夹中。 + +--- + +## 下一步探索 + +恭喜!你已经成功生成了第一个视频 🎉 + +接下来你可以: + +- **调整风格** - 查看 [自定义视觉风格](../tutorials/custom-style.md) 教程 +- **克隆声音** - 查看 [使用参考音频克隆声音](../tutorials/voice-cloning.md) 教程 +- **使用 API** - 查看 [API 使用指南](../user-guide/api.md) +- **开发模板** - 查看 [模板开发指南](../user-guide/templates.md) + diff --git a/docs/zh/index.md b/docs/zh/index.md new file mode 100644 index 0000000..a092cca --- /dev/null +++ b/docs/zh/index.md @@ -0,0 +1,97 @@ +# Pixelle-Video 🎬 + +
+ +**AI 视频创作工具 - 3 分钟生成一个短视频** + +[![Stars](https://img.shields.io/github/stars/PixelleLab/Pixelle-Video.svg?style=flat-square)](https://github.com/PixelleLab/Pixelle-Video/stargazers) +[![Issues](https://img.shields.io/github/issues/PixelleLab/Pixelle-Video.svg?style=flat-square)](https://github.com/PixelleLab/Pixelle-Video/issues) +[![License](https://img.shields.io/github/license/PixelleLab/Pixelle-Video.svg?style=flat-square)](https://github.com/PixelleLab/Pixelle-Video/blob/main/LICENSE) + +
+ +--- + +## 🎯 项目简介 + +只需输入一个 **主题**,Pixelle-Video 就能自动完成: + +- ✍️ 撰写视频文案 +- 🎨 生成 AI 配图 +- 🗣️ 合成语音解说 +- 🎵 添加背景音乐 +- 🎬 一键合成视频 + +**零门槛,零剪辑经验**,让视频创作成为一句话的事! + +--- + +## ✨ 功能亮点 + +- ✅ **全自动生成** - 输入主题,3 分钟自动生成完整视频 +- ✅ **AI 智能文案** - 根据主题智能创作解说词,无需自己写脚本 +- ✅ **AI 生成配图** - 每句话都配上精美的 AI 插图 +- ✅ **AI 生成语音** - 支持 Edge-TTS、Index-TTS 等众多主流 TTS 方案 +- ✅ **背景音乐** - 支持添加 BGM,让视频更有氛围 +- ✅ **视觉风格** - 多种模板可选,打造独特视频风格 +- ✅ **灵活尺寸** - 支持竖屏、横屏等多种视频尺寸 +- ✅ **多种 AI 模型** - 支持 GPT、通义千问、DeepSeek、Ollama 等 +- ✅ **原子能力灵活组合** - 基于 ComfyUI 架构,可使用预置工作流,也可自定义任意能力 + +--- + +## 🎬 视频示例 + +!!! info "示例视频" + 待补充:这里可以添加一些生成的视频示例 + +--- + +## 🚀 快速开始 + +想马上体验?只需三步: + +1. **[安装 Pixelle-Video](getting-started/installation.md)** - 下载并安装项目 +2. **[配置服务](getting-started/configuration.md)** - 配置 LLM 和图像生成服务 +3. **[生成第一个视频](getting-started/quick-start.md)** - 开始创作你的第一个视频 + +--- + +## 💰 费用说明 + +!!! success "完全支持免费运行!" + + - **完全免费方案**: LLM 使用 Ollama(本地运行)+ ComfyUI 本地部署 = 0 元 + - **推荐方案**: LLM 使用通义千问(生成一个 3 段视频约 0.01-0.05 元)+ ComfyUI 本地部署 + - **云端方案**: LLM 使用 OpenAI + 图像使用 RunningHub(费用较高但无需本地环境) + + **选择建议**:本地有显卡建议完全免费方案,否则推荐使用通义千问(性价比高) + +--- + +## 🤝 参考项目 + +Pixelle-Video 的设计受到以下优秀开源项目的启发: + +- [Pixelle-MCP](https://github.com/AIDC-AI/Pixelle-MCP) - ComfyUI MCP 服务器,让 AI 助手直接调用 ComfyUI +- [MoneyPrinterTurbo](https://github.com/harry0703/MoneyPrinterTurbo) - 优秀的视频生成工具 +- [NarratoAI](https://github.com/linyqh/NarratoAI) - 影视解说自动化工具 +- [MoneyPrinterPlus](https://github.com/ddean2009/MoneyPrinterPlus) - 视频创作平台 +- [ComfyKit](https://github.com/puke3615/ComfyKit) - ComfyUI 工作流封装库 + +感谢这些项目的开源精神!🙏 + +--- + +## 📢 反馈与支持 + +- 🐛 **遇到问题**: 提交 [Issue](https://github.com/PixelleLab/Pixelle-Video/issues) +- 💡 **功能建议**: 提交 [Feature Request](https://github.com/PixelleLab/Pixelle-Video/issues) +- ⭐ **给个 Star**: 如果这个项目对你有帮助,欢迎给个 Star 支持一下! 
+ +--- + +## 📝 许可证 + +本项目采用 MIT 许可证,详情请查看 [LICENSE](https://github.com/PixelleLab/Pixelle-Video/blob/main/LICENSE) 文件。 + diff --git a/docs/zh/reference/api-overview.md b/docs/zh/reference/api-overview.md new file mode 100644 index 0000000..2939719 --- /dev/null +++ b/docs/zh/reference/api-overview.md @@ -0,0 +1,52 @@ +# API 概览 + +Pixelle-Video Python API 参考文档。 + +--- + +## 核心类 + +### PixelleVideoCore + +主要服务类,提供视频生成功能。 + +```python +from pixelle_video.service import PixelleVideoCore + +pixelle = PixelleVideoCore() +await pixelle.initialize() +``` + +--- + +## 主要方法 + +### generate_video() + +生成视频的主要方法。 + +**参数**: + +- `text` (str): 主题或完整文案 +- `mode` (str): 生成模式 ("generate" 或 "fixed") +- `n_scenes` (int): 分镜数量 +- `title` (str, optional): 视频标题 +- `tts_workflow` (str): TTS 工作流 +- `image_workflow` (str): 图像生成工作流 +- `frame_template` (str): 视频模板 +- `bgm_path` (str, optional): BGM 文件路径 + +**返回**: `VideoResult` 对象 + +--- + +## 示例 + +查看 `examples/` 目录获取更多示例。 + +--- + +## 更多信息 + +详细的 API 文档即将推出。 + diff --git a/docs/zh/reference/config-schema.md b/docs/zh/reference/config-schema.md new file mode 100644 index 0000000..b917771 --- /dev/null +++ b/docs/zh/reference/config-schema.md @@ -0,0 +1,60 @@ +# 配置文件详解 + +`config.yaml` 配置文件的详细说明。 + +--- + +## 配置结构 + +```yaml +llm: + provider: openai + api_key: "your-api-key" + base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1" + model: "qwen-plus" + +comfyui: + comfyui_url: "http://127.0.0.1:8188" + runninghub_api_key: "" + + image: + default_workflow: "runninghub/image_flux.json" + prompt_prefix: "Minimalist illustration style" + + tts: + default_workflow: "selfhost/tts_edge.json" +``` + +--- + +## LLM 配置 + +- `provider`: 提供商(目前仅支持 openai 兼容接口) +- `api_key`: API 密钥 +- `base_url`: API 服务地址 +- `model`: 模型名称 + +--- + +## ComfyUI 配置 + +### 基础配置 + +- `comfyui_url`: 本地 ComfyUI 地址 +- `runninghub_api_key`: RunningHub API 密钥(可选) + +### 图像配置 + +- `default_workflow`: 默认图像生成工作流 +- `prompt_prefix`: 提示词前缀 + +### TTS 配置 + +- 
`default_workflow`: 默认 TTS 工作流 + +--- + +## 更多信息 + +配置文件会自动在首次运行时创建。 + diff --git a/docs/zh/troubleshooting.md b/docs/zh/troubleshooting.md new file mode 100644 index 0000000..bbe164c --- /dev/null +++ b/docs/zh/troubleshooting.md @@ -0,0 +1,108 @@ +# 故障排查 + +遇到问题?这里有一些常见问题的解决方案。 + +--- + +## 安装问题 + +### 依赖安装失败 + +```bash +# 清理缓存 +uv cache clean + +# 重新安装 +uv sync +``` + +--- + +## 配置问题 + +### ComfyUI 连接失败 + +**可能原因**: +- ComfyUI 未运行 +- URL 配置错误 +- 防火墙阻止 + +**解决方案**: +1. 确认 ComfyUI 正在运行 +2. 检查 URL 配置(默认 `http://127.0.0.1:8188`) +3. 在浏览器中访问 ComfyUI 地址测试 +4. 检查防火墙设置 + +### LLM API 调用失败 + +**可能原因**: +- API Key 错误 +- 网络问题 +- 余额不足 + +**解决方案**: +1. 检查 API Key 是否正确 +2. 检查网络连接 +3. 查看错误提示中的具体原因 +4. 检查账户余额 + +--- + +## 生成问题 + +### 视频生成失败 + +**可能原因**: +- 工作流文件损坏 +- 模型未下载 +- 资源不足 + +**解决方案**: +1. 检查工作流文件是否存在 +2. 确认 ComfyUI 已下载所需模型 +3. 检查磁盘空间和内存 + +### 图像生成失败 + +**解决方案**: +1. 检查 ComfyUI 是否正常运行 +2. 尝试在 ComfyUI 中手动测试工作流 +3. 检查工作流配置 + +### TTS 生成失败 + +**解决方案**: +1. 检查 TTS 工作流是否正确 +2. 如使用声音克隆,检查参考音频格式 +3. 查看错误日志 + +--- + +## 性能问题 + +### 生成速度慢 + +**优化建议**: +1. 使用本地 ComfyUI(比云端快) +2. 减少分镜数量 +3. 使用更快的 LLM(如 Qianwen) +4. 检查网络连接 + +--- + +## 其他问题 + +仍有问题? + +1. 查看项目 [GitHub Issues](https://github.com/PixelleLab/Pixelle-Video/issues) +2. 提交新的 Issue 描述你的问题 +3. 
包含错误日志和配置信息以便快速定位 + +--- + +## 日志查看 + +日志文件位于项目根目录: +- `api_server.log` - API 服务日志 +- `test_output.log` - 测试日志 + diff --git a/docs/zh/tutorials/custom-style.md b/docs/zh/tutorials/custom-style.md new file mode 100644 index 0000000..e579547 --- /dev/null +++ b/docs/zh/tutorials/custom-style.md @@ -0,0 +1,36 @@ +# 自定义视觉风格 + +学习如何调整图像生成参数以创建独特的视觉风格。 + +--- + +## 调整提示词前缀 + +提示词前缀控制整体视觉风格: + +``` +Minimalist black-and-white illustration, clean lines, simple style +``` + +--- + +## 调整图像尺寸 + +不同尺寸适用于不同场景: + +- **1024x1024**: 方形,适合小红书 +- **1080x1920**: 竖屏,适合抖音、快手 +- **1920x1080**: 横屏,适合B站、YouTube + +--- + +## 预览效果 + +使用「预览风格」功能测试不同配置的效果。 + +--- + +## 更多信息 + +即将推出更多风格定制技巧。 + diff --git a/docs/zh/tutorials/voice-cloning.md b/docs/zh/tutorials/voice-cloning.md new file mode 100644 index 0000000..b7b5b1c --- /dev/null +++ b/docs/zh/tutorials/voice-cloning.md @@ -0,0 +1,35 @@ +# 声音克隆 + +使用参考音频实现声音克隆功能。 + +--- + +## 准备参考音频 + +1. 准备一段清晰的音频文件(MP3/WAV/FLAC) +2. 建议时长 10-30 秒 +3. 避免背景噪音 + +--- + +## 使用步骤 + +1. 在语音设置中选择支持声音克隆的 TTS 工作流(如 Index-TTS) +2. 上传参考音频文件 +3. 使用「预览语音」测试效果 +4. 
生成视频 + +--- + +## 注意事项 + +- 不是所有 TTS 工作流都支持声音克隆 +- 参考音频质量会影响克隆效果 +- Edge-TTS 不支持声音克隆 + +--- + +## 更多信息 + +即将推出更详细的声音克隆教程。 + diff --git a/docs/zh/tutorials/your-first-video.md b/docs/zh/tutorials/your-first-video.md new file mode 100644 index 0000000..735bf28 --- /dev/null +++ b/docs/zh/tutorials/your-first-video.md @@ -0,0 +1,33 @@ +# 生成你的第一个视频 + +手把手教你使用 Pixelle-Video 生成第一个视频。 + +--- + +## 前置准备 + +确保已完成: + +- ✅ [安装](../getting-started/installation.md) +- ✅ [配置](../getting-started/configuration.md) + +--- + +## 教程步骤 + +详细步骤请查看 [快速开始](../getting-started/quick-start.md)。 + +--- + +## 小贴士 + +- 选择合适的主题可以获得更好的效果 +- 首次生成建议使用3-5个分镜 +- 可以先预览语音和图像效果 + +--- + +## 常见问题 + +遇到问题?查看 [FAQ](../faq.md) 或 [故障排查](../troubleshooting.md)。 + diff --git a/docs/zh/user-guide/api.md b/docs/zh/user-guide/api.md new file mode 100644 index 0000000..44416c0 --- /dev/null +++ b/docs/zh/user-guide/api.md @@ -0,0 +1,42 @@ +# API 使用 + +Pixelle-Video 提供完整的 Python API,方便集成到你的项目中。 + +--- + +## 快速开始 + +```python +from pixelle_video.service import PixelleVideoCore +import asyncio + +async def main(): + # 初始化 + pixelle = PixelleVideoCore() + await pixelle.initialize() + + # 生成视频 + result = await pixelle.generate_video( + text="为什么要养成阅读习惯", + mode="generate", + n_scenes=5 + ) + + print(f"视频已生成: {result.video_path}") + +# 运行 +asyncio.run(main()) +``` + +--- + +## API 参考 + +详细 API 文档请查看 [API 概览](../reference/api-overview.md)。 + +--- + +## 示例 + +更多使用示例请参考项目的 `examples/` 目录。 + diff --git a/docs/zh/user-guide/templates.md b/docs/zh/user-guide/templates.md new file mode 100644 index 0000000..3965bcf --- /dev/null +++ b/docs/zh/user-guide/templates.md @@ -0,0 +1,48 @@ +# 模板开发 + +如何创建自定义视频模板。 + +--- + +## 模板简介 + +视频模板使用 HTML 定义视频画面的布局和样式。 + +--- + +## 模板结构 + +模板位于 `templates/` 目录,按尺寸分组: + +``` +templates/ +├── 1080x1920/ # 竖屏 +├── 1920x1080/ # 横屏 +└── 1080x1080/ # 方形 +``` + +--- + +## 创建模板 + +1. 复制现有模板文件 +2. 修改 HTML 和 CSS +3. 保存到对应尺寸目录 +4. 
在 Web 界面中选择使用 + +--- + +## 模板变量 + +模板支持以下变量: + +- `{{ title }}` - 视频标题 +- `{{ text }}` - 分镜文本 +- `{{ image }}` - 分镜图片 + +--- + +## 更多信息 + +详细的模板开发指南即将推出。 + diff --git a/docs/zh/user-guide/web-ui.md b/docs/zh/user-guide/web-ui.md new file mode 100644 index 0000000..a0cad90 --- /dev/null +++ b/docs/zh/user-guide/web-ui.md @@ -0,0 +1,77 @@ +# Web 界面使用指南 + +详细介绍 Pixelle-Video Web 界面的各项功能。 + +--- + +## 界面布局 + +Web 界面采用三栏布局: + +- **左侧栏**: 内容输入与音频设置 +- **中间栏**: 语音与视觉设置 +- **右侧栏**: 视频生成与预览 + +--- + +## 系统配置 + +首次使用需要配置 LLM 和图像生成服务。详见 [配置说明](../getting-started/configuration.md)。 + +--- + +## 内容输入 + +### 生成模式 + +- **AI 生成内容**: 输入主题,AI 自动创作文案 +- **固定文案内容**: 直接输入完整文案 + +### 背景音乐 + +- 支持内置音乐 +- 支持自定义音乐文件 + +--- + +## 语音设置 + +### TTS 工作流 + +- 选择 TTS 工作流 +- 支持 Edge-TTS、Index-TTS 等 + +### 参考音频 + +- 上传参考音频进行声音克隆 +- 支持 MP3/WAV/FLAC 等格式 + +--- + +## 视觉设置 + +### 图像生成 + +- 选择图像生成工作流 +- 设置图像尺寸 +- 调整提示词前缀控制风格 + +### 视频模板 + +- 选择视频模板 +- 支持竖屏/横屏/方形 +- 可预览模板效果 + +--- + +## 生成视频 + +点击「生成视频」按钮后,系统会: + +1. 生成视频文案 +2. 为每个分镜生成配图 +3. 合成语音解说 +4. 合成最终视频 + +生成完成后自动预览。 + diff --git a/docs/zh/user-guide/workflows.md b/docs/zh/user-guide/workflows.md new file mode 100644 index 0000000..a63e324 --- /dev/null +++ b/docs/zh/user-guide/workflows.md @@ -0,0 +1,37 @@ +# 工作流定制 + +如何自定义 ComfyUI 工作流以实现特定功能。 + +--- + +## 工作流简介 + +Pixelle-Video 基于 ComfyUI 架构,支持自定义工作流。 + +--- + +## 工作流类型 + +### TTS 工作流 + +位于 `workflows/selfhost/` 或 `workflows/runninghub/` + +### 图像生成工作流 + +位于 `workflows/selfhost/` 或 `workflows/runninghub/` + +--- + +## 自定义工作流 + +1. 在 ComfyUI 中设计你的工作流 +2. 导出为 JSON 文件 +3. 放置到 `workflows/` 目录 +4. 
在 Web 界面中选择使用 + +--- + +## 更多信息 + +即将推出更详细的工作流定制指南。 + diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..3847531 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,171 @@ +site_name: Pixelle-Video +site_description: AI Video Creator - Generate a short video in 3 minutes +site_author: PixelleLab +site_url: https://pixellelab.github.io/Pixelle-Video/ + +repo_name: PixelleLab/Pixelle-Video +repo_url: https://github.com/PixelleLab/Pixelle-Video +edit_uri: edit/main/docs/ + +copyright: Copyright © 2025 PixelleLab + +theme: + name: material + language: en + palette: + # Light mode + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + # Dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: indigo + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to light mode + + font: + text: Roboto + code: Roboto Mono + + features: + - navigation.instant # Instant loading + - navigation.tracking # Anchor tracking + - navigation.tabs # Top-level tabs + - navigation.tabs.sticky # Sticky tabs + - navigation.sections # Sidebar sections + - navigation.expand # Expand sections + - navigation.top # Back to top button + - navigation.footer # Footer navigation + - search.suggest # Search suggestions + - search.highlight # Search highlighting + - search.share # Share search results + - content.code.copy # Copy button for code blocks + - content.code.annotate # Code annotations + - content.tabs.link # Link content tabs + + icon: + repo: fontawesome/brands/github + +plugins: + - search: + lang: + - en + - zh + - i18n: + docs_structure: folder + languages: + - locale: en + default: true + name: English + build: true + - locale: zh + name: 中文 + build: true + nav_translations: + Home: 首页 + Getting Started: 快速开始 + Installation: 安装 + Quick Start: 快速入门 + Configuration: 配置 + User Guide: 用户指南 + Web UI: Web 界面 + API Usage: API 使用 + 
Workflows: 工作流定制 + Templates: 模板开发 + Gallery: 示例库 + Tutorials: 教程 + Your First Video: 生成你的第一个视频 + Custom Style: 自定义视觉风格 + Voice Cloning: 声音克隆 + Reference: 参考 + API Overview: API 概览 + Config Schema: 配置文件详解 + Development: 开发指南 + Architecture: 架构设计 + Contributing: 贡献指南 + FAQ: 常见问题 + Troubleshooting: 故障排查 + - git-revision-date-localized: + enable_creation_date: true + type: datetime + +markdown_extensions: + # Python Markdown + - abbr + - admonition + - attr_list + - def_list + - footnotes + - md_in_html + - toc: + permalink: true + + # Python Markdown Extensions + - pymdownx.arithmatex: + generic: true + - pymdownx.betterem: + smart_enable: all + - pymdownx.caret + - pymdownx.details + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.keys + - pymdownx.mark + - pymdownx.smartsymbols + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + - pymdownx.tabbed: + alternate_style: true + - pymdownx.tasklist: + custom_checkbox: true + - pymdownx.tilde + +nav: + - Home: index.md + - Getting Started: + - Installation: getting-started/installation.md + - Quick Start: getting-started/quick-start.md + - Configuration: getting-started/configuration.md + - User Guide: + - Web UI: user-guide/web-ui.md + - API Usage: user-guide/api.md + - Workflows: user-guide/workflows.md + - Templates: user-guide/templates.md + - Gallery: gallery/index.md + - Tutorials: + - Your First Video: tutorials/your-first-video.md + - Custom Style: tutorials/custom-style.md + - Voice Cloning: tutorials/voice-cloning.md + - Reference: + - API Overview: reference/api-overview.md + - Config Schema: reference/config-schema.md + - Development: + - Architecture: development/architecture.md + - 
Contributing: development/contributing.md + - FAQ: faq.md + - Troubleshooting: troubleshooting.md + +extra: + social: + - icon: fontawesome/brands/github + link: https://github.com/PixelleLab/Pixelle-Video + name: GitHub Repository + +extra_css: + - stylesheets/extra.css diff --git a/pixelle_video/cli.py b/pixelle_video/cli.py deleted file mode 100644 index 6640eca..0000000 --- a/pixelle_video/cli.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -Pixelle-Video CLI -""" - -import asyncio - -from loguru import logger - -from pixelle_video.service import pixelle_video - - -async def test_llm(): - """Test LLM capability""" - # Initialize pixelle_video - await pixelle_video.initialize() - - # Test prompt - prompt = "Explain the concept of atomic habits in 3 sentences." - - logger.info(f"\n📝 Test Prompt: {prompt}\n") - - # Call LLM - result = await pixelle_video.llm(prompt) - - logger.info(f"\n✨ Result:\n{result}\n") - - -def main(): - """Main CLI entry point""" - logger.info("🚀 Pixelle-Video CLI\n") - - # Run test - asyncio.run(test_llm()) - - -if __name__ == "__main__": - main() -