From d39759926e41d3a45e3a08563fb16377c59cc28a Mon Sep 17 00:00:00 2001 From: Connor <963408438@qq.com> Date: Sun, 18 Jan 2026 05:21:34 +0800 Subject: [PATCH] =?UTF-8?q?1=E3=80=81=E6=B7=BB=E5=8A=A0=E4=B8=AD=E8=8B=B1?= =?UTF-8?q?=E6=96=87=E7=89=88=E6=9C=AC=202=E3=80=81=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E5=B7=B2=E7=9F=A5BUG=203=E3=80=81=E5=AE=8C=E5=96=84=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=204=E3=80=81=E6=B7=BB=E5=8A=A0minimax=E8=A7=86?= =?UTF-8?q?=E9=A2=91=E6=B8=A0=E9=81=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README-EN.md | 537 ++++++++ README.md | 3 +- api/handlers/audio_extraction.go | 88 ++ api/handlers/character_library.go | 2 +- api/handlers/frame_prompt.go | 10 +- api/handlers/image_generation.go | 19 +- api/handlers/scene.go | 16 + api/handlers/script_generation.go | 43 +- api/handlers/settings.go | 67 + api/handlers/storyboard.go | 21 +- api/middlewares/ratelimit.go | 2 +- api/routes/routes.go | 21 +- application/services/ai_service.go | 40 +- application/services/asset_duration_update.go | 66 + application/services/asset_service.go | 11 +- .../services/audio_extraction_service.go | 97 ++ application/services/frame_prompt_service.go | 204 ++- .../services/image_generation_service.go | 225 ++- application/services/prompt_i18n.go | 516 +++++++ .../services/script_generation_service.go | 458 +------ .../storyboard_composition_service.go | 30 + application/services/storyboard_service.go | 171 ++- .../services/video_generation_service.go | 44 +- configs/config.example.yaml | 1 + infrastructure/external/ffmpeg/ffmpeg.go | 325 ++++- pkg/ai/openai_client.go | 21 + pkg/config/config.go | 7 +- pkg/utils/json_parser.go | 80 +- pkg/video/minimax_client.go | 181 ++- web/package.json | 1 + web/src/api/audio.ts | 45 + web/src/api/drama.ts | 8 +- web/src/api/generation.ts | 18 +- web/src/api/settings.ts | 13 + web/src/assets/styles/main.css | 5 + web/src/components/LanguageSwitcher.vue | 84 +- 
web/src/components/common/AIConfigDialog.vue | 52 +- web/src/components/common/AppHeader.vue | 2 +- web/src/components/common/AppLayout.vue | 2 +- .../components/editor/StoryboardEditor.vue | 203 ++- .../components/editor/VideoTimelineEditor.vue | 269 +++- web/src/locales/en-US.ts | 7 + web/src/locales/zh-CN.ts | 7 + web/src/router/index.ts | 5 - web/src/types/generation.ts | 44 +- web/src/views/drama/DramaManagement.vue | 84 +- web/src/views/drama/EpisodeWorkflow.vue | 50 +- web/src/views/drama/ProfessionalEditor.vue | 14 +- .../views/drama/components/GenerateDialog.vue | 425 ------ web/src/views/settings/AIConfig.vue | 39 +- web/src/views/settings/SystemSettings.vue | 169 +++ web/src/views/workflow/ScriptGeneration.vue | 1221 ----------------- 52 files changed, 3456 insertions(+), 2617 deletions(-) create mode 100644 README-EN.md create mode 100644 api/handlers/audio_extraction.go create mode 100644 api/handlers/settings.go create mode 100644 application/services/asset_duration_update.go create mode 100644 application/services/audio_extraction_service.go create mode 100644 application/services/prompt_i18n.go create mode 100644 web/src/api/audio.ts create mode 100644 web/src/api/settings.ts delete mode 100644 web/src/views/drama/components/GenerateDialog.vue create mode 100644 web/src/views/settings/SystemSettings.vue delete mode 100644 web/src/views/workflow/ScriptGeneration.vue diff --git a/README-EN.md b/README-EN.md new file mode 100644 index 0000000..4c1733d --- /dev/null +++ b/README-EN.md @@ -0,0 +1,537 @@ +# 🎬 Huobao Drama - AI Short Drama Production Platform + +
+ +**Full-stack AI Short Drama Automation Platform Based on Go + Vue3** + +[![Go Version](https://img.shields.io/badge/Go-1.23+-00ADD8?style=flat&logo=go)](https://golang.org) +[![Vue Version](https://img.shields.io/badge/Vue-3.x-4FC08D?style=flat&logo=vue.js)](https://vuejs.org) +[![License](https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by-nc-sa/4.0/) + +[Features](#features) • [Quick Start](#quick-start) • [Deployment](#deployment) + +[简体中文](README.md) | [English](README-EN.md) + +
+ +--- + +## 📖 About + +Huobao Drama is an AI-powered short drama production platform that automates the entire workflow, from script generation and character design through storyboarding to video composition. + +### 🎯 Core Features + +- **🤖 AI-Driven**: Parse scripts using large language models to extract characters, scenes, and storyboards +- **🎨 Intelligent Creation**: AI-generated character portraits and scene backgrounds +- **📹 Video Generation**: Automatic storyboard video generation using text-to-video and image-to-video models +- **🔄 Complete Workflow**: End-to-end production workflow from idea to final video + +### 🛠️ Technical Architecture + +Based on **DDD (Domain-Driven Design)** with clear layering: + +``` +├── API Layer (Gin HTTP) +├── Application Service Layer (Business Logic) +├── Domain Layer (Domain Models) +└── Infrastructure Layer (Database, External Services) +``` + +### 🎥 Demo Videos + +Experience AI short drama generation: + +
+ +**Sample Work 1** + + + +**Sample Work 2** + + + +[Watch Video 1](https://ffile.chatfire.site/cf/public/20260114094337396.mp4) | [Watch Video 2](https://ffile.chatfire.site/cf/public/fcede75e8aeafe22031dbf78f86285b8.mp4) + +
+ +--- + +## ✨ Features + +### 🎭 Character Management +- ✅ AI-generated character portraits +- ✅ Batch character generation +- ✅ Character image upload and management + +### 🎬 Storyboard Production +- ✅ Automatic storyboard script generation +- ✅ Scene descriptions and shot design +- ✅ Storyboard image generation (text-to-image) +- ✅ Frame type selection (first frame/key frame/last frame/panel) + +### 🎥 Video Generation +- ✅ Automatic image-to-video generation +- ✅ Video composition and editing +- ✅ Transition effects + +### 📦 Asset Management +- ✅ Unified asset library management +- ✅ Local storage support +- ✅ Asset import/export +- ✅ Task progress tracking + +--- + +## 🚀 Quick Start + +### 📋 Prerequisites + +| Software | Version | Description | +|----------|---------|-------------| +| **Go** | 1.23+ | Backend runtime | +| **Node.js** | 18+ | Frontend build environment | +| **npm** | 9+ | Package manager | +| **FFmpeg** | 4.0+ | Video processing (**Required**) | +| **SQLite** | 3.x | Database (built-in) | + +#### Installing FFmpeg + +**macOS:** +```bash +brew install ffmpeg +``` + +**Ubuntu/Debian:** +```bash +sudo apt update +sudo apt install ffmpeg +``` + +**Windows:** +Download from [FFmpeg Official Site](https://ffmpeg.org/download.html) and configure environment variables + +Verify installation: +```bash +ffmpeg -version +``` + +### ⚙️ Configuration + +Copy and edit the configuration file: + +```bash +cp configs/config.example.yaml configs/config.yaml +vim configs/config.yaml +``` + +Configuration file format (`configs/config.yaml`): + +```yaml +app: + name: "Huobao Drama API" + version: "1.0.0" + debug: true # Set to true for development, false for production + +server: + port: 5678 + host: "0.0.0.0" + cors_origins: + - "http://localhost:3012" + read_timeout: 600 + write_timeout: 600 + +database: + type: "sqlite" + path: "./data/drama_generator.db" + max_idle: 10 + max_open: 100 + +storage: + type: "local" + local_path: "./data/storage" + base_url: 
"http://localhost:5678/static" + +ai: + default_text_provider: "openai" + default_image_provider: "openai" + default_video_provider: "doubao" +``` + +**Key Configuration Items:** +- `app.debug`: Debug mode switch (recommended true for development) +- `server.port`: Service port +- `server.cors_origins`: Allowed CORS origins for frontend +- `database.path`: SQLite database file path +- `storage.local_path`: Local file storage path +- `storage.base_url`: Static resource access URL +- `ai.default_*_provider`: AI service provider configuration (API keys configured in Web UI) + +### 📥 Installation + +```bash +# Clone the project +git clone https://github.com/chatfire-AI/huobao-drama.git +cd huobao-drama + +# Install Go dependencies +go mod download + +# Install frontend dependencies +cd web +npm install +cd .. +``` + +### 🎯 Starting the Project + +#### Method 1: Development Mode (Recommended) + +**Frontend and backend separation with hot reload** + +```bash +# Terminal 1: Start backend service +go run main.go + +# Terminal 2: Start frontend dev server +cd web +npm run dev +``` + +- Frontend: `http://localhost:3012` +- Backend API: `http://localhost:5678/api/v1` +- Frontend automatically proxies API requests to backend + +#### Method 2: Single Service Mode + +**Backend serves both API and frontend static files** + +```bash +# 1. Build frontend +cd web +npm run build +cd .. + +# 2. Start service +go run main.go +``` + +Access: `http://localhost:5678` + +### 🗄️ Database Initialization + +Database tables are automatically created on first startup (using GORM AutoMigrate), no manual migration needed. 
+ +--- + +## 📦 Deployment + +### 🐳 Docker Deployment (Recommended) + +#### Method 1: Docker Compose (Recommended) + +```bash +# Start services +docker-compose up -d + +# View logs +docker-compose logs -f + +# Stop services +docker-compose down +``` + +#### Method 2: Docker Command + +> **Note**: Linux users need to add `--add-host=host.docker.internal:host-gateway` to access host services + +```bash +# Run from Docker Hub +docker run -d \ + --name huobao-drama \ + -p 5678:5678 \ + -v $(pwd)/data:/app/data \ + --restart unless-stopped \ + huobao/huobao-drama:latest + +# View logs +docker logs -f huobao-drama +``` + +**Local Build** (optional): +```bash +docker build -t huobao-drama:latest . +docker run -d --name huobao-drama -p 5678:5678 -v $(pwd)/data:/app/data huobao-drama:latest +``` + +**Docker Deployment Advantages:** +- ✅ Ready to use with default configuration +- ✅ Environment consistency, avoiding dependency issues +- ✅ One-click start, no need to install Go, Node.js, FFmpeg +- ✅ Easy to migrate and scale +- ✅ Automatic health checks and restarts +- ✅ Automatic file permission handling + +#### 🔗 Accessing Host Services (Ollama/Local Models) + +The container is configured to access host services using `http://host.docker.internal:PORT`. + +**Configuration Steps:** + +1. **Start service on host (listen on all interfaces)** + ```bash + export OLLAMA_HOST=0.0.0.0:11434 && ollama serve + ``` + +2. **Frontend AI Service Configuration** + - Base URL: `http://host.docker.internal:11434/v1` + - Provider: `openai` + - Model: `qwen2.5:latest` + +--- + +### 🏭 Traditional Deployment + +#### 1. Build + +```bash +# 1. Build frontend +cd web +npm run build +cd .. + +# 2. Compile backend +go build -o huobao-drama . +``` + +Generated files: +- `huobao-drama` - Backend executable +- `web/dist/` - Frontend static files (embedded in backend) + +#### 2. 
Prepare Deployment Files + +Files to upload to server: +``` +huobao-drama # Backend executable +configs/config.yaml # Configuration file +data/ # Data directory (optional, auto-created on first run) +``` + +#### 3. Server Configuration + +```bash +# Upload files to server +scp huobao-drama user@server:/opt/huobao-drama/ +scp configs/config.yaml user@server:/opt/huobao-drama/configs/ + +# SSH to server +ssh user@server + +# Modify configuration file +cd /opt/huobao-drama +vim configs/config.yaml +# Set app.debug to false for production +# Configure domain and storage path + +# Create data directory and set permissions (Important!) +# Note: Replace YOUR_USER with the actual user running the service (e.g., www-data, ubuntu, deploy) +sudo mkdir -p /opt/huobao-drama/data/storage +sudo chown -R YOUR_USER:YOUR_USER /opt/huobao-drama/data +sudo chmod -R 755 /opt/huobao-drama/data + +# Grant execute permission +chmod +x huobao-drama + +# Start service +./huobao-drama +``` + +#### 4. Manage Service with systemd + +Create service file `/etc/systemd/system/huobao-drama.service`: + +```ini +[Unit] +Description=Huobao Drama Service +After=network.target + +[Service] +Type=simple +User=YOUR_USER +WorkingDirectory=/opt/huobao-drama +ExecStart=/opt/huobao-drama/huobao-drama +Restart=on-failure +RestartSec=10 + +# Environment variables (optional) +# Environment="GIN_MODE=release" + +[Install] +WantedBy=multi-user.target +``` + +Start service: +```bash +sudo systemctl daemon-reload +sudo systemctl enable huobao-drama +sudo systemctl start huobao-drama +sudo systemctl status huobao-drama +``` + +**⚠️ Common Issue: SQLite Write Permission Error** + +If you encounter `attempt to write a readonly database` error: + +```bash +# 1. Check current user running the service +sudo systemctl status huobao-drama | grep "Main PID" +ps aux | grep huobao-drama + +# 2. 
Fix permissions (replace YOUR_USER with actual username) +sudo chown -R YOUR_USER:YOUR_USER /opt/huobao-drama/data +sudo chmod -R 755 /opt/huobao-drama/data + +# 3. Verify permissions +ls -la /opt/huobao-drama/data +# Should show owner as the user running the service + +# 4. Restart service +sudo systemctl restart huobao-drama +``` + +**Reason:** +- SQLite requires write permission on both the database file **and** its directory +- Needs to create temporary files in the directory (e.g., `-wal`, `-journal`) +- **Key**: Ensure systemd `User` matches data directory owner + +**Common Usernames:** +- Ubuntu/Debian: `www-data`, `ubuntu` +- CentOS/RHEL: `nginx`, `apache` +- Custom deployment: `deploy`, `app`, current logged-in user + +#### 5. Nginx Reverse Proxy + +```nginx +server { + listen 80; + server_name your-domain.com; + + location / { + proxy_pass http://localhost:5678; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + # Direct access to static files + location /static/ { + alias /opt/huobao-drama/data/storage/; + } +} +``` + +--- + +## 🎨 Tech Stack + +### Backend +- **Language**: Go 1.23+ +- **Web Framework**: Gin 1.9+ +- **ORM**: GORM +- **Database**: SQLite +- **Logging**: Zap +- **Video Processing**: FFmpeg +- **AI Services**: OpenAI, Gemini, Doubao, etc. + +### Frontend +- **Framework**: Vue 3.4+ +- **Language**: TypeScript 5+ +- **Build Tool**: Vite 5 +- **UI Components**: Element Plus +- **CSS Framework**: TailwindCSS +- **State Management**: Pinia +- **Router**: Vue Router 4 + +### Development Tools +- **Package Management**: Go Modules, npm +- **Code Standards**: ESLint, Prettier +- **Version Control**: Git + +--- + +## 📝 FAQ + +### Q: How can Docker containers access Ollama on the host? +A: Use `http://host.docker.internal:11434/v1` as Base URL. Note two things: +1. 
Host Ollama needs to listen on `0.0.0.0`: `export OLLAMA_HOST=0.0.0.0:11434 && ollama serve` +2. Linux users using `docker run` need to add: `--add-host=host.docker.internal:host-gateway` + +See: [DOCKER_HOST_ACCESS.md](docs/DOCKER_HOST_ACCESS.md) + +### Q: FFmpeg not installed or not found? +A: Ensure FFmpeg is installed and in the PATH environment variable. Verify with `ffmpeg -version`. + +### Q: Frontend cannot connect to backend API? +A: Check that the backend is running and the port is correct. In development mode, the frontend proxy config is in `web/vite.config.ts`. + +### Q: Database tables not created? +A: GORM automatically creates tables on first startup; check the logs to confirm that the migration succeeded. + +--- + +## 📋 Changelog + +### v1.0.2 (2026-01-16) + +#### 🚀 Major Updates +- Pure Go SQLite driver (`modernc.org/sqlite`), supports `CGO_ENABLED=0` cross-platform compilation +- Optimized concurrency performance (WAL mode), resolved "database is locked" errors +- Docker cross-platform support for `host.docker.internal` to access host services +- Streamlined documentation and deployment guides + +### v1.0.1 (2026-01-14) + +#### 🐛 Bug Fixes / 🔧 Improvements +- Fixed video generation API response parsing issues +- Added OpenAI Sora video endpoint configuration +- Optimized error handling and logging + +--- + +## 🤝 Contributing + +Issues and Pull Requests are welcome! + +1. Fork this project +2. Create a feature branch (`git checkout -b feature/AmazingFeature`) +3. Commit your changes (`git commit -m 'Add some AmazingFeature'`) +4. Push to the branch (`git push origin feature/AmazingFeature`) +5. Open a Pull Request + +--- + +## API Configuration Site +Configure in 2 minutes: [API Aggregation Site](https://api.chatfire.site/models) + +## 📧 Contact +Business Contact (WeChat): dangbao1117 + +## Community Group +![Community Group](drama.png) +- Submit [Issue](../../issues) +- Email project maintainers + +--- + +
+ +**⭐ If this project helps you, please give it a Star!** + +## Star History + +[![Star History Chart](https://api.star-history.com/svg?repos=chatfire-AI/huobao-drama&type=date&legend=top-left)](https://www.star-history.com/#chatfire-AI/huobao-drama&type=date&legend=top-left) + +Made with ❤️ by Huobao Team + +
diff --git a/README.md b/README.md index c20a06c..b49889f 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,14 @@ **基于 Go + Vue3 的全栈AI短剧自动化生产平台** - [![Go Version](https://img.shields.io/badge/Go-1.23+-00ADD8?style=flat&logo=go)](https://golang.org) [![Vue Version](https://img.shields.io/badge/Vue-3.x-4FC08D?style=flat&logo=vue.js)](https://vuejs.org) [![License](https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by-nc-sa/4.0/) [功能特性](#功能特性) • [快速开始](#快速开始) • [部署指南](#部署指南) +[简体中文](README.md) | [English](README-EN.md) + --- diff --git a/api/handlers/audio_extraction.go b/api/handlers/audio_extraction.go new file mode 100644 index 0000000..e23d1b1 --- /dev/null +++ b/api/handlers/audio_extraction.go @@ -0,0 +1,88 @@ +package handlers + +import ( + "net/http" + + "github.com/drama-generator/backend/application/services" + "github.com/drama-generator/backend/pkg/logger" + "github.com/gin-gonic/gin" +) + +type AudioExtractionHandler struct { + service *services.AudioExtractionService + log *logger.Logger + dataDir string +} + +func NewAudioExtractionHandler(log *logger.Logger, dataDir string) *AudioExtractionHandler { + return &AudioExtractionHandler{ + service: services.NewAudioExtractionService(log), + log: log, + dataDir: dataDir, + } +} + +// ExtractAudio 提取单个视频的音频 +// @Summary 提取视频音频 +// @Description 从视频URL中提取音频轨道 +// @Tags Audio +// @Accept json +// @Produce json +// @Param request body services.ExtractAudioRequest true "提取请求" +// @Success 200 {object} services.ExtractAudioResponse +// @Router /api/audio/extract [post] +func (h *AudioExtractionHandler) ExtractAudio(c *gin.Context) { + var req services.ExtractAudioRequest + if err := c.ShouldBindJSON(&req); err != nil { + h.log.Errorw("Invalid request body", "error", err) + c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body"}) + return + } + + h.log.Infow("Received audio extraction request", "video_url", req.VideoURL) + + result, err := 
h.service.ExtractAudio(req.VideoURL, h.dataDir) + if err != nil { + h.log.Errorw("Failed to extract audio", "error", err, "video_url", req.VideoURL) + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + + c.JSON(http.StatusOK, result) +} + +type BatchExtractAudioRequest struct { + VideoURLs []string `json:"video_urls" binding:"required,min=1"` +} + +// BatchExtractAudio 批量提取音频 +// @Summary 批量提取视频音频 +// @Description 从多个视频URL中提取音频轨道 +// @Tags Audio +// @Accept json +// @Produce json +// @Param request body BatchExtractAudioRequest true "批量提取请求" +// @Success 200 {array} services.ExtractAudioResponse +// @Router /api/audio/extract/batch [post] +func (h *AudioExtractionHandler) BatchExtractAudio(c *gin.Context) { + var req BatchExtractAudioRequest + if err := c.ShouldBindJSON(&req); err != nil { + h.log.Errorw("Invalid request body", "error", err) + c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body"}) + return + } + + h.log.Infow("Received batch audio extraction request", "count", len(req.VideoURLs)) + + results, err := h.service.BatchExtractAudio(req.VideoURLs, h.dataDir) + if err != nil { + h.log.Errorw("Failed to batch extract audio", "error", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + + c.JSON(http.StatusOK, gin.H{ + "results": results, + "total": len(results), + }) +} diff --git a/api/handlers/character_library.go b/api/handlers/character_library.go index aa97936..e5ded4e 100644 --- a/api/handlers/character_library.go +++ b/api/handlers/character_library.go @@ -21,7 +21,7 @@ type CharacterLibraryHandler struct { func NewCharacterLibraryHandler(db *gorm.DB, cfg *config.Config, log *logger.Logger, transferService *services2.ResourceTransferService, localStorage *storage.LocalStorage) *CharacterLibraryHandler { return &CharacterLibraryHandler{ libraryService: services2.NewCharacterLibraryService(db, log), - imageService: services2.NewImageGenerationService(db, 
transferService, localStorage, log), + imageService: services2.NewImageGenerationService(db, cfg, transferService, localStorage, log), log: log, } } diff --git a/api/handlers/frame_prompt.go b/api/handlers/frame_prompt.go index 33ced9d..37707b0 100644 --- a/api/handlers/frame_prompt.go +++ b/api/handlers/frame_prompt.go @@ -27,24 +27,22 @@ func (h *FramePromptHandler) GenerateFramePrompt(c *gin.Context) { storyboardID := c.Param("id") var req struct { - FrameType string `json:"frame_type" binding:"required"` // first, key, last, panel, action + FrameType string `json:"frame_type"` PanelCount int `json:"panel_count"` + Model string `json:"model"` } - if err := c.ShouldBindJSON(&req); err != nil { - response.BadRequest(c, "Invalid request body") + response.BadRequest(c, err.Error()) return } - // 构建请求 serviceReq := services.GenerateFramePromptRequest{ StoryboardID: storyboardID, FrameType: services.FrameType(req.FrameType), PanelCount: req.PanelCount, } - // 生成提示词 - result, err := h.framePromptService.GenerateFramePrompt(serviceReq) + result, err := h.framePromptService.GenerateFramePrompt(serviceReq, req.Model) if err != nil { h.log.Errorw("Failed to generate frame prompt", "error", err) response.InternalError(c, err.Error()) diff --git a/api/handlers/image_generation.go b/api/handlers/image_generation.go index 7ba5dbf..c595a14 100644 --- a/api/handlers/image_generation.go +++ b/api/handlers/image_generation.go @@ -20,7 +20,7 @@ type ImageGenerationHandler struct { func NewImageGenerationHandler(db *gorm.DB, cfg *config.Config, log *logger.Logger, transferService *services.ResourceTransferService, localStorage *storage.LocalStorage) *ImageGenerationHandler { return &ImageGenerationHandler{ - imageService: services.NewImageGenerationService(db, transferService, localStorage, log), + imageService: services.NewImageGenerationService(db, cfg, transferService, localStorage, log), taskService: services.NewTaskService(db, log), log: log, } @@ -75,6 +75,15 @@ func (h 
*ImageGenerationHandler) GetBackgroundsForEpisode(c *gin.Context) { func (h *ImageGenerationHandler) ExtractBackgroundsForEpisode(c *gin.Context) { episodeID := c.Param("episode_id") + // 接收可选的 model 参数 + var req struct { + Model string `json:"model"` + } + if err := c.ShouldBindJSON(&req); err != nil { + // 如果没有提供body或者解析失败,使用空字符串(使用默认模型) + req.Model = "" + } + // 创建异步任务 task, err := h.taskService.CreateTask("background_extraction", episodeID) if err != nil { @@ -84,7 +93,7 @@ func (h *ImageGenerationHandler) ExtractBackgroundsForEpisode(c *gin.Context) { } // 启动后台goroutine处理 - go h.processBackgroundExtraction(task.ID, episodeID) + go h.processBackgroundExtraction(task.ID, episodeID, req.Model) // 立即返回任务ID response.Success(c, gin.H{ @@ -95,8 +104,8 @@ func (h *ImageGenerationHandler) ExtractBackgroundsForEpisode(c *gin.Context) { } // processBackgroundExtraction 后台处理场景提取 -func (h *ImageGenerationHandler) processBackgroundExtraction(taskID, episodeID string) { - h.log.Infow("Starting background extraction", "task_id", taskID, "episode_id", episodeID) +func (h *ImageGenerationHandler) processBackgroundExtraction(taskID, episodeID, model string) { + h.log.Infow("Starting background extraction", "task_id", taskID, "episode_id", episodeID, "model", model) // 更新任务状态为处理中 if err := h.taskService.UpdateTaskStatus(taskID, "processing", 10, "开始提取场景..."); err != nil { @@ -104,7 +113,7 @@ func (h *ImageGenerationHandler) processBackgroundExtraction(taskID, episodeID s } // 调用实际的提取逻辑 - backgrounds, err := h.imageService.ExtractBackgroundsForEpisode(episodeID) + backgrounds, err := h.imageService.ExtractBackgroundsForEpisode(episodeID, model) if err != nil { h.log.Errorw("Failed to extract backgrounds", "error", err, "task_id", taskID) if updateErr := h.taskService.UpdateTaskError(taskID, err); updateErr != nil { diff --git a/api/handlers/scene.go b/api/handlers/scene.go index 968cd80..1367992 100644 --- a/api/handlers/scene.go +++ b/api/handlers/scene.go @@ -73,3 +73,19 @@ func 
(h *SceneHandler) GenerateSceneImage(c *gin.Context) { "image_generation": imageGen, }) } + +func (h *SceneHandler) DeleteScene(c *gin.Context) { + sceneID := c.Param("scene_id") + + if err := h.sceneService.DeleteScene(sceneID); err != nil { + h.log.Errorw("Failed to delete scene", "error", err, "scene_id", sceneID) + if err.Error() == "scene not found" { + response.NotFound(c, "场景不存在") + return + } + response.InternalError(c, err.Error()) + return + } + + response.Success(c, gin.H{"message": "场景已删除"}) +} diff --git a/api/handlers/script_generation.go b/api/handlers/script_generation.go index 6264959..e197ce1 100644 --- a/api/handlers/script_generation.go +++ b/api/handlers/script_generation.go @@ -17,30 +17,12 @@ type ScriptGenerationHandler struct { func NewScriptGenerationHandler(db *gorm.DB, cfg *config.Config, log *logger.Logger) *ScriptGenerationHandler { return &ScriptGenerationHandler{ - scriptService: services.NewScriptGenerationService(db, log), + scriptService: services.NewScriptGenerationService(db, cfg, log), taskService: services.NewTaskService(db, log), log: log, } } -func (h *ScriptGenerationHandler) GenerateOutline(c *gin.Context) { - - var req services.GenerateOutlineRequest - if err := c.ShouldBindJSON(&req); err != nil { - response.BadRequest(c, err.Error()) - return - } - - result, err := h.scriptService.GenerateOutline(&req) - if err != nil { - h.log.Errorw("Failed to generate outline", "error", err) - response.InternalError(c, err.Error()) - return - } - - response.Success(c, result) -} - func (h *ScriptGenerationHandler) GenerateCharacters(c *gin.Context) { var req services.GenerateCharactersRequest if err := c.ShouldBindJSON(&req); err != nil { @@ -56,8 +38,11 @@ func (h *ScriptGenerationHandler) GenerateCharacters(c *gin.Context) { return } + // 复制req值,避免goroutine中使用指针导致的并发问题 + reqCopy := req + // 启动后台goroutine处理 - go h.processCharacterGeneration(task.ID, &req) + go h.processCharacterGeneration(task.ID, &reqCopy) // 立即返回任务ID 
response.Success(c, gin.H{ @@ -98,21 +83,3 @@ func (h *ScriptGenerationHandler) processCharacterGeneration(taskID string, req h.log.Infow("Character generation completed", "task_id", taskID, "total", len(characters)) } - -func (h *ScriptGenerationHandler) GenerateEpisodes(c *gin.Context) { - - var req services.GenerateEpisodesRequest - if err := c.ShouldBindJSON(&req); err != nil { - response.BadRequest(c, err.Error()) - return - } - - episodes, err := h.scriptService.GenerateEpisodes(&req) - if err != nil { - h.log.Errorw("Failed to generate episodes", "error", err) - response.InternalError(c, err.Error()) - return - } - - response.Success(c, episodes) -} diff --git a/api/handlers/settings.go b/api/handlers/settings.go new file mode 100644 index 0000000..91d15f5 --- /dev/null +++ b/api/handlers/settings.go @@ -0,0 +1,67 @@ +package handlers + +import ( + "github.com/drama-generator/backend/pkg/config" + "github.com/drama-generator/backend/pkg/logger" + "github.com/drama-generator/backend/pkg/response" + "github.com/gin-gonic/gin" + "github.com/spf13/viper" +) + +type SettingsHandler struct { + config *config.Config + log *logger.Logger +} + +func NewSettingsHandler(cfg *config.Config, log *logger.Logger) *SettingsHandler { + return &SettingsHandler{ + config: cfg, + log: log, + } +} + +// GetLanguage 获取当前系统语言 +func (h *SettingsHandler) GetLanguage(c *gin.Context) { + language := h.config.App.Language + if language == "" { + language = "zh" // 默认中文 + } + + response.Success(c, gin.H{ + "language": language, + }) +} + +// UpdateLanguage 更新系统语言 +func (h *SettingsHandler) UpdateLanguage(c *gin.Context) { + var req struct { + Language string `json:"language" binding:"required,oneof=zh en"` + } + + if err := c.ShouldBindJSON(&req); err != nil { + response.BadRequest(c, "语言参数错误,只支持 zh 或 en") + return + } + + // 更新内存中的配置 + h.config.App.Language = req.Language + + // 更新配置文件 + viper.Set("app.language", req.Language) + if err := viper.WriteConfig(); err != nil { + 
h.log.Warnw("Failed to write config file", "error", err) + // 即使写入文件失败,内存配置也已更新,仍然可用 + } + + h.log.Infow("System language updated", "language", req.Language) + + message := "语言已切换为中文" + if req.Language == "en" { + message = "Language switched to English" + } + + response.Success(c, gin.H{ + "message": message, + "language": req.Language, + }) +} diff --git a/api/handlers/storyboard.go b/api/handlers/storyboard.go index 2dc1fda..2be974e 100644 --- a/api/handlers/storyboard.go +++ b/api/handlers/storyboard.go @@ -17,7 +17,7 @@ type StoryboardHandler struct { func NewStoryboardHandler(db *gorm.DB, cfg *config.Config, log *logger.Logger) *StoryboardHandler { return &StoryboardHandler{ - storyboardService: services.NewStoryboardService(db, log), + storyboardService: services.NewStoryboardService(db, cfg, log), taskService: services.NewTaskService(db, log), log: log, } @@ -27,6 +27,15 @@ func NewStoryboardHandler(db *gorm.DB, cfg *config.Config, log *logger.Logger) * func (h *StoryboardHandler) GenerateStoryboard(c *gin.Context) { episodeID := c.Param("episode_id") + // 接收可选的 model 参数 + var req struct { + Model string `json:"model"` + } + if err := c.ShouldBindJSON(&req); err != nil { + // 如果没有提供body或者解析失败,使用空字符串(使用默认模型) + req.Model = "" + } + // 创建异步任务 task, err := h.taskService.CreateTask("storyboard_generation", episodeID) if err != nil { @@ -36,19 +45,19 @@ func (h *StoryboardHandler) GenerateStoryboard(c *gin.Context) { } // 启动后台goroutine处理 - go h.processStoryboardGeneration(task.ID, episodeID) + go h.processStoryboardGeneration(task.ID, episodeID, req.Model) // 立即返回任务ID response.Success(c, gin.H{ "task_id": task.ID, "status": "pending", - "message": "分镜生成任务已创建,正在后台处理...", + "message": "分镜头生成任务已创建,正在后台处理...", }) } // processStoryboardGeneration 后台处理分镜生成 -func (h *StoryboardHandler) processStoryboardGeneration(taskID, episodeID string) { - h.log.Infow("Starting storyboard generation", "task_id", taskID, "episode_id", episodeID) +func (h *StoryboardHandler) 
processStoryboardGeneration(taskID, episodeID, model string) { + h.log.Infow("Starting storyboard generation", "task_id", taskID, "episode_id", episodeID, "model", model) // 更新任务状态为处理中 if err := h.taskService.UpdateTaskStatus(taskID, "processing", 10, "开始生成分镜..."); err != nil { @@ -56,7 +65,7 @@ func (h *StoryboardHandler) processStoryboardGeneration(taskID, episodeID string } // 调用实际的生成逻辑 - result, err := h.storyboardService.GenerateStoryboard(episodeID) + result, err := h.storyboardService.GenerateStoryboard(episodeID, model) if err != nil { h.log.Errorw("Failed to generate storyboard", "error", err, "task_id", taskID) if updateErr := h.taskService.UpdateTaskError(taskID, err); updateErr != nil { diff --git a/api/middlewares/ratelimit.go b/api/middlewares/ratelimit.go index c4cdfbc..d2a3e21 100644 --- a/api/middlewares/ratelimit.go +++ b/api/middlewares/ratelimit.go @@ -17,7 +17,7 @@ type rateLimiter struct { var limiter = &rateLimiter{ requests: make(map[string][]time.Time), - limit: 100, + limit: 2000, // 每分钟最多 2000 次请求 window: time.Minute, } diff --git a/api/routes/routes.go b/api/routes/routes.go index 00ec503..295e7d4 100644 --- a/api/routes/routes.go +++ b/api/routes/routes.go @@ -35,7 +35,7 @@ func SetupRouter(cfg *config.Config, db *gorm.DB, log *logger.Logger, localStora dramaHandler := handlers2.NewDramaHandler(db, cfg, log, nil) aiConfigHandler := handlers2.NewAIConfigHandler(db, cfg, log) scriptGenHandler := handlers2.NewScriptGenerationHandler(db, cfg, log) - imageGenService := services2.NewImageGenerationService(db, transferService, localStoragePtr, log) + imageGenService := services2.NewImageGenerationService(db, cfg, transferService, localStoragePtr, log) imageGenHandler := handlers2.NewImageGenerationHandler(db, cfg, log, transferService, localStoragePtr) videoGenHandler := handlers2.NewVideoGenerationHandler(db, transferService, localStoragePtr, aiService, log) videoMergeHandler := handlers2.NewVideoMergeHandler(db, nil, cfg.Storage.LocalPath, 
cfg.Storage.BaseURL, log) @@ -49,8 +49,10 @@ func SetupRouter(cfg *config.Config, db *gorm.DB, log *logger.Logger, localStora storyboardHandler := handlers2.NewStoryboardHandler(db, cfg, log) sceneHandler := handlers2.NewSceneHandler(db, log, imageGenService) taskHandler := handlers2.NewTaskHandler(db, log) - framePromptService := services2.NewFramePromptService(db, log) + framePromptService := services2.NewFramePromptService(db, cfg, log) framePromptHandler := handlers2.NewFramePromptHandler(framePromptService, log) + audioExtractionHandler := handlers2.NewAudioExtractionHandler(log, cfg.Storage.LocalPath) + settingsHandler := handlers2.NewSettingsHandler(cfg, log) api := r.Group("/api/v1") { @@ -83,9 +85,7 @@ func SetupRouter(cfg *config.Config, db *gorm.DB, log *logger.Logger, localStora generation := api.Group("/generation") { - generation.POST("/outline", scriptGenHandler.GenerateOutline) generation.POST("/characters", scriptGenHandler.GenerateCharacters) - generation.POST("/episodes", scriptGenHandler.GenerateEpisodes) } // 角色库路由 @@ -137,6 +137,7 @@ func SetupRouter(cfg *config.Config, db *gorm.DB, log *logger.Logger, localStora scenes := api.Group("/scenes") { scenes.PUT("/:scene_id", sceneHandler.UpdateScene) + scenes.DELETE("/:scene_id", sceneHandler.DeleteScene) scenes.POST("/generate-image", sceneHandler.GenerateSceneImage) } @@ -187,6 +188,18 @@ func SetupRouter(cfg *config.Config, db *gorm.DB, log *logger.Logger, localStora storyboards.POST("/:id/frame-prompt", framePromptHandler.GenerateFramePrompt) storyboards.GET("/:id/frame-prompts", handlers2.GetStoryboardFramePrompts(db, log)) } + + audio := api.Group("/audio") + { + audio.POST("/extract", audioExtractionHandler.ExtractAudio) + audio.POST("/extract/batch", audioExtractionHandler.BatchExtractAudio) + } + + settings := api.Group("/settings") + { + settings.GET("/language", settingsHandler.GetLanguage) + settings.PUT("/language", settingsHandler.UpdateLanguage) + } } // 前端静态文件服务(放在API路由之后,避免冲突) diff 
--git a/application/services/ai_service.go b/application/services/ai_service.go index 2dc868e..29b751e 100644 --- a/application/services/ai_service.go +++ b/application/services/ai_service.go @@ -317,14 +317,14 @@ func (s *AIService) TestConnection(req *TestConnectionRequest) error { func (s *AIService) GetDefaultConfig(serviceType string) (*models.AIServiceConfig, error) { var config models.AIServiceConfig - // 按优先级降序获取第一个配置 - err := s.db.Where("service_type = ?", serviceType). + // 按优先级降序获取第一个激活的配置 + err := s.db.Where("service_type = ? AND is_active = ?", serviceType, true). Order("priority DESC, created_at DESC"). First(&config).Error if err != nil { if errors.Is(err, gorm.ErrRecordNotFound) { - return nil, errors.New("no config found") + return nil, errors.New("no active config found") } return nil, err } @@ -332,10 +332,10 @@ func (s *AIService) GetDefaultConfig(serviceType string) (*models.AIServiceConfi return &config, nil } -// GetConfigForModel 根据服务类型和模型名称获取优先级最高的配置 +// GetConfigForModel 根据服务类型和模型名称获取优先级最高的激活配置 func (s *AIService) GetConfigForModel(serviceType string, modelName string) (*models.AIServiceConfig, error) { var configs []models.AIServiceConfig - err := s.db.Where("service_type = ?", serviceType). + err := s.db.Where("service_type = ? AND is_active = ?", serviceType, true). Order("priority DESC, created_at DESC"). 
Find(&configs).Error @@ -352,7 +352,7 @@ func (s *AIService) GetConfigForModel(serviceType string, modelName string) (*mo } } - return nil, errors.New("no config found for model: " + modelName) + return nil, errors.New("no active config found for model: " + modelName) } func (s *AIService) GetAIClient(serviceType string) (ai.AIClient, error) { @@ -388,6 +388,34 @@ func (s *AIService) GetAIClient(serviceType string) (ai.AIClient, error) { } } +// GetAIClientForModel 根据服务类型和模型名称获取对应的AI客户端 +func (s *AIService) GetAIClientForModel(serviceType string, modelName string) (ai.AIClient, error) { + config, err := s.GetConfigForModel(serviceType, modelName) + if err != nil { + return nil, err + } + + // 使用数据库配置中的 endpoint,如果为空则根据 provider 设置默认值 + endpoint := config.Endpoint + if endpoint == "" { + switch config.Provider { + case "gemini", "google": + endpoint = "/v1beta/models/{model}:generateContent" + default: + endpoint = "/chat/completions" + } + } + + // 根据 provider 创建对应的客户端 + switch config.Provider { + case "gemini", "google": + return ai.NewGeminiClient(config.BaseURL, config.APIKey, modelName, endpoint), nil + default: + // openai, chatfire 等其他厂商都使用 OpenAI 格式 + return ai.NewOpenAIClient(config.BaseURL, config.APIKey, modelName, endpoint), nil + } +} + func (s *AIService) GenerateText(prompt string, systemPrompt string, options ...func(*ai.ChatCompletionRequest)) (string, error) { client, err := s.GetAIClient("text") if err != nil { diff --git a/application/services/asset_duration_update.go b/application/services/asset_duration_update.go new file mode 100644 index 0000000..05dc884 --- /dev/null +++ b/application/services/asset_duration_update.go @@ -0,0 +1,66 @@ +package services + +import ( + "fmt" + + models "github.com/drama-generator/backend/domain/models" + "github.com/drama-generator/backend/infrastructure/storage" +) + +// UpdateAssetDurationFromFile 从本地文件探测并更新视频Asset的时长 +func (s *AssetService) UpdateAssetDurationFromFile(assetID uint, localFilePath string) 
error { + var asset models.Asset + if err := s.db.Where("id = ?", assetID).First(&asset).Error; err != nil { + return fmt.Errorf("asset not found") + } + + if asset.Type != models.AssetTypeVideo { + return fmt.Errorf("asset is not a video") + } + + if s.ffmpeg == nil { + return fmt.Errorf("ffmpeg not available") + } + + duration, err := s.ffmpeg.GetVideoDuration(localFilePath) + if err != nil { + return fmt.Errorf("failed to probe video duration: %w", err) + } + + durationInt := int(duration + 0.5) + if err := s.db.Model(&asset).Update("duration", durationInt).Error; err != nil { + return fmt.Errorf("failed to update duration: %w", err) + } + + s.log.Infow("Updated asset duration from file", + "asset_id", assetID, + "duration", durationInt, + "file", localFilePath) + + return nil +} + +// UpdateAssetDurationFromURL 下载视频并探测时长 +func (s *AssetService) UpdateAssetDurationFromURL(assetID uint, localStorage *storage.LocalStorage) error { + var asset models.Asset + if err := s.db.Where("id = ?", assetID).First(&asset).Error; err != nil { + return fmt.Errorf("asset not found") + } + + if asset.Type != models.AssetTypeVideo { + return fmt.Errorf("asset is not a video") + } + + if localStorage == nil { + return fmt.Errorf("local storage not available") + } + + // 下载视频到本地 + localPath, err := localStorage.DownloadFromURL(asset.URL, "videos") + if err != nil { + return fmt.Errorf("failed to download video: %w", err) + } + + // 探测时长 + return s.UpdateAssetDurationFromFile(assetID, localPath) +} diff --git a/application/services/asset_service.go b/application/services/asset_service.go index 3171bee..c630c5c 100644 --- a/application/services/asset_service.go +++ b/application/services/asset_service.go @@ -6,19 +6,22 @@ import ( "strings" models "github.com/drama-generator/backend/domain/models" + "github.com/drama-generator/backend/infrastructure/external/ffmpeg" "github.com/drama-generator/backend/pkg/logger" "gorm.io/gorm" ) type AssetService struct { - db *gorm.DB - log 
*logger.Logger + db *gorm.DB + log *logger.Logger + ffmpeg *ffmpeg.FFmpeg } func NewAssetService(db *gorm.DB, log *logger.Logger) *AssetService { return &AssetService{ - db: db, - log: log, + db: db, + log: log, + ffmpeg: ffmpeg.NewFFmpeg(log), } } diff --git a/application/services/audio_extraction_service.go b/application/services/audio_extraction_service.go new file mode 100644 index 0000000..9d92648 --- /dev/null +++ b/application/services/audio_extraction_service.go @@ -0,0 +1,97 @@ +package services + +import ( + "fmt" + "path/filepath" + "time" + + "github.com/drama-generator/backend/infrastructure/external/ffmpeg" + "github.com/drama-generator/backend/pkg/logger" +) + +type AudioExtractionService struct { + ffmpeg *ffmpeg.FFmpeg + log *logger.Logger +} + +func NewAudioExtractionService(log *logger.Logger) *AudioExtractionService { + return &AudioExtractionService{ + ffmpeg: ffmpeg.NewFFmpeg(log), + log: log, + } +} + +type ExtractAudioRequest struct { + VideoURL string `json:"video_url" binding:"required"` +} + +type ExtractAudioResponse struct { + AudioURL string `json:"audio_url"` + Duration float64 `json:"duration"` +} + +// ExtractAudio 从视频URL提取音频并返回音频文件URL +func (s *AudioExtractionService) ExtractAudio(videoURL string, dataDir string) (*ExtractAudioResponse, error) { + s.log.Infow("Starting audio extraction", "video_url", videoURL) + + // 生成输出文件名 + timestamp := time.Now().Unix() + audioFileName := fmt.Sprintf("audio_%d.aac", timestamp) + audioOutputPath := filepath.Join(dataDir, "audios", audioFileName) + + // 提取音频 + extractedPath, err := s.ffmpeg.ExtractAudio(videoURL, audioOutputPath) + if err != nil { + s.log.Errorw("Failed to extract audio", "error", err, "video_url", videoURL) + return nil, fmt.Errorf("failed to extract audio: %w", err) + } + + // 获取音频时长(使用提取后的本地文件路径) + duration, err := s.ffmpeg.GetVideoDuration(extractedPath) + if err != nil { + s.log.Errorw("Failed to get audio duration", "error", err, "path", extractedPath) + return nil, 
fmt.Errorf("failed to get audio duration: %w", err) + } + + if duration <= 0 { + s.log.Errorw("Invalid audio duration", "duration", duration, "path", extractedPath) + return nil, fmt.Errorf("invalid audio duration: %.2f", duration) + } + + // 构建音频URL(相对于data目录) + audioURL := fmt.Sprintf("/data/audios/%s", audioFileName) + + s.log.Infow("Audio extraction completed", + "video_url", videoURL, + "audio_url", audioURL, + "duration", duration, + "local_path", extractedPath) + + return &ExtractAudioResponse{ + AudioURL: audioURL, + Duration: duration, + }, nil +} + +// BatchExtractAudio 批量提取音频 +func (s *AudioExtractionService) BatchExtractAudio(videoURLs []string, dataDir string) ([]*ExtractAudioResponse, error) { + s.log.Infow("Starting batch audio extraction", "count", len(videoURLs)) + + results := make([]*ExtractAudioResponse, 0, len(videoURLs)) + + for i, videoURL := range videoURLs { + s.log.Infow("Extracting audio", "index", i+1, "total", len(videoURLs), "video_url", videoURL) + + result, err := s.ExtractAudio(videoURL, dataDir) + if err != nil { + s.log.Errorw("Failed to extract audio in batch", "index", i, "video_url", videoURL, "error", err) + // 继续处理其他视频,但记录错误 + return nil, fmt.Errorf("failed to extract audio at index %d: %w", i, err) + } + + results = append(results, result) + } + + s.log.Infow("Batch audio extraction completed", "successful_count", len(results)) + return results, nil +} diff --git a/application/services/frame_prompt_service.go b/application/services/frame_prompt_service.go index fe384ce..a3092a2 100644 --- a/application/services/frame_prompt_service.go +++ b/application/services/frame_prompt_service.go @@ -5,23 +5,28 @@ import ( "strings" "github.com/drama-generator/backend/domain/models" + "github.com/drama-generator/backend/pkg/config" "github.com/drama-generator/backend/pkg/logger" "gorm.io/gorm" ) // FramePromptService 处理帧提示词生成 type FramePromptService struct { - db *gorm.DB - aiService *AIService - log *logger.Logger + db *gorm.DB + 
aiService *AIService + log *logger.Logger + config *config.Config + promptI18n *PromptI18n } // NewFramePromptService 创建帧提示词服务 -func NewFramePromptService(db *gorm.DB, log *logger.Logger) *FramePromptService { +func NewFramePromptService(db *gorm.DB, cfg *config.Config, log *logger.Logger) *FramePromptService { return &FramePromptService{ - db: db, - aiService: NewAIService(db, log), - log: log, + db: db, + aiService: NewAIService(db, log), + log: log, + config: cfg, + promptI18n: NewPromptI18n(cfg), } } @@ -64,7 +69,7 @@ type MultiFramePrompt struct { } // GenerateFramePrompt 生成指定类型的帧提示词并保存到frame_prompts表 -func (s *FramePromptService) GenerateFramePrompt(req GenerateFramePromptRequest) (*FramePromptResponse, error) { +func (s *FramePromptService) GenerateFramePrompt(req GenerateFramePromptRequest, model string) (*FramePromptResponse, error) { // 查询分镜信息 var storyboard models.Storyboard if err := s.db.Preload("Characters").First(&storyboard, req.StoryboardID).Error; err != nil { @@ -88,21 +93,21 @@ func (s *FramePromptService) GenerateFramePrompt(req GenerateFramePromptRequest) // 生成提示词 switch req.FrameType { case FrameTypeFirst: - response.SingleFrame = s.generateFirstFrame(storyboard, scene) + response.SingleFrame = s.generateFirstFrame(storyboard, scene, model) // 保存单帧提示词 s.saveFramePrompt(req.StoryboardID, string(req.FrameType), response.SingleFrame.Prompt, response.SingleFrame.Description, "") case FrameTypeKey: - response.SingleFrame = s.generateKeyFrame(storyboard, scene) + response.SingleFrame = s.generateKeyFrame(storyboard, scene, model) s.saveFramePrompt(req.StoryboardID, string(req.FrameType), response.SingleFrame.Prompt, response.SingleFrame.Description, "") case FrameTypeLast: - response.SingleFrame = s.generateLastFrame(storyboard, scene) + response.SingleFrame = s.generateLastFrame(storyboard, scene, model) s.saveFramePrompt(req.StoryboardID, string(req.FrameType), response.SingleFrame.Prompt, response.SingleFrame.Description, "") case 
FrameTypePanel: count := req.PanelCount if count == 0 { count = 3 } - response.MultiFrame = s.generatePanelFrames(storyboard, scene, count) + response.MultiFrame = s.generatePanelFrames(storyboard, scene, count, model) // 保存多帧提示词(合并为一条记录) var prompts []string for _, frame := range response.MultiFrame.Frames { @@ -111,7 +116,7 @@ func (s *FramePromptService) GenerateFramePrompt(req GenerateFramePromptRequest) combinedPrompt := strings.Join(prompts, "\n---\n") s.saveFramePrompt(req.StoryboardID, string(req.FrameType), combinedPrompt, "分镜板组合提示词", response.MultiFrame.Layout) case FrameTypeAction: - response.MultiFrame = s.generateActionSequence(storyboard, scene) + response.MultiFrame = s.generateActionSequence(storyboard, scene, model) var prompts []string for _, frame := range response.MultiFrame.Frames { prompts = append(prompts, frame.Prompt) @@ -157,33 +162,28 @@ func mustParseUint(s string) uint64 { } // generateFirstFrame 生成首帧提示词 -func (s *FramePromptService) generateFirstFrame(sb models.Storyboard, scene *models.Scene) *SingleFramePrompt { +func (s *FramePromptService) generateFirstFrame(sb models.Storyboard, scene *models.Scene, model string) *SingleFramePrompt { // 构建上下文信息 contextInfo := s.buildStoryboardContext(sb, scene) - // 构建AI提示词 - systemPrompt := `你是一个专业的图像生成提示词专家。请根据提供的镜头信息,生成适合用于AI图像生成的提示词。 + // 使用国际化提示词 + systemPrompt := s.promptI18n.GetFirstFramePrompt() + userPrompt := s.promptI18n.FormatUserPrompt("frame_info", contextInfo) -重要:这是镜头的首帧 - 一个完全静态的画面,展示动作发生之前的初始状态。 - -要求: -1. 直接输出提示词,不要任何解释说明 -2. 可以使用中文或英文,用逗号分隔关键词 -3. 只描述静态视觉元素:场景环境、角色姿态、表情、氛围、光线 -4. 不要包含任何动作动词(如:猛然、弹起、坐直、抓住等) -5. 描述角色处于动作发生前的状态(如:躺在床上、站立、坐着等静态姿态) -6. 
适合动画风格(anime style) - -示例格式: -Anime style, 城市公寓卧室, 凌晨, 昏暗房间, 床上, 年轻男子躺着, 表情平静, 闭眼睡眠, 柔和光线, 静谧氛围, 中景, 平视` - - userPrompt := fmt.Sprintf(`镜头信息: -%s - -请直接生成首帧的图像提示词,不要任何解释:`, contextInfo) - - // 调用AI生成 - prompt, err := s.aiService.GenerateText(userPrompt, systemPrompt) + // 调用AI生成(如果指定了模型则使用指定的模型) + var prompt string + var err error + if model != "" { + client, getErr := s.aiService.GetAIClientForModel("text", model) + if getErr != nil { + s.log.Warnw("Failed to get client for specified model, using default", "model", model, "error", getErr) + prompt, err = s.aiService.GenerateText(userPrompt, systemPrompt) + } else { + prompt, err = client.GenerateText(userPrompt, systemPrompt) + } + } else { + prompt, err = s.aiService.GenerateText(userPrompt, systemPrompt) + } if err != nil { s.log.Warnw("AI generation failed, using fallback", "error", err) // 降级方案:使用简单拼接 @@ -204,33 +204,28 @@ Anime style, 城市公寓卧室, 凌晨, 昏暗房间, 床上, 年轻男子躺 } // generateKeyFrame 生成关键帧提示词 -func (s *FramePromptService) generateKeyFrame(sb models.Storyboard, scene *models.Scene) *SingleFramePrompt { +func (s *FramePromptService) generateKeyFrame(sb models.Storyboard, scene *models.Scene, model string) *SingleFramePrompt { // 构建上下文信息 contextInfo := s.buildStoryboardContext(sb, scene) - // 构建AI提示词 - systemPrompt := `你是一个专业的图像生成提示词专家。请根据提供的镜头信息,生成适合用于AI图像生成的提示词。 + // 使用国际化提示词 + systemPrompt := s.promptI18n.GetKeyFramePrompt() + userPrompt := s.promptI18n.FormatUserPrompt("key_frame_info", contextInfo) -重要:这是镜头的关键帧 - 捕捉动作最激烈、最精彩的瞬间。 - -要求: -1. 直接输出提示词,不要任何解释说明 -2. 可以使用中文或英文,用逗号分隔关键词 -3. 重点描述动作的高潮瞬间:身体姿态、运动轨迹、力量感 -4. 包含动态元素:动作模糊、速度线、冲击感 -5. 强调表情和情绪的极致状态 -6. 
适合动画风格(anime style) - -示例格式: -Anime style, 城市街道, 白天, 男子全力冲刺, 身体前倾, 动作模糊, 速度线, 汗水飞溅, 表情坚毅, 紧张氛围, 动态镜头, 中景` - - userPrompt := fmt.Sprintf(`镜头信息: -%s - -请直接生成关键帧的图像提示词,不要任何解释:`, contextInfo) - - // 调用AI生成 - prompt, err := s.aiService.GenerateText(userPrompt, systemPrompt) + // 调用AI生成(如果指定了模型则使用指定的模型) + var prompt string + var err error + if model != "" { + client, getErr := s.aiService.GetAIClientForModel("text", model) + if getErr != nil { + s.log.Warnw("Failed to get client for specified model, using default", "model", model, "error", getErr) + prompt, err = s.aiService.GenerateText(userPrompt, systemPrompt) + } else { + prompt, err = client.GenerateText(userPrompt, systemPrompt) + } + } else { + prompt, err = s.aiService.GenerateText(userPrompt, systemPrompt) + } if err != nil { s.log.Warnw("AI generation failed, using fallback", "error", err) prompt = s.buildFallbackPrompt(sb, scene, "key frame, dynamic action") @@ -250,33 +245,28 @@ Anime style, 城市街道, 白天, 男子全力冲刺, 身体前倾, 动作模 } // generateLastFrame 生成尾帧提示词 -func (s *FramePromptService) generateLastFrame(sb models.Storyboard, scene *models.Scene) *SingleFramePrompt { +func (s *FramePromptService) generateLastFrame(sb models.Storyboard, scene *models.Scene, model string) *SingleFramePrompt { // 构建上下文信息 contextInfo := s.buildStoryboardContext(sb, scene) - // 构建AI提示词 - systemPrompt := `你是一个专业的图像生成提示词专家。请根据提供的镜头信息,生成适合用于AI图像生成的提示词。 + // 使用国际化提示词 + systemPrompt := s.promptI18n.GetLastFramePrompt() + userPrompt := s.promptI18n.FormatUserPrompt("last_frame_info", contextInfo) -重要:这是镜头的尾帧 - 一个静态画面,展示动作结束后的最终状态和结果。 - -要求: -1. 直接输出提示词,不要任何解释说明 -2. 可以使用中文或英文,用逗号分隔关键词 -3. 只描述静态的最终状态:角色姿态、表情、环境变化 -4. 不要包含动作过程,只展示动作的结果和余韵 -5. 强调情绪的余波和氛围的沉淀 -6. 
适合动画风格(anime style) - -示例格式: -Anime style, 房间内, 黄昏, 男子坐在椅子上, 身体放松, 表情疲惫, 长出一口气, 汗水滴落, 平静氛围, 静态镜头, 中景` - - userPrompt := fmt.Sprintf(`镜头信息: -%s - -请直接生成尾帧的图像提示词,不要任何解释:`, contextInfo) - - // 调用AI生成 - prompt, err := s.aiService.GenerateText(userPrompt, systemPrompt) + // 调用AI生成(如果指定了模型则使用指定的模型) + var prompt string + var err error + if model != "" { + client, getErr := s.aiService.GetAIClientForModel("text", model) + if getErr != nil { + s.log.Warnw("Failed to get client for specified model, using default", "model", model, "error", getErr) + prompt, err = s.aiService.GenerateText(userPrompt, systemPrompt) + } else { + prompt, err = client.GenerateText(userPrompt, systemPrompt) + } + } else { + prompt, err = s.aiService.GenerateText(userPrompt, systemPrompt) + } if err != nil { s.log.Warnw("AI generation failed, using fallback", "error", err) prompt = s.buildFallbackPrompt(sb, scene, "last frame, final state") @@ -296,27 +286,27 @@ Anime style, 房间内, 黄昏, 男子坐在椅子上, 身体放松, 表情疲 } // generatePanelFrames 生成分镜板(多格组合) -func (s *FramePromptService) generatePanelFrames(sb models.Storyboard, scene *models.Scene, count int) *MultiFramePrompt { +func (s *FramePromptService) generatePanelFrames(sb models.Storyboard, scene *models.Scene, count int, model string) *MultiFramePrompt { layout := fmt.Sprintf("horizontal_%d", count) frames := make([]SingleFramePrompt, count) // 固定生成:首帧 -> 关键帧 -> 尾帧 if count == 3 { - frames[0] = *s.generateFirstFrame(sb, scene) + frames[0] = *s.generateFirstFrame(sb, scene, model) frames[0].Description = "第1格:初始状态" - frames[1] = *s.generateKeyFrame(sb, scene) + frames[1] = *s.generateKeyFrame(sb, scene, model) frames[1].Description = "第2格:动作高潮" - frames[2] = *s.generateLastFrame(sb, scene) + frames[2] = *s.generateLastFrame(sb, scene, model) frames[2].Description = "第3格:最终状态" } else if count == 4 { // 4格:首帧 -> 中间帧1 -> 中间帧2 -> 尾帧 - frames[0] = *s.generateFirstFrame(sb, scene) - frames[1] = *s.generateKeyFrame(sb, scene) - frames[2] = *s.generateKeyFrame(sb, 
scene) - frames[3] = *s.generateLastFrame(sb, scene) + frames[0] = *s.generateFirstFrame(sb, scene, model) + frames[1] = *s.generateKeyFrame(sb, scene, model) + frames[2] = *s.generateKeyFrame(sb, scene, model) + frames[3] = *s.generateLastFrame(sb, scene, model) } return &MultiFramePrompt{ @@ -326,16 +316,16 @@ func (s *FramePromptService) generatePanelFrames(sb models.Storyboard, scene *mo } // generateActionSequence 生成动作序列(5-8格) -func (s *FramePromptService) generateActionSequence(sb models.Storyboard, scene *models.Scene) *MultiFramePrompt { +func (s *FramePromptService) generateActionSequence(sb models.Storyboard, scene *models.Scene, model string) *MultiFramePrompt { // 将动作分解为5个步骤 frames := make([]SingleFramePrompt, 5) // 简化实现:均匀分布从首帧到尾帧 - frames[0] = *s.generateFirstFrame(sb, scene) - frames[1] = *s.generateKeyFrame(sb, scene) - frames[2] = *s.generateKeyFrame(sb, scene) - frames[3] = *s.generateKeyFrame(sb, scene) - frames[4] = *s.generateLastFrame(sb, scene) + frames[0] = *s.generateFirstFrame(sb, scene, model) + frames[1] = *s.generateKeyFrame(sb, scene, model) + frames[2] = *s.generateKeyFrame(sb, scene, model) + frames[3] = *s.generateKeyFrame(sb, scene, model) + frames[4] = *s.generateLastFrame(sb, scene, model) return &MultiFramePrompt{ Layout: "horizontal_5", @@ -349,14 +339,14 @@ func (s *FramePromptService) buildStoryboardContext(sb models.Storyboard, scene // 镜头描述(最重要) if sb.Description != nil && *sb.Description != "" { - parts = append(parts, fmt.Sprintf("镜头描述: %s", *sb.Description)) + parts = append(parts, s.promptI18n.FormatUserPrompt("shot_description_label", *sb.Description)) } // 场景信息 if scene != nil { - parts = append(parts, fmt.Sprintf("场景: %s, %s", scene.Location, scene.Time)) + parts = append(parts, s.promptI18n.FormatUserPrompt("scene_label", scene.Location, scene.Time)) } else if sb.Location != nil && sb.Time != nil { - parts = append(parts, fmt.Sprintf("场景: %s, %s", *sb.Location, *sb.Time)) + parts = append(parts, 
s.promptI18n.FormatUserPrompt("scene_label", *sb.Location, *sb.Time)) } // 角色 @@ -365,38 +355,38 @@ func (s *FramePromptService) buildStoryboardContext(sb models.Storyboard, scene for _, char := range sb.Characters { charNames = append(charNames, char.Name) } - parts = append(parts, fmt.Sprintf("角色: %s", strings.Join(charNames, ", "))) + parts = append(parts, s.promptI18n.FormatUserPrompt("characters_label", strings.Join(charNames, ", "))) } // 动作 if sb.Action != nil && *sb.Action != "" { - parts = append(parts, fmt.Sprintf("动作: %s", *sb.Action)) + parts = append(parts, s.promptI18n.FormatUserPrompt("action_label", *sb.Action)) } // 结果 if sb.Result != nil && *sb.Result != "" { - parts = append(parts, fmt.Sprintf("结果: %s", *sb.Result)) + parts = append(parts, s.promptI18n.FormatUserPrompt("result_label", *sb.Result)) } // 对白 if sb.Dialogue != nil && *sb.Dialogue != "" { - parts = append(parts, fmt.Sprintf("对白: %s", *sb.Dialogue)) + parts = append(parts, s.promptI18n.FormatUserPrompt("dialogue_label", *sb.Dialogue)) } // 氛围 if sb.Atmosphere != nil && *sb.Atmosphere != "" { - parts = append(parts, fmt.Sprintf("氛围: %s", *sb.Atmosphere)) + parts = append(parts, s.promptI18n.FormatUserPrompt("atmosphere_label", *sb.Atmosphere)) } // 镜头参数 if sb.ShotType != nil { - parts = append(parts, fmt.Sprintf("景别: %s", *sb.ShotType)) + parts = append(parts, s.promptI18n.FormatUserPrompt("shot_type_label", *sb.ShotType)) } if sb.Angle != nil { - parts = append(parts, fmt.Sprintf("角度: %s", *sb.Angle)) + parts = append(parts, s.promptI18n.FormatUserPrompt("angle_label", *sb.Angle)) } if sb.Movement != nil { - parts = append(parts, fmt.Sprintf("运镜: %s", *sb.Movement)) + parts = append(parts, s.promptI18n.FormatUserPrompt("movement_label", *sb.Movement)) } return strings.Join(parts, "\n") diff --git a/application/services/image_generation_service.go b/application/services/image_generation_service.go index a67ad6f..7483086 100644 --- a/application/services/image_generation_service.go +++ 
b/application/services/image_generation_service.go @@ -10,6 +10,7 @@ import ( models "github.com/drama-generator/backend/domain/models" "github.com/drama-generator/backend/infrastructure/storage" "github.com/drama-generator/backend/pkg/ai" + "github.com/drama-generator/backend/pkg/config" "github.com/drama-generator/backend/pkg/image" "github.com/drama-generator/backend/pkg/logger" "github.com/drama-generator/backend/pkg/utils" @@ -22,6 +23,8 @@ type ImageGenerationService struct { transferService *ResourceTransferService localStorage *storage.LocalStorage log *logger.Logger + config *config.Config + promptI18n *PromptI18n } // truncateImageURL 截断图片 URL,避免 base64 格式的 URL 占满日志 @@ -42,12 +45,14 @@ func truncateImageURL(url string) string { return url } -func NewImageGenerationService(db *gorm.DB, transferService *ResourceTransferService, localStorage *storage.LocalStorage, log *logger.Logger) *ImageGenerationService { +func NewImageGenerationService(db *gorm.DB, cfg *config.Config, transferService *ResourceTransferService, localStorage *storage.LocalStorage, log *logger.Logger) *ImageGenerationService { return &ImageGenerationService{ db: db, aiService: NewAIService(db, log), transferService: transferService, localStorage: localStorage, + config: cfg, + promptI18n: NewPromptI18n(cfg), log: log, } } @@ -643,21 +648,22 @@ func (s *ImageGenerationService) GetScencesForEpisode(episodeID string) ([]*mode } // ExtractBackgroundsForEpisode 从剧本内容中提取场景并保存到项目级别数据库 -func (s *ImageGenerationService) ExtractBackgroundsForEpisode(episodeID string) ([]*models.Scene, error) { +func (s *ImageGenerationService) ExtractBackgroundsForEpisode(episodeID string, model string) ([]*models.Scene, error) { var episode models.Episode - if err := s.db.Preload("Drama").Where("id = ?", episodeID).First(&episode).Error; err != nil { + if err := s.db.Preload("Storyboards").First(&episode, episodeID).Error; err != nil { return nil, fmt.Errorf("episode not found") } - // 检查是否有剧本内容 + // 如果没有剧本内容,无法提取场景 
if episode.ScriptContent == nil || *episode.ScriptContent == "" { - return nil, fmt.Errorf("剧本内容为空,无法提取场景") + return nil, fmt.Errorf("episode has no script content") } + s.log.Infow("Extracting backgrounds from script", "episode_id", episodeID, "model", model) dramaID := episode.DramaID // 使用AI从剧本内容中提取场景 - backgroundsInfo, err := s.extractBackgroundsFromScript(*episode.ScriptContent, dramaID) + backgroundsInfo, err := s.extractBackgroundsFromScript(*episode.ScriptContent, dramaID, model) if err != nil { s.log.Errorw("Failed to extract backgrounds from script", "error", err) return nil, err @@ -713,37 +719,74 @@ func (s *ImageGenerationService) ExtractBackgroundsForEpisode(episodeID string) } // extractBackgroundsFromScript 从剧本内容中使用AI提取场景信息 -func (s *ImageGenerationService) extractBackgroundsFromScript(scriptContent string, dramaID uint) ([]BackgroundInfo, error) { +func (s *ImageGenerationService) extractBackgroundsFromScript(scriptContent string, dramaID uint, model string) ([]BackgroundInfo, error) { if scriptContent == "" { return []BackgroundInfo{}, nil } - // 获取AI客户端 - client, err := s.aiService.GetAIClient("text") + // 获取AI客户端(如果指定了模型则使用指定的模型) + var client ai.AIClient + var err error + if model != "" { + s.log.Infow("Using specified model for background extraction", "model", model) + client, err = s.aiService.GetAIClientForModel("text", model) + if err != nil { + s.log.Warnw("Failed to get client for specified model, using default", "model", model, "error", err) + client, err = s.aiService.GetAIClient("text") + } + } else { + client, err = s.aiService.GetAIClient("text") + } if err != nil { return nil, fmt.Errorf("failed to get AI client: %w", err) } - // 构建AI提示词 - prompt := fmt.Sprintf(`【任务】分析以下剧本内容,提取出所有需要的场景背景信息。 + // 使用国际化提示词 + systemPrompt := s.promptI18n.GetSceneExtractionPrompt() + contentLabel := s.promptI18n.FormatUserPrompt("script_content_label") -【剧本内容】 -%s + // 根据语言构建不同的格式说明 + var formatInstructions string + if s.promptI18n.IsEnglish() { + 
formatInstructions = `[Output JSON Format] +{ + "backgrounds": [ + { + "location": "Location name (English)", + "time": "Time description (English)", + "atmosphere": "Atmosphere description (English)", + "prompt": "A cinematic anime-style pure background scene depicting [location description] at [time]. The scene shows [environment details, architecture, objects, lighting, no characters]. Style: rich details, high quality, atmospheric lighting. Mood: [environment mood description]." + } + ] +} -【要求】 -1. 识别剧本中所有不同的场景(地点+时间组合) -2. 为每个场景生成详细的**中文**图片生成提示词(Prompt) -3. **重要**:场景描述必须是**纯背景**,不能包含人物、角色、动作等元素 -4. Prompt要求: - - **必须使用中文**,不能包含英文字符 - - 详细描述场景环境、建筑、物品、光线、氛围等 - - **禁止描述人物、角色、动作、对话等** - - 适合AI图片生成模型使用 - - 风格统一为:电影感、细节丰富、动漫风格、高质量 -5. location、time、atmosphere和prompt字段都使用中文 -6. 提取场景的氛围描述(atmosphere) +[Example] +Correct example (note: no characters): +{ + "backgrounds": [ + { + "location": "Repair Shop Interior", + "time": "Late Night", + "atmosphere": "Dim, lonely, industrial", + "prompt": "A cinematic anime-style pure background scene depicting a messy repair shop interior at late night. Under dim fluorescent lights, the workbench is scattered with various wrenches, screwdrivers and mechanical parts, oil-stained tool boards and faded posters hang on walls, oil stains on the floor, used tires piled in corners. Style: rich details, high quality, dim atmosphere. Mood: lonely, industrial." + }, + { + "location": "City Street", + "time": "Dusk", + "atmosphere": "Warm, busy, lively", + "prompt": "A cinematic anime-style pure background scene depicting a bustling city street at dusk. Sunset afterglow shines on the asphalt road, neon lights of shops on both sides begin to light up, bicycle racks and bus stops on the street, high-rise buildings in the distance, sky showing orange-red gradient. Style: rich details, high quality, warm atmosphere. Mood: lively, busy." 
+ } + ] +} -【输出JSON格式】 +[Wrong Examples (containing characters, forbidden)]: +❌ "Depicting protagonist standing on the street" - contains character +❌ "People hurrying by" - contains characters +❌ "Character moving in the room" - contains character + +Please strictly follow the JSON format and ensure all fields use English.` + } else { + formatInstructions = `【输出JSON格式】 { "backgrounds": [ { @@ -779,29 +822,57 @@ func (s *ImageGenerationService) extractBackgroundsFromScript(scriptContent stri ❌ "人们匆匆而过" - 包含人物 ❌ "角色在房间里活动" - 包含人物 -请严格按照JSON格式输出,确保所有字段都使用中文。`, scriptContent) +请严格按照JSON格式输出,确保所有字段都使用中文。` + } - response, err := client.GenerateText(prompt, "", ai.WithTemperature(0.7), ai.WithMaxTokens(8000)) + prompt := fmt.Sprintf(`%s + +%s +%s + +%s`, systemPrompt, contentLabel, scriptContent, formatInstructions) + + // 打印完整提示词用于调试 + s.log.Infow("=== AI Prompt for Background Extraction (extractBackgroundsFromScript) ===", + "language", s.promptI18n.GetLanguage(), + "prompt_length", len(prompt), + "full_prompt", prompt) + + response, err := client.GenerateText(prompt, "", ai.WithTemperature(0.7)) if err != nil { s.log.Errorw("Failed to extract backgrounds with AI", "error", err) return nil, fmt.Errorf("AI提取场景失败: %w", err) } - s.log.Infow("AI backgrounds extraction response", "length", len(response)) - // 解析JSON响应 - var result struct { - Backgrounds []BackgroundInfo `json:"backgrounds"` - } - if err := utils.SafeParseAIJSON(response, &result); err != nil { - s.log.Errorw("Failed to parse AI response", "error", err, "response", response[:minInt(500, len(response))]) - return nil, fmt.Errorf("解析AI响应失败: %w", err) + // 打印AI返回的原始响应 + s.log.Infow("=== AI Response for Background Extraction (extractBackgroundsFromScript) ===", + "response_length", len(response), + "raw_response", response) + + // 解析AI返回的JSON + var backgrounds []BackgroundInfo + + // 先尝试解析为数组格式 + if err := utils.SafeParseAIJSON(response, &backgrounds); err == nil { + s.log.Infow("Parsed backgrounds as array 
format", "count", len(backgrounds)) + } else { + // 尝试解析为对象格式 + var result struct { + Backgrounds []BackgroundInfo `json:"backgrounds"` + } + if err := utils.SafeParseAIJSON(response, &result); err != nil { + s.log.Errorw("Failed to parse AI response in both formats", "error", err, "response", response[:min(len(response), 500)]) + return nil, fmt.Errorf("解析AI响应失败: %w", err) + } + backgrounds = result.Backgrounds + s.log.Infow("Parsed backgrounds as object format", "count", len(backgrounds)) } s.log.Infow("Extracted backgrounds from script", "drama_id", dramaID, - "backgrounds_count", len(result.Backgrounds)) + "backgrounds_count", len(backgrounds)) - return result.Backgrounds, nil + return backgrounds, nil } // extractBackgroundsWithAI 使用AI智能分析场景并提取唯一背景 @@ -834,25 +905,50 @@ func (s *ImageGenerationService) extractBackgroundsWithAI(storyboards []models.S storyboard.StoryboardNumber, location, time, action, description) } - // 构建AI提示词 - prompt := fmt.Sprintf(`【任务】分析以下分镜头场景,提取出所有需要生成的唯一背景,并返回每个背景对应的场景编号。 + // 使用国际化提示词 + systemPrompt := s.promptI18n.GetSceneExtractionPrompt() + storyboardLabel := s.promptI18n.FormatUserPrompt("storyboard_list_label") -【分镜头列表】 -%s + // 根据语言构建不同的提示词 + var formatInstructions string + if s.promptI18n.IsEnglish() { + formatInstructions = `[Output JSON Format] +{ + "backgrounds": [ + { + "location": "Location name (English)", + "time": "Time description (English)", + "prompt": "A cinematic anime-style background depicting [location description] at [time]. The scene shows [detail description]. Style: rich details, high quality, atmospheric lighting. Mood: [mood description].", + "scene_numbers": [1, 2, 3] + } + ] +} -【要求】 -1. 合并相同或相似的场景背景(地点和时间相同或相近) -2. 为每个唯一背景生成**中文**图片生成提示词(Prompt) -3. Prompt要求: - - **必须使用中文**,不能包含英文字符 - - 详细描述场景、时间、氛围、风格 - - 适合AI图片生成模型使用 - - 风格统一为:电影感、细节丰富、动漫风格、高质量 -4. **重要**:必须返回使用该背景的场景编号数组(scene_numbers) -5. location、time和prompt字段都使用中文 -6. 
每个场景都必须分配到某个背景,确保所有场景编号都被包含 +[Example] +Correct example: +{ + "backgrounds": [ + { + "location": "Repair Shop", + "time": "Late Night", + "prompt": "A cinematic anime-style background depicting a messy repair shop interior at late night. Under dim lighting, the workbench is scattered with various tools and parts, with greasy posters hanging on the walls. Style: rich details, high quality, dim atmosphere. Mood: lonely, industrial.", + "scene_numbers": [1, 5, 6, 10, 15] + }, + { + "location": "City Panorama", + "time": "Late Night with Acid Rain", + "prompt": "A cinematic anime-style background depicting a coastal city panorama in late night acid rain. Neon lights blur in the rain, skyscrapers shrouded in gray-green rain curtain, streets reflecting colorful lights. Style: rich details, high quality, cyberpunk atmosphere. Mood: oppressive, sci-fi, apocalyptic.", + "scene_numbers": [2, 7] + } + ] +} -【输出JSON格式】 +Please strictly follow the JSON format and ensure: +1. prompt field uses English +2. scene_numbers includes all scene numbers using this background +3. All scenes are assigned to a background` + } else { + formatInstructions = `【输出JSON格式】 { "backgrounds": [ { @@ -886,7 +982,21 @@ func (s *ImageGenerationService) extractBackgroundsWithAI(storyboards []models.S 请严格按照JSON格式输出,确保: 1. prompt字段使用中文 2. scene_numbers包含所有使用该背景的场景编号 -3. 所有场景都被分配到某个背景`, scenesText) +3. 
所有场景都被分配到某个背景` + } + + prompt := fmt.Sprintf(`%s + +%s +%s + +%s`, systemPrompt, storyboardLabel, scenesText, formatInstructions) + + // 打印完整提示词用于调试 + s.log.Infow("=== AI Prompt for Background Extraction (extractBackgroundsWithAI) ===", + "language", s.promptI18n.GetLanguage(), + "prompt_length", len(prompt), + "full_prompt", prompt) // 调用AI服务 text, err := s.aiService.GenerateText(prompt, "") @@ -894,6 +1004,11 @@ func (s *ImageGenerationService) extractBackgroundsWithAI(storyboards []models.S return nil, fmt.Errorf("AI analysis failed: %w", err) } + // 打印AI返回的原始响应 + s.log.Infow("=== AI Response for Background Extraction ===", + "response_length", len(text), + "raw_response", text) + // 解析AI返回的JSON var result struct { Scenes []struct { diff --git a/application/services/prompt_i18n.go b/application/services/prompt_i18n.go new file mode 100644 index 0000000..5f4fafd --- /dev/null +++ b/application/services/prompt_i18n.go @@ -0,0 +1,516 @@ +package services + +import ( + "fmt" + + "github.com/drama-generator/backend/pkg/config" +) + +// PromptI18n 提示词国际化工具 +type PromptI18n struct { + config *config.Config +} + +// NewPromptI18n 创建提示词国际化工具 +func NewPromptI18n(cfg *config.Config) *PromptI18n { + return &PromptI18n{config: cfg} +} + +// GetLanguage 获取当前语言设置 +func (p *PromptI18n) GetLanguage() string { + lang := p.config.App.Language + if lang == "" { + return "zh" // 默认中文 + } + return lang +} + +// IsEnglish 判断是否为英文模式(动态读取配置) +func (p *PromptI18n) IsEnglish() bool { + return p.GetLanguage() == "en" +} + +// GetStoryboardSystemPrompt 获取分镜生成系统提示词 +func (p *PromptI18n) GetStoryboardSystemPrompt() string { + if p.IsEnglish() { + return `[Role] You are a senior film storyboard artist, proficient in Robert McKee's shot breakdown theory, skilled at building emotional rhythm. + +[Task] Break down the novel script into storyboard shots based on **independent action units**. + +[Shot Breakdown Principles] +1. 
**Action Unit Division**: Each shot must correspond to a complete and independent action + - One action = one shot (character stands up, walks over, speaks a line, reacts with an expression, etc.) + - Do NOT merge multiple actions (standing up + walking over should be split into 2 shots) + +2. **Shot Type Standards** (choose based on storytelling needs): + - Extreme Long Shot (ELS): Environment, atmosphere building + - Long Shot (LS): Full body action, spatial relationships + - Medium Shot (MS): Interactive dialogue, emotional communication + - Close-Up (CU): Detail display, emotional expression + - Extreme Close-Up (ECU): Key props, intense emotions + +3. **Camera Movement Requirements**: + - Fixed Shot: Stable focus on one subject + - Push In: Approaching subject, increasing tension + - Pull Out: Expanding field of view, revealing context + - Pan: Horizontal camera movement, spatial transitions + - Follow: Following subject movement + - Tracking: Linear movement with subject + +4. **Emotion & Intensity Markers**: + - Emotion: Brief description (excited, sad, nervous, happy, etc.) + - Intensity: Emotion level using arrows + * Extremely strong ↑↑↑ (3): Emotional peak, high tension + * Strong ↑↑ (2): Significant emotional fluctuation + * Moderate ↑ (1): Noticeable emotional change + * Stable → (0): Emotion remains unchanged + * Weak ↓ (-1): Emotion subsiding + +[Output Requirements] +1. 
Generate an array, each element is a shot containing: + - shot_number: Shot number + - scene_description: Scene (location + time, e.g., "bedroom interior, morning") + - shot_type: Shot type (extreme long shot/long shot/medium shot/close-up/extreme close-up) + - camera_angle: Camera angle (eye-level/low-angle/high-angle/side/back) + - camera_movement: Camera movement (fixed/push/pull/pan/follow/tracking) + - action: Action description + - result: Visual result of the action + - dialogue: Character dialogue or narration (if any) + - emotion: Current emotion + - emotion_intensity: Emotion intensity level (3/2/1/0/-1) + +**CRITICAL: Return ONLY a valid JSON array. Do NOT include any markdown code blocks, explanations, or other text. Start directly with [ and end with ].** + +[Important Notes] +- Shot count must match number of independent actions in the script (not allowed to merge or reduce) +- Each shot must have clear action and result +- Shot types must match storytelling rhythm (don't use same shot type continuously) +- Emotion intensity must accurately reflect script atmosphere changes` + } + + return `【角色】你是一位资深影视分镜师,精通罗伯特·麦基的镜头拆解理论,擅长构建情绪节奏。 + +【任务】将小说剧本按**独立动作单元**拆解为分镜头方案。 + +【分镜拆解原则】 +1. **动作单元划分**:每个镜头必须对应一个完整且独立的动作 + - 一个动作 = 一个镜头(角色站起来、走过去、说一句话、做一个反应表情等) + - 禁止合并多个动作(站起+走过去应拆分为2个镜头) + +2. **景别标准**(根据叙事需要选择): + - 大远景:环境、氛围营造 + - 远景:全身动作、空间关系 + - 中景:交互对话、情感交流 + - 近景:细节展示、情绪表达 + - 特写:关键道具、强烈情绪 + +3. **运镜要求**: + - 固定镜头:稳定聚焦于一个主体 + - 推镜:接近主体,增强紧张感 + - 拉镜:扩大视野,交代环境 + - 摇镜:水平移动摄像机,空间转换 + - 跟镜:跟随主体移动 + - 移镜:摄像机与主体同向移动 + +4. **情绪与强度标记**: + - emotion:简短描述(兴奋、悲伤、紧张、愉快等) + - emotion_intensity:用箭头表示情绪等级 + * 极强 ↑↑↑ (3):情绪高峰、高度紧张 + * 强 ↑↑ (2):情绪明显波动 + * 中 ↑ (1):情绪有所变化 + * 平稳 → (0):情绪不变 + * 弱 ↓ (-1):情绪回落 + +【输出要求】 +1. 
生成一个数组,每个元素是一个镜头,包含: + - shot_number:镜头号 + - scene_description:场景(地点+时间,如"卧室内,早晨") + - shot_type:景别(大远景/远景/中景/近景/特写) + - camera_angle:机位角度(平视/仰视/俯视/侧面/背面) + - camera_movement:运镜方式(固定/推镜/拉镜/摇镜/跟镜/移镜) + - action:动作描述 + - result:动作完成后的画面结果 + - dialogue:角色对话或旁白(如有) + - emotion:当前情绪 + - emotion_intensity:情绪强度等级(3/2/1/0/-1) + +**重要:必须只返回纯JSON数组,不要包含任何markdown代码块、说明文字或其他内容。直接以 [ 开头,以 ] 结尾。** + +【重要提示】 +- 镜头数量必须与剧本中的独立动作数量匹配(不允许合并或减少) +- 每个镜头必须有明确的动作和结果 +- 景别选择必须符合叙事节奏(不要连续使用同一景别) +- 情绪强度必须准确反映剧本氛围变化` +} + +// GetSceneExtractionPrompt 获取场景提取提示词 +func (p *PromptI18n) GetSceneExtractionPrompt() string { + if p.IsEnglish() { + return `[Task] Extract all unique scene backgrounds from the script + +[Requirements] +1. Identify all different scenes (location + time combinations) in the script +2. Generate detailed **English** image generation prompts for each scene +3. **Important**: Scene descriptions must be **pure backgrounds** without any characters, people, or actions +4. Prompt requirements: + - Must use **English**, no Chinese characters + - Detailed description of scene, time, atmosphere, style + - Must explicitly specify "no people, no characters, empty scene" + - Must match the drama's genre and tone + +[Output Format] +**CRITICAL: Return ONLY a valid JSON array. Do NOT include any markdown code blocks, explanations, or other text. Start directly with [ and end with ].** + +Each element containing: +- location: Location (e.g., "luxurious office") +- time: Time period (e.g., "afternoon") +- prompt: Complete English image generation prompt (pure background, explicitly stating no people)` + } + + return `【任务】从剧本中提取所有唯一的场景背景 + +【要求】 +1. 识别剧本中所有不同的场景(地点+时间组合) +2. 为每个场景生成详细的**中文**图片生成提示词(Prompt) +3. **重要**:场景描述必须是**纯背景**,不能包含人物、角色、动作等元素 +4. 
Prompt要求: + - **必须使用中文**,不能包含英文字符 + - 详细描述场景、时间、氛围、风格 + - 必须明确说明"无人物、无角色、空场景" + - 要符合剧本的题材和氛围 + +【输出格式】 +**重要:必须只返回纯JSON数组,不要包含任何markdown代码块、说明文字或其他内容。直接以 [ 开头,以 ] 结尾。** + +每个元素包含: +- location:地点(如"豪华办公室") +- time:时间(如"下午") +- prompt:完整的中文图片生成提示词(纯背景,明确说明无人物)` +} + +// GetFirstFramePrompt 获取首帧提示词 +func (p *PromptI18n) GetFirstFramePrompt() string { + if p.IsEnglish() { + return `You are a professional image generation prompt expert. Please generate prompts suitable for AI image generation based on the provided shot information. + +Important: This is the first frame of the shot - a completely static image showing the initial state before the action begins. + +Key Points: +1. Focus on the initial static state - the moment before the action +2. Must NOT include any action or movement +3. Describe the character's initial posture, position, and expression +4. Can include scene atmosphere and environmental details +5. Shot type determines composition and framing + +Output Format: +Return a JSON object containing: +- prompt: Complete English image generation prompt (detailed description, suitable for AI image generation) +- description: Simplified Chinese description (for reference)` + } + + return `你是一个专业的图像生成提示词专家。请根据提供的镜头信息,生成适合用于AI图像生成的提示词。 + +重要:这是镜头的首帧 - 一个完全静态的画面,展示动作发生之前的初始状态。 + +关键要点: +1. 聚焦初始静态状态 - 动作发生之前的那一瞬间 +2. 必须不包含任何动作或运动 +3. 描述角色的初始姿态、位置和表情 +4. 可以包含场景氛围和环境细节 +5. 景别决定构图和取景范围 + +输出格式: +返回一个JSON对象,包含: +- prompt:完整的中文图片生成提示词(详细描述,适合AI图像生成) +- description:简化的中文描述(供参考)` +} + +// GetKeyFramePrompt 获取关键帧提示词 +func (p *PromptI18n) GetKeyFramePrompt() string { + if p.IsEnglish() { + return `You are a professional image generation prompt expert. Please generate prompts suitable for AI image generation based on the provided shot information. + +Important: This is the key frame of the shot - capturing the most intense and exciting moment of the action. + +Key Points: +1. Focus on the most exciting moment of the action +2. Capture peak emotional expression +3. 
Emphasize dynamic tension +4. Show character actions and expressions at their climax +5. Can include motion blur or dynamic effects + +Output Format: +Return a JSON object containing: +- prompt: Complete English image generation prompt (detailed description, suitable for AI image generation) +- description: Simplified Chinese description (for reference)` + } + + return `你是一个专业的图像生成提示词专家。请根据提供的镜头信息,生成适合用于AI图像生成的提示词。 + +重要:这是镜头的关键帧 - 捕捉动作最激烈、最精彩的瞬间。 + +关键要点: +1. 聚焦动作最精彩的时刻 +2. 捕捉情绪表达的顶点 +3. 强调动态张力 +4. 展示角色动作和表情的高潮状态 +5. 可以包含动作模糊或动态效果 + +输出格式: +返回一个JSON对象,包含: +- prompt:完整的中文图片生成提示词(详细描述,适合AI图像生成) +- description:简化的中文描述(供参考)` +} + +// GetLastFramePrompt 获取尾帧提示词 +func (p *PromptI18n) GetLastFramePrompt() string { + if p.IsEnglish() { + return `You are a professional image generation prompt expert. Please generate prompts suitable for AI image generation based on the provided shot information. + +Important: This is the last frame of the shot - a static image showing the final state and result after the action ends. + +Key Points: +1. Focus on the final state after action completion +2. Show the result of the action +3. Describe character's final posture and expression after action +4. Emphasize emotional state after action +5. Capture the calm moment after action ends + +Output Format: +Return a JSON object containing: +- prompt: Complete English image generation prompt (detailed description, suitable for AI image generation) +- description: Simplified Chinese description (for reference)` + } + + return `你是一个专业的图像生成提示词专家。请根据提供的镜头信息,生成适合用于AI图像生成的提示词。 + +重要:这是镜头的尾帧 - 一个静态画面,展示动作结束后的最终状态和结果。 + +关键要点: +1. 聚焦动作完成后的最终状态 +2. 展示动作的结果 +3. 描述角色在动作完成后的姿态和表情 +4. 强调动作后的情绪状态 +5. 捕捉动作结束后的平静瞬间 + +输出格式: +返回一个JSON对象,包含: +- prompt:完整的中文图片生成提示词(详细描述,适合AI图像生成) +- description:简化的中文描述(供参考)` +} + +// GetOutlineGenerationPrompt 获取大纲生成提示词 +func (p *PromptI18n) GetOutlineGenerationPrompt() string { + if p.IsEnglish() { + return `You are a professional short drama screenwriter. 
Based on the theme and number of episodes, create a complete short drama outline and plan the plot direction for each episode. + +Requirements: +1. Compact plot with strong conflicts and fast pace +2. Each episode should have independent conflicts while connecting the main storyline +3. Clear character arcs and growth +4. Cliffhanger endings to hook viewers +5. Clear theme and emotional core + +Output Format: +Return a JSON object containing: +- title: Drama title (creative and attractive) +- episodes: Episode list, each containing: + - episode_number: Episode number + - title: Episode title + - summary: Episode content summary (50-100 words) + - conflict: Main conflict point + - cliffhanger: Cliffhanger ending (if any)` + } + + return `你是专业短剧编剧。根据主题和剧集数量,创作完整的短剧大纲,规划好每一集的剧情走向。 + +要求: +1. 剧情紧凑,矛盾冲突强烈,节奏快 +2. 每集都有独立的矛盾冲突,同时推进主线 +3. 角色弧光清晰,成长变化明显 +4. 悬念设置合理,吸引观众继续观看 +5. 主题明确,情感内核清晰 + +输出格式: +返回一个JSON对象,包含: +- title: 剧名(富有创意和吸引力) +- episodes: 分集列表,每集包含: + - episode_number: 集数 + - title: 本集标题 + - summary: 本集内容概要(50-100字) + - conflict: 主要矛盾点 + - cliffhanger: 悬念结尾(如有)` +} + +// GetCharacterExtractionPrompt 获取角色提取提示词 +func (p *PromptI18n) GetCharacterExtractionPrompt() string { + if p.IsEnglish() { + return `You are a professional character analyst, skilled at extracting and analyzing character information from scripts. + +Your task is to extract and organize detailed character settings for all characters appearing in the script based on the provided script content. + +Requirements: +1. Extract all characters with names (ignore unnamed passersby or background characters) +2. For each character, extract: + - name: Character name + - role: Character role (main/supporting/minor) + - appearance: Physical appearance description (150-300 words) + - personality: Personality traits (100-200 words) + - description: Background story and character relationships (100-200 words) +3. 
Appearance must be detailed enough for AI image generation, including: gender, age, body type, facial features, hairstyle, clothing style, etc. +4. Main characters require more detailed descriptions, supporting characters can be simplified + +Output Format: +**CRITICAL: Return ONLY a valid JSON array. Do NOT include any markdown code blocks, explanations, or other text. Start directly with [ and end with ].** +Each element is a character object containing the above fields.` + } + + return `你是一个专业的角色分析师,擅长从剧本中提取和分析角色信息。 + +你的任务是根据提供的剧本内容,提取并整理剧中出现的所有角色的详细设定。 + +要求: +1. 提取所有有名字的角色(忽略无名路人或背景角色) +2. 对每个角色,提取以下信息: + - name: 角色名字 + - role: 角色类型(main/supporting/minor) + - appearance: 外貌描述(150-300字) + - personality: 性格特点(100-200字) + - description: 背景故事和角色关系(100-200字) +3. 外貌描述要足够详细,适合AI生成图片,包括:性别、年龄、体型、面部特征、发型、服装风格等 +4. 主要角色需要更详细的描述,次要角色可以简化 + +输出格式: +**重要:必须只返回纯JSON数组,不要包含任何markdown代码块、说明文字或其他内容。直接以 [ 开头,以 ] 结尾。** +每个元素是一个角色对象,包含上述字段。` +} + +// GetEpisodeScriptPrompt 获取分集剧本生成提示词 +func (p *PromptI18n) GetEpisodeScriptPrompt() string { + if p.IsEnglish() { + return `You are a professional short drama screenwriter. You excel at creating detailed plot content based on episode plans. + +Your task is to expand the summary in the outline into detailed plot narratives for each episode. Each episode is about 180 seconds (3 minutes) and requires substantial content. + +Requirements: +1. Expand the outline summary into detailed plot development +2. Write character dialogue and actions, not just description +3. Highlight conflict progression and emotional changes +4. Add scene transitions and atmosphere descriptions +5. Control rhythm, with climax at 2/3 point, resolution at the end +6. Each episode 800-1200 words, dialogue-rich +7. Keep consistent with character settings + +Output Format: +**CRITICAL: Return ONLY a valid JSON object. Do NOT include any markdown code blocks, explanations, or other text. 
Start directly with { and end with }.** + +- episodes: Episode list, each containing: + - episode_number: Episode number + - title: Episode title + - script_content: Detailed script content (800-1200 words)` + } + + return `你是一个专业的短剧编剧。你擅长根据分集规划创作详细的剧情内容。 + +你的任务是根据大纲中的分集规划,将每一集的概要扩展为详细的剧情叙述。每集约180秒(3分钟),需要充实的内容。 + +要求: +1. 将大纲中的概要扩展为具体的剧情发展 +2. 写出角色的对话和动作,不是简单描述 +3. 突出冲突的递进和情感的变化 +4. 增加场景转换和氛围描写 +5. 控制节奏,高潮在2/3处,结尾有收束 +6. 每集800-1200字,对话丰富 +7. 与角色设定保持一致 + +输出格式: +**重要:必须只返回纯JSON对象,不要包含任何markdown代码块、说明文字或其他内容。直接以 { 开头,以 } 结尾。** + +- episodes: 分集列表,每集包含: + - episode_number: 集数 + - title: 本集标题 + - script_content: 详细剧本内容(800-1200字)` +} + +// FormatUserPrompt 格式化用户提示词的通用文本 +func (p *PromptI18n) FormatUserPrompt(key string, args ...interface{}) string { + templates := map[string]map[string]string{ + "en": { + "outline_request": "Please create a short drama outline for the following theme:\n\nTheme: %s", + "genre_preference": "\nGenre preference: %s", + "style_requirement": "\nStyle requirement: %s", + "episode_count": "\nNumber of episodes: %d episodes", + "episode_importance": "\n\n**Important: Must plan complete storylines for all %d episodes in the episodes array, each with clear story content!**", + "character_request": "Script content:\n%s\n\nPlease extract and organize detailed character profiles for up to %d main characters from the script.", + "episode_script_request": "Drama outline:\n%s\n%s\nPlease create detailed scripts for %d episodes based on the above outline and characters.\n\n**Important requirements:**\n- Must generate all %d episodes, from episode 1 to episode %d, cannot skip any\n- Each episode is about 3-5 minutes (150-300 seconds)\n- The duration field for each episode should be set reasonably based on script content length, not all the same value\n- The episodes array in the returned JSON must contain %d elements", + "frame_info": "Shot information:\n%s\n\nPlease directly generate the image prompt for the first frame without any explanation:", + 
"key_frame_info": "Shot information:\n%s\n\nPlease directly generate the image prompt for the key frame without any explanation:", + "last_frame_info": "Shot information:\n%s\n\nPlease directly generate the image prompt for the last frame without any explanation:", + "script_content_label": "【Script Content】", + "storyboard_list_label": "【Storyboard List】", + "task_label": "【Task】", + "character_list_label": "【Available Character List】", + "scene_list_label": "【Extracted Scene Backgrounds】", + "task_instruction": "Break down the novel script into storyboard shots based on **independent action units**.", + "character_constraint": "**Important**: In the characters field, only use character IDs (numbers) from the above character list. Do not create new characters or use other IDs.", + "scene_constraint": "**Important**: In the scene_id field, select the most matching background ID (number) from the above background list. If no suitable background exists, use null.", + "shot_description_label": "Shot description: %s", + "scene_label": "Scene: %s, %s", + "characters_label": "Characters: %s", + "action_label": "Action: %s", + "result_label": "Result: %s", + "dialogue_label": "Dialogue: %s", + "atmosphere_label": "Atmosphere: %s", + "shot_type_label": "Shot type: %s", + "angle_label": "Angle: %s", + "movement_label": "Movement: %s", + "drama_info_template": "Title: %s\nSummary: %s\nGenre: %s", + }, + "zh": { + "outline_request": "请为以下主题创作短剧大纲:\n\n主题:%s", + "genre_preference": "\n类型偏好:%s", + "style_requirement": "\n风格要求:%s", + "episode_count": "\n剧集数量:%d集", + "episode_importance": "\n\n**重要:必须在episodes数组中规划完整的%d集剧情,每集都要有明确的故事内容!**", + "character_request": "剧本内容:\n%s\n\n请从剧本中提取并整理最多 %d 个主要角色的详细设定。", + "episode_script_request": "剧本大纲:\n%s\n%s\n请基于以上大纲和角色,创作 %d 集的详细剧本。\n\n**重要要求:**\n- 必须生成完整的 %d 集,从第1集到第%d集,不能遗漏\n- 每集约3-5分钟(150-300秒)\n- 每集的duration字段要根据剧本内容长度合理设置,不要都设置为同一个值\n- 返回的JSON中episodes数组必须包含 %d 个元素", + "frame_info": "镜头信息:\n%s\n\n请直接生成首帧的图像提示词,不要任何解释:", + 
"key_frame_info": "镜头信息:\n%s\n\n请直接生成关键帧的图像提示词,不要任何解释:", + "last_frame_info": "镜头信息:\n%s\n\n请直接生成尾帧的图像提示词,不要任何解释:", + "script_content_label": "【剧本内容】", + "storyboard_list_label": "【分镜头列表】", + "task_label": "【任务】", + "character_list_label": "【本剧可用角色列表】", + "scene_list_label": "【本剧已提取的场景背景列表】", + "task_instruction": "将小说剧本按**独立动作单元**拆解为分镜头方案。", + "character_constraint": "**重要**:在characters字段中,只能使用上述角色列表中的角色ID(数字),不得自创角色或使用其他ID。", + "scene_constraint": "**重要**:在scene_id字段中,必须从上述背景列表中选择最匹配的背景ID(数字)。如果没有合适的背景,则填null。", + "shot_description_label": "镜头描述: %s", + "scene_label": "场景: %s, %s", + "characters_label": "角色: %s", + "action_label": "动作: %s", + "result_label": "结果: %s", + "dialogue_label": "对白: %s", + "atmosphere_label": "氛围: %s", + "shot_type_label": "景别: %s", + "angle_label": "角度: %s", + "movement_label": "运镜: %s", + "drama_info_template": "剧名:%s\n简介:%s\n类型:%s", + }, + } + + lang := "zh" + if p.IsEnglish() { + lang = "en" + } + + template, ok := templates[lang][key] + if !ok { + return "" + } + + if len(args) > 0 { + return fmt.Sprintf(template, args...) 
+ } + return template +} diff --git a/application/services/script_generation_service.go b/application/services/script_generation_service.go index 8a0479c..85741ec 100644 --- a/application/services/script_generation_service.go +++ b/application/services/script_generation_service.go @@ -1,177 +1,42 @@ package services import ( - "encoding/json" "fmt" "strconv" "github.com/drama-generator/backend/domain/models" "github.com/drama-generator/backend/pkg/ai" + "github.com/drama-generator/backend/pkg/config" "github.com/drama-generator/backend/pkg/logger" "github.com/drama-generator/backend/pkg/utils" "gorm.io/gorm" ) type ScriptGenerationService struct { - db *gorm.DB - aiService *AIService - log *logger.Logger + db *gorm.DB + aiService *AIService + log *logger.Logger + config *config.Config + promptI18n *PromptI18n } -func NewScriptGenerationService(db *gorm.DB, log *logger.Logger) *ScriptGenerationService { +func NewScriptGenerationService(db *gorm.DB, cfg *config.Config, log *logger.Logger) *ScriptGenerationService { return &ScriptGenerationService{ - db: db, - aiService: NewAIService(db, log), - log: log, + db: db, + aiService: NewAIService(db, log), + log: log, + config: cfg, + promptI18n: NewPromptI18n(cfg), } } -type GenerateOutlineRequest struct { - DramaID string `json:"drama_id" binding:"required"` - Theme string `json:"theme" binding:"required,min=2,max=500"` - Genre string `json:"genre"` - Style string `json:"style"` - Length int `json:"length"` - Temperature float64 `json:"temperature"` -} - type GenerateCharactersRequest struct { DramaID string `json:"drama_id" binding:"required"` + EpisodeID uint `json:"episode_id"` Outline string `json:"outline"` Count int `json:"count"` Temperature float64 `json:"temperature"` -} - -type GenerateEpisodesRequest struct { - DramaID string `json:"drama_id" binding:"required"` - Outline string `json:"outline"` - EpisodeCount int `json:"episode_count" binding:"required,min=1,max=100"` - Temperature float64 `json:"temperature"` 
-} - -type OutlineResult struct { - Title string `json:"title"` - Summary string `json:"summary"` - Genre string `json:"genre"` - Tags []string `json:"tags"` - Characters []CharacterOutline `json:"characters"` - Episodes []EpisodeOutline `json:"episodes"` - KeyScenes []string `json:"key_scenes"` -} - -type CharacterOutline struct { - Name string `json:"name"` - Role string `json:"role"` - Description string `json:"description"` - Personality string `json:"personality"` - Appearance string `json:"appearance"` -} - -type EpisodeOutline struct { - EpisodeNumber int `json:"episode_number"` - Title string `json:"title"` - Summary string `json:"summary"` - Scenes []string `json:"scenes"` - Duration int `json:"duration"` -} - -func (s *ScriptGenerationService) GenerateOutline(req *GenerateOutlineRequest) (*OutlineResult, error) { - var drama models.Drama - if err := s.db.Where("id = ?", req.DramaID).First(&drama).Error; err != nil { - return nil, fmt.Errorf("drama not found") - } - - systemPrompt := `你是专业短剧编剧。根据主题和剧集数量,创作完整的短剧大纲,规划好每一集的剧情走向。 - -要求: -1. 剧情紧凑,矛盾冲突强烈,节奏快 -2. 必须规划好每一集的核心剧情 -3. 
每集有明确冲突和转折点,集与集之间有连贯性和悬念 - -**重要:必须输出完整有效的JSON,确保所有字段完整,特别是episodes数组必须完整闭合!** - -JSON格式(紧凑,summary和episodes字段必须完整): -{"title":"剧名","summary":"200-250字剧情概述,包含故事背景、主要矛盾、核心冲突、完整走向","genre":"类型","tags":["标签1","标签2","标签3"],"episodes":[{"episode_number":1,"title":"标题","summary":"80字剧情概要"},{"episode_number":2,"title":"标题","summary":"80字剧情概要"}],"key_scenes":["场景1","场景2","场景3"]} - -关键要求: -- summary控制在200-250字,简洁清晰 -- episodes必须生成用户要求的完整集数 -- 每集summary控制在80字左右 -- 确保JSON完整闭合,不要截断 -- 不要添加任何JSON外的文字说明` - - userPrompt := fmt.Sprintf(`请为以下主题创作短剧大纲: - -主题:%s`, req.Theme) - - if req.Genre != "" { - userPrompt += fmt.Sprintf("\n类型偏好:%s", req.Genre) - } - - if req.Style != "" { - userPrompt += fmt.Sprintf("\n风格要求:%s", req.Style) - } - - length := req.Length - if length == 0 { - length = 5 - } - userPrompt += fmt.Sprintf("\n剧集数量:%d集", length) - userPrompt += fmt.Sprintf("\n\n**重要:必须在episodes数组中规划完整的%d集剧情,每集都要有明确的故事内容!**", length) - - temperature := req.Temperature - if temperature == 0 { - temperature = 0.8 - } - - // 调整token限制:基础2000 + 每集约150 tokens(包含80-100字概要) - maxTokens := 2000 + (length * 150) - if maxTokens > 8000 { - maxTokens = 8000 - } - - s.log.Infow("Generating outline with episodes", - "episode_count", length, - "max_tokens", maxTokens) - - text, err := s.aiService.GenerateText( - userPrompt, - systemPrompt, - ai.WithTemperature(temperature), - ai.WithMaxTokens(maxTokens), - ) - - if err != nil { - s.log.Errorw("Failed to generate outline", "error", err) - return nil, fmt.Errorf("生成失败: %w", err) - } - - s.log.Infow("AI response received", "length", len(text), "preview", text[:minInt(200, len(text))]) - - var result OutlineResult - if err := utils.SafeParseAIJSON(text, &result); err != nil { - s.log.Errorw("Failed to parse outline JSON", "error", err, "raw_response", text[:minInt(500, len(text))]) - return nil, fmt.Errorf("解析 AI 返回结果失败: %w", err) - } - - // 将Tags转换为JSON格式存储 - tagsJSON, err := json.Marshal(result.Tags) - if err != nil { - s.log.Errorw("Failed to marshal 
tags", "error", err) - tagsJSON = []byte("[]") - } - - if err := s.db.Model(&drama).Updates(map[string]interface{}{ - "title": result.Title, - "description": result.Summary, - "genre": result.Genre, - "tags": tagsJSON, - }).Error; err != nil { - s.log.Errorw("Failed to update drama", "error", err) - } - - s.log.Infow("Outline generated", "drama_id", req.DramaID) - return &result, nil + Model string `json:"model"` // 指定使用的文本模型 } func (s *ScriptGenerationService) GenerateCharacters(req *GenerateCharactersRequest) ([]models.Character, error) { @@ -185,61 +50,35 @@ func (s *ScriptGenerationService) GenerateCharacters(req *GenerateCharactersRequ count = 5 } - systemPrompt := `你是一个专业的角色分析师,擅长从剧本中提取和分析角色信息。 - -你的任务是根据提供的剧本内容,提取并整理剧中出现的所有角色的详细设定。 - -要求: -1. 仔细阅读剧本,识别所有出现的角色 -2. 根据剧本中的对话、行为和描述,总结角色的性格特点 -3. 提取角色在剧本中的关键信息:背景、动机、目标、关系等 -4. 角色之间的关系必须基于剧本中的实际描述 -5. 外貌描述必须极其详细,如果剧本中有描述则使用,如果没有则根据角色设定合理推断,便于AI绘画生成角色形象 -6. 优先提取主要角色和重要配角,次要角色可以简略 - -请严格按照以下 JSON 格式输出,不要添加任何其他文字: - -{ - "characters": [ - { - "name": "角色名", - "role": "主角/重要配角/配角", - "description": "角色背景和简介(200-300字,包括:出身背景、成长经历、核心动机、与其他角色的关系、在故事中的作用)", - "personality": "性格特点(详细描述,100-150字,包括:主要性格特征、行为习惯、价值观、优点缺点、情绪表达方式、对待他人的态度等)", - "appearance": "外貌描述(极其详细,150-200字,必须包括:确切年龄、精确身高、体型身材、肤色质感、发型发色发长、眼睛颜色形状、面部特征(如眉毛、鼻子、嘴唇)、着装风格、服装颜色材质、配饰细节、标志性特征、整体气质风格等,描述要具体到可以直接用于AI绘画)", - "voice_style": "说话风格和语气特点(详细描述,50-80字,包括:语速语调、用词习惯、口头禅、说话时的情绪特征等)" - } - ] -} - -注意: -- 必须基于剧本内容提取角色,不要凭空创作 -- 优先提取主要角色和重要配角,数量根据剧本实际情况确定 -- description、personality、appearance、voice_style都必须详细描述,字数要充足 -- appearance外貌描述是重中之重,必须极其详细具体,要能让AI准确生成角色形象 -- 如果剧本中角色信息不完整,可以根据角色设定合理补充,但要符合剧本整体风格` + systemPrompt := s.promptI18n.GetCharacterExtractionPrompt() outlineText := req.Outline if outlineText == "" { - outlineText = fmt.Sprintf("剧名:%s\n简介:%s\n类型:%s", drama.Title, drama.Description, drama.Genre) + outlineText = s.promptI18n.FormatUserPrompt("drama_info_template", drama.Title, drama.Description, drama.Genre) } - userPrompt := fmt.Sprintf(`剧本内容: -%s - 
-请从剧本中提取并整理最多 %d 个主要角色的详细设定。`, outlineText, count) + userPrompt := s.promptI18n.FormatUserPrompt("character_request", outlineText, count) temperature := req.Temperature if temperature == 0 { temperature = 0.7 } - text, err := s.aiService.GenerateText( - userPrompt, - systemPrompt, - ai.WithTemperature(temperature), - ai.WithMaxTokens(3000), - ) + // 如果指定了模型,使用指定的模型;否则使用默认配置 + var text string + var err error + if req.Model != "" { + s.log.Infow("Using specified model for character generation", "model", req.Model) + client, getErr := s.aiService.GetAIClientForModel("text", req.Model) + if getErr != nil { + s.log.Warnw("Failed to get client for specified model, using default", "model", req.Model, "error", getErr) + text, err = s.aiService.GenerateText(userPrompt, systemPrompt, ai.WithTemperature(temperature)) + } else { + text, err = client.GenerateText(userPrompt, systemPrompt, ai.WithTemperature(temperature)) + } + } else { + text, err = s.aiService.GenerateText(userPrompt, systemPrompt, ai.WithTemperature(temperature)) + } if err != nil { s.log.Errorw("Failed to generate characters", "error", err) @@ -248,15 +87,14 @@ func (s *ScriptGenerationService) GenerateCharacters(req *GenerateCharactersRequ s.log.Infow("AI response received", "length", len(text), "preview", text[:minInt(200, len(text))]) - var result struct { - Characters []struct { - Name string `json:"name"` - Role string `json:"role"` - Description string `json:"description"` - Personality string `json:"personality"` - Appearance string `json:"appearance"` - VoiceStyle string `json:"voice_style"` - } `json:"characters"` + // AI直接返回数组格式 + var result []struct { + Name string `json:"name"` + Role string `json:"role"` + Description string `json:"description"` + Personality string `json:"personality"` + Appearance string `json:"appearance"` + VoiceStyle string `json:"voice_style"` } if err := utils.SafeParseAIJSON(text, &result); err != nil { @@ -265,7 +103,7 @@ func (s *ScriptGenerationService) 
GenerateCharacters(req *GenerateCharactersRequ } var characters []models.Character - for _, char := range result.Characters { + for _, char := range result { // 检查角色是否已存在 var existingChar models.Character err := s.db.Where("drama_id = ? AND name = ?", req.DramaID, char.Name).First(&existingChar).Error @@ -296,209 +134,25 @@ func (s *ScriptGenerationService) GenerateCharacters(req *GenerateCharactersRequ characters = append(characters, character) } + // 如果提供了 EpisodeID,建立 episode_characters 关联关系 + if req.EpisodeID > 0 { + var episode models.Episode + if err := s.db.First(&episode, req.EpisodeID).Error; err == nil { + // 使用 GORM 的 Association 建立多对多关联 + if err := s.db.Model(&episode).Association("Characters").Append(characters); err != nil { + s.log.Errorw("Failed to associate characters with episode", "error", err, "episode_id", req.EpisodeID) + } else { + s.log.Infow("Characters associated with episode", "episode_id", req.EpisodeID, "character_count", len(characters)) + } + } else { + s.log.Errorw("Episode not found for association", "episode_id", req.EpisodeID, "error", err) + } + } + s.log.Infow("Characters generated", "drama_id", req.DramaID, "total_count", len(characters), "new_count", len(characters)) return characters, nil } -func (s *ScriptGenerationService) GenerateEpisodes(req *GenerateEpisodesRequest) ([]models.Episode, error) { - var drama models.Drama - if err := s.db.Where("id = ? 
", req.DramaID).First(&drama).Error; err != nil { - return nil, fmt.Errorf("drama not found") - } - - // 获取角色信息 - var characters []models.Character - s.db.Where("drama_id = ?", req.DramaID).Find(&characters) - - var characterList string - if len(characters) > 0 { - characterList = "\n角色设定:\n" - for _, char := range characters { - characterList += fmt.Sprintf("- %s", char.Name) - if char.Role != nil { - characterList += fmt.Sprintf("(%s)", *char.Role) - } - if char.Description != nil { - characterList += fmt.Sprintf(":%s", *char.Description) - } - if char.Personality != nil { - characterList += fmt.Sprintf(" | 性格:%s", *char.Personality) - } - characterList += "\n" - } - } else { - characterList = "\n(注意:尚未设定角色,请根据大纲创作合理的角色出场)\n" - } - - systemPrompt := `你是一个专业的短剧编剧。你擅长根据分集规划创作详细的剧情内容。 - -你的任务是根据大纲中的分集规划,将每一集的概要扩展为详细的剧情叙述。每集约180秒(3分钟),需要充实的内容。 - -工作流程: -1. 大纲中已提供每集的剧情规划(80-100字概要) -2. 你需要将每集概要扩展为400-500字的详细剧情叙述 -3. 严格按照分集规划的数量和走向展开,不能遗漏任何一集 - -详细要求: -1. script_content用400-500字详细叙述,包括: - - 具体场景和环境描写 - - 角色的行动、对话要点、情绪变化 - - 冲突的产生过程和激化细节 - - 关键情节点和转折 - - 为下一集埋下的伏笔 -2. 每集有明确的冲突和转折点 -3. 集与集之间有连贯性和悬念 -4. 充分展现角色性格和关系演变 -5. 内容详实,足以支撑180秒时长 - -JSON格式(紧凑): -{"episodes":[{"episode_number":1,"title":"标题","description":"简短梗概","script_content":"400-500字详细剧情叙述","duration":210}]} - -格式说明: -1. script_content为叙述文,不是场景对话格式 -2. 每集包含开场铺垫、冲突发展、高潮转折、结局悬念 -3. 
duration根据剧情复杂度设置在150-300秒 - -关键要求: -- 大纲规划了几集就必须生成几集 -- 严格按照分集规划的故事线展开 -- 每一集都要有完整的400-500字详细内容 -- 绝对不能遗漏任何一集` - - outlineText := req.Outline - if outlineText == "" { - outlineText = fmt.Sprintf("剧名:%s\n简介:%s\n类型:%s", drama.Title, drama.Description, drama.Genre) - } - - userPrompt := fmt.Sprintf(`剧本大纲: -%s -%s -请基于以上大纲和角色,创作 %d 集的详细剧本。 - -**重要要求:** -- 必须生成完整的 %d 集,从第1集到第%d集,不能遗漏 -- 每集约3-5分钟(150-300秒) -- 每集的duration字段要根据剧本内容长度合理设置,不要都设置为同一个值 -- 返回的JSON中episodes数组必须包含 %d 个元素`, outlineText, characterList, req.EpisodeCount, req.EpisodeCount, req.EpisodeCount, req.EpisodeCount) - - temperature := req.Temperature - if temperature == 0 { - temperature = 0.7 - } - - // 根据剧集数量调整token限制 - // 模型支持128k上下文,每集400-500字约需800-1000 tokens(包含JSON结构) - baseTokens := 3000 // 基础(系统提示+角色列表+大纲) - perEpisodeTokens := 900 // 每集约900 tokens(支持400-500字详细内容) - maxTokens := baseTokens + (req.EpisodeCount * perEpisodeTokens) - - // 128k上下文,可以设置较大的token限制 - // 10集约12000 tokens,20集约21000 tokens,都在安全范围内 - if maxTokens > 32000 { - maxTokens = 32000 // 保守限制在32k,留足够空间 - } - - s.log.Infow("Generating episodes with token limit", - "episode_count", req.EpisodeCount, - "max_tokens", maxTokens, - "estimated_per_episode", perEpisodeTokens) - - text, err := s.aiService.GenerateText( - userPrompt, - systemPrompt, - ai.WithTemperature(0.8), - ai.WithMaxTokens(maxTokens), - ) - - if err != nil { - s.log.Errorw("Failed to generate episodes", "error", err) - return nil, fmt.Errorf("生成失败: %w", err) - } - - s.log.Infow("AI response received", "length", len(text), "preview", text[:minInt(200, len(text))]) - - var result struct { - Episodes []struct { - EpisodeNumber int `json:"episode_number"` - Title string `json:"title"` - Description string `json:"description"` - ScriptContent string `json:"script_content"` - Duration int `json:"duration"` - } `json:"episodes"` - } - - if err := utils.SafeParseAIJSON(text, &result); err != nil { - s.log.Errorw("Failed to parse episodes JSON", "error", err, "raw_response", 
text[:minInt(500, len(text))]) - return nil, fmt.Errorf("解析 AI 返回结果失败: %w", err) - } - - // 检查生成的集数是否符合要求 - if len(result.Episodes) < req.EpisodeCount { - s.log.Warnw("AI generated fewer episodes than requested", - "requested", req.EpisodeCount, - "generated", len(result.Episodes)) - } - - // 记录每集的详细信息 - for i, ep := range result.Episodes { - s.log.Infow("Episode parsed from AI", - "index", i, - "episode_number", ep.EpisodeNumber, - "title", ep.Title, - "description_length", len(ep.Description), - "script_content_length", len(ep.ScriptContent), - "duration", ep.Duration) - } - - var episodes []models.Episode - for _, ep := range result.Episodes { - duration := ep.Duration - if duration == 0 { - // AI未返回时长时使用默认值 - duration = 180 - s.log.Warnw("Episode duration not provided by AI, using default", - "episode_number", ep.EpisodeNumber, - "default_duration", 180) - } else { - s.log.Infow("Episode duration from AI", - "episode_number", ep.EpisodeNumber, - "duration", duration) - } - - // 记录即将保存的数据 - s.log.Infow("Creating episode in database", - "episode_number", ep.EpisodeNumber, - "title", ep.Title, - "script_content_length", len(ep.ScriptContent), - "script_content_empty", ep.ScriptContent == "") - - dramaID, err := strconv.ParseUint(req.DramaID, 10, 32) - if err != nil { - return nil, fmt.Errorf("invalid drama ID") - } - - episode := models.Episode{ - DramaID: uint(dramaID), - EpisodeNum: ep.EpisodeNumber, - Title: ep.Title, - Description: &ep.Description, - ScriptContent: &ep.ScriptContent, - Duration: duration, - Status: "draft", - } - - if err := s.db.Create(&episode).Error; err != nil { - s.log.Errorw("Failed to create episode", "error", err) - continue - } - - episodes = append(episodes, episode) - } - - s.log.Infow("Episodes generated", "drama_id", req.DramaID, "count", len(episodes)) - return episodes, nil -} - // GenerateScenesForEpisode 已废弃,使用 StoryboardService.GenerateStoryboard 替代 // ParseScript 已废弃,使用 GenerateCharacters 替代 diff --git 
a/application/services/storyboard_composition_service.go b/application/services/storyboard_composition_service.go index 9dc9626..0b203c3 100644 --- a/application/services/storyboard_composition_service.go +++ b/application/services/storyboard_composition_service.go @@ -42,12 +42,18 @@ type SceneCompositionInfo struct { StoryboardNumber int `json:"storyboard_number"` Title *string `json:"title"` Description *string `json:"description"` + ShotType *string `json:"shot_type"` + Angle *string `json:"angle"` + Movement *string `json:"movement"` Location *string `json:"location"` Time *string `json:"time"` Duration int `json:"duration"` Dialogue *string `json:"dialogue"` Action *string `json:"action"` + Result *string `json:"result"` Atmosphere *string `json:"atmosphere"` + BgmPrompt *string `json:"bgm_prompt,omitempty"` + SoundEffect *string `json:"sound_effect,omitempty"` ImagePrompt *string `json:"image_prompt,omitempty"` VideoPrompt *string `json:"video_prompt,omitempty"` Characters []SceneCharacterInfo `json:"characters"` @@ -182,12 +188,18 @@ func (s *StoryboardCompositionService) GetScenesForEpisode(episodeID string) ([] StoryboardNumber: storyboard.StoryboardNumber, Title: storyboard.Title, Description: storyboard.Description, + ShotType: storyboard.ShotType, + Angle: storyboard.Angle, + Movement: storyboard.Movement, Location: storyboard.Location, Time: storyboard.Time, Duration: storyboard.Duration, Action: storyboard.Action, Dialogue: storyboard.Dialogue, + Result: storyboard.Result, Atmosphere: storyboard.Atmosphere, + BgmPrompt: storyboard.BgmPrompt, + SoundEffect: storyboard.SoundEffect, ImagePrompt: storyboard.ImagePrompt, VideoPrompt: storyboard.VideoPrompt, SceneID: storyboard.SceneID, @@ -387,6 +399,24 @@ func (s *StoryboardCompositionService) GenerateSceneImage(req *GenerateSceneImag return nil, fmt.Errorf("image generation service not available") } +func (s *StoryboardCompositionService) DeleteScene(sceneID string) error { + var scene models.Scene + if 
err := s.db.Where("id = ?", sceneID).First(&scene).Error; err != nil { + if err == gorm.ErrRecordNotFound { + return fmt.Errorf("scene not found") + } + return fmt.Errorf("failed to find scene: %w", err) + } + + // 删除场景 + if err := s.db.Delete(&scene).Error; err != nil { + return fmt.Errorf("failed to delete scene: %w", err) + } + + s.log.Infow("Scene deleted successfully", "scene_id", sceneID) + return nil +} + func getStringValue(s *string) string { if s != nil { return *s diff --git a/application/services/storyboard_service.go b/application/services/storyboard_service.go index 0d8f491..668bae7 100644 --- a/application/services/storyboard_service.go +++ b/application/services/storyboard_service.go @@ -7,22 +7,28 @@ import ( "strings" models "github.com/drama-generator/backend/domain/models" + "github.com/drama-generator/backend/pkg/ai" + "github.com/drama-generator/backend/pkg/config" "github.com/drama-generator/backend/pkg/logger" "github.com/drama-generator/backend/pkg/utils" "gorm.io/gorm" ) type StoryboardService struct { - db *gorm.DB - aiService *AIService - log *logger.Logger + db *gorm.DB + aiService *AIService + log *logger.Logger + config *config.Config + promptI18n *PromptI18n } -func NewStoryboardService(db *gorm.DB, log *logger.Logger) *StoryboardService { +func NewStoryboardService(db *gorm.DB, cfg *config.Config, log *logger.Logger) *StoryboardService { return &StoryboardService{ - db: db, - aiService: NewAIService(db, log), - log: log, + db: db, + aiService: NewAIService(db, log), + log: log, + config: cfg, + promptI18n: NewPromptI18n(cfg), } } @@ -52,7 +58,7 @@ type GenerateStoryboardResult struct { Total int `json:"total"` } -func (s *StoryboardService) GenerateStoryboard(episodeID string) (*GenerateStoryboardResult, error) { +func (s *StoryboardService) GenerateStoryboard(episodeID string, model string) (*GenerateStoryboardResult, error) { // 从数据库获取剧集信息 var episode struct { ID string @@ -122,20 +128,33 @@ func (s *StoryboardService) 
GenerateStoryboard(episodeID string) (*GenerateStory "scene_count", len(scenes), "scenes", sceneList) - // 构建分镜头生成提示词 - prompt := fmt.Sprintf(`【角色】你是一位资深影视分镜师,精通罗伯特·麦基的镜头拆解理论,擅长构建情绪节奏。 + // 使用国际化提示词 + systemPrompt := s.promptI18n.GetStoryboardSystemPrompt() -【任务】将小说剧本按**独立动作单元**拆解为分镜头方案。 + scriptLabel := s.promptI18n.FormatUserPrompt("script_content_label") + taskLabel := s.promptI18n.FormatUserPrompt("task_label") + taskInstruction := s.promptI18n.FormatUserPrompt("task_instruction") + charListLabel := s.promptI18n.FormatUserPrompt("character_list_label") + charConstraint := s.promptI18n.FormatUserPrompt("character_constraint") + sceneListLabel := s.promptI18n.FormatUserPrompt("scene_list_label") + sceneConstraint := s.promptI18n.FormatUserPrompt("scene_constraint") -【本剧可用角色列表】 + prompt := fmt.Sprintf(`%s + +%s %s -**重要**:在characters字段中,只能使用上述角色列表中的角色ID(数字),不得自创角色或使用其他ID。 +%s%s -【本剧已提取的场景背景列表】 +%s %s -**重要**:在scene_id字段中,必须从上述背景列表中选择最匹配的背景ID(数字)。如果没有合适的背景,则填null。 +%s + +%s +%s + +%s 【剧本原文】 %s @@ -305,23 +324,61 @@ func (s *StoryboardService) GenerateStoryboard(episodeID string) (*GenerateStory - 包含感官细节:视觉、听觉、触觉、嗅觉 - 描述光线、色彩、质感、动态 - 为视频生成AI提供足够的画面构建信息 -- 避免抽象词汇,使用具象的视觉化描述`, characterList, sceneList, scriptContent) +- 避免抽象词汇,使用具象的视觉化描述`, systemPrompt, scriptLabel, scriptContent, taskLabel, taskInstruction, charListLabel, characterList, charConstraint, sceneListLabel, sceneList, sceneConstraint) - // 调用AI服务生成 - text, err := s.aiService.GenerateText(prompt, "") - if err != nil { - s.log.Errorw("Failed to generate storyboard", "error", err) - return nil, fmt.Errorf("生成分镜头失败: %w", err) + // 调用AI服务生成(如果指定了模型则使用指定的模型) + // 设置较大的max_tokens以确保完整返回所有分镜的JSON + var text string + if model != "" { + s.log.Infow("Using specified model for storyboard generation", "model", model) + client, getErr := s.aiService.GetAIClientForModel("text", model) + if getErr != nil { + s.log.Warnw("Failed to get client for specified model, using default", "model", model, "error", getErr) + var err 
error + text, err = s.aiService.GenerateText(prompt, "", ai.WithMaxTokens(16000)) + if err != nil { + s.log.Errorw("Failed to generate storyboard", "error", err) + return nil, fmt.Errorf("生成分镜头失败: %w", err) + } + } else { + var err error + text, err = client.GenerateText(prompt, "", ai.WithMaxTokens(16000)) + if err != nil { + s.log.Errorw("Failed to generate storyboard", "error", err) + return nil, fmt.Errorf("生成分镜头失败: %w", err) + } + } + } else { + var err error + text, err = s.aiService.GenerateText(prompt, "", ai.WithMaxTokens(16000)) + if err != nil { + s.log.Errorw("Failed to generate storyboard", "error", err) + return nil, fmt.Errorf("生成分镜头失败: %w", err) + } } // 解析JSON结果 + // AI可能返回两种格式: + // 1. 数组格式: [{...}, {...}] + // 2. 对象格式: {"storyboards": [{...}, {...}]} var result GenerateStoryboardResult - if err := utils.SafeParseAIJSON(text, &result); err != nil { - s.log.Errorw("Failed to parse storyboard JSON", "error", err, "response", text[:min(500, len(text))]) - return nil, fmt.Errorf("解析分镜头结果失败: %w", err) - } - result.Total = len(result.Storyboards) + // 先尝试解析为数组格式 + var storyboards []Storyboard + if err := utils.SafeParseAIJSON(text, &storyboards); err == nil { + // 成功解析为数组,包装为对象 + result.Storyboards = storyboards + result.Total = len(storyboards) + s.log.Infow("Parsed storyboard as array format", "count", len(storyboards)) + } else { + // 尝试解析为对象格式 + if err := utils.SafeParseAIJSON(text, &result); err != nil { + s.log.Errorw("Failed to parse storyboard JSON in both formats", "error", err, "response", text[:min(500, len(text))]) + return nil, fmt.Errorf("解析分镜头结果失败: %w", err) + } + result.Total = len(result.Storyboards) + s.log.Infow("Parsed storyboard as object format", "count", len(result.Storyboards)) + } // 计算总时长(所有分镜时长之和) totalDuration := 0 @@ -566,16 +623,53 @@ func (s *StoryboardService) generateVideoPrompt(sb Storyboard) string { } func (s *StoryboardService) saveStoryboards(episodeID string, storyboards []Storyboard) error { + // 验证 episodeID + 
epID, err := strconv.ParseUint(episodeID, 10, 32) + if err != nil { + s.log.Errorw("Invalid episode ID", "episode_id", episodeID, "error", err) + return fmt.Errorf("无效的章节ID: %s", episodeID) + } + + // 防御性检查:如果AI返回的分镜数量为0,不应该删除旧分镜 + if len(storyboards) == 0 { + s.log.Errorw("AI返回的分镜数量为0,拒绝保存以避免删除现有分镜", "episode_id", episodeID) + return fmt.Errorf("AI生成分镜失败:返回的分镜数量为0") + } + + s.log.Infow("开始保存分镜头", + "episode_id", episodeID, + "episode_id_uint", uint(epID), + "storyboard_count", len(storyboards)) + // 开启事务 return s.db.Transaction(func(tx *gorm.DB) error { - // 获取该剧集所有的分镜ID + // 验证该章节是否存在 + var episode models.Episode + if err := tx.First(&episode, epID).Error; err != nil { + s.log.Errorw("Episode not found", "episode_id", episodeID, "error", err) + return fmt.Errorf("章节不存在: %s", episodeID) + } + + s.log.Infow("找到章节信息", + "episode_id", episode.ID, + "episode_number", episode.EpisodeNum, + "drama_id", episode.DramaID, + "title", episode.Title) + + // 获取该剧集所有的分镜ID(使用 uint 类型) var storyboardIDs []uint if err := tx.Model(&models.Storyboard{}). - Where("episode_id = ?", episodeID). + Where("episode_id = ?", uint(epID)). Pluck("id", &storyboardIDs).Error; err != nil { return err } + s.log.Infow("查询到现有分镜", + "episode_id_string", episodeID, + "episode_id_uint", uint(epID), + "existing_storyboard_count", len(storyboardIDs), + "storyboard_ids", storyboardIDs) + // 如果有分镜,先清理关联的image_generations的storyboard_id if len(storyboardIDs) > 0 { if err := tx.Model(&models.ImageGeneration{}). 
@@ -583,13 +677,26 @@ func (s *StoryboardService) saveStoryboards(episodeID string, storyboards []Stor Update("storyboard_id", nil).Error; err != nil { return err } + s.log.Infow("已清理关联的图片生成记录", "count", len(storyboardIDs)) } - // 删除该剧集已有的分镜头 - if err := tx.Where("episode_id = ?", episodeID).Delete(&models.Storyboard{}).Error; err != nil { - return err + // 删除该剧集已有的分镜头(使用 uint 类型确保类型匹配) + s.log.Warnw("准备删除分镜数据", + "episode_id_string", episodeID, + "episode_id_uint", uint(epID), + "episode_id_from_db", episode.ID, + "will_delete_count", len(storyboardIDs)) + + result := tx.Where("episode_id = ?", uint(epID)).Delete(&models.Storyboard{}) + if result.Error != nil { + s.log.Errorw("删除旧分镜失败", "episode_id", uint(epID), "error", result.Error) + return result.Error } + s.log.Infow("已删除旧分镜头", + "episode_id", uint(epID), + "deleted_count", result.RowsAffected) + // 注意:不删除背景,因为背景是在分镜拆解前就提取好的 // AI会直接返回scene_id,不需要在这里做字符串匹配 @@ -616,8 +723,6 @@ func (s *StoryboardService) saveStoryboards(episodeID string, storyboards []Stor "scene_id", *sb.SceneID) } - epID, _ := strconv.ParseUint(episodeID, 10, 32) - // 处理 title 字段 var titlePtr *string if sb.Title != "" { diff --git a/application/services/video_generation_service.go b/application/services/video_generation_service.go index 9e1ae77..abe813a 100644 --- a/application/services/video_generation_service.go +++ b/application/services/video_generation_service.go @@ -7,6 +7,7 @@ import ( "time" models "github.com/drama-generator/backend/domain/models" + "github.com/drama-generator/backend/infrastructure/external/ffmpeg" "github.com/drama-generator/backend/infrastructure/storage" "github.com/drama-generator/backend/pkg/logger" "github.com/drama-generator/backend/pkg/video" @@ -19,6 +20,7 @@ type VideoGenerationService struct { log *logger.Logger localStorage *storage.LocalStorage aiService *AIService + ffmpeg *ffmpeg.FFmpeg } func NewVideoGenerationService(db *gorm.DB, transferService *ResourceTransferService, localStorage 
*storage.LocalStorage, aiService *AIService, log *logger.Logger) *VideoGenerationService { @@ -28,6 +30,7 @@ func NewVideoGenerationService(db *gorm.DB, transferService *ResourceTransferSer transferService: transferService, aiService: aiService, log: log, + ffmpeg: ffmpeg.NewFFmpeg(log), } go service.RecoverPendingTasks() @@ -316,18 +319,40 @@ func (s *VideoGenerationService) pollTaskStatus(videoGenID uint, taskID string, } func (s *VideoGenerationService) completeVideoGeneration(videoGenID uint, videoURL string, duration *int, width *int, height *int, firstFrameURL *string) { + var localVideoPath string + // 下载视频到本地存储(仅用于缓存,不更新数据库) if s.localStorage != nil && videoURL != "" { - _, err := s.localStorage.DownloadFromURL(videoURL, "videos") + downloadedPath, err := s.localStorage.DownloadFromURL(videoURL, "videos") if err != nil { s.log.Warnw("Failed to download video to local storage", "error", err, "id", videoGenID, "original_url", videoURL) } else { + localVideoPath = downloadedPath s.log.Infow("Video downloaded to local storage for caching", "id", videoGenID, - "original_url", videoURL) + "original_url", videoURL, + "local_path", localVideoPath) + } + } + + // 如果视频已下载到本地,探测真实时长 + if localVideoPath != "" && s.ffmpeg != nil { + if probedDuration, err := s.ffmpeg.GetVideoDuration(localVideoPath); err == nil { + // 转换为整数秒(向上取整) + durationInt := int(probedDuration + 0.5) + duration = &durationInt + s.log.Infow("Probed video duration", + "id", videoGenID, + "duration_seconds", durationInt, + "duration_float", probedDuration) + } else { + s.log.Warnw("Failed to probe video duration, using provided duration", + "error", err, + "id", videoGenID, + "local_path", localVideoPath) } } @@ -372,13 +397,22 @@ func (s *VideoGenerationService) completeVideoGeneration(videoGenID uint, videoU var videoGen models.VideoGeneration if err := s.db.First(&videoGen, videoGenID).Error; err == nil { if videoGen.StoryboardID != nil { - if err := s.db.Model(&models.Storyboard{}).Where("id = 
?", *videoGen.StoryboardID).Update("video_url", videoURL).Error; err != nil { - s.log.Warnw("Failed to update storyboard video_url", "storyboard_id", *videoGen.StoryboardID, "error", err) + // 更新 Storyboard 的 video_url 和 duration + storyboardUpdates := map[string]interface{}{ + "video_url": videoURL, + } + if duration != nil { + storyboardUpdates["duration"] = *duration + } + if err := s.db.Model(&models.Storyboard{}).Where("id = ?", *videoGen.StoryboardID).Updates(storyboardUpdates).Error; err != nil { + s.log.Warnw("Failed to update storyboard", "storyboard_id", *videoGen.StoryboardID, "error", err) + } else { + s.log.Infow("Updated storyboard with video info", "storyboard_id", *videoGen.StoryboardID, "duration", duration) } } } - s.log.Infow("Video generation completed", "id", videoGenID, "url", videoURL) + s.log.Infow("Video generation completed", "id", videoGenID, "url", videoURL, "duration", duration) } func (s *VideoGenerationService) updateVideoGenError(videoGenID uint, errorMsg string) { diff --git a/configs/config.example.yaml b/configs/config.example.yaml index ae25c33..080e768 100644 --- a/configs/config.example.yaml +++ b/configs/config.example.yaml @@ -2,6 +2,7 @@ app: name: "Huobao Drama API" version: "1.0.0" debug: true + language: "zh" # 系统语言:zh(中文) 或 en(英文) server: port: 5678 diff --git a/infrastructure/external/ffmpeg/ffmpeg.go b/infrastructure/external/ffmpeg/ffmpeg.go index 624964e..9985296 100644 --- a/infrastructure/external/ffmpeg/ffmpeg.go +++ b/infrastructure/external/ffmpeg/ffmpeg.go @@ -316,17 +316,68 @@ func (f *FFmpeg) mergeWithXfade(inputPaths []string, clips []VideoClip, outputPa f.log.Infow("Target resolution", "width", maxWidth, "height", maxHeight) // 为每个视频流添加缩放滤镜,统一分辨率 + // 同时为有转场的视频添加 tpad 延长(freeze 最后一帧) var scaleFilters []string for i := 0; i < len(inputPaths); i++ { + // 检查当前视频是否需要转场到下一个视频 + var tpadDuration float64 = 0 + if i < len(clips)-1 && clips[i].Transition != nil { + // 检查转场类型 + if tType, ok := 
clips[i].Transition["type"].(string); ok { + // none 转场不需要 tpad + if strings.ToLower(tType) != "none" && tType != "" { + if tDuration, ok := clips[i].Transition["duration"].(float64); ok && tDuration > 0 { + tpadDuration = tDuration + } else { + tpadDuration = 1.0 // 默认1秒 + } + } + } else { + // 没有指定类型,默认需要转场 + if tDuration, ok := clips[i].Transition["duration"].(float64); ok && tDuration > 0 { + tpadDuration = tDuration + } else { + tpadDuration = 1.0 + } + } + } + // 使用scale滤镜缩放到目标分辨率,pad添加黑边保持长宽比 - scaleFilters = append(scaleFilters, - fmt.Sprintf("[%d:v]scale=%d:%d:force_original_aspect_ratio=decrease,pad=%d:%d:(ow-iw)/2:(oh-ih)/2[v%d]", - i, maxWidth, maxHeight, maxWidth, maxHeight, i)) + // 如果需要转场,使用 tpad 延长视频(freeze最后一帧) + if tpadDuration > 0 { + scaleFilters = append(scaleFilters, + fmt.Sprintf("[%d:v]scale=%d:%d:force_original_aspect_ratio=decrease,pad=%d:%d:(ow-iw)/2:(oh-ih)/2,tpad=stop_mode=clone:stop_duration=%.2f[v%d]", + i, maxWidth, maxHeight, maxWidth, maxHeight, tpadDuration, i)) + f.log.Infow("Adding tpad to video", "index", i, "duration", tpadDuration) + } else { + scaleFilters = append(scaleFilters, + fmt.Sprintf("[%d:v]scale=%d:%d:force_original_aspect_ratio=decrease,pad=%d:%d:(ow-iw)/2:(oh-ih)/2[v%d]", + i, maxWidth, maxHeight, maxWidth, maxHeight, i)) + } } // 构建filter_complex - // 例如: [0:v][1:v]xfade=transition=fade:duration=1:offset=5[v01];[v01][2:v]xfade=transition=fade:duration=1:offset=10[out] + // 检查是否有任何转场效果 + hasAnyTransition := false + for i := 0; i < len(inputPaths)-1; i++ { + if clips[i].Transition != nil { + if tType, ok := clips[i].Transition["type"].(string); ok { + if strings.ToLower(tType) != "none" && tType != "" { + hasAnyTransition = true + break + } + } + } + } + + // 如果没有任何转场,使用简单拼接 + if !hasAnyTransition { + f.log.Infow("No transitions detected, using simple concatenation") + return f.concatenateVideos(inputPaths, outputPath) + } + // 构建转场滤镜,使用缩放后的视频流 + // 对所有相邻视频都应用 xfade,type=none 时使用 0 秒时长实现无缝拼接 var transitionFilters 
[]string var offset float64 = 0 @@ -337,26 +388,31 @@ func (f *FFmpeg) mergeWithXfade(inputPaths []string, clips []VideoClip, outputPa clipDuration = clips[i].EndTime - clips[i].StartTime } - // 获取转场类型和时长 - transitionType := "fade" // 默认淡入淡出 - transitionDuration := 1.0 // 默认转场时长为1秒 + // 默认转场参数 + transitionType := "fade" + transitionDuration := 1.0 if clips[i].Transition != nil { - // 读取转场类型 - if tType, ok := clips[i].Transition["type"].(string); ok && tType != "" { - transitionType = f.mapTransitionType(tType) - f.log.Infow("Using transition type", "type", tType, "mapped", transitionType) + if tType, ok := clips[i].Transition["type"].(string); ok { + if strings.ToLower(tType) == "none" || tType == "" { + // none 转场使用 0 秒时长,实现无缝拼接 + transitionDuration = 0.0 + f.log.Infow("Using no transition (0s xfade)", "clip_index", i) + } else { + transitionType = f.mapTransitionType(tType) + f.log.Infow("Using transition type", "type", tType, "mapped", transitionType) + } } - // 读取转场时长 - if tDuration, ok := clips[i].Transition["duration"].(float64); ok && tDuration > 0 { - transitionDuration = tDuration + // 只有非 none 转场才读取时长 + if transitionDuration > 0 { + if tDuration, ok := clips[i].Transition["duration"].(float64); ok && tDuration > 0 { + transitionDuration = tDuration + } } } // 计算转场开始的时间点 - // 转场在两个片段的交界处,从前一个片段结束前 transitionDuration/2 开始 - // 这样转场效果会平均分布在两个片段的交界处 - offset += clipDuration - (transitionDuration / 2) + offset += clipDuration if offset < 0 { offset = 0 } @@ -395,38 +451,104 @@ func (f *FFmpeg) mergeWithXfade(inputPaths []string, clips []VideoClip, outputPa // 音频处理:如果有任何视频包含音频流,则处理音频 var fullFilter string if hasAnyAudio { - // 为没有音频的视频生成静音轨道,确保所有输入音频流一致 - var silenceFilters []string + // 为音频流添加处理:生成静音流或延长音频 + var audioFilters []string for i := 0; i < len(inputPaths); i++ { - if !audioStreams[i] { - // 计算该视频的时长 - clipDuration := clips[i].Duration - if clips[i].EndTime > 0 && clips[i].StartTime >= 0 { - clipDuration = clips[i].EndTime - clips[i].StartTime + // 
计算该视频的时长 + clipDuration := clips[i].Duration + if clips[i].EndTime > 0 && clips[i].StartTime >= 0 { + clipDuration = clips[i].EndTime - clips[i].StartTime + } + + // 检查是否需要为转场延长音频 + var padDuration float64 = 0 + if i < len(clips)-1 && clips[i].Transition != nil { + // 检查转场类型 + needTransition := true + if tType, ok := clips[i].Transition["type"].(string); ok { + if strings.ToLower(tType) == "none" || tType == "" { + needTransition = false + } } - // anullsrc是源滤镜,不接受输入,使用duration参数指定时长 - silenceFilters = append(silenceFilters, - fmt.Sprintf("anullsrc=channel_layout=stereo:sample_rate=44100:duration=%.2f[a%d]", clipDuration, i)) - } - } - // 拼接所有音频流(包括生成的静音流) - var audioConcat strings.Builder - for i := 0; i < len(inputPaths); i++ { - if audioStreams[i] { - audioConcat.WriteString(fmt.Sprintf("[%d:a]", i)) + // 只有需要转场时才延长音频 + if needTransition { + if tDuration, ok := clips[i].Transition["duration"].(float64); ok && tDuration > 0 { + padDuration = tDuration + } else { + padDuration = 1.0 + } + } + } + + if !audioStreams[i] { + // 没有音频的视频:生成静音轨道(包括转场延长) + totalDuration := clipDuration + padDuration + audioFilters = append(audioFilters, + fmt.Sprintf("anullsrc=channel_layout=stereo:sample_rate=44100:duration=%.2f[a%d]", totalDuration, i)) + f.log.Infow("Generated silence for audio", "index", i, "duration", totalDuration) + } else if padDuration > 0 { + // 有音频且需要延长:使用apad添加静音延长(稍后会用acrossfade处理) + audioFilters = append(audioFilters, + fmt.Sprintf("[%d:a]apad=pad_dur=%.2f[a%d]", i, padDuration, i)) + f.log.Infow("Padding audio with silence", "index", i, "pad_duration", padDuration) } else { - audioConcat.WriteString(fmt.Sprintf("[a%d]", i)) + // 有音频但不需要延长:直接标记 + audioFilters = append(audioFilters, + fmt.Sprintf("[%d:a]acopy[a%d]", i, i)) } } - audioConcat.WriteString(fmt.Sprintf("concat=n=%d:v=0:a=1[outa]", len(inputPaths))) - // 构建完整滤镜:先生成静音流,再拼接音频 - if len(silenceFilters) > 0 { - fullFilter = filterComplex + ";" + strings.Join(silenceFilters, ";") + ";" + 
audioConcat.String() - } else { - fullFilter = filterComplex + ";" + audioConcat.String() + // 音频交叉淡入淡出(避免转场时静音) + // 对所有相邻音频都应用 acrossfade,type=none 时使用 0 秒时长 + var audioCrossfades []string + + for i := 0; i < len(inputPaths)-1; i++ { + // 默认转场时长 + transitionDuration := 1.0 + if clips[i].Transition != nil { + if tType, ok := clips[i].Transition["type"].(string); ok { + if strings.ToLower(tType) == "none" || tType == "" { + // none 转场使用 0 秒 + transitionDuration = 0.0 + } + } + // 只有非 none 转场才读取自定义时长 + if transitionDuration > 0 { + if tDuration, ok := clips[i].Transition["duration"].(float64); ok && tDuration > 0 { + transitionDuration = tDuration + } + } + } + + var inputLabel, outputLabel string + if i == 0 { + inputLabel = "[a0][a1]" + } else { + inputLabel = fmt.Sprintf("[ax%02d][a%d]", i-1, i+1) + } + + if i == len(inputPaths)-2 { + outputLabel = "[outa]" + } else { + outputLabel = fmt.Sprintf("[ax%02d]", i) + } + + // acrossfade: d=转场时长,c1=第一个音频淡出曲线,c2=第二个音频淡入曲线 + // 0 秒时长实现无缝音频拼接 + audioCrossfades = append(audioCrossfades, + fmt.Sprintf("%sacrossfade=d=%.2f:c1=tri:c2=tri%s", inputLabel, transitionDuration, outputLabel)) + + f.log.Infow("Audio crossfade", + "clip_index", i, + "duration", transitionDuration) } + + // 构建完整滤镜:音频处理 + 音频交叉淡入淡出 + var allAudioFilters []string + allAudioFilters = append(allAudioFilters, audioFilters...) + allAudioFilters = append(allAudioFilters, audioCrossfades...) 
+ fullFilter = filterComplex + ";" + strings.Join(allAudioFilters, ";") } else { // 所有视频都无音频流,只处理视频 fullFilter = filterComplex @@ -589,6 +711,36 @@ func (f *FFmpeg) getVideoResolution(videoPath string) (int, int) { return width, height } +// GetVideoDuration 获取视频时长(秒) +func (f *FFmpeg) GetVideoDuration(videoPath string) (float64, error) { + cmd := exec.Command("ffprobe", + "-v", "error", + "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", + videoPath, + ) + + output, err := cmd.CombinedOutput() + if err != nil { + f.log.Errorw("Failed to get video duration", "path", videoPath, "error", err) + return 0, fmt.Errorf("ffprobe failed: %w", err) + } + + result := strings.TrimSpace(string(output)) + var duration float64 + _, err = fmt.Sscanf(result, "%f", &duration) + if err != nil { + f.log.Errorw("Failed to parse duration", "output", result, "error", err) + return 0, fmt.Errorf("parse duration failed: %w", err) + } + + if duration <= 0 { + return 0, fmt.Errorf("invalid duration: %f", duration) + } + + return duration, nil +} + func (f *FFmpeg) copyFile(src, dst string) error { cmd := exec.Command("cp", src, dst) output, err := cmd.CombinedOutput() @@ -610,3 +762,94 @@ func (f *FFmpeg) cleanup(paths []string) { func (f *FFmpeg) CleanupTempDir() error { return os.RemoveAll(f.tempDir) } + +// ExtractAudio 从视频文件中提取音频轨道 +// 返回提取的音频文件路径 +func (f *FFmpeg) ExtractAudio(videoURL, outputPath string) (string, error) { + f.log.Infow("Extracting audio from video", "url", videoURL, "output", outputPath) + + // 下载视频文件 + downloadPath := filepath.Join(f.tempDir, fmt.Sprintf("video_%d.mp4", time.Now().Unix())) + localVideoPath, err := f.downloadVideo(videoURL, downloadPath) + if err != nil { + return "", fmt.Errorf("failed to download video: %w", err) + } + defer os.Remove(localVideoPath) + + // 检查视频是否有音频流 + if !f.hasAudioStream(localVideoPath) { + f.log.Warnw("Video has no audio stream, generating silence", "video", videoURL) + // 获取视频时长 + duration, err 
:= f.GetVideoDuration(localVideoPath) + if err != nil { + return "", fmt.Errorf("failed to get video duration: %w", err) + } + // 生成静音音频文件 + return f.generateSilence(outputPath, duration) + } + + // 确保输出目录存在 + outputDir := filepath.Dir(outputPath) + if err := os.MkdirAll(outputDir, 0755); err != nil { + return "", fmt.Errorf("failed to create output directory: %w", err) + } + + // 使用FFmpeg提取音频 + // -vn: 禁用视频 + // -acodec: 音频编码器 + // -ar: 音频采样率 + // -ac: 音频声道数 + // -ab: 音频比特率 + cmd := exec.Command("ffmpeg", + "-i", localVideoPath, + "-vn", + "-acodec", "aac", + "-ar", "44100", + "-ac", "2", + "-ab", "128k", + "-y", + outputPath, + ) + + output, err := cmd.CombinedOutput() + if err != nil { + f.log.Errorw("FFmpeg audio extraction failed", "error", err, "output", string(output)) + return "", fmt.Errorf("ffmpeg audio extraction failed: %w, output: %s", err, string(output)) + } + + f.log.Infow("Audio extracted successfully", "output", outputPath) + return outputPath, nil +} + +// generateSilence 生成指定时长的静音音频文件 +func (f *FFmpeg) generateSilence(outputPath string, duration float64) (string, error) { + f.log.Infow("Generating silence audio", "duration", duration, "output", outputPath) + + // 确保输出目录存在 + outputDir := filepath.Dir(outputPath) + if err := os.MkdirAll(outputDir, 0755); err != nil { + return "", fmt.Errorf("failed to create output directory: %w", err) + } + + // 使用FFmpeg生成静音 + // -f lavfi: 使用lavfi(libavfilter)输入 + // -i anullsrc: 生成静音音频源 + cmd := exec.Command("ffmpeg", + "-f", "lavfi", + "-i", fmt.Sprintf("anullsrc=channel_layout=stereo:sample_rate=44100"), + "-t", fmt.Sprintf("%.2f", duration), + "-acodec", "aac", + "-ab", "128k", + "-y", + outputPath, + ) + + output, err := cmd.CombinedOutput() + if err != nil { + f.log.Errorw("FFmpeg silence generation failed", "error", err, "output", string(output)) + return "", fmt.Errorf("ffmpeg silence generation failed: %w, output: %s", err, string(output)) + } + + f.log.Infow("Silence audio generated successfully", 
"output", outputPath) + return outputPath, nil +} diff --git a/pkg/ai/openai_client.go b/pkg/ai/openai_client.go index 1264e1d..83734fc 100644 --- a/pkg/ai/openai_client.go +++ b/pkg/ai/openai_client.go @@ -159,6 +159,27 @@ func (c *OpenAIClient) sendChatRequest(req *ChatCompletionRequest) (*ChatComplet fmt.Printf("OpenAI: Successfully parsed response, choices count: %d\n", len(chatResp.Choices)) + if len(chatResp.Choices) == 0 { + fmt.Printf("OpenAI: No choices in response\n") + return nil, fmt.Errorf("no choices in response") + } + + // 检查 finish_reason,处理内容过滤的情况 + if len(chatResp.Choices) > 0 { + finishReason := chatResp.Choices[0].FinishReason + content := chatResp.Choices[0].Message.Content + + fmt.Printf("OpenAI: finish_reason=%s, content_length=%d\n", finishReason, len(content)) + + if finishReason == "content_filter" { + return nil, fmt.Errorf("AI内容被安全过滤器拦截,可能因为:\n1. 请求内容触发了安全策略\n2. 生成的内容包含敏感信息\n3. 建议:调整输入内容或联系API提供商调整过滤策略") + } + + if content == "" && finishReason != "stop" { + return nil, fmt.Errorf("AI返回内容为空 (finish_reason: %s),可能的原因:\n1. 内容被过滤\n2. Token限制\n3. API异常", finishReason) + } + } + return &chatResp, nil } diff --git a/pkg/config/config.go b/pkg/config/config.go index cbe3fca..081db5d 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -15,9 +15,10 @@ type Config struct { } type AppConfig struct { - Name string `mapstructure:"name"` - Version string `mapstructure:"version"` - Debug bool `mapstructure:"debug"` + Name string `mapstructure:"name"` + Version string `mapstructure:"version"` + Debug bool `mapstructure:"debug"` + Language string `mapstructure:"language"` // zh 或 en } type ServerConfig struct { diff --git a/pkg/utils/json_parser.go b/pkg/utils/json_parser.go index bb08195..fa09dd1 100644 --- a/pkg/utils/json_parser.go +++ b/pkg/utils/json_parser.go @@ -21,16 +21,44 @@ func SafeParseAIJSON(aiResponse string, v interface{}) error { // 1. 
移除可能的Markdown代码块标记 cleaned := strings.TrimSpace(aiResponse) + // 移除开头的 ```json 或 ``` cleaned = regexp.MustCompile("(?m)^```json\\s*").ReplaceAllString(cleaned, "") cleaned = regexp.MustCompile("(?m)^```\\s*").ReplaceAllString(cleaned, "") + // 移除结尾的 ``` + cleaned = regexp.MustCompile("(?m)```\\s*$").ReplaceAllString(cleaned, "") cleaned = strings.TrimSpace(cleaned) - // 2. 提取JSON对象 (查找第一个 { 到最后一个 }) - jsonRegex := regexp.MustCompile(`(?s)\{.*\}`) - jsonMatch := jsonRegex.FindString(cleaned) + // 2. 提取JSON (支持对象 {} 和数组 []) + var jsonMatch string + + // 优先尝试提取完整的JSON(对象或数组) + // 先尝试对象格式 + if strings.HasPrefix(cleaned, "{") { + jsonRegex := regexp.MustCompile(`(?s)\{.*\}`) + jsonMatch = jsonRegex.FindString(cleaned) + } + + // 如果没找到对象,尝试数组格式 + if jsonMatch == "" && strings.HasPrefix(cleaned, "[") { + jsonRegex := regexp.MustCompile(`(?s)\[.*\]`) + jsonMatch = jsonRegex.FindString(cleaned) + } + + // 如果还是没找到,尝试从中间提取 + if jsonMatch == "" { + // 尝试对象 + objRegex := regexp.MustCompile(`(?s)\{.*\}`) + jsonMatch = objRegex.FindString(cleaned) + + // 如果对象没找到,尝试数组 + if jsonMatch == "" { + arrRegex := regexp.MustCompile(`(?s)\[.*\]`) + jsonMatch = arrRegex.FindString(cleaned) + } + } if jsonMatch == "" { - return fmt.Errorf("响应中未找到有效的JSON对象,原始响应: %s", truncateString(aiResponse, 200)) + return fmt.Errorf("响应中未找到有效的JSON对象或数组,原始响应: %s", truncateString(aiResponse, 200)) } // 3. 尝试解析JSON @@ -47,7 +75,17 @@ func SafeParseAIJSON(aiResponse string, v interface{}) error { } } - // 5. 提供详细的错误上下文 + // 5. 检测是否是响应被截断导致的问题 + if isTruncated(jsonMatch) { + return fmt.Errorf( + "AI响应可能被截断,导致JSON不完整。\n请尝试:\n1. 增加maxTokens参数\n2. 简化输入内容\n3. 使用更强大的模型\n\n原始错误: %s\n响应长度: %d\n响应末尾: %s", + err.Error(), + len(jsonMatch), + truncateString(jsonMatch[maxInt(0, len(jsonMatch)-200):], 200), + ) + } + + // 6. 
提供详细的错误上下文 if jsonErr, ok := err.(*json.SyntaxError); ok { errorPos := int(jsonErr.Offset) start := maxInt(0, errorPos-100) @@ -130,6 +168,38 @@ func ValidateJSON(jsonStr string) error { return json.Unmarshal([]byte(jsonStr), &js) } +// isTruncated 检测JSON字符串是否可能被截断 +func isTruncated(jsonStr string) bool { + trimmed := strings.TrimSpace(jsonStr) + if len(trimmed) == 0 { + return false + } + + // 检查是否以不完整的字符串结尾(引号未闭合) + lastChar := trimmed[len(trimmed)-1] + if lastChar != '}' && lastChar != ']' { + return true + } + + // 检查括号是否匹配 + openBraces := strings.Count(trimmed, "{") + closeBraces := strings.Count(trimmed, "}") + openBrackets := strings.Count(trimmed, "[") + closeBrackets := strings.Count(trimmed, "]") + + if openBraces != closeBraces || openBrackets != closeBrackets { + return true + } + + // 检查引号是否匹配(简化检查,不考虑转义) + quoteCount := strings.Count(trimmed, `"`) + if quoteCount%2 != 0 { + return true + } + + return false +} + // Helper functions func truncateString(s string, maxLen int) string { if len(s) <= maxLen { diff --git a/pkg/video/minimax_client.go b/pkg/video/minimax_client.go index 6b143d5..c543427 100644 --- a/pkg/video/minimax_client.go +++ b/pkg/video/minimax_client.go @@ -9,6 +9,36 @@ import ( "time" ) +// MiniMax Hailuo 支持的模型 +const ( + // ModelHailuo23 全新视频生成模型,肢体动作、面部表情、物理表现与指令遵循再度突破 + // 支持:文生视频、图生视频 + // 时长:768P(6s/10s), 1080P(6s) + ModelHailuo23 = "MiniMax-Hailuo-2.3" + + // ModelHailuo23Fast 全新图生视频模型,物理表现与指令遵循具佳,更快更优惠 + // 支持:图生视频 + // 时长:768P(6s/10s), 1080P(6s) + ModelHailuo23Fast = "MiniMax-Hailuo-2.3-Fast" + + // ModelHailuo02 新一代视频生成模型,1080p 原生,SOTA 指令遵循,极致物理表现 + // 支持:文生视频、图生视频、首尾帧模式 + // 时长:768P(6s/10s), 1080P(6s) + ModelHailuo02 = "MiniMax-Hailuo-02" +) + +// MiniMax Hailuo 支持的分辨率 +const ( + Resolution768P = "768P" + Resolution1080P = "1080P" +) + +// MiniMax Hailuo 支持的时长(秒) +const ( + Duration6s = 6 + Duration10s = 10 +) + // MinimaxClient Minimax视频生成客户端 type MinimaxClient struct { BaseURL string @@ -32,21 +62,42 @@ type MinimaxRequest 
struct { Resolution string `json:"resolution,omitempty"` } -type MinimaxResponse struct { +// MinimaxCreateResponse 创建任务的响应 +type MinimaxCreateResponse struct { TaskID string `json:"task_id"` - Status string `json:"status"` BaseResp struct { StatusCode int `json:"status_code"` StatusMsg string `json:"status_msg"` } `json:"base_resp"` - Video struct { - URL string `json:"url"` - Duration int `json:"duration"` - } `json:"video"` - Error struct { - Code string `json:"code"` - Message string `json:"message"` - } `json:"error"` +} + +// MinimaxQueryResponse 查询任务状态的响应 +type MinimaxQueryResponse struct { + TaskID string `json:"task_id"` + Status string `json:"status"` // Processing, Success, Failed + FileID string `json:"file_id"` + VideoWidth int `json:"video_width"` + VideoHeight int `json:"video_height"` + BaseResp struct { + StatusCode int `json:"status_code"` + StatusMsg string `json:"status_msg"` + } `json:"base_resp"` +} + +// MinimaxFileResponse 获取文件信息的响应 +type MinimaxFileResponse struct { + File struct { + FileID string `json:"file_id"` + Bytes int `json:"bytes"` + CreatedAt int64 `json:"created_at"` + Filename string `json:"filename"` + Purpose string `json:"purpose"` + DownloadURL string `json:"download_url"` + } `json:"file"` + BaseResp struct { + StatusCode int `json:"status_code"` + StatusMsg string `json:"status_msg"` + } `json:"base_resp"` } func NewMinimaxClient(baseURL, apiKey, model string) *MinimaxClient { @@ -61,6 +112,7 @@ func NewMinimaxClient(baseURL, apiKey, model string) *MinimaxClient { } // GenerateVideo 生成视频(支持首尾帧和主体参考) +// 步骤1:创建任务,返回 task_id func (c *MinimaxClient) GenerateVideo(imageURL, prompt string, opts ...VideoOption) (*VideoResult, error) { options := &VideoOptions{ Duration: 6, @@ -87,19 +139,26 @@ func (c *MinimaxClient) GenerateVideo(imageURL, prompt string, opts ...VideoOpti reqBody.Resolution = options.Resolution } - // 如果有首帧图片(从imageURL或FirstFrameURL) + // 支持首帧图片 if options.FirstFrameURL != "" { reqBody.FirstFrameImage = 
options.FirstFrameURL } else if imageURL != "" { reqBody.FirstFrameImage = imageURL } + // 支持尾帧图片 + if options.LastFrameURL != "" { + reqBody.LastFrameImage = options.LastFrameURL + } + jsonData, err := json.Marshal(reqBody) if err != nil { return nil, fmt.Errorf("marshal request: %w", err) } - endpoint := c.BaseURL + "/v1/video_generation" + // 步骤1:创建任务,POST 请求 + // 注意:BaseURL 应该已包含 /v1,例如 https://api.minimaxi.com/v1 + endpoint := c.BaseURL + "/video_generation" req, err := http.NewRequest("POST", endpoint, bytes.NewBuffer(jsonData)) if err != nil { return nil, fmt.Errorf("create request: %w", err) @@ -123,32 +182,31 @@ func (c *MinimaxClient) GenerateVideo(imageURL, prompt string, opts ...VideoOpti return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body)) } - var result MinimaxResponse + var result MinimaxCreateResponse if err := json.Unmarshal(body, &result); err != nil { return nil, fmt.Errorf("parse response: %w", err) } - if result.Error.Message != "" { - return nil, fmt.Errorf("minimax error: %s", result.Error.Message) + if result.BaseResp.StatusCode != 0 { + return nil, fmt.Errorf("minimax error: %s", result.BaseResp.StatusMsg) } + // 第一步只返回 task_id,状态为 Processing videoResult := &VideoResult{ TaskID: result.TaskID, - Status: result.Status, - Completed: result.Status == "completed", - Duration: result.Video.Duration, - } - - if result.Video.URL != "" { - videoResult.VideoURL = result.Video.URL - videoResult.Completed = true + Status: "Processing", + Completed: false, } return videoResult, nil } +// GetTaskStatus 查询任务状态 +// 步骤2:查询任务状态,如果成功则进入步骤3获取文件下载地址 func (c *MinimaxClient) GetTaskStatus(taskID string) (*VideoResult, error) { - endpoint := c.BaseURL + "/v1/video_generation/" + taskID + // 步骤2:查询任务状态 + // 注意:BaseURL 应该已包含 /v1 + endpoint := fmt.Sprintf("%s/query/video_generation?task_id=%s", c.BaseURL, taskID) req, err := http.NewRequest("GET", endpoint, nil) if err != nil { return nil, fmt.Errorf("create request: %w", err) @@ 
-167,26 +225,77 @@ func (c *MinimaxClient) GetTaskStatus(taskID string) (*VideoResult, error) { return nil, fmt.Errorf("read response: %w", err) } - var result MinimaxResponse - if err := json.Unmarshal(body, &result); err != nil { + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body)) + } + + var queryResult MinimaxQueryResponse + if err := json.Unmarshal(body, &queryResult); err != nil { return nil, fmt.Errorf("parse response: %w", err) } + if queryResult.BaseResp.StatusCode != 0 { + return nil, fmt.Errorf("minimax error: %s", queryResult.BaseResp.StatusMsg) + } + videoResult := &VideoResult{ - TaskID: result.TaskID, - Status: result.Status, - Completed: result.Status == "completed", - Duration: result.Video.Duration, + TaskID: queryResult.TaskID, + Status: queryResult.Status, + Width: queryResult.VideoWidth, + Height: queryResult.VideoHeight, + Completed: false, } - if result.Error.Message != "" { - videoResult.Error = result.Error.Message - } - - if result.Video.URL != "" { - videoResult.VideoURL = result.Video.URL + // 如果状态是 Success 且有 file_id,则获取文件下载地址 + if queryResult.Status == "Success" && queryResult.FileID != "" { + downloadURL, err := c.getFileDownloadURL(queryResult.FileID) + if err != nil { + return nil, fmt.Errorf("failed to get download URL: %w", err) + } + videoResult.VideoURL = downloadURL + videoResult.Completed = true + } else if queryResult.Status == "Failed" { + videoResult.Error = "Video generation failed" videoResult.Completed = true } return videoResult, nil } + +// getFileDownloadURL 步骤3:根据 file_id 获取文件下载地址 +func (c *MinimaxClient) getFileDownloadURL(fileID string) (string, error) { + // 注意:BaseURL 应该已包含 /v1 + endpoint := fmt.Sprintf("%s/files/retrieve?file_id=%s", c.BaseURL, fileID) + req, err := http.NewRequest("GET", endpoint, nil) + if err != nil { + return "", fmt.Errorf("create request: %w", err) + } + + req.Header.Set("Authorization", "Bearer "+c.APIKey) + + resp, 
err := c.HTTPClient.Do(req) + if err != nil { + return "", fmt.Errorf("send request: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("read response: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body)) + } + + var fileResult MinimaxFileResponse + if err := json.Unmarshal(body, &fileResult); err != nil { + return "", fmt.Errorf("parse response: %w", err) + } + + if fileResult.BaseResp.StatusCode != 0 { + return "", fmt.Errorf("minimax error: %s", fileResult.BaseResp.StatusMsg) + } + + return fileResult.File.DownloadURL, nil +} diff --git a/web/package.json b/web/package.json index 650033f..6682cd4 100644 --- a/web/package.json +++ b/web/package.json @@ -18,6 +18,7 @@ "axios": "^1.6.0", "dayjs": "^1.11.10", "element-plus": "^2.5.0", + "lodash-es": "^4.17.22", "pinia": "^2.1.0", "vue": "^3.4.0", "vue-i18n": "^9.14.5", diff --git a/web/src/api/audio.ts b/web/src/api/audio.ts new file mode 100644 index 0000000..2bdfec7 --- /dev/null +++ b/web/src/api/audio.ts @@ -0,0 +1,45 @@ +import axios from 'axios' + +const API_BASE_URL = '/api/v1' + +export interface ExtractAudioRequest { + video_url: string +} + +export interface ExtractAudioResponse { + audio_url: string + duration: number +} + +export interface BatchExtractAudioRequest { + video_urls: string[] +} + +export interface BatchExtractAudioResponse { + results: ExtractAudioResponse[] + total: number +} + +export const audioAPI = { + /** + * 从视频URL提取音频 + */ + extractAudio: async (videoUrl: string): Promise => { + const response = await axios.post( + `${API_BASE_URL}/audio/extract`, + { video_url: videoUrl } + ) + return response.data + }, + + /** + * 批量从视频URL提取音频 + */ + batchExtractAudio: async (videoUrls: string[]): Promise => { + const response = await axios.post( + `${API_BASE_URL}/audio/extract/batch`, + { video_urls: videoUrls } + ) + return response.data + 
} +} diff --git a/web/src/api/drama.ts b/web/src/api/drama.ts index 53dccfe..34a3acf 100644 --- a/web/src/api/drama.ts +++ b/web/src/api/drama.ts @@ -71,8 +71,8 @@ export const dramaAPI = { return request.get(`/images/episode/${episodeId}/backgrounds`) }, - extractBackgrounds(episodeId: string) { - return request.post<{ task_id: string; status: string; message: string }>(`/images/episode/${episodeId}/backgrounds/extract`) + extractBackgrounds(episodeId: string, model?: string) { + return request.post<{ task_id: string; status: string; message: string }>(`/images/episode/${episodeId}/backgrounds/extract`, { model }) }, batchGenerateBackgrounds(episodeId: string) { @@ -112,6 +112,10 @@ export const dramaAPI = { return request.post('/scenes/generate-image', data) }, + deleteScene(sceneId: string) { + return request.delete(`/scenes/${sceneId}`) + }, + // 完成集数制作(触发视频合成) finalizeEpisode(episodeId: string, timelineData?: any) { return request.post(`/episodes/${episodeId}/finalize`, timelineData || {}) diff --git a/web/src/api/generation.ts b/web/src/api/generation.ts index 7a2fac9..48b82dc 100644 --- a/web/src/api/generation.ts +++ b/web/src/api/generation.ts @@ -1,27 +1,15 @@ -import type { Character, Episode } from '../types/drama' import type { - GenerateCharactersRequest, - GenerateEpisodesRequest, - GenerateOutlineRequest, - OutlineResult + GenerateCharactersRequest } from '../types/generation' import request from '../utils/request' export const generationAPI = { - generateOutline(data: GenerateOutlineRequest) { - return request.post('/generation/outline', data) - }, - generateCharacters(data: GenerateCharactersRequest) { return request.post<{ task_id: string; status: string; message: string }>('/generation/characters', data) }, - generateEpisodes(data: GenerateEpisodesRequest) { - return request.post('/generation/episodes', data) - }, - - generateStoryboard(episodeId: string) { - return request.post<{ task_id: string; status: string; message: string 
}>(`/episodes/${episodeId}/storyboards`) + generateStoryboard(episodeId: string, model?: string) { + return request.post<{ task_id: string; status: string; message: string }>(`/episodes/${episodeId}/storyboards`, { model }) }, getTaskStatus(taskId: string) { diff --git a/web/src/api/settings.ts b/web/src/api/settings.ts new file mode 100644 index 0000000..02d3383 --- /dev/null +++ b/web/src/api/settings.ts @@ -0,0 +1,13 @@ +import request from '../utils/request' + +export const settingsAPI = { + // 获取系统语言 + getLanguage() { + return request.get<{ language: string }>('/settings/language') + }, + + // 更新系统语言 + updateLanguage(language: 'zh' | 'en') { + return request.put<{ message: string; language: string }>('/settings/language', { language }) + } +} diff --git a/web/src/assets/styles/main.css b/web/src/assets/styles/main.css index 11c0531..bf0f26c 100644 --- a/web/src/assets/styles/main.css +++ b/web/src/assets/styles/main.css @@ -556,6 +556,7 @@ body { --el-table-header-bg-color: var(--bg-secondary); --el-table-tr-bg-color: var(--bg-card); --el-table-row-hover-bg-color: var(--bg-card-hover); + --el-fill-color-lighter: var(--bg-secondary); } .dark .el-table th.el-table__cell, @@ -563,6 +564,10 @@ body { border-color: var(--border-primary); } +.dark .el-table--striped .el-table__body tr.el-table__row--striped td.el-table__cell { + background-color: var(--bg-secondary); +} + /* Pagination overrides / 分页样式覆盖 */ .el-pagination { --el-pagination-bg-color: transparent; diff --git a/web/src/components/LanguageSwitcher.vue b/web/src/components/LanguageSwitcher.vue index b002948..0c1901d 100644 --- a/web/src/components/LanguageSwitcher.vue +++ b/web/src/components/LanguageSwitcher.vue @@ -21,24 +21,90 @@ import { ref, computed } from 'vue' import { useI18n } from 'vue-i18n' import { setLanguage } from '@/locales' -import { ElMessage } from 'element-plus' +import { ElMessage, ElMessageBox } from 'element-plus' +import { settingsAPI } from '@/api/settings' const { locale } = 
useI18n() const currentLang = ref(locale.value) +const loading = ref(false) const currentLangText = computed(() => { return currentLang.value === 'zh-CN' ? '中文' : 'English' }) -const handleCommand = (lang: string) => { - setLanguage(lang) - currentLang.value = lang - ElMessage.success( - lang === 'zh-CN' - ? '语言已切换为中文' - : 'Language switched to English' - ) +const handleCommand = async (lang: string) => { + if (loading.value) return + + // 将 zh-CN/en-US 转换为 zh/en (后端格式) + const backendLang = lang === 'zh-CN' ? 'zh' : 'en' + const currentBackendLang = currentLang.value === 'zh-CN' ? 'zh' : 'en' + + // 双语确认消息 + const confirmMessage = backendLang === 'zh' + ? `切换为中文后,后端生成的所有提示词、角色描述、场景描述等都将使用中文。是否继续? + + +After switching to Chinese, all prompts, character descriptions, scene descriptions generated by the backend will use Chinese. Continue?` + : `After switching to English, all prompts, character descriptions, scene descriptions generated by the backend will use English. Continue? + + +切换为英文后,后端生成的所有提示词、角色描述、场景描述等都将使用英文。是否继续?` + + try { + await ElMessageBox.confirm( + confirmMessage, + '切换语言 / Switch Language', + { + confirmButtonText: '确定 / Confirm', + cancelButtonText: '取消 / Cancel', + type: 'warning', + dangerouslyUseHTMLString: false + } + ) + + loading.value = true + + // 调用后端API更新语言设置 + const res = await settingsAPI.updateLanguage(backendLang) + console.log('Backend language updated:', res) + + // 更新前端语言 + setLanguage(lang) + currentLang.value = lang + + // 使用后端返回的双语消息(request拦截器已经返回了data) + ElMessage.success({ + message: res?.message || (backendLang === 'zh' ? 
'语言已切换为中文' : 'Language switched to English'), + duration: 3000 + }) + } catch (error: any) { + if (error !== 'cancel') { + console.error('Failed to switch language:', error) + + // 安全获取错误消息 + let errorMessage = '未知错误' + if (error?.message) { + errorMessage = error.message + } else if (error?.response?.data?.error?.message) { + errorMessage = error.response.data.error.message + } else if (typeof error === 'string') { + errorMessage = error + } + + // 双语错误提示 + const errorMsg = currentBackendLang === 'zh' + ? `切换语言失败: ${errorMessage}` + : `Failed to switch language: ${errorMessage}` + + ElMessage.error({ + message: errorMsg, + duration: 5000 + }) + } + } finally { + loading.value = false + } } diff --git a/web/src/components/common/AIConfigDialog.vue b/web/src/components/common/AIConfigDialog.vue index 932f0dc..23e5412 100644 --- a/web/src/components/common/AIConfigDialog.vue +++ b/web/src/components/common/AIConfigDialog.vue @@ -283,13 +283,9 @@ const providerConfigs: Record = { id: 'chatfire', name: 'Chatfire', models: [ - 'gpt-4o', + 'gemini-3-pro-preview', 'claude-sonnet-4-5-20250929', - 'doubao-seed-1-8-251228', - 'kimi-k2-thinking', - 'gemini-3-pro', - 'gemini-2.5-pro', - 'gemini-3-pro-preview' + 'doubao-seed-1-8-251228' ] }, { @@ -341,18 +337,43 @@ const providerConfigs: Record = { 'sora-pro' ] }, + { + id: 'minimax', + name: 'MiniMax 海螺', + models: [ + 'MiniMax-Hailuo-2.3', + 'MiniMax-Hailuo-2.3-Fast', + 'MiniMax-Hailuo-02' + ] + }, { id: 'openai', name: 'OpenAI', models: ['sora-2', 'sora-2-pro'] } ] } +// 当前可用的厂商列表(显示所有配置的厂商) const availableProviders = computed(() => { + // 返回当前service_type下的所有厂商 return providerConfigs[form.service_type] || [] }) +// 当前可用的模型列表(从已激活的配置中获取) const availableModels = computed(() => { if (!form.provider) return [] - const provider = availableProviders.value.find(p => p.id === form.provider) - return provider?.models || [] + + // 从已激活的配置中提取该 provider 的所有模型 + const activeConfigsForProvider = configs.value.filter( + c => c.provider 
=== form.provider && + c.service_type === form.service_type && + c.is_active + ) + + // 提取所有模型,去重 + const models = new Set() + activeConfigsForProvider.forEach(config => { + config.model.forEach(m => models.add(m)) + }) + + return Array.from(models) }) const fullEndpointExample = computed(() => { @@ -379,6 +400,8 @@ const fullEndpointExample = computed(() => { endpoint = '/video/generations' } else if (provider === 'doubao' || provider === 'volcengine' || provider === 'volces') { endpoint = '/contents/generations/tasks' + } else if (provider === 'minimax') { + endpoint = '/video_generation' } else if (provider === 'openai') { endpoint = '/videos' } else { @@ -585,9 +608,17 @@ const handleTabChange = (tabName: string | number) => { const handleProviderChange = () => { form.model = [] + // 根据厂商自动设置 Base URL if (form.provider === 'gemini' || form.provider === 'google') { - form.base_url = 'https://api.chatfire.site' + form.base_url = 'https://generativelanguage.googleapis.com' + } else if (form.provider === 'minimax') { + form.base_url = 'https://api.minimaxi.com/v1' + } else if (form.provider === 'volces' || form.provider === 'volcengine') { + form.base_url = 'https://ark.cn-beijing.volces.com/api/v3' + } else if (form.provider === 'openai') { + form.base_url = 'https://api.openai.com/v1' } else { + // chatfire 和其他厂商 form.base_url = 'https://api.chatfire.site/v1' } @@ -812,6 +843,9 @@ watch(visible, (val) => { font-size: 0.75rem; color: var(--text-muted); margin-top: 0.25rem; + word-break: break-all; + overflow-wrap: break-word; + line-height: 1.5; } /* Dark mode */ diff --git a/web/src/components/common/AppHeader.vue b/web/src/components/common/AppHeader.vue index e9c3003..6fb6c38 100644 --- a/web/src/components/common/AppHeader.vue +++ b/web/src/components/common/AppHeader.vue @@ -5,7 +5,7 @@
diff --git a/web/src/components/common/AppLayout.vue b/web/src/components/common/AppLayout.vue index 3493391..734a1d1 100644 --- a/web/src/components/common/AppLayout.vue +++ b/web/src/components/common/AppLayout.vue @@ -5,7 +5,7 @@
diff --git a/web/src/components/editor/StoryboardEditor.vue b/web/src/components/editor/StoryboardEditor.vue index b280dc0..afbe43e 100644 --- a/web/src/components/editor/StoryboardEditor.vue +++ b/web/src/components/editor/StoryboardEditor.vue @@ -17,7 +17,7 @@ :class="{ active: currentShotIndex === index }" @click="selectShot(index)" > -
{{ shot.shot_number }}
+
{{ shot.storyboard_number }}
{{ shot.shot_type }} @@ -36,7 +36,7 @@
- 镜头 {{ currentShot?.shot_number || '-' }} + 镜头 {{ currentShot?.storyboard_number || '-' }} {{ currentShot?.shot_type }}
@@ -101,7 +101,7 @@ @click="selectShot(index)" >
- {{ shot.shot_number }} + {{ shot.storyboard_number }}
@@ -116,11 +116,11 @@
- +
- - + + @@ -128,14 +128,35 @@
+
+ + + + + + + + +
+
+ + + + + + + + + +
- +
- +
@@ -149,7 +170,7 @@ :rows="2" size="small" placeholder="角色对话或旁白" - @change="handleShotUpdate" + @blur="handleShotUpdateImmediate" />
@@ -159,7 +180,7 @@ type="textarea" :rows="2" size="small" - @change="handleShotUpdate" + @blur="handleShotUpdateImmediate" />
@@ -169,24 +190,32 @@ type="textarea" :rows="2" size="small" - @change="handleShotUpdate" + @blur="handleShotUpdateImmediate" />
- +
- -
- - - - - - - - -
+ + +
+
+ +
@@ -382,8 +411,9 @@ - - diff --git a/web/src/views/settings/AIConfig.vue b/web/src/views/settings/AIConfig.vue index cedeeae..8bccc8a 100644 --- a/web/src/views/settings/AIConfig.vue +++ b/web/src/views/settings/AIConfig.vue @@ -207,13 +207,9 @@ const providerConfigs: Record = { id: 'chatfire', name: 'Chatfire', models: [ - 'gpt-4o', + 'gemini-3-pro-preview', 'claude-sonnet-4-5-20250929', 'doubao-seed-1-8-251228', - 'kimi-k2-thinking', - 'gemini-3-pro', - 'gemini-2.5-pro', - 'gemini-3-pro-preview' ] }, { @@ -281,16 +277,39 @@ const providerConfigs: Record = { ] } -// 当前可用的厂商列表 +// 当前可用的厂商列表(只显示有激活配置的) const availableProviders = computed(() => { - return providerConfigs[form.service_type] || [] + // 获取当前service_type下所有激活的配置 + const activeConfigs = configs.value.filter( + c => c.service_type === form.service_type && c.is_active + ) + + // 提取所有激活配置的provider,去重 + const activeProviderIds = new Set(activeConfigs.map(c => c.provider)) + + // 从providerConfigs中筛选出有激活配置的provider + const allProviders = providerConfigs[form.service_type] || [] + return allProviders.filter(p => activeProviderIds.has(p.id)) }) -// 当前可用的模型列表 +// 当前可用的模型列表(从已激活的配置中获取) const availableModels = computed(() => { if (!form.provider) return [] - const provider = availableProviders.value.find(p => p.id === form.provider) - return provider?.models || [] + + // 从已激活的配置中提取该 provider 的所有模型 + const activeConfigsForProvider = configs.value.filter( + c => c.provider === form.provider && + c.service_type === form.service_type && + c.is_active + ) + + // 提取所有模型,去重 + const models = new Set() + activeConfigsForProvider.forEach(config => { + config.model.forEach(m => models.add(m)) + }) + + return Array.from(models) }) // 完整端点示例 diff --git a/web/src/views/settings/SystemSettings.vue b/web/src/views/settings/SystemSettings.vue new file mode 100644 index 0000000..220475a --- /dev/null +++ b/web/src/views/settings/SystemSettings.vue @@ -0,0 +1,169 @@ + + + + + diff --git a/web/src/views/workflow/ScriptGeneration.vue 
b/web/src/views/workflow/ScriptGeneration.vue deleted file mode 100644 index d1ab1bf..0000000 --- a/web/src/views/workflow/ScriptGeneration.vue +++ /dev/null @@ -1,1221 +0,0 @@ - - - - -