1、添加中英文版本

2、修复已知BUG
3、完善功能
4、添加minimax视频渠道
This commit is contained in:
Connor
2026-01-18 05:21:34 +08:00
parent bfba6342dc
commit d39759926e
52 changed files with 3456 additions and 2617 deletions

View File

@@ -317,14 +317,14 @@ func (s *AIService) TestConnection(req *TestConnectionRequest) error {
func (s *AIService) GetDefaultConfig(serviceType string) (*models.AIServiceConfig, error) {
var config models.AIServiceConfig
// 按优先级降序获取第一个配置
err := s.db.Where("service_type = ?", serviceType).
// 按优先级降序获取第一个激活的配置
err := s.db.Where("service_type = ? AND is_active = ?", serviceType, true).
Order("priority DESC, created_at DESC").
First(&config).Error
if err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil, errors.New("no config found")
return nil, errors.New("no active config found")
}
return nil, err
}
@@ -332,10 +332,10 @@ func (s *AIService) GetDefaultConfig(serviceType string) (*models.AIServiceConfi
return &config, nil
}
// GetConfigForModel 根据服务类型和模型名称获取优先级最高的配置
// GetConfigForModel 根据服务类型和模型名称获取优先级最高的激活配置
func (s *AIService) GetConfigForModel(serviceType string, modelName string) (*models.AIServiceConfig, error) {
var configs []models.AIServiceConfig
err := s.db.Where("service_type = ?", serviceType).
err := s.db.Where("service_type = ? AND is_active = ?", serviceType, true).
Order("priority DESC, created_at DESC").
Find(&configs).Error
@@ -352,7 +352,7 @@ func (s *AIService) GetConfigForModel(serviceType string, modelName string) (*mo
}
}
return nil, errors.New("no config found for model: " + modelName)
return nil, errors.New("no active config found for model: " + modelName)
}
func (s *AIService) GetAIClient(serviceType string) (ai.AIClient, error) {
@@ -388,6 +388,34 @@ func (s *AIService) GetAIClient(serviceType string) (ai.AIClient, error) {
}
}
// GetAIClientForModel returns an AI client bound to modelName, built from the
// configuration that GetConfigForModel selects for serviceType and modelName.
//
// Providers "gemini" and "google" get a Gemini client; every other provider
// (openai, chatfire, ...) is treated as OpenAI-compatible. If the selected
// configuration carries no endpoint, a provider-specific default path is used.
// An error from GetConfigForModel is returned unchanged.
func (s *AIService) GetAIClientForModel(serviceType string, modelName string) (ai.AIClient, error) {
	cfg, err := s.GetConfigForModel(serviceType, modelName)
	if err != nil {
		return nil, err
	}

	// Both the default endpoint and the client constructor depend on the same
	// provider split, so decide it once.
	isGemini := cfg.Provider == "gemini" || cfg.Provider == "google"

	// Prefer the endpoint stored with the configuration; fall back to the
	// provider default only when it is empty.
	path := cfg.Endpoint
	if path == "" {
		if isGemini {
			path = "/v1beta/models/{model}:generateContent"
		} else {
			path = "/chat/completions"
		}
	}

	if isGemini {
		return ai.NewGeminiClient(cfg.BaseURL, cfg.APIKey, modelName, path), nil
	}
	// openai, chatfire and all remaining vendors use the OpenAI request format.
	return ai.NewOpenAIClient(cfg.BaseURL, cfg.APIKey, modelName, path), nil
}
func (s *AIService) GenerateText(prompt string, systemPrompt string, options ...func(*ai.ChatCompletionRequest)) (string, error) {
client, err := s.GetAIClient("text")
if err != nil {

View File

@@ -0,0 +1,66 @@
package services
import (
"fmt"
models "github.com/drama-generator/backend/domain/models"
"github.com/drama-generator/backend/infrastructure/storage"
)
// UpdateAssetDurationFromFile probes the video at localFilePath and stores the
// resulting duration on the asset identified by assetID.
//
// The asset must exist and be of type models.AssetTypeVideo, and the service
// must have an ffmpeg helper configured. The probed duration is rounded to the
// nearest whole number before being written to the "duration" column
// (presumably seconds — confirm against ffmpeg.GetVideoDuration).
//
// It returns an error when the asset cannot be loaded, is not a video, ffmpeg
// is unavailable, probing fails, or the database update fails. Underlying
// errors are wrapped so callers can inspect them with errors.Is / errors.As.
func (s *AssetService) UpdateAssetDurationFromFile(assetID uint, localFilePath string) error {
	var asset models.Asset
	if err := s.db.Where("id = ?", assetID).First(&asset).Error; err != nil {
		// Keep the cause: a lookup can fail for reasons other than a missing
		// row (e.g. a database outage), and callers need to tell them apart.
		return fmt.Errorf("asset not found: %w", err)
	}

	if asset.Type != models.AssetTypeVideo {
		return fmt.Errorf("asset is not a video")
	}

	if s.ffmpeg == nil {
		return fmt.Errorf("ffmpeg not available")
	}

	duration, err := s.ffmpeg.GetVideoDuration(localFilePath)
	if err != nil {
		return fmt.Errorf("failed to probe video duration: %w", err)
	}

	// Adding 0.5 before truncation rounds a non-negative duration to the
	// nearest integer instead of always rounding down.
	durationInt := int(duration + 0.5)
	if err := s.db.Model(&asset).Update("duration", durationInt).Error; err != nil {
		return fmt.Errorf("failed to update duration: %w", err)
	}

	s.log.Infow("Updated asset duration from file",
		"asset_id", assetID,
		"duration", durationInt,
		"file", localFilePath)
	return nil
}
// UpdateAssetDurationFromURL downloads the asset's video through localStorage
// and then delegates to UpdateAssetDurationFromFile to probe and persist its
// duration.
//
// The asset must exist and be of type models.AssetTypeVideo, and localStorage
// must be non-nil. The file is downloaded into the "videos" category of the
// local storage. Underlying lookup and download errors are wrapped so callers
// can inspect them with errors.Is / errors.As.
func (s *AssetService) UpdateAssetDurationFromURL(assetID uint, localStorage *storage.LocalStorage) error {
	var asset models.Asset
	if err := s.db.Where("id = ?", assetID).First(&asset).Error; err != nil {
		// Keep the cause: a lookup can fail for reasons other than a missing
		// row (e.g. a database outage), and callers need to tell them apart.
		return fmt.Errorf("asset not found: %w", err)
	}

	if asset.Type != models.AssetTypeVideo {
		return fmt.Errorf("asset is not a video")
	}

	if localStorage == nil {
		return fmt.Errorf("local storage not available")
	}

	// Fetch the remote video so ffmpeg can probe a local file.
	localPath, err := localStorage.DownloadFromURL(asset.URL, "videos")
	if err != nil {
		return fmt.Errorf("failed to download video: %w", err)
	}

	// Probe the downloaded file and store the duration.
	return s.UpdateAssetDurationFromFile(assetID, localPath)
}

View File

@@ -6,19 +6,22 @@ import (
"strings"
models "github.com/drama-generator/backend/domain/models"
"github.com/drama-generator/backend/infrastructure/external/ffmpeg"
"github.com/drama-generator/backend/pkg/logger"
"gorm.io/gorm"
)
type AssetService struct {
db *gorm.DB
log *logger.Logger
db *gorm.DB
log *logger.Logger
ffmpeg *ffmpeg.FFmpeg
}
func NewAssetService(db *gorm.DB, log *logger.Logger) *AssetService {
return &AssetService{
db: db,
log: log,
db: db,
log: log,
ffmpeg: ffmpeg.NewFFmpeg(log),
}
}

View File

@@ -0,0 +1,97 @@
package services
import (
"fmt"
"path/filepath"
"time"
"github.com/drama-generator/backend/infrastructure/external/ffmpeg"
"github.com/drama-generator/backend/pkg/logger"
)
// AudioExtractionService extracts audio tracks from videos using ffmpeg.
type AudioExtractionService struct {
// ffmpeg performs the extraction and the duration probing.
ffmpeg *ffmpeg.FFmpeg
// log receives structured progress and error entries.
log *logger.Logger
}
// NewAudioExtractionService builds an AudioExtractionService that logs through
// log and owns a freshly created ffmpeg helper sharing the same logger.
func NewAudioExtractionService(log *logger.Logger) *AudioExtractionService {
	svc := new(AudioExtractionService)
	svc.ffmpeg = ffmpeg.NewFFmpeg(log)
	svc.log = log
	return svc
}
// ExtractAudioRequest is the JSON payload describing one video to extract
// audio from.
type ExtractAudioRequest struct {
// VideoURL is the source video location; the binding tag marks it as
// required (NOTE(review): tag syntax looks like gin's validator — confirm).
VideoURL string `json:"video_url" binding:"required"`
}
// ExtractAudioResponse describes one extracted audio file.
type ExtractAudioResponse struct {
// AudioURL is the URL path under which the extracted file is exposed.
AudioURL string `json:"audio_url"`
// Duration is the probed length of the extracted audio
// (presumably seconds — confirm against ffmpeg.GetVideoDuration).
Duration float64 `json:"duration"`
}
// ExtractAudio extracts the audio track of the video at videoURL into
// <dataDir>/audios and returns the public URL and duration of the result.
//
// The output file is named audio_<unix-nanoseconds>.aac. Nanosecond resolution
// is used so that extractions started within the same second (for example from
// BatchExtractAudio or from concurrent requests) do not write to the same path
// and overwrite each other.
//
// The returned AudioURL is always "/data/audios/<file name>"
// (NOTE(review): assumes dataDir is served under /data — confirm). The duration
// is probed from the path returned by the ffmpeg helper.
//
// It returns an error when extraction fails, when the duration cannot be
// probed, or when the probed duration is not positive.
func (s *AudioExtractionService) ExtractAudio(videoURL string, dataDir string) (*ExtractAudioResponse, error) {
	s.log.Infow("Starting audio extraction", "video_url", videoURL)

	// Build a collision-resistant output file name.
	// NOTE(review): assumes <dataDir>/audios exists or that
	// ffmpeg.ExtractAudio creates it — confirm.
	timestamp := time.Now().UnixNano()
	audioFileName := fmt.Sprintf("audio_%d.aac", timestamp)
	audioOutputPath := filepath.Join(dataDir, "audios", audioFileName)

	// Extract the audio track.
	extractedPath, err := s.ffmpeg.ExtractAudio(videoURL, audioOutputPath)
	if err != nil {
		s.log.Errorw("Failed to extract audio", "error", err, "video_url", videoURL)
		return nil, fmt.Errorf("failed to extract audio: %w", err)
	}

	// Probe the duration from the extracted local file rather than the
	// remote video.
	duration, err := s.ffmpeg.GetVideoDuration(extractedPath)
	if err != nil {
		s.log.Errorw("Failed to get audio duration", "error", err, "path", extractedPath)
		return nil, fmt.Errorf("failed to get audio duration: %w", err)
	}

	if duration <= 0 {
		s.log.Errorw("Invalid audio duration", "duration", duration, "path", extractedPath)
		return nil, fmt.Errorf("invalid audio duration: %.2f", duration)
	}

	// Build the audio URL relative to the data directory.
	audioURL := fmt.Sprintf("/data/audios/%s", audioFileName)

	s.log.Infow("Audio extraction completed",
		"video_url", videoURL,
		"audio_url", audioURL,
		"duration", duration,
		"local_path", extractedPath)

	return &ExtractAudioResponse{
		AudioURL: audioURL,
		Duration: duration,
	}, nil
}
// BatchExtractAudio runs ExtractAudio for each entry of videoURLs, in order,
// and returns the responses in the same order.
//
// The batch is all-or-nothing: the first failing extraction is logged and
// aborts the batch, returning a nil slice and an error that names the
// zero-based index of the failing URL. Results of earlier, successful
// extractions are not returned in that case.
func (s *AudioExtractionService) BatchExtractAudio(videoURLs []string, dataDir string) ([]*ExtractAudioResponse, error) {
	total := len(videoURLs)
	s.log.Infow("Starting batch audio extraction", "count", total)

	extracted := make([]*ExtractAudioResponse, 0, total)
	for idx := 0; idx < total; idx++ {
		source := videoURLs[idx]
		// Progress entries are one-based; error entries below are zero-based.
		s.log.Infow("Extracting audio", "index", idx+1, "total", total, "video_url", source)

		resp, err := s.ExtractAudio(source, dataDir)
		if err != nil {
			// Log the failure, then stop: remaining URLs are not processed.
			s.log.Errorw("Failed to extract audio in batch", "index", idx, "video_url", source, "error", err)
			return nil, fmt.Errorf("failed to extract audio at index %d: %w", idx, err)
		}
		extracted = append(extracted, resp)
	}

	s.log.Infow("Batch audio extraction completed", "successful_count", len(extracted))
	return extracted, nil
}

View File

@@ -5,23 +5,28 @@ import (
"strings"
"github.com/drama-generator/backend/domain/models"
"github.com/drama-generator/backend/pkg/config"
"github.com/drama-generator/backend/pkg/logger"
"gorm.io/gorm"
)
// FramePromptService 处理帧提示词生成
type FramePromptService struct {
db *gorm.DB
aiService *AIService
log *logger.Logger
db *gorm.DB
aiService *AIService
log *logger.Logger
config *config.Config
promptI18n *PromptI18n
}
// NewFramePromptService 创建帧提示词服务
func NewFramePromptService(db *gorm.DB, log *logger.Logger) *FramePromptService {
func NewFramePromptService(db *gorm.DB, cfg *config.Config, log *logger.Logger) *FramePromptService {
return &FramePromptService{
db: db,
aiService: NewAIService(db, log),
log: log,
db: db,
aiService: NewAIService(db, log),
log: log,
config: cfg,
promptI18n: NewPromptI18n(cfg),
}
}
@@ -64,7 +69,7 @@ type MultiFramePrompt struct {
}
// GenerateFramePrompt 生成指定类型的帧提示词并保存到frame_prompts表
func (s *FramePromptService) GenerateFramePrompt(req GenerateFramePromptRequest) (*FramePromptResponse, error) {
func (s *FramePromptService) GenerateFramePrompt(req GenerateFramePromptRequest, model string) (*FramePromptResponse, error) {
// 查询分镜信息
var storyboard models.Storyboard
if err := s.db.Preload("Characters").First(&storyboard, req.StoryboardID).Error; err != nil {
@@ -88,21 +93,21 @@ func (s *FramePromptService) GenerateFramePrompt(req GenerateFramePromptRequest)
// 生成提示词
switch req.FrameType {
case FrameTypeFirst:
response.SingleFrame = s.generateFirstFrame(storyboard, scene)
response.SingleFrame = s.generateFirstFrame(storyboard, scene, model)
// 保存单帧提示词
s.saveFramePrompt(req.StoryboardID, string(req.FrameType), response.SingleFrame.Prompt, response.SingleFrame.Description, "")
case FrameTypeKey:
response.SingleFrame = s.generateKeyFrame(storyboard, scene)
response.SingleFrame = s.generateKeyFrame(storyboard, scene, model)
s.saveFramePrompt(req.StoryboardID, string(req.FrameType), response.SingleFrame.Prompt, response.SingleFrame.Description, "")
case FrameTypeLast:
response.SingleFrame = s.generateLastFrame(storyboard, scene)
response.SingleFrame = s.generateLastFrame(storyboard, scene, model)
s.saveFramePrompt(req.StoryboardID, string(req.FrameType), response.SingleFrame.Prompt, response.SingleFrame.Description, "")
case FrameTypePanel:
count := req.PanelCount
if count == 0 {
count = 3
}
response.MultiFrame = s.generatePanelFrames(storyboard, scene, count)
response.MultiFrame = s.generatePanelFrames(storyboard, scene, count, model)
// 保存多帧提示词(合并为一条记录)
var prompts []string
for _, frame := range response.MultiFrame.Frames {
@@ -111,7 +116,7 @@ func (s *FramePromptService) GenerateFramePrompt(req GenerateFramePromptRequest)
combinedPrompt := strings.Join(prompts, "\n---\n")
s.saveFramePrompt(req.StoryboardID, string(req.FrameType), combinedPrompt, "分镜板组合提示词", response.MultiFrame.Layout)
case FrameTypeAction:
response.MultiFrame = s.generateActionSequence(storyboard, scene)
response.MultiFrame = s.generateActionSequence(storyboard, scene, model)
var prompts []string
for _, frame := range response.MultiFrame.Frames {
prompts = append(prompts, frame.Prompt)
@@ -157,33 +162,28 @@ func mustParseUint(s string) uint64 {
}
// generateFirstFrame 生成首帧提示词
func (s *FramePromptService) generateFirstFrame(sb models.Storyboard, scene *models.Scene) *SingleFramePrompt {
func (s *FramePromptService) generateFirstFrame(sb models.Storyboard, scene *models.Scene, model string) *SingleFramePrompt {
// 构建上下文信息
contextInfo := s.buildStoryboardContext(sb, scene)
// 构建AI提示词
systemPrompt := `你是一个专业的图像生成提示词专家。请根据提供的镜头信息生成适合用于AI图像生成的提示词。
// 使用国际化提示词
systemPrompt := s.promptI18n.GetFirstFramePrompt()
userPrompt := s.promptI18n.FormatUserPrompt("frame_info", contextInfo)
重要:这是镜头的首帧 - 一个完全静态的画面,展示动作发生之前的初始状态。
要求:
1. 直接输出提示词,不要任何解释说明
2. 可以使用中文或英文,用逗号分隔关键词
3. 只描述静态视觉元素:场景环境、角色姿态、表情、氛围、光线
4. 不要包含任何动作动词(如:猛然、弹起、坐直、抓住等)
5. 描述角色处于动作发生前的状态(如:躺在床上、站立、坐着等静态姿态)
6. 适合动画风格anime style
示例格式:
Anime style, 城市公寓卧室, 凌晨, 昏暗房间, 床上, 年轻男子躺着, 表情平静, 闭眼睡眠, 柔和光线, 静谧氛围, 中景, 平视`
userPrompt := fmt.Sprintf(`镜头信息:
%s
请直接生成首帧的图像提示词,不要任何解释:`, contextInfo)
// 调用AI生成
prompt, err := s.aiService.GenerateText(userPrompt, systemPrompt)
// 调用AI生成如果指定了模型则使用指定的模型
var prompt string
var err error
if model != "" {
client, getErr := s.aiService.GetAIClientForModel("text", model)
if getErr != nil {
s.log.Warnw("Failed to get client for specified model, using default", "model", model, "error", getErr)
prompt, err = s.aiService.GenerateText(userPrompt, systemPrompt)
} else {
prompt, err = client.GenerateText(userPrompt, systemPrompt)
}
} else {
prompt, err = s.aiService.GenerateText(userPrompt, systemPrompt)
}
if err != nil {
s.log.Warnw("AI generation failed, using fallback", "error", err)
// 降级方案:使用简单拼接
@@ -204,33 +204,28 @@ Anime style, 城市公寓卧室, 凌晨, 昏暗房间, 床上, 年轻男子躺
}
// generateKeyFrame 生成关键帧提示词
func (s *FramePromptService) generateKeyFrame(sb models.Storyboard, scene *models.Scene) *SingleFramePrompt {
func (s *FramePromptService) generateKeyFrame(sb models.Storyboard, scene *models.Scene, model string) *SingleFramePrompt {
// 构建上下文信息
contextInfo := s.buildStoryboardContext(sb, scene)
// 构建AI提示词
systemPrompt := `你是一个专业的图像生成提示词专家。请根据提供的镜头信息生成适合用于AI图像生成的提示词。
// 使用国际化提示词
systemPrompt := s.promptI18n.GetKeyFramePrompt()
userPrompt := s.promptI18n.FormatUserPrompt("key_frame_info", contextInfo)
重要:这是镜头的关键帧 - 捕捉动作最激烈、最精彩的瞬间。
要求:
1. 直接输出提示词,不要任何解释说明
2. 可以使用中文或英文,用逗号分隔关键词
3. 重点描述动作的高潮瞬间:身体姿态、运动轨迹、力量感
4. 包含动态元素:动作模糊、速度线、冲击感
5. 强调表情和情绪的极致状态
6. 适合动画风格anime style
示例格式:
Anime style, 城市街道, 白天, 男子全力冲刺, 身体前倾, 动作模糊, 速度线, 汗水飞溅, 表情坚毅, 紧张氛围, 动态镜头, 中景`
userPrompt := fmt.Sprintf(`镜头信息:
%s
请直接生成关键帧的图像提示词,不要任何解释:`, contextInfo)
// 调用AI生成
prompt, err := s.aiService.GenerateText(userPrompt, systemPrompt)
// 调用AI生成如果指定了模型则使用指定的模型
var prompt string
var err error
if model != "" {
client, getErr := s.aiService.GetAIClientForModel("text", model)
if getErr != nil {
s.log.Warnw("Failed to get client for specified model, using default", "model", model, "error", getErr)
prompt, err = s.aiService.GenerateText(userPrompt, systemPrompt)
} else {
prompt, err = client.GenerateText(userPrompt, systemPrompt)
}
} else {
prompt, err = s.aiService.GenerateText(userPrompt, systemPrompt)
}
if err != nil {
s.log.Warnw("AI generation failed, using fallback", "error", err)
prompt = s.buildFallbackPrompt(sb, scene, "key frame, dynamic action")
@@ -250,33 +245,28 @@ Anime style, 城市街道, 白天, 男子全力冲刺, 身体前倾, 动作模
}
// generateLastFrame 生成尾帧提示词
func (s *FramePromptService) generateLastFrame(sb models.Storyboard, scene *models.Scene) *SingleFramePrompt {
func (s *FramePromptService) generateLastFrame(sb models.Storyboard, scene *models.Scene, model string) *SingleFramePrompt {
// 构建上下文信息
contextInfo := s.buildStoryboardContext(sb, scene)
// 构建AI提示词
systemPrompt := `你是一个专业的图像生成提示词专家。请根据提供的镜头信息生成适合用于AI图像生成的提示词。
// 使用国际化提示词
systemPrompt := s.promptI18n.GetLastFramePrompt()
userPrompt := s.promptI18n.FormatUserPrompt("last_frame_info", contextInfo)
重要:这是镜头的尾帧 - 一个静态画面,展示动作结束后的最终状态和结果。
要求:
1. 直接输出提示词,不要任何解释说明
2. 可以使用中文或英文,用逗号分隔关键词
3. 只描述静态的最终状态:角色姿态、表情、环境变化
4. 不要包含动作过程,只展示动作的结果和余韵
5. 强调情绪的余波和氛围的沉淀
6. 适合动画风格anime style
示例格式:
Anime style, 房间内, 黄昏, 男子坐在椅子上, 身体放松, 表情疲惫, 长出一口气, 汗水滴落, 平静氛围, 静态镜头, 中景`
userPrompt := fmt.Sprintf(`镜头信息:
%s
请直接生成尾帧的图像提示词,不要任何解释:`, contextInfo)
// 调用AI生成
prompt, err := s.aiService.GenerateText(userPrompt, systemPrompt)
// 调用AI生成如果指定了模型则使用指定的模型
var prompt string
var err error
if model != "" {
client, getErr := s.aiService.GetAIClientForModel("text", model)
if getErr != nil {
s.log.Warnw("Failed to get client for specified model, using default", "model", model, "error", getErr)
prompt, err = s.aiService.GenerateText(userPrompt, systemPrompt)
} else {
prompt, err = client.GenerateText(userPrompt, systemPrompt)
}
} else {
prompt, err = s.aiService.GenerateText(userPrompt, systemPrompt)
}
if err != nil {
s.log.Warnw("AI generation failed, using fallback", "error", err)
prompt = s.buildFallbackPrompt(sb, scene, "last frame, final state")
@@ -296,27 +286,27 @@ Anime style, 房间内, 黄昏, 男子坐在椅子上, 身体放松, 表情疲
}
// generatePanelFrames 生成分镜板(多格组合)
func (s *FramePromptService) generatePanelFrames(sb models.Storyboard, scene *models.Scene, count int) *MultiFramePrompt {
func (s *FramePromptService) generatePanelFrames(sb models.Storyboard, scene *models.Scene, count int, model string) *MultiFramePrompt {
layout := fmt.Sprintf("horizontal_%d", count)
frames := make([]SingleFramePrompt, count)
// 固定生成:首帧 -> 关键帧 -> 尾帧
if count == 3 {
frames[0] = *s.generateFirstFrame(sb, scene)
frames[0] = *s.generateFirstFrame(sb, scene, model)
frames[0].Description = "第1格初始状态"
frames[1] = *s.generateKeyFrame(sb, scene)
frames[1] = *s.generateKeyFrame(sb, scene, model)
frames[1].Description = "第2格动作高潮"
frames[2] = *s.generateLastFrame(sb, scene)
frames[2] = *s.generateLastFrame(sb, scene, model)
frames[2].Description = "第3格最终状态"
} else if count == 4 {
// 4格首帧 -> 中间帧1 -> 中间帧2 -> 尾帧
frames[0] = *s.generateFirstFrame(sb, scene)
frames[1] = *s.generateKeyFrame(sb, scene)
frames[2] = *s.generateKeyFrame(sb, scene)
frames[3] = *s.generateLastFrame(sb, scene)
frames[0] = *s.generateFirstFrame(sb, scene, model)
frames[1] = *s.generateKeyFrame(sb, scene, model)
frames[2] = *s.generateKeyFrame(sb, scene, model)
frames[3] = *s.generateLastFrame(sb, scene, model)
}
return &MultiFramePrompt{
@@ -326,16 +316,16 @@ func (s *FramePromptService) generatePanelFrames(sb models.Storyboard, scene *mo
}
// generateActionSequence 生成动作序列5-8格
func (s *FramePromptService) generateActionSequence(sb models.Storyboard, scene *models.Scene) *MultiFramePrompt {
func (s *FramePromptService) generateActionSequence(sb models.Storyboard, scene *models.Scene, model string) *MultiFramePrompt {
// 将动作分解为5个步骤
frames := make([]SingleFramePrompt, 5)
// 简化实现:均匀分布从首帧到尾帧
frames[0] = *s.generateFirstFrame(sb, scene)
frames[1] = *s.generateKeyFrame(sb, scene)
frames[2] = *s.generateKeyFrame(sb, scene)
frames[3] = *s.generateKeyFrame(sb, scene)
frames[4] = *s.generateLastFrame(sb, scene)
frames[0] = *s.generateFirstFrame(sb, scene, model)
frames[1] = *s.generateKeyFrame(sb, scene, model)
frames[2] = *s.generateKeyFrame(sb, scene, model)
frames[3] = *s.generateKeyFrame(sb, scene, model)
frames[4] = *s.generateLastFrame(sb, scene, model)
return &MultiFramePrompt{
Layout: "horizontal_5",
@@ -349,14 +339,14 @@ func (s *FramePromptService) buildStoryboardContext(sb models.Storyboard, scene
// 镜头描述(最重要)
if sb.Description != nil && *sb.Description != "" {
parts = append(parts, fmt.Sprintf("镜头描述: %s", *sb.Description))
parts = append(parts, s.promptI18n.FormatUserPrompt("shot_description_label", *sb.Description))
}
// 场景信息
if scene != nil {
parts = append(parts, fmt.Sprintf("场景: %s, %s", scene.Location, scene.Time))
parts = append(parts, s.promptI18n.FormatUserPrompt("scene_label", scene.Location, scene.Time))
} else if sb.Location != nil && sb.Time != nil {
parts = append(parts, fmt.Sprintf("场景: %s, %s", *sb.Location, *sb.Time))
parts = append(parts, s.promptI18n.FormatUserPrompt("scene_label", *sb.Location, *sb.Time))
}
// 角色
@@ -365,38 +355,38 @@ func (s *FramePromptService) buildStoryboardContext(sb models.Storyboard, scene
for _, char := range sb.Characters {
charNames = append(charNames, char.Name)
}
parts = append(parts, fmt.Sprintf("角色: %s", strings.Join(charNames, ", ")))
parts = append(parts, s.promptI18n.FormatUserPrompt("characters_label", strings.Join(charNames, ", ")))
}
// 动作
if sb.Action != nil && *sb.Action != "" {
parts = append(parts, fmt.Sprintf("动作: %s", *sb.Action))
parts = append(parts, s.promptI18n.FormatUserPrompt("action_label", *sb.Action))
}
// 结果
if sb.Result != nil && *sb.Result != "" {
parts = append(parts, fmt.Sprintf("结果: %s", *sb.Result))
parts = append(parts, s.promptI18n.FormatUserPrompt("result_label", *sb.Result))
}
// 对白
if sb.Dialogue != nil && *sb.Dialogue != "" {
parts = append(parts, fmt.Sprintf("对白: %s", *sb.Dialogue))
parts = append(parts, s.promptI18n.FormatUserPrompt("dialogue_label", *sb.Dialogue))
}
// 氛围
if sb.Atmosphere != nil && *sb.Atmosphere != "" {
parts = append(parts, fmt.Sprintf("氛围: %s", *sb.Atmosphere))
parts = append(parts, s.promptI18n.FormatUserPrompt("atmosphere_label", *sb.Atmosphere))
}
// 镜头参数
if sb.ShotType != nil {
parts = append(parts, fmt.Sprintf("景别: %s", *sb.ShotType))
parts = append(parts, s.promptI18n.FormatUserPrompt("shot_type_label", *sb.ShotType))
}
if sb.Angle != nil {
parts = append(parts, fmt.Sprintf("角度: %s", *sb.Angle))
parts = append(parts, s.promptI18n.FormatUserPrompt("angle_label", *sb.Angle))
}
if sb.Movement != nil {
parts = append(parts, fmt.Sprintf("运镜: %s", *sb.Movement))
parts = append(parts, s.promptI18n.FormatUserPrompt("movement_label", *sb.Movement))
}
return strings.Join(parts, "\n")

View File

@@ -10,6 +10,7 @@ import (
models "github.com/drama-generator/backend/domain/models"
"github.com/drama-generator/backend/infrastructure/storage"
"github.com/drama-generator/backend/pkg/ai"
"github.com/drama-generator/backend/pkg/config"
"github.com/drama-generator/backend/pkg/image"
"github.com/drama-generator/backend/pkg/logger"
"github.com/drama-generator/backend/pkg/utils"
@@ -22,6 +23,8 @@ type ImageGenerationService struct {
transferService *ResourceTransferService
localStorage *storage.LocalStorage
log *logger.Logger
config *config.Config
promptI18n *PromptI18n
}
// truncateImageURL 截断图片 URL避免 base64 格式的 URL 占满日志
@@ -42,12 +45,14 @@ func truncateImageURL(url string) string {
return url
}
func NewImageGenerationService(db *gorm.DB, transferService *ResourceTransferService, localStorage *storage.LocalStorage, log *logger.Logger) *ImageGenerationService {
func NewImageGenerationService(db *gorm.DB, cfg *config.Config, transferService *ResourceTransferService, localStorage *storage.LocalStorage, log *logger.Logger) *ImageGenerationService {
return &ImageGenerationService{
db: db,
aiService: NewAIService(db, log),
transferService: transferService,
localStorage: localStorage,
config: cfg,
promptI18n: NewPromptI18n(cfg),
log: log,
}
}
@@ -643,21 +648,22 @@ func (s *ImageGenerationService) GetScencesForEpisode(episodeID string) ([]*mode
}
// ExtractBackgroundsForEpisode 从剧本内容中提取场景并保存到项目级别数据库
func (s *ImageGenerationService) ExtractBackgroundsForEpisode(episodeID string) ([]*models.Scene, error) {
func (s *ImageGenerationService) ExtractBackgroundsForEpisode(episodeID string, model string) ([]*models.Scene, error) {
var episode models.Episode
if err := s.db.Preload("Drama").Where("id = ?", episodeID).First(&episode).Error; err != nil {
if err := s.db.Preload("Storyboards").First(&episode, episodeID).Error; err != nil {
return nil, fmt.Errorf("episode not found")
}
// 检查是否有剧本内容
// 如果没有剧本内容,无法提取场景
if episode.ScriptContent == nil || *episode.ScriptContent == "" {
return nil, fmt.Errorf("剧本内容为空,无法提取场景")
return nil, fmt.Errorf("episode has no script content")
}
s.log.Infow("Extracting backgrounds from script", "episode_id", episodeID, "model", model)
dramaID := episode.DramaID
// 使用AI从剧本内容中提取场景
backgroundsInfo, err := s.extractBackgroundsFromScript(*episode.ScriptContent, dramaID)
backgroundsInfo, err := s.extractBackgroundsFromScript(*episode.ScriptContent, dramaID, model)
if err != nil {
s.log.Errorw("Failed to extract backgrounds from script", "error", err)
return nil, err
@@ -713,37 +719,74 @@ func (s *ImageGenerationService) ExtractBackgroundsForEpisode(episodeID string)
}
// extractBackgroundsFromScript 从剧本内容中使用AI提取场景信息
func (s *ImageGenerationService) extractBackgroundsFromScript(scriptContent string, dramaID uint) ([]BackgroundInfo, error) {
func (s *ImageGenerationService) extractBackgroundsFromScript(scriptContent string, dramaID uint, model string) ([]BackgroundInfo, error) {
if scriptContent == "" {
return []BackgroundInfo{}, nil
}
// 获取AI客户端
client, err := s.aiService.GetAIClient("text")
// 获取AI客户端(如果指定了模型则使用指定的模型)
var client ai.AIClient
var err error
if model != "" {
s.log.Infow("Using specified model for background extraction", "model", model)
client, err = s.aiService.GetAIClientForModel("text", model)
if err != nil {
s.log.Warnw("Failed to get client for specified model, using default", "model", model, "error", err)
client, err = s.aiService.GetAIClient("text")
}
} else {
client, err = s.aiService.GetAIClient("text")
}
if err != nil {
return nil, fmt.Errorf("failed to get AI client: %w", err)
}
// 构建AI提示词
prompt := fmt.Sprintf(`【任务】分析以下剧本内容,提取出所有需要的场景背景信息。
// 使用国际化提示词
systemPrompt := s.promptI18n.GetSceneExtractionPrompt()
contentLabel := s.promptI18n.FormatUserPrompt("script_content_label")
【剧本内容】
%s
// 根据语言构建不同的格式说明
var formatInstructions string
if s.promptI18n.IsEnglish() {
formatInstructions = `[Output JSON Format]
{
"backgrounds": [
{
"location": "Location name (English)",
"time": "Time description (English)",
"atmosphere": "Atmosphere description (English)",
"prompt": "A cinematic anime-style pure background scene depicting [location description] at [time]. The scene shows [environment details, architecture, objects, lighting, no characters]. Style: rich details, high quality, atmospheric lighting. Mood: [environment mood description]."
}
]
}
【要求】
1. 识别剧本中所有不同的场景(地点+时间组合)
2. 为每个场景生成详细的**中文**图片生成提示词Prompt
3. **重要**:场景描述必须是**纯背景**,不能包含人物、角色、动作等元素
4. Prompt要求
- **必须使用中文**,不能包含英文字符
- 详细描述场景环境、建筑、物品、光线、氛围等
- **禁止描述人物、角色、动作、对话等**
- 适合AI图片生成模型使用
- 风格统一为:电影感、细节丰富、动漫风格、高质量
5. location、time、atmosphere和prompt字段都使用中文
6. 提取场景的氛围描述atmosphere
[Example]
Correct example (note: no characters):
{
"backgrounds": [
{
"location": "Repair Shop Interior",
"time": "Late Night",
"atmosphere": "Dim, lonely, industrial",
"prompt": "A cinematic anime-style pure background scene depicting a messy repair shop interior at late night. Under dim fluorescent lights, the workbench is scattered with various wrenches, screwdrivers and mechanical parts, oil-stained tool boards and faded posters hang on walls, oil stains on the floor, used tires piled in corners. Style: rich details, high quality, dim atmosphere. Mood: lonely, industrial."
},
{
"location": "City Street",
"time": "Dusk",
"atmosphere": "Warm, busy, lively",
"prompt": "A cinematic anime-style pure background scene depicting a bustling city street at dusk. Sunset afterglow shines on the asphalt road, neon lights of shops on both sides begin to light up, bicycle racks and bus stops on the street, high-rise buildings in the distance, sky showing orange-red gradient. Style: rich details, high quality, warm atmosphere. Mood: lively, busy."
}
]
}
【输出JSON格式】
[Wrong Examples (containing characters, forbidden)]:
❌ "Depicting protagonist standing on the street" - contains character
❌ "People hurrying by" - contains characters
❌ "Character moving in the room" - contains character
Please strictly follow the JSON format and ensure all fields use English.`
} else {
formatInstructions = `【输出JSON格式】
{
"backgrounds": [
{
@@ -779,29 +822,57 @@ func (s *ImageGenerationService) extractBackgroundsFromScript(scriptContent stri
❌ "人们匆匆而过" - 包含人物
❌ "角色在房间里活动" - 包含人物
请严格按照JSON格式输出确保所有字段都使用中文。`, scriptContent)
请严格按照JSON格式输出确保所有字段都使用中文。`
}
response, err := client.GenerateText(prompt, "", ai.WithTemperature(0.7), ai.WithMaxTokens(8000))
prompt := fmt.Sprintf(`%s
%s
%s
%s`, systemPrompt, contentLabel, scriptContent, formatInstructions)
// 打印完整提示词用于调试
s.log.Infow("=== AI Prompt for Background Extraction (extractBackgroundsFromScript) ===",
"language", s.promptI18n.GetLanguage(),
"prompt_length", len(prompt),
"full_prompt", prompt)
response, err := client.GenerateText(prompt, "", ai.WithTemperature(0.7))
if err != nil {
s.log.Errorw("Failed to extract backgrounds with AI", "error", err)
return nil, fmt.Errorf("AI提取场景失败: %w", err)
}
s.log.Infow("AI backgrounds extraction response", "length", len(response))
// 解析JSON响应
var result struct {
Backgrounds []BackgroundInfo `json:"backgrounds"`
}
if err := utils.SafeParseAIJSON(response, &result); err != nil {
s.log.Errorw("Failed to parse AI response", "error", err, "response", response[:minInt(500, len(response))])
return nil, fmt.Errorf("解析AI响应失败: %w", err)
// 打印AI返回的原始响应
s.log.Infow("=== AI Response for Background Extraction (extractBackgroundsFromScript) ===",
"response_length", len(response),
"raw_response", response)
// 解析AI返回的JSON
var backgrounds []BackgroundInfo
// 先尝试解析为数组格式
if err := utils.SafeParseAIJSON(response, &backgrounds); err == nil {
s.log.Infow("Parsed backgrounds as array format", "count", len(backgrounds))
} else {
// 尝试解析为对象格式
var result struct {
Backgrounds []BackgroundInfo `json:"backgrounds"`
}
if err := utils.SafeParseAIJSON(response, &result); err != nil {
s.log.Errorw("Failed to parse AI response in both formats", "error", err, "response", response[:min(len(response), 500)])
return nil, fmt.Errorf("解析AI响应失败: %w", err)
}
backgrounds = result.Backgrounds
s.log.Infow("Parsed backgrounds as object format", "count", len(backgrounds))
}
s.log.Infow("Extracted backgrounds from script",
"drama_id", dramaID,
"backgrounds_count", len(result.Backgrounds))
"backgrounds_count", len(backgrounds))
return result.Backgrounds, nil
return backgrounds, nil
}
// extractBackgroundsWithAI 使用AI智能分析场景并提取唯一背景
@@ -834,25 +905,50 @@ func (s *ImageGenerationService) extractBackgroundsWithAI(storyboards []models.S
storyboard.StoryboardNumber, location, time, action, description)
}
// 构建AI提示词
prompt := fmt.Sprintf(`【任务】分析以下分镜头场景,提取出所有需要生成的唯一背景,并返回每个背景对应的场景编号。
// 使用国际化提示词
systemPrompt := s.promptI18n.GetSceneExtractionPrompt()
storyboardLabel := s.promptI18n.FormatUserPrompt("storyboard_list_label")
【分镜头列表】
%s
// 根据语言构建不同的提示词
var formatInstructions string
if s.promptI18n.IsEnglish() {
formatInstructions = `[Output JSON Format]
{
"backgrounds": [
{
"location": "Location name (English)",
"time": "Time description (English)",
"prompt": "A cinematic anime-style background depicting [location description] at [time]. The scene shows [detail description]. Style: rich details, high quality, atmospheric lighting. Mood: [mood description].",
"scene_numbers": [1, 2, 3]
}
]
}
【要求】
1. 合并相同或相似的场景背景(地点和时间相同或相近)
2. 为每个唯一背景生成**中文**图片生成提示词Prompt
3. Prompt要求
- **必须使用中文**,不能包含英文字符
- 详细描述场景、时间、氛围、风格
- 适合AI图片生成模型使用
- 风格统一为:电影感、细节丰富、动漫风格、高质量
4. **重要**必须返回使用该背景的场景编号数组scene_numbers
5. location、time和prompt字段都使用中文
6. 每个场景都必须分配到某个背景,确保所有场景编号都被包含
[Example]
Correct example:
{
"backgrounds": [
{
"location": "Repair Shop",
"time": "Late Night",
"prompt": "A cinematic anime-style background depicting a messy repair shop interior at late night. Under dim lighting, the workbench is scattered with various tools and parts, with greasy posters hanging on the walls. Style: rich details, high quality, dim atmosphere. Mood: lonely, industrial.",
"scene_numbers": [1, 5, 6, 10, 15]
},
{
"location": "City Panorama",
"time": "Late Night with Acid Rain",
"prompt": "A cinematic anime-style background depicting a coastal city panorama in late night acid rain. Neon lights blur in the rain, skyscrapers shrouded in gray-green rain curtain, streets reflecting colorful lights. Style: rich details, high quality, cyberpunk atmosphere. Mood: oppressive, sci-fi, apocalyptic.",
"scene_numbers": [2, 7]
}
]
}
【输出JSON格式】
Please strictly follow the JSON format and ensure:
1. prompt field uses English
2. scene_numbers includes all scene numbers using this background
3. All scenes are assigned to a background`
} else {
formatInstructions = `【输出JSON格式】
{
"backgrounds": [
{
@@ -886,7 +982,21 @@ func (s *ImageGenerationService) extractBackgroundsWithAI(storyboards []models.S
请严格按照JSON格式输出确保
1. prompt字段使用中文
2. scene_numbers包含所有使用该背景的场景编号
3. 所有场景都被分配到某个背景`, scenesText)
3. 所有场景都被分配到某个背景`
}
prompt := fmt.Sprintf(`%s
%s
%s
%s`, systemPrompt, storyboardLabel, scenesText, formatInstructions)
// 打印完整提示词用于调试
s.log.Infow("=== AI Prompt for Background Extraction (extractBackgroundsWithAI) ===",
"language", s.promptI18n.GetLanguage(),
"prompt_length", len(prompt),
"full_prompt", prompt)
// 调用AI服务
text, err := s.aiService.GenerateText(prompt, "")
@@ -894,6 +1004,11 @@ func (s *ImageGenerationService) extractBackgroundsWithAI(storyboards []models.S
return nil, fmt.Errorf("AI analysis failed: %w", err)
}
// 打印AI返回的原始响应
s.log.Infow("=== AI Response for Background Extraction ===",
"response_length", len(text),
"raw_response", text)
// 解析AI返回的JSON
var result struct {
Scenes []struct {

View File

@@ -0,0 +1,516 @@
package services
import (
"fmt"
"github.com/drama-generator/backend/pkg/config"
)
// PromptI18n is the prompt internationalisation helper: it hands out the
// Chinese or English variant of each AI prompt template depending on the
// language found in the application config.
type PromptI18n struct {
// config is the application config; App.Language is read from it to pick
// the prompt language.
config *config.Config
}
// NewPromptI18n creates a prompt internationalisation helper bound to cfg.
// The pointer is stored as-is (no copy is made).
func NewPromptI18n(cfg *config.Config) *PromptI18n {
	helper := new(PromptI18n)
	helper.config = cfg
	return helper
}
// GetLanguage returns the language code stored in App.Language of the
// config, or "zh" (Chinese) when that setting is empty.
func (p *PromptI18n) GetLanguage() string {
	if configured := p.config.App.Language; configured != "" {
		return configured
	}
	return "zh" // Chinese is the default
}
// IsEnglish reports whether the current language is exactly "en". The
// language is looked up through GetLanguage on every call rather than cached.
func (p *PromptI18n) IsEnglish() bool {
	const english = "en"
	current := p.GetLanguage()
	return current == english
}
// GetStoryboardSystemPrompt returns the system prompt used for storyboard
// generation. The English variant is returned when IsEnglish reports true;
// every other language setting gets the Chinese variant.
func (p *PromptI18n) GetStoryboardSystemPrompt() string {
	if !p.IsEnglish() {
		// Chinese variant (default).
		return `【角色】你是一位资深影视分镜师,精通罗伯特·麦基的镜头拆解理论,擅长构建情绪节奏。
【任务】将小说剧本按**独立动作单元**拆解为分镜头方案。
【分镜拆解原则】
1. **动作单元划分**:每个镜头必须对应一个完整且独立的动作
- 一个动作 = 一个镜头(角色站起来、走过去、说一句话、做一个反应表情等)
- 禁止合并多个动作(站起+走过去应拆分为2个镜头
2. **景别标准**(根据叙事需要选择):
- 大远景:环境、氛围营造
- 远景:全身动作、空间关系
- 中景:交互对话、情感交流
- 近景:细节展示、情绪表达
- 特写:关键道具、强烈情绪
3. **运镜要求**
- 固定镜头:稳定聚焦于一个主体
- 推镜:接近主体,增强紧张感
- 拉镜:扩大视野,交代环境
- 摇镜:水平移动摄像机,空间转换
- 跟镜:跟随主体移动
- 移镜:摄像机与主体同向移动
4. **情绪与强度标记**
- emotion简短描述兴奋、悲伤、紧张、愉快等
- emotion_intensity用箭头表示情绪等级
* 极强 ↑↑↑ (3):情绪高峰、高度紧张
* 强 ↑↑ (2):情绪明显波动
* 中 ↑ (1):情绪有所变化
* 平稳 → (0):情绪不变
* 弱 ↓ (-1):情绪回落
【输出要求】
1. 生成一个数组,每个元素是一个镜头,包含:
- shot_number镜头号
- scene_description场景地点+时间,如"卧室内,早晨"
- shot_type景别大远景/远景/中景/近景/特写)
- camera_angle机位角度平视/仰视/俯视/侧面/背面)
- camera_movement运镜方式固定/推镜/拉镜/摇镜/跟镜/移镜)
- action动作描述
- result动作完成后的画面结果
- dialogue角色对话或旁白如有
- emotion当前情绪
- emotion_intensity情绪强度等级3/2/1/0/-1
**重要必须只返回纯JSON数组不要包含任何markdown代码块、说明文字或其他内容。直接以 [ 开头,以 ] 结尾。**
【重要提示】
- 镜头数量必须与剧本中的独立动作数量匹配(不允许合并或减少)
- 每个镜头必须有明确的动作和结果
- 景别选择必须符合叙事节奏(不要连续使用同一景别)
- 情绪强度必须准确反映剧本氛围变化`
	}

	// English variant.
	return `[Role] You are a senior film storyboard artist, proficient in Robert McKee's shot breakdown theory, skilled at building emotional rhythm.
[Task] Break down the novel script into storyboard shots based on **independent action units**.
[Shot Breakdown Principles]
1. **Action Unit Division**: Each shot must correspond to a complete and independent action
- One action = one shot (character stands up, walks over, speaks a line, reacts with an expression, etc.)
- Do NOT merge multiple actions (standing up + walking over should be split into 2 shots)
2. **Shot Type Standards** (choose based on storytelling needs):
- Extreme Long Shot (ELS): Environment, atmosphere building
- Long Shot (LS): Full body action, spatial relationships
- Medium Shot (MS): Interactive dialogue, emotional communication
- Close-Up (CU): Detail display, emotional expression
- Extreme Close-Up (ECU): Key props, intense emotions
3. **Camera Movement Requirements**:
- Fixed Shot: Stable focus on one subject
- Push In: Approaching subject, increasing tension
- Pull Out: Expanding field of view, revealing context
- Pan: Horizontal camera movement, spatial transitions
- Follow: Following subject movement
- Tracking: Linear movement with subject
4. **Emotion & Intensity Markers**:
- Emotion: Brief description (excited, sad, nervous, happy, etc.)
- Intensity: Emotion level using arrows
* Extremely strong ↑↑↑ (3): Emotional peak, high tension
* Strong ↑↑ (2): Significant emotional fluctuation
* Moderate ↑ (1): Noticeable emotional change
* Stable → (0): Emotion remains unchanged
* Weak ↓ (-1): Emotion subsiding
[Output Requirements]
1. Generate an array, each element is a shot containing:
- shot_number: Shot number
- scene_description: Scene (location + time, e.g., "bedroom interior, morning")
- shot_type: Shot type (extreme long shot/long shot/medium shot/close-up/extreme close-up)
- camera_angle: Camera angle (eye-level/low-angle/high-angle/side/back)
- camera_movement: Camera movement (fixed/push/pull/pan/follow/tracking)
- action: Action description
- result: Visual result of the action
- dialogue: Character dialogue or narration (if any)
- emotion: Current emotion
- emotion_intensity: Emotion intensity level (3/2/1/0/-1)
**CRITICAL: Return ONLY a valid JSON array. Do NOT include any markdown code blocks, explanations, or other text. Start directly with [ and end with ].**
[Important Notes]
- Shot count must match number of independent actions in the script (not allowed to merge or reduce)
- Each shot must have clear action and result
- Shot types must match storytelling rhythm (don't use same shot type continuously)
- Emotion intensity must accurately reflect script atmosphere changes`
}
// GetSceneExtractionPrompt returns the prompt that asks the model to extract
// the unique scene backgrounds from a script. Chinese is the default text;
// it is swapped for the English text when IsEnglish reports true.
func (p *PromptI18n) GetSceneExtractionPrompt() string {
	// Start from the Chinese (default) variant.
	prompt := `【任务】从剧本中提取所有唯一的场景背景
【要求】
1. 识别剧本中所有不同的场景(地点+时间组合)
2. 为每个场景生成详细的**中文**图片生成提示词Prompt
3. **重要**:场景描述必须是**纯背景**,不能包含人物、角色、动作等元素
4. Prompt要求
- **必须使用中文**,不能包含英文字符
- 详细描述场景、时间、氛围、风格
- 必须明确说明"无人物、无角色、空场景"
- 要符合剧本的题材和氛围
【输出格式】
**重要必须只返回纯JSON数组不要包含任何markdown代码块、说明文字或其他内容。直接以 [ 开头,以 ] 结尾。**
每个元素包含:
- location地点如"豪华办公室"
- time时间如"下午"
- prompt完整的中文图片生成提示词纯背景明确说明无人物`

	if p.IsEnglish() {
		prompt = `[Task] Extract all unique scene backgrounds from the script
[Requirements]
1. Identify all different scenes (location + time combinations) in the script
2. Generate detailed **English** image generation prompts for each scene
3. **Important**: Scene descriptions must be **pure backgrounds** without any characters, people, or actions
4. Prompt requirements:
- Must use **English**, no Chinese characters
- Detailed description of scene, time, atmosphere, style
- Must explicitly specify "no people, no characters, empty scene"
- Must match the drama's genre and tone
[Output Format]
**CRITICAL: Return ONLY a valid JSON array. Do NOT include any markdown code blocks, explanations, or other text. Start directly with [ and end with ].**
Each element containing:
- location: Location (e.g., "luxurious office")
- time: Time period (e.g., "afternoon")
- prompt: Complete English image generation prompt (pure background, explicitly stating no people)`
	}

	return prompt
}
// GetFirstFramePrompt returns the system prompt for generating the image
// prompt of a shot's first frame (the static state before the action
// starts), in English when IsEnglish reports true and in Chinese otherwise.
func (p *PromptI18n) GetFirstFramePrompt() string {
	switch {
	case p.IsEnglish():
		return `You are a professional image generation prompt expert. Please generate prompts suitable for AI image generation based on the provided shot information.
Important: This is the first frame of the shot - a completely static image showing the initial state before the action begins.
Key Points:
1. Focus on the initial static state - the moment before the action
2. Must NOT include any action or movement
3. Describe the character's initial posture, position, and expression
4. Can include scene atmosphere and environmental details
5. Shot type determines composition and framing
Output Format:
Return a JSON object containing:
- prompt: Complete English image generation prompt (detailed description, suitable for AI image generation)
- description: Simplified Chinese description (for reference)`
	default:
		return `你是一个专业的图像生成提示词专家。请根据提供的镜头信息生成适合用于AI图像生成的提示词。
重要:这是镜头的首帧 - 一个完全静态的画面,展示动作发生之前的初始状态。
关键要点:
1. 聚焦初始静态状态 - 动作发生之前的那一瞬间
2. 必须不包含任何动作或运动
3. 描述角色的初始姿态、位置和表情
4. 可以包含场景氛围和环境细节
5. 景别决定构图和取景范围
输出格式:
返回一个JSON对象包含
- prompt完整的中文图片生成提示词详细描述适合AI图像生成
- description简化的中文描述供参考`
	}
}
// GetKeyFramePrompt returns the system prompt for generating the image
// prompt of a shot's key frame (the peak moment of the action). The Chinese
// text is returned unless IsEnglish reports true.
func (p *PromptI18n) GetKeyFramePrompt() string {
	if !p.IsEnglish() {
		return `你是一个专业的图像生成提示词专家。请根据提供的镜头信息生成适合用于AI图像生成的提示词。
重要:这是镜头的关键帧 - 捕捉动作最激烈、最精彩的瞬间。
关键要点:
1. 聚焦动作最精彩的时刻
2. 捕捉情绪表达的顶点
3. 强调动态张力
4. 展示角色动作和表情的高潮状态
5. 可以包含动作模糊或动态效果
输出格式:
返回一个JSON对象包含
- prompt完整的中文图片生成提示词详细描述适合AI图像生成
- description简化的中文描述供参考`
	}

	return `You are a professional image generation prompt expert. Please generate prompts suitable for AI image generation based on the provided shot information.
Important: This is the key frame of the shot - capturing the most intense and exciting moment of the action.
Key Points:
1. Focus on the most exciting moment of the action
2. Capture peak emotional expression
3. Emphasize dynamic tension
4. Show character actions and expressions at their climax
5. Can include motion blur or dynamic effects
Output Format:
Return a JSON object containing:
- prompt: Complete English image generation prompt (detailed description, suitable for AI image generation)
- description: Simplified Chinese description (for reference)`
}
// GetLastFramePrompt returns the system prompt for generating the image
// prompt of a shot's last frame (the settled state after the action ends).
// Chinese is the default; English is used when IsEnglish reports true.
func (p *PromptI18n) GetLastFramePrompt() string {
	// Chinese (default) variant.
	text := `你是一个专业的图像生成提示词专家。请根据提供的镜头信息生成适合用于AI图像生成的提示词。
重要:这是镜头的尾帧 - 一个静态画面,展示动作结束后的最终状态和结果。
关键要点:
1. 聚焦动作完成后的最终状态
2. 展示动作的结果
3. 描述角色在动作完成后的姿态和表情
4. 强调动作后的情绪状态
5. 捕捉动作结束后的平静瞬间
输出格式:
返回一个JSON对象包含
- prompt完整的中文图片生成提示词详细描述适合AI图像生成
- description简化的中文描述供参考`

	if p.IsEnglish() {
		text = `You are a professional image generation prompt expert. Please generate prompts suitable for AI image generation based on the provided shot information.
Important: This is the last frame of the shot - a static image showing the final state and result after the action ends.
Key Points:
1. Focus on the final state after action completion
2. Show the result of the action
3. Describe character's final posture and expression after action
4. Emphasize emotional state after action
5. Capture the calm moment after action ends
Output Format:
Return a JSON object containing:
- prompt: Complete English image generation prompt (detailed description, suitable for AI image generation)
- description: Simplified Chinese description (for reference)`
	}

	return text
}
// GetOutlineGenerationPrompt returns the system prompt for generating a
// short-drama outline, in English when IsEnglish reports true and in
// Chinese otherwise.
func (p *PromptI18n) GetOutlineGenerationPrompt() string {
	switch {
	case p.IsEnglish():
		return `You are a professional short drama screenwriter. Based on the theme and number of episodes, create a complete short drama outline and plan the plot direction for each episode.
Requirements:
1. Compact plot with strong conflicts and fast pace
2. Each episode should have independent conflicts while connecting the main storyline
3. Clear character arcs and growth
4. Cliffhanger endings to hook viewers
5. Clear theme and emotional core
Output Format:
Return a JSON object containing:
- title: Drama title (creative and attractive)
- episodes: Episode list, each containing:
- episode_number: Episode number
- title: Episode title
- summary: Episode content summary (50-100 words)
- conflict: Main conflict point
- cliffhanger: Cliffhanger ending (if any)`
	default:
		return `你是专业短剧编剧。根据主题和剧集数量,创作完整的短剧大纲,规划好每一集的剧情走向。
要求:
1. 剧情紧凑,矛盾冲突强烈,节奏快
2. 每集都有独立的矛盾冲突,同时推进主线
3. 角色弧光清晰,成长变化明显
4. 悬念设置合理,吸引观众继续观看
5. 主题明确,情感内核清晰
输出格式:
返回一个JSON对象包含
- title: 剧名(富有创意和吸引力)
- episodes: 分集列表,每集包含:
- episode_number: 集数
- title: 本集标题
- summary: 本集内容概要50-100字
- conflict: 主要矛盾点
- cliffhanger: 悬念结尾(如有)`
	}
}
// GetCharacterExtractionPrompt returns the system prompt that asks the model
// to extract character profiles from a script as a JSON array. The Chinese
// text is returned unless IsEnglish reports true.
func (p *PromptI18n) GetCharacterExtractionPrompt() string {
	if !p.IsEnglish() {
		return `你是一个专业的角色分析师,擅长从剧本中提取和分析角色信息。
你的任务是根据提供的剧本内容,提取并整理剧中出现的所有角色的详细设定。
要求:
1. 提取所有有名字的角色(忽略无名路人或背景角色)
2. 对每个角色,提取以下信息:
- name: 角色名字
- role: 角色类型main/supporting/minor
- appearance: 外貌描述150-300字
- personality: 性格特点100-200字
- description: 背景故事和角色关系100-200字
3. 外貌描述要足够详细适合AI生成图片包括性别、年龄、体型、面部特征、发型、服装风格等
4. 主要角色需要更详细的描述,次要角色可以简化
输出格式:
**重要必须只返回纯JSON数组不要包含任何markdown代码块、说明文字或其他内容。直接以 [ 开头,以 ] 结尾。**
每个元素是一个角色对象,包含上述字段。`
	}

	return `You are a professional character analyst, skilled at extracting and analyzing character information from scripts.
Your task is to extract and organize detailed character settings for all characters appearing in the script based on the provided script content.
Requirements:
1. Extract all characters with names (ignore unnamed passersby or background characters)
2. For each character, extract:
- name: Character name
- role: Character role (main/supporting/minor)
- appearance: Physical appearance description (150-300 words)
- personality: Personality traits (100-200 words)
- description: Background story and character relationships (100-200 words)
3. Appearance must be detailed enough for AI image generation, including: gender, age, body type, facial features, hairstyle, clothing style, etc.
4. Main characters require more detailed descriptions, supporting characters can be simplified
Output Format:
**CRITICAL: Return ONLY a valid JSON array. Do NOT include any markdown code blocks, explanations, or other text. Start directly with [ and end with ].**
Each element is a character object containing the above fields.`
}
// GetEpisodeScriptPrompt returns the system prompt for expanding an outline
// into per-episode scripts. Chinese is the default; the English text is
// substituted when IsEnglish reports true.
func (p *PromptI18n) GetEpisodeScriptPrompt() string {
	// Chinese (default) variant.
	script := `你是一个专业的短剧编剧。你擅长根据分集规划创作详细的剧情内容。
你的任务是根据大纲中的分集规划将每一集的概要扩展为详细的剧情叙述。每集约180秒3分钟需要充实的内容。
要求:
1. 将大纲中的概要扩展为具体的剧情发展
2. 写出角色的对话和动作,不是简单描述
3. 突出冲突的递进和情感的变化
4. 增加场景转换和氛围描写
5. 控制节奏高潮在2/3处结尾有收束
6. 每集800-1200字对话丰富
7. 与角色设定保持一致
输出格式:
**重要必须只返回纯JSON对象不要包含任何markdown代码块、说明文字或其他内容。直接以 { 开头,以 } 结尾。**
- episodes: 分集列表,每集包含:
- episode_number: 集数
- title: 本集标题
- script_content: 详细剧本内容800-1200字`

	if p.IsEnglish() {
		script = `You are a professional short drama screenwriter. You excel at creating detailed plot content based on episode plans.
Your task is to expand the summary in the outline into detailed plot narratives for each episode. Each episode is about 180 seconds (3 minutes) and requires substantial content.
Requirements:
1. Expand the outline summary into detailed plot development
2. Write character dialogue and actions, not just description
3. Highlight conflict progression and emotional changes
4. Add scene transitions and atmosphere descriptions
5. Control rhythm, with climax at 2/3 point, resolution at the end
6. Each episode 800-1200 words, dialogue-rich
7. Keep consistent with character settings
Output Format:
**CRITICAL: Return ONLY a valid JSON object. Do NOT include any markdown code blocks, explanations, or other text. Start directly with { and end with }.**
- episodes: Episode list, each containing:
- episode_number: Episode number
- title: Episode title
- script_content: Detailed script content (800-1200 words)`
	}

	return script
}
// promptI18nUserTemplates holds the user-prompt fragments used by
// FormatUserPrompt, keyed first by language code ("en" / "zh") and then by
// template key. It is built once at package initialisation instead of on
// every FormatUserPrompt call, and must be treated as read-only.
var promptI18nUserTemplates = map[string]map[string]string{
	"en": {
		"outline_request":        "Please create a short drama outline for the following theme:\n\nTheme: %s",
		"genre_preference":       "\nGenre preference: %s",
		"style_requirement":      "\nStyle requirement: %s",
		"episode_count":          "\nNumber of episodes: %d episodes",
		"episode_importance":     "\n\n**Important: Must plan complete storylines for all %d episodes in the episodes array, each with clear story content!**",
		"character_request":      "Script content:\n%s\n\nPlease extract and organize detailed character profiles for up to %d main characters from the script.",
		"episode_script_request": "Drama outline:\n%s\n%s\nPlease create detailed scripts for %d episodes based on the above outline and characters.\n\n**Important requirements:**\n- Must generate all %d episodes, from episode 1 to episode %d, cannot skip any\n- Each episode is about 3-5 minutes (150-300 seconds)\n- The duration field for each episode should be set reasonably based on script content length, not all the same value\n- The episodes array in the returned JSON must contain %d elements",
		"frame_info":             "Shot information:\n%s\n\nPlease directly generate the image prompt for the first frame without any explanation:",
		"key_frame_info":         "Shot information:\n%s\n\nPlease directly generate the image prompt for the key frame without any explanation:",
		"last_frame_info":        "Shot information:\n%s\n\nPlease directly generate the image prompt for the last frame without any explanation:",
		"script_content_label":   "【Script Content】",
		"storyboard_list_label":  "【Storyboard List】",
		"task_label":             "【Task】",
		"character_list_label":   "【Available Character List】",
		"scene_list_label":       "【Extracted Scene Backgrounds】",
		"task_instruction":       "Break down the novel script into storyboard shots based on **independent action units**.",
		"character_constraint":   "**Important**: In the characters field, only use character IDs (numbers) from the above character list. Do not create new characters or use other IDs.",
		"scene_constraint":       "**Important**: In the scene_id field, select the most matching background ID (number) from the above background list. If no suitable background exists, use null.",
		"shot_description_label": "Shot description: %s",
		"scene_label":            "Scene: %s, %s",
		"characters_label":       "Characters: %s",
		"action_label":           "Action: %s",
		"result_label":           "Result: %s",
		"dialogue_label":         "Dialogue: %s",
		"atmosphere_label":       "Atmosphere: %s",
		"shot_type_label":        "Shot type: %s",
		"angle_label":            "Angle: %s",
		"movement_label":         "Movement: %s",
		"drama_info_template":    "Title: %s\nSummary: %s\nGenre: %s",
	},
	"zh": {
		"outline_request":        "请为以下主题创作短剧大纲:\n\n主题%s",
		"genre_preference":       "\n类型偏好%s",
		"style_requirement":      "\n风格要求%s",
		"episode_count":          "\n剧集数量%d集",
		"episode_importance":     "\n\n**重要必须在episodes数组中规划完整的%d集剧情每集都要有明确的故事内容**",
		"character_request":      "剧本内容:\n%s\n\n请从剧本中提取并整理最多 %d 个主要角色的详细设定。",
		"episode_script_request": "剧本大纲:\n%s\n%s\n请基于以上大纲和角色创作 %d 集的详细剧本。\n\n**重要要求:**\n- 必须生成完整的 %d 集从第1集到第%d集不能遗漏\n- 每集约3-5分钟150-300秒\n- 每集的duration字段要根据剧本内容长度合理设置不要都设置为同一个值\n- 返回的JSON中episodes数组必须包含 %d 个元素",
		"frame_info":             "镜头信息:\n%s\n\n请直接生成首帧的图像提示词不要任何解释",
		"key_frame_info":         "镜头信息:\n%s\n\n请直接生成关键帧的图像提示词不要任何解释",
		"last_frame_info":        "镜头信息:\n%s\n\n请直接生成尾帧的图像提示词不要任何解释",
		"script_content_label":   "【剧本内容】",
		"storyboard_list_label":  "【分镜头列表】",
		"task_label":             "【任务】",
		"character_list_label":   "【本剧可用角色列表】",
		"scene_list_label":       "【本剧已提取的场景背景列表】",
		"task_instruction":       "将小说剧本按**独立动作单元**拆解为分镜头方案。",
		"character_constraint":   "**重要**在characters字段中只能使用上述角色列表中的角色ID数字不得自创角色或使用其他ID。",
		"scene_constraint":       "**重要**在scene_id字段中必须从上述背景列表中选择最匹配的背景ID数字。如果没有合适的背景则填null。",
		"shot_description_label": "镜头描述: %s",
		"scene_label":            "场景: %s, %s",
		"characters_label":       "角色: %s",
		"action_label":           "动作: %s",
		"result_label":           "结果: %s",
		"dialogue_label":         "对白: %s",
		"atmosphere_label":       "氛围: %s",
		"shot_type_label":        "景别: %s",
		"angle_label":            "角度: %s",
		"movement_label":         "运镜: %s",
		"drama_info_template":    "剧名:%s\n简介%s\n类型%s",
	},
}

// FormatUserPrompt looks up the user-prompt fragment registered under key
// for the current language and formats it with args.
//
// The English table is used when IsEnglish reports true; every other
// language setting falls back to the Chinese table.
//
// Returns:
//   - "" when key is not a known template key;
//   - the raw template, verbs included, when no args are supplied;
//   - fmt.Sprintf(template, args...) otherwise.
func (p *PromptI18n) FormatUserPrompt(key string, args ...interface{}) string {
	lang := "zh"
	if p.IsEnglish() {
		lang = "en"
	}

	template, ok := promptI18nUserTemplates[lang][key]
	if !ok {
		return ""
	}
	if len(args) == 0 {
		// Plain labels are returned untouched so Sprintf is never run on a
		// template without arguments.
		return template
	}
	return fmt.Sprintf(template, args...)
}

View File

@@ -1,177 +1,42 @@
package services
import (
"encoding/json"
"fmt"
"strconv"
"github.com/drama-generator/backend/domain/models"
"github.com/drama-generator/backend/pkg/ai"
"github.com/drama-generator/backend/pkg/config"
"github.com/drama-generator/backend/pkg/logger"
"github.com/drama-generator/backend/pkg/utils"
"gorm.io/gorm"
)
type ScriptGenerationService struct {
db *gorm.DB
aiService *AIService
log *logger.Logger
db *gorm.DB
aiService *AIService
log *logger.Logger
config *config.Config
promptI18n *PromptI18n
}
func NewScriptGenerationService(db *gorm.DB, log *logger.Logger) *ScriptGenerationService {
func NewScriptGenerationService(db *gorm.DB, cfg *config.Config, log *logger.Logger) *ScriptGenerationService {
return &ScriptGenerationService{
db: db,
aiService: NewAIService(db, log),
log: log,
db: db,
aiService: NewAIService(db, log),
log: log,
config: cfg,
promptI18n: NewPromptI18n(cfg),
}
}
type GenerateOutlineRequest struct {
DramaID string `json:"drama_id" binding:"required"`
Theme string `json:"theme" binding:"required,min=2,max=500"`
Genre string `json:"genre"`
Style string `json:"style"`
Length int `json:"length"`
Temperature float64 `json:"temperature"`
}
type GenerateCharactersRequest struct {
DramaID string `json:"drama_id" binding:"required"`
EpisodeID uint `json:"episode_id"`
Outline string `json:"outline"`
Count int `json:"count"`
Temperature float64 `json:"temperature"`
}
type GenerateEpisodesRequest struct {
DramaID string `json:"drama_id" binding:"required"`
Outline string `json:"outline"`
EpisodeCount int `json:"episode_count" binding:"required,min=1,max=100"`
Temperature float64 `json:"temperature"`
}
type OutlineResult struct {
Title string `json:"title"`
Summary string `json:"summary"`
Genre string `json:"genre"`
Tags []string `json:"tags"`
Characters []CharacterOutline `json:"characters"`
Episodes []EpisodeOutline `json:"episodes"`
KeyScenes []string `json:"key_scenes"`
}
type CharacterOutline struct {
Name string `json:"name"`
Role string `json:"role"`
Description string `json:"description"`
Personality string `json:"personality"`
Appearance string `json:"appearance"`
}
type EpisodeOutline struct {
EpisodeNumber int `json:"episode_number"`
Title string `json:"title"`
Summary string `json:"summary"`
Scenes []string `json:"scenes"`
Duration int `json:"duration"`
}
func (s *ScriptGenerationService) GenerateOutline(req *GenerateOutlineRequest) (*OutlineResult, error) {
var drama models.Drama
if err := s.db.Where("id = ?", req.DramaID).First(&drama).Error; err != nil {
return nil, fmt.Errorf("drama not found")
}
systemPrompt := `你是专业短剧编剧。根据主题和剧集数量,创作完整的短剧大纲,规划好每一集的剧情走向。
要求:
1. 剧情紧凑,矛盾冲突强烈,节奏快
2. 必须规划好每一集的核心剧情
3. 每集有明确冲突和转折点,集与集之间有连贯性和悬念
**重要必须输出完整有效的JSON确保所有字段完整特别是episodes数组必须完整闭合**
JSON格式紧凑summary和episodes字段必须完整
{"title":"剧名","summary":"200-250字剧情概述包含故事背景、主要矛盾、核心冲突、完整走向","genre":"类型","tags":["标签1","标签2","标签3"],"episodes":[{"episode_number":1,"title":"标题","summary":"80字剧情概要"},{"episode_number":2,"title":"标题","summary":"80字剧情概要"}],"key_scenes":["场景1","场景2","场景3"]}
关键要求:
- summary控制在200-250字简洁清晰
- episodes必须生成用户要求的完整集数
- 每集summary控制在80字左右
- 确保JSON完整闭合不要截断
- 不要添加任何JSON外的文字说明`
userPrompt := fmt.Sprintf(`请为以下主题创作短剧大纲:
主题:%s`, req.Theme)
if req.Genre != "" {
userPrompt += fmt.Sprintf("\n类型偏好%s", req.Genre)
}
if req.Style != "" {
userPrompt += fmt.Sprintf("\n风格要求%s", req.Style)
}
length := req.Length
if length == 0 {
length = 5
}
userPrompt += fmt.Sprintf("\n剧集数量%d集", length)
userPrompt += fmt.Sprintf("\n\n**重要必须在episodes数组中规划完整的%d集剧情每集都要有明确的故事内容**", length)
temperature := req.Temperature
if temperature == 0 {
temperature = 0.8
}
// 调整token限制基础2000 + 每集约150 tokens包含80-100字概要
maxTokens := 2000 + (length * 150)
if maxTokens > 8000 {
maxTokens = 8000
}
s.log.Infow("Generating outline with episodes",
"episode_count", length,
"max_tokens", maxTokens)
text, err := s.aiService.GenerateText(
userPrompt,
systemPrompt,
ai.WithTemperature(temperature),
ai.WithMaxTokens(maxTokens),
)
if err != nil {
s.log.Errorw("Failed to generate outline", "error", err)
return nil, fmt.Errorf("生成失败: %w", err)
}
s.log.Infow("AI response received", "length", len(text), "preview", text[:minInt(200, len(text))])
var result OutlineResult
if err := utils.SafeParseAIJSON(text, &result); err != nil {
s.log.Errorw("Failed to parse outline JSON", "error", err, "raw_response", text[:minInt(500, len(text))])
return nil, fmt.Errorf("解析 AI 返回结果失败: %w", err)
}
// 将Tags转换为JSON格式存储
tagsJSON, err := json.Marshal(result.Tags)
if err != nil {
s.log.Errorw("Failed to marshal tags", "error", err)
tagsJSON = []byte("[]")
}
if err := s.db.Model(&drama).Updates(map[string]interface{}{
"title": result.Title,
"description": result.Summary,
"genre": result.Genre,
"tags": tagsJSON,
}).Error; err != nil {
s.log.Errorw("Failed to update drama", "error", err)
}
s.log.Infow("Outline generated", "drama_id", req.DramaID)
return &result, nil
Model string `json:"model"` // 指定使用的文本模型
}
func (s *ScriptGenerationService) GenerateCharacters(req *GenerateCharactersRequest) ([]models.Character, error) {
@@ -185,61 +50,35 @@ func (s *ScriptGenerationService) GenerateCharacters(req *GenerateCharactersRequ
count = 5
}
systemPrompt := `你是一个专业的角色分析师,擅长从剧本中提取和分析角色信息。
你的任务是根据提供的剧本内容,提取并整理剧中出现的所有角色的详细设定。
要求:
1. 仔细阅读剧本,识别所有出现的角色
2. 根据剧本中的对话、行为和描述,总结角色的性格特点
3. 提取角色在剧本中的关键信息:背景、动机、目标、关系等
4. 角色之间的关系必须基于剧本中的实际描述
5. 外貌描述必须极其详细如果剧本中有描述则使用如果没有则根据角色设定合理推断便于AI绘画生成角色形象
6. 优先提取主要角色和重要配角,次要角色可以简略
请严格按照以下 JSON 格式输出,不要添加任何其他文字:
{
"characters": [
{
"name": "角色名",
"role": "主角/重要配角/配角",
"description": "角色背景和简介200-300字包括出身背景、成长经历、核心动机、与其他角色的关系、在故事中的作用",
"personality": "性格特点详细描述100-150字包括主要性格特征、行为习惯、价值观、优点缺点、情绪表达方式、对待他人的态度等",
"appearance": "外貌描述极其详细150-200字必须包括确切年龄、精确身高、体型身材、肤色质感、发型发色发长、眼睛颜色形状、面部特征如眉毛、鼻子、嘴唇、着装风格、服装颜色材质、配饰细节、标志性特征、整体气质风格等描述要具体到可以直接用于AI绘画",
"voice_style": "说话风格和语气特点详细描述50-80字包括语速语调、用词习惯、口头禅、说话时的情绪特征等"
}
]
}
注意:
- 必须基于剧本内容提取角色,不要凭空创作
- 优先提取主要角色和重要配角,数量根据剧本实际情况确定
- description、personality、appearance、voice_style都必须详细描述字数要充足
- appearance外貌描述是重中之重必须极其详细具体要能让AI准确生成角色形象
- 如果剧本中角色信息不完整,可以根据角色设定合理补充,但要符合剧本整体风格`
systemPrompt := s.promptI18n.GetCharacterExtractionPrompt()
outlineText := req.Outline
if outlineText == "" {
outlineText = fmt.Sprintf("剧名:%s\n简介%s\n类型%s", drama.Title, drama.Description, drama.Genre)
outlineText = s.promptI18n.FormatUserPrompt("drama_info_template", drama.Title, drama.Description, drama.Genre)
}
userPrompt := fmt.Sprintf(`剧本内容:
%s
请从剧本中提取并整理最多 %d 个主要角色的详细设定。`, outlineText, count)
userPrompt := s.promptI18n.FormatUserPrompt("character_request", outlineText, count)
temperature := req.Temperature
if temperature == 0 {
temperature = 0.7
}
text, err := s.aiService.GenerateText(
userPrompt,
systemPrompt,
ai.WithTemperature(temperature),
ai.WithMaxTokens(3000),
)
// 如果指定了模型,使用指定的模型;否则使用默认配置
var text string
var err error
if req.Model != "" {
s.log.Infow("Using specified model for character generation", "model", req.Model)
client, getErr := s.aiService.GetAIClientForModel("text", req.Model)
if getErr != nil {
s.log.Warnw("Failed to get client for specified model, using default", "model", req.Model, "error", getErr)
text, err = s.aiService.GenerateText(userPrompt, systemPrompt, ai.WithTemperature(temperature))
} else {
text, err = client.GenerateText(userPrompt, systemPrompt, ai.WithTemperature(temperature))
}
} else {
text, err = s.aiService.GenerateText(userPrompt, systemPrompt, ai.WithTemperature(temperature))
}
if err != nil {
s.log.Errorw("Failed to generate characters", "error", err)
@@ -248,15 +87,14 @@ func (s *ScriptGenerationService) GenerateCharacters(req *GenerateCharactersRequ
s.log.Infow("AI response received", "length", len(text), "preview", text[:minInt(200, len(text))])
var result struct {
Characters []struct {
Name string `json:"name"`
Role string `json:"role"`
Description string `json:"description"`
Personality string `json:"personality"`
Appearance string `json:"appearance"`
VoiceStyle string `json:"voice_style"`
} `json:"characters"`
// AI直接返回数组格式
var result []struct {
Name string `json:"name"`
Role string `json:"role"`
Description string `json:"description"`
Personality string `json:"personality"`
Appearance string `json:"appearance"`
VoiceStyle string `json:"voice_style"`
}
if err := utils.SafeParseAIJSON(text, &result); err != nil {
@@ -265,7 +103,7 @@ func (s *ScriptGenerationService) GenerateCharacters(req *GenerateCharactersRequ
}
var characters []models.Character
for _, char := range result.Characters {
for _, char := range result {
// 检查角色是否已存在
var existingChar models.Character
err := s.db.Where("drama_id = ? AND name = ?", req.DramaID, char.Name).First(&existingChar).Error
@@ -296,209 +134,25 @@ func (s *ScriptGenerationService) GenerateCharacters(req *GenerateCharactersRequ
characters = append(characters, character)
}
// 如果提供了 EpisodeID建立 episode_characters 关联关系
if req.EpisodeID > 0 {
var episode models.Episode
if err := s.db.First(&episode, req.EpisodeID).Error; err == nil {
// 使用 GORM 的 Association 建立多对多关联
if err := s.db.Model(&episode).Association("Characters").Append(characters); err != nil {
s.log.Errorw("Failed to associate characters with episode", "error", err, "episode_id", req.EpisodeID)
} else {
s.log.Infow("Characters associated with episode", "episode_id", req.EpisodeID, "character_count", len(characters))
}
} else {
s.log.Errorw("Episode not found for association", "episode_id", req.EpisodeID, "error", err)
}
}
s.log.Infow("Characters generated", "drama_id", req.DramaID, "total_count", len(characters), "new_count", len(characters))
return characters, nil
}
func (s *ScriptGenerationService) GenerateEpisodes(req *GenerateEpisodesRequest) ([]models.Episode, error) {
var drama models.Drama
if err := s.db.Where("id = ? ", req.DramaID).First(&drama).Error; err != nil {
return nil, fmt.Errorf("drama not found")
}
// 获取角色信息
var characters []models.Character
s.db.Where("drama_id = ?", req.DramaID).Find(&characters)
var characterList string
if len(characters) > 0 {
characterList = "\n角色设定\n"
for _, char := range characters {
characterList += fmt.Sprintf("- %s", char.Name)
if char.Role != nil {
characterList += fmt.Sprintf("%s", *char.Role)
}
if char.Description != nil {
characterList += fmt.Sprintf("%s", *char.Description)
}
if char.Personality != nil {
characterList += fmt.Sprintf(" | 性格:%s", *char.Personality)
}
characterList += "\n"
}
} else {
characterList = "\n注意尚未设定角色请根据大纲创作合理的角色出场\n"
}
systemPrompt := `你是一个专业的短剧编剧。你擅长根据分集规划创作详细的剧情内容。
你的任务是根据大纲中的分集规划将每一集的概要扩展为详细的剧情叙述。每集约180秒3分钟需要充实的内容。
工作流程:
1. 大纲中已提供每集的剧情规划80-100字概要
2. 你需要将每集概要扩展为400-500字的详细剧情叙述
3. 严格按照分集规划的数量和走向展开,不能遗漏任何一集
详细要求:
1. script_content用400-500字详细叙述包括
- 具体场景和环境描写
- 角色的行动、对话要点、情绪变化
- 冲突的产生过程和激化细节
- 关键情节点和转折
- 为下一集埋下的伏笔
2. 每集有明确的冲突和转折点
3. 集与集之间有连贯性和悬念
4. 充分展现角色性格和关系演变
5. 内容详实足以支撑180秒时长
JSON格式紧凑
{"episodes":[{"episode_number":1,"title":"标题","description":"简短梗概","script_content":"400-500字详细剧情叙述","duration":210}]}
格式说明:
1. script_content为叙述文不是场景对话格式
2. 每集包含开场铺垫、冲突发展、高潮转折、结局悬念
3. duration根据剧情复杂度设置在150-300秒
关键要求:
- 大纲规划了几集就必须生成几集
- 严格按照分集规划的故事线展开
- 每一集都要有完整的400-500字详细内容
- 绝对不能遗漏任何一集`
outlineText := req.Outline
if outlineText == "" {
outlineText = fmt.Sprintf("剧名:%s\n简介%s\n类型%s", drama.Title, drama.Description, drama.Genre)
}
userPrompt := fmt.Sprintf(`剧本大纲:
%s
%s
请基于以上大纲和角色,创作 %d 集的详细剧本。
**重要要求:**
- 必须生成完整的 %d 集从第1集到第%d集不能遗漏
- 每集约3-5分钟150-300秒
- 每集的duration字段要根据剧本内容长度合理设置不要都设置为同一个值
- 返回的JSON中episodes数组必须包含 %d 个元素`, outlineText, characterList, req.EpisodeCount, req.EpisodeCount, req.EpisodeCount, req.EpisodeCount)
temperature := req.Temperature
if temperature == 0 {
temperature = 0.7
}
// 根据剧集数量调整token限制
// 模型支持128k上下文每集400-500字约需800-1000 tokens包含JSON结构
baseTokens := 3000 // 基础(系统提示+角色列表+大纲)
perEpisodeTokens := 900 // 每集约900 tokens支持400-500字详细内容
maxTokens := baseTokens + (req.EpisodeCount * perEpisodeTokens)
// 128k上下文可以设置较大的token限制
// 10集约12000 tokens20集约21000 tokens都在安全范围内
if maxTokens > 32000 {
maxTokens = 32000 // 保守限制在32k留足够空间
}
s.log.Infow("Generating episodes with token limit",
"episode_count", req.EpisodeCount,
"max_tokens", maxTokens,
"estimated_per_episode", perEpisodeTokens)
text, err := s.aiService.GenerateText(
userPrompt,
systemPrompt,
ai.WithTemperature(0.8),
ai.WithMaxTokens(maxTokens),
)
if err != nil {
s.log.Errorw("Failed to generate episodes", "error", err)
return nil, fmt.Errorf("生成失败: %w", err)
}
s.log.Infow("AI response received", "length", len(text), "preview", text[:minInt(200, len(text))])
var result struct {
Episodes []struct {
EpisodeNumber int `json:"episode_number"`
Title string `json:"title"`
Description string `json:"description"`
ScriptContent string `json:"script_content"`
Duration int `json:"duration"`
} `json:"episodes"`
}
if err := utils.SafeParseAIJSON(text, &result); err != nil {
s.log.Errorw("Failed to parse episodes JSON", "error", err, "raw_response", text[:minInt(500, len(text))])
return nil, fmt.Errorf("解析 AI 返回结果失败: %w", err)
}
// 检查生成的集数是否符合要求
if len(result.Episodes) < req.EpisodeCount {
s.log.Warnw("AI generated fewer episodes than requested",
"requested", req.EpisodeCount,
"generated", len(result.Episodes))
}
// 记录每集的详细信息
for i, ep := range result.Episodes {
s.log.Infow("Episode parsed from AI",
"index", i,
"episode_number", ep.EpisodeNumber,
"title", ep.Title,
"description_length", len(ep.Description),
"script_content_length", len(ep.ScriptContent),
"duration", ep.Duration)
}
var episodes []models.Episode
for _, ep := range result.Episodes {
duration := ep.Duration
if duration == 0 {
// AI未返回时长时使用默认值
duration = 180
s.log.Warnw("Episode duration not provided by AI, using default",
"episode_number", ep.EpisodeNumber,
"default_duration", 180)
} else {
s.log.Infow("Episode duration from AI",
"episode_number", ep.EpisodeNumber,
"duration", duration)
}
// 记录即将保存的数据
s.log.Infow("Creating episode in database",
"episode_number", ep.EpisodeNumber,
"title", ep.Title,
"script_content_length", len(ep.ScriptContent),
"script_content_empty", ep.ScriptContent == "")
dramaID, err := strconv.ParseUint(req.DramaID, 10, 32)
if err != nil {
return nil, fmt.Errorf("invalid drama ID")
}
episode := models.Episode{
DramaID: uint(dramaID),
EpisodeNum: ep.EpisodeNumber,
Title: ep.Title,
Description: &ep.Description,
ScriptContent: &ep.ScriptContent,
Duration: duration,
Status: "draft",
}
if err := s.db.Create(&episode).Error; err != nil {
s.log.Errorw("Failed to create episode", "error", err)
continue
}
episodes = append(episodes, episode)
}
s.log.Infow("Episodes generated", "drama_id", req.DramaID, "count", len(episodes))
return episodes, nil
}
// GenerateScenesForEpisode 已废弃,使用 StoryboardService.GenerateStoryboard 替代
// ParseScript 已废弃,使用 GenerateCharacters 替代

View File

@@ -42,12 +42,18 @@ type SceneCompositionInfo struct {
StoryboardNumber int `json:"storyboard_number"`
Title *string `json:"title"`
Description *string `json:"description"`
ShotType *string `json:"shot_type"`
Angle *string `json:"angle"`
Movement *string `json:"movement"`
Location *string `json:"location"`
Time *string `json:"time"`
Duration int `json:"duration"`
Dialogue *string `json:"dialogue"`
Action *string `json:"action"`
Result *string `json:"result"`
Atmosphere *string `json:"atmosphere"`
BgmPrompt *string `json:"bgm_prompt,omitempty"`
SoundEffect *string `json:"sound_effect,omitempty"`
ImagePrompt *string `json:"image_prompt,omitempty"`
VideoPrompt *string `json:"video_prompt,omitempty"`
Characters []SceneCharacterInfo `json:"characters"`
@@ -182,12 +188,18 @@ func (s *StoryboardCompositionService) GetScenesForEpisode(episodeID string) ([]
StoryboardNumber: storyboard.StoryboardNumber,
Title: storyboard.Title,
Description: storyboard.Description,
ShotType: storyboard.ShotType,
Angle: storyboard.Angle,
Movement: storyboard.Movement,
Location: storyboard.Location,
Time: storyboard.Time,
Duration: storyboard.Duration,
Action: storyboard.Action,
Dialogue: storyboard.Dialogue,
Result: storyboard.Result,
Atmosphere: storyboard.Atmosphere,
BgmPrompt: storyboard.BgmPrompt,
SoundEffect: storyboard.SoundEffect,
ImagePrompt: storyboard.ImagePrompt,
VideoPrompt: storyboard.VideoPrompt,
SceneID: storyboard.SceneID,
@@ -387,6 +399,24 @@ func (s *StoryboardCompositionService) GenerateSceneImage(req *GenerateSceneImag
return nil, fmt.Errorf("image generation service not available")
}
// DeleteScene deletes the scene whose id equals sceneID.
//
// The scene is looked up first so that a missing row can be reported
// distinctly: when the lookup yields gorm.ErrRecordNotFound the method
// returns a plain "scene not found" error; any other lookup failure is
// wrapped and returned. If the subsequent delete fails, that error is
// wrapped and returned as well. On success an info log entry carrying the
// scene id is written and nil is returned.
func (s *StoryboardCompositionService) DeleteScene(sceneID string) error {
	var target models.Scene

	// Load the row first; this distinguishes "no such scene" from other
	// database failures.
	lookupErr := s.db.Where("id = ?", sceneID).First(&target).Error
	switch {
	case lookupErr == gorm.ErrRecordNotFound:
		return fmt.Errorf("scene not found")
	case lookupErr != nil:
		return fmt.Errorf("failed to find scene: %w", lookupErr)
	}

	// Delete the scene record that was just loaded.
	if deleteErr := s.db.Delete(&target).Error; deleteErr != nil {
		return fmt.Errorf("failed to delete scene: %w", deleteErr)
	}

	s.log.Infow("Scene deleted successfully", "scene_id", sceneID)
	return nil
}
func getStringValue(s *string) string {
if s != nil {
return *s

View File

@@ -7,22 +7,28 @@ import (
"strings"
models "github.com/drama-generator/backend/domain/models"
"github.com/drama-generator/backend/pkg/ai"
"github.com/drama-generator/backend/pkg/config"
"github.com/drama-generator/backend/pkg/logger"
"github.com/drama-generator/backend/pkg/utils"
"gorm.io/gorm"
)
type StoryboardService struct {
db *gorm.DB
aiService *AIService
log *logger.Logger
db *gorm.DB
aiService *AIService
log *logger.Logger
config *config.Config
promptI18n *PromptI18n
}
func NewStoryboardService(db *gorm.DB, log *logger.Logger) *StoryboardService {
func NewStoryboardService(db *gorm.DB, cfg *config.Config, log *logger.Logger) *StoryboardService {
return &StoryboardService{
db: db,
aiService: NewAIService(db, log),
log: log,
db: db,
aiService: NewAIService(db, log),
log: log,
config: cfg,
promptI18n: NewPromptI18n(cfg),
}
}
@@ -52,7 +58,7 @@ type GenerateStoryboardResult struct {
Total int `json:"total"`
}
func (s *StoryboardService) GenerateStoryboard(episodeID string) (*GenerateStoryboardResult, error) {
func (s *StoryboardService) GenerateStoryboard(episodeID string, model string) (*GenerateStoryboardResult, error) {
// 从数据库获取剧集信息
var episode struct {
ID string
@@ -122,20 +128,33 @@ func (s *StoryboardService) GenerateStoryboard(episodeID string) (*GenerateStory
"scene_count", len(scenes),
"scenes", sceneList)
// 构建分镜头生成提示词
prompt := fmt.Sprintf(`【角色】你是一位资深影视分镜师,精通罗伯特·麦基的镜头拆解理论,擅长构建情绪节奏。
// 使用国际化提示词
systemPrompt := s.promptI18n.GetStoryboardSystemPrompt()
【任务】将小说剧本按**独立动作单元**拆解为分镜头方案。
scriptLabel := s.promptI18n.FormatUserPrompt("script_content_label")
taskLabel := s.promptI18n.FormatUserPrompt("task_label")
taskInstruction := s.promptI18n.FormatUserPrompt("task_instruction")
charListLabel := s.promptI18n.FormatUserPrompt("character_list_label")
charConstraint := s.promptI18n.FormatUserPrompt("character_constraint")
sceneListLabel := s.promptI18n.FormatUserPrompt("scene_list_label")
sceneConstraint := s.promptI18n.FormatUserPrompt("scene_constraint")
【本剧可用角色列表】
prompt := fmt.Sprintf(`%s
%s
%s
**重要**在characters字段中只能使用上述角色列表中的角色ID数字不得自创角色或使用其他ID。
%s%s
【本剧已提取的场景背景列表】
%s
%s
**重要**在scene_id字段中必须从上述背景列表中选择最匹配的背景ID数字。如果没有合适的背景则填null。
%s
%s
%s
%s
【剧本原文】
%s
@@ -305,23 +324,61 @@ func (s *StoryboardService) GenerateStoryboard(episodeID string) (*GenerateStory
- 包含感官细节:视觉、听觉、触觉、嗅觉
- 描述光线、色彩、质感、动态
- 为视频生成AI提供足够的画面构建信息
- 避免抽象词汇,使用具象的视觉化描述`, characterList, sceneList, scriptContent)
- 避免抽象词汇,使用具象的视觉化描述`, systemPrompt, scriptLabel, scriptContent, taskLabel, taskInstruction, charListLabel, characterList, charConstraint, sceneListLabel, sceneList, sceneConstraint)
// 调用AI服务生成
text, err := s.aiService.GenerateText(prompt, "")
if err != nil {
s.log.Errorw("Failed to generate storyboard", "error", err)
return nil, fmt.Errorf("生成分镜头失败: %w", err)
// 调用AI服务生成(如果指定了模型则使用指定的模型)
// 设置较大的max_tokens以确保完整返回所有分镜的JSON
var text string
if model != "" {
s.log.Infow("Using specified model for storyboard generation", "model", model)
client, getErr := s.aiService.GetAIClientForModel("text", model)
if getErr != nil {
s.log.Warnw("Failed to get client for specified model, using default", "model", model, "error", getErr)
var err error
text, err = s.aiService.GenerateText(prompt, "", ai.WithMaxTokens(16000))
if err != nil {
s.log.Errorw("Failed to generate storyboard", "error", err)
return nil, fmt.Errorf("生成分镜头失败: %w", err)
}
} else {
var err error
text, err = client.GenerateText(prompt, "", ai.WithMaxTokens(16000))
if err != nil {
s.log.Errorw("Failed to generate storyboard", "error", err)
return nil, fmt.Errorf("生成分镜头失败: %w", err)
}
}
} else {
var err error
text, err = s.aiService.GenerateText(prompt, "", ai.WithMaxTokens(16000))
if err != nil {
s.log.Errorw("Failed to generate storyboard", "error", err)
return nil, fmt.Errorf("生成分镜头失败: %w", err)
}
}
// 解析JSON结果
// AI可能返回两种格式
// 1. 数组格式: [{...}, {...}]
// 2. 对象格式: {"storyboards": [{...}, {...}]}
var result GenerateStoryboardResult
if err := utils.SafeParseAIJSON(text, &result); err != nil {
s.log.Errorw("Failed to parse storyboard JSON", "error", err, "response", text[:min(500, len(text))])
return nil, fmt.Errorf("解析分镜头结果失败: %w", err)
}
result.Total = len(result.Storyboards)
// 先尝试解析为数组格式
var storyboards []Storyboard
if err := utils.SafeParseAIJSON(text, &storyboards); err == nil {
// 成功解析为数组,包装为对象
result.Storyboards = storyboards
result.Total = len(storyboards)
s.log.Infow("Parsed storyboard as array format", "count", len(storyboards))
} else {
// 尝试解析为对象格式
if err := utils.SafeParseAIJSON(text, &result); err != nil {
s.log.Errorw("Failed to parse storyboard JSON in both formats", "error", err, "response", text[:min(500, len(text))])
return nil, fmt.Errorf("解析分镜头结果失败: %w", err)
}
result.Total = len(result.Storyboards)
s.log.Infow("Parsed storyboard as object format", "count", len(result.Storyboards))
}
// 计算总时长(所有分镜时长之和)
totalDuration := 0
@@ -566,16 +623,53 @@ func (s *StoryboardService) generateVideoPrompt(sb Storyboard) string {
}
func (s *StoryboardService) saveStoryboards(episodeID string, storyboards []Storyboard) error {
// 验证 episodeID
epID, err := strconv.ParseUint(episodeID, 10, 32)
if err != nil {
s.log.Errorw("Invalid episode ID", "episode_id", episodeID, "error", err)
return fmt.Errorf("无效的章节ID: %s", episodeID)
}
// 防御性检查如果AI返回的分镜数量为0不应该删除旧分镜
if len(storyboards) == 0 {
s.log.Errorw("AI返回的分镜数量为0拒绝保存以避免删除现有分镜", "episode_id", episodeID)
return fmt.Errorf("AI生成分镜失败返回的分镜数量为0")
}
s.log.Infow("开始保存分镜头",
"episode_id", episodeID,
"episode_id_uint", uint(epID),
"storyboard_count", len(storyboards))
// 开启事务
return s.db.Transaction(func(tx *gorm.DB) error {
// 获取该剧集所有的分镜ID
// 验证该章节是否存在
var episode models.Episode
if err := tx.First(&episode, epID).Error; err != nil {
s.log.Errorw("Episode not found", "episode_id", episodeID, "error", err)
return fmt.Errorf("章节不存在: %s", episodeID)
}
s.log.Infow("找到章节信息",
"episode_id", episode.ID,
"episode_number", episode.EpisodeNum,
"drama_id", episode.DramaID,
"title", episode.Title)
// 获取该剧集所有的分镜ID使用 uint 类型)
var storyboardIDs []uint
if err := tx.Model(&models.Storyboard{}).
Where("episode_id = ?", episodeID).
Where("episode_id = ?", uint(epID)).
Pluck("id", &storyboardIDs).Error; err != nil {
return err
}
s.log.Infow("查询到现有分镜",
"episode_id_string", episodeID,
"episode_id_uint", uint(epID),
"existing_storyboard_count", len(storyboardIDs),
"storyboard_ids", storyboardIDs)
// 如果有分镜先清理关联的image_generations的storyboard_id
if len(storyboardIDs) > 0 {
if err := tx.Model(&models.ImageGeneration{}).
@@ -583,13 +677,26 @@ func (s *StoryboardService) saveStoryboards(episodeID string, storyboards []Stor
Update("storyboard_id", nil).Error; err != nil {
return err
}
s.log.Infow("已清理关联的图片生成记录", "count", len(storyboardIDs))
}
// 删除该剧集已有的分镜头
if err := tx.Where("episode_id = ?", episodeID).Delete(&models.Storyboard{}).Error; err != nil {
return err
// 删除该剧集已有的分镜头(使用 uint 类型确保类型匹配)
s.log.Warnw("准备删除分镜数据",
"episode_id_string", episodeID,
"episode_id_uint", uint(epID),
"episode_id_from_db", episode.ID,
"will_delete_count", len(storyboardIDs))
result := tx.Where("episode_id = ?", uint(epID)).Delete(&models.Storyboard{})
if result.Error != nil {
s.log.Errorw("删除旧分镜失败", "episode_id", uint(epID), "error", result.Error)
return result.Error
}
s.log.Infow("已删除旧分镜头",
"episode_id", uint(epID),
"deleted_count", result.RowsAffected)
// 注意:不删除背景,因为背景是在分镜拆解前就提取好的
// AI会直接返回scene_id不需要在这里做字符串匹配
@@ -616,8 +723,6 @@ func (s *StoryboardService) saveStoryboards(episodeID string, storyboards []Stor
"scene_id", *sb.SceneID)
}
epID, _ := strconv.ParseUint(episodeID, 10, 32)
// 处理 title 字段
var titlePtr *string
if sb.Title != "" {

View File

@@ -7,6 +7,7 @@ import (
"time"
models "github.com/drama-generator/backend/domain/models"
"github.com/drama-generator/backend/infrastructure/external/ffmpeg"
"github.com/drama-generator/backend/infrastructure/storage"
"github.com/drama-generator/backend/pkg/logger"
"github.com/drama-generator/backend/pkg/video"
@@ -19,6 +20,7 @@ type VideoGenerationService struct {
log *logger.Logger
localStorage *storage.LocalStorage
aiService *AIService
ffmpeg *ffmpeg.FFmpeg
}
func NewVideoGenerationService(db *gorm.DB, transferService *ResourceTransferService, localStorage *storage.LocalStorage, aiService *AIService, log *logger.Logger) *VideoGenerationService {
@@ -28,6 +30,7 @@ func NewVideoGenerationService(db *gorm.DB, transferService *ResourceTransferSer
transferService: transferService,
aiService: aiService,
log: log,
ffmpeg: ffmpeg.NewFFmpeg(log),
}
go service.RecoverPendingTasks()
@@ -316,18 +319,40 @@ func (s *VideoGenerationService) pollTaskStatus(videoGenID uint, taskID string,
}
func (s *VideoGenerationService) completeVideoGeneration(videoGenID uint, videoURL string, duration *int, width *int, height *int, firstFrameURL *string) {
var localVideoPath string
// 下载视频到本地存储(仅用于缓存,不更新数据库)
if s.localStorage != nil && videoURL != "" {
_, err := s.localStorage.DownloadFromURL(videoURL, "videos")
downloadedPath, err := s.localStorage.DownloadFromURL(videoURL, "videos")
if err != nil {
s.log.Warnw("Failed to download video to local storage",
"error", err,
"id", videoGenID,
"original_url", videoURL)
} else {
localVideoPath = downloadedPath
s.log.Infow("Video downloaded to local storage for caching",
"id", videoGenID,
"original_url", videoURL)
"original_url", videoURL,
"local_path", localVideoPath)
}
}
// 如果视频已下载到本地,探测真实时长
if localVideoPath != "" && s.ffmpeg != nil {
if probedDuration, err := s.ffmpeg.GetVideoDuration(localVideoPath); err == nil {
// Convert to whole seconds, rounding to the nearest integer (add 0.5, then truncate).
durationInt := int(probedDuration + 0.5)
duration = &durationInt
s.log.Infow("Probed video duration",
"id", videoGenID,
"duration_seconds", durationInt,
"duration_float", probedDuration)
} else {
s.log.Warnw("Failed to probe video duration, using provided duration",
"error", err,
"id", videoGenID,
"local_path", localVideoPath)
}
}
@@ -372,13 +397,22 @@ func (s *VideoGenerationService) completeVideoGeneration(videoGenID uint, videoU
var videoGen models.VideoGeneration
if err := s.db.First(&videoGen, videoGenID).Error; err == nil {
if videoGen.StoryboardID != nil {
if err := s.db.Model(&models.Storyboard{}).Where("id = ?", *videoGen.StoryboardID).Update("video_url", videoURL).Error; err != nil {
s.log.Warnw("Failed to update storyboard video_url", "storyboard_id", *videoGen.StoryboardID, "error", err)
// 更新 Storyboard 的 video_url 和 duration
storyboardUpdates := map[string]interface{}{
"video_url": videoURL,
}
if duration != nil {
storyboardUpdates["duration"] = *duration
}
if err := s.db.Model(&models.Storyboard{}).Where("id = ?", *videoGen.StoryboardID).Updates(storyboardUpdates).Error; err != nil {
s.log.Warnw("Failed to update storyboard", "storyboard_id", *videoGen.StoryboardID, "error", err)
} else {
s.log.Infow("Updated storyboard with video info", "storyboard_id", *videoGen.StoryboardID, "duration", duration)
}
}
}
s.log.Infow("Video generation completed", "id", videoGenID, "url", videoURL)
s.log.Infow("Video generation completed", "id", videoGenID, "url", videoURL, "duration", duration)
}
func (s *VideoGenerationService) updateVideoGenError(videoGenID uint, errorMsg string) {