添加chat gemini、chatfire端点、图片生成 gemini、chatfire 更轻松的AI配置

2026-01-14 02:25:41 +08:00
parent 4d38357ff6
commit 23b45efae9
22 changed files with 1512 additions and 405 deletions
--- a/pkg/ai/client.go
+++ b/pkg/ai/client.go
@@ -0,0 +1,7 @@
+package ai
+
+// AIClient is the interface implemented by the text-generation clients.
+type AIClient interface {
+	// GenerateText returns the text generated for prompt, with systemPrompt as
+	// the optional system instruction. options are functional modifiers for a
+	// ChatCompletionRequest.
+	GenerateText(prompt string, systemPrompt string, options ...func(*ChatCompletionRequest)) (string, error)
+	// TestConnection returns nil when the client's connectivity check succeeds.
+	TestConnection() error
+}
--- a/pkg/ai/gemini_client.go
+++ b/pkg/ai/gemini_client.go
@@ -0,0 +1,195 @@
+package ai
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+)
+
+// GeminiClient holds the settings used to call a Gemini generateContent
+// endpoint for text generation.
+//
+// BaseURL and Endpoint are concatenated to form the request URL, with the
+// "{model}" placeholder in Endpoint replaced by Model. APIKey is sent as the
+// "key" query parameter. HTTPClient is the client used to send requests.
+type GeminiClient struct {
+	BaseURL    string
+	APIKey     string
+	Model      string
+	Endpoint   string
+	HTTPClient *http.Client
+}
+
+// GeminiTextRequest is the JSON body posted to the generateContent endpoint.
+// SystemInstruction is omitted from the JSON when nil.
+type GeminiTextRequest struct {
+	Contents          []GeminiContent    `json:"contents"`
+	SystemInstruction *GeminiInstruction `json:"systemInstruction,omitempty"`
+}
+
+// GeminiContent is one message in a request: its parts plus an optional role
+// (the role is omitted from the JSON when empty).
+type GeminiContent struct {
+	Parts []GeminiPart `json:"parts"`
+	Role  string       `json:"role,omitempty"`
+}
+
+// GeminiPart is a single text fragment of a message.
+type GeminiPart struct {
+	Text string `json:"text"`
+}
+
+// GeminiInstruction carries the system prompt as a list of text parts.
+type GeminiInstruction struct {
+	Parts []GeminiPart `json:"parts"`
+}
+
+// GeminiTextResponse mirrors the JSON returned by the generateContent
+// endpoint: the generated candidates (each with content parts, a finish
+// reason, an index and safety ratings) and the token usage counters.
+type GeminiTextResponse struct {
+	Candidates []struct {
+		Content struct {
+			Parts []struct {
+				Text string `json:"text"`
+			} `json:"parts"`
+			Role string `json:"role"`
+		} `json:"content"`
+		FinishReason  string `json:"finishReason"`
+		Index         int    `json:"index"`
+		SafetyRatings []struct {
+			Category    string `json:"category"`
+			Probability string `json:"probability"`
+		} `json:"safetyRatings"`
+	} `json:"candidates"`
+	UsageMetadata struct {
+		PromptTokenCount     int `json:"promptTokenCount"`
+		CandidatesTokenCount int `json:"candidatesTokenCount"`
+		TotalTokenCount      int `json:"totalTokenCount"`
+	} `json:"usageMetadata"`
+}
+
+// NewGeminiClient returns a GeminiClient with a 10-minute HTTP timeout.
+// An empty baseURL, endpoint or model is replaced by the built-in default;
+// apiKey is stored as given.
+func NewGeminiClient(baseURL, apiKey, model, endpoint string) *GeminiClient {
+	// Start from the defaults and let every non-empty argument override them.
+	client := &GeminiClient{
+		BaseURL:    "https://generativelanguage.googleapis.com",
+		APIKey:     apiKey,
+		Model:      "gemini-3-pro",
+		Endpoint:   "/v1beta/models/{model}:generateContent",
+		HTTPClient: &http.Client{Timeout: 10 * time.Minute},
+	}
+	if baseURL != "" {
+		client.BaseURL = baseURL
+	}
+	if endpoint != "" {
+		client.Endpoint = endpoint
+	}
+	if model != "" {
+		client.Model = model
+	}
+	return client
+}
+
+// GenerateText sends a single-turn generateContent request and returns the
+// text of the first candidate, with all of its text parts concatenated.
+//
+// prompt is sent as one "user" content; a non-empty systemPrompt is sent in the
+// systemInstruction field. options is accepted for AIClient compatibility but
+// is not applied, because this request is not a ChatCompletionRequest.
+//
+// It returns an error when the request cannot be built or sent, the status is
+// not 200, the body cannot be decoded, or the first candidate has no parts.
+// Transport errors are returned without the request URL so the API key never
+// reaches logs or callers.
+func (c *GeminiClient) GenerateText(prompt string, systemPrompt string, options ...func(*ChatCompletionRequest)) (string, error) {
+	reqBody := GeminiTextRequest{
+		Contents: []GeminiContent{
+			{
+				Parts: []GeminiPart{{Text: prompt}},
+				Role:  "user",
+			},
+		},
+	}
+
+	// The system prompt travels in systemInstruction, not as a chat message.
+	if systemPrompt != "" {
+		reqBody.SystemInstruction = &GeminiInstruction{
+			Parts: []GeminiPart{{Text: systemPrompt}},
+		}
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		fmt.Printf("Gemini: Failed to marshal request: %v\n", err)
+		return "", fmt.Errorf("marshal request: %w", err)
+	}
+
+	// Substitute the {model} placeholder in the endpoint.
+	endpoint := strings.ReplaceAll(c.BaseURL+c.Endpoint, "{model}", c.Model)
+
+	// Log the target with the key masked; the real key is attached to the
+	// request below and is never formatted into a string that gets printed.
+	sep := "?"
+	if strings.Contains(endpoint, "?") {
+		sep = "&"
+	}
+	fmt.Printf("Gemini: Sending request to: %s\n", endpoint+sep+"key=***")
+	requestPreview := string(jsonData)
+	if len(jsonData) > 300 {
+		requestPreview = string(jsonData[:300]) + "..."
+	}
+	fmt.Printf("Gemini: Request body: %s\n", requestPreview)
+
+	req, err := http.NewRequest("POST", endpoint, bytes.NewBuffer(jsonData))
+	if err != nil {
+		fmt.Printf("Gemini: Failed to create request: %v\n", err)
+		return "", fmt.Errorf("create request: %w", err)
+	}
+
+	// Add the key through the parsed query so it is escaped correctly and is
+	// merged with any query string already present in a custom endpoint.
+	query := req.URL.Query()
+	query.Set("key", c.APIKey)
+	req.URL.RawQuery = query.Encode()
+
+	req.Header.Set("Content-Type", "application/json")
+
+	fmt.Printf("Gemini: Executing HTTP request...\n")
+	resp, err := c.HTTPClient.Do(req)
+	if err != nil {
+		// Client.Do reports failures as *url.Error, whose message embeds the
+		// full request URL including "key=<APIKey>". Report the underlying
+		// cause instead so the secret is not leaked.
+		cause := err
+		if wrapped, ok := err.(interface{ Unwrap() error }); ok {
+			if inner := wrapped.Unwrap(); inner != nil {
+				cause = inner
+			}
+		}
+		fmt.Printf("Gemini: HTTP request failed: %v\n", cause)
+		return "", fmt.Errorf("send request: %w", cause)
+	}
+	defer resp.Body.Close()
+
+	fmt.Printf("Gemini: Received response with status: %d\n", resp.StatusCode)
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		fmt.Printf("Gemini: Failed to read response body: %v\n", err)
+		return "", fmt.Errorf("read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		fmt.Printf("Gemini: API error (status %d): %s\n", resp.StatusCode, string(body))
+		return "", fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body))
+	}
+
+	// Print a bounded preview of the response body for debugging.
+	bodyPreview := string(body)
+	if len(body) > 500 {
+		bodyPreview = string(body[:500]) + "..."
+	}
+	fmt.Printf("Gemini: Response body: %s\n", bodyPreview)
+
+	var result GeminiTextResponse
+	if err := json.Unmarshal(body, &result); err != nil {
+		errorPreview := string(body)
+		if len(body) > 200 {
+			errorPreview = string(body[:200])
+		}
+		fmt.Printf("Gemini: Failed to parse response: %v\n", err)
+		return "", fmt.Errorf("parse response: %w, body preview: %s", err, errorPreview)
+	}
+
+	fmt.Printf("Gemini: Successfully parsed response, candidates count: %d\n", len(result.Candidates))
+
+	if len(result.Candidates) == 0 {
+		fmt.Printf("Gemini: No candidates in response\n")
+		return "", fmt.Errorf("no candidates in response")
+	}
+
+	parts := result.Candidates[0].Content.Parts
+	if len(parts) == 0 {
+		fmt.Printf("Gemini: No parts in first candidate\n")
+		return "", fmt.Errorf("no parts in response")
+	}
+
+	// A candidate may split its answer over several parts; returning only the
+	// first one would truncate the text.
+	var text strings.Builder
+	for _, part := range parts {
+		text.WriteString(part.Text)
+	}
+	responseText := text.String()
+	fmt.Printf("Gemini: Generated text: %s\n", responseText)
+
+	return responseText, nil
+}
+
+// TestConnection checks the configuration by generating text for the prompt
+// "Hello" with no system prompt. It logs the outcome and returns the error
+// from GenerateText, or nil when the call succeeds.
+func (c *GeminiClient) TestConnection() error {
+	fmt.Printf("Gemini: TestConnection called with BaseURL=%s, Model=%s, Endpoint=%s\n", c.BaseURL, c.Model, c.Endpoint)
+	if _, err := c.GenerateText("Hello", ""); err != nil {
+		fmt.Printf("Gemini: TestConnection failed: %v\n", err)
+		return err
+	}
+	fmt.Printf("Gemini: TestConnection succeeded\n")
+	return nil
+}
--- a/pkg/ai/openai_client.go
+++ b/pkg/ai/openai_client.go
@@ -91,30 +91,48 @@ func (c *OpenAIClient) ChatCompletion(messages []ChatMessage, options ...func(*C
 func (c *OpenAIClient) sendChatRequest(req *ChatCompletionRequest) (*ChatCompletionResponse, error) {
 	jsonData, err := json.Marshal(req)
 	if err != nil {
+		fmt.Printf("OpenAI: Failed to marshal request: %v\n", err)
 		return nil, fmt.Errorf("failed to marshal request: %w", err)
 	}

 	url := c.BaseURL + c.Endpoint
+
+	// 打印请求信息
+	fmt.Printf("OpenAI: Sending request to: %s\n", url)
+	fmt.Printf("OpenAI: BaseURL=%s, Endpoint=%s, Model=%s\n", c.BaseURL, c.Endpoint, c.Model)
+	requestPreview := string(jsonData)
+	if len(jsonData) > 300 {
+		requestPreview = string(jsonData[:300]) + "..."
+	}
+	fmt.Printf("OpenAI: Request body: %s\n", requestPreview)
+
 	httpReq, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
 	if err != nil {
+		fmt.Printf("OpenAI: Failed to create request: %v\n", err)
 		return nil, fmt.Errorf("failed to create request: %w", err)
 	}

 	httpReq.Header.Set("Content-Type", "application/json")
 	httpReq.Header.Set("Authorization", "Bearer "+c.APIKey)

+	fmt.Printf("OpenAI: Executing HTTP request...\n")
 	resp, err := c.HTTPClient.Do(httpReq)
 	if err != nil {
+		fmt.Printf("OpenAI: HTTP request failed: %v\n", err)
 		return nil, fmt.Errorf("failed to send request: %w", err)
 	}
 	defer resp.Body.Close()

+	fmt.Printf("OpenAI: Received response with status: %d\n", resp.StatusCode)
+
 	body, err := io.ReadAll(resp.Body)
 	if err != nil {
+		fmt.Printf("OpenAI: Failed to read response body: %v\n", err)
 		return nil, fmt.Errorf("failed to read response: %w", err)
 	}

 	if resp.StatusCode != http.StatusOK {
+		fmt.Printf("OpenAI: API error (status %d): %s\n", resp.StatusCode, string(body))
 		var errResp ErrorResponse
 		if err := json.Unmarshal(body, &errResp); err != nil {
 			return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body))
@@ -122,11 +140,25 @@ func (c *OpenAIClient) sendChatRequest(req *ChatCompletionRequest) (*ChatComplet
 		return nil, fmt.Errorf("API error: %s", errResp.Error.Message)
 	}

+	// 打印响应体用于调试
+	bodyPreview := string(body)
+	if len(body) > 500 {
+		bodyPreview = string(body[:500]) + "..."
+	}
+	fmt.Printf("OpenAI: Response body: %s\n", bodyPreview)
+
 	var chatResp ChatCompletionResponse
 	if err := json.Unmarshal(body, &chatResp); err != nil {
-		return nil, fmt.Errorf("failed to unmarshal response: %w", err)
+		errorPreview := string(body)
+		if len(body) > 200 {
+			errorPreview = string(body[:200])
+		}
+		fmt.Printf("OpenAI: Failed to parse response: %v\n", err)
+		return nil, fmt.Errorf("failed to unmarshal response: %w, body preview: %s", err, errorPreview)
 	}

+	fmt.Printf("OpenAI: Successfully parsed response, choices count: %d\n", len(chatResp.Choices))
+
 	return &chatResp, nil
 }

@@ -176,6 +208,8 @@ func (c *OpenAIClient) GenerateText(prompt string, systemPrompt string, options
 }

 func (c *OpenAIClient) TestConnection() error {
+	fmt.Printf("OpenAI: TestConnection called with BaseURL=%s, Endpoint=%s, Model=%s\n", c.BaseURL, c.Endpoint, c.Model)
+
 	messages := []ChatMessage{
 		{
 			Role:    "user",
@@ -184,5 +218,10 @@ func (c *OpenAIClient) TestConnection() error {
 	}

 	_, err := c.ChatCompletion(messages, WithMaxTokens(10))
+	if err != nil {
+		fmt.Printf("OpenAI: TestConnection failed: %v\n", err)
+	} else {
+		fmt.Printf("OpenAI: TestConnection succeeded\n")
+	}
 	return err
 }
--- a/pkg/image/gemini_image_client.go
+++ b/pkg/image/gemini_image_client.go
@@ -0,0 +1,277 @@
+package image
+
+import (
+	"bytes"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+)
+
+// GeminiImageClient holds the settings used to call a Gemini generateContent
+// endpoint for image generation.
+//
+// BaseURL and Endpoint are concatenated to form the request URL, with the
+// "{model}" placeholder replaced by the model name. APIKey is sent as the
+// "key" query parameter. HTTPClient is the client used to send requests.
+type GeminiImageClient struct {
+	BaseURL    string
+	APIKey     string
+	Model      string
+	Endpoint   string
+	HTTPClient *http.Client
+}
+
+// GeminiImageRequest is the JSON body posted for image generation: the
+// content parts plus the requested response modalities.
+type GeminiImageRequest struct {
+	Contents []struct {
+		Parts []GeminiPart `json:"parts"`
+	} `json:"contents"`
+	GenerationConfig struct {
+		ResponseModalities []string `json:"responseModalities"`
+	} `json:"generationConfig"`
+}
+
+// GeminiPart is one request part: either text or inline image data. Whichever
+// field is empty/nil is omitted from the JSON.
+type GeminiPart struct {
+	Text       string            `json:"text,omitempty"`
+	InlineData *GeminiInlineData `json:"inlineData,omitempty"`
+}
+
+// GeminiInlineData is an image embedded directly in the request.
+type GeminiInlineData struct {
+	MimeType string `json:"mimeType"`
+	Data     string `json:"data"` // base64-encoded image data
+}
+
+// GeminiImageResponse mirrors the JSON returned for an image generation
+// request: candidates whose parts carry inline image data and/or text, plus
+// the token usage counters.
+type GeminiImageResponse struct {
+	Candidates []struct {
+		Content struct {
+			Parts []struct {
+				InlineData struct {
+					MimeType string `json:"mimeType"`
+					Data     string `json:"data"`
+				} `json:"inlineData,omitempty"`
+				Text string `json:"text,omitempty"`
+			} `json:"parts"`
+		} `json:"content"`
+	} `json:"candidates"`
+	UsageMetadata struct {
+		PromptTokenCount     int `json:"promptTokenCount"`
+		CandidatesTokenCount int `json:"candidatesTokenCount"`
+		TotalTokenCount      int `json:"totalTokenCount"`
+	} `json:"usageMetadata"`
+}
+
+// downloadImageToBase64 fetches imageURL and returns the body encoded as
+// standard base64 together with its MIME type.
+//
+// The MIME type is the Content-Type header with any parameters (such as
+// "; charset=...") removed. When the header is missing or is not an image/*
+// type, the type is sniffed from the downloaded bytes; if that does not yield
+// an image type either, the header value is kept, or "image/jpeg" is used when
+// there was no header at all.
+//
+// It returns an error when the download fails, the status is not 200, or the
+// body cannot be read.
+func downloadImageToBase64(imageURL string) (string, string, error) {
+	// http.Get would use http.DefaultClient, which has no timeout, so a stalled
+	// image host could block the caller forever.
+	client := &http.Client{Timeout: 60 * time.Second}
+
+	resp, err := client.Get(imageURL)
+	if err != nil {
+		return "", "", fmt.Errorf("download image: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return "", "", fmt.Errorf("download image failed with status: %d", resp.StatusCode)
+	}
+
+	imageData, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", "", fmt.Errorf("read image data: %w", err)
+	}
+
+	// Reduce the header to the bare media type.
+	mimeType := resp.Header.Get("Content-Type")
+	if semi := strings.IndexByte(mimeType, ';'); semi >= 0 {
+		mimeType = mimeType[:semi]
+	}
+	mimeType = strings.ToLower(strings.TrimSpace(mimeType))
+
+	// Servers often answer with a generic or missing type; trust the bytes then.
+	if !strings.HasPrefix(mimeType, "image/") {
+		if sniffed := http.DetectContentType(imageData); strings.HasPrefix(sniffed, "image/") {
+			mimeType = sniffed
+		} else if mimeType == "" {
+			mimeType = "image/jpeg"
+		}
+	}
+
+	base64Data := base64.StdEncoding.EncodeToString(imageData)
+	return base64Data, mimeType, nil
+}
+
+// NewGeminiImageClient returns a GeminiImageClient with a 10-minute HTTP
+// timeout. An empty baseURL, endpoint or model is replaced by the built-in
+// default; apiKey is stored as given.
+func NewGeminiImageClient(baseURL, apiKey, model, endpoint string) *GeminiImageClient {
+	// Start from the defaults and let every non-empty argument override them.
+	client := &GeminiImageClient{
+		BaseURL:    "https://generativelanguage.googleapis.com",
+		APIKey:     apiKey,
+		Model:      "gemini-3-pro-image-preview",
+		Endpoint:   "/v1beta/models/{model}:generateContent",
+		HTTPClient: &http.Client{Timeout: 10 * time.Minute},
+	}
+	if baseURL != "" {
+		client.BaseURL = baseURL
+	}
+	if endpoint != "" {
+		client.Endpoint = endpoint
+	}
+	if model != "" {
+		client.Model = model
+	}
+	return client
+}
+
+// GenerateImage asks the generateContent endpoint for one image and returns it
+// as a base64 data URI in ImageResult.ImageURL.
+//
+// The negative prompt and size from opts are appended to the prompt text.
+// Reference images are sent before the text as inlineData parts; each entry
+// may be an http(s) URL (downloaded first), a data URI, or a bare base64
+// string (assumed to be image/jpeg). A reference image that cannot be
+// downloaded is logged and skipped rather than failing the whole request.
+//
+// The result's Width and Height are fixed at 1024; they are not read from the
+// response. An error is returned when the request cannot be built or sent, the
+// status is not 200, the body cannot be decoded, or no part of the first
+// candidate carries image data.
+func (c *GeminiImageClient) GenerateImage(prompt string, opts ...ImageOption) (*ImageResult, error) {
+	options := &ImageOptions{
+		Size:    "1024x1024",
+		Quality: "standard",
+	}
+
+	for _, opt := range opts {
+		opt(options)
+	}
+
+	model := c.Model
+	if options.Model != "" {
+		model = options.Model
+	}
+
+	promptText := prompt
+	if options.NegativePrompt != "" {
+		promptText += fmt.Sprintf("\n\nNegative prompt: %s", options.NegativePrompt)
+	}
+	if options.Size != "" {
+		promptText += fmt.Sprintf("\n\nImage size: %s", options.Size)
+	}
+
+	// Build the request parts: reference images first, then the text prompt.
+	parts := make([]GeminiPart, 0, len(options.ReferenceImages)+1)
+	for _, refImg := range options.ReferenceImages {
+		var base64Data, mimeType string
+
+		switch {
+		case strings.HasPrefix(refImg, "http://"), strings.HasPrefix(refImg, "https://"):
+			// Remote image: download it and inline the bytes.
+			var err error
+			base64Data, mimeType, err = downloadImageToBase64(refImg)
+			if err != nil {
+				// Best effort: a broken reference must not abort generation,
+				// but it should not disappear without a trace either.
+				fmt.Printf("[Gemini Image] Skipping reference image: %v\n", err)
+				continue
+			}
+		case strings.HasPrefix(refImg, "data:"):
+			// Data URI: data:[<mime>][;base64],<payload>
+			mimeType = "image/jpeg"
+			if comma := strings.IndexByte(refImg, ','); comma >= 0 {
+				base64Data = refImg[comma+1:]
+				header := refImg[len("data:"):comma]
+				if semi := strings.IndexByte(header, ';'); semi >= 0 {
+					header = header[:semi]
+				}
+				// Keep the jpeg default when the URI names no media type.
+				if header != "" {
+					mimeType = header
+				}
+			}
+		default:
+			// Anything else is assumed to be bare base64 data.
+			base64Data = refImg
+			mimeType = "image/jpeg"
+		}
+
+		if base64Data == "" {
+			continue
+		}
+		parts = append(parts, GeminiPart{
+			InlineData: &GeminiInlineData{
+				MimeType: mimeType,
+				Data:     base64Data,
+			},
+		})
+	}
+
+	parts = append(parts, GeminiPart{
+		Text: promptText,
+	})
+
+	reqBody := GeminiImageRequest{
+		Contents: []struct {
+			Parts []GeminiPart `json:"parts"`
+		}{
+			{
+				Parts: parts,
+			},
+		},
+		GenerationConfig: struct {
+			ResponseModalities []string `json:"responseModalities"`
+		}{
+			ResponseModalities: []string{"IMAGE"},
+		},
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+
+	endpoint := replaceModelPlaceholder(c.BaseURL+c.Endpoint, model)
+
+	req, err := http.NewRequest("POST", endpoint, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("create request: %w", err)
+	}
+
+	// Add the key through the parsed query so it is escaped correctly and is
+	// merged with any query string already present in a custom endpoint.
+	query := req.URL.Query()
+	query.Set("key", c.APIKey)
+	req.URL.RawQuery = query.Encode()
+
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.HTTPClient.Do(req)
+	if err != nil {
+		// Client.Do reports failures as *url.Error, whose message embeds the
+		// full request URL including "key=<APIKey>". Report the underlying
+		// cause instead so the secret is not leaked.
+		cause := err
+		if wrapped, ok := err.(interface{ Unwrap() error }); ok {
+			if inner := wrapped.Unwrap(); inner != nil {
+				cause = inner
+			}
+		}
+		return nil, fmt.Errorf("send request: %w", cause)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		// Keep only the head and tail of very long error bodies.
+		bodyStr := string(body)
+		if len(bodyStr) > 1000 {
+			bodyStr = fmt.Sprintf("%s ... %s", bodyStr[:500], bodyStr[len(bodyStr)-500:])
+		}
+		return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, bodyStr)
+	}
+
+	var result GeminiImageResponse
+	if err := json.Unmarshal(body, &result); err != nil {
+		return nil, fmt.Errorf("parse response: %w", err)
+	}
+
+	if len(result.Candidates) == 0 || len(result.Candidates[0].Content.Parts) == 0 {
+		return nil, fmt.Errorf("no image generated in response")
+	}
+
+	// The image is not necessarily the first part (a text part may precede
+	// it), so take the first part that actually carries inline data.
+	for _, part := range result.Candidates[0].Content.Parts {
+		if part.InlineData.Data == "" {
+			continue
+		}
+		// Label the data URI with the type the API reported; fall back to the
+		// previous hard-coded jpeg only when none was given.
+		mimeType := part.InlineData.MimeType
+		if mimeType == "" {
+			mimeType = "image/jpeg"
+		}
+		return &ImageResult{
+			Status:    "completed",
+			ImageURL:  fmt.Sprintf("data:%s;base64,%s", mimeType, part.InlineData.Data),
+			Completed: true,
+			Width:     1024,
+			Height:    1024,
+		}, nil
+	}
+
+	return nil, fmt.Errorf("no base64 image data in response")
+}
+
+// GetTaskStatus always returns an error: this client produces images
+// synchronously, so there is no task to look up. taskID is ignored.
+func (c *GeminiImageClient) GetTaskStatus(taskID string) (*ImageResult, error) {
+	unsupported := fmt.Errorf("not supported for Gemini (synchronous generation)")
+	return nil, unsupported
+}
+
+// replaceModelPlaceholder returns endpoint with every "{model}" occurrence
+// replaced by model. An endpoint without the placeholder is returned as is.
+func replaceModelPlaceholder(endpoint, model string) string {
+	// strings.ReplaceAll already returns the input unchanged when there is no
+	// match, so no prior Contains check or []byte round-trip is needed.
+	return strings.ReplaceAll(endpoint, "{model}", model)
+}
--- a/pkg/image/image_client.go
+++ b/pkg/image/image_client.go
@@ -1,14 +1,5 @@
 package image

-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"io"
-	"net/http"
-	"time"
-)
-
 type ImageClient interface {
 	GenerateImage(prompt string, opts ...ImageOption) (*ImageResult, error)
 	GetTaskStatus(taskID string) (*ImageResult, error)
@@ -100,285 +91,3 @@ func WithReferenceImages(images []string) ImageOption {
 		o.ReferenceImages = images
 	}
 }
-
-type OpenAIImageClient struct {
-	BaseURL    string
-	APIKey     string
-	Model      string
-	HTTPClient *http.Client
-}
-
-type DALLERequest struct {
-	Model   string   `json:"model"`
-	Prompt  string   `json:"prompt"`
-	Size    string   `json:"size,omitempty"`
-	Quality string   `json:"quality,omitempty"`
-	N       int      `json:"n"`
-	Image   []string `json:"image,omitempty"` // 参考图片URL列表
-}
-
-type DALLEResponse struct {
-	Created int64 `json:"created"`
-	Data    []struct {
-		URL           string `json:"url"`
-		RevisedPrompt string `json:"revised_prompt,omitempty"`
-	} `json:"data"`
-}
-
-func NewOpenAIImageClient(baseURL, apiKey, model string) *OpenAIImageClient {
-	return &OpenAIImageClient{
-		BaseURL: baseURL,
-		APIKey:  apiKey,
-		Model:   model,
-		HTTPClient: &http.Client{
-			Timeout: 10 * time.Minute,
-		},
-	}
-}
-
-func (c *OpenAIImageClient) GenerateImage(prompt string, opts ...ImageOption) (*ImageResult, error) {
-	options := &ImageOptions{
-		Size:    "1920x1920",
-		Quality: "standard",
-	}
-
-	for _, opt := range opts {
-		opt(options)
-	}
-
-	model := c.Model
-	if options.Model != "" {
-		model = options.Model
-	}
-
-	reqBody := DALLERequest{
-		Model:   model,
-		Prompt:  prompt,
-		Size:    options.Size,
-		Quality: options.Quality,
-		N:       1,
-		Image:   options.ReferenceImages,
-	}
-
-	jsonData, err := json.Marshal(reqBody)
-	if err != nil {
-		return nil, fmt.Errorf("marshal request: %w", err)
-	}
-
-	endpoint := c.BaseURL + "/v1/images/generations"
-	req, err := http.NewRequest("POST", endpoint, bytes.NewBuffer(jsonData))
-	if err != nil {
-		return nil, fmt.Errorf("create request: %w", err)
-	}
-
-	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("Authorization", "Bearer "+c.APIKey)
-
-	resp, err := c.HTTPClient.Do(req)
-	if err != nil {
-		return nil, fmt.Errorf("send request: %w", err)
-	}
-	defer resp.Body.Close()
-
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return nil, fmt.Errorf("read response: %w", err)
-	}
-
-	if resp.StatusCode != http.StatusOK {
-		return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body))
-	}
-
-	// 打印原始响应以便调试
-	fmt.Printf("OpenAI API Response: %s\n", string(body))
-
-	var result DALLEResponse
-	if err := json.Unmarshal(body, &result); err != nil {
-		return nil, fmt.Errorf("parse response: %w, body: %s", err, string(body))
-	}
-
-	if len(result.Data) == 0 {
-		return nil, fmt.Errorf("no image generated, response: %s", string(body))
-	}
-
-	return &ImageResult{
-		Status:    "completed",
-		ImageURL:  result.Data[0].URL,
-		Completed: true,
-	}, nil
-}
-
-func (c *OpenAIImageClient) GetTaskStatus(taskID string) (*ImageResult, error) {
-	return nil, fmt.Errorf("not supported for OpenAI/DALL-E")
-}
-
-type StableDiffusionClient struct {
-	BaseURL    string
-	APIKey     string
-	Model      string
-	HTTPClient *http.Client
-}
-
-type SDRequest struct {
-	Prompt         string   `json:"prompt"`
-	NegativePrompt string   `json:"negative_prompt,omitempty"`
-	Model          string   `json:"model,omitempty"`
-	Width          int      `json:"width,omitempty"`
-	Height         int      `json:"height,omitempty"`
-	Steps          int      `json:"steps,omitempty"`
-	CfgScale       float64  `json:"cfg_scale,omitempty"`
-	Seed           int64    `json:"seed,omitempty"`
-	Samples        int      `json:"samples"`
-	Image          []string `json:"image,omitempty"` // 参考图片URL列表
-}
-
-type SDResponse struct {
-	Status string `json:"status"`
-	TaskID string `json:"task_id,omitempty"`
-	Output []struct {
-		URL string `json:"url"`
-	} `json:"output,omitempty"`
-	Error string `json:"error,omitempty"`
-}
-
-func NewStableDiffusionClient(baseURL, apiKey, model string) *StableDiffusionClient {
-	return &StableDiffusionClient{
-		BaseURL: baseURL,
-		APIKey:  apiKey,
-		Model:   model,
-		HTTPClient: &http.Client{
-			Timeout: 10 * time.Minute,
-		},
-	}
-}
-
-func (c *StableDiffusionClient) GenerateImage(prompt string, opts ...ImageOption) (*ImageResult, error) {
-	options := &ImageOptions{
-		Width:    1024,
-		Height:   1024,
-		Steps:    30,
-		CfgScale: 7.5,
-	}
-
-	for _, opt := range opts {
-		opt(options)
-	}
-
-	model := c.Model
-	if options.Model != "" {
-		model = options.Model
-	}
-
-	reqBody := SDRequest{
-		Prompt:         prompt,
-		NegativePrompt: options.NegativePrompt,
-		Model:          model,
-		Width:          options.Width,
-		Height:         options.Height,
-		Steps:          options.Steps,
-		CfgScale:       options.CfgScale,
-		Seed:           options.Seed,
-		Samples:        1,
-		Image:          options.ReferenceImages,
-	}
-
-	jsonData, err := json.Marshal(reqBody)
-	if err != nil {
-		return nil, fmt.Errorf("marshal request: %w", err)
-	}
-
-	endpoint := c.BaseURL + "/v1/images/generations"
-	req, err := http.NewRequest("POST", endpoint, bytes.NewBuffer(jsonData))
-	if err != nil {
-		return nil, fmt.Errorf("create request: %w", err)
-	}
-
-	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("Authorization", "Bearer "+c.APIKey)
-
-	resp, err := c.HTTPClient.Do(req)
-	if err != nil {
-		return nil, fmt.Errorf("send request: %w", err)
-	}
-	defer resp.Body.Close()
-
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return nil, fmt.Errorf("read response: %w", err)
-	}
-
-	if resp.StatusCode != http.StatusOK {
-		return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body))
-	}
-
-	var result SDResponse
-	if err := json.Unmarshal(body, &result); err != nil {
-		return nil, fmt.Errorf("parse response: %w", err)
-	}
-
-	if result.Error != "" {
-		return nil, fmt.Errorf("SD error: %s", result.Error)
-	}
-
-	if result.Status == "processing" {
-		return &ImageResult{
-			TaskID:    result.TaskID,
-			Status:    "processing",
-			Completed: false,
-		}, nil
-	}
-
-	if len(result.Output) == 0 {
-		return nil, fmt.Errorf("no image generated")
-	}
-
-	return &ImageResult{
-		Status:    "completed",
-		ImageURL:  result.Output[0].URL,
-		Width:     options.Width,
-		Height:    options.Height,
-		Completed: true,
-	}, nil
-}
-
-func (c *StableDiffusionClient) GetTaskStatus(taskID string) (*ImageResult, error) {
-	endpoint := c.BaseURL + "/v1/images/status/" + taskID
-	req, err := http.NewRequest("GET", endpoint, nil)
-	if err != nil {
-		return nil, fmt.Errorf("create request: %w", err)
-	}
-
-	req.Header.Set("Authorization", "Bearer "+c.APIKey)
-
-	resp, err := c.HTTPClient.Do(req)
-	if err != nil {
-		return nil, fmt.Errorf("send request: %w", err)
-	}
-	defer resp.Body.Close()
-
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return nil, fmt.Errorf("read response: %w", err)
-	}
-
-	var result SDResponse
-	if err := json.Unmarshal(body, &result); err != nil {
-		return nil, fmt.Errorf("parse response: %w", err)
-	}
-
-	imageResult := &ImageResult{
-		TaskID:    taskID,
-		Status:    result.Status,
-		Completed: result.Status == "completed",
-	}
-
-	if result.Error != "" {
-		imageResult.Error = result.Error
-	}
-
-	if len(result.Output) > 0 {
-		imageResult.ImageURL = result.Output[0].URL
-	}
-
-	return imageResult, nil
-}
--- a/pkg/image/openai_image_client.go
+++ b/pkg/image/openai_image_client.go
@@ -0,0 +1,128 @@
+package image
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+)
+
+// OpenAIImageClient holds the settings used to call an OpenAI-style image
+// generation endpoint. Requests are posted to BaseURL+Endpoint with APIKey as
+// a Bearer token, using HTTPClient; Model is the default model name.
+type OpenAIImageClient struct {
+	BaseURL    string
+	APIKey     string
+	Model      string
+	Endpoint   string
+	HTTPClient *http.Client
+}
+
+// DALLERequest is the JSON body posted to the image generation endpoint.
+// Size, Quality and Image are omitted from the JSON when empty.
+type DALLERequest struct {
+	Model   string   `json:"model"`
+	Prompt  string   `json:"prompt"`
+	Size    string   `json:"size,omitempty"`
+	Quality string   `json:"quality,omitempty"`
+	N       int      `json:"n"`
+	Image   []string `json:"image,omitempty"`
+}
+
+// DALLEResponse mirrors the JSON returned by the image generation endpoint:
+// a creation timestamp and the list of generated images.
+type DALLEResponse struct {
+	Created int64 `json:"created"`
+	Data    []struct {
+		URL           string `json:"url"`
+		RevisedPrompt string `json:"revised_prompt,omitempty"`
+	} `json:"data"`
+}
+
+// NewOpenAIImageClient returns an OpenAIImageClient with a 10-minute HTTP
+// timeout. An empty endpoint falls back to "/v1/images/generations"; the other
+// arguments are stored as given.
+func NewOpenAIImageClient(baseURL, apiKey, model, endpoint string) *OpenAIImageClient {
+	client := &OpenAIImageClient{
+		BaseURL:    baseURL,
+		APIKey:     apiKey,
+		Model:      model,
+		Endpoint:   "/v1/images/generations",
+		HTTPClient: &http.Client{Timeout: 10 * time.Minute},
+	}
+	if endpoint != "" {
+		client.Endpoint = endpoint
+	}
+	return client
+}
+
+// GenerateImage posts one image generation request and returns the first
+// generated image.
+//
+// Size defaults to "1920x1920" and Quality to "standard"; opts may override
+// them, the model, and the reference images. ImageResult.ImageURL is the
+// returned URL, or a data URI built from b64_json when the service returns the
+// image inline instead of a URL.
+//
+// An error is returned when the request cannot be built or sent, the status is
+// not 200, the body cannot be decoded, or the response holds no usable image.
+func (c *OpenAIImageClient) GenerateImage(prompt string, opts ...ImageOption) (*ImageResult, error) {
+	options := &ImageOptions{
+		Size:    "1920x1920",
+		Quality: "standard",
+	}
+
+	for _, opt := range opts {
+		opt(options)
+	}
+
+	model := c.Model
+	if options.Model != "" {
+		model = options.Model
+	}
+
+	reqBody := DALLERequest{
+		Model:   model,
+		Prompt:  prompt,
+		Size:    options.Size,
+		Quality: options.Quality,
+		N:       1,
+		Image:   options.ReferenceImages,
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+
+	url := c.BaseURL + c.Endpoint
+	fmt.Printf("[OpenAI Image] Request URL: %s\n", url)
+	fmt.Printf("[OpenAI Image] Request Body: %s\n", string(jsonData))
+
+	req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+c.APIKey)
+
+	resp, err := c.HTTPClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body))
+	}
+
+	// A successful body can hold megabytes of base64 image data, so only a
+	// bounded preview is logged or embedded in error messages.
+	bodyPreview := string(body)
+	if len(body) > 500 {
+		bodyPreview = string(body[:500]) + "..."
+	}
+	fmt.Printf("OpenAI API Response: %s\n", bodyPreview)
+
+	// Decoded locally so that b64_json is visible in addition to url.
+	var result struct {
+		Data []struct {
+			URL     string `json:"url"`
+			B64JSON string `json:"b64_json"`
+		} `json:"data"`
+	}
+	if err := json.Unmarshal(body, &result); err != nil {
+		return nil, fmt.Errorf("parse response: %w, body: %s", err, bodyPreview)
+	}
+
+	if len(result.Data) == 0 {
+		return nil, fmt.Errorf("no image generated, response: %s", bodyPreview)
+	}
+
+	imageURL := result.Data[0].URL
+	if imageURL == "" && result.Data[0].B64JSON != "" {
+		// NOTE(review): inline images are assumed to be PNG, the documented
+		// default output format; confirm for other backends.
+		imageURL = "data:image/png;base64," + result.Data[0].B64JSON
+	}
+	if imageURL == "" {
+		// Never report "completed" with nothing to show.
+		return nil, fmt.Errorf("no image generated, response: %s", bodyPreview)
+	}
+
+	return &ImageResult{
+		Status:    "completed",
+		ImageURL:  imageURL,
+		Completed: true,
+	}, nil
+}
+
+// GetTaskStatus always returns an error: this client has no task lookup.
+// taskID is ignored.
+func (c *OpenAIImageClient) GetTaskStatus(taskID string) (*ImageResult, error) {
+	unsupported := fmt.Errorf("not supported for OpenAI/DALL-E")
+	return nil, unsupported
+}
--- a/pkg/image/volcengine_image_client.go
+++ b/pkg/image/volcengine_image_client.go
@@ -0,0 +1,158 @@
+package image
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+)
+
+// VolcEngineImageClient holds the settings used to call the VolcEngine image
+// generation endpoint. Requests are posted to BaseURL+Endpoint with APIKey as
+// a Bearer token, using HTTPClient; Model is the default model name.
+// QueryEndpoint is stored by the constructor but is not read by the methods
+// visible in this file.
+type VolcEngineImageClient struct {
+	BaseURL       string
+	APIKey        string
+	Model         string
+	Endpoint      string
+	QueryEndpoint string
+	HTTPClient    *http.Client
+}
+
+// VolcEngineImageRequest is the JSON body posted to the image generation
+// endpoint. Image, SequentialImageGeneration and Size are omitted from the
+// JSON when empty.
+//
+// Watermark is always serialized: with omitempty an explicit false would be
+// dropped from the body and the service's own default would apply, so a
+// caller could never switch the watermark off.
+type VolcEngineImageRequest struct {
+	Model                     string   `json:"model"`
+	Prompt                    string   `json:"prompt"`
+	Image                     []string `json:"image,omitempty"`
+	SequentialImageGeneration string   `json:"sequential_image_generation,omitempty"`
+	Size                      string   `json:"size,omitempty"`
+	Watermark                 bool     `json:"watermark"`
+}
+
+// VolcEngineImageResponse mirrors the JSON returned by the image generation
+// endpoint: the generated images, usage counters, and an optional error value
+// whose shape is not fixed here (it is decoded into an untyped interface).
+type VolcEngineImageResponse struct {
+	Model   string `json:"model"`
+	Created int64  `json:"created"`
+	Data    []struct {
+		URL  string `json:"url"`
+		Size string `json:"size"`
+	} `json:"data"`
+	Usage struct {
+		GeneratedImages int `json:"generated_images"`
+		OutputTokens    int `json:"output_tokens"`
+		TotalTokens     int `json:"total_tokens"`
+	} `json:"usage"`
+	Error interface{} `json:"error,omitempty"`
+}
+
+// NewVolcEngineImageClient returns a VolcEngineImageClient with a 10-minute
+// HTTP timeout. An empty endpoint falls back to "/api/v3/images/generations",
+// and an empty queryEndpoint falls back to the (possibly defaulted) endpoint.
+func NewVolcEngineImageClient(baseURL, apiKey, model, endpoint, queryEndpoint string) *VolcEngineImageClient {
+	createPath := endpoint
+	if createPath == "" {
+		createPath = "/api/v3/images/generations"
+	}
+
+	// The query path mirrors the creation path unless given explicitly.
+	statusPath := queryEndpoint
+	if statusPath == "" {
+		statusPath = createPath
+	}
+
+	httpClient := &http.Client{Timeout: 10 * time.Minute}
+	return &VolcEngineImageClient{
+		BaseURL:       baseURL,
+		APIKey:        apiKey,
+		Model:         model,
+		Endpoint:      createPath,
+		QueryEndpoint: statusPath,
+		HTTPClient:    httpClient,
+	}
+}
+
+// GenerateImage posts one image generation request and returns the URL of the
+// first generated image.
+//
+// Size defaults to "1024x1024" and Quality to "standard"; opts may override
+// them, the model, the negative prompt (appended to the prompt text) and the
+// reference images. Because Size has a default, the "2K"/"1K" fallback only
+// applies when an option resets Size to the empty string.
+//
+// An error is returned when the request cannot be built or sent, the status is
+// neither 200 nor 201, the body cannot be decoded, the response carries an
+// error value, or it contains no image.
+func (c *VolcEngineImageClient) GenerateImage(prompt string, opts ...ImageOption) (*ImageResult, error) {
+	cfg := &ImageOptions{
+		Size:    "1024x1024",
+		Quality: "standard",
+	}
+	for _, apply := range opts {
+		apply(cfg)
+	}
+
+	// A per-call model wins over the client's default.
+	modelName := cfg.Model
+	if modelName == "" {
+		modelName = c.Model
+	}
+
+	fullPrompt := prompt
+	if negative := cfg.NegativePrompt; negative != "" {
+		fullPrompt = fullPrompt + ". Negative: " + negative
+	}
+
+	// Pick a model-dependent size only when none was configured.
+	imageSize := cfg.Size
+	switch {
+	case imageSize != "":
+		// keep the configured size
+	case modelName == "doubao-seedream-4-5-251128":
+		imageSize = "2K"
+	default:
+		imageSize = "1K"
+	}
+
+	payload, err := json.Marshal(VolcEngineImageRequest{
+		Model:                     modelName,
+		Prompt:                    fullPrompt,
+		Image:                     cfg.ReferenceImages,
+		SequentialImageGeneration: "disabled",
+		Size:                      imageSize,
+		Watermark:                 false,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+
+	target := c.BaseURL + c.Endpoint
+	fmt.Printf("[VolcEngine Image] Request URL: %s\n", target)
+	fmt.Printf("[VolcEngine Image] Request Body: %s\n", string(payload))
+
+	httpReq, err := http.NewRequest("POST", target, bytes.NewBuffer(payload))
+	if err != nil {
+		return nil, fmt.Errorf("create request: %w", err)
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	httpReq.Header.Set("Authorization", "Bearer "+c.APIKey)
+
+	httpResp, err := c.HTTPClient.Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("send request: %w", err)
+	}
+	defer httpResp.Body.Close()
+
+	raw, err := io.ReadAll(httpResp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("read response: %w", err)
+	}
+
+	// The body is logged before the status check, so failures are visible too.
+	fmt.Printf("VolcEngine Image API Response: %s\n", string(raw))
+
+	accepted := httpResp.StatusCode == http.StatusOK || httpResp.StatusCode == http.StatusCreated
+	if !accepted {
+		return nil, fmt.Errorf("API error (status %d): %s", httpResp.StatusCode, string(raw))
+	}
+
+	var parsed VolcEngineImageResponse
+	if err := json.Unmarshal(raw, &parsed); err != nil {
+		return nil, fmt.Errorf("parse response: %w", err)
+	}
+
+	switch {
+	case parsed.Error != nil:
+		return nil, fmt.Errorf("volcengine error: %v", parsed.Error)
+	case len(parsed.Data) == 0:
+		return nil, fmt.Errorf("no image generated")
+	}
+
+	generated := &ImageResult{
+		Status:    "completed",
+		ImageURL:  parsed.Data[0].URL,
+		Completed: true,
+	}
+	return generated, nil
+}
+
+// GetTaskStatus always returns an error: this client produces images
+// synchronously, so there is no task to look up. taskID is ignored.
+func (c *VolcEngineImageClient) GetTaskStatus(taskID string) (*ImageResult, error) {
+	unsupported := fmt.Errorf("not supported for VolcEngine Seedream (synchronous generation)")
+	return nil, unsupported
+}
--- a/pkg/video/chatfire_client.go
+++ b/pkg/video/chatfire_client.go
@@ -0,0 +1,184 @@
+package video
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+)
+
+// ChatfireClient is the client for Chatfire video generation.
+type ChatfireClient struct {
+	// BaseURL is prepended verbatim to Endpoint and to the expanded
+	// QueryEndpoint when request URLs are built.
+	BaseURL       string
+	// APIKey is sent as a Bearer token in the Authorization header.
+	APIKey        string
+	// Model is the model name used by GenerateVideo when the call options do
+	// not name one.
+	Model         string
+	// Endpoint is the path that GenerateVideo POSTs to.
+	Endpoint      string
+	// QueryEndpoint is the path used by GetTaskStatus. It may contain a
+	// "{taskId}" or "{task_id}" placeholder; without one, the task ID is
+	// appended after a "/".
+	QueryEndpoint string
+	// HTTPClient performs every request issued by this client.
+	HTTPClient    *http.Client
+}
+
+// ChatfireRequest is the JSON body that GenerateVideo sends when submitting a
+// video generation job.
+type ChatfireRequest struct {
+	// Model is the model name to generate with.
+	Model    string `json:"model"`
+	// Prompt is the text prompt passed to GenerateVideo.
+	Prompt   string `json:"prompt"`
+	// ImageURL is the image URL passed to GenerateVideo; omitted from the JSON
+	// when empty.
+	ImageURL string `json:"image_url,omitempty"`
+	// Duration is taken from the video options; omitted from the JSON when
+	// zero. NOTE(review): unit is presumably seconds — confirm against the API.
+	Duration int    `json:"duration,omitempty"`
+	// Size is filled by GenerateVideo with the options' AspectRatio value
+	// (default "16:9"); omitted from the JSON when empty.
+	Size     string `json:"size,omitempty"`
+}
+
+// ChatfireResponse is the JSON shape GenerateVideo decodes from the response
+// to a job submission.
+type ChatfireResponse struct {
+	// TaskID identifies the submitted job and is copied into the VideoResult.
+	TaskID string `json:"task_id"`
+	// Status is the job status string; GenerateVideo treats "completed" and
+	// "succeeded" as finished.
+	Status string `json:"status"`
+	// Error, when non-empty, makes GenerateVideo return an error.
+	Error  string `json:"error,omitempty"`
+}
+
+// ChatfireTaskResponse is the JSON shape GetTaskStatus decodes from the
+// response to a task status query.
+type ChatfireTaskResponse struct {
+	// TaskID identifies the job and is copied into the VideoResult.
+	TaskID   string `json:"task_id"`
+	// Status is the job status string; GetTaskStatus treats "completed" and
+	// "succeeded" as finished.
+	Status   string `json:"status"`
+	// VideoURL, when non-empty, is copied into the VideoResult and marks it as
+	// completed regardless of Status.
+	VideoURL string `json:"video_url,omitempty"`
+	// Error, when non-empty, is copied into the VideoResult's Error field.
+	Error    string `json:"error,omitempty"`
+}
+
+// NewChatfireClient builds a ChatfireClient from the given settings.
+//
+// An empty endpoint falls back to "/video/generations" and an empty
+// queryEndpoint falls back to "/v1/video/task/{taskId}". baseURL, apiKey and
+// model are stored as given. The returned client owns a fresh http.Client
+// with a 300-second timeout.
+func NewChatfireClient(baseURL, apiKey, model, endpoint, queryEndpoint string) *ChatfireClient {
+	const (
+		fallbackEndpoint      = "/video/generations"
+		fallbackQueryEndpoint = "/v1/video/task/{taskId}"
+	)
+
+	client := &ChatfireClient{
+		BaseURL:       baseURL,
+		APIKey:        apiKey,
+		Model:         model,
+		Endpoint:      fallbackEndpoint,
+		QueryEndpoint: fallbackQueryEndpoint,
+		HTTPClient:    &http.Client{Timeout: 5 * time.Minute},
+	}
+
+	// Caller-supplied paths take precedence over the fallbacks.
+	if endpoint != "" {
+		client.Endpoint = endpoint
+	}
+	if queryEndpoint != "" {
+		client.QueryEndpoint = queryEndpoint
+	}
+	return client
+}
+
+// GenerateVideo submits a video generation job to Chatfire.
+//
+// Options start at Duration 5 and AspectRatio "16:9" and are then modified by
+// each entry in opts. The model comes from the options when set, otherwise
+// from c.Model. The job is sent as a JSON POST to c.BaseURL + c.Endpoint with
+// a Bearer Authorization header.
+//
+// An error is returned when the request cannot be built or sent, when the
+// response status is neither 200 nor 201, when the body is not valid JSON for
+// ChatfireResponse, or when the response carries a non-empty error field.
+// Otherwise the result holds the task ID, the reported status, the requested
+// duration, and Completed set when the status is "completed" or "succeeded".
+func (c *ChatfireClient) GenerateVideo(imageURL, prompt string, opts ...VideoOption) (*VideoResult, error) {
+	// Seed the defaults, then let each caller option adjust them.
+	cfg := &VideoOptions{}
+	cfg.Duration = 5
+	cfg.AspectRatio = "16:9"
+	for _, apply := range opts {
+		apply(cfg)
+	}
+
+	// A per-call model overrides the client-wide one.
+	chosenModel := cfg.Model
+	if chosenModel == "" {
+		chosenModel = c.Model
+	}
+
+	payload, err := json.Marshal(ChatfireRequest{
+		Model:    chosenModel,
+		Prompt:   prompt,
+		ImageURL: imageURL,
+		Duration: cfg.Duration,
+		Size:     cfg.AspectRatio,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+
+	httpReq, err := http.NewRequest(http.MethodPost, c.BaseURL+c.Endpoint, bytes.NewBuffer(payload))
+	if err != nil {
+		return nil, fmt.Errorf("create request: %w", err)
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	httpReq.Header.Set("Authorization", "Bearer "+c.APIKey)
+
+	httpResp, err := c.HTTPClient.Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("send request: %w", err)
+	}
+	defer httpResp.Body.Close()
+
+	raw, err := io.ReadAll(httpResp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("read response: %w", err)
+	}
+
+	switch httpResp.StatusCode {
+	case http.StatusOK, http.StatusCreated:
+		// Accepted; fall through to decoding.
+	default:
+		return nil, fmt.Errorf("API error (status %d): %s", httpResp.StatusCode, raw)
+	}
+
+	var submitted ChatfireResponse
+	if err := json.Unmarshal(raw, &submitted); err != nil {
+		return nil, fmt.Errorf("parse response: %w", err)
+	}
+	if submitted.Error != "" {
+		return nil, fmt.Errorf("chatfire error: %s", submitted.Error)
+	}
+
+	finished := false
+	switch submitted.Status {
+	case "completed", "succeeded":
+		finished = true
+	}
+
+	return &VideoResult{
+		TaskID:    submitted.TaskID,
+		Status:    submitted.Status,
+		Completed: finished,
+		Duration:  cfg.Duration,
+	}, nil
+}
+
+// GetTaskStatus queries Chatfire for the current state of a video task.
+//
+// The request path is derived from c.QueryEndpoint: a "{taskId}" placeholder
+// is substituted when present, otherwise a "{task_id}" placeholder, and when
+// neither appears the task ID is appended after a "/". The request is a GET
+// to c.BaseURL plus that path, with a Bearer Authorization header.
+//
+// An error is returned when the request cannot be built or sent, when the
+// response status is outside the 2xx range (the error carries the status code
+// and raw body, in the same format GenerateVideo uses), or when the body is
+// not valid JSON for ChatfireTaskResponse. An error reported inside a
+// successful response is not turned into a Go error; it is exposed through
+// the result's Error field. The result is marked Completed when the status is
+// "completed" or "succeeded", or when a video URL is present.
+func (c *ChatfireClient) GetTaskStatus(taskID string) (*VideoResult, error) {
+	queryPath := c.QueryEndpoint
+	switch {
+	case strings.Contains(queryPath, "{taskId}"):
+		queryPath = strings.ReplaceAll(queryPath, "{taskId}", taskID)
+	case strings.Contains(queryPath, "{task_id}"):
+		queryPath = strings.ReplaceAll(queryPath, "{task_id}", taskID)
+	default:
+		queryPath += "/" + taskID
+	}
+
+	endpoint := c.BaseURL + queryPath
+	req, err := http.NewRequest("GET", endpoint, nil)
+	if err != nil {
+		return nil, fmt.Errorf("create request: %w", err)
+	}
+
+	req.Header.Set("Authorization", "Bearer "+c.APIKey)
+
+	resp, err := c.HTTPClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("read response: %w", err)
+	}
+
+	// Reject HTTP-level failures before decoding. Without this check an error
+	// page either decodes into an empty status or surfaces as a misleading
+	// "parse response" error that hides the real status code and body.
+	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
+		return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body))
+	}
+
+	var result ChatfireTaskResponse
+	if err := json.Unmarshal(body, &result); err != nil {
+		return nil, fmt.Errorf("parse response: %w", err)
+	}
+
+	videoResult := &VideoResult{
+		TaskID:    result.TaskID,
+		Status:    result.Status,
+		Completed: result.Status == "completed" || result.Status == "succeeded",
+	}
+
+	if result.Error != "" {
+		videoResult.Error = result.Error
+	}
+
+	// A video URL means the job has finished even if the status string is
+	// not one of the recognised terminal values.
+	if result.VideoURL != "" {
+		videoResult.VideoURL = result.VideoURL
+		videoResult.Completed = true
+	}
+
+	return videoResult, nil
+}
--- a/pkg/video/openai_sora_client.go
+++ b/pkg/video/openai_sora_client.go
@@ -83,7 +83,7 @@ func (c *OpenAISoraClient) GenerateVideo(imageURL, prompt string, opts ...VideoO

 	writer.Close()

-	endpoint := c.BaseURL + "/v1/videos"
+	endpoint := c.BaseURL + "/videos"
 	req, err := http.NewRequest("POST", endpoint, body)
 	if err != nil {
 		return nil, fmt.Errorf("create request: %w", err)