feat: Add Qwen VL support for character analysis, configurable via VLM_PROVIDER
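The backend is selected at call time from environment variables. A minimal sketch of how a caller might opt into the Qwen VL path under the env-var contract introduced here; the key and model values below are placeholders, and VLM_BASE_URL is left unset so the DashScope compatible-mode default applies:

import os

# Assumed contract from this commit: VLM_PROVIDER picks the backend,
# VLM_API_KEY (or DASHSCOPE_API_KEY) supplies credentials, and
# VLM_BASE_URL / VLM_MODEL optionally override the per-provider defaults.
os.environ["VLM_PROVIDER"] = "qwen"           # one of: qwen, glm, openai
os.environ["VLM_API_KEY"] = "sk-placeholder"  # or set DASHSCOPE_API_KEY instead
os.environ["VLM_MODEL"] = "qwen-vl-max"       # optional; falls back to qwen-vl-plus

Unset variables fall back as in the diff below: the qwen branch gets the DashScope compatible-mode endpoint and qwen-vl-plus, while the glm and openai branches reuse the existing LLM config.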
@@ -145,29 +145,51 @@ Output ONLY the JSON object, no additional text."""
         }
     ]
 
-    # Get LLM config
-    from pixelle_video.config import config_manager
-    llm_config = config_manager.config.llm
+    # Get VLM configuration from environment or fall back to the LLM config
+    import os
+    vlm_provider = os.getenv("VLM_PROVIDER", "qwen")  # qwen, glm, openai
+    vlm_api_key = os.getenv("VLM_API_KEY") or os.getenv("DASHSCOPE_API_KEY")
+    vlm_base_url = os.getenv("VLM_BASE_URL")
+    vlm_model = os.getenv("VLM_MODEL")
 
-    # Create OpenAI client for VLM call
+    # Configure based on provider
+    if vlm_provider == "qwen":
+        # Tongyi Qianwen (Qwen VL)
+        vlm_base_url = vlm_base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
+        vlm_model = vlm_model or "qwen-vl-plus"  # or qwen-vl-max, qwen3-vl-plus
+        logger.info(f"Using Qwen VL: model={vlm_model}")
+    elif vlm_provider == "glm":
+        # Zhipu GLM-4V
+        from pixelle_video.config import config_manager
+        llm_config = config_manager.config.llm
+        vlm_api_key = vlm_api_key or llm_config.api_key
+        vlm_base_url = vlm_base_url or llm_config.base_url
+        vlm_model = vlm_model or "glm-4v-flash"
+        logger.info(f"Using GLM VL: model={vlm_model}")
+    else:  # openai or other
+        from pixelle_video.config import config_manager
+        llm_config = config_manager.config.llm
+        vlm_api_key = vlm_api_key or llm_config.api_key
+        vlm_base_url = vlm_base_url or llm_config.base_url
+        vlm_model = vlm_model or llm_config.model
+        logger.info(f"Using {vlm_provider} VL: model={vlm_model}")
+
+    if not vlm_api_key:
+        logger.error("No VLM API key configured. Set VLM_API_KEY or DASHSCOPE_API_KEY environment variable.")
+        return CharacterAnalysisResult()
+
+    # Create OpenAI-compatible client
     client = AsyncOpenAI(
-        api_key=llm_config.api_key,
-        base_url=llm_config.base_url
+        api_key=vlm_api_key,
+        base_url=vlm_base_url
     )
 
-    # Use vision model - GLM-4V for ZhiPu, or fall back to configured model
-    # Vision models: glm-4v, glm-4v-flash, gpt-4-vision-preview
-    vision_model = llm_config.model
-    if "glm" in llm_config.model.lower() and "v" not in llm_config.model.lower():
-        vision_model = "glm-4v-flash"  # Use GLM-4V for vision tasks
-    logger.info(f"Using vision model: {vision_model}")
-
     # Call VLM
     response = await client.chat.completions.create(
-        model=vision_model,
+        model=vlm_model,
         messages=messages,
         temperature=0.3,
-        max_tokens=2000  # Increased to avoid truncation
+        max_tokens=2000
     )
 
     vlm_response = response.choices[0].message.content if response.choices else None
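For reference, a self-contained sketch of the request the qwen path ends up making, assuming the DashScope compatible-mode endpoint accepts the standard OpenAI vision message format; the prompt text, file name, and key are placeholders, and the real prompt and messages list are built above this hunk in the actual module:

import asyncio
import base64
from openai import AsyncOpenAI

async def describe_characters(image_path: str):
    # Encode one frame as a data URL so it can be sent inline.
    with open(image_path, "rb") as f:
        data_url = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode()

    client = AsyncOpenAI(
        api_key="sk-placeholder",  # in practice: VLM_API_KEY / DASHSCOPE_API_KEY
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    )
    response = await client.chat.completions.create(
        model="qwen-vl-plus",
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe the characters in this image."},
                {"type": "image_url", "image_url": {"url": data_url}},
            ],
        }],
        temperature=0.3,
        max_tokens=2000,
    )
    return response.choices[0].message.content if response.choices else None

# asyncio.run(describe_characters("frame.jpg"))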