Optimize the logic for returning structured data from the LLM

This commit is contained in:
puke
2025-12-28 15:06:34 +08:00
parent 1530d1f8c8
commit c449a6dfe8


@@ -215,8 +215,8 @@ class LLMService:
         """
         Call LLM with structured output support
 
-        Tries OpenAI beta.chat.completions.parse first, falls back to JSON parsing
-        if the provider doesn't support structured outputs.
+        Uses JSON schema instruction appended to prompt for maximum compatibility
+        across all OpenAI-compatible providers (Qwen, DeepSeek, etc.).
 
         Args:
             client: OpenAI client
@@ -230,42 +230,54 @@ class LLMService:
         Returns:
             Parsed Pydantic model instance
         """
-        # Try OpenAI structured output API first (beta.chat.completions.parse)
-        try:
-            response = await client.beta.chat.completions.parse(
-                model=model,
-                messages=[{"role": "user", "content": prompt}],
-                response_format=response_type,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                **kwargs
-            )
-            parsed = response.choices[0].message.parsed
-            if parsed is not None:
-                logger.debug(f"Structured output parsed successfully via beta API")
-                return parsed
-
-            # If parsed is None, fall through to fallback
-            logger.warning("Structured output API returned None, falling back to JSON parsing")
-            content = response.choices[0].message.content
-        except Exception as e:
-            # If beta API not supported, fall back to JSON mode
-            logger.debug(f"Structured output API not available ({e}), falling back to JSON parsing")
-            response = await client.chat.completions.create(
-                model=model,
-                messages=[{"role": "user", "content": prompt}],
-                temperature=temperature,
-                max_tokens=max_tokens,
-                **kwargs
-            )
-            content = response.choices[0].message.content
-
-        # Fallback: Parse JSON from response content
+        # Build JSON schema instruction and append to prompt
+        json_schema_instruction = self._get_json_schema_instruction(response_type)
+        enhanced_prompt = f"{prompt}\n\n{json_schema_instruction}"
+
+        # Call LLM with enhanced prompt
+        response = await client.chat.completions.create(
+            model=model,
+            messages=[{"role": "user", "content": enhanced_prompt}],
+            temperature=temperature,
+            max_tokens=max_tokens,
+            **kwargs
+        )
+        content = response.choices[0].message.content
+        logger.debug(f"Structured output response length: {len(content)} chars")
+
+        # Parse JSON from response content
         return self._parse_response_as_model(content, response_type)
 
+    def _get_json_schema_instruction(self, response_type: Type[T]) -> str:
+        """
+        Generate JSON schema instruction for LLM fallback mode
+
+        Args:
+            response_type: Pydantic model class
+
+        Returns:
+            Formatted instruction string with JSON schema
+        """
+        try:
+            # Get JSON schema from Pydantic model
+            schema = response_type.model_json_schema()
+            schema_str = json.dumps(schema, indent=2, ensure_ascii=False)
+            return f"""## IMPORTANT: JSON Output Format Required
+
+You MUST respond with ONLY a valid JSON object (no markdown, no extra text).
+The JSON must strictly follow this schema:
+
+```json
+{schema_str}
+```
+
+Output ONLY the JSON object, nothing else."""
+        except Exception as e:
+            logger.warning(f"Failed to generate JSON schema: {e}")
+            return """## IMPORTANT: JSON Output Format Required
+
+You MUST respond with ONLY a valid JSON object (no markdown, no extra text)."""
+
     def _parse_response_as_model(self, content: str, response_type: Type[T]) -> T:
         """
         Parse LLM response content as Pydantic model
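For context, a minimal standalone sketch of the technique this commit adopts: derive a JSON schema from a Pydantic model, append it to the prompt, call the plain chat-completions endpoint, and validate the reply. The `Verdict` model, `schema_instruction` and `classify` helpers, the `qwen-plus` model name, and the fence-stripping step are illustrative assumptions, not part of this commit.

```python
import json

from openai import AsyncOpenAI
from pydantic import BaseModel, ValidationError


class Verdict(BaseModel):
    """Hypothetical response model, stands in for any response_type."""
    label: str
    confidence: float


def schema_instruction(response_type: type[BaseModel]) -> str:
    # Same idea as _get_json_schema_instruction: embed the model's JSON schema
    schema_str = json.dumps(
        response_type.model_json_schema(), indent=2, ensure_ascii=False
    )
    fence = "`" * 3  # build the code fence programmatically to keep this example self-contained
    return (
        "## IMPORTANT: JSON Output Format Required\n\n"
        "You MUST respond with ONLY a valid JSON object (no markdown, no extra text).\n"
        "The JSON must strictly follow this schema:\n\n"
        f"{fence}json\n{schema_str}\n{fence}\n\n"
        "Output ONLY the JSON object, nothing else."
    )


async def classify(client: AsyncOpenAI, prompt: str) -> Verdict:
    # Append the schema instruction to the prompt, as the new _call_llm_structured does
    response = await client.chat.completions.create(
        model="qwen-plus",  # assumed model name; any OpenAI-compatible endpoint works
        messages=[
            {"role": "user", "content": f"{prompt}\n\n{schema_instruction(Verdict)}"}
        ],
        temperature=0.0,
    )
    text = (response.choices[0].message.content or "").strip()
    # Models often wrap JSON in markdown fences despite the instruction; stripping
    # them here is an assumption about what _parse_response_as_model may handle
    if text.startswith("```"):
        text = text.strip("`").removeprefix("json").strip()
    try:
        return Verdict.model_validate_json(text)
    except ValidationError as exc:
        raise ValueError(f"LLM returned non-conforming JSON: {exc}") from exc
```

Dropping `beta.chat.completions.parse` trades native structured outputs for portability: a prompt-embedded schema works on any endpoint that speaks the plain chat-completions API, at the cost of relying on the model to honor the instruction and on validation to catch the cases where it does not.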