draft init
This commit is contained in:
168
phone_agent/model/client.py
Normal file
168
phone_agent/model/client.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""Model client for AI inference using OpenAI-compatible API."""
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModelConfig:
|
||||
"""Configuration for the AI model."""
|
||||
|
||||
base_url: str = "http://localhost:8000/v1"
|
||||
api_key: str = "EMPTY"
|
||||
model_name: str = "autoglm-phone-9b"
|
||||
max_tokens: int = 3000
|
||||
temperature: float = 0.0
|
||||
top_p: float = 0.85
|
||||
frequency_penalty: float = 0.2
|
||||
extra_body: dict[str, Any] = field(
|
||||
default_factory=lambda: {"skip_special_tokens": False}
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModelResponse:
|
||||
"""Response from the AI model."""
|
||||
|
||||
thinking: str
|
||||
action: str
|
||||
raw_content: str
|
||||
|
||||
|
||||
class ModelClient:
|
||||
"""
|
||||
Client for interacting with OpenAI-compatible vision-language models.
|
||||
|
||||
Args:
|
||||
config: Model configuration.
|
||||
"""
|
||||
|
||||
def __init__(self, config: ModelConfig | None = None):
|
||||
self.config = config or ModelConfig()
|
||||
self.client = OpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
|
||||
|
||||
def request(self, messages: list[dict[str, Any]]) -> ModelResponse:
|
||||
"""
|
||||
Send a request to the model.
|
||||
|
||||
Args:
|
||||
messages: List of message dictionaries in OpenAI format.
|
||||
|
||||
Returns:
|
||||
ModelResponse containing thinking and action.
|
||||
|
||||
Raises:
|
||||
ValueError: If the response cannot be parsed.
|
||||
"""
|
||||
response = self.client.chat.completions.create(
|
||||
messages=messages,
|
||||
model=self.config.model_name,
|
||||
max_tokens=self.config.max_tokens,
|
||||
temperature=self.config.temperature,
|
||||
top_p=self.config.top_p,
|
||||
frequency_penalty=self.config.frequency_penalty,
|
||||
extra_body=self.config.extra_body,
|
||||
)
|
||||
|
||||
raw_content = response.choices[0].message.content
|
||||
|
||||
# Parse thinking and action from response
|
||||
thinking, action = self._parse_response(raw_content)
|
||||
|
||||
return ModelResponse(thinking=thinking, action=action, raw_content=raw_content)
|
||||
|
||||
def _parse_response(self, content: str) -> tuple[str, str]:
|
||||
"""
|
||||
Parse the model response into thinking and action parts.
|
||||
|
||||
Args:
|
||||
content: Raw response content.
|
||||
|
||||
Returns:
|
||||
Tuple of (thinking, action).
|
||||
"""
|
||||
if "<answer>" not in content:
|
||||
return "", content
|
||||
|
||||
parts = content.split("<answer>", 1)
|
||||
thinking = parts[0].replace("<think>", "").replace("</think>", "").strip()
|
||||
action = parts[1].replace("</answer>", "").strip()
|
||||
|
||||
return thinking, action
|
||||
|
||||
|
||||
class MessageBuilder:
|
||||
"""Helper class for building conversation messages."""
|
||||
|
||||
@staticmethod
|
||||
def create_system_message(content: str) -> dict[str, Any]:
|
||||
"""Create a system message."""
|
||||
return {"role": "system", "content": content}
|
||||
|
||||
@staticmethod
|
||||
def create_user_message(
|
||||
text: str, image_base64: str | None = None
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Create a user message with optional image.
|
||||
|
||||
Args:
|
||||
text: Text content.
|
||||
image_base64: Optional base64-encoded image.
|
||||
|
||||
Returns:
|
||||
Message dictionary.
|
||||
"""
|
||||
content = []
|
||||
|
||||
if image_base64:
|
||||
content.append(
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:image/png;base64,{image_base64}"},
|
||||
}
|
||||
)
|
||||
|
||||
content.append({"type": "text", "text": text})
|
||||
|
||||
return {"role": "user", "content": content}
|
||||
|
||||
@staticmethod
|
||||
def create_assistant_message(content: str) -> dict[str, Any]:
|
||||
"""Create an assistant message."""
|
||||
return {"role": "assistant", "content": content}
|
||||
|
||||
@staticmethod
|
||||
def remove_images_from_message(message: dict[str, Any]) -> dict[str, Any]:
|
||||
"""
|
||||
Remove image content from a message to save context space.
|
||||
|
||||
Args:
|
||||
message: Message dictionary.
|
||||
|
||||
Returns:
|
||||
Message with images removed.
|
||||
"""
|
||||
if isinstance(message.get("content"), list):
|
||||
message["content"] = [
|
||||
item for item in message["content"] if item.get("type") == "text"
|
||||
]
|
||||
return message
|
||||
|
||||
@staticmethod
|
||||
def build_screen_info(current_app: str, **extra_info) -> str:
|
||||
"""
|
||||
Build screen info string for the model.
|
||||
|
||||
Args:
|
||||
current_app: Current app name.
|
||||
**extra_info: Additional info to include.
|
||||
|
||||
Returns:
|
||||
JSON string with screen info.
|
||||
"""
|
||||
info = {"current_app": current_app, **extra_info}
|
||||
return json.dumps(info, ensure_ascii=False)
|
||||
Reference in New Issue
Block a user