Add Web Dashboard with multi-device control and callback hooks

Features:
- Web Dashboard: FastAPI-based dashboard with Vue.js frontend
  - Multi-device support (ADB, HDC, iOS)
  - Real-time WebSocket updates for task progress
  - Device management with status tracking
  - Task queue with execution controls (start/stop/re-execute)
  - Detailed task information display (thinking, actions, completion messages)
  - Screenshot viewing per device
  - LAN deployment support with configurable CORS

- Callback Hooks: Interrupt and modify task execution
  - step_callback: Called after each step with StepResult
  - before_action_callback: Called before executing an action; may return a modified action dict
  - Support for task interruption and dynamic task switching
  - Example scripts demonstrating callback usage

- Configuration: Environment-based configuration
  - .env file support for all settings
  - .env.example template with documentation
  - Model API configuration (base URL, model name, API key)
  - Dashboard configuration (host, port, CORS, device type)
  - Phone agent configuration (delays, max steps, language)

Technical improvements:
- Fixed forward reference issue with StepResult
- Added package exports for callback types and configs
- Added FastAPI and WebSocket support to the dependencies
- Thread-safe task execution with device locking
- Async WebSocket broadcasting from sync thread pool

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
let5sne.win10
2026-01-09 02:20:06 +08:00
parent 9fe189a8f8
commit 3552df23d6
31 changed files with 4221 additions and 8 deletions

View File

@@ -22,6 +22,10 @@ class AgentConfig:
lang: str = "cn"
system_prompt: str | None = None
verbose: bool = True
step_callback: Callable[["StepResult"], str | None] | None = None
"""Callback after each step. Return 'stop' to interrupt, or a new task string to switch."""
before_action_callback: Callable[[dict[str, Any]], dict[str, Any] | None] | None = None
"""Callback before executing action. Return modified action dict, or None to proceed as-is."""
def __post_init__(self):
if self.system_prompt is None:
@@ -37,6 +41,7 @@ class StepResult:
action: dict[str, Any] | None
thinking: str
message: str | None = None
step_count: int = 0
class PhoneAgent:
@@ -52,12 +57,29 @@ class PhoneAgent:
confirmation_callback: Optional callback for sensitive action confirmation.
takeover_callback: Optional callback for takeover requests.
Callbacks in agent_config:
step_callback: Called after each step with StepResult.
- Return 'stop' to interrupt the task
- Return a new task string to switch tasks
- Return None to continue normally
before_action_callback: Called before executing an action with the action dict.
- Return modified action dict to override
- Return None to execute the original action
Example:
>>> from phone_agent import PhoneAgent
>>> from phone_agent import PhoneAgent, AgentConfig
>>> from phone_agent.model import ModelConfig
>>>
>>> # With callback
>>> def on_step(result):
... if result.step_count > 10:
... return "stop" # Interrupt after 10 steps
... return None
>>>
>>> model_config = ModelConfig(base_url="http://localhost:8000/v1")
>>> agent = PhoneAgent(model_config)
>>> agent_config = AgentConfig(step_callback=on_step)
>>> agent = PhoneAgent(model_config, agent_config)
>>> agent.run("Open WeChat and send a message to John")
"""
@@ -184,6 +206,7 @@ class PhoneAgent:
action=None,
thinking="",
message=f"Model error: {e}",
step_count=self._step_count,
)
# Parse action from response
@@ -204,6 +227,16 @@ class PhoneAgent:
# Remove image from context to save space
self._context[-1] = MessageBuilder.remove_images_from_message(self._context[-1])
# Before action callback - allow modifying or intercepting action
if self.agent_config.before_action_callback is not None:
try:
modified_action = self.agent_config.before_action_callback(action)
if modified_action is not None:
action = modified_action
except Exception as e:
if self.agent_config.verbose:
print(f"Warning: before_action_callback error: {e}")
# Execute action
try:
result = self.action_handler.execute(
@@ -234,14 +267,38 @@ class PhoneAgent:
)
print("=" * 50 + "\n")
return StepResult(
# Build step result
step_result = StepResult(
success=result.success,
finished=finished,
action=action,
thinking=response.thinking,
message=result.message or action.get("message"),
step_count=self._step_count,
)
# Step callback - allow interrupting or switching tasks
if self.agent_config.step_callback is not None and not finished:
try:
callback_result = self.agent_config.step_callback(step_result)
if callback_result == "stop":
# Interrupt the task
if self.agent_config.verbose:
print("\n⏹ Task interrupted by callback\n")
step_result.finished = True
return step_result
elif isinstance(callback_result, str):
# Switch to new task
if self.agent_config.verbose:
print(f"\n🔄 Switching to new task: {callback_result}\n")
self.reset()
return self._execute_step(callback_result, is_first=True)
except Exception as e:
if self.agent_config.verbose:
print(f"Warning: step_callback error: {e}")
return step_result
@property
def context(self) -> list[dict[str, Any]]:
"""Get the current conversation context."""