Features:
- Web Dashboard: FastAPI-based dashboard with Vue.js frontend
  - Multi-device support (ADB, HDC, iOS)
  - Real-time WebSocket updates for task progress
  - Device management with status tracking
  - Task queue with execution controls (start/stop/re-execute)
  - Detailed task information display (thinking, actions, completion messages)
  - Screenshot viewing per device
  - LAN deployment support with configurable CORS
- Callback Hooks: Interrupt and modify task execution
  - step_callback: Called after each step with StepResult
  - before_action_callback: Called before executing an action
  - Support for task interruption and dynamic task switching
  - Example scripts demonstrating callback usage
- Configuration: Environment-based configuration
  - .env file support for all settings
  - .env.example template with documentation
  - Model API configuration (base URL, model name, API key)
  - Dashboard configuration (host, port, CORS, device type)
  - Phone agent configuration (delays, max steps, language)

Technical improvements:
- Fixed forward reference issue with StepResult
- Added package exports for callback types and configs
- Enhanced dependencies with FastAPI, WebSocket support
- Thread-safe task execution with device locking
- Async WebSocket broadcasting from sync thread pool

Co-Authored-By: Claude <noreply@anthropic.com>
200 lines
6.1 KiB
Python
200 lines
6.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
带回调钩子的命令行工具 / Command-line tool with callback hooks
|
|
|
|
支持通过命令行参数配置回调钩子,实现任务中断和切换。
|
|
|
|
Configuration / 配置:
|
|
Loads settings from .env file (if present).
|
|
从 .env 文件加载配置(如果存在)。
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
import sys
|
|
from dotenv import load_dotenv
|
|
|
|
# Load .env file for configuration
|
|
# 加载 .env 配置文件
|
|
load_dotenv()
|
|
|
|
from phone_agent import PhoneAgent, AgentConfig
|
|
from phone_agent.model import ModelConfig
|
|
from phone_agent.config import get_messages
|
|
|
|
|
|
def create_step_callback(max_steps: int | None = None, lang: str = "cn"):
|
|
"""创建步数限制回调"""
|
|
if max_steps is None:
|
|
return None
|
|
|
|
def callback(result):
|
|
if result.step_count >= max_steps:
|
|
return "stop"
|
|
return None
|
|
|
|
return callback
|
|
|
|
|
|
def _build_parser():
    """Build the command-line parser; defaults come from environment variables."""
    parser = argparse.ArgumentParser(
        description="Phone Agent with Callback Hooks",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # 基本用法(与 main.py 相同)
  python run_with_callbacks.py "打开微信"

  # 限制最大执行步数
  python run_with_callbacks.py "打开微信" --max-steps 5

  # 使用智谱 API
  python run_with_callbacks.py "打开微信" \\
    --base-url https://open.bigmodel.cn/api/paas/v4 \\
    --model autoglm-phone \\
    --apikey your-key

  # 交互模式(输入新任务可切换)
  python run_with_callbacks.py --interactive
""",
    )

    # Model settings
    parser.add_argument(
        "--base-url",
        default=os.getenv("PHONE_AGENT_BASE_URL", "http://localhost:8000/v1"),
        help="模型 API 地址",
    )
    parser.add_argument(
        "--model",
        default=os.getenv("PHONE_AGENT_MODEL", "autoglm-phone-9b"),
        help="模型名称",
    )
    parser.add_argument(
        "--apikey",
        default=os.getenv("PHONE_AGENT_API_KEY", "EMPTY"),
        help="API 密钥",
    )

    # Agent settings
    # The default is passed as a string so argparse applies ``type=int`` to it
    # and reports a usage error for a malformed PHONE_AGENT_MAX_STEPS instead
    # of raising ValueError while the parser is being built.
    parser.add_argument(
        "--max-steps",
        type=int,
        default=os.getenv("PHONE_AGENT_MAX_STEPS", "100"),
        help="最大执行步数",
    )
    parser.add_argument(
        "--device-id",
        default=os.getenv("PHONE_AGENT_DEVICE_ID"),
        help="设备 ID",
    )
    parser.add_argument(
        "--lang",
        choices=["cn", "en"],
        default=os.getenv("PHONE_AGENT_LANG", "cn"),
        help="语言",
    )

    # Callback settings
    parser.add_argument("--callback-max-steps", type=int, help="回调强制中断的步数")
    parser.add_argument(
        "--interactive",
        action="store_true",
        help="交互模式(支持动态切换任务)",
    )

    # Positional task
    parser.add_argument("task", nargs="?", help="要执行的任务")
    return parser


def _read_task_from_stdin():
    """Prompt for one task line on stdin; return ``None`` at end of input."""
    try:
        return input("> ")
    except EOFError:
        return None


def _start_interactive_console(fallback_callback):
    """Start the single stdin reader used by interactive mode.

    One daemon thread owns stdin. Lines typed while a task is running are
    treated as commands (``s`` to stop, ``n:<task>`` to switch); lines typed
    while idle are handed to the prompt loop as new tasks. Having a single
    reader avoids two concurrent ``input()`` calls competing for the same line.

    Args:
        fallback_callback: Step callback consulted when no command is pending,
            or ``None``.

    Returns:
        A tuple ``(step_callback, read_task, run_guarded)``:
        ``step_callback(result)`` returns a pending command or defers to
        ``fallback_callback``; ``read_task()`` returns the next idle line or
        ``None`` at end of input; ``run_guarded(run, task)`` calls
        ``run(task)`` while marking the task as running.
    """
    import queue
    import threading

    commands = queue.Queue()  # commands typed while a task is running
    idle_lines = queue.Queue()  # lines typed while waiting for a new task
    task_running = threading.Event()

    print("\n[交互模式] 在任务执行时可以输入:")
    print(" 's' + Enter - 停止当前任务")
    print(" 'n:新任务' + Enter - 切换到新任务")
    print("-" * 50)

    def input_listener():
        while True:
            try:
                line = input().strip()
            except (EOFError, KeyboardInterrupt):
                idle_lines.put(None)  # tell the prompt loop that stdin is closed
                break
            if not task_running.is_set():
                idle_lines.put(line)
            elif line.lower() == "s":
                commands.put("stop")
            elif line.lower().startswith("n:"):
                commands.put(line[2:])

    threading.Thread(target=input_listener, daemon=True).start()

    def step_callback(result):
        try:
            return commands.get_nowait()
        except queue.Empty:
            # No user command pending: keep honouring the step-limit callback.
            if fallback_callback is not None:
                return fallback_callback(result)
            return None

    def read_task():
        print("> ", end="", flush=True)
        while True:
            try:
                # Poll with a timeout so Ctrl-C is noticed promptly.
                return idle_lines.get(timeout=0.2)
            except queue.Empty:
                continue

    def run_guarded(run, task):
        # Discard commands left over from a previous task so they cannot
        # affect this one.
        while True:
            try:
                commands.get_nowait()
            except queue.Empty:
                break
        task_running.set()
        try:
            return run(task)
        finally:
            task_running.clear()

    return step_callback, read_task, run_guarded


def _prompt_loop(read_task, run_task, reset, echo_task=False):
    """Read tasks until quit/EOF/Ctrl-C, running each one and resetting after it."""
    print("\n输入任务 (或 'quit' 退出):\n")
    while True:
        try:
            line = read_task()
            if line is None:  # end of input
                break
            task = line.strip()
            if task.lower() in ("quit", "exit", "q"):
                break
            if not task:
                continue

            if echo_task:
                print(f"\n执行: {task}\n")
            result = run_task(task)
            print(f"\n结果: {result}\n")
            reset()
        except KeyboardInterrupt:
            print("\n\nGoodbye!")
            break


def main():
    """Parse CLI arguments, build the agent with its callbacks, and run tasks.

    A positional task is run once. Otherwise tasks are read from a prompt
    until the user quits. With ``--interactive`` a background reader also
    accepts stop/switch commands while a task is running.
    """
    args = _build_parser().parse_args()

    # Model configuration
    model_config = ModelConfig(
        base_url=args.base_url,
        model_name=args.model,
        api_key=args.apikey,
        lang=args.lang,
    )

    # Step-limit callback (None when --callback-max-steps is not given)
    step_callback = create_step_callback(args.callback_max_steps, args.lang)

    read_task = _read_task_from_stdin
    run_guarded = None
    if args.interactive:
        # The interactive callback wraps the step-limit callback rather than
        # replacing it, so both options can be combined.
        step_callback, read_task, run_guarded = _start_interactive_console(
            step_callback
        )

    # Agent configuration
    agent_config = AgentConfig(
        max_steps=args.max_steps,
        device_id=args.device_id,
        lang=args.lang,
        step_callback=step_callback,
    )

    agent = PhoneAgent(model_config=model_config, agent_config=agent_config)

    def run_task(task):
        if run_guarded is not None:
            return run_guarded(agent.run, task)
        return agent.run(task)

    # Configuration banner
    print("=" * 50)
    print("Phone Agent with Callback Hooks")
    print("=" * 50)
    print(f"Model: {args.model}")
    print(f"Base URL: {args.base_url}")
    print(f"Max Steps: {args.max_steps}")
    if args.callback_max_steps is not None:
        print(f"Callback Max Steps: {args.callback_max_steps} (强制中断)")
    print("=" * 50)

    # Run the task(s)
    if args.task:
        print(f"\nTask: {args.task}\n")
        result = run_task(args.task)
        print(f"\nResult: {result}")
    else:
        # Only --interactive echoes the task before running it.
        _prompt_loop(read_task, run_task, agent.reset, echo_task=args.interactive)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|