Files
Open-AutoGLM/examples/run_with_callbacks.py
let5sne.win10 3552df23d6 Add Web Dashboard with multi-device control and callback hooks
Features:
- Web Dashboard: FastAPI-based dashboard with Vue.js frontend
  - Multi-device support (ADB, HDC, iOS)
  - Real-time WebSocket updates for task progress
  - Device management with status tracking
  - Task queue with execution controls (start/stop/re-execute)
  - Detailed task information display (thinking, actions, completion messages)
  - Screenshot viewing per device
  - LAN deployment support with configurable CORS

- Callback Hooks: Interrupt and modify task execution
  - step_callback: Called after each step with StepResult
  - before_action_callback: Called before executing action
  - Support for task interruption and dynamic task switching
  - Example scripts demonstrating callback usage

- Configuration: Environment-based configuration
  - .env file support for all settings
  - .env.example template with documentation
  - Model API configuration (base URL, model name, API key)
  - Dashboard configuration (host, port, CORS, device type)
  - Phone agent configuration (delays, max steps, language)

Technical improvements:
- Fixed forward reference issue with StepResult
- Added package exports for callback types and configs
- Enhanced dependencies with FastAPI, WebSocket support
- Thread-safe task execution with device locking
- Async WebSocket broadcasting from sync thread pool

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-09 02:20:06 +08:00

200 lines
6.1 KiB
Python

#!/usr/bin/env python3
"""
带回调钩子的命令行工具 / Command-line tool with callback hooks
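支持通过命令行参数配置回调钩子,实现任务中断和切换。
Callback hooks are configured through command-line arguments and can interrupt a running task or switch it to a new one.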
Configuration / 配置:
Loads settings from .env file (if present).
从 .env 文件加载配置(如果存在)。
"""
import argparse
import os
import sys
from dotenv import load_dotenv
# Load .env file for configuration
# 加载 .env 配置文件
load_dotenv()
from phone_agent import PhoneAgent, AgentConfig
from phone_agent.model import ModelConfig
from phone_agent.config import get_messages
def create_step_callback(max_steps: int | None = None, lang: str = "cn"):
"""创建步数限制回调"""
if max_steps is None:
return None
def callback(result):
if result.step_count >= max_steps:
return "stop"
return None
return callback
def _build_parser():
    """Build the command-line parser.

    Defaults for the model and agent options are read from the
    PHONE_AGENT_* environment variables when they are set.
    """
    parser = argparse.ArgumentParser(
        description="Phone Agent with Callback Hooks",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    # 基本用法(与 main.py 相同)
    python run_with_callbacks.py "打开微信"
    # 限制最大执行步数
    python run_with_callbacks.py "打开微信" --max-steps 5
    # 使用智谱 API
    python run_with_callbacks.py "打开微信" \\
        --base-url https://open.bigmodel.cn/api/paas/v4 \\
        --model autoglm-phone \\
        --apikey your-key
    # 交互模式(输入新任务可切换)
    python run_with_callbacks.py --interactive
""",
    )

    # Model options
    parser.add_argument(
        "--base-url",
        default=os.getenv("PHONE_AGENT_BASE_URL", "http://localhost:8000/v1"),
        help="模型 API 地址",
    )
    parser.add_argument(
        "--model",
        default=os.getenv("PHONE_AGENT_MODEL", "autoglm-phone-9b"),
        help="模型名称",
    )
    parser.add_argument(
        "--apikey",
        default=os.getenv("PHONE_AGENT_API_KEY", "EMPTY"),
        help="API 密钥",
    )

    # Agent options
    # The default is left as a string so that argparse applies type=int itself:
    # a malformed PHONE_AGENT_MAX_STEPS is then reported as a usage error
    # instead of an uncaught ValueError while the parser is being built.
    parser.add_argument(
        "--max-steps",
        type=int,
        default=os.getenv("PHONE_AGENT_MAX_STEPS", "100"),
        help="最大执行步数",
    )
    parser.add_argument(
        "--device-id",
        default=os.getenv("PHONE_AGENT_DEVICE_ID"),
        help="设备 ID",
    )
    parser.add_argument(
        "--lang",
        choices=["cn", "en"],
        default=os.getenv("PHONE_AGENT_LANG", "cn"),
        help="语言",
    )

    # Callback options
    parser.add_argument("--callback-max-steps", type=int, help="回调强制中断的步数")
    parser.add_argument(
        "--interactive",
        action="store_true",
        help="交互模式(支持动态切换任务)",
    )

    # Positional task
    parser.add_argument("task", nargs="?", help="要执行的任务")
    return parser


def _chain_callbacks(*callbacks):
    """Combine step callbacks; the first non-None return value wins.

    None entries are skipped. Returns None when nothing is left, the single
    remaining callback when there is only one, and a wrapper otherwise.
    """
    active = [cb for cb in callbacks if cb is not None]
    if not active:
        return None
    if len(active) == 1:
        return active[0]

    def chained(result):
        for cb in active:
            outcome = cb(result)
            if outcome is not None:
                return outcome
        return None

    return chained


def _start_interactive_console():
    """Start the single stdin reader used by interactive mode.

    Exactly one background thread reads stdin and puts each stripped line on
    a queue. Both the step callback (while a task runs) and the task prompt
    (while idle) consume that queue, so the two never compete for the same
    input line.

    Returns:
        A tuple ``(poll_command, read_task, discard_pending)``:
        poll_command(result) is a step callback returning "stop", a new task
        string, or None; read_task() blocks for the next line and raises
        EOFError once stdin is closed; discard_pending() drops queued lines.
    """
    import queue
    import threading

    lines = queue.Queue()

    print("\n[交互模式] 在任务执行时可以输入:")
    print(" 's' + Enter - 停止当前任务")
    print(" 'n:新任务' + Enter - 切换到新任务")
    print("-" * 50)

    def reader():
        while True:
            try:
                lines.put(input().strip())
            except (EOFError, KeyboardInterrupt):
                # None marks "stdin is gone" for the consumers.
                lines.put(None)
                return

    threading.Thread(target=reader, daemon=True).start()

    def poll_command(result):
        try:
            line = lines.get_nowait()
        except queue.Empty:
            return None
        if line is None:
            # Keep the end-of-input marker available for read_task().
            lines.put(None)
            return None
        lowered = line.lower()
        if lowered == "s":
            return "stop"
        if lowered.startswith("n:"):
            return line[2:]
        # Anything else typed during a task is not a command; ignore it.
        return None

    def read_task():
        print("> ", end="", flush=True)
        while True:
            try:
                # A short timeout keeps the wait responsive to Ctrl-C.
                line = lines.get(timeout=0.2)
            except queue.Empty:
                continue
            if line is None:
                lines.put(None)
                raise EOFError
            return line

    def discard_pending():
        # Commands typed after a task's last step must not leak into the
        # next task (as a stale "stop") or be mistaken for a new task.
        closed = False
        while True:
            try:
                if lines.get_nowait() is None:
                    closed = True
            except queue.Empty:
                break
        if closed:
            lines.put(None)

    return poll_command, read_task, discard_pending


def _prompt_for_task():
    """Read one task line directly from stdin (non-interactive-callback mode)."""
    return input("> ")


def _run_task_loop(agent, read_task, announce=False, after_task=None):
    """Repeatedly read a task and run it until the user quits.

    Args:
        agent: Object providing ``run(task)`` and ``reset()``.
        read_task: Zero-argument callable returning the next input line;
            may raise EOFError when no more input is available.
        announce: When true, echo the task before running it.
        after_task: Optional zero-argument callable invoked after each task.
    """
    print("\n输入任务 (或 'quit' 退出):\n")
    while True:
        try:
            try:
                task = read_task().strip()
            except EOFError:
                # stdin closed (e.g. piped input exhausted): leave quietly.
                break
            if task.lower() in ("quit", "exit", "q"):
                break
            if not task:
                continue
            if announce:
                print(f"\n执行: {task}\n")
            result = agent.run(task)
            print(f"\n结果: {result}\n")
            agent.reset()
            if after_task is not None:
                after_task()
        except KeyboardInterrupt:
            print("\n\nGoodbye!")
            break


def main():
    """Parse the command line, build the agent, and run one task or a task loop.

    With a positional task the agent runs it once and prints the result.
    Without one, tasks are read from stdin until "quit", "exit" or "q" is
    entered, stdin is closed, or Ctrl-C is pressed. With --interactive, lines
    typed while a task is running are treated as commands ('s' to stop,
    'n:<task>' to switch task) and handed to the agent's step callback.
    """
    args = _build_parser().parse_args()

    model_config = ModelConfig(
        base_url=args.base_url,
        model_name=args.model,
        api_key=args.apikey,
        lang=args.lang,
    )

    # Step-limit callback (None when --callback-max-steps was not given).
    step_callback = create_step_callback(args.callback_max_steps, args.lang)

    read_task = _prompt_for_task
    after_task = None
    if args.interactive:
        poll_command, read_task, after_task = _start_interactive_console()
        # Chain instead of overwrite so --callback-max-steps is still enforced
        # in interactive mode; user commands take precedence over the limit.
        step_callback = _chain_callbacks(poll_command, step_callback)

    agent_config = AgentConfig(
        max_steps=args.max_steps,
        device_id=args.device_id,
        lang=args.lang,
        step_callback=step_callback,
    )
    agent = PhoneAgent(model_config=model_config, agent_config=agent_config)

    # Configuration banner
    print("=" * 50)
    print("Phone Agent with Callback Hooks")
    print("=" * 50)
    print(f"Model: {args.model}")
    print(f"Base URL: {args.base_url}")
    print(f"Max Steps: {args.max_steps}")
    # "is not None" matches create_step_callback, which also installs a
    # callback for a limit of 0.
    if args.callback_max_steps is not None:
        print(f"Callback Max Steps: {args.callback_max_steps} (强制中断)")
    print("=" * 50)

    if args.task:
        print(f"\nTask: {args.task}\n")
        result = agent.run(args.task)
        print(f"\nResult: {result}")
    else:
        # Only the --interactive loop echoes the task before running it.
        _run_task_loop(
            agent,
            read_task,
            announce=args.interactive,
            after_task=after_task,
        )
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()