#!/usr/bin/env python3 """ Phone Agent iOS CLI - AI-powered iOS phone automation. Usage: python ios.py [OPTIONS] Environment Variables: PHONE_AGENT_BASE_URL: Model API base URL (default: http://localhost:8000/v1) PHONE_AGENT_MODEL: Model name (default: autoglm-phone-9b) PHONE_AGENT_MAX_STEPS: Maximum steps per task (default: 100) PHONE_AGENT_WDA_URL: WebDriverAgent URL (default: http://localhost:8100) PHONE_AGENT_DEVICE_ID: iOS device UDID for multi-device setups """ import argparse import os import shutil import subprocess import sys from urllib.parse import urlparse from openai import OpenAI from phone_agent.agent_ios import IOSAgentConfig, IOSPhoneAgent from phone_agent.config.apps_ios import list_supported_apps from phone_agent.model import ModelConfig from phone_agent.xctest import XCTestConnection, list_devices def check_system_requirements(wda_url: str = "http://localhost:8100") -> bool: """ Check system requirements before running the agent. Checks: 1. libimobiledevice tools installed 2. At least one iOS device connected 3. WebDriverAgent is running Args: wda_url: WebDriverAgent URL to check. Returns: True if all checks pass, False otherwise. """ print("🔍 Checking system requirements...") print("-" * 50) all_passed = True # Check 1: libimobiledevice installed print("1. Checking libimobiledevice installation...", end=" ") if shutil.which("idevice_id") is None: print("❌ FAILED") print(" Error: libimobiledevice is not installed or not in PATH.") print(" Solution: Install libimobiledevice:") print(" - macOS: brew install libimobiledevice") print(" - Linux: sudo apt-get install libimobiledevice-utils") all_passed = False else: # Double check by running idevice_id try: result = subprocess.run( ["idevice_id", "-ln"], capture_output=True, text=True, timeout=10 ) if result.returncode == 0: print("✅ OK") else: print("❌ FAILED") print(" Error: idevice_id command failed to run.") all_passed = False except FileNotFoundError: print("❌ FAILED") print(" Error: idevice_id command not found.") all_passed = False except subprocess.TimeoutExpired: print("❌ FAILED") print(" Error: idevice_id command timed out.") all_passed = False # If libimobiledevice is not installed, skip remaining checks if not all_passed: print("-" * 50) print("❌ System check failed. Please fix the issues above.") return False # Check 2: iOS Device connected print("2. Checking connected iOS devices...", end=" ") try: devices = list_devices() if not devices: print("❌ FAILED") print(" Error: No iOS devices connected.") print(" Solution:") print(" 1. Connect your iOS device via USB") print(" 2. Unlock the device and tap 'Trust This Computer'") print(" 3. Verify connection: idevice_id -l") print(" 4. Or connect via WiFi using device IP") all_passed = False else: device_names = [ d.device_name or d.device_id[:8] + "..." for d in devices ] print(f"✅ OK ({len(devices)} device(s): {', '.join(device_names)})") except Exception as e: print("❌ FAILED") print(f" Error: {e}") all_passed = False # If no device connected, skip WebDriverAgent check if not all_passed: print("-" * 50) print("❌ System check failed. Please fix the issues above.") return False # Check 3: WebDriverAgent running print(f"3. Checking WebDriverAgent ({wda_url})...", end=" ") try: conn = XCTestConnection(wda_url=wda_url) if conn.is_wda_ready(): print("✅ OK") # Get WDA status for additional info status = conn.get_wda_status() if status: session_id = status.get("sessionId", "N/A") print(f" Session ID: {session_id}") else: print("❌ FAILED") print(" Error: WebDriverAgent is not running or not accessible.") print(" Solution:") print(" 1. Run WebDriverAgent on your iOS device via Xcode") print(" 2. For USB: Set up port forwarding: iproxy 8100 8100") print( " 3. For WiFi: Use device IP, e.g., --wda-url http://192.168.1.100:8100" ) print(" 4. Verify in browser: open http://localhost:8100/status") print("\n Quick setup guide:") print( " git clone https://github.com/appium/WebDriverAgent.git && cd WebDriverAgent" ) print(" ./Scripts/bootstrap.sh") print(" open WebDriverAgent.xcodeproj") print(" # Configure signing, then Product > Test (Cmd+U)") all_passed = False except Exception as e: print("❌ FAILED") print(f" Error: {e}") all_passed = False print("-" * 50) if all_passed: print("✅ All system checks passed!\n") else: print("❌ System check failed. Please fix the issues above.") return all_passed def check_model_api(base_url: str, api_key: str, model_name: str) -> bool: """ Check if the model API is accessible and the specified model exists. Checks: 1. Network connectivity to the API endpoint 2. Model exists in the available models list Args: base_url: The API base URL model_name: The model name to check Returns: True if all checks pass, False otherwise. """ print("🔍 Checking model API...") print("-" * 50) all_passed = True # Check 1: Network connectivity print(f"1. Checking API connectivity ({base_url})...", end=" ") try: # Parse the URL to get host and port parsed = urlparse(base_url) # Create OpenAI client client = OpenAI(base_url=base_url, api_key=api_key, timeout=10.0) # Try to list models (this tests connectivity) models_response = client.models.list() available_models = [model.id for model in models_response.data] print("✅ OK") # Check 2: Model exists print(f"2. Checking model '{model_name}'...", end=" ") if model_name in available_models: print("✅ OK") else: print("❌ FAILED") print(f" Error: Model '{model_name}' not found.") print(f" Available models:") for m in available_models[:10]: # Show first 10 models print(f" - {m}") if len(available_models) > 10: print(f" ... and {len(available_models) - 10} more") all_passed = False except Exception as e: print("❌ FAILED") error_msg = str(e) # Provide more specific error messages if "Connection refused" in error_msg or "Connection error" in error_msg: print(f" Error: Cannot connect to {base_url}") print(" Solution:") print(" 1. Check if the model server is running") print(" 2. Verify the base URL is correct") print(f" 3. Try: curl {base_url}/models") elif "timed out" in error_msg.lower() or "timeout" in error_msg.lower(): print(f" Error: Connection to {base_url} timed out") print(" Solution:") print(" 1. Check your network connection") print(" 2. Verify the server is responding") elif ( "Name or service not known" in error_msg or "nodename nor servname" in error_msg ): print(f" Error: Cannot resolve hostname") print(" Solution:") print(" 1. Check the URL is correct") print(" 2. Verify DNS settings") else: print(f" Error: {error_msg}") all_passed = False print("-" * 50) if all_passed: print("✅ Model API checks passed!\n") else: print("❌ Model API check failed. Please fix the issues above.") return all_passed def parse_args() -> argparse.Namespace: """Parse command line arguments.""" parser = argparse.ArgumentParser( description="Phone Agent iOS - AI-powered iOS phone automation", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: # Run with default settings python ios.py # Specify model endpoint python ios.py --base-url http://localhost:8000/v1 # Run with specific device python ios.py --device-id # Use WiFi connection python ios.py --wda-url http://192.168.1.100:8100 # List connected devices python ios.py --list-devices # Check device pairing status python ios.py --pair # List supported apps python ios.py --list-apps # Run a specific task python ios.py "Open Safari and search for iPhone tips" """, ) # Model options parser.add_argument( "--base-url", type=str, default=os.getenv("PHONE_AGENT_BASE_URL", "http://localhost:8000/v1"), help="Model API base URL", ) parser.add_argument( "--api-key", type=str, default="EMPTY", help="Model API KEY", ) parser.add_argument( "--model", type=str, default=os.getenv("PHONE_AGENT_MODEL", "autoglm-phone-9b"), help="Model name", ) parser.add_argument( "--max-steps", type=int, default=int(os.getenv("PHONE_AGENT_MAX_STEPS", "100")), help="Maximum steps per task", ) # iOS Device options parser.add_argument( "--device-id", "-d", type=str, default=os.getenv("PHONE_AGENT_DEVICE_ID"), help="iOS device UDID", ) parser.add_argument( "--wda-url", type=str, default=os.getenv("PHONE_AGENT_WDA_URL", "http://localhost:8100"), help="WebDriverAgent URL (default: http://localhost:8100)", ) parser.add_argument( "--list-devices", action="store_true", help="List connected iOS devices and exit" ) parser.add_argument( "--pair", action="store_true", help="Pair with iOS device (required for some operations)", ) parser.add_argument( "--wda-status", action="store_true", help="Show WebDriverAgent status and exit", ) # Other options parser.add_argument( "--quiet", "-q", action="store_true", help="Suppress verbose output" ) parser.add_argument( "--list-apps", action="store_true", help="List supported apps and exit" ) parser.add_argument( "--lang", type=str, choices=["cn", "en"], default=os.getenv("PHONE_AGENT_LANG", "cn"), help="Language for system prompt (cn or en, default: cn)", ) parser.add_argument( "task", nargs="?", type=str, help="Task to execute (interactive mode if not provided)", ) return parser.parse_args() def handle_device_commands(args) -> bool: """ Handle iOS device-related commands. Returns: True if a device command was handled (should exit), False otherwise. """ conn = XCTestConnection(wda_url=args.wda_url) # Handle --list-devices if args.list_devices: devices = list_devices() if not devices: print("No iOS devices connected.") print("\nTroubleshooting:") print(" 1. Connect device via USB") print(" 2. Unlock device and trust this computer") print(" 3. Run: idevice_id -l") else: print("Connected iOS devices:") print("-" * 70) for device in devices: conn_type = device.connection_type.value model_info = f"{device.model}" if device.model else "Unknown" ios_info = f"iOS {device.ios_version}" if device.ios_version else "" name_info = device.device_name or "Unnamed" print(f" ✓ {name_info}") print(f" UDID: {device.device_id}") print(f" Model: {model_info}") print(f" OS: {ios_info}") print(f" Connection: {conn_type}") print("-" * 70) return True # Handle --pair if args.pair: print("Pairing with iOS device...") success, message = conn.pair_device(args.device_id) print(f"{'✓' if success else '✗'} {message}") return True # Handle --wda-status if args.wda_status: print(f"Checking WebDriverAgent status at {args.wda_url}...") print("-" * 50) if conn.is_wda_ready(): print("✓ WebDriverAgent is running") status = conn.get_wda_status() if status: print(f"\nStatus details:") value = status.get("value", {}) print(f" Session ID: {status.get('sessionId', 'N/A')}") print(f" Build: {value.get('build', {}).get('time', 'N/A')}") current_app = value.get("currentApp", {}) if current_app: print(f"\nCurrent App:") print(f" Bundle ID: {current_app.get('bundleId', 'N/A')}") print(f" Process ID: {current_app.get('pid', 'N/A')}") else: print("✗ WebDriverAgent is not running") print("\nPlease start WebDriverAgent on your iOS device:") print(" 1. Open WebDriverAgent.xcodeproj in Xcode") print(" 2. Select your device") print(" 3. Run WebDriverAgentRunner (Product > Test or Cmd+U)") print(f" 4. For USB: Run port forwarding: iproxy 8100 8100") return True return False def main(): """Main entry point.""" args = parse_args() # Handle --list-apps (no system check needed) if args.list_apps: print("Supported iOS apps:") print("\nNote: For iOS apps, Bundle IDs are configured in:") print(" phone_agent/config/apps_ios.py") print("\nCurrently configured apps:") for app in sorted(list_supported_apps()): print(f" - {app}") print( "\nTo add iOS apps, find the Bundle ID and add to APP_PACKAGES_IOS dictionary." ) return # Handle device commands (these may need partial system checks) if handle_device_commands(args): return # Run system requirements check before proceeding if not check_system_requirements(wda_url=args.wda_url): sys.exit(1) # Check model API connectivity and model availability # if not check_model_api(args.base_url, args.api_key, args.model): # sys.exit(1) # Create configurations model_config = ModelConfig( base_url=args.base_url, model_name=args.model, api_key=args.api_key ) agent_config = IOSAgentConfig( max_steps=args.max_steps, wda_url=args.wda_url, device_id=args.device_id, verbose=not args.quiet, lang=args.lang, ) # Create iOS agent agent = IOSPhoneAgent( model_config=model_config, agent_config=agent_config, ) # Print header print("=" * 50) print("Phone Agent iOS - AI-powered iOS automation") print("=" * 50) print(f"Model: {model_config.model_name}") print(f"Base URL: {model_config.base_url}") print(f"WDA URL: {args.wda_url}") print(f"Max Steps: {agent_config.max_steps}") print(f"Language: {agent_config.lang}") # Show device info devices = list_devices() if agent_config.device_id: print(f"Device: {agent_config.device_id}") elif devices: device = devices[0] print(f"Device: {device.device_name or device.device_id[:16]}") print(f" {device.model}, iOS {device.ios_version}") print("=" * 50) # Run with provided task or enter interactive mode if args.task: print(f"\nTask: {args.task}\n") result = agent.run(args.task) print(f"\nResult: {result}") else: # Interactive mode print("\nEntering interactive mode. Type 'quit' to exit.\n") while True: try: task = input("Enter your task: ").strip() if task.lower() in ("quit", "exit", "q"): print("Goodbye!") break if not task: continue print() result = agent.run(task) print(f"\nResult: {result}\n") agent.reset() except KeyboardInterrupt: print("\n\nInterrupted. Goodbye!") break except Exception as e: print(f"\nError: {e}\n") if __name__ == "__main__": main()