diff --git a/README.md b/README.md index 3468720..abd39bd 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,9 @@ python main.py --base-url http://localhost:8000/v1 --model "autoglm-phone-9b" # 指定模型端点 python main.py --base-url http://localhost:8000/v1 "打开美团搜索附近的火锅店" +# 使用 API Key 进行认证 +python main.py --apikey sk-xxxxx + # 使用英文 system prompt python main.py --lang en --base-url http://localhost:8000/v1 "Open Chrome browser" @@ -255,6 +258,7 @@ conn.disconnect("192.168.1.100:5555") |-------------------------|------------------|----------------------------| | `PHONE_AGENT_BASE_URL` | 模型 API 地址 | `http://localhost:8000/v1` | | `PHONE_AGENT_MODEL` | 模型名称 | `autoglm-phone-9b` | +| `PHONE_AGENT_API_KEY` | 模型认证 API Key | `EMPTY` | | `PHONE_AGENT_MAX_STEPS` | 每个任务最大步数 | `100` | | `PHONE_AGENT_DEVICE_ID` | ADB 设备 ID | (自动检测) | | `PHONE_AGENT_LANG` | 语言 (`cn` 或 `en`) | `cn` | diff --git a/README_en.md b/README_en.md index 10c99e8..269016c 100644 --- a/README_en.md +++ b/README_en.md @@ -52,7 +52,7 @@ Python 3.10 or higher is recommended. **Please carefully check the relevant permissions** -![Permissions](resources/screenshot-20251209-181423.png) +![Permissions](resources/screenshot-20251210-120416.png) ### 4. Install ADB Keyboard (for Text Input) @@ -121,6 +121,9 @@ python main.py --base-url http://localhost:8000/v1 --model "autoglm-phone-9b" # Specify model endpoint python main.py --base-url http://localhost:8000/v1 "Open Meituan and search for nearby hotpot restaurants" +# Use API key for authentication +python main.py --apikey sk-xxxxx + # Use English system prompt python main.py --lang en --base-url http://localhost:8000/v1 "Open Chrome browser" @@ -158,7 +161,7 @@ Phone Agent supports remote ADB debugging via WiFi/network, allowing device cont Ensure the phone and computer are on the same WiFi network, as shown below: -![Enable Wireless Debugging](resources/setting.png) +![Enable Wireless Debugging](resources/screenshot-20251210-120630.png) #### Use Standard ADB Commands on Computer @@ -248,6 +251,7 @@ You can directly modify the corresponding config files to enhance model capabili |---------------------------|---------------------------|------------------------------| | `PHONE_AGENT_BASE_URL` | Model API URL | `http://localhost:8000/v1` | | `PHONE_AGENT_MODEL` | Model name | `autoglm-phone-9b` | +| `PHONE_AGENT_API_KEY` | API key for authentication| `EMPTY` | | `PHONE_AGENT_MAX_STEPS` | Maximum steps per task | `100` | | `PHONE_AGENT_DEVICE_ID` | ADB device ID | (auto-detect) | | `PHONE_AGENT_LANG` | Language (`cn` or `en`) | `cn` | diff --git a/main.py b/main.py index 6dc104f..79deefd 100644 --- a/main.py +++ b/main.py @@ -8,6 +8,7 @@ Usage: Environment Variables: PHONE_AGENT_BASE_URL: Model API base URL (default: http://localhost:8000/v1) PHONE_AGENT_MODEL: Model name (default: autoglm-phone-9b) + PHONE_AGENT_API_KEY: API key for model authentication (default: EMPTY) PHONE_AGENT_MAX_STEPS: Maximum steps per task (default: 100) PHONE_AGENT_DEVICE_ID: ADB device ID for multi-device setups """ @@ -166,7 +167,7 @@ def check_system_requirements() -> bool: return all_passed -def check_model_api(base_url: str, model_name: str) -> bool: +def check_model_api(base_url: str, model_name: str, api_key: str = "EMPTY") -> bool: """ Check if the model API is accessible and the specified model exists. @@ -177,6 +178,7 @@ def check_model_api(base_url: str, model_name: str) -> bool: Args: base_url: The API base URL model_name: The model name to check + api_key: The API key for authentication Returns: True if all checks pass, False otherwise. @@ -193,7 +195,7 @@ def check_model_api(base_url: str, model_name: str) -> bool: parsed = urlparse(base_url) # Create OpenAI client - client = OpenAI(base_url=base_url, api_key="EMPTY", timeout=10.0) + client = OpenAI(base_url=base_url, api_key=api_key, timeout=10.0) # Try to list models (this tests connectivity) models_response = client.models.list() @@ -202,6 +204,7 @@ def check_model_api(base_url: str, model_name: str) -> bool: print("✅ OK") # Check 2: Model exists + """ print(f"2. Checking model '{model_name}'...", end=" ") if model_name in available_models: print("✅ OK") @@ -214,6 +217,7 @@ def check_model_api(base_url: str, model_name: str) -> bool: if len(available_models) > 10: print(f" ... and {len(available_models) - 10} more") all_passed = False + """ except Exception as e: print("❌ FAILED") @@ -267,6 +271,9 @@ Examples: # Specify model endpoint python main.py --base-url http://localhost:8000/v1 + # Use API key for authentication + python main.py --apikey sk-xxxxx + # Run with specific device python main.py --device-id emulator-5554 @@ -299,6 +306,13 @@ Examples: help="Model name", ) + parser.add_argument( + "--apikey", + type=str, + default=os.getenv("PHONE_AGENT_API_KEY", "EMPTY"), + help="API key for model authentication", + ) + parser.add_argument( "--max-steps", type=int, @@ -462,13 +476,14 @@ def main(): sys.exit(1) # Check model API connectivity and model availability - if not check_model_api(args.base_url, args.model): + if not check_model_api(args.base_url, args.model, args.apikey): sys.exit(1) # Create configurations model_config = ModelConfig( base_url=args.base_url, model_name=args.model, + api_key=args.apikey, ) agent_config = AgentConfig( diff --git a/phone_agent/config/prompts_en.py b/phone_agent/config/prompts_en.py index 0da3a2b..d734b80 100644 --- a/phone_agent/config/prompts_en.py +++ b/phone_agent/config/prompts_en.py @@ -1,10 +1,14 @@ """System prompts for the AI agent.""" + from datetime import datetime today = datetime.today() formatted_date = today.strftime("%Y-%m-%d, %A") -SYSTEM_PROMPT = "The current date: " + formatted_date + ''' +SYSTEM_PROMPT = ( + "The current date: " + + formatted_date + + """ # Setup You are a professional Android operation agent assistant that can fulfill the user's high-level instructions. Given a screenshot of the Android interface at each step, you first analyze the situation, then plan the best course of action using Python-style pseudo-code. @@ -16,7 +20,7 @@ Provide the action: Use ... to return a single line of pseudo-c Your output should STRICTLY follow the format: -[Your throught] +[Your thought] [Your operation code] @@ -71,4 +75,5 @@ REMEMBER: - Think before you act: Always analyze the current UI and the best course of action before executing any step, and output in part. - Only ONE LINE of action in part per response: Each step must contain exactly one line of executable code. - Generate execution code strictly according to format requirements. -''' \ No newline at end of file +""" +) diff --git a/phone_agent/config/prompts_zh.py b/phone_agent/config/prompts_zh.py index 2e5f29d..dc85476 100644 --- a/phone_agent/config/prompts_zh.py +++ b/phone_agent/config/prompts_zh.py @@ -1,4 +1,5 @@ """System prompts for the AI agent.""" + from datetime import datetime today = datetime.today() @@ -6,7 +7,10 @@ weekday_names = ["星期一", "星期二", "星期三", "星期四", "星期五" weekday = weekday_names[today.weekday()] formatted_date = today.strftime("%Y年%m月%d日") + " " + weekday -SYSTEM_PROMPT = "今天的日期是: " + formatted_date + ''' +SYSTEM_PROMPT = ( + "今天的日期是: " + + formatted_date + + """ 你是一个智能体分析专家,可以根据操作历史和当前状态图执行一系列操作来完成任务。 你必须严格按照要求输出以下格式: {think} @@ -69,4 +73,5 @@ SYSTEM_PROMPT = "今天的日期是: " + formatted_date + ''' 16. 在做游戏任务时如果在战斗页面如果有自动战斗一定要开启自动战斗,如果多轮历史状态相似要检查自动战斗是否开启。 17. 如果没有合适的搜索结果,可能是因为搜索页面不对,请返回到搜索页面的上一级尝试重新搜索,如果尝试三次返回上一级搜索后仍然没有符合要求的结果,执行 finish(message="原因")。 18. 在结束任务前请一定要仔细检查任务是否完整准确的完成,如果出现错选、漏选、多选的情况,请返回之前的步骤进行纠正。 -''' \ No newline at end of file +""" +) diff --git a/resources/screenshot-20251210-120416.png b/resources/screenshot-20251210-120416.png new file mode 100644 index 0000000..025a814 Binary files /dev/null and b/resources/screenshot-20251210-120416.png differ diff --git a/resources/screenshot-20251210-120630.png b/resources/screenshot-20251210-120630.png new file mode 100644 index 0000000..f533e3f Binary files /dev/null and b/resources/screenshot-20251210-120630.png differ