Merge pull request #39 from zai-org/add-api-key

Add api key
This commit is contained in:
yongbin-buaa
2025-12-10 18:27:39 +08:00
committed by GitHub
7 changed files with 43 additions and 10 deletions

View File

@@ -127,6 +127,9 @@ python main.py --base-url http://localhost:8000/v1 --model "autoglm-phone-9b"
# 指定模型端点
python main.py --base-url http://localhost:8000/v1 "打开美团搜索附近的火锅店"
# 使用 API Key 进行认证
python main.py --apikey sk-xxxxx
# 使用英文 system prompt
python main.py --lang en --base-url http://localhost:8000/v1 "Open Chrome browser"
@@ -255,6 +258,7 @@ conn.disconnect("192.168.1.100:5555")
|-------------------------|------------------|----------------------------|
| `PHONE_AGENT_BASE_URL` | 模型 API 地址 | `http://localhost:8000/v1` |
| `PHONE_AGENT_MODEL` | 模型名称 | `autoglm-phone-9b` |
| `PHONE_AGENT_API_KEY` | 模型认证 API Key | `EMPTY` |
| `PHONE_AGENT_MAX_STEPS` | 每个任务最大步数 | `100` |
| `PHONE_AGENT_DEVICE_ID` | ADB 设备 ID | (自动检测) |
| `PHONE_AGENT_LANG` | 语言 (`cn` 或 `en`) | `cn` |

View File

@@ -52,7 +52,7 @@ Python 3.10 or higher is recommended.
**Please carefully check the relevant permissions**
![Permissions](resources/screenshot-20251209-181423.png)
![Permissions](resources/screenshot-20251210-120416.png)
### 4. Install ADB Keyboard (for Text Input)
@@ -121,6 +121,9 @@ python main.py --base-url http://localhost:8000/v1 --model "autoglm-phone-9b"
# Specify model endpoint
python main.py --base-url http://localhost:8000/v1 "Open Meituan and search for nearby hotpot restaurants"
# Use API key for authentication
python main.py --apikey sk-xxxxx
# Use English system prompt
python main.py --lang en --base-url http://localhost:8000/v1 "Open Chrome browser"
@@ -158,7 +161,7 @@ Phone Agent supports remote ADB debugging via WiFi/network, allowing device cont
Ensure the phone and computer are on the same WiFi network, as shown below:
![Enable Wireless Debugging](resources/setting.png)
![Enable Wireless Debugging](resources/screenshot-20251210-120630.png)
#### Use Standard ADB Commands on Computer
@@ -248,6 +251,7 @@ You can directly modify the corresponding config files to enhance model capabili
|---------------------------|---------------------------|------------------------------|
| `PHONE_AGENT_BASE_URL` | Model API URL | `http://localhost:8000/v1` |
| `PHONE_AGENT_MODEL` | Model name | `autoglm-phone-9b` |
| `PHONE_AGENT_API_KEY` | API key for authentication | `EMPTY` |
| `PHONE_AGENT_MAX_STEPS` | Maximum steps per task | `100` |
| `PHONE_AGENT_DEVICE_ID` | ADB device ID | (auto-detect) |
| `PHONE_AGENT_LANG` | Language (`cn` or `en`) | `cn` |

21
main.py
View File

@@ -8,6 +8,7 @@ Usage:
Environment Variables:
PHONE_AGENT_BASE_URL: Model API base URL (default: http://localhost:8000/v1)
PHONE_AGENT_MODEL: Model name (default: autoglm-phone-9b)
PHONE_AGENT_API_KEY: API key for model authentication (default: EMPTY)
PHONE_AGENT_MAX_STEPS: Maximum steps per task (default: 100)
PHONE_AGENT_DEVICE_ID: ADB device ID for multi-device setups
"""
@@ -166,7 +167,7 @@ def check_system_requirements() -> bool:
return all_passed
def check_model_api(base_url: str, model_name: str) -> bool:
def check_model_api(base_url: str, model_name: str, api_key: str = "EMPTY") -> bool:
"""
Check if the model API is accessible and the specified model exists.
@@ -177,6 +178,7 @@ def check_model_api(base_url: str, model_name: str) -> bool:
Args:
base_url: The API base URL
model_name: The model name to check
api_key: The API key for authentication
Returns:
True if all checks pass, False otherwise.
@@ -193,7 +195,7 @@ def check_model_api(base_url: str, model_name: str) -> bool:
parsed = urlparse(base_url)
# Create OpenAI client
client = OpenAI(base_url=base_url, api_key="EMPTY", timeout=10.0)
client = OpenAI(base_url=base_url, api_key=api_key, timeout=10.0)
# Try to list models (this tests connectivity)
models_response = client.models.list()
@@ -202,6 +204,7 @@ def check_model_api(base_url: str, model_name: str) -> bool:
print("✅ OK")
# Check 2: Model exists
"""
print(f"2. Checking model '{model_name}'...", end=" ")
if model_name in available_models:
print("✅ OK")
@@ -214,6 +217,7 @@ def check_model_api(base_url: str, model_name: str) -> bool:
if len(available_models) > 10:
print(f" ... and {len(available_models) - 10} more")
all_passed = False
"""
except Exception as e:
print("❌ FAILED")
@@ -267,6 +271,9 @@ Examples:
# Specify model endpoint
python main.py --base-url http://localhost:8000/v1
# Use API key for authentication
python main.py --apikey sk-xxxxx
# Run with specific device
python main.py --device-id emulator-5554
@@ -299,6 +306,13 @@ Examples:
help="Model name",
)
parser.add_argument(
"--apikey",
type=str,
default=os.getenv("PHONE_AGENT_API_KEY", "EMPTY"),
help="API key for model authentication",
)
parser.add_argument(
"--max-steps",
type=int,
@@ -462,13 +476,14 @@ def main():
sys.exit(1)
# Check model API connectivity and model availability
if not check_model_api(args.base_url, args.model):
if not check_model_api(args.base_url, args.model, args.apikey):
sys.exit(1)
# Create configurations
model_config = ModelConfig(
base_url=args.base_url,
model_name=args.model,
api_key=args.apikey,
)
agent_config = AgentConfig(

View File

@@ -1,10 +1,14 @@
"""System prompts for the AI agent."""
from datetime import datetime
today = datetime.today()
formatted_date = today.strftime("%Y-%m-%d, %A")
SYSTEM_PROMPT = "The current date: " + formatted_date + '''
SYSTEM_PROMPT = (
"The current date: "
+ formatted_date
+ """
# Setup
You are a professional Android operation agent assistant that can fulfill the user's high-level instructions. Given a screenshot of the Android interface at each step, you first analyze the situation, then plan the best course of action using Python-style pseudo-code.
@@ -16,7 +20,7 @@ Provide the action: Use <answer>...</answer> to return a single line of pseudo-c
Your output should STRICTLY follow the format:
<think>
[Your throught]
[Your thought]
</think>
<answer>
[Your operation code]
@@ -71,4 +75,5 @@ REMEMBER:
- Think before you act: Always analyze the current UI and the best course of action before executing any step, and output in <think> part.
- Only ONE LINE of action in <answer> part per response: Each step must contain exactly one line of executable code.
- Generate execution code strictly according to format requirements.
'''
"""
)

View File

@@ -1,4 +1,5 @@
"""System prompts for the AI agent."""
from datetime import datetime
today = datetime.today()
@@ -6,7 +7,10 @@ weekday_names = ["星期一", "星期二", "星期三", "星期四", "星期五"
weekday = weekday_names[today.weekday()]
formatted_date = today.strftime("%Y年%m月%d") + " " + weekday
SYSTEM_PROMPT = "今天的日期是: " + formatted_date + '''
SYSTEM_PROMPT = (
"今天的日期是: "
+ formatted_date
+ """
你是一个智能体分析专家,可以根据操作历史和当前状态图执行一系列操作来完成任务。
你必须严格按照要求输出以下格式:
<think>{think}</think>
@@ -69,4 +73,5 @@ SYSTEM_PROMPT = "今天的日期是: " + formatted_date + '''
16. 在做游戏任务时如果在战斗页面如果有自动战斗一定要开启自动战斗,如果多轮历史状态相似要检查自动战斗是否开启。
17. 如果没有合适的搜索结果,可能是因为搜索页面不对,请返回到搜索页面的上一级尝试重新搜索,如果尝试三次返回上一级搜索后仍然没有符合要求的结果,执行 finish(message="原因")。
18. 在结束任务前请一定要仔细检查任务是否完整准确的完成,如果出现错选、漏选、多选的情况,请返回之前的步骤进行纠正。
'''
"""
)

Binary file not shown.

After

Width:  |  Height:  |  Size: 268 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 228 KiB