draft init

This commit is contained in:
zRzRzRzRzRzRzR
2025-12-08 23:54:29 +08:00
commit 7e1785e08e
31 changed files with 3639 additions and 0 deletions

View File

@@ -0,0 +1,51 @@
"""ADB utilities for Android device interaction."""
from phone_agent.adb.connection import (
ADBConnection,
ConnectionType,
DeviceInfo,
list_devices,
quick_connect,
)
from phone_agent.adb.device import (
back,
double_tap,
get_current_app,
home,
launch_app,
long_press,
swipe,
tap,
)
from phone_agent.adb.input import (
clear_text,
detect_and_set_adb_keyboard,
restore_keyboard,
type_text,
)
from phone_agent.adb.screenshot import get_screenshot
# Public API of the package: every name below is imported at the top of this
# module from one of the phone_agent.adb submodules and re-exported here.
__all__ = [
    # Screenshot (from phone_agent.adb.screenshot)
    "get_screenshot",
    # Input (from phone_agent.adb.input)
    "type_text",
    "clear_text",
    "detect_and_set_adb_keyboard",
    "restore_keyboard",
    # Device control (from phone_agent.adb.device)
    "get_current_app",
    "tap",
    "swipe",
    "back",
    "home",
    "double_tap",
    "long_press",
    "launch_app",
    # Connection management (from phone_agent.adb.connection)
    "ADBConnection",
    "DeviceInfo",
    "ConnectionType",
    "quick_connect",
    "list_devices",
]

View File

@@ -0,0 +1,350 @@
"""ADB connection management for local and remote devices."""
import subprocess
import time
from dataclasses import dataclass
from enum import Enum
from typing import Optional
class ConnectionType(Enum):
    """Type of ADB connection.

    Within this module, ``ADBConnection.list_devices`` assigns ``REMOTE`` to
    any device whose ID contains ``":"`` and ``USB`` to everything else
    (emulators included); it never assigns ``WIFI``.
    """

    USB = "usb"
    WIFI = "wifi"
    REMOTE = "remote"
@dataclass
class DeviceInfo:
    """Information about a connected device."""

    # First whitespace-separated field of a device line in `adb devices -l`.
    device_id: str
    # Second field of that line; "device" is the value is_connected() treats
    # as a usable connection.
    status: str
    # How the device is attached, as classified by list_devices().
    connection_type: ConnectionType
    # Value of the "model:" token on the device line, if one was present.
    model: str | None = None
    # NOTE(review): nothing in this module fills this in; it stays None
    # unless a caller sets it.
    android_version: str | None = None
class ADBConnection:
    """
    Manages ADB connections to Android devices.

    Supports USB, WiFi, and remote TCP/IP connections. Every operation shells
    out to the ADB executable given at construction time; failures are
    reported through return values rather than raised.

    Example:
        >>> conn = ADBConnection()
        >>> # Connect to remote device
        >>> conn.connect("192.168.1.100:5555")
        >>> # List devices
        >>> devices = conn.list_devices()
        >>> # Disconnect
        >>> conn.disconnect("192.168.1.100:5555")
    """

    def __init__(self, adb_path: str = "adb"):
        """
        Initialize ADB connection manager.

        Args:
            adb_path: Path to ADB executable.
        """
        self.adb_path = adb_path

    @staticmethod
    def _interpret_connect_output(output: str, address: str) -> tuple[bool, str]:
        """Translate the combined output of ``adb connect`` into (success, message)."""
        lowered = output.lower()
        # "already connected" contains "connected", so the more specific
        # phrase has to be tested first or it can never be reported.
        if "already connected" in lowered:
            return True, f"Already connected to {address}"
        if "connected" in lowered:
            return True, f"Connected to {address}"
        return False, output.strip()

    @staticmethod
    def _parse_route_src(route_output: str) -> str | None:
        """Return the token following the first usable ``src`` in ``ip route`` output."""
        for line in route_output.split("\n"):
            tokens = line.split()
            for index, token in enumerate(tokens):
                if token == "src" and index + 1 < len(tokens):
                    return tokens[index + 1]
        return None

    @staticmethod
    def _parse_inet_address(addr_output: str) -> str | None:
        """Return the address of the first ``inet`` line in ``ip addr show`` output."""
        for line in addr_output.split("\n"):
            if "inet " in line:
                tokens = line.strip().split()
                if len(tokens) >= 2:
                    # Drop the "/prefix-length" suffix.
                    return tokens[1].split("/")[0]
        return None

    def connect(self, address: str, timeout: int = 10) -> tuple[bool, str]:
        """
        Connect to a remote device via TCP/IP.

        Args:
            address: Device address in format "host:port"
                (e.g., "192.168.1.100:5555"). Port 5555 is appended when no
                port is given.
            timeout: Connection timeout in seconds.

        Returns:
            Tuple of (success, message).

        Note:
            The remote device must have TCP/IP debugging enabled.
            On the device, run: adb tcpip 5555
        """
        if ":" not in address:
            address = f"{address}:5555"  # Default ADB port
        try:
            result = subprocess.run(
                [self.adb_path, "connect", address],
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired:
            return False, f"Connection timeout after {timeout}s"
        except Exception as e:
            return False, f"Connection error: {e}"
        return self._interpret_connect_output(result.stdout + result.stderr, address)

    def disconnect(self, address: str | None = None) -> tuple[bool, str]:
        """
        Disconnect from a remote device.

        Args:
            address: Device address to disconnect. If None, disconnects all.

        Returns:
            Tuple of (success, message). Success reflects the exit status of
            ``adb disconnect``; the message is adb's own output when it
            printed anything.
        """
        try:
            cmd = [self.adb_path, "disconnect"]
            if address:
                cmd.append(address)
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
        except Exception as e:
            return False, f"Disconnect error: {e}"
        output = (result.stdout + result.stderr).strip()
        if result.returncode != 0:
            return False, output or "Disconnect failed"
        return True, output or "Disconnected"

    def list_devices(self) -> list[DeviceInfo]:
        """
        List all connected devices.

        Returns:
            List of DeviceInfo objects; empty when adb cannot be run or
            reports no devices.
        """
        try:
            result = subprocess.run(
                [self.adb_path, "devices", "-l"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            devices = []
            # The first line of the output is a header, not a device.
            for line in result.stdout.strip().split("\n")[1:]:
                parts = line.split()
                if len(parts) < 2:
                    continue
                device_id, status = parts[0], parts[1]
                # An ID containing ":" is treated as a network address;
                # everything else, emulators included, is reported as USB.
                conn_type = (
                    ConnectionType.REMOTE if ":" in device_id else ConnectionType.USB
                )
                model = next(
                    (
                        part.split(":", 1)[1]
                        for part in parts[2:]
                        if part.startswith("model:")
                    ),
                    None,
                )
                devices.append(
                    DeviceInfo(
                        device_id=device_id,
                        status=status,
                        connection_type=conn_type,
                        model=model,
                    )
                )
            return devices
        except Exception as e:
            print(f"Error listing devices: {e}")
            return []

    def get_device_info(self, device_id: str | None = None) -> DeviceInfo | None:
        """
        Get detailed information about a device.

        Args:
            device_id: Device ID. If None, uses first available device.

        Returns:
            DeviceInfo or None if not found.
        """
        devices = self.list_devices()
        if not devices:
            return None
        if device_id is None:
            return devices[0]
        return next((d for d in devices if d.device_id == device_id), None)

    def is_connected(self, device_id: str | None = None) -> bool:
        """
        Check if a device is connected.

        Args:
            device_id: Device ID to check. If None, checks if any device is
                connected.

        Returns:
            True if a matching device reports the status "device",
            False otherwise.
        """
        return any(
            d.status == "device" and (device_id is None or d.device_id == device_id)
            for d in self.list_devices()
        )

    def enable_tcpip(
        self, port: int = 5555, device_id: str | None = None
    ) -> tuple[bool, str]:
        """
        Enable TCP/IP debugging on a USB-connected device.

        This allows subsequent wireless connections to the device.

        Args:
            port: TCP port for ADB (default: 5555).
            device_id: Device ID. If None, uses first available device.

        Returns:
            Tuple of (success, message).

        Note:
            The device must be connected via USB first.
            After this, you can disconnect USB and connect via WiFi.
        """
        try:
            cmd = [self.adb_path]
            if device_id:
                cmd.extend(["-s", device_id])
            cmd.extend(["tcpip", str(port)])
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
            output = result.stdout + result.stderr
            if "restarting" in output.lower() or result.returncode == 0:
                time.sleep(2)  # Wait for ADB to restart
                return True, f"TCP/IP mode enabled on port {port}"
            return False, output.strip()
        except Exception as e:
            return False, f"Error enabling TCP/IP: {e}"

    def get_device_ip(self, device_id: str | None = None) -> str | None:
        """
        Get the IP address of a connected device.

        Tries the ``src`` field of ``ip route`` first and falls back to the
        ``inet`` address of the ``wlan0`` interface.

        Args:
            device_id: Device ID. If None, uses first available device.

        Returns:
            IP address string or None if not found.
        """
        try:
            base_cmd = [self.adb_path]
            if device_id:
                base_cmd.extend(["-s", device_id])

            result = subprocess.run(
                base_cmd + ["shell", "ip", "route"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            ip_address = self._parse_route_src(result.stdout)
            if ip_address is not None:
                return ip_address

            # Fallback: build the command from the bare prefix so that the
            # device receives exactly `ip addr show wlan0`.
            result = subprocess.run(
                base_cmd + ["shell", "ip", "addr", "show", "wlan0"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            return self._parse_inet_address(result.stdout)
        except Exception as e:
            print(f"Error getting device IP: {e}")
            return None

    def restart_server(self) -> tuple[bool, str]:
        """
        Restart the ADB server.

        Returns:
            Tuple of (success, message). Success requires ``adb start-server``
            to exit with status 0.
        """
        try:
            # The result of kill-server is deliberately ignored: only the
            # outcome of the subsequent start matters.
            subprocess.run(
                [self.adb_path, "kill-server"], capture_output=True, timeout=5
            )
            time.sleep(1)
            started = subprocess.run(
                [self.adb_path, "start-server"],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except Exception as e:
            return False, f"Error restarting server: {e}"
        if started.returncode != 0:
            detail = (started.stdout + started.stderr).strip()
            return False, f"Error restarting server: {detail or 'start-server failed'}"
        return True, "ADB server restarted"
def quick_connect(address: str) -> tuple[bool, str]:
    """
    Connect to a remote device using a throwaway default ADBConnection.

    Args:
        address: Device address (e.g., "192.168.1.100" or "192.168.1.100:5555").

    Returns:
        Tuple of (success, message), exactly as returned by
        ADBConnection.connect.
    """
    return ADBConnection().connect(address)
def list_devices() -> list[DeviceInfo]:
    """
    List connected devices using a throwaway default ADBConnection.

    Returns:
        List of DeviceInfo objects, exactly as returned by
        ADBConnection.list_devices.
    """
    return ADBConnection().list_devices()

224
phone_agent/adb/device.py Normal file
View File

@@ -0,0 +1,224 @@
"""Device control utilities for Android automation."""
import os
import subprocess
import time
from typing import List, Optional, Tuple
from phone_agent.config.apps import APP_PACKAGES
def get_current_app(device_id: str | None = None) -> str:
    """
    Report which known app currently has window focus.

    Runs ``dumpsys window`` on the device and looks for a package from
    APP_PACKAGES on the lines that mention the focused window or app.

    Args:
        device_id: Optional ADB device ID for multi-device setups.

    Returns:
        The matching app name from APP_PACKAGES, or "System Home" when no
        known package appears on a focus line.
    """
    window_dump = subprocess.run(
        [*_get_adb_prefix(device_id), "shell", "dumpsys", "window"],
        capture_output=True,
        text=True,
    ).stdout

    focus_markers = ("mCurrentFocus", "mFocusedApp")
    for entry in window_dump.split("\n"):
        if not any(marker in entry for marker in focus_markers):
            continue
        for label, pkg in APP_PACKAGES.items():
            if pkg in entry:
                return label
    return "System Home"
def tap(x: int, y: int, device_id: str | None = None, delay: float = 1.0) -> None:
    """
    Send a single tap to the device, then pause.

    Args:
        x: X coordinate.
        y: Y coordinate.
        device_id: Optional ADB device ID.
        delay: Seconds to sleep once the tap command has returned.
    """
    tap_cmd = [*_get_adb_prefix(device_id), "shell", "input", "tap", str(x), str(y)]
    subprocess.run(tap_cmd, capture_output=True)
    time.sleep(delay)
def double_tap(
    x: int, y: int, device_id: str | None = None, delay: float = 1.0
) -> None:
    """
    Send two taps at the same point 0.1 s apart, then pause.

    Args:
        x: X coordinate.
        y: Y coordinate.
        device_id: Optional ADB device ID.
        delay: Seconds to sleep once the second tap has returned.
    """
    tap_cmd = [*_get_adb_prefix(device_id), "shell", "input", "tap", str(x), str(y)]
    subprocess.run(tap_cmd, capture_output=True)
    time.sleep(0.1)  # gap between the two taps
    subprocess.run(tap_cmd, capture_output=True)
    time.sleep(delay)
def long_press(
    x: int,
    y: int,
    duration_ms: int = 3000,
    device_id: str | None = None,
    delay: float = 1.0,
) -> None:
    """
    Hold a press at one point, then pause.

    Implemented as an ``input swipe`` whose start and end points are the
    same, so the touch stays in place for the whole duration.

    Args:
        x: X coordinate.
        y: Y coordinate.
        duration_ms: Duration of press in milliseconds.
        device_id: Optional ADB device ID.
        delay: Seconds to sleep once the command has returned.
    """
    point = [str(x), str(y)]
    press_cmd = [
        *_get_adb_prefix(device_id),
        "shell",
        "input",
        "swipe",
        *point,
        *point,
        str(duration_ms),
    ]
    subprocess.run(press_cmd, capture_output=True)
    time.sleep(delay)
def swipe(
    start_x: int,
    start_y: int,
    end_x: int,
    end_y: int,
    duration_ms: int | None = None,
    device_id: str | None = None,
    delay: float = 1.0,
) -> None:
    """
    Drag from one point to another, then pause.

    Args:
        start_x: Starting X coordinate.
        start_y: Starting Y coordinate.
        end_x: Ending X coordinate.
        end_y: Ending Y coordinate.
        duration_ms: Duration of swipe in milliseconds. When None it is
            derived from the squared distance divided by 1000 and clamped to
            the range 1000-2000 ms.
        device_id: Optional ADB device ID.
        delay: Seconds to sleep once the command has returned.
    """
    if duration_ms is None:
        delta_x = start_x - end_x
        delta_y = start_y - end_y
        estimate = int((delta_x**2 + delta_y**2) / 1000)
        # Keep the automatic duration between 1000 and 2000 ms.
        duration_ms = max(1000, min(estimate, 2000))

    coordinates = [str(value) for value in (start_x, start_y, end_x, end_y)]
    swipe_cmd = [
        *_get_adb_prefix(device_id),
        "shell",
        "input",
        "swipe",
        *coordinates,
        str(duration_ms),
    ]
    subprocess.run(swipe_cmd, capture_output=True)
    time.sleep(delay)
def back(device_id: str | None = None, delay: float = 1.0) -> None:
    """
    Send the back key event to the device, then pause.

    Args:
        device_id: Optional ADB device ID.
        delay: Seconds to sleep once the command has returned.
    """
    # "4" is the numeric key code this function sends for the back button.
    key_cmd = [*_get_adb_prefix(device_id), "shell", "input", "keyevent", "4"]
    subprocess.run(key_cmd, capture_output=True)
    time.sleep(delay)
def home(device_id: str | None = None, delay: float = 1.0) -> None:
    """
    Send the home key event to the device, then pause.

    Args:
        device_id: Optional ADB device ID.
        delay: Seconds to sleep once the command has returned.
    """
    key_cmd = [
        *_get_adb_prefix(device_id),
        "shell",
        "input",
        "keyevent",
        "KEYCODE_HOME",
    ]
    subprocess.run(key_cmd, capture_output=True)
    time.sleep(delay)
def launch_app(app_name: str, device_id: str | None = None, delay: float = 1.0) -> bool:
    """
    Start a known app through the ``monkey`` tool, then pause.

    Args:
        app_name: The app name (must be a key of APP_PACKAGES).
        device_id: Optional ADB device ID.
        delay: Seconds to sleep once the command has returned.

    Returns:
        False when app_name is not in APP_PACKAGES (nothing is run);
        True once the launch command has been issued. The command's own
        result is not inspected.
    """
    if app_name not in APP_PACKAGES:
        return False

    launch_cmd = [
        *_get_adb_prefix(device_id),
        "shell",
        "monkey",
        "-p",
        APP_PACKAGES[app_name],
        "-c",
        "android.intent.category.LAUNCHER",
        "1",
    ]
    subprocess.run(launch_cmd, capture_output=True)
    time.sleep(delay)
    return True
def _get_adb_prefix(device_id: str | None) -> list:
"""Get ADB command prefix with optional device specifier."""
if device_id:
return ["adb", "-s", device_id]
return ["adb"]

109
phone_agent/adb/input.py Normal file
View File

@@ -0,0 +1,109 @@
"""Input utilities for Android device text input."""
import base64
import subprocess
from typing import Optional
def type_text(text: str, device_id: str | None = None) -> None:
    """
    Send text to the focused input field through an ADB Keyboard broadcast.

    The text is UTF-8 encoded and base64 wrapped, then delivered as the
    ``msg`` extra of an ``ADB_INPUT_B64`` broadcast.

    Args:
        text: The text to type.
        device_id: Optional ADB device ID for multi-device setups.

    Note:
        Requires ADB Keyboard to be installed on the device.
        See: https://github.com/nicnocquee/AdbKeyboard
        NOTE(review): confirm this URL points at the intended project.
    """
    payload = base64.b64encode(text.encode("utf-8")).decode("utf-8")
    broadcast_cmd = [
        *_get_adb_prefix(device_id),
        "shell",
        "am",
        "broadcast",
        "-a",
        "ADB_INPUT_B64",
        "--es",
        "msg",
        payload,
    ]
    subprocess.run(broadcast_cmd, capture_output=True, text=True)
def clear_text(device_id: str | None = None) -> None:
    """
    Empty the focused input field via an ``ADB_CLEAR_TEXT`` broadcast.

    Args:
        device_id: Optional ADB device ID for multi-device setups.
    """
    broadcast_cmd = [
        *_get_adb_prefix(device_id),
        "shell",
        "am",
        "broadcast",
        "-a",
        "ADB_CLEAR_TEXT",
    ]
    subprocess.run(broadcast_cmd, capture_output=True, text=True)
def detect_and_set_adb_keyboard(device_id: str | None = None) -> str:
    """
    Make ADB Keyboard the active input method and report the previous one.

    Args:
        device_id: Optional ADB device ID for multi-device setups.

    Returns:
        The stripped, combined stdout and stderr of the
        ``default_input_method`` query, intended to be passed to
        restore_keyboard later.
    """
    adb = _get_adb_prefix(device_id)
    adb_ime = "com.android.adbkeyboard/.AdbIME"

    query = subprocess.run(
        [*adb, "shell", "settings", "get", "secure", "default_input_method"],
        capture_output=True,
        text=True,
    )
    previous_ime = (query.stdout + query.stderr).strip()

    # Only switch when ADB Keyboard is not already the default.
    if adb_ime not in previous_ime:
        subprocess.run(
            [*adb, "shell", "ime", "set", adb_ime],
            capture_output=True,
            text=True,
        )

    # Warm up the keyboard with an empty input broadcast.
    type_text("", device_id)
    return previous_ime
def restore_keyboard(ime: str, device_id: str | None = None) -> None:
    """
    Switch the device back to a previously recorded input method.

    Args:
        ime: The IME identifier to restore.
        device_id: Optional ADB device ID for multi-device setups.
    """
    restore_cmd = [*_get_adb_prefix(device_id), "shell", "ime", "set", ime]
    subprocess.run(restore_cmd, capture_output=True, text=True)
def _get_adb_prefix(device_id: str | None) -> list:
"""Get ADB command prefix with optional device specifier."""
if device_id:
return ["adb", "-s", device_id]
return ["adb"]

View File

@@ -0,0 +1,108 @@
"""Screenshot utilities for capturing Android device screen."""
import base64
import os
import subprocess
import uuid
from dataclasses import dataclass
from io import BytesIO
from typing import Tuple
from PIL import Image
@dataclass
class Screenshot:
    """Represents a captured screenshot."""

    # Base64 text of the PNG-encoded image bytes.
    base64_data: str
    # Image dimensions in pixels, as reported by the decoded image (or the
    # fixed fallback size when a placeholder image is produced).
    width: int
    height: int
    # Set to True by get_screenshot when the screencap output signals a
    # failure, which it treats as a sensitive screen.
    is_sensitive: bool = False
def get_screenshot(device_id: str | None = None, timeout: int = 10) -> Screenshot:
    """
    Capture a screenshot from the connected Android device.

    The image is written to /sdcard/tmp.png on the device, pulled into a
    uniquely named file in the local temp directory, re-encoded as PNG and
    returned as base64 text. The local file is always removed afterwards.

    Args:
        device_id: Optional ADB device ID for multi-device setups.
        timeout: Timeout in seconds for the on-device capture command (the
            pull step uses a fixed 5 second timeout).

    Returns:
        Screenshot object containing base64 data and dimensions.

    Note:
        If the screenshot fails (e.g., on sensitive screens like payment
        pages), a black fallback image is returned with is_sensitive=True.
        Any other failure yields the same black image with
        is_sensitive=False.
    """
    import tempfile  # local import: only this function needs the temp dir

    temp_path = os.path.join(
        tempfile.gettempdir(), f"screenshot_{uuid.uuid4()}.png"
    )
    adb_prefix = _get_adb_prefix(device_id)
    try:
        # Execute screenshot command
        result = subprocess.run(
            adb_prefix + ["shell", "screencap", "-p", "/sdcard/tmp.png"],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        # Check for screenshot failure (sensitive screen)
        output = result.stdout + result.stderr
        if "Status: -1" in output or "Failed" in output:
            return _create_fallback_screenshot(is_sensitive=True)

        # Pull screenshot to local temp path
        subprocess.run(
            adb_prefix + ["pull", "/sdcard/tmp.png", temp_path],
            capture_output=True,
            text=True,
            timeout=5,
        )
        if not os.path.exists(temp_path):
            return _create_fallback_screenshot(is_sensitive=False)

        # Read and encode image; the context manager closes the underlying
        # file so it can be deleted afterwards.
        with Image.open(temp_path) as img:
            width, height = img.size
            buffered = BytesIO()
            img.save(buffered, format="PNG")
        base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
        return Screenshot(
            base64_data=base64_data, width=width, height=height, is_sensitive=False
        )
    except Exception as e:
        print(f"Screenshot error: {e}")
        return _create_fallback_screenshot(is_sensitive=False)
    finally:
        # Cleanup on every path, including decode failures. The file may
        # legitimately not exist (capture or pull failed), so a removal
        # error is not worth surfacing.
        try:
            os.remove(temp_path)
        except OSError:
            pass
def _get_adb_prefix(device_id: str | None) -> list:
"""Get ADB command prefix with optional device specifier."""
if device_id:
return ["adb", "-s", device_id]
return ["adb"]
def _create_fallback_screenshot(is_sensitive: bool) -> Screenshot:
    """
    Build a solid black 1080x2400 placeholder Screenshot.

    Args:
        is_sensitive: Value copied into the returned Screenshot's
            is_sensitive flag.

    Returns:
        Screenshot holding the base64 text of the black PNG and its size.
    """
    fallback_size = (1080, 2400)
    png_buffer = BytesIO()
    Image.new("RGB", fallback_size, color="black").save(png_buffer, format="PNG")
    encoded = base64.b64encode(png_buffer.getvalue()).decode("utf-8")
    return Screenshot(
        base64_data=encoded,
        width=fallback_size[0],
        height=fallback_size[1],
        is_sensitive=is_sensitive,
    )