From 2a77c511ff78db523e707fad834fc3e6c250725d Mon Sep 17 00:00:00 2001
From: liuyongbin <liuyongbin@liuyongbindeMacBook-Pro.local>
Date: Thu, 11 Dec 2025 15:07:42 +0800
Subject: [PATCH] Update action parser; drop skip_special_tokens and set stream=False

---
 phone_agent/model/client.py    | 40 ++++++++++++++++++++++++++--------
 scripts/check_deployment_cn.py |  2 +-
 2 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/phone_agent/model/client.py b/phone_agent/model/client.py
index e326f91..31eb8bb 100644
--- a/phone_agent/model/client.py
+++ b/phone_agent/model/client.py
@@ -18,9 +18,7 @@ class ModelConfig:
     temperature: float = 0.0
     top_p: float = 0.85
     frequency_penalty: float = 0.2
-    extra_body: dict[str, Any] = field(
-        default_factory=lambda: {"skip_special_tokens": False}
-    )
+    extra_body: dict[str, Any] = field(default_factory=dict)
 
 
 @dataclass
@@ -65,6 +63,7 @@ class ModelClient:
             top_p=self.config.top_p,
             frequency_penalty=self.config.frequency_penalty,
             extra_body=self.config.extra_body,
+            stream=False,
         )
 
         raw_content = response.choices[0].message.content
@@ -78,20 +77,43 @@ class ModelClient:
         """
         Parse the model response into thinking and action parts.
 
+        Parsing rules (checked in order; the first match wins):
+        1. If content contains 'finish(message=', the text before it (stripped)
+           is thinking and everything from 'finish(message=' onwards is action.
+        2. Otherwise, if content contains 'do(action=', the text before it
+           (stripped) is thinking and everything from 'do(action=' onwards is action.
+        3. Otherwise, if content contains '<answer>', use legacy XML-tag parsing.
+        4. Otherwise, return empty thinking and the full content as action.
+
         Args:
             content: Raw response content.
 
         Returns:
             Tuple of (thinking, action).
         """
-        if "<answer>" not in content:
-            return "", content
+        # Rule 1: Check for finish(message=
+        if "finish(message=" in content:
+            parts = content.split("finish(message=", 1)
+            thinking = parts[0].strip()
+            action = "finish(message=" + parts[1]
+            return thinking, action
 
-        parts = content.split("<answer>", 1)
-        thinking = parts[0].replace("<think>", "").replace("</think>", "").strip()
-        action = parts[1].replace("</answer>", "").strip()
+        # Rule 2: Check for do(action=
+        if "do(action=" in content:
+            parts = content.split("do(action=", 1)
+            thinking = parts[0].strip()
+            action = "do(action=" + parts[1]
+            return thinking, action
 
-        return thinking, action
+        # Rule 3: Fallback to legacy XML tag parsing
+        if "<answer>" in content:
+            parts = content.split("<answer>", 1)
+            thinking = parts[0].replace("<think>", "").replace("</think>", "").strip()
+            action = parts[1].replace("</answer>", "").strip()
+            return thinking, action
+
+        # Rule 4: No markers found, return content as action
+        return "", content
 
 
 class MessageBuilder:
diff --git a/scripts/check_deployment_cn.py b/scripts/check_deployment_cn.py
index 1b67068..d23f41d 100644
--- a/scripts/check_deployment_cn.py
+++ b/scripts/check_deployment_cn.py
@@ -89,7 +89,7 @@ if __name__ == "__main__":
             temperature=args.temperature,
             top_p=args.top_p,
             frequency_penalty=args.frequency_penalty,
-            extra_body={"skip_special_tokens": False},
+            stream=False,
         )
 
         print("\n模型推理结果:")