From 0653d6ea65e9f8dd91e26b58249b416d46eb677b Mon Sep 17 00:00:00 2001 From: yongbin-buaa Date: Sat, 13 Dec 2025 00:41:40 +0800 Subject: [PATCH] support stream thinking --- phone_agent/agent.py | 9 +++---- phone_agent/model/client.py | 53 ++++++++++++++++++++++++++++++++++--- 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/phone_agent/agent.py b/phone_agent/agent.py index d7801ae..b170316 100644 --- a/phone_agent/agent.py +++ b/phone_agent/agent.py @@ -169,6 +169,10 @@ class PhoneAgent: # Get model response try: + msgs = get_messages(self.agent_config.lang) + print("\n" + "=" * 50) + print(f"💭 {msgs['thinking']}:") + print("-" * 50) response = self.model_client.request(self._context) except Exception as e: if self.agent_config.verbose: @@ -191,11 +195,6 @@ class PhoneAgent: if self.agent_config.verbose: # Print thinking process - msgs = get_messages(self.agent_config.lang) - print("\n" + "=" * 50) - print(f"💭 {msgs['thinking']}:") - print("-" * 50) - print(response.thinking) print("-" * 50) print(f"🎯 {msgs['action']}:") print(json.dumps(action, ensure_ascii=False, indent=2)) diff --git a/phone_agent/model/client.py b/phone_agent/model/client.py index 31eb8bb..ccf77ea 100644 --- a/phone_agent/model/client.py +++ b/phone_agent/model/client.py @@ -55,7 +55,7 @@ class ModelClient: Raises: ValueError: If the response cannot be parsed. """ - response = self.client.chat.completions.create( + stream = self.client.chat.completions.create( messages=messages, model=self.config.model_name, max_tokens=self.config.max_tokens, @@ -63,10 +63,57 @@ class ModelClient: top_p=self.config.top_p, frequency_penalty=self.config.frequency_penalty, extra_body=self.config.extra_body, - stream=False, + stream=True, ) - raw_content = response.choices[0].message.content + raw_content = "" + buffer = "" # Buffer to hold content that might be part of a marker + action_markers = ["finish(message=", "do(action="] + in_action_phase = False # Track if we've entered the action phase + + for chunk in stream: + if len(chunk.choices) == 0: + continue + if chunk.choices[0].delta.content is not None: + content = chunk.choices[0].delta.content + raw_content += content + + if in_action_phase: + # Already in action phase, just accumulate content without printing + continue + + buffer += content + + # Check if any marker is fully present in buffer + marker_found = False + for marker in action_markers: + if marker in buffer: + # Marker found, print everything before it + thinking_part = buffer.split(marker, 1)[0] + print(thinking_part, end="", flush=True) + print() # Print newline after thinking is complete + in_action_phase = True + marker_found = True + break + + if marker_found: + continue # Continue to collect remaining content + + # Check if buffer ends with a prefix of any marker + # If so, don't print yet (wait for more content) + is_potential_marker = False + for marker in action_markers: + for i in range(1, len(marker)): + if buffer.endswith(marker[:i]): + is_potential_marker = True + break + if is_potential_marker: + break + + if not is_potential_marker: + # Safe to print the buffer + print(buffer, end="", flush=True) + buffer = "" # Parse thinking and action from response thinking, action = self._parse_response(raw_content)