ci: use experimental for send message tests (#2290)

Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
2025-05-20 18:39:27 -07:00
parent d319d22ca9
commit 095a14cd1d
3 changed files with 25 additions and 5 deletions
--- a/letta/agents/letta_agent.py
+++ b/letta/agents/letta_agent.py
@@ -132,7 +132,16 @@ class LettaAgent(BaseAgent):
                # TODO: make into a real error
                raise ValueError("No tool calls found in response, model must make a tool call")
            tool_call = response.choices[0].message.tool_calls[0]
-            reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
+            if response.choices[0].message.reasoning_content:
+                reasoning = [
+                    ReasoningContent(
+                        reasoning=response.choices[0].message.reasoning_content,
+                        is_native=True,
+                        signature=response.choices[0].message.reasoning_content_signature,
+                    )
+                ]
+            else:
+                reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons

            persisted_messages, should_continue = await self._handle_ai_response(
                tool_call, agent_state, tool_rules_solver, response.usage, reasoning_content=reasoning
@@ -230,7 +239,16 @@ class LettaAgent(BaseAgent):
                # TODO: make into a real error
                raise ValueError("No tool calls found in response, model must make a tool call")
            tool_call = response.choices[0].message.tool_calls[0]
-            reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
+            if response.choices[0].message.reasoning_content:
+                reasoning = [
+                    ReasoningContent(
+                        reasoning=response.choices[0].message.reasoning_content,
+                        is_native=True,
+                        signature=response.choices[0].message.reasoning_content_signature,
+                    )
+                ]
+            else:
+                reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons

            persisted_messages, should_continue = await self._handle_ai_response(
                tool_call, agent_state, tool_rules_solver, response.usage, reasoning_content=reasoning, step_id=step_id
--- a/letta/interfaces/openai_streaming_interface.py
+++ b/letta/interfaces/openai_streaming_interface.py
@@ -78,8 +78,8 @@ class OpenAIStreamingInterface:

                # track usage
                if chunk.usage:
-                    self.input_tokens += len(chunk.usage.prompt_tokens)
-                    self.output_tokens += len(chunk.usage.completion_tokens)
+                    self.input_tokens += chunk.usage.prompt_tokens
+                    self.output_tokens += chunk.usage.completion_tokens

                if chunk.choices:
                    choice = chunk.choices[0]
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -261,7 +261,9 @@ class OpenAIClient(LLMClientBase):
        Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
        """
        client = AsyncOpenAI(**self._prepare_client_kwargs(llm_config))
-        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(**request_data, stream=True)
+        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+            **request_data, stream=True, stream_options={"include_usage": True}
+        )
        return response_stream

    def handle_llm_error(self, e: Exception) -> Exception: