From 095a14cd1dea0525a4b35b6a62cdf61ac76df0a6 Mon Sep 17 00:00:00 2001
From: cthomas
Date: Tue, 20 May 2025 18:39:27 -0700
Subject: [PATCH] ci: use experimental for send message tests (#2290)

Co-authored-by: Sarah Wooders
---
 letta/agents/letta_agent.py                   | 22 +++++++++++++++++--
 .../interfaces/openai_streaming_interface.py  |  4 ++--
 letta/llm_api/openai_client.py                |  4 +++-
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/letta/agents/letta_agent.py b/letta/agents/letta_agent.py
index 367605a8..3ec862b1 100644
--- a/letta/agents/letta_agent.py
+++ b/letta/agents/letta_agent.py
@@ -132,7 +132,16 @@ class LettaAgent(BaseAgent):
             # TODO: make into a real error
             raise ValueError("No tool calls found in response, model must make a tool call")
         tool_call = response.choices[0].message.tool_calls[0]
-        reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
+        if response.choices[0].message.reasoning_content:
+            reasoning = [
+                ReasoningContent(
+                    reasoning=response.choices[0].message.reasoning_content,
+                    is_native=True,
+                    signature=response.choices[0].message.reasoning_content_signature,
+                )
+            ]
+        else:
+            reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
 
         persisted_messages, should_continue = await self._handle_ai_response(
             tool_call, agent_state, tool_rules_solver, response.usage, reasoning_content=reasoning
@@ -230,7 +239,16 @@ class LettaAgent(BaseAgent):
             # TODO: make into a real error
             raise ValueError("No tool calls found in response, model must make a tool call")
         tool_call = response.choices[0].message.tool_calls[0]
-        reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
+        if response.choices[0].message.reasoning_content:
+            reasoning = [
+                ReasoningContent(
+                    reasoning=response.choices[0].message.reasoning_content,
+                    is_native=True,
+                    signature=response.choices[0].message.reasoning_content_signature,
+                )
+            ]
+        else:
+            reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
 
         persisted_messages, should_continue = await self._handle_ai_response(
             tool_call, agent_state, tool_rules_solver, response.usage, reasoning_content=reasoning, step_id=step_id
diff --git a/letta/interfaces/openai_streaming_interface.py b/letta/interfaces/openai_streaming_interface.py
index 3d1fabe5..eea1b3b2 100644
--- a/letta/interfaces/openai_streaming_interface.py
+++ b/letta/interfaces/openai_streaming_interface.py
@@ -78,8 +78,8 @@ class OpenAIStreamingInterface:
 
             # track usage
             if chunk.usage:
-                self.input_tokens += len(chunk.usage.prompt_tokens)
-                self.output_tokens += len(chunk.usage.completion_tokens)
+                self.input_tokens += chunk.usage.prompt_tokens
+                self.output_tokens += chunk.usage.completion_tokens
 
             if chunk.choices:
                 choice = chunk.choices[0]
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index 639a550d..e6ac37a2 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -261,7 +261,9 @@ class OpenAIClient(LLMClientBase):
         Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
         """
         client = AsyncOpenAI(**self._prepare_client_kwargs(llm_config))
-        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(**request_data, stream=True)
+        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+            **request_data, stream=True, stream_options={"include_usage": True}
+        )
         return response_stream
 
     def handle_llm_error(self, e: Exception) -> Exception: