From 095a14cd1dea0525a4b35b6a62cdf61ac76df0a6 Mon Sep 17 00:00:00 2001
From: cthomas
Date: Tue, 20 May 2025 18:39:27 -0700
Subject: [PATCH] ci: use experimental for send message tests (#2290)

Co-authored-by: Sarah Wooders
---
 letta/agents/letta_agent.py                   | 22 +++++++++++++++++--
 .../interfaces/openai_streaming_interface.py  |  4 ++--
 letta/llm_api/openai_client.py                |  4 +++-
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/letta/agents/letta_agent.py b/letta/agents/letta_agent.py
index 367605a8..3ec862b1 100644
--- a/letta/agents/letta_agent.py
+++ b/letta/agents/letta_agent.py
@@ -132,7 +132,16 @@ class LettaAgent(BaseAgent):
             # TODO: make into a real error
             raise ValueError("No tool calls found in response, model must make a tool call")
         tool_call = response.choices[0].message.tool_calls[0]
-        reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
+        if response.choices[0].message.reasoning_content:
+            reasoning = [
+                ReasoningContent(
+                    reasoning=response.choices[0].message.reasoning_content,
+                    is_native=True,
+                    signature=response.choices[0].message.reasoning_content_signature,
+                )
+            ]
+        else:
+            reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
 
         persisted_messages, should_continue = await self._handle_ai_response(
             tool_call, agent_state, tool_rules_solver, response.usage, reasoning_content=reasoning
@@ -230,7 +239,16 @@ class LettaAgent(BaseAgent):
             # TODO: make into a real error
             raise ValueError("No tool calls found in response, model must make a tool call")
         tool_call = response.choices[0].message.tool_calls[0]
-        reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
+        if response.choices[0].message.reasoning_content:
+            reasoning = [
+                ReasoningContent(
+                    reasoning=response.choices[0].message.reasoning_content,
+                    is_native=True,
+                    signature=response.choices[0].message.reasoning_content_signature,
+                )
+            ]
+        else:
+            reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
 
         persisted_messages, should_continue = await self._handle_ai_response(
             tool_call, agent_state, tool_rules_solver, response.usage, reasoning_content=reasoning, step_id=step_id
diff --git a/letta/interfaces/openai_streaming_interface.py b/letta/interfaces/openai_streaming_interface.py
index 3d1fabe5..eea1b3b2 100644
--- a/letta/interfaces/openai_streaming_interface.py
+++ b/letta/interfaces/openai_streaming_interface.py
@@ -78,8 +78,8 @@ class OpenAIStreamingInterface:
 
             # track usage
             if chunk.usage:
-                self.input_tokens += len(chunk.usage.prompt_tokens)
-                self.output_tokens += len(chunk.usage.completion_tokens)
+                self.input_tokens += chunk.usage.prompt_tokens
+                self.output_tokens += chunk.usage.completion_tokens
 
             if chunk.choices:
                 choice = chunk.choices[0]
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index 639a550d..e6ac37a2 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -261,7 +261,9 @@ class OpenAIClient(LLMClientBase):
         Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
         """
         client = AsyncOpenAI(**self._prepare_client_kwargs(llm_config))
-        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(**request_data, stream=True)
+        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+            **request_data, stream=True, stream_options={"include_usage": True}
+        )
         return response_stream
 
     def handle_llm_error(self, e: Exception) -> Exception: