from typing import AsyncGenerator

from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
from letta.helpers.datetime_helpers import get_utc_timestamp_ns
from letta.schemas.enums import LLMCallType
from letta.schemas.letta_message import LettaMessage
from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, TextContent
from letta.schemas.usage import normalize_cache_tokens, normalize_reasoning_tokens


class SimpleLLMRequestAdapter(LettaLLMRequestAdapter):
    """Simplifying assumptions:
    - No inner thoughts in kwargs
    - No forced tool calls
    - Content native as assistant message
    """

    async def invoke_llm(
        self,
        request_data: dict,
        messages: list,
        tools: list,
        use_assistant_message: bool,
        requires_approval_tools: list[str] | None = None,  # fixed: was a mutable default `[]`
        step_id: str | None = None,
        actor: str | None = None,
    ) -> AsyncGenerator[LettaMessage | None, None]:
        """
        Execute a blocking LLM request and yield the response.

        This adapter:
        1. Makes a blocking request to the LLM
        2. Converts the response to chat completion format
        3. Extracts reasoning and tool call information
        4. Updates all instance variables
        5. Yields nothing (blocking mode doesn't stream)

        Args:
            request_data: Provider-ready request payload; stored on the instance
                and sent verbatim to the LLM client.
            messages: Conversation history passed to the chat-completion converter.
            tools: Tool definitions (unused here; part of the adapter interface).
            use_assistant_message: Adapter-interface flag (unused in blocking mode).
            requires_approval_tools: Tool names requiring approval (unused here;
                kept for interface compatibility).
            step_id: Telemetry step identifier forwarded to the client and trace log.
            actor: Actor identifier for provider-trace logging.
                NOTE(review): annotated `str`, but it is only forwarded to
                `log_provider_trace` — confirm the expected type with callers.

        Yields:
            None exactly once (blocking mode does not stream messages).

        Raises:
            Whatever `self.llm_client.handle_llm_error` maps the provider
            exception to.
        """
        # Store request data
        self.request_data = request_data

        # Set telemetry context and make the blocking LLM request
        self.llm_client.set_telemetry_context(
            telemetry_manager=self.telemetry_manager,
            step_id=step_id,
            agent_id=self.agent_id,
            agent_tags=self.agent_tags,
            run_id=self.run_id,
            call_type=LLMCallType.agent_step,
            org_id=self.org_id,
            user_id=self.user_id,
            llm_config=self.llm_config.model_dump() if self.llm_config else None,
        )
        try:
            self.response_data = await self.llm_client.request_async_with_telemetry(request_data, self.llm_config)
        except Exception as e:
            # Explicitly chain the original provider exception as the cause.
            raise self.llm_client.handle_llm_error(e, llm_config=self.llm_config) from e
        self.llm_request_finish_timestamp_ns = get_utc_timestamp_ns()

        # Convert response to chat completion format
        self.chat_completions_response = await self.llm_client.convert_response_to_chat_completion(
            self.response_data, messages, self.llm_config
        )

        # Hoist the first choice/message to locals — every extraction below reads them.
        choice = self.chat_completions_response.choices[0]
        message = choice.message

        # Extract reasoning content from the response
        if message.reasoning_content:
            self.reasoning_content = [
                ReasoningContent(
                    reasoning=message.reasoning_content,
                    is_native=True,
                    signature=message.reasoning_content_signature,
                )
            ]
        elif message.omitted_reasoning_content:
            self.reasoning_content = [OmittedReasoningContent()]
        else:
            # logger.info("No reasoning content found.")
            self.reasoning_content = None

        if message.content:
            # NOTE: big difference - 'content' goes into 'content'
            # Reasoning placed into content for legacy reasons
            # Carry thought_signature on TextContent when ReasoningContent doesn't exist to hold it
            # (e.g. Gemini 2.5 Flash with include_thoughts=False still returns thought_signature)
            orphan_sig = message.reasoning_content_signature if not self.reasoning_content else None
            self.content = [TextContent(text=message.content, signature=orphan_sig)]
        else:
            self.content = None

        if self.reasoning_content and len(self.reasoning_content) > 0:
            # Temp workaround to consolidate parts to persist reasoning content, this should be integrated better
            self.content = self.reasoning_content + (self.content or [])

        # Extract tool call (first tool call wins; None when the model made none)
        tool_calls = message.tool_calls or []
        self.tool_calls = list(tool_calls)
        self.tool_call = self.tool_calls[0] if self.tool_calls else None

        # Extract logprobs if present
        self.logprobs = choice.logprobs

        # Extract usage statistics
        usage = self.chat_completions_response.usage
        self.usage.step_count = 1
        self.usage.completion_tokens = usage.completion_tokens
        self.usage.prompt_tokens = usage.prompt_tokens
        self.usage.total_tokens = usage.total_tokens

        # Extract cache and reasoning token details using normalized helpers
        self.usage.cached_input_tokens, self.usage.cache_write_tokens = normalize_cache_tokens(usage.prompt_tokens_details)
        self.usage.reasoning_tokens = normalize_reasoning_tokens(usage.completion_tokens_details)

        self.log_provider_trace(step_id=step_id, actor=actor)

        yield None
        return