diff --git a/fern/openapi.json b/fern/openapi.json index c3ef87c4..7dfbcd90 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -36206,6 +36206,7 @@ "invalid_llm_response", "invalid_tool_call", "max_steps", + "max_tokens_exceeded", "no_tool_call", "tool_rule", "cancelled", diff --git a/letta/adapters/letta_llm_adapter.py b/letta/adapters/letta_llm_adapter.py index 69ac2ab1..ca2e1534 100644 --- a/letta/adapters/letta_llm_adapter.py +++ b/letta/adapters/letta_llm_adapter.py @@ -63,6 +63,18 @@ class LettaLLMAdapter(ABC): """ raise NotImplementedError + @property + def finish_reason(self) -> str | None: + """ + Get the finish_reason of the first choice in the chat completions response. + + Returns: + str | None: The finish_reason if available, None otherwise + """ + if self.chat_completions_response and self.chat_completions_response.choices: + return self.chat_completions_response.choices[0].finish_reason + return None + def supports_token_streaming(self) -> bool: """ Check if the adapter supports token-level streaming. 
diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py index 6b97f59d..176717b1 100644 --- a/letta/agents/letta_agent_v3.py +++ b/letta/agents/letta_agent_v3.py @@ -745,6 +745,7 @@ class LettaAgentV3(LettaAgentV2): is_approval_response=approval_response is not None, tool_call_denials=tool_call_denials, tool_returns=tool_returns, + finish_reason=llm_adapter.finish_reason, ) # extend trackers with new messages @@ -892,6 +893,7 @@ class LettaAgentV3(LettaAgentV2): tool_calls: list[ToolCall] = [], tool_call_denials: list[ToolCallDenial] = [], tool_returns: list[ToolReturn] = [], + finish_reason: str | None = None, ) -> tuple[list[Message], bool, LettaStopReason | None]: """ Handle the final AI response once streaming completes, execute / validate tool calls, @@ -936,6 +938,7 @@ class LettaAgentV3(LettaAgentV2): tool_rule_violated=False, tool_rules_solver=tool_rules_solver, is_final_step=is_final_step, + finish_reason=finish_reason, ) assistant_message = create_letta_messages_from_llm_response( agent_id=self.agent_state.id, @@ -1180,6 +1183,7 @@ class LettaAgentV3(LettaAgentV2): tool_rule_violated=spec["violated"], tool_rules_solver=tool_rules_solver, is_final_step=(is_final_step and idx == len(exec_specs) - 1), + finish_reason=finish_reason, ) persisted_continue_flags.append(cont) persisted_stop_reasons.append(sr) @@ -1243,6 +1247,7 @@ class LettaAgentV3(LettaAgentV2): tool_rule_violated: bool, tool_rules_solver: ToolRulesSolver, is_final_step: bool | None, + finish_reason: str | None = None, ) -> tuple[bool, str | None, LettaStopReason | None]: """ In v3 loop, we apply the following rules: @@ -1267,6 +1272,9 @@ class LettaAgentV3(LettaAgentV2): reason = f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop." 
return True, reason, None # No required tools remaining → end turn + # Check if the LLM hit max_tokens (finish_reason == "length") + if finish_reason == "length": + return False, None, LettaStopReason(stop_reason=StopReasonType.max_tokens_exceeded.value) return False, None, LettaStopReason(stop_reason=StopReasonType.end_turn.value) else: if tool_rule_violated: diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py index 51e89c4e..5bb5b08c 100644 --- a/letta/llm_api/openai_client.py +++ b/letta/llm_api/openai_client.py @@ -656,7 +656,15 @@ class OpenAIClient(LLMClientBase): reasoning_summary_parts = None reasoning_content_signature = None tool_calls = None - finish_reason = "stop" if (response_data.get("status") == "completed") else None + + # Check for incomplete_details first (e.g., max_output_tokens reached) + incomplete_details = response_data.get("incomplete_details") + if incomplete_details and incomplete_details.get("reason") == "max_output_tokens": + finish_reason = "length" + elif response_data.get("status") == "completed": + finish_reason = "stop" + else: + finish_reason = None # Optionally capture reasoning presence found_reasoning = False diff --git a/letta/schemas/letta_stop_reason.py b/letta/schemas/letta_stop_reason.py index fa67bc11..abb28aa7 100644 --- a/letta/schemas/letta_stop_reason.py +++ b/letta/schemas/letta_stop_reason.py @@ -13,6 +13,7 @@ class StopReasonType(str, Enum): invalid_llm_response = "invalid_llm_response" invalid_tool_call = "invalid_tool_call" max_steps = "max_steps" + max_tokens_exceeded = "max_tokens_exceeded" no_tool_call = "no_tool_call" tool_rule = "tool_rule" cancelled = "cancelled" @@ -34,6 +35,8 @@ class StopReasonType(str, Enum): StopReasonType.no_tool_call, StopReasonType.invalid_llm_response, StopReasonType.llm_api_error, + # Treat output-token limit exhaustion (finish_reason == "length") as a failed run, same as llm_api_error + StopReasonType.max_tokens_exceeded, StopReasonType.context_window_overflow_in_system_prompt, 
): return RunStatus.failed