diff --git a/fern/openapi.json b/fern/openapi.json
index c3ef87c4..7dfbcd90 100644
--- a/fern/openapi.json
+++ b/fern/openapi.json
@@ -36206,6 +36206,7 @@
           "invalid_llm_response",
           "invalid_tool_call",
           "max_steps",
+          "max_tokens_exceeded",
           "no_tool_call",
           "tool_rule",
           "cancelled",
diff --git a/letta/adapters/letta_llm_adapter.py b/letta/adapters/letta_llm_adapter.py
index 69ac2ab1..ca2e1534 100644
--- a/letta/adapters/letta_llm_adapter.py
+++ b/letta/adapters/letta_llm_adapter.py
@@ -63,6 +63,18 @@ class LettaLLMAdapter(ABC):
         """
         raise NotImplementedError
 
+    @property
+    def finish_reason(self) -> str | None:
+        """
+        Get the finish_reason of the first choice in the stored LLM response.
+
+        Returns:
+            str | None: choices[0].finish_reason, or None if the response or its choices are missing/empty
+        """
+        if self.chat_completions_response and self.chat_completions_response.choices:
+            return self.chat_completions_response.choices[0].finish_reason
+        return None
+
     def supports_token_streaming(self) -> bool:
         """
         Check if the adapter supports token-level streaming.
diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py
index 6b97f59d..176717b1 100644
--- a/letta/agents/letta_agent_v3.py
+++ b/letta/agents/letta_agent_v3.py
@@ -745,6 +745,7 @@ class LettaAgentV3(LettaAgentV2):
                 is_approval_response=approval_response is not None,
                 tool_call_denials=tool_call_denials,
                 tool_returns=tool_returns,
+                finish_reason=llm_adapter.finish_reason,
             )
 
             # extend trackers with new messages
@@ -892,6 +893,7 @@ class LettaAgentV3(LettaAgentV2):
         tool_calls: list[ToolCall] = [],
         tool_call_denials: list[ToolCallDenial] = [],
         tool_returns: list[ToolReturn] = [],
+        finish_reason: str | None = None,
     ) -> tuple[list[Message], bool, LettaStopReason | None]:
         """
         Handle the final AI response once streaming completes, execute / validate tool calls,
@@ -936,6 +938,7 @@ class LettaAgentV3(LettaAgentV2):
                     tool_rule_violated=False,
                     tool_rules_solver=tool_rules_solver,
                     is_final_step=is_final_step,
+                    finish_reason=finish_reason,
                 )
                 assistant_message = create_letta_messages_from_llm_response(
                     agent_id=self.agent_state.id,
@@ -1180,6 +1183,7 @@ class LettaAgentV3(LettaAgentV2):
                     tool_rule_violated=spec["violated"],
                     tool_rules_solver=tool_rules_solver,
                     is_final_step=(is_final_step and idx == len(exec_specs) - 1),
+                    finish_reason=finish_reason,
                 )
             persisted_continue_flags.append(cont)
             persisted_stop_reasons.append(sr)
@@ -1243,6 +1247,7 @@ class LettaAgentV3(LettaAgentV2):
         tool_rule_violated: bool,
         tool_rules_solver: ToolRulesSolver,
         is_final_step: bool | None,
+        finish_reason: str | None = None,
     ) -> tuple[bool, str | None, LettaStopReason | None]:
         """
         In v3 loop, we apply the following rules:
@@ -1267,6 +1272,9 @@ class LettaAgentV3(LettaAgentV2):
                 reason = f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop."
                 return True, reason, None
             # No required tools remaining → end turn
+            # Check if the LLM hit max_tokens (finish_reason == "length")
+            if finish_reason == "length":
+                return False, None, LettaStopReason(stop_reason=StopReasonType.max_tokens_exceeded.value)
             return False, None, LettaStopReason(stop_reason=StopReasonType.end_turn.value)
         else:
             if tool_rule_violated:
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index 51e89c4e..5bb5b08c 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -656,7 +656,15 @@ class OpenAIClient(LLMClientBase):
             reasoning_summary_parts = None
             reasoning_content_signature = None
             tool_calls = None
-            finish_reason = "stop" if (response_data.get("status") == "completed") else None
+
+            # Check for incomplete_details first (e.g., max_output_tokens reached)
+            incomplete_details = response_data.get("incomplete_details")
+            if incomplete_details and incomplete_details.get("reason") == "max_output_tokens":
+                finish_reason = "length"
+            elif response_data.get("status") == "completed":
+                finish_reason = "stop"
+            else:
+                finish_reason = None
 
             # Optionally capture reasoning presence
             found_reasoning = False
diff --git a/letta/schemas/letta_stop_reason.py b/letta/schemas/letta_stop_reason.py
index fa67bc11..abb28aa7 100644
--- a/letta/schemas/letta_stop_reason.py
+++ b/letta/schemas/letta_stop_reason.py
@@ -13,6 +13,7 @@ class StopReasonType(str, Enum):
     invalid_llm_response = "invalid_llm_response"
     invalid_tool_call = "invalid_tool_call"
     max_steps = "max_steps"
+    max_tokens_exceeded = "max_tokens_exceeded"
     no_tool_call = "no_tool_call"
     tool_rule = "tool_rule"
     cancelled = "cancelled"
@@ -34,6 +35,8 @@ class StopReasonType(str, Enum):
             StopReasonType.no_tool_call,
             StopReasonType.invalid_llm_response,
             StopReasonType.llm_api_error,
+            # Treat context/token limit exhaustion as an error state (same as llm_api_error)
+            StopReasonType.max_tokens_exceeded,
             StopReasonType.context_window_overflow_in_system_prompt,
         ):
             return RunStatus.failed