From 8872a3b9545a87e49cb03aa00aaffc2397cfd31d Mon Sep 17 00:00:00 2001
From: Christina Tong <christina@letta.com>
Date: Thu, 23 Oct 2025 15:47:29 -0700
Subject: [PATCH] chore: add context_window_limit and max_tokens to UpdateAgent
 [LET-3743] [LET-3741] (#5710)

* chore: add context_window_limit and max_tokens to update agent [LET-3743]

* add generated sdk docs

* simplify comment
---
 fern/openapi.json      | 24 ++++++++++++++++++++++++
 letta/schemas/agent.py |  5 +++++
 letta/server/server.py | 17 +++++++++++++++--
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/fern/openapi.json b/fern/openapi.json
index debd8961..bdd5a7f1 100644
--- a/fern/openapi.json
+++ b/fern/openapi.json
@@ -31467,6 +31467,30 @@
             "title": "Embedding",
             "description": "The embedding configuration handle used by the agent, specified in the format provider/model-name."
           },
+          "context_window_limit": {
+            "anyOf": [
+              {
+                "type": "integer"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Context Window Limit",
+            "description": "The context window limit used by the agent."
+          },
+          "max_tokens": {
+            "anyOf": [
+              {
+                "type": "integer"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Max Tokens",
+            "description": "The maximum number of tokens to generate, including reasoning step. If not set, the model will use its default value."
+          },
           "reasoning": {
             "anyOf": [
               {
diff --git a/letta/schemas/agent.py b/letta/schemas/agent.py
index 844a282c..48912f8f 100644
--- a/letta/schemas/agent.py
+++ b/letta/schemas/agent.py
@@ -378,6 +378,11 @@ class UpdateAgent(BaseModel):
     embedding: Optional[str] = Field(
         None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name."
     )
+    context_window_limit: Optional[int] = Field(None, description="The context window limit used by the agent.")
+    max_tokens: Optional[int] = Field(
+        None,
+        description="The maximum number of tokens to generate, including reasoning step. If not set, the model will use its default value.",
+    )
     reasoning: Optional[bool] = Field(None, description="Whether to enable reasoning for this agent.")
     enable_sleeptime: Optional[bool] = Field(None, description="If set to True, memory management will move to a background agent thread.")
     response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the agent.")
diff --git a/letta/server/server.py b/letta/server/server.py
index b6a010d0..f9686557 100644
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -482,8 +482,21 @@ class SyncServer(object):
         request: UpdateAgent,
         actor: User,
     ) -> AgentState:
-        if request.model is not None:
-            request.llm_config = await self.get_llm_config_from_handle_async(handle=request.model, actor=actor)
+        # Build llm_config from convenience fields if llm_config is not provided
+        if request.llm_config is None and (
+            request.model is not None or request.context_window_limit is not None or request.max_tokens is not None
+        ):
+            if request.model is None:
+                agent = await self.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
+                request.model = agent.llm_config.handle
+            config_params = {
+                "handle": request.model,
+                "context_window_limit": request.context_window_limit,
+                "max_tokens": request.max_tokens,
+            }
+            log_event(name="start get_cached_llm_config", attributes=config_params)
+            request.llm_config = await self.get_cached_llm_config_async(actor=actor, **config_params)
+            log_event(name="end get_cached_llm_config", attributes=config_params)
 
         if request.embedding is not None:
             request.embedding_config = await self.get_embedding_config_from_handle_async(handle=request.embedding, actor=actor)