chore: add context_window_limit and max_tokens to UpdateAgent [LET-3743] [LET-3741] (#5710)

* chore: add context_window_limit and max_tokens to update agent [LET-3743]

* add generated sdk docs

* simplify comment
This commit is contained in:
Christina Tong
2025-10-23 15:47:29 -07:00
committed by Caren Thomas
parent 4823416af9
commit 8872a3b954
3 changed files with 44 additions and 2 deletions

View File

@@ -31467,6 +31467,30 @@
"title": "Embedding",
"description": "The embedding configuration handle used by the agent, specified in the format provider/model-name."
},
"context_window_limit": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Context Window Limit",
"description": "The context window limit used by the agent."
},
"max_tokens": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Max Tokens",
"description": "The maximum number of tokens to generate, including the reasoning step. If not set, the model will use its default value."
},
"reasoning": {
"anyOf": [
{

View File

@@ -378,6 +378,11 @@ class UpdateAgent(BaseModel):
embedding: Optional[str] = Field(
None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name."
)
context_window_limit: Optional[int] = Field(None, description="The context window limit used by the agent.")
max_tokens: Optional[int] = Field(
None,
description="The maximum number of tokens to generate, including reasoning step. If not set, the model will use its default value.",
)
reasoning: Optional[bool] = Field(None, description="Whether to enable reasoning for this agent.")
enable_sleeptime: Optional[bool] = Field(None, description="If set to True, memory management will move to a background agent thread.")
response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the agent.")

View File

@@ -482,8 +482,21 @@ class SyncServer(object):
request: UpdateAgent,
actor: User,
) -> AgentState:
if request.model is not None:
request.llm_config = await self.get_llm_config_from_handle_async(handle=request.model, actor=actor)
# Build llm_config from convenience fields if llm_config is not provided
if request.llm_config is None and (
request.model is not None or request.context_window_limit is not None or request.max_tokens is not None
):
if request.model is None:
agent = await self.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
request.model = agent.llm_config.handle
config_params = {
"handle": request.model,
"context_window_limit": request.context_window_limit,
"max_tokens": request.max_tokens,
}
log_event(name="start get_cached_llm_config", attributes=config_params)
request.llm_config = await self.get_cached_llm_config_async(actor=actor, **config_params)
log_event(name="end get_cached_llm_config", attributes=config_params)
if request.embedding is not None:
request.embedding_config = await self.get_embedding_config_from_handle_async(handle=request.embedding, actor=actor)