From 8872a3b9545a87e49cb03aa00aaffc2397cfd31d Mon Sep 17 00:00:00 2001 From: Christina Tong Date: Thu, 23 Oct 2025 15:47:29 -0700 Subject: [PATCH] chore: add context_window_limit and max_tokens to UpdateAgent [LET-3743] [LET-3741] (#5710) * chore: add context_window_limit and max_tokens to update agent [LET-3743] * add generated sdk docs * simplify comment --- fern/openapi.json | 24 ++++++++++++++++++++++++ letta/schemas/agent.py | 5 +++++ letta/server/server.py | 17 +++++++++++++++-- 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/fern/openapi.json b/fern/openapi.json index debd8961..bdd5a7f1 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -31467,6 +31467,30 @@ "title": "Embedding", "description": "The embedding configuration handle used by the agent, specified in the format provider/model-name." }, + "context_window_limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Context Window Limit", + "description": "The context window limit used by the agent." + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tokens", + "description": "The maximum number of tokens to generate, including reasoning step. If not set, the model will use its default value." + }, "reasoning": { "anyOf": [ { diff --git a/letta/schemas/agent.py b/letta/schemas/agent.py index 844a282c..48912f8f 100644 --- a/letta/schemas/agent.py +++ b/letta/schemas/agent.py @@ -378,6 +378,11 @@ class UpdateAgent(BaseModel): embedding: Optional[str] = Field( None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name." ) + context_window_limit: Optional[int] = Field(None, description="The context window limit used by the agent.") + max_tokens: Optional[int] = Field( + None, + description="The maximum number of tokens to generate, including reasoning step. If not set, the model will use its default value.", + ) reasoning: Optional[bool] = Field(None, description="Whether to enable reasoning for this agent.") enable_sleeptime: Optional[bool] = Field(None, description="If set to True, memory management will move to a background agent thread.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the agent.") diff --git a/letta/server/server.py b/letta/server/server.py index b6a010d0..f9686557 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -482,8 +482,21 @@ class SyncServer(object): request: UpdateAgent, actor: User, ) -> AgentState: - if request.model is not None: - request.llm_config = await self.get_llm_config_from_handle_async(handle=request.model, actor=actor) + # Build llm_config from convenience fields if llm_config is not provided + if request.llm_config is None and ( + request.model is not None or request.context_window_limit is not None or request.max_tokens is not None + ): + if request.model is None: + agent = await self.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor) + request.model = agent.llm_config.handle + config_params = { + "handle": request.model, + "context_window_limit": request.context_window_limit, + "max_tokens": request.max_tokens, + } + log_event(name="start get_cached_llm_config", attributes=config_params) + request.llm_config = await self.get_cached_llm_config_async(actor=actor, **config_params) + log_event(name="end get_cached_llm_config", attributes=config_params) if request.embedding is not None: request.embedding_config = await self.get_embedding_config_from_handle_async(handle=request.embedding, actor=actor)