From 6eeb3c90bb94d3eb1beab48d324190add45b7e23 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Tue, 11 Nov 2025 14:49:15 -0800 Subject: [PATCH] feat: bring back model_settings and remove validation again (#6104) --- fern/openapi.json | 400 +++++++++--------- letta/orm/agent.py | 8 +- letta/schemas/agent.py | 37 +- letta/schemas/llm_config.py | 15 +- letta/schemas/model.py | 58 ++- letta/server/server.py | 7 + .../llm_model_configs/gemini-2.5-flash.json | 2 +- tests/managers/test_agent_manager.py | 15 +- tests/sdk_v1/agents_test.py | 26 +- 9 files changed, 298 insertions(+), 270 deletions(-) diff --git a/fern/openapi.json b/fern/openapi.json index 617e19bf..e8d460e0 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -18937,24 +18937,84 @@ "model": { "anyOf": [ { - "$ref": "#/components/schemas/ModelSettings" + "type": "string" }, { "type": "null" } ], - "description": "The model used by the agent." + "title": "Model", + "description": "The model handle used by the agent (format: provider/model-name)." }, "embedding": { "anyOf": [ { - "$ref": "#/components/schemas/EmbeddingModelSettings" + "type": "string" }, { "type": "null" } ], - "description": "The embedding model used by the agent." + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIModelSettings" + }, + { + "$ref": "#/components/schemas/AnthropicModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleAIModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleVertexModelSettings" + }, + { + "$ref": "#/components/schemas/AzureModelSettings" + }, + { + "$ref": "#/components/schemas/XAIModelSettings" + }, + { + "$ref": "#/components/schemas/GroqModelSettings" + }, + { + "$ref": "#/components/schemas/DeepseekModelSettings" + }, + { + "$ref": "#/components/schemas/TogetherModelSettings" + }, + { + "$ref": "#/components/schemas/BedrockModelSettings" + } + ], + "discriminator": { + "propertyName": "provider_type", + "mapping": { + "anthropic": "#/components/schemas/AnthropicModelSettings", + "azure": "#/components/schemas/AzureModelSettings", + "bedrock": "#/components/schemas/BedrockModelSettings", + "deepseek": "#/components/schemas/DeepseekModelSettings", + "google_ai": "#/components/schemas/GoogleAIModelSettings", + "google_vertex": "#/components/schemas/GoogleVertexModelSettings", + "groq": "#/components/schemas/GroqModelSettings", + "openai": "#/components/schemas/OpenAIModelSettings", + "together": "#/components/schemas/TogetherModelSettings", + "xai": "#/components/schemas/XAIModelSettings" + } + } + }, + { + "type": "null" + } + ], + "title": "Model Settings", + "description": "The model settings used by the agent." }, "response_format": { "anyOf": [ @@ -19346,11 +19406,6 @@ }, "AnthropicModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." 
- }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -19363,11 +19418,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "anthropic", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "anthropic" }, "temperature": { @@ -19399,7 +19454,6 @@ } }, "type": "object", - "required": ["model"], "title": "AnthropicModelSettings" }, "AnthropicThinking": { @@ -20221,11 +20275,6 @@ }, "AzureModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -20238,11 +20287,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "azure", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "azure" }, "temperature": { @@ -20283,7 +20332,6 @@ } }, "type": "object", - "required": ["model"], "title": "AzureModelSettings", "description": "Azure OpenAI model configuration (OpenAI-compatible)." }, @@ -20540,11 +20588,6 @@ }, "BedrockModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -20557,11 +20600,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "bedrock", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "bedrock" }, "temperature": { @@ -20602,7 +20645,6 @@ } }, "type": "object", - "required": ["model"], "title": "BedrockModelSettings", "description": "AWS Bedrock model configuration." }, @@ -23159,6 +23201,27 @@ { "type": "string" }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model handle for the agent to use (format: provider/model-name)." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ { "oneOf": [ { @@ -23193,7 +23256,7 @@ } ], "discriminator": { - "propertyName": "provider", + "propertyName": "provider_type", "mapping": { "anthropic": "#/components/schemas/AnthropicModelSettings", "azure": "#/components/schemas/AzureModelSettings", @@ -23212,23 +23275,8 @@ "type": "null" } ], - "title": "Model", - "description": "The model handle or model settings for the agent to use, specified either by a handle or an object. See the model schema for more information." - }, - "embedding": { - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/EmbeddingModelSettings" - }, - { - "type": "null" - } - ], - "title": "Embedding", - "description": "The embedding configuration handle used by the agent, specified in the format provider/model-name." + "title": "Model Settings", + "description": "The model settings for the agent." 
}, "context_window_limit": { "anyOf": [ @@ -23987,11 +24035,6 @@ }, "DeepseekModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -24004,11 +24047,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "deepseek", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "deepseek" }, "temperature": { @@ -24049,7 +24092,6 @@ } }, "type": "object", - "required": ["model"], "title": "DeepseekModelSettings", "description": "Deepseek model configuration (OpenAI-compatible)." }, @@ -24578,25 +24620,6 @@ ], "title": "EmbeddingModel" }, - "EmbeddingModelSettings": { - "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, - "provider": { - "type": "string", - "enum": ["openai", "ollama"], - "title": "Provider", - "description": "The provider of the model." - } - }, - "type": "object", - "required": ["model", "provider"], - "title": "EmbeddingModelSettings", - "description": "Schema for defining settings for an embedding model" - }, "EventMessage": { "properties": { "id": { @@ -25746,11 +25769,6 @@ }, "GoogleAIModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -25763,11 +25781,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "google_ai", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "google_ai" }, "temperature": { @@ -25816,16 +25834,10 @@ } }, "type": "object", - "required": ["model"], "title": "GoogleAIModelSettings" }, "GoogleVertexModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -25838,11 +25850,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "google_vertex", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "google_vertex" }, "temperature": { @@ -25891,16 +25903,10 @@ } }, "type": "object", - "required": ["model"], "title": "GoogleVertexModelSettings" }, "GroqModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." 
- }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -25913,11 +25919,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "groq", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "groq" }, "temperature": { @@ -25958,7 +25964,6 @@ } }, "type": "object", - "required": ["model"], "title": "GroqModelSettings", "description": "Groq model configuration (OpenAI-compatible)." }, @@ -27361,6 +27366,27 @@ { "type": "string" }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model handle for the agent to use (format: provider/model-name)." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ { "oneOf": [ { @@ -27395,7 +27421,7 @@ } ], "discriminator": { - "propertyName": "provider", + "propertyName": "provider_type", "mapping": { "anthropic": "#/components/schemas/AnthropicModelSettings", "azure": "#/components/schemas/AzureModelSettings", @@ -27414,23 +27440,8 @@ "type": "null" } ], - "title": "Model", - "description": "The model handle or model settings for the agent to use, specified either by a handle or an object. See the model schema for more information." - }, - "embedding": { - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/EmbeddingModelSettings" - }, - { - "type": "null" - } - ], - "title": "Embedding", - "description": "The embedding configuration handle used by the agent, specified in the format provider/model-name." + "title": "Model Settings", + "description": "The model settings for the agent." }, "context_window_limit": { "anyOf": [ @@ -30628,31 +30639,6 @@ ], "title": "Model" }, - "ModelSettings": { - "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, - "max_output_tokens": { - "type": "integer", - "title": "Max Output Tokens", - "description": "The maximum number of tokens the model can generate.", - "default": 4096 - }, - "parallel_tool_calls": { - "type": "boolean", - "title": "Parallel Tool Calls", - "description": "Whether to enable parallel tool calling.", - "default": false - } - }, - "type": "object", - "required": ["model"], - "title": "ModelSettings", - "description": "Schema for defining settings for a model" - }, "ModifyApprovalRequest": { "properties": { "requires_approval": { @@ -30752,11 +30738,6 @@ }, "OpenAIModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." 
- }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -30769,11 +30750,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "openai", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "openai" }, "temperature": { @@ -30821,7 +30802,6 @@ } }, "type": "object", - "required": ["model"], "title": "OpenAIModelSettings" }, "OpenAIReasoning": { @@ -34397,11 +34377,6 @@ }, "TogetherModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -34414,11 +34389,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "together", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "together" }, "temperature": { @@ -34459,7 +34434,6 @@ } }, "type": "object", - "required": ["model"], "title": "TogetherModelSettings", "description": "Together AI model configuration (OpenAI-compatible)." }, @@ -36063,6 +36037,27 @@ { "type": "string" }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model handle used by the agent (format: provider/model-name)." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ { "oneOf": [ { @@ -36097,7 +36092,7 @@ } ], "discriminator": { - "propertyName": "provider", + "propertyName": "provider_type", "mapping": { "anthropic": "#/components/schemas/AnthropicModelSettings", "azure": "#/components/schemas/AzureModelSettings", @@ -36116,23 +36111,8 @@ "type": "null" } ], - "title": "Model", - "description": "The model used by the agent, specified either by a handle or an object. See the model schema for more information." - }, - "embedding": { - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/EmbeddingModelSettings" - }, - { - "type": "null" - } - ], - "title": "Embedding", - "description": "The embedding configuration handle used by the agent, specified in the format provider/model-name." + "title": "Model Settings", + "description": "The model settings for the agent." }, "context_window_limit": { "anyOf": [ @@ -36849,11 +36829,6 @@ }, "XAIModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -36866,11 +36841,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "xai", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "xai" }, "temperature": { @@ -36911,7 +36886,6 @@ } }, "type": "object", - "required": ["model"], "title": "XAIModelSettings", "description": "xAI model configuration (OpenAI-compatible)." 
}, @@ -37185,6 +37159,27 @@ { "type": "string" }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model handle for the agent to use (format: provider/model-name)." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ { "oneOf": [ { @@ -37219,7 +37214,7 @@ } ], "discriminator": { - "propertyName": "provider", + "propertyName": "provider_type", "mapping": { "anthropic": "#/components/schemas/AnthropicModelSettings", "azure": "#/components/schemas/AzureModelSettings", @@ -37238,23 +37233,8 @@ "type": "null" } ], - "title": "Model", - "description": "The model handle or model settings for the agent to use, specified either by a handle or an object. See the model schema for more information." - }, - "embedding": { - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/EmbeddingModelSettings" - }, - { - "type": "null" - } - ], - "title": "Embedding", - "description": "The embedding configuration handle used by the agent, specified in the format provider/model-name." + "title": "Model Settings", + "description": "The model settings for the agent." }, "context_window_limit": { "anyOf": [ diff --git a/letta/orm/agent.py b/letta/orm/agent.py index 22fd33de..01b3cbca 100644 --- a/letta/orm/agent.py +++ b/letta/orm/agent.py @@ -285,7 +285,9 @@ class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin if resolver: state[field_name] = resolver() - state["model"] = self.llm_config._to_model() if self.llm_config else None + state["model"] = self.llm_config.handle if self.llm_config else None + state["model_settings"] = self.llm_config._to_model_settings() if self.llm_config else None + state["embedding"] = self.embedding_config.handle if self.embedding_config else None return self.__pydantic_model__(**state) @@ -425,6 +427,8 @@ class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin state["managed_group"] = multi_agent_group state["tool_exec_environment_variables"] = tool_exec_environment_variables state["secrets"] = tool_exec_environment_variables - state["model"] = self.llm_config._to_model() if self.llm_config else None + state["model"] = self.llm_config.handle if self.llm_config else None + state["model_settings"] = self.llm_config._to_model_settings() if self.llm_config else None + state["embedding"] = self.embedding_config.handle if self.embedding_config else None return self.__pydantic_model__(**state) diff --git a/letta/schemas/agent.py b/letta/schemas/agent.py index 1f1aa7b3..b3fa1cab 100644 --- a/letta/schemas/agent.py +++ b/letta/schemas/agent.py @@ -5,7 +5,7 @@ from typing import Dict, List, Literal, Optional from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from letta.constants import CORE_MEMORY_LINE_NUMBER_WARNING, DEFAULT_EMBEDDING_CHUNK_SIZE -from letta.errors import AgentExportProcessingError +from letta.errors import AgentExportProcessingError, LettaInvalidArgumentError from letta.schemas.block import Block, CreateBlock from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.enums import PrimitiveType @@ -18,7 +18,7 @@ from letta.schemas.letta_stop_reason import StopReasonType from letta.schemas.llm_config import LLMConfig from letta.schemas.memory import Memory from letta.schemas.message import Message, MessageCreate -from letta.schemas.model 
import EmbeddingModelSettings, ModelSettings, ModelSettingsUnion +from letta.schemas.model import ModelSettingsUnion from letta.schemas.openai.chat_completion_response import UsageStatistics from letta.schemas.response_format import ResponseFormatUnion from letta.schemas.source import Source @@ -83,8 +83,9 @@ class AgentState(OrmMetadataBase, validate_assignment=True): embedding_config: EmbeddingConfig = Field( ..., description="Deprecated: Use `embedding` field instead. The embedding configuration used by the agent.", deprecated=True ) - model: Optional[ModelSettings] = Field(None, description="The model used by the agent.") - embedding: Optional[EmbeddingModelSettings] = Field(None, description="The embedding model used by the agent.") + model: Optional[str] = Field(None, description="The model handle used by the agent (format: provider/model-name).") + embedding: Optional[str] = Field(None, description="The embedding model handle used by the agent (format: provider/model-name).") + model_settings: Optional[ModelSettingsUnion] = Field(None, description="The model settings used by the agent.") response_format: Optional[ResponseFormatUnion] = Field( None, @@ -229,13 +230,12 @@ class CreateAgent(BaseModel, validate_assignment=True): # embedding_config: Optional[EmbeddingConfig] = Field( None, description="Deprecated: Use `embedding` field instead. The embedding configuration used by the agent.", deprecated=True ) - model: Optional[str | ModelSettingsUnion] = Field( # TODO: make this required (breaking change) + model: Optional[str] = Field( # TODO: make this required (breaking change) None, - description="The model handle or model settings for the agent to use, specified either by a handle or an object. See the model schema for more information.", - ) - embedding: Optional[str | EmbeddingModelSettings] = Field( - None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name." 
+ description="The model handle for the agent to use (format: provider/model-name).", ) + embedding: Optional[str] = Field(None, description="The embedding model handle used by the agent (format: provider/model-name).") + model_settings: Optional[ModelSettingsUnion] = Field(None, description="The model settings for the agent.") context_window_limit: Optional[int] = Field(None, description="The context window limit used by the agent.") embedding_chunk_size: Optional[int] = Field( @@ -348,9 +348,12 @@ class CreateAgent(BaseModel, validate_assignment=True): # if not model: return model + if "/" not in model: + raise LettaInvalidArgumentError("The model handle should be in the format provider/model-name", argument_name="model") + provider_name, model_name = model.split("/", 1) if not provider_name or not model_name: - raise ValueError("The llm config handle should be in the format provider/model-name") + raise LettaInvalidArgumentError("The model handle should be in the format provider/model-name", argument_name="model") return model @@ -360,9 +363,12 @@ class CreateAgent(BaseModel, validate_assignment=True): # if not embedding: return embedding + if "/" not in embedding: + raise ValueError("The embedding handle should be in the format provider/model-name") + provider_name, embedding_name = embedding.split("/", 1) if not provider_name or not embedding_name: - raise ValueError("The embedding config handle should be in the format provider/model-name") + raise ValueError("The embedding handle should be in the format provider/model-name") return embedding @@ -410,13 +416,12 @@ class UpdateAgent(BaseModel): ) # model configuration - model: Optional[str | ModelSettingsUnion ] = Field( + model: Optional[str] = Field( None, - description="The model used by the agent, specified either by a handle or an object. See the model schema for more information.", - ) - embedding: Optional[str | EmbeddingModelSettings] = Field( - None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name." + description="The model handle used by the agent (format: provider/model-name).", ) + embedding: Optional[str] = Field(None, description="The embedding model handle used by the agent (format: provider/model-name).") + model_settings: Optional[ModelSettingsUnion] = Field(None, description="The model settings for the agent.") context_window_limit: Optional[int] = Field(None, description="The context window limit used by the agent.") reasoning: Optional[bool] = Field( None, diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index d586a0df..3c1c95fe 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -255,7 +255,7 @@ class LLMConfig(BaseModel): + (f" [ip={self.model_endpoint}]" if self.model_endpoint else "") ) - def _to_model(self) -> "ModelSettings": + def _to_model_settings(self) -> "ModelSettings": """ Convert LLMConfig back into a Model schema (OpenAIModelSettings, AnthropicModelSettings, etc.). This is the inverse of the _to_legacy_config_params() methods in model.py. 
@@ -279,7 +279,6 @@ class LLMConfig(BaseModel): if self.model_endpoint_type == "openai": return OpenAIModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, reasoning=OpenAIReasoning(reasoning_effort=self.reasoning_effort or "minimal"), @@ -287,7 +286,6 @@ class LLMConfig(BaseModel): elif self.model_endpoint_type == "anthropic": thinking_type = "enabled" if self.enable_reasoner else "disabled" return AnthropicModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, thinking=AnthropicThinking(type=thinking_type, budget_tokens=self.max_reasoning_tokens or 1024), @@ -295,7 +293,6 @@ class LLMConfig(BaseModel): ) elif self.model_endpoint_type == "google_ai": return GoogleAIModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 65536, temperature=self.temperature, thinking_config=GeminiThinkingConfig( @@ -304,7 +301,6 @@ class LLMConfig(BaseModel): ) elif self.model_endpoint_type == "google_vertex": return GoogleVertexModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 65536, temperature=self.temperature, thinking_config=GeminiThinkingConfig( @@ -313,39 +309,34 @@ class LLMConfig(BaseModel): ) elif self.model_endpoint_type == "azure": return AzureModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, ) elif self.model_endpoint_type == "xai": return XAIModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, ) elif self.model_endpoint_type == "groq": return GroqModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, ) elif self.model_endpoint_type == "deepseek": return DeepseekModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, ) elif self.model_endpoint_type == "together": return TogetherModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, ) elif self.model_endpoint_type == "bedrock": - return Model(model=self.model, max_output_tokens=self.max_tokens or 4096) + return Model(max_output_tokens=self.max_tokens or 4096) else: # If we don't know the model type, use the default Model schema - return Model(model=self.model, max_output_tokens=self.max_tokens or 4096) + return Model(max_output_tokens=self.max_tokens or 4096) @classmethod def is_openai_reasoning_model(cls, config: "LLMConfig") -> bool: diff --git a/letta/schemas/model.py b/letta/schemas/model.py index 5bcb8e5f..3b8839cb 100644 --- a/letta/schemas/model.py +++ b/letta/schemas/model.py @@ -120,6 +120,25 @@ class Model(LLMConfig, ModelBase): provider_category=llm_config.provider_category, ) + @property + def model_settings_schema(self) -> Optional[dict]: + """Returns the JSON schema for the ModelSettings class corresponding to this model's provider.""" + PROVIDER_SETTINGS_MAP = { + ProviderType.openai: OpenAIModelSettings, + ProviderType.anthropic: AnthropicModelSettings, + ProviderType.google_ai: GoogleAIModelSettings, + ProviderType.google_vertex: GoogleVertexModelSettings, + ProviderType.azure: AzureModelSettings, + ProviderType.xai: XAIModelSettings, + ProviderType.groq: GroqModelSettings, + ProviderType.deepseek: DeepseekModelSettings, + ProviderType.together: TogetherModelSettings, + ProviderType.bedrock: BedrockModelSettings, + } + + settings_class = PROVIDER_SETTINGS_MAP.get(self.provider_type) + return 
settings_class.model_json_schema() if settings_class else None + class EmbeddingModel(EmbeddingConfig, ModelBase): model_type: Literal["embedding"] = Field("embedding", description="Type of model (llm or embedding)") @@ -184,18 +203,11 @@ class EmbeddingModel(EmbeddingConfig, ModelBase): class ModelSettings(BaseModel): """Schema for defining settings for a model""" - model: str = Field(..., description="The name of the model.") + # model: str = Field(..., description="The name of the model.") max_output_tokens: int = Field(4096, description="The maximum number of tokens the model can generate.") parallel_tool_calls: bool = Field(False, description="Whether to enable parallel tool calling.") -class EmbeddingModelSettings(BaseModel): - """Schema for defining settings for an embedding model""" - - model: str = Field(..., description="The name of the model.") - provider: Literal["openai", "ollama"] = Field(..., description="The provider of the model.") - - class OpenAIReasoning(BaseModel): reasoning_effort: Literal["minimal", "low", "medium", "high"] = Field( "minimal", description="The reasoning effort to use when generating text reasoning models" @@ -208,7 +220,7 @@ class OpenAIReasoning(BaseModel): class OpenAIModelSettings(ModelSettings): - provider: Literal["openai"] = Field("openai", description="The provider of the model.") + provider_type: Literal[ProviderType.openai] = Field(ProviderType.openai, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") reasoning: OpenAIReasoning = Field(OpenAIReasoning(reasoning_effort="high"), description="The reasoning configuration for the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -228,6 +240,7 @@ class OpenAIModelSettings(ModelSettings): "max_tokens": self.max_output_tokens, "reasoning_effort": self.reasoning.reasoning_effort, "response_format": self.response_format, + "parallel_tool_calls": self.parallel_tool_calls, } @@ -243,7 +256,7 @@ class AnthropicThinking(BaseModel): class AnthropicModelSettings(ModelSettings): - provider: Literal["anthropic"] = Field("anthropic", description="The provider of the model.") + provider_type: Literal[ProviderType.anthropic] = Field(ProviderType.anthropic, description="The type of the provider.") temperature: float = Field(1.0, description="The temperature of the model.") thinking: AnthropicThinking = Field( AnthropicThinking(type="enabled", budget_tokens=1024), description="The thinking configuration for the model." @@ -266,6 +279,7 @@ class AnthropicModelSettings(ModelSettings): "extended_thinking": self.thinking.type == "enabled", "thinking_budget_tokens": self.thinking.budget_tokens, "verbosity": self.verbosity, + "parallel_tool_calls": self.parallel_tool_calls, } @@ -275,7 +289,7 @@ class GeminiThinkingConfig(BaseModel): class GoogleAIModelSettings(ModelSettings): - provider: Literal["google_ai"] = Field("google_ai", description="The provider of the model.") + provider_type: Literal[ProviderType.google_ai] = Field(ProviderType.google_ai, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") thinking_config: GeminiThinkingConfig = Field( GeminiThinkingConfig(include_thoughts=True, thinking_budget=1024), description="The thinking configuration for the model." 
@@ -288,17 +302,18 @@ class GoogleAIModelSettings(ModelSettings): "temperature": self.temperature, "max_tokens": self.max_output_tokens, "max_reasoning_tokens": self.thinking_config.thinking_budget if self.thinking_config.include_thoughts else 0, + "parallel_tool_calls": self.parallel_tool_calls, } class GoogleVertexModelSettings(GoogleAIModelSettings): - provider: Literal["google_vertex"] = Field("google_vertex", description="The provider of the model.") + provider_type: Literal[ProviderType.google_vertex] = Field(ProviderType.google_vertex, description="The type of the provider.") class AzureModelSettings(ModelSettings): """Azure OpenAI model configuration (OpenAI-compatible).""" - provider: Literal["azure"] = Field("azure", description="The provider of the model.") + provider_type: Literal[ProviderType.azure] = Field(ProviderType.azure, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -307,13 +322,14 @@ class AzureModelSettings(ModelSettings): "temperature": self.temperature, "max_tokens": self.max_output_tokens, "response_format": self.response_format, + "parallel_tool_calls": self.parallel_tool_calls, } class XAIModelSettings(ModelSettings): """xAI model configuration (OpenAI-compatible).""" - provider: Literal["xai"] = Field("xai", description="The provider of the model.") + provider_type: Literal[ProviderType.xai] = Field(ProviderType.xai, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -322,13 +338,14 @@ class XAIModelSettings(ModelSettings): "temperature": self.temperature, "max_tokens": self.max_output_tokens, "response_format": self.response_format, + "parallel_tool_calls": self.parallel_tool_calls, } class GroqModelSettings(ModelSettings): """Groq model configuration (OpenAI-compatible).""" - provider: Literal["groq"] = Field("groq", description="The provider of the model.") + provider_type: Literal[ProviderType.groq] = Field(ProviderType.groq, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -343,7 +360,7 @@ class GroqModelSettings(ModelSettings): class DeepseekModelSettings(ModelSettings): """Deepseek model configuration (OpenAI-compatible).""" - provider: Literal["deepseek"] = Field("deepseek", description="The provider of the model.") + provider_type: Literal[ProviderType.deepseek] = Field(ProviderType.deepseek, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -352,13 +369,14 @@ class DeepseekModelSettings(ModelSettings): "temperature": self.temperature, "max_tokens": self.max_output_tokens, "response_format": self.response_format, + "parallel_tool_calls": self.parallel_tool_calls, } class TogetherModelSettings(ModelSettings): """Together AI model configuration (OpenAI-compatible).""" - provider: Literal["together"] = Field("together", description="The provider of the model.") + provider_type: Literal[ProviderType.together] = Field(ProviderType.together, 
description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -367,13 +385,14 @@ class TogetherModelSettings(ModelSettings): "temperature": self.temperature, "max_tokens": self.max_output_tokens, "response_format": self.response_format, + "parallel_tool_calls": self.parallel_tool_calls, } class BedrockModelSettings(ModelSettings): """AWS Bedrock model configuration.""" - provider: Literal["bedrock"] = Field("bedrock", description="The provider of the model.") + provider_type: Literal[ProviderType.bedrock] = Field(ProviderType.bedrock, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -382,6 +401,7 @@ class BedrockModelSettings(ModelSettings): "temperature": self.temperature, "max_tokens": self.max_output_tokens, "response_format": self.response_format, + "parallel_tool_calls": self.parallel_tool_calls, } @@ -398,5 +418,5 @@ ModelSettingsUnion = Annotated[ TogetherModelSettings, BedrockModelSettings, ], - Field(discriminator="provider"), + Field(discriminator="provider_type"), ] diff --git a/letta/server/server.py b/letta/server/server.py index f15e5c91..be188dac 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -436,6 +436,8 @@ class SyncServer(object): handle = f"{request.model.provider}/{request.model.model}" # TODO: figure out how to override various params additional_config_params = request.model._to_legacy_config_params() + additional_config_params["model"] = request.model.model + additional_config_params["provider_name"] = request.model.provider config_params = { "handle": handle, @@ -525,6 +527,11 @@ class SyncServer(object): request.llm_config = await self.get_cached_llm_config_async(actor=actor, **config_params) log_event(name="end get_cached_llm_config", attributes=config_params) + # update with model_settings + if request.model_settings is not None: + update_llm_config_params = request.model_settings._to_legacy_config_params() + request.llm_config.update(update_llm_config_params) + # Copy parallel_tool_calls from request to llm_config if provided if request.parallel_tool_calls is not None: if request.llm_config is None: diff --git a/tests/configs/llm_model_configs/gemini-2.5-flash.json b/tests/configs/llm_model_configs/gemini-2.5-flash.json index 387f1eb7..51e58d2e 100644 --- a/tests/configs/llm_model_configs/gemini-2.5-flash.json +++ b/tests/configs/llm_model_configs/gemini-2.5-flash.json @@ -6,5 +6,5 @@ "model_wrapper": null, "put_inner_thoughts_in_kwargs": true, "enable_reasoner": true, - "max_reasoning_tokens": 20000 + "max_reasoning_tokens": 1000 } diff --git a/tests/managers/test_agent_manager.py b/tests/managers/test_agent_manager.py index 96056cc3..aa05c3df 100644 --- a/tests/managers/test_agent_manager.py +++ b/tests/managers/test_agent_manager.py @@ -1250,7 +1250,7 @@ async def test_agent_state_schema_unchanged(server: SyncServer): from letta.schemas.group import Group from letta.schemas.llm_config import LLMConfig from letta.schemas.memory import Memory - from letta.schemas.model import EmbeddingModelSettings, ModelSettings + from letta.schemas.model import ModelSettingsUnion from letta.schemas.response_format import ResponseFormatUnion from letta.schemas.source import Source from letta.schemas.tool 
import Tool @@ -1271,9 +1271,10 @@ async def test_agent_state_schema_unchanged(server: SyncServer): "agent_type": AgentType, # LLM information "llm_config": LLMConfig, - "model": ModelSettings, - "embedding": EmbeddingModelSettings, + "model": str, + "embedding": str, "embedding_config": EmbeddingConfig, + "model_settings": (ModelSettingsUnion, type(None)), "response_format": (ResponseFormatUnion, type(None)), # State fields "description": (str, type(None)), @@ -1378,6 +1379,14 @@ async def test_agent_state_schema_unchanged(server: SyncServer): for arg in args: if typing.get_origin(arg) is dict: return True + # Handle Annotated types within Union (e.g., Union[Annotated[...], None]) + # This checks if any of the union args is an Annotated type that matches expected + for arg in args: + if typing.get_origin(arg) is typing.Annotated: + # For Annotated types, compare the first argument (the actual type) + annotated_args = typing.get_args(arg) + if annotated_args and annotated_args[0] == expected: + return True return False diff --git a/tests/sdk_v1/agents_test.py b/tests/sdk_v1/agents_test.py index 6eb7c7df..55501bef 100644 --- a/tests/sdk_v1/agents_test.py +++ b/tests/sdk_v1/agents_test.py @@ -5,11 +5,16 @@ AGENTS_CREATE_PARAMS = [ "caren_agent", {"name": "caren", "model": "openai/gpt-4o-mini", "embedding": "openai/text-embedding-3-small"}, { - # Verify model field contains the model name and settings - # Note: we override 'model' here since the input is a string but the output is a ModelSettings object - "model": {"model": "gpt-4o-mini", "max_output_tokens": 4096, "parallel_tool_calls": False}, - # Note: we override 'embedding' here since it's currently not populated in AgentState (remains None) - "embedding": None, + # Verify model_settings is populated with config values + # Note: The 'model' field itself is separate from model_settings + "model_settings": { + "max_output_tokens": 4096, + "parallel_tool_calls": False, + "provider_type": "openai", + "temperature": 0.7, + "reasoning": {"reasoning_effort": "minimal"}, + "response_format": None, + } }, None, ), @@ -20,8 +25,15 @@ AGENTS_MODIFY_PARAMS = [ "caren_agent", {"name": "caren_updated"}, { - # After modifying just the name, model field should still be present and unchanged - "model": {"model": "gpt-4o-mini", "max_output_tokens": 4096, "parallel_tool_calls": False} + # After modifying just the name, model_settings should still be present + "model_settings": { + "max_output_tokens": 4096, + "parallel_tool_calls": False, + "provider_type": "openai", + "temperature": 0.7, + "reasoning": {"reasoning_effort": "minimal"}, + "response_format": None, + } }, None, ),
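
Usage sketch for the reintroduced fields (assumes this branch's `letta` package is importable; field names and defaults are taken from the diff above, so treat this as illustrative rather than canonical):

    from letta.schemas.agent import CreateAgent
    from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning

    # `model` and `embedding` are plain "provider/model-name" handles again;
    # per-provider knobs live in the discriminated `model_settings` union.
    request = CreateAgent(
        name="caren",
        model="openai/gpt-4o-mini",
        embedding="openai/text-embedding-3-small",
        model_settings=OpenAIModelSettings(
            temperature=0.2,
            max_output_tokens=4096,
            reasoning=OpenAIReasoning(reasoning_effort="high"),
        ),
    )

    # Handles without a provider prefix are rejected by the new validator:
    # CreateAgent(model="gpt-4o-mini")  -> LettaInvalidArgumentError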
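
For readers unfamiliar with the dispatch pattern behind `ModelSettingsUnion`, here is a self-contained sketch of how a Pydantic v2 discriminated union selects a class by `provider_type` (a hypothetical two-provider version; the real union in letta/schemas/model.py covers ten providers):

    from typing import Annotated, Literal, Union

    from pydantic import BaseModel, Field, TypeAdapter


    class OpenAISettings(BaseModel):
        provider_type: Literal["openai"] = "openai"
        temperature: float = 0.7


    class AnthropicSettings(BaseModel):
        provider_type: Literal["anthropic"] = "anthropic"
        temperature: float = 1.0


    SettingsUnion = Annotated[
        Union[OpenAISettings, AnthropicSettings],
        Field(discriminator="provider_type"),
    ]

    # Validation routes the payload to the class whose `provider_type` matches.
    adapter = TypeAdapter(SettingsUnion)
    parsed = adapter.validate_python({"provider_type": "anthropic", "temperature": 0.5})
    assert isinstance(parsed, AnthropicSettings)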
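
On the server side, `model_settings` is merged back into the legacy `llm_config` via `_to_legacy_config_params()` (see the server.py hunk above). A sketch of that mapping for Anthropic, with the expected output reconstructed from the diff (the `verbosity` default is not visible in this hunk, so it is left unstated):

    from letta.schemas.model import AnthropicModelSettings, AnthropicThinking

    settings = AnthropicModelSettings(
        max_output_tokens=8192,
        thinking=AnthropicThinking(type="enabled", budget_tokens=2048),
    )

    # Per the diff, this yields roughly:
    # {"temperature": 1.0, "max_tokens": 8192, "extended_thinking": True,
    #  "thinking_budget_tokens": 2048, "verbosity": <field default>,
    #  "parallel_tool_calls": False}
    params = settings._to_legacy_config_params()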