From 6eeb3c90bb94d3eb1beab48d324190add45b7e23 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Tue, 11 Nov 2025 14:49:15 -0800 Subject: [PATCH] feat: bring back model_settings and remove validation again (#6104) --- fern/openapi.json | 400 +++++++++--------- letta/orm/agent.py | 8 +- letta/schemas/agent.py | 37 +- letta/schemas/llm_config.py | 15 +- letta/schemas/model.py | 58 ++- letta/server/server.py | 7 + .../llm_model_configs/gemini-2.5-flash.json | 2 +- tests/managers/test_agent_manager.py | 15 +- tests/sdk_v1/agents_test.py | 26 +- 9 files changed, 298 insertions(+), 270 deletions(-) diff --git a/fern/openapi.json b/fern/openapi.json index 617e19bf..e8d460e0 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -18937,24 +18937,84 @@ "model": { "anyOf": [ { - "$ref": "#/components/schemas/ModelSettings" + "type": "string" }, { "type": "null" } ], - "description": "The model used by the agent." + "title": "Model", + "description": "The model handle used by the agent (format: provider/model-name)." }, "embedding": { "anyOf": [ { - "$ref": "#/components/schemas/EmbeddingModelSettings" + "type": "string" }, { "type": "null" } ], - "description": "The embedding model used by the agent." + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIModelSettings" + }, + { + "$ref": "#/components/schemas/AnthropicModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleAIModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleVertexModelSettings" + }, + { + "$ref": "#/components/schemas/AzureModelSettings" + }, + { + "$ref": "#/components/schemas/XAIModelSettings" + }, + { + "$ref": "#/components/schemas/GroqModelSettings" + }, + { + "$ref": "#/components/schemas/DeepseekModelSettings" + }, + { + "$ref": "#/components/schemas/TogetherModelSettings" + }, + { + "$ref": "#/components/schemas/BedrockModelSettings" + } + ], + "discriminator": { + "propertyName": "provider_type", + "mapping": { + "anthropic": "#/components/schemas/AnthropicModelSettings", + "azure": "#/components/schemas/AzureModelSettings", + "bedrock": "#/components/schemas/BedrockModelSettings", + "deepseek": "#/components/schemas/DeepseekModelSettings", + "google_ai": "#/components/schemas/GoogleAIModelSettings", + "google_vertex": "#/components/schemas/GoogleVertexModelSettings", + "groq": "#/components/schemas/GroqModelSettings", + "openai": "#/components/schemas/OpenAIModelSettings", + "together": "#/components/schemas/TogetherModelSettings", + "xai": "#/components/schemas/XAIModelSettings" + } + } + }, + { + "type": "null" + } + ], + "title": "Model Settings", + "description": "The model settings used by the agent." }, "response_format": { "anyOf": [ @@ -19346,11 +19406,6 @@ }, "AnthropicModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." 
- }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -19363,11 +19418,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "anthropic", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "anthropic" }, "temperature": { @@ -19399,7 +19454,6 @@ } }, "type": "object", - "required": ["model"], "title": "AnthropicModelSettings" }, "AnthropicThinking": { @@ -20221,11 +20275,6 @@ }, "AzureModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -20238,11 +20287,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "azure", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "azure" }, "temperature": { @@ -20283,7 +20332,6 @@ } }, "type": "object", - "required": ["model"], "title": "AzureModelSettings", "description": "Azure OpenAI model configuration (OpenAI-compatible)." }, @@ -20540,11 +20588,6 @@ }, "BedrockModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -20557,11 +20600,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "bedrock", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "bedrock" }, "temperature": { @@ -20602,7 +20645,6 @@ } }, "type": "object", - "required": ["model"], "title": "BedrockModelSettings", "description": "AWS Bedrock model configuration." }, @@ -23159,6 +23201,27 @@ { "type": "string" }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model handle for the agent to use (format: provider/model-name)." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ { "oneOf": [ { @@ -23193,7 +23256,7 @@ } ], "discriminator": { - "propertyName": "provider", + "propertyName": "provider_type", "mapping": { "anthropic": "#/components/schemas/AnthropicModelSettings", "azure": "#/components/schemas/AzureModelSettings", @@ -23212,23 +23275,8 @@ "type": "null" } ], - "title": "Model", - "description": "The model handle or model settings for the agent to use, specified either by a handle or an object. See the model schema for more information." - }, - "embedding": { - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/EmbeddingModelSettings" - }, - { - "type": "null" - } - ], - "title": "Embedding", - "description": "The embedding configuration handle used by the agent, specified in the format provider/model-name." + "title": "Model Settings", + "description": "The model settings for the agent." 
}, "context_window_limit": { "anyOf": [ @@ -23987,11 +24035,6 @@ }, "DeepseekModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -24004,11 +24047,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "deepseek", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "deepseek" }, "temperature": { @@ -24049,7 +24092,6 @@ } }, "type": "object", - "required": ["model"], "title": "DeepseekModelSettings", "description": "Deepseek model configuration (OpenAI-compatible)." }, @@ -24578,25 +24620,6 @@ ], "title": "EmbeddingModel" }, - "EmbeddingModelSettings": { - "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, - "provider": { - "type": "string", - "enum": ["openai", "ollama"], - "title": "Provider", - "description": "The provider of the model." - } - }, - "type": "object", - "required": ["model", "provider"], - "title": "EmbeddingModelSettings", - "description": "Schema for defining settings for an embedding model" - }, "EventMessage": { "properties": { "id": { @@ -25746,11 +25769,6 @@ }, "GoogleAIModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -25763,11 +25781,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "google_ai", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "google_ai" }, "temperature": { @@ -25816,16 +25834,10 @@ } }, "type": "object", - "required": ["model"], "title": "GoogleAIModelSettings" }, "GoogleVertexModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -25838,11 +25850,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "google_vertex", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "google_vertex" }, "temperature": { @@ -25891,16 +25903,10 @@ } }, "type": "object", - "required": ["model"], "title": "GoogleVertexModelSettings" }, "GroqModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." 
- }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -25913,11 +25919,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "groq", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "groq" }, "temperature": { @@ -25958,7 +25964,6 @@ } }, "type": "object", - "required": ["model"], "title": "GroqModelSettings", "description": "Groq model configuration (OpenAI-compatible)." }, @@ -27361,6 +27366,27 @@ { "type": "string" }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model handle for the agent to use (format: provider/model-name)." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ { "oneOf": [ { @@ -27395,7 +27421,7 @@ } ], "discriminator": { - "propertyName": "provider", + "propertyName": "provider_type", "mapping": { "anthropic": "#/components/schemas/AnthropicModelSettings", "azure": "#/components/schemas/AzureModelSettings", @@ -27414,23 +27440,8 @@ "type": "null" } ], - "title": "Model", - "description": "The model handle or model settings for the agent to use, specified either by a handle or an object. See the model schema for more information." - }, - "embedding": { - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/EmbeddingModelSettings" - }, - { - "type": "null" - } - ], - "title": "Embedding", - "description": "The embedding configuration handle used by the agent, specified in the format provider/model-name." + "title": "Model Settings", + "description": "The model settings for the agent." }, "context_window_limit": { "anyOf": [ @@ -30628,31 +30639,6 @@ ], "title": "Model" }, - "ModelSettings": { - "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, - "max_output_tokens": { - "type": "integer", - "title": "Max Output Tokens", - "description": "The maximum number of tokens the model can generate.", - "default": 4096 - }, - "parallel_tool_calls": { - "type": "boolean", - "title": "Parallel Tool Calls", - "description": "Whether to enable parallel tool calling.", - "default": false - } - }, - "type": "object", - "required": ["model"], - "title": "ModelSettings", - "description": "Schema for defining settings for a model" - }, "ModifyApprovalRequest": { "properties": { "requires_approval": { @@ -30752,11 +30738,6 @@ }, "OpenAIModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." 
- }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -30769,11 +30750,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "openai", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "openai" }, "temperature": { @@ -30821,7 +30802,6 @@ } }, "type": "object", - "required": ["model"], "title": "OpenAIModelSettings" }, "OpenAIReasoning": { @@ -34397,11 +34377,6 @@ }, "TogetherModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -34414,11 +34389,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "together", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "together" }, "temperature": { @@ -34459,7 +34434,6 @@ } }, "type": "object", - "required": ["model"], "title": "TogetherModelSettings", "description": "Together AI model configuration (OpenAI-compatible)." }, @@ -36063,6 +36037,27 @@ { "type": "string" }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model handle used by the agent (format: provider/model-name)." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ { "oneOf": [ { @@ -36097,7 +36092,7 @@ } ], "discriminator": { - "propertyName": "provider", + "propertyName": "provider_type", "mapping": { "anthropic": "#/components/schemas/AnthropicModelSettings", "azure": "#/components/schemas/AzureModelSettings", @@ -36116,23 +36111,8 @@ "type": "null" } ], - "title": "Model", - "description": "The model used by the agent, specified either by a handle or an object. See the model schema for more information." - }, - "embedding": { - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/EmbeddingModelSettings" - }, - { - "type": "null" - } - ], - "title": "Embedding", - "description": "The embedding configuration handle used by the agent, specified in the format provider/model-name." + "title": "Model Settings", + "description": "The model settings for the agent." }, "context_window_limit": { "anyOf": [ @@ -36849,11 +36829,6 @@ }, "XAIModelSettings": { "properties": { - "model": { - "type": "string", - "title": "Model", - "description": "The name of the model." - }, "max_output_tokens": { "type": "integer", "title": "Max Output Tokens", @@ -36866,11 +36841,11 @@ "description": "Whether to enable parallel tool calling.", "default": false }, - "provider": { + "provider_type": { "type": "string", "const": "xai", - "title": "Provider", - "description": "The provider of the model.", + "title": "Provider Type", + "description": "The type of the provider.", "default": "xai" }, "temperature": { @@ -36911,7 +36886,6 @@ } }, "type": "object", - "required": ["model"], "title": "XAIModelSettings", "description": "xAI model configuration (OpenAI-compatible)." 
}, @@ -37185,6 +37159,27 @@ { "type": "string" }, + { + "type": "null" + } + ], + "title": "Model", + "description": "The model handle for the agent to use (format: provider/model-name)." + }, + "embedding": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Embedding", + "description": "The embedding model handle used by the agent (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ { "oneOf": [ { @@ -37219,7 +37214,7 @@ } ], "discriminator": { - "propertyName": "provider", + "propertyName": "provider_type", "mapping": { "anthropic": "#/components/schemas/AnthropicModelSettings", "azure": "#/components/schemas/AzureModelSettings", @@ -37238,23 +37233,8 @@ "type": "null" } ], - "title": "Model", - "description": "The model handle or model settings for the agent to use, specified either by a handle or an object. See the model schema for more information." - }, - "embedding": { - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/EmbeddingModelSettings" - }, - { - "type": "null" - } - ], - "title": "Embedding", - "description": "The embedding configuration handle used by the agent, specified in the format provider/model-name." + "title": "Model Settings", + "description": "The model settings for the agent." }, "context_window_limit": { "anyOf": [ diff --git a/letta/orm/agent.py b/letta/orm/agent.py index 22fd33de..01b3cbca 100644 --- a/letta/orm/agent.py +++ b/letta/orm/agent.py @@ -285,7 +285,9 @@ class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin if resolver: state[field_name] = resolver() - state["model"] = self.llm_config._to_model() if self.llm_config else None + state["model"] = self.llm_config.handle if self.llm_config else None + state["model_settings"] = self.llm_config._to_model_settings() if self.llm_config else None + state["embedding"] = self.embedding_config.handle if self.embedding_config else None return self.__pydantic_model__(**state) @@ -425,6 +427,8 @@ class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin state["managed_group"] = multi_agent_group state["tool_exec_environment_variables"] = tool_exec_environment_variables state["secrets"] = tool_exec_environment_variables - state["model"] = self.llm_config._to_model() if self.llm_config else None + state["model"] = self.llm_config.handle if self.llm_config else None + state["model_settings"] = self.llm_config._to_model_settings() if self.llm_config else None + state["embedding"] = self.embedding_config.handle if self.embedding_config else None return self.__pydantic_model__(**state) diff --git a/letta/schemas/agent.py b/letta/schemas/agent.py index 1f1aa7b3..b3fa1cab 100644 --- a/letta/schemas/agent.py +++ b/letta/schemas/agent.py @@ -5,7 +5,7 @@ from typing import Dict, List, Literal, Optional from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from letta.constants import CORE_MEMORY_LINE_NUMBER_WARNING, DEFAULT_EMBEDDING_CHUNK_SIZE -from letta.errors import AgentExportProcessingError +from letta.errors import AgentExportProcessingError, LettaInvalidArgumentError from letta.schemas.block import Block, CreateBlock from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.enums import PrimitiveType @@ -18,7 +18,7 @@ from letta.schemas.letta_stop_reason import StopReasonType from letta.schemas.llm_config import LLMConfig from letta.schemas.memory import Memory from letta.schemas.message import Message, MessageCreate -from letta.schemas.model 
import EmbeddingModelSettings, ModelSettings, ModelSettingsUnion +from letta.schemas.model import ModelSettingsUnion from letta.schemas.openai.chat_completion_response import UsageStatistics from letta.schemas.response_format import ResponseFormatUnion from letta.schemas.source import Source @@ -83,8 +83,9 @@ class AgentState(OrmMetadataBase, validate_assignment=True): embedding_config: EmbeddingConfig = Field( ..., description="Deprecated: Use `embedding` field instead. The embedding configuration used by the agent.", deprecated=True ) - model: Optional[ModelSettings] = Field(None, description="The model used by the agent.") - embedding: Optional[EmbeddingModelSettings] = Field(None, description="The embedding model used by the agent.") + model: Optional[str] = Field(None, description="The model handle used by the agent (format: provider/model-name).") + embedding: Optional[str] = Field(None, description="The embedding model handle used by the agent (format: provider/model-name).") + model_settings: Optional[ModelSettingsUnion] = Field(None, description="The model settings used by the agent.") response_format: Optional[ResponseFormatUnion] = Field( None, @@ -229,13 +230,12 @@ class CreateAgent(BaseModel, validate_assignment=True): # embedding_config: Optional[EmbeddingConfig] = Field( None, description="Deprecated: Use `embedding` field instead. The embedding configuration used by the agent.", deprecated=True ) - model: Optional[str | ModelSettingsUnion] = Field( # TODO: make this required (breaking change) + model: Optional[str] = Field( # TODO: make this required (breaking change) None, - description="The model handle or model settings for the agent to use, specified either by a handle or an object. See the model schema for more information.", - ) - embedding: Optional[str | EmbeddingModelSettings] = Field( - None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name." 
+ description="The model handle for the agent to use (format: provider/model-name).", ) + embedding: Optional[str] = Field(None, description="The embedding model handle used by the agent (format: provider/model-name).") + model_settings: Optional[ModelSettingsUnion] = Field(None, description="The model settings for the agent.") context_window_limit: Optional[int] = Field(None, description="The context window limit used by the agent.") embedding_chunk_size: Optional[int] = Field( @@ -348,9 +348,12 @@ class CreateAgent(BaseModel, validate_assignment=True): # if not model: return model + if "/" not in model: + raise LettaInvalidArgumentError("The model handle should be in the format provider/model-name", argument_name="model") + provider_name, model_name = model.split("/", 1) if not provider_name or not model_name: - raise ValueError("The llm config handle should be in the format provider/model-name") + raise LettaInvalidArgumentError("The model handle should be in the format provider/model-name", argument_name="model") return model @@ -360,9 +363,12 @@ class CreateAgent(BaseModel, validate_assignment=True): # if not embedding: return embedding + if "/" not in embedding: + raise ValueError("The embedding handle should be in the format provider/model-name") + provider_name, embedding_name = embedding.split("/", 1) if not provider_name or not embedding_name: - raise ValueError("The embedding config handle should be in the format provider/model-name") + raise ValueError("The embedding handle should be in the format provider/model-name") return embedding @@ -410,13 +416,12 @@ class UpdateAgent(BaseModel): ) # model configuration - model: Optional[str | ModelSettingsUnion ] = Field( + model: Optional[str] = Field( None, - description="The model used by the agent, specified either by a handle or an object. See the model schema for more information.", - ) - embedding: Optional[str | EmbeddingModelSettings] = Field( - None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name." + description="The model handle used by the agent (format: provider/model-name).", ) + embedding: Optional[str] = Field(None, description="The embedding model handle used by the agent (format: provider/model-name).") + model_settings: Optional[ModelSettingsUnion] = Field(None, description="The model settings for the agent.") context_window_limit: Optional[int] = Field(None, description="The context window limit used by the agent.") reasoning: Optional[bool] = Field( None, diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index d586a0df..3c1c95fe 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -255,7 +255,7 @@ class LLMConfig(BaseModel): + (f" [ip={self.model_endpoint}]" if self.model_endpoint else "") ) - def _to_model(self) -> "ModelSettings": + def _to_model_settings(self) -> "ModelSettings": """ Convert LLMConfig back into a Model schema (OpenAIModelSettings, AnthropicModelSettings, etc.). This is the inverse of the _to_legacy_config_params() methods in model.py. 
@@ -279,7 +279,6 @@ class LLMConfig(BaseModel): if self.model_endpoint_type == "openai": return OpenAIModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, reasoning=OpenAIReasoning(reasoning_effort=self.reasoning_effort or "minimal"), @@ -287,7 +286,6 @@ class LLMConfig(BaseModel): elif self.model_endpoint_type == "anthropic": thinking_type = "enabled" if self.enable_reasoner else "disabled" return AnthropicModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, thinking=AnthropicThinking(type=thinking_type, budget_tokens=self.max_reasoning_tokens or 1024), @@ -295,7 +293,6 @@ class LLMConfig(BaseModel): ) elif self.model_endpoint_type == "google_ai": return GoogleAIModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 65536, temperature=self.temperature, thinking_config=GeminiThinkingConfig( @@ -304,7 +301,6 @@ class LLMConfig(BaseModel): ) elif self.model_endpoint_type == "google_vertex": return GoogleVertexModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 65536, temperature=self.temperature, thinking_config=GeminiThinkingConfig( @@ -313,39 +309,34 @@ class LLMConfig(BaseModel): ) elif self.model_endpoint_type == "azure": return AzureModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, ) elif self.model_endpoint_type == "xai": return XAIModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, ) elif self.model_endpoint_type == "groq": return GroqModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, ) elif self.model_endpoint_type == "deepseek": return DeepseekModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, ) elif self.model_endpoint_type == "together": return TogetherModelSettings( - model=self.model, max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, ) elif self.model_endpoint_type == "bedrock": - return Model(model=self.model, max_output_tokens=self.max_tokens or 4096) + return Model(max_output_tokens=self.max_tokens or 4096) else: # If we don't know the model type, use the default Model schema - return Model(model=self.model, max_output_tokens=self.max_tokens or 4096) + return Model(max_output_tokens=self.max_tokens or 4096) @classmethod def is_openai_reasoning_model(cls, config: "LLMConfig") -> bool: diff --git a/letta/schemas/model.py b/letta/schemas/model.py index 5bcb8e5f..3b8839cb 100644 --- a/letta/schemas/model.py +++ b/letta/schemas/model.py @@ -120,6 +120,25 @@ class Model(LLMConfig, ModelBase): provider_category=llm_config.provider_category, ) + @property + def model_settings_schema(self) -> Optional[dict]: + """Returns the JSON schema for the ModelSettings class corresponding to this model's provider.""" + PROVIDER_SETTINGS_MAP = { + ProviderType.openai: OpenAIModelSettings, + ProviderType.anthropic: AnthropicModelSettings, + ProviderType.google_ai: GoogleAIModelSettings, + ProviderType.google_vertex: GoogleVertexModelSettings, + ProviderType.azure: AzureModelSettings, + ProviderType.xai: XAIModelSettings, + ProviderType.groq: GroqModelSettings, + ProviderType.deepseek: DeepseekModelSettings, + ProviderType.together: TogetherModelSettings, + ProviderType.bedrock: BedrockModelSettings, + } + + settings_class = PROVIDER_SETTINGS_MAP.get(self.provider_type) + return 
settings_class.model_json_schema() if settings_class else None + class EmbeddingModel(EmbeddingConfig, ModelBase): model_type: Literal["embedding"] = Field("embedding", description="Type of model (llm or embedding)") @@ -184,18 +203,11 @@ class EmbeddingModel(EmbeddingConfig, ModelBase): class ModelSettings(BaseModel): """Schema for defining settings for a model""" - model: str = Field(..., description="The name of the model.") + # model: str = Field(..., description="The name of the model.") max_output_tokens: int = Field(4096, description="The maximum number of tokens the model can generate.") parallel_tool_calls: bool = Field(False, description="Whether to enable parallel tool calling.") -class EmbeddingModelSettings(BaseModel): - """Schema for defining settings for an embedding model""" - - model: str = Field(..., description="The name of the model.") - provider: Literal["openai", "ollama"] = Field(..., description="The provider of the model.") - - class OpenAIReasoning(BaseModel): reasoning_effort: Literal["minimal", "low", "medium", "high"] = Field( "minimal", description="The reasoning effort to use when generating text reasoning models" @@ -208,7 +220,7 @@ class OpenAIReasoning(BaseModel): class OpenAIModelSettings(ModelSettings): - provider: Literal["openai"] = Field("openai", description="The provider of the model.") + provider_type: Literal[ProviderType.openai] = Field(ProviderType.openai, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") reasoning: OpenAIReasoning = Field(OpenAIReasoning(reasoning_effort="high"), description="The reasoning configuration for the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -228,6 +240,7 @@ class OpenAIModelSettings(ModelSettings): "max_tokens": self.max_output_tokens, "reasoning_effort": self.reasoning.reasoning_effort, "response_format": self.response_format, + "parallel_tool_calls": self.parallel_tool_calls, } @@ -243,7 +256,7 @@ class AnthropicThinking(BaseModel): class AnthropicModelSettings(ModelSettings): - provider: Literal["anthropic"] = Field("anthropic", description="The provider of the model.") + provider_type: Literal[ProviderType.anthropic] = Field(ProviderType.anthropic, description="The type of the provider.") temperature: float = Field(1.0, description="The temperature of the model.") thinking: AnthropicThinking = Field( AnthropicThinking(type="enabled", budget_tokens=1024), description="The thinking configuration for the model." @@ -266,6 +279,7 @@ class AnthropicModelSettings(ModelSettings): "extended_thinking": self.thinking.type == "enabled", "thinking_budget_tokens": self.thinking.budget_tokens, "verbosity": self.verbosity, + "parallel_tool_calls": self.parallel_tool_calls, } @@ -275,7 +289,7 @@ class GeminiThinkingConfig(BaseModel): class GoogleAIModelSettings(ModelSettings): - provider: Literal["google_ai"] = Field("google_ai", description="The provider of the model.") + provider_type: Literal[ProviderType.google_ai] = Field(ProviderType.google_ai, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") thinking_config: GeminiThinkingConfig = Field( GeminiThinkingConfig(include_thoughts=True, thinking_budget=1024), description="The thinking configuration for the model." 
@@ -288,17 +302,18 @@ class GoogleAIModelSettings(ModelSettings): "temperature": self.temperature, "max_tokens": self.max_output_tokens, "max_reasoning_tokens": self.thinking_config.thinking_budget if self.thinking_config.include_thoughts else 0, + "parallel_tool_calls": self.parallel_tool_calls, } class GoogleVertexModelSettings(GoogleAIModelSettings): - provider: Literal["google_vertex"] = Field("google_vertex", description="The provider of the model.") + provider_type: Literal[ProviderType.google_vertex] = Field(ProviderType.google_vertex, description="The type of the provider.") class AzureModelSettings(ModelSettings): """Azure OpenAI model configuration (OpenAI-compatible).""" - provider: Literal["azure"] = Field("azure", description="The provider of the model.") + provider_type: Literal[ProviderType.azure] = Field(ProviderType.azure, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -307,13 +322,14 @@ class AzureModelSettings(ModelSettings): "temperature": self.temperature, "max_tokens": self.max_output_tokens, "response_format": self.response_format, + "parallel_tool_calls": self.parallel_tool_calls, } class XAIModelSettings(ModelSettings): """xAI model configuration (OpenAI-compatible).""" - provider: Literal["xai"] = Field("xai", description="The provider of the model.") + provider_type: Literal[ProviderType.xai] = Field(ProviderType.xai, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -322,13 +338,14 @@ class XAIModelSettings(ModelSettings): "temperature": self.temperature, "max_tokens": self.max_output_tokens, "response_format": self.response_format, + "parallel_tool_calls": self.parallel_tool_calls, } class GroqModelSettings(ModelSettings): """Groq model configuration (OpenAI-compatible).""" - provider: Literal["groq"] = Field("groq", description="The provider of the model.") + provider_type: Literal[ProviderType.groq] = Field(ProviderType.groq, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -343,7 +360,7 @@ class GroqModelSettings(ModelSettings): class DeepseekModelSettings(ModelSettings): """Deepseek model configuration (OpenAI-compatible).""" - provider: Literal["deepseek"] = Field("deepseek", description="The provider of the model.") + provider_type: Literal[ProviderType.deepseek] = Field(ProviderType.deepseek, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -352,13 +369,14 @@ class DeepseekModelSettings(ModelSettings): "temperature": self.temperature, "max_tokens": self.max_output_tokens, "response_format": self.response_format, + "parallel_tool_calls": self.parallel_tool_calls, } class TogetherModelSettings(ModelSettings): """Together AI model configuration (OpenAI-compatible).""" - provider: Literal["together"] = Field("together", description="The provider of the model.") + provider_type: Literal[ProviderType.together] = Field(ProviderType.together, 
description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -367,13 +385,14 @@ class TogetherModelSettings(ModelSettings): "temperature": self.temperature, "max_tokens": self.max_output_tokens, "response_format": self.response_format, + "parallel_tool_calls": self.parallel_tool_calls, } class BedrockModelSettings(ModelSettings): """AWS Bedrock model configuration.""" - provider: Literal["bedrock"] = Field("bedrock", description="The provider of the model.") + provider_type: Literal[ProviderType.bedrock] = Field(ProviderType.bedrock, description="The type of the provider.") temperature: float = Field(0.7, description="The temperature of the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") @@ -382,6 +401,7 @@ class BedrockModelSettings(ModelSettings): "temperature": self.temperature, "max_tokens": self.max_output_tokens, "response_format": self.response_format, + "parallel_tool_calls": self.parallel_tool_calls, } @@ -398,5 +418,5 @@ ModelSettingsUnion = Annotated[ TogetherModelSettings, BedrockModelSettings, ], - Field(discriminator="provider"), + Field(discriminator="provider_type"), ] diff --git a/letta/server/server.py b/letta/server/server.py index f15e5c91..be188dac 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -436,6 +436,8 @@ class SyncServer(object): handle = f"{request.model.provider}/{request.model.model}" # TODO: figure out how to override various params additional_config_params = request.model._to_legacy_config_params() + additional_config_params["model"] = request.model.model + additional_config_params["provider_name"] = request.model.provider config_params = { "handle": handle, @@ -525,6 +527,11 @@ class SyncServer(object): request.llm_config = await self.get_cached_llm_config_async(actor=actor, **config_params) log_event(name="end get_cached_llm_config", attributes=config_params) + # update with model_settings + if request.model_settings is not None: + update_llm_config_params = request.model_settings._to_legacy_config_params() + request.llm_config.update(update_llm_config_params) + # Copy parallel_tool_calls from request to llm_config if provided if request.parallel_tool_calls is not None: if request.llm_config is None: diff --git a/tests/configs/llm_model_configs/gemini-2.5-flash.json b/tests/configs/llm_model_configs/gemini-2.5-flash.json index 387f1eb7..51e58d2e 100644 --- a/tests/configs/llm_model_configs/gemini-2.5-flash.json +++ b/tests/configs/llm_model_configs/gemini-2.5-flash.json @@ -6,5 +6,5 @@ "model_wrapper": null, "put_inner_thoughts_in_kwargs": true, "enable_reasoner": true, - "max_reasoning_tokens": 20000 + "max_reasoning_tokens": 1000 } diff --git a/tests/managers/test_agent_manager.py b/tests/managers/test_agent_manager.py index 96056cc3..aa05c3df 100644 --- a/tests/managers/test_agent_manager.py +++ b/tests/managers/test_agent_manager.py @@ -1250,7 +1250,7 @@ async def test_agent_state_schema_unchanged(server: SyncServer): from letta.schemas.group import Group from letta.schemas.llm_config import LLMConfig from letta.schemas.memory import Memory - from letta.schemas.model import EmbeddingModelSettings, ModelSettings + from letta.schemas.model import ModelSettingsUnion from letta.schemas.response_format import ResponseFormatUnion from letta.schemas.source import Source from letta.schemas.tool 
import Tool @@ -1271,9 +1271,10 @@ async def test_agent_state_schema_unchanged(server: SyncServer): "agent_type": AgentType, # LLM information "llm_config": LLMConfig, - "model": ModelSettings, - "embedding": EmbeddingModelSettings, + "model": str, + "embedding": str, "embedding_config": EmbeddingConfig, + "model_settings": (ModelSettingsUnion, type(None)), "response_format": (ResponseFormatUnion, type(None)), # State fields "description": (str, type(None)), @@ -1378,6 +1379,14 @@ async def test_agent_state_schema_unchanged(server: SyncServer): for arg in args: if typing.get_origin(arg) is dict: return True + # Handle Annotated types within Union (e.g., Union[Annotated[...], None]) + # This checks if any of the union args is an Annotated type that matches expected + for arg in args: + if typing.get_origin(arg) is typing.Annotated: + # For Annotated types, compare the first argument (the actual type) + annotated_args = typing.get_args(arg) + if annotated_args and annotated_args[0] == expected: + return True return False diff --git a/tests/sdk_v1/agents_test.py b/tests/sdk_v1/agents_test.py index 6eb7c7df..55501bef 100644 --- a/tests/sdk_v1/agents_test.py +++ b/tests/sdk_v1/agents_test.py @@ -5,11 +5,16 @@ AGENTS_CREATE_PARAMS = [ "caren_agent", {"name": "caren", "model": "openai/gpt-4o-mini", "embedding": "openai/text-embedding-3-small"}, { - # Verify model field contains the model name and settings - # Note: we override 'model' here since the input is a string but the output is a ModelSettings object - "model": {"model": "gpt-4o-mini", "max_output_tokens": 4096, "parallel_tool_calls": False}, - # Note: we override 'embedding' here since it's currently not populated in AgentState (remains None) - "embedding": None, + # Verify model_settings is populated with config values + # Note: The 'model' field itself is separate from model_settings + "model_settings": { + "max_output_tokens": 4096, + "parallel_tool_calls": False, + "provider_type": "openai", + "temperature": 0.7, + "reasoning": {"reasoning_effort": "minimal"}, + "response_format": None, + } }, None, ), @@ -20,8 +25,15 @@ AGENTS_MODIFY_PARAMS = [ "caren_agent", {"name": "caren_updated"}, { - # After modifying just the name, model field should still be present and unchanged - "model": {"model": "gpt-4o-mini", "max_output_tokens": 4096, "parallel_tool_calls": False} + # After modifying just the name, model_settings should still be present + "model_settings": { + "max_output_tokens": 4096, + "parallel_tool_calls": False, + "provider_type": "openai", + "temperature": 0.7, + "reasoning": {"reasoning_effort": "minimal"}, + "response_format": None, + } }, None, ),
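
Usage sketch for the reintroduced fields (assumes this branch's `letta` package is importable; field names and defaults are taken from the diff above, so treat this as illustrative rather than canonical):

    from letta.schemas.agent import CreateAgent
    from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning

    # `model` and `embedding` are plain "provider/model-name" handles again;
    # per-provider knobs live in the discriminated `model_settings` union.
    request = CreateAgent(
        name="caren",
        model="openai/gpt-4o-mini",
        embedding="openai/text-embedding-3-small",
        model_settings=OpenAIModelSettings(
            temperature=0.2,
            max_output_tokens=4096,
            reasoning=OpenAIReasoning(reasoning_effort="high"),
        ),
    )

    # Handles without a provider prefix are rejected by the new validator:
    # CreateAgent(model="gpt-4o-mini")  -> LettaInvalidArgumentError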
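
For readers unfamiliar with the dispatch pattern behind `ModelSettingsUnion`, here is a self-contained sketch of how a Pydantic v2 discriminated union selects a class by `provider_type` (a hypothetical two-provider version; the real union in letta/schemas/model.py covers ten providers):

    from typing import Annotated, Literal, Union

    from pydantic import BaseModel, Field, TypeAdapter


    class OpenAISettings(BaseModel):
        provider_type: Literal["openai"] = "openai"
        temperature: float = 0.7


    class AnthropicSettings(BaseModel):
        provider_type: Literal["anthropic"] = "anthropic"
        temperature: float = 1.0


    SettingsUnion = Annotated[
        Union[OpenAISettings, AnthropicSettings],
        Field(discriminator="provider_type"),
    ]

    # Validation routes the payload to the class whose `provider_type` matches.
    adapter = TypeAdapter(SettingsUnion)
    parsed = adapter.validate_python({"provider_type": "anthropic", "temperature": 0.5})
    assert isinstance(parsed, AnthropicSettings)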
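
On the server side, `model_settings` is merged back into the legacy `llm_config` via `_to_legacy_config_params()` (see the server.py hunk above). A sketch of that mapping for Anthropic, with the expected output reconstructed from the diff (the `verbosity` default is not visible in this hunk, so it is left unstated):

    from letta.schemas.model import AnthropicModelSettings, AnthropicThinking

    settings = AnthropicModelSettings(
        max_output_tokens=8192,
        thinking=AnthropicThinking(type="enabled", budget_tokens=2048),
    )

    # Per the diff, this yields roughly:
    # {"temperature": 1.0, "max_tokens": 8192, "extended_thinking": True,
    #  "thinking_budget_tokens": 2048, "verbosity": <field default>,
    #  "parallel_tool_calls": False}
    params = settings._to_legacy_config_params()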