Revert "feat: provider-specific model configuration " (#5873)
Revert "feat: provider-specific model configuration (#5774)" This reverts commit 34a334949a3ef72cd49ff0ca3da9e85d16daa57c.
This commit is contained in:
committed by
Caren Thomas
parent
d3ab51b822
commit
cfeed463a9
1403
fern/openapi.json
1403
fern/openapi.json
File diff suppressed because it is too large
Load Diff
@@ -280,8 +280,6 @@ class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin
|
||||
if resolver:
|
||||
state[field_name] = resolver()
|
||||
|
||||
state["model"] = self.llm_config._to_model() if self.llm_config else None
|
||||
|
||||
return self.__pydantic_model__(**state)
|
||||
|
||||
async def to_pydantic_async(
|
||||
@@ -419,6 +417,5 @@ class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin
|
||||
state["managed_group"] = multi_agent_group
|
||||
state["tool_exec_environment_variables"] = tool_exec_environment_variables
|
||||
state["secrets"] = tool_exec_environment_variables
|
||||
state["model"] = self.llm_config._to_model() if self.llm_config else None
|
||||
|
||||
return self.__pydantic_model__(**state)
|
||||
|
||||
@@ -17,7 +17,6 @@ from letta.schemas.letta_base import OrmMetadataBase
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.memory import Memory
|
||||
from letta.schemas.message import Message, MessageCreate
|
||||
from letta.schemas.model import EmbeddingModelSettings, ModelSettings
|
||||
from letta.schemas.openai.chat_completion_response import UsageStatistics
|
||||
from letta.schemas.response_format import ResponseFormatUnion
|
||||
from letta.schemas.source import Source
|
||||
@@ -88,19 +87,11 @@ class AgentState(OrmMetadataBase, validate_assignment=True):
|
||||
# agent configuration
|
||||
agent_type: AgentType = Field(..., description="The type of agent.")
|
||||
|
||||
# model information
|
||||
llm_config: LLMConfig = Field(
|
||||
..., description="Deprecated: Use `model` field instead. The LLM configuration used by the agent.", deprecated=True
|
||||
)
|
||||
embedding_config: EmbeddingConfig = Field(
|
||||
..., description="Deprecated: Use `embedding` field instead. The embedding configuration used by the agent.", deprecated=True
|
||||
)
|
||||
model: Optional[ModelSettings] = Field(None, description="The model used by the agent.")
|
||||
embedding: Optional[EmbeddingModelSettings] = Field(None, description="The embedding model used by the agent.")
|
||||
|
||||
# llm information
|
||||
llm_config: LLMConfig = Field(..., description="The LLM configuration used by the agent.")
|
||||
embedding_config: EmbeddingConfig = Field(..., description="The embedding configuration used by the agent.")
|
||||
response_format: Optional[ResponseFormatUnion] = Field(
|
||||
None,
|
||||
description="The response format used by the agent",
|
||||
None, description="The response format used by the agent when returning from `send_message`."
|
||||
)
|
||||
|
||||
# This is an object representing the in-process state of a running `Agent`
|
||||
@@ -108,7 +99,7 @@ class AgentState(OrmMetadataBase, validate_assignment=True):
|
||||
description: Optional[str] = Field(None, description="The description of the agent.")
|
||||
metadata: Optional[Dict] = Field(None, description="The metadata of the agent.")
|
||||
|
||||
memory: Memory = Field(..., description="Deprecated: Use `blocks` field instead. The in-context memory of the agent.", deprecated=True)
|
||||
memory: Memory = Field(..., description="The in-context memory of the agent.", deprecated=True)
|
||||
blocks: List[Block] = Field(..., description="The memory blocks used by the agent.")
|
||||
tools: List[Tool] = Field(..., description="The tools used by the agent.")
|
||||
sources: List[Source] = Field(..., description="The sources used by the agent.")
|
||||
@@ -126,9 +117,7 @@ class AgentState(OrmMetadataBase, validate_assignment=True):
|
||||
base_template_id: Optional[str] = Field(None, description="The base template id of the agent.")
|
||||
deployment_id: Optional[str] = Field(None, description="The id of the deployment.")
|
||||
entity_id: Optional[str] = Field(None, description="The id of the entity within the template.")
|
||||
identity_ids: List[str] = Field(
|
||||
[], description="Deprecated: Use `identities` field instead. The ids of the identities associated with this agent.", deprecated=True
|
||||
)
|
||||
identity_ids: List[str] = Field([], description="The ids of the identities associated with this agent.", deprecated=True)
|
||||
identities: List[Identity] = Field([], description="The identities associated with this agent.")
|
||||
|
||||
# An advanced configuration that makes it so this agent does not remember any previous messages
|
||||
@@ -141,9 +130,7 @@ class AgentState(OrmMetadataBase, validate_assignment=True):
|
||||
description="If set to True, memory management will move to a background agent thread.",
|
||||
)
|
||||
|
||||
multi_agent_group: Optional[Group] = Field(
|
||||
None, description="Deprecated: Use `managed_group` field instead. The multi-agent group that this agent manages.", deprecated=True
|
||||
)
|
||||
multi_agent_group: Optional[Group] = Field(None, description="The multi-agent group that this agent manages", deprecated=True)
|
||||
managed_group: Optional[Group] = Field(None, description="The multi-agent group that this agent manages")
|
||||
# Run metrics
|
||||
last_run_completion: Optional[datetime] = Field(None, description="The timestamp when the agent last completed a run.")
|
||||
@@ -215,6 +202,8 @@ class CreateAgent(BaseModel, validate_assignment=True): #
|
||||
tags: Optional[List[str]] = Field(None, description="The tags associated with the agent.")
|
||||
system: Optional[str] = Field(None, description="The system prompt used by the agent.")
|
||||
agent_type: AgentType = Field(default_factory=lambda: AgentType.memgpt_v2_agent, description="The type of agent.")
|
||||
llm_config: Optional[LLMConfig] = Field(None, description="The LLM configuration used by the agent.")
|
||||
embedding_config: Optional[EmbeddingConfig] = Field(None, description="The embedding configuration used by the agent.")
|
||||
# Note: if this is None, then we'll populate with the standard "more human than human" initial message sequence
|
||||
# If the client wants to make this empty, then the client can set the arg to an empty list
|
||||
initial_message_sequence: Optional[List[MessageCreate]] = Field(
|
||||
@@ -227,78 +216,43 @@ class CreateAgent(BaseModel, validate_assignment=True): #
|
||||
include_base_tool_rules: Optional[bool] = Field(
|
||||
None, description="If true, attaches the Letta base tool rules (e.g. deny all tools not explicitly allowed)."
|
||||
)
|
||||
include_default_source: bool = Field( # TODO: get rid of this
|
||||
False, description="If true, automatically creates and attaches a default data source for this agent.", deprecated=True
|
||||
include_default_source: bool = Field(
|
||||
False, description="If true, automatically creates and attaches a default data source for this agent."
|
||||
)
|
||||
description: Optional[str] = Field(None, description="The description of the agent.")
|
||||
metadata: Optional[Dict] = Field(None, description="The metadata of the agent.")
|
||||
|
||||
# model configuration
|
||||
llm_config: Optional[LLMConfig] = Field(
|
||||
None, description="Deprecated: Use `model` field instead. The LLM configuration used by the agent.", deprecated=True
|
||||
)
|
||||
embedding_config: Optional[EmbeddingConfig] = Field(
|
||||
None, description="Deprecated: Use `embedding` field instead. The embedding configuration used by the agent.", deprecated=True
|
||||
)
|
||||
model: Optional[str | ModelSettings] = Field( # TODO: make this required (breaking change)
|
||||
model: Optional[str] = Field(
|
||||
None,
|
||||
description="The model handle or model settings for the agent to use, specified either by a handle or an object. See the model schema for more information.",
|
||||
description="The LLM configuration handle used by the agent, specified in the format "
|
||||
"provider/model-name, as an alternative to specifying llm_config.",
|
||||
)
|
||||
embedding: Optional[str | EmbeddingModelSettings] = Field(
|
||||
embedding: Optional[str] = Field(
|
||||
None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name."
|
||||
)
|
||||
|
||||
context_window_limit: Optional[int] = Field(None, description="The context window limit used by the agent.")
|
||||
embedding_chunk_size: Optional[int] = Field(
|
||||
DEFAULT_EMBEDDING_CHUNK_SIZE, description="Deprecated: No longer used. The embedding chunk size used by the agent.", deprecated=True
|
||||
)
|
||||
embedding_chunk_size: Optional[int] = Field(DEFAULT_EMBEDDING_CHUNK_SIZE, description="The embedding chunk size used by the agent.")
|
||||
max_tokens: Optional[int] = Field(
|
||||
None,
|
||||
description="Deprecated: Use `model` field to configure max output tokens instead. The maximum number of tokens to generate, including reasoning step.",
|
||||
deprecated=True,
|
||||
description="The maximum number of tokens to generate, including reasoning step. If not set, the model will use its default value.",
|
||||
)
|
||||
max_reasoning_tokens: Optional[int] = Field(
|
||||
None,
|
||||
description="Deprecated: Use `model` field to configure reasoning tokens instead. The maximum number of tokens to generate for reasoning step.",
|
||||
deprecated=True,
|
||||
None, description="The maximum number of tokens to generate for reasoning step. If not set, the model will use its default value."
|
||||
)
|
||||
enable_reasoner: Optional[bool] = Field(
|
||||
True,
|
||||
description="Deprecated: Use `model` field to configure reasoning instead. Whether to enable internal extended thinking step for a reasoner model.",
|
||||
deprecated=True,
|
||||
)
|
||||
reasoning: Optional[bool] = Field(
|
||||
None,
|
||||
description="Deprecated: Use `model` field to configure reasoning instead. Whether to enable reasoning for this agent.",
|
||||
deprecated=True,
|
||||
)
|
||||
from_template: Optional[str] = Field(
|
||||
None, description="Deprecated: please use the 'create agents from a template' endpoint instead.", deprecated=True
|
||||
)
|
||||
template: bool = Field(False, description="Deprecated: No longer used.", deprecated=True)
|
||||
enable_reasoner: Optional[bool] = Field(True, description="Whether to enable internal extended thinking step for a reasoner model.")
|
||||
reasoning: Optional[bool] = Field(None, description="Whether to enable reasoning for this agent.")
|
||||
from_template: Optional[str] = Field(None, description="Deprecated: please use the 'create agents from a template' endpoint instead.")
|
||||
template: bool = Field(False, description="Deprecated: No longer used")
|
||||
project: Optional[str] = Field(
|
||||
None,
|
||||
deprecated=True,
|
||||
description="Deprecated: Project should now be passed via the X-Project header instead of in the request body. If using the SDK, this can be done via the x_project parameter.",
|
||||
)
|
||||
tool_exec_environment_variables: Optional[Dict[str, str]] = Field(
|
||||
None, description="Deprecated: Use `secrets` field instead. Environment variables for tool execution.", deprecated=True
|
||||
description="Deprecated: Project should now be passed via the X-Project header instead of in the request body. If using the sdk, this can be done via the new x_project field below.",
|
||||
)
|
||||
tool_exec_environment_variables: Optional[Dict[str, str]] = Field(None, description="Deprecated: use `secrets` field instead.")
|
||||
secrets: Optional[Dict[str, str]] = Field(None, description="The environment variables for tool execution specific to this agent.")
|
||||
memory_variables: Optional[Dict[str, str]] = Field(
|
||||
None,
|
||||
description="Deprecated: Only relevant for creating agents from a template. Use the 'create agents from a template' endpoint instead.",
|
||||
deprecated=True,
|
||||
)
|
||||
project_id: Optional[str] = Field(
|
||||
None, description="Deprecated: No longer used. The id of the project the agent belongs to.", deprecated=True
|
||||
)
|
||||
template_id: Optional[str] = Field(
|
||||
None, description="Deprecated: No longer used. The id of the template the agent belongs to.", deprecated=True
|
||||
)
|
||||
base_template_id: Optional[str] = Field(
|
||||
None, description="Deprecated: No longer used. The base template id of the agent.", deprecated=True
|
||||
)
|
||||
memory_variables: Optional[Dict[str, str]] = Field(None, description="The variables that should be set for the agent.")
|
||||
project_id: Optional[str] = Field(None, description="The id of the project the agent belongs to.")
|
||||
template_id: Optional[str] = Field(None, description="The id of the template the agent belongs to.")
|
||||
base_template_id: Optional[str] = Field(None, description="The base template id of the agent.")
|
||||
identity_ids: Optional[List[str]] = Field(None, description="The ids of the identities associated with this agent.")
|
||||
message_buffer_autoclear: bool = Field(
|
||||
False,
|
||||
@@ -317,14 +271,9 @@ class CreateAgent(BaseModel, validate_assignment=True): #
|
||||
)
|
||||
hidden: Optional[bool] = Field(
|
||||
None,
|
||||
description="Deprecated: No longer used. If set to True, the agent will be hidden.",
|
||||
deprecated=True,
|
||||
)
|
||||
parallel_tool_calls: Optional[bool] = Field(
|
||||
False,
|
||||
description="Deprecated: Use `model` field to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
|
||||
deprecated=True,
|
||||
description="If set to True, the agent will be hidden.",
|
||||
)
|
||||
parallel_tool_calls: Optional[bool] = Field(False, description="If set to True, enables parallel tool calling. Defaults to False.")
|
||||
|
||||
@field_validator("name")
|
||||
@classmethod
|
||||
@@ -406,6 +355,8 @@ class UpdateAgent(BaseModel):
|
||||
tags: Optional[List[str]] = Field(None, description="The tags associated with the agent.")
|
||||
system: Optional[str] = Field(None, description="The system prompt used by the agent.")
|
||||
tool_rules: Optional[List[ToolRule]] = Field(None, description="The tool rules governing the agent.")
|
||||
llm_config: Optional[LLMConfig] = Field(None, description="The LLM configuration used by the agent.")
|
||||
embedding_config: Optional[EmbeddingConfig] = Field(None, description="The embedding configuration used by the agent.")
|
||||
message_ids: Optional[List[str]] = Field(None, description="The ids of the messages in the agent's in-context memory.")
|
||||
description: Optional[str] = Field(None, description="The description of the agent.")
|
||||
metadata: Optional[Dict] = Field(None, description="The metadata of the agent.")
|
||||
@@ -419,42 +370,22 @@ class UpdateAgent(BaseModel):
|
||||
None,
|
||||
description="If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.",
|
||||
)
|
||||
|
||||
# model configuration
|
||||
model: Optional[str | ModelSettings] = Field(
|
||||
model: Optional[str] = Field(
|
||||
None,
|
||||
description="The model used by the agent, specified either by a handle or an object. See the model schema for more information.",
|
||||
description="The LLM configuration handle used by the agent, specified in the format "
|
||||
"provider/model-name, as an alternative to specifying llm_config.",
|
||||
)
|
||||
embedding: Optional[str | EmbeddingModelSettings] = Field(
|
||||
embedding: Optional[str] = Field(
|
||||
None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name."
|
||||
)
|
||||
context_window_limit: Optional[int] = Field(None, description="The context window limit used by the agent.")
|
||||
reasoning: Optional[bool] = Field(
|
||||
None,
|
||||
description="Deprecated: Use `model` field to configure reasoning instead. Whether to enable reasoning for this agent.",
|
||||
deprecated=True,
|
||||
)
|
||||
llm_config: Optional[LLMConfig] = Field(
|
||||
None, description="Deprecated: Use `model` field instead. The LLM configuration used by the agent.", deprecated=True
|
||||
)
|
||||
embedding_config: Optional[EmbeddingConfig] = Field(None, description="The embedding configuration used by the agent.")
|
||||
parallel_tool_calls: Optional[bool] = Field(
|
||||
False,
|
||||
description="Deprecated: Use `model` field to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
|
||||
deprecated=True,
|
||||
)
|
||||
response_format: Optional[ResponseFormatUnion] = Field(
|
||||
None,
|
||||
description="Deprecated: Use `model` field to configure response format instead. The response format for the agent.",
|
||||
deprecated=True,
|
||||
)
|
||||
max_tokens: Optional[int] = Field(
|
||||
None,
|
||||
description="Deprecated: Use `model` field to configure max output tokens instead. The maximum number of tokens to generate, including reasoning step.",
|
||||
deprecated=True,
|
||||
description="The maximum number of tokens to generate, including reasoning step. If not set, the model will use its default value.",
|
||||
)
|
||||
|
||||
reasoning: Optional[bool] = Field(None, description="Whether to enable reasoning for this agent.")
|
||||
enable_sleeptime: Optional[bool] = Field(None, description="If set to True, memory management will move to a background agent thread.")
|
||||
response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the agent.")
|
||||
last_run_completion: Optional[datetime] = Field(None, description="The timestamp when the agent last completed a run.")
|
||||
last_run_duration_ms: Optional[int] = Field(None, description="The duration in milliseconds of the agent's last run.")
|
||||
timezone: Optional[str] = Field(None, description="The timezone of the agent (IANA format).")
|
||||
@@ -470,6 +401,7 @@ class UpdateAgent(BaseModel):
|
||||
None,
|
||||
description="If set to True, the agent will be hidden.",
|
||||
)
|
||||
parallel_tool_calls: Optional[bool] = Field(False, description="If set to True, enables parallel tool calling. Defaults to False.")
|
||||
|
||||
model_config = ConfigDict(extra="ignore") # Ignores extra fields
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from typing import TYPE_CHECKING, Annotated, Literal, Optional, Union
|
||||
from typing import TYPE_CHECKING, Literal, Optional
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
||||
|
||||
@@ -7,9 +7,6 @@ from letta.errors import LettaInvalidArgumentError
|
||||
from letta.log import get_logger
|
||||
from letta.schemas.enums import AgentType, ProviderCategory
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from letta.schemas.model import ModelSettings
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -255,98 +252,6 @@ class LLMConfig(BaseModel):
|
||||
+ (f" [ip={self.model_endpoint}]" if self.model_endpoint else "")
|
||||
)
|
||||
|
||||
def _to_model(self) -> "ModelSettings":
|
||||
"""
|
||||
Convert LLMConfig back into a Model schema (OpenAIModel, AnthropicModel, etc.).
|
||||
This is the inverse of the _to_legacy_config_params() methods in model.py.
|
||||
"""
|
||||
from letta.schemas.model import (
|
||||
AnthropicModel,
|
||||
AnthropicThinking,
|
||||
AzureModel,
|
||||
BedrockModel,
|
||||
DeepseekModel,
|
||||
GeminiThinkingConfig,
|
||||
GoogleAIModel,
|
||||
GoogleVertexModel,
|
||||
GroqModel,
|
||||
Model,
|
||||
OpenAIModel,
|
||||
OpenAIReasoning,
|
||||
TogetherModel,
|
||||
XAIModel,
|
||||
)
|
||||
|
||||
if self.model_endpoint_type == "openai":
|
||||
return OpenAIModel(
|
||||
model=self.model,
|
||||
max_output_tokens=self.max_tokens or 4096,
|
||||
temperature=self.temperature,
|
||||
reasoning=OpenAIReasoning(reasoning_effort=self.reasoning_effort or "minimal"),
|
||||
)
|
||||
elif self.model_endpoint_type == "anthropic":
|
||||
thinking_type = "enabled" if self.enable_reasoner else "disabled"
|
||||
return AnthropicModel(
|
||||
model=self.model,
|
||||
max_output_tokens=self.max_tokens or 4096,
|
||||
temperature=self.temperature,
|
||||
thinking=AnthropicThinking(type=thinking_type, budget_tokens=self.max_reasoning_tokens or 1024),
|
||||
verbosity=self.verbosity,
|
||||
)
|
||||
elif self.model_endpoint_type == "google_ai":
|
||||
return GoogleAIModel(
|
||||
model=self.model,
|
||||
max_output_tokens=self.max_tokens or 65536,
|
||||
temperature=self.temperature,
|
||||
thinking_config=GeminiThinkingConfig(
|
||||
include_thoughts=self.max_reasoning_tokens > 0, thinking_budget=self.max_reasoning_tokens or 1024
|
||||
),
|
||||
)
|
||||
elif self.model_endpoint_type == "google_vertex":
|
||||
return GoogleVertexModel(
|
||||
model=self.model,
|
||||
max_output_tokens=self.max_tokens or 65536,
|
||||
temperature=self.temperature,
|
||||
thinking_config=GeminiThinkingConfig(
|
||||
include_thoughts=self.max_reasoning_tokens > 0, thinking_budget=self.max_reasoning_tokens or 1024
|
||||
),
|
||||
)
|
||||
elif self.model_endpoint_type == "azure":
|
||||
return AzureModel(
|
||||
model=self.model,
|
||||
max_output_tokens=self.max_tokens or 4096,
|
||||
temperature=self.temperature,
|
||||
)
|
||||
elif self.model_endpoint_type == "xai":
|
||||
return XAIModel(
|
||||
model=self.model,
|
||||
max_output_tokens=self.max_tokens or 4096,
|
||||
temperature=self.temperature,
|
||||
)
|
||||
elif self.model_endpoint_type == "groq":
|
||||
return GroqModel(
|
||||
model=self.model,
|
||||
max_output_tokens=self.max_tokens or 4096,
|
||||
temperature=self.temperature,
|
||||
)
|
||||
elif self.model_endpoint_type == "deepseek":
|
||||
return DeepseekModel(
|
||||
model=self.model,
|
||||
max_output_tokens=self.max_tokens or 4096,
|
||||
temperature=self.temperature,
|
||||
)
|
||||
elif self.model_endpoint_type == "together":
|
||||
return TogetherModel(
|
||||
model=self.model,
|
||||
max_output_tokens=self.max_tokens or 4096,
|
||||
temperature=self.temperature,
|
||||
)
|
||||
elif self.model_endpoint_type == "bedrock":
|
||||
return Model(model=self.model, max_output_tokens=self.max_tokens or 4096)
|
||||
else:
|
||||
# If we don't know the model type, use the default Model schema
|
||||
return Model(model=self.model, max_output_tokens=self.max_tokens or 4096)
|
||||
|
||||
@classmethod
|
||||
def is_openai_reasoning_model(cls, config: "LLMConfig") -> bool:
|
||||
from letta.llm_api.openai_client import is_openai_reasoning_model
|
||||
|
||||
@@ -1,224 +0,0 @@
|
||||
from typing import Annotated, Literal, Optional, Union
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.response_format import ResponseFormatUnion
|
||||
|
||||
|
||||
class Model(BaseModel):
|
||||
"""Schema for defining settings for a model"""
|
||||
|
||||
model: str = Field(..., description="The name of the model.")
|
||||
max_output_tokens: int = Field(4096, description="The maximum number of tokens the model can generate.")
|
||||
|
||||
|
||||
class OpenAIReasoning(BaseModel):
|
||||
reasoning_effort: Literal["minimal", "low", "medium", "high"] = Field(
|
||||
"minimal", description="The reasoning effort to use when generating text reasoning models"
|
||||
)
|
||||
|
||||
# TODO: implement support for this
|
||||
# summary: Optional[Literal["auto", "detailed"]] = Field(
|
||||
# None, description="The reasoning summary level to use when generating text reasoning models"
|
||||
# )
|
||||
|
||||
|
||||
class OpenAIModel(Model):
|
||||
provider: Literal["openai"] = Field("openai", description="The provider of the model.")
|
||||
temperature: float = Field(0.7, description="The temperature of the model.")
|
||||
reasoning: OpenAIReasoning = Field(OpenAIReasoning(reasoning_effort="high"), description="The reasoning configuration for the model.")
|
||||
response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")
|
||||
|
||||
# TODO: implement support for these
|
||||
# reasoning_summary: Optional[Literal["none", "short", "detailed"]] = Field(
|
||||
# None, description="The reasoning summary level to use when generating text reasoning models"
|
||||
# )
|
||||
# max_tool_calls: int = Field(10, description="The maximum number of tool calls the model can make.")
|
||||
# parallel_tool_calls: bool = Field(False, description="Whether the model supports parallel tool calls.")
|
||||
# top_logprobs: int = Field(10, description="The number of top logprobs to return.")
|
||||
# top_p: float = Field(1.0, description="The top-p value to use when generating text.")
|
||||
|
||||
def _to_legacy_config_params(self) -> dict:
|
||||
return {
|
||||
"temperature": self.temperature,
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"reasoning_effort": self.reasoning.reasoning_effort,
|
||||
"response_format": self.response_format,
|
||||
}
|
||||
|
||||
|
||||
# "thinking": {
|
||||
# "type": "enabled",
|
||||
# "budget_tokens": 10000
|
||||
# }
|
||||
|
||||
|
||||
class AnthropicThinking(BaseModel):
|
||||
type: Literal["enabled", "disabled"] = Field("enabled", description="The type of thinking to use.")
|
||||
budget_tokens: int = Field(1024, description="The maximum number of tokens the model can use for extended thinking.")
|
||||
|
||||
|
||||
class AnthropicModel(Model):
|
||||
provider: Literal["anthropic"] = Field("anthropic", description="The provider of the model.")
|
||||
temperature: float = Field(1.0, description="The temperature of the model.")
|
||||
thinking: AnthropicThinking = Field(
|
||||
AnthropicThinking(type="enabled", budget_tokens=1024), description="The thinking configuration for the model."
|
||||
)
|
||||
|
||||
# gpt-5 models only
|
||||
verbosity: Optional[Literal["low", "medium", "high"]] = Field(
|
||||
None,
|
||||
description="Soft control for how verbose model output should be, used for GPT-5 models.",
|
||||
)
|
||||
|
||||
# TODO: implement support for these
|
||||
# top_k: Optional[int] = Field(None, description="The number of top tokens to return.")
|
||||
# top_p: Optional[float] = Field(None, description="The top-p value to use when generating text.")
|
||||
|
||||
def _to_legacy_config_params(self) -> dict:
|
||||
return {
|
||||
"temperature": self.temperature,
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"extended_thinking": self.thinking.type == "enabled",
|
||||
"thinking_budget_tokens": self.thinking.budget_tokens,
|
||||
"verbosity": self.verbosity,
|
||||
}
|
||||
|
||||
|
||||
class GeminiThinkingConfig(BaseModel):
|
||||
include_thoughts: bool = Field(True, description="Whether to include thoughts in the model's response.")
|
||||
thinking_budget: int = Field(1024, description="The thinking budget for the model.")
|
||||
|
||||
|
||||
class GoogleAIModel(Model):
|
||||
provider: Literal["google_ai"] = Field("google_ai", description="The provider of the model.")
|
||||
temperature: float = Field(0.7, description="The temperature of the model.")
|
||||
thinking_config: GeminiThinkingConfig = Field(
|
||||
GeminiThinkingConfig(include_thoughts=True, thinking_budget=1024), description="The thinking configuration for the model."
|
||||
)
|
||||
response_schema: Optional[ResponseFormatUnion] = Field(None, description="The response schema for the model.")
|
||||
max_output_tokens: int = Field(65536, description="The maximum number of tokens the model can generate.")
|
||||
|
||||
def _to_legacy_config_params(self) -> dict:
|
||||
return {
|
||||
"temperature": self.temperature,
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"max_reasoning_tokens": self.thinking_config.thinking_budget if self.thinking_config.include_thoughts else 0,
|
||||
}
|
||||
|
||||
|
||||
class GoogleVertexModel(GoogleAIModel):
|
||||
provider: Literal["google_vertex"] = Field("google_vertex", description="The provider of the model.")
|
||||
|
||||
|
||||
class AzureModel(Model):
|
||||
"""Azure OpenAI model configuration (OpenAI-compatible)."""
|
||||
|
||||
provider: Literal["azure"] = Field("azure", description="The provider of the model.")
|
||||
temperature: float = Field(0.7, description="The temperature of the model.")
|
||||
response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")
|
||||
|
||||
def _to_legacy_config_params(self) -> dict:
|
||||
return {
|
||||
"temperature": self.temperature,
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"response_format": self.response_format,
|
||||
}
|
||||
|
||||
|
||||
class XAIModel(Model):
|
||||
"""xAI model configuration (OpenAI-compatible)."""
|
||||
|
||||
provider: Literal["xai"] = Field("xai", description="The provider of the model.")
|
||||
temperature: float = Field(0.7, description="The temperature of the model.")
|
||||
response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")
|
||||
|
||||
def _to_legacy_config_params(self) -> dict:
|
||||
return {
|
||||
"temperature": self.temperature,
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"response_format": self.response_format,
|
||||
}
|
||||
|
||||
|
||||
class GroqModel(Model):
|
||||
"""Groq model configuration (OpenAI-compatible)."""
|
||||
|
||||
provider: Literal["groq"] = Field("groq", description="The provider of the model.")
|
||||
temperature: float = Field(0.7, description="The temperature of the model.")
|
||||
response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")
|
||||
|
||||
def _to_legacy_config_params(self) -> dict:
|
||||
return {
|
||||
"temperature": self.temperature,
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"response_format": self.response_format,
|
||||
}
|
||||
|
||||
|
||||
class DeepseekModel(Model):
|
||||
"""Deepseek model configuration (OpenAI-compatible)."""
|
||||
|
||||
provider: Literal["deepseek"] = Field("deepseek", description="The provider of the model.")
|
||||
temperature: float = Field(0.7, description="The temperature of the model.")
|
||||
response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")
|
||||
|
||||
def _to_legacy_config_params(self) -> dict:
|
||||
return {
|
||||
"temperature": self.temperature,
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"response_format": self.response_format,
|
||||
}
|
||||
|
||||
|
||||
class TogetherModel(Model):
|
||||
"""Together AI model configuration (OpenAI-compatible)."""
|
||||
|
||||
provider: Literal["together"] = Field("together", description="The provider of the model.")
|
||||
temperature: float = Field(0.7, description="The temperature of the model.")
|
||||
response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")
|
||||
|
||||
def _to_legacy_config_params(self) -> dict:
|
||||
return {
|
||||
"temperature": self.temperature,
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"response_format": self.response_format,
|
||||
}
|
||||
|
||||
|
||||
class BedrockModel(Model):
|
||||
"""AWS Bedrock model configuration."""
|
||||
|
||||
provider: Literal["bedrock"] = Field("bedrock", description="The provider of the model.")
|
||||
temperature: float = Field(0.7, description="The temperature of the model.")
|
||||
response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")
|
||||
|
||||
def _to_legacy_config_params(self) -> dict:
|
||||
return {
|
||||
"temperature": self.temperature,
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"response_format": self.response_format,
|
||||
}
|
||||
|
||||
|
||||
ModelSettings = Annotated[
|
||||
Union[
|
||||
OpenAIModel,
|
||||
AnthropicModel,
|
||||
GoogleAIModel,
|
||||
GoogleVertexModel,
|
||||
AzureModel,
|
||||
XAIModel,
|
||||
GroqModel,
|
||||
DeepseekModel,
|
||||
TogetherModel,
|
||||
BedrockModel,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
||||
class EmbeddingModelSettings(BaseModel):
|
||||
model: str = Field(..., description="The name of the model.")
|
||||
provider: Literal["openai", "ollama"] = Field(..., description="The provider of the model.")
|
||||
|
||||
@@ -414,31 +414,18 @@ class SyncServer(object):
|
||||
actor: User,
|
||||
) -> AgentState:
|
||||
if request.llm_config is None:
|
||||
additional_config_params = {}
|
||||
if request.model is None:
|
||||
if settings.default_llm_handle is None:
|
||||
raise LettaInvalidArgumentError("Must specify either model or llm_config in request", argument_name="model")
|
||||
else:
|
||||
handle = settings.default_llm_handle
|
||||
else:
|
||||
if isinstance(request.model, str):
|
||||
handle = request.model
|
||||
elif isinstance(request.model, list):
|
||||
raise LettaInvalidArgumentError("Multiple models are not supported yet")
|
||||
else:
|
||||
# EXTREMELEY HACKY, TEMPORARY WORKAROUND
|
||||
handle = f"{request.model.provider}/{request.model.model}"
|
||||
# TODO: figure out how to override various params
|
||||
additional_config_params = request.model._to_legacy_config_params()
|
||||
|
||||
request.model = settings.default_llm_handle
|
||||
config_params = {
|
||||
"handle": handle,
|
||||
"handle": request.model,
|
||||
"context_window_limit": request.context_window_limit,
|
||||
"max_tokens": request.max_tokens,
|
||||
"max_reasoning_tokens": request.max_reasoning_tokens,
|
||||
"enable_reasoner": request.enable_reasoner,
|
||||
}
|
||||
config_params.update(additional_config_params)
|
||||
log_event(name="start get_cached_llm_config", attributes=config_params)
|
||||
request.llm_config = await self.get_cached_llm_config_async(actor=actor, **config_params)
|
||||
log_event(name="end get_cached_llm_config", attributes=config_params)
|
||||
|
||||
@@ -1036,7 +1036,6 @@ async def test_agent_state_schema_unchanged(server: SyncServer):
|
||||
from letta.schemas.group import Group
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.memory import Memory
|
||||
from letta.schemas.model import EmbeddingModelSettings, ModelSettings
|
||||
from letta.schemas.response_format import ResponseFormatUnion
|
||||
from letta.schemas.source import Source
|
||||
from letta.schemas.tool import Tool
|
||||
@@ -1057,8 +1056,6 @@ async def test_agent_state_schema_unchanged(server: SyncServer):
|
||||
"agent_type": AgentType,
|
||||
# LLM information
|
||||
"llm_config": LLMConfig,
|
||||
"model": ModelSettings,
|
||||
"embedding": EmbeddingModelSettings,
|
||||
"embedding_config": EmbeddingConfig,
|
||||
"response_format": (ResponseFormatUnion, type(None)),
|
||||
# State fields
|
||||
|
||||
@@ -138,11 +138,7 @@ def create_test_module(
|
||||
expected_values = processed_params | processed_extra_expected
|
||||
for key, value in expected_values.items():
|
||||
if hasattr(item, key):
|
||||
if key == "model" or key == "embedding":
|
||||
# NOTE: add back these tests after v1 migration
|
||||
continue
|
||||
print(f"item.{key}: {getattr(item, key)}")
|
||||
assert custom_model_dump(getattr(item, key)) == value, f"For key {key}, expected {value}, but got {getattr(item, key)}"
|
||||
assert custom_model_dump(getattr(item, key)) == value
|
||||
|
||||
@pytest.mark.order(1)
|
||||
def test_retrieve(handler):
|
||||
@@ -276,8 +272,6 @@ def custom_model_dump(model):
|
||||
return model
|
||||
if isinstance(model, list):
|
||||
return [custom_model_dump(item) for item in model]
|
||||
if isinstance(model, dict):
|
||||
return {key: custom_model_dump(value) for key, value in model.items()}
|
||||
else:
|
||||
return model.model_dump()
|
||||
|
||||
|
||||
@@ -587,6 +587,37 @@ def test_agent_creation(client: Letta):
|
||||
client.agents.delete(agent_id=agent.id)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------------------------------------------------
|
||||
# Agent sources
|
||||
# --------------------------------------------------------------------------------------------------------------------
|
||||
def test_attach_detach_agent_source(client: Letta, agent: AgentState):
|
||||
"""Test that we can attach and detach a source from an agent"""
|
||||
|
||||
# Create a source
|
||||
source = client.sources.create(
|
||||
name="test_source",
|
||||
embedding="openai/text-embedding-3-small",
|
||||
)
|
||||
initial_sources = client.agents.sources.list(agent_id=agent.id)
|
||||
assert source.id not in [s.id for s in initial_sources]
|
||||
|
||||
# Attach source
|
||||
client.agents.sources.attach(agent_id=agent.id, source_id=source.id)
|
||||
|
||||
# Verify source is attached
|
||||
final_sources = client.agents.sources.list(agent_id=agent.id)
|
||||
assert source.id in [s.id for s in final_sources]
|
||||
|
||||
# Detach source
|
||||
client.agents.sources.detach(agent_id=agent.id, source_id=source.id)
|
||||
|
||||
# Verify source is detached
|
||||
final_sources = client.agents.sources.list(agent_id=agent.id)
|
||||
assert source.id not in [s.id for s in final_sources]
|
||||
|
||||
client.sources.delete(source.id)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------------------------------------------------
|
||||
# Agent Initial Message Sequence
|
||||
# --------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user