* feat: add reasoning zai openrouter * add openrouter reasoning * stage + publish api * openrouter reasoning always on * revert * fix * remove reference * do
534 lines
25 KiB
Python
534 lines
25 KiB
Python
from typing import Annotated, Literal, Optional, Union
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
from letta.schemas.embedding_config import EmbeddingConfig
|
|
from letta.schemas.enums import ProviderCategory, ProviderType
|
|
from letta.schemas.llm_config import LLMConfig
|
|
from letta.schemas.response_format import ResponseFormatUnion
|
|
|
|
|
|
class ModelBase(BaseModel):
    # Identity fields common to every model entry; both `Model` (LLMs) and
    # `EmbeddingModel` extend this base. All six fields are required here.
    handle: str = Field(
        default=...,
        description="Unique handle for API reference (format: provider_display_name/model_display_name)",
    )
    name: str = Field(
        default=...,
        description="The actual model name used by the provider",
    )
    display_name: str = Field(
        default=...,
        description="Display name for the model shown in UI",
    )
    provider_type: ProviderType = Field(
        default=...,
        description="The type of the provider",
    )
    provider_name: str = Field(
        default=...,
        description="The name of the provider",
    )
    # Subclasses narrow this literal to a single value and give it a default.
    model_type: Literal["llm", "embedding"] = Field(
        default=...,
        description="Type of model (llm or embedding)",
    )
|
|
|
|
|
|
class Model(LLMConfig, ModelBase):
    # LLM model entry. Combines the new ModelBase identity fields with the
    # LLMConfig fields, which are re-declared below and flagged as deprecated
    # so existing consumers of the old names keep working.
    model_type: Literal["llm"] = Field("llm", description="Type of model (llm or embedding)")
    max_context_window: int = Field(..., description="The maximum context window for the model")
    # supports_token_streaming: Optional[bool] = Field(None, description="Whether token streaming is supported")
    # supports_tool_calling: Optional[bool] = Field(None, description="Whether tool calling is supported")

    # Deprecated fields from LLMConfig - use new field names instead
    model: str = Field(..., description="Deprecated: Use 'name' field instead. LLM model name.", deprecated=True)
    model_endpoint_type: Literal[
        "openai",
        "anthropic",
        "google_ai",
        "google_vertex",
        "azure",
        "groq",
        "ollama",
        "webui",
        "webui-legacy",
        "lmstudio",
        "lmstudio-legacy",
        "lmstudio-chatcompletions",
        "llamacpp",
        "koboldcpp",
        "vllm",
        "hugging-face",
        "minimax",
        "mistral",
        "together",
        "bedrock",
        "deepseek",
        "xai",
        "zai",
        "openrouter",
        "chatgpt_oauth",
    ] = Field(..., description="Deprecated: Use 'provider_type' field instead. The endpoint type for the model.", deprecated=True)
    context_window: int = Field(
        ..., description="Deprecated: Use 'max_context_window' field instead. The context window size for the model.", deprecated=True
    )

    # Additional deprecated LLMConfig fields - kept for backward compatibility
    model_endpoint: Optional[str] = Field(None, description="Deprecated: The endpoint for the model.", deprecated=True)
    model_wrapper: Optional[str] = Field(None, description="Deprecated: The wrapper for the model.", deprecated=True)
    put_inner_thoughts_in_kwargs: Optional[bool] = Field(
        True, description="Deprecated: Puts 'inner_thoughts' as a kwarg in the function call.", deprecated=True
    )
    temperature: float = Field(0.7, description="Deprecated: The temperature to use when generating text with the model.", deprecated=True)
    max_tokens: Optional[int] = Field(None, description="Deprecated: The maximum number of tokens to generate.", deprecated=True)
    enable_reasoner: bool = Field(
        True,
        description="Deprecated: Whether or not the model should use extended thinking if it is a 'reasoning' style model.",
        deprecated=True,
    )
    reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] = Field(
        None, description="Deprecated: The reasoning effort to use when generating text reasoning models.", deprecated=True
    )
    max_reasoning_tokens: int = Field(0, description="Deprecated: Configurable thinking budget for extended thinking.", deprecated=True)
    frequency_penalty: Optional[float] = Field(
        None,
        description="Deprecated: Positive values penalize new tokens based on their existing frequency in the text so far.",
        deprecated=True,
    )
    compatibility_type: Optional[Literal["gguf", "mlx"]] = Field(
        None, description="Deprecated: The framework compatibility type for the model.", deprecated=True
    )
    verbosity: Optional[Literal["low", "medium", "high"]] = Field(
        None, description="Deprecated: Soft control for how verbose model output should be.", deprecated=True
    )
    tier: Optional[str] = Field(None, description="Deprecated: The cost tier for the model (cloud only).", deprecated=True)
    parallel_tool_calls: Optional[bool] = Field(
        False, description="Deprecated: If set to True, enables parallel tool calling.", deprecated=True
    )
    provider_category: Optional[ProviderCategory] = Field(
        None, description="Deprecated: The provider category for the model.", deprecated=True
    )

    @classmethod
    def from_llm_config(cls, llm_config: "LLMConfig") -> "Model":
        """Create a Model instance from an LLMConfig.

        Args:
            llm_config: The configuration to convert.

        Returns:
            A Model whose new-style fields are derived from ``llm_config`` and
            whose deprecated fields are copied from it unchanged.
        """
        # Resolve the provider name once so the fallback handle and the
        # provider_name field always agree. Previously an unset provider_name
        # produced a handle of the form "None/<model>" while provider_name
        # itself fell back to the endpoint type.
        provider_name = llm_config.provider_name or llm_config.model_endpoint_type
        return cls(
            # New fields
            handle=llm_config.handle or f"{provider_name}/{llm_config.model}",
            name=llm_config.model,
            display_name=llm_config.display_name or llm_config.model,
            provider_type=llm_config.model_endpoint_type,
            provider_name=provider_name,
            model_type="llm",
            max_context_window=llm_config.context_window,
            # Deprecated fields (copy from LLMConfig for backward compatibility)
            model=llm_config.model,
            model_endpoint_type=llm_config.model_endpoint_type,
            model_endpoint=llm_config.model_endpoint,
            model_wrapper=llm_config.model_wrapper,
            context_window=llm_config.context_window,
            put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
            temperature=llm_config.temperature,
            max_tokens=llm_config.max_tokens,
            enable_reasoner=llm_config.enable_reasoner,
            reasoning_effort=llm_config.reasoning_effort,
            max_reasoning_tokens=llm_config.max_reasoning_tokens,
            # `effort` is not re-declared on this class; presumably inherited
            # from LLMConfig - confirm against that schema.
            effort=llm_config.effort,
            frequency_penalty=llm_config.frequency_penalty,
            compatibility_type=llm_config.compatibility_type,
            verbosity=llm_config.verbosity,
            tier=llm_config.tier,
            parallel_tool_calls=llm_config.parallel_tool_calls,
            provider_category=llm_config.provider_category,
        )

    @property
    def model_settings_schema(self) -> Optional[dict]:
        """Returns the JSON schema for the ModelSettings class corresponding to this model's provider.

        Returns:
            The schema dict of the provider-specific settings class, or ``None``
            when ``provider_type`` has no entry in the mapping below.
        """
        # The settings classes are defined later in this module; the names are
        # only resolved when the property is read, so the forward use is safe.
        PROVIDER_SETTINGS_MAP = {
            ProviderType.openai: OpenAIModelSettings,
            ProviderType.anthropic: AnthropicModelSettings,
            ProviderType.google_ai: GoogleAIModelSettings,
            ProviderType.google_vertex: GoogleVertexModelSettings,
            ProviderType.azure: AzureModelSettings,
            ProviderType.xai: XAIModelSettings,
            ProviderType.zai: ZAIModelSettings,
            ProviderType.groq: GroqModelSettings,
            ProviderType.deepseek: DeepseekModelSettings,
            ProviderType.together: TogetherModelSettings,
            ProviderType.bedrock: BedrockModelSettings,
            ProviderType.openrouter: OpenRouterModelSettings,
            # Added so every member of ModelSettingsUnion has a schema entry;
            # chatgpt_oauth models previously fell through to None.
            ProviderType.chatgpt_oauth: ChatGPTOAuthModelSettings,
        }

        settings_class = PROVIDER_SETTINGS_MAP.get(self.provider_type)
        return settings_class.model_json_schema() if settings_class else None
|
|
|
|
|
|
class EmbeddingModel(EmbeddingConfig, ModelBase):
    # Embedding model entry. Combines the new ModelBase identity fields with
    # the EmbeddingConfig fields, which are re-declared below and flagged as
    # deprecated.
    model_type: Literal["embedding"] = Field(
        default="embedding",
        description="Type of model (llm or embedding)",
    )
    embedding_dim: int = Field(
        default=...,
        description="The dimension of the embedding",
    )

    # Deprecated fields from EmbeddingConfig - use new field names instead
    embedding_model: str = Field(
        default=...,
        description="Deprecated: Use 'name' field instead. Embedding model name.",
        deprecated=True,
    )
    embedding_endpoint_type: Literal[
        "openai",
        "anthropic",
        "bedrock",
        "google_ai",
        "google_vertex",
        "azure",
        "groq",
        "ollama",
        "webui",
        "webui-legacy",
        "lmstudio",
        "lmstudio-legacy",
        "llamacpp",
        "koboldcpp",
        "vllm",
        "hugging-face",
        "mistral",
        "together",
        "pinecone",
    ] = Field(
        default=...,
        description="Deprecated: Use 'provider_type' field instead. The endpoint type for the embedding model.",
        deprecated=True,
    )

    # Additional deprecated EmbeddingConfig fields - kept for backward compatibility
    embedding_endpoint: Optional[str] = Field(
        default=None,
        description="Deprecated: The endpoint for the model.",
        deprecated=True,
    )
    embedding_chunk_size: Optional[int] = Field(
        default=300,
        description="Deprecated: The chunk size of the embedding.",
        deprecated=True,
    )
    batch_size: int = Field(
        default=32,
        description="Deprecated: The maximum batch size for processing embeddings.",
        deprecated=True,
    )
    azure_endpoint: Optional[str] = Field(
        default=None,
        description="Deprecated: The Azure endpoint for the model.",
        deprecated=True,
    )
    azure_version: Optional[str] = Field(
        default=None,
        description="Deprecated: The Azure version for the model.",
        deprecated=True,
    )
    azure_deployment: Optional[str] = Field(
        default=None,
        description="Deprecated: The Azure deployment for the model.",
        deprecated=True,
    )

    @classmethod
    def from_embedding_config(cls, embedding_config: "EmbeddingConfig") -> "EmbeddingModel":
        """Create an EmbeddingModel instance from an EmbeddingConfig.

        Args:
            embedding_config: The configuration to convert.

        Returns:
            An EmbeddingModel whose new-style fields are derived from
            ``embedding_config`` and whose deprecated fields are copied over.
        """
        # The endpoint type and model name each feed several fields, so read
        # them once up front.
        endpoint_type = embedding_config.embedding_endpoint_type
        model_name = embedding_config.embedding_model

        # An explicit handle wins; otherwise derive "<endpoint type>/<model>".
        resolved_handle = embedding_config.handle or f"{endpoint_type}/{model_name}"

        return cls(
            # New fields
            handle=resolved_handle,
            name=model_name,
            display_name=model_name,
            provider_type=endpoint_type,
            provider_name=endpoint_type,
            model_type="embedding",
            embedding_dim=embedding_config.embedding_dim,
            # Deprecated fields (copy from EmbeddingConfig for backward compatibility)
            embedding_model=model_name,
            embedding_endpoint_type=endpoint_type,
            embedding_endpoint=embedding_config.embedding_endpoint,
            embedding_chunk_size=embedding_config.embedding_chunk_size,
            batch_size=embedding_config.batch_size,
            azure_endpoint=embedding_config.azure_endpoint,
            azure_version=embedding_config.azure_version,
            azure_deployment=embedding_config.azure_deployment,
        )
|
|
|
|
|
|
class ModelSettings(BaseModel):
    """Schema for defining settings for a model"""

    # Base for the provider-specific settings classes in this module; each
    # subclass adds a `provider_type` literal and its own options.

    # model: str = Field(..., description="The name of the model.")
    max_output_tokens: int = Field(
        default=4096,
        description="The maximum number of tokens the model can generate.",
    )
    parallel_tool_calls: bool = Field(
        default=False,
        description="Whether to enable parallel tool calling.",
    )
|
|
|
|
|
|
class OpenAIReasoning(BaseModel):
    # Nested reasoning options used by OpenAIModelSettings.reasoning.
    reasoning_effort: Literal["none", "minimal", "low", "medium", "high", "xhigh"] = Field(
        default="minimal",
        description="The reasoning effort to use when generating text reasoning models",
    )

    # TODO: implement support for this
    # summary: Optional[Literal["auto", "detailed"]] = Field(
    #     None, description="The reasoning summary level to use when generating text reasoning models"
    # )
|
|
|
|
|
|
class OpenAIModelSettings(ModelSettings):
    # OpenAI settings; `provider_type` is the discriminator value used by
    # ModelSettingsUnion.
    provider_type: Literal[ProviderType.openai] = Field(
        default=ProviderType.openai,
        description="The type of the provider.",
    )
    temperature: float = Field(
        default=0.7,
        description="The temperature of the model.",
    )
    # Note: the default here is "high", whereas a bare OpenAIReasoning()
    # defaults to "minimal".
    reasoning: OpenAIReasoning = Field(
        default=OpenAIReasoning(reasoning_effort="high"),
        description="The reasoning configuration for the model.",
    )
    response_format: Optional[ResponseFormatUnion] = Field(
        default=None,
        description="The response format for the model.",
    )
    # OpenAI supports strict mode for tool calling - defaults to True
    strict: bool = Field(
        default=True,
        description="Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.",
    )

    # TODO: implement support for these
    # reasoning_summary: Optional[Literal["none", "short", "detailed"]] = Field(
    #     None, description="The reasoning summary level to use when generating text reasoning models"
    # )
    # max_tool_calls: int = Field(10, description="The maximum number of tool calls the model can make.")
    # parallel_tool_calls: bool = Field(False, description="Whether the model supports parallel tool calls.")
    # top_logprobs: int = Field(10, description="The number of top logprobs to return.")
    # top_p: float = Field(1.0, description="The top-p value to use when generating text.")

    def _to_legacy_config_params(self) -> dict:
        """Flatten these settings into a dict keyed by the legacy parameter names."""
        legacy_params = {
            "temperature": self.temperature,
            # The legacy name for max_output_tokens is max_tokens.
            "max_tokens": self.max_output_tokens,
            # The nested reasoning object collapses to its single effort value.
            "reasoning_effort": self.reasoning.reasoning_effort,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": self.strict,
        }
        return legacy_params
|
|
|
|
|
|
# "thinking": {
|
|
# "type": "enabled",
|
|
# "budget_tokens": 10000
|
|
# }
|
|
|
|
|
|
class AnthropicThinking(BaseModel):
    # Nested thinking options used by AnthropicModelSettings.thinking.
    type: Literal["enabled", "disabled"] = Field(
        default="enabled",
        description="The type of thinking to use.",
    )
    budget_tokens: int = Field(
        default=1024,
        description="The maximum number of tokens the model can use for extended thinking.",
    )
|
|
|
|
|
|
class AnthropicModelSettings(ModelSettings):
    # Anthropic settings; `provider_type` is the discriminator value used by
    # ModelSettingsUnion.
    provider_type: Literal[ProviderType.anthropic] = Field(
        default=ProviderType.anthropic,
        description="The type of the provider.",
    )
    temperature: float = Field(
        default=1.0,
        description="The temperature of the model.",
    )
    thinking: AnthropicThinking = Field(
        default=AnthropicThinking(type="enabled", budget_tokens=1024),
        description="The thinking configuration for the model.",
    )
    response_format: Optional[ResponseFormatUnion] = Field(
        default=None,
        description="The response format for the model.",
    )

    # gpt-5 models only
    # NOTE(review): this field is described as a GPT-5 control yet lives on the
    # Anthropic settings class - confirm it is meant to be here.
    verbosity: Optional[Literal["low", "medium", "high"]] = Field(
        default=None,
        description="Soft control for how verbose model output should be, used for GPT-5 models.",
    )

    # Opus 4.5 effort parameter
    effort: Optional[Literal["low", "medium", "high"]] = Field(
        default=None,
        description="Effort level for Opus 4.5 model (controls token conservation). Not setting this gives similar performance to 'high'.",
    )

    # Anthropic supports strict mode for tool calling - defaults to False
    strict: bool = Field(
        default=False,
        description="Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.",
    )

    # TODO: implement support for these
    # top_k: Optional[int] = Field(None, description="The number of top tokens to return.")
    # top_p: Optional[float] = Field(None, description="The top-p value to use when generating text.")

    def _to_legacy_config_params(self) -> dict:
        """Flatten these settings into a dict keyed by the legacy parameter names."""
        thinking_cfg = self.thinking
        legacy_params = {
            "temperature": self.temperature,
            # The legacy name for max_output_tokens is max_tokens.
            "max_tokens": self.max_output_tokens,
            # The thinking "type" literal collapses to a boolean flag.
            "extended_thinking": thinking_cfg.type == "enabled",
            # The budget is forwarded even when thinking is disabled.
            "max_reasoning_tokens": thinking_cfg.budget_tokens,
            "verbosity": self.verbosity,
            "parallel_tool_calls": self.parallel_tool_calls,
            "effort": self.effort,
            "response_format": self.response_format,
            "strict": self.strict,
        }
        return legacy_params
|
|
|
|
|
|
class GeminiThinkingConfig(BaseModel):
    # Nested thinking options used by GoogleAIModelSettings.thinking_config.
    include_thoughts: bool = Field(
        default=True,
        description="Whether to include thoughts in the model's response.",
    )
    thinking_budget: int = Field(
        default=1024,
        description="The thinking budget for the model.",
    )
|
|
|
|
|
|
class GoogleAIModelSettings(ModelSettings):
    # Google AI settings; `provider_type` is the discriminator value used by
    # ModelSettingsUnion.
    provider_type: Literal[ProviderType.google_ai] = Field(
        default=ProviderType.google_ai,
        description="The type of the provider.",
    )
    temperature: float = Field(
        default=0.7,
        description="The temperature of the model.",
    )
    thinking_config: GeminiThinkingConfig = Field(
        default=GeminiThinkingConfig(include_thoughts=True, thinking_budget=1024),
        description="The thinking configuration for the model.",
    )
    response_schema: Optional[ResponseFormatUnion] = Field(
        default=None,
        description="The response schema for the model.",
    )
    # Overrides the 4096 default inherited from ModelSettings.
    max_output_tokens: int = Field(
        default=65536,
        description="The maximum number of tokens the model can generate.",
    )

    def _to_legacy_config_params(self) -> dict:
        """Flatten these settings into a dict keyed by the legacy parameter names.

        ``response_schema`` is not part of the returned dict.
        """
        thinking_cfg = self.thinking_config
        # A budget is only reported when thoughts are requested; otherwise 0.
        if thinking_cfg.include_thoughts:
            reasoning_budget = thinking_cfg.thinking_budget
        else:
            reasoning_budget = 0

        legacy_params = {
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            "max_reasoning_tokens": reasoning_budget,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # Google AI does not support strict mode
        }
        return legacy_params
|
|
|
|
|
|
class GoogleVertexModelSettings(GoogleAIModelSettings):
    # Inherits every field and the legacy mapping from GoogleAIModelSettings;
    # only the discriminator value is replaced.
    provider_type: Literal[ProviderType.google_vertex] = Field(
        default=ProviderType.google_vertex,
        description="The type of the provider.",
    )
|
|
|
|
|
|
class AzureModelSettings(ModelSettings):
    """Azure OpenAI model configuration (OpenAI-compatible)."""

    # `provider_type` is the discriminator value used by ModelSettingsUnion.
    provider_type: Literal[ProviderType.azure] = Field(
        default=ProviderType.azure,
        description="The type of the provider.",
    )
    temperature: float = Field(
        default=0.7,
        description="The temperature of the model.",
    )
    response_format: Optional[ResponseFormatUnion] = Field(
        default=None,
        description="The response format for the model.",
    )

    def _to_legacy_config_params(self) -> dict:
        """Flatten these settings into a dict keyed by the legacy parameter names."""
        legacy_params = {
            "temperature": self.temperature,
            # The legacy name for max_output_tokens is max_tokens.
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # Azure does not support strict mode
        }
        return legacy_params
|
|
|
|
|
|
class XAIModelSettings(ModelSettings):
    """xAI model configuration (OpenAI-compatible)."""

    # `provider_type` is the discriminator value used by ModelSettingsUnion.
    provider_type: Literal[ProviderType.xai] = Field(
        default=ProviderType.xai,
        description="The type of the provider.",
    )
    temperature: float = Field(
        default=0.7,
        description="The temperature of the model.",
    )
    response_format: Optional[ResponseFormatUnion] = Field(
        default=None,
        description="The response format for the model.",
    )

    def _to_legacy_config_params(self) -> dict:
        """Flatten these settings into a dict keyed by the legacy parameter names."""
        legacy_params = {
            "temperature": self.temperature,
            # The legacy name for max_output_tokens is max_tokens.
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # xAI does not support strict mode
        }
        return legacy_params
|
|
|
|
|
|
class ZAIThinking(BaseModel):
    """Thinking configuration for ZAI GLM-4.5+ models."""

    # Nested thinking options used by ZAIModelSettings.thinking.
    type: Literal["enabled", "disabled"] = Field(
        default="enabled",
        description="Whether thinking is enabled or disabled.",
    )
    clear_thinking: bool = Field(
        default=False,
        description="If False, preserved thinking is used (recommended for agents).",
    )
|
|
|
|
|
|
class ZAIModelSettings(ModelSettings):
    """Z.ai (ZhipuAI) model configuration (OpenAI-compatible)."""

    # `provider_type` is the discriminator value used by ModelSettingsUnion.
    provider_type: Literal[ProviderType.zai] = Field(
        default=ProviderType.zai,
        description="The type of the provider.",
    )
    temperature: float = Field(
        default=0.7,
        description="The temperature of the model.",
    )
    response_format: Optional[ResponseFormatUnion] = Field(
        default=None,
        description="The response format for the model.",
    )
    thinking: ZAIThinking = Field(
        default=ZAIThinking(type="enabled", clear_thinking=False),
        description="The thinking configuration for GLM-4.5+ models.",
    )

    def _to_legacy_config_params(self) -> dict:
        """Flatten these settings into a dict keyed by the legacy parameter names.

        ``thinking.clear_thinking`` is not part of the returned dict.
        """
        thinking_on = self.thinking.type == "enabled"
        legacy_params = {
            "temperature": self.temperature,
            # The legacy name for max_output_tokens is max_tokens.
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # ZAI does not support strict mode
            # The thinking "type" literal collapses to a boolean flag.
            "extended_thinking": thinking_on,
        }
        return legacy_params
|
|
|
|
|
|
class GroqModelSettings(ModelSettings):
    """Groq model configuration (OpenAI-compatible)."""

    # `provider_type` is the discriminator value used by ModelSettingsUnion.
    provider_type: Literal[ProviderType.groq] = Field(
        default=ProviderType.groq,
        description="The type of the provider.",
    )
    temperature: float = Field(
        default=0.7,
        description="The temperature of the model.",
    )
    response_format: Optional[ResponseFormatUnion] = Field(
        default=None,
        description="The response format for the model.",
    )

    def _to_legacy_config_params(self) -> dict:
        """Flatten these settings into a dict keyed by the legacy parameter names."""
        legacy_params = {
            "temperature": self.temperature,
            # The legacy name for max_output_tokens is max_tokens.
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # Groq does not support strict mode
        }
        return legacy_params
|
|
|
|
|
|
class DeepseekModelSettings(ModelSettings):
    """Deepseek model configuration (OpenAI-compatible)."""

    # `provider_type` is the discriminator value used by ModelSettingsUnion.
    provider_type: Literal[ProviderType.deepseek] = Field(
        default=ProviderType.deepseek,
        description="The type of the provider.",
    )
    temperature: float = Field(
        default=0.7,
        description="The temperature of the model.",
    )
    response_format: Optional[ResponseFormatUnion] = Field(
        default=None,
        description="The response format for the model.",
    )

    def _to_legacy_config_params(self) -> dict:
        """Flatten these settings into a dict keyed by the legacy parameter names."""
        legacy_params = {
            "temperature": self.temperature,
            # The legacy name for max_output_tokens is max_tokens.
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # Deepseek does not support strict mode
        }
        return legacy_params
|
|
|
|
|
|
class TogetherModelSettings(ModelSettings):
    """Together AI model configuration (OpenAI-compatible)."""

    # `provider_type` is the discriminator value used by ModelSettingsUnion.
    provider_type: Literal[ProviderType.together] = Field(
        default=ProviderType.together,
        description="The type of the provider.",
    )
    temperature: float = Field(
        default=0.7,
        description="The temperature of the model.",
    )
    response_format: Optional[ResponseFormatUnion] = Field(
        default=None,
        description="The response format for the model.",
    )

    def _to_legacy_config_params(self) -> dict:
        """Flatten these settings into a dict keyed by the legacy parameter names."""
        legacy_params = {
            "temperature": self.temperature,
            # The legacy name for max_output_tokens is max_tokens.
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # Together does not support strict mode
        }
        return legacy_params
|
|
|
|
|
|
class BedrockModelSettings(ModelSettings):
    """AWS Bedrock model configuration."""

    # `provider_type` is the discriminator value used by ModelSettingsUnion.
    provider_type: Literal[ProviderType.bedrock] = Field(
        default=ProviderType.bedrock,
        description="The type of the provider.",
    )
    temperature: float = Field(
        default=0.7,
        description="The temperature of the model.",
    )
    response_format: Optional[ResponseFormatUnion] = Field(
        default=None,
        description="The response format for the model.",
    )

    def _to_legacy_config_params(self) -> dict:
        """Flatten these settings into a dict keyed by the legacy parameter names."""
        legacy_params = {
            "temperature": self.temperature,
            # The legacy name for max_output_tokens is max_tokens.
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # Bedrock does not support strict mode
        }
        return legacy_params
|
|
|
|
|
|
class OpenRouterModelSettings(ModelSettings):
    """OpenRouter model configuration (OpenAI-compatible)."""

    # `provider_type` is the discriminator value used by ModelSettingsUnion.
    provider_type: Literal[ProviderType.openrouter] = Field(
        default=ProviderType.openrouter,
        description="The type of the provider.",
    )
    temperature: float = Field(
        default=0.7,
        description="The temperature of the model.",
    )
    response_format: Optional[ResponseFormatUnion] = Field(
        default=None,
        description="The response format for the model.",
    )

    def _to_legacy_config_params(self) -> dict:
        """Flatten these settings into a dict keyed by the legacy parameter names."""
        legacy_params = {
            "temperature": self.temperature,
            # The legacy name for max_output_tokens is max_tokens.
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # OpenRouter does not support strict mode
        }
        return legacy_params
|
|
|
|
|
|
class ChatGPTOAuthReasoning(BaseModel):
    """Reasoning configuration for ChatGPT OAuth models (GPT-5.x, o-series)."""

    # Unlike OpenAIReasoning, the accepted literal set here has no "minimal".
    reasoning_effort: Literal["none", "low", "medium", "high", "xhigh"] = Field(
        default="medium",
        description="The reasoning effort level for GPT-5.x and o-series models.",
    )
|
|
|
|
|
|
class ChatGPTOAuthModelSettings(ModelSettings):
    """ChatGPT OAuth model configuration (uses ChatGPT backend API)."""

    # `provider_type` is the discriminator value used by ModelSettingsUnion.
    provider_type: Literal[ProviderType.chatgpt_oauth] = Field(
        default=ProviderType.chatgpt_oauth,
        description="The type of the provider.",
    )
    temperature: float = Field(
        default=0.7,
        description="The temperature of the model.",
    )
    reasoning: ChatGPTOAuthReasoning = Field(
        default=ChatGPTOAuthReasoning(reasoning_effort="medium"),
        description="The reasoning configuration for the model.",
    )

    def _to_legacy_config_params(self) -> dict:
        """Flatten these settings into a dict keyed by the legacy parameter names.

        No "strict" or "response_format" key is emitted by this mapping.
        """
        legacy_params = {
            "temperature": self.temperature,
            # The legacy name for max_output_tokens is max_tokens.
            "max_tokens": self.max_output_tokens,
            # The nested reasoning object collapses to its single effort value.
            "reasoning_effort": self.reasoning.reasoning_effort,
            "parallel_tool_calls": self.parallel_tool_calls,
        }
        return legacy_params
|
|
|
|
|
|
# Tagged union over every provider-specific settings class. The
# `provider_type` literal declared on each member is the discriminator, so a
# payload is matched to exactly one settings class by that field.
ModelSettingsUnion = Annotated[
    Union[
        OpenAIModelSettings,
        AnthropicModelSettings,
        GoogleAIModelSettings,
        GoogleVertexModelSettings,
        AzureModelSettings,
        XAIModelSettings,
        ZAIModelSettings,
        GroqModelSettings,
        DeepseekModelSettings,
        TogetherModelSettings,
        BedrockModelSettings,
        OpenRouterModelSettings,
        ChatGPTOAuthModelSettings,
    ],
    Field(discriminator="provider_type"),
]
|