diff --git a/letta/constants.py b/letta/constants.py
index 85d3d382..5f6e7361 100644
--- a/letta/constants.py
+++ b/letta/constants.py
@@ -192,10 +192,21 @@ CORE_MEMORY_LINE_NUMBER_WARNING = (
 # Constants to do with summarization / conversation length window
 # The max amount of tokens supported by the underlying model (eg 8k for gpt-4 and Mistral 7B)
 LLM_MAX_TOKENS = {
-    "DEFAULT": 8192,
+    "DEFAULT": 30000,
+    # deepseek
     "deepseek-chat": 64000,
     "deepseek-reasoner": 64000,
     ## OpenAI models: https://platform.openai.com/docs/models/overview
+    # reasoners
+    "o1": 200000,
+    # "o1-pro": 200000,  # responses API only
+    "o1-2024-12-17": 200000,
+    "o3": 200000,
+    "o3-2025-04-16": 200000,
+    "o3-mini": 200000,
+    "o3-mini-2025-01-31": 200000,
+    # "o3-pro": 200000,  # responses API only
+    # "o3-pro-2025-06-10": 200000,
     "gpt-4.1": 1047576,
     "gpt-4.1-2025-04-14": 1047576,
     "gpt-4.1-mini": 1047576,
@@ -209,6 +220,7 @@ LLM_MAX_TOKENS = {
     "chatgpt-4o-latest": 128000,
     # "o1-preview-2024-09-12
     "gpt-4o-2024-08-06": 128000,
+    "gpt-4o-2024-11-20": 128000,
     "gpt-4-turbo-preview": 128000,
     "gpt-4o": 128000,
     "gpt-3.5-turbo-instruct": 16385,
@@ -218,7 +230,7 @@ LLM_MAX_TOKENS = {
     # "davinci-002": 128000,
     "gpt-4-turbo-2024-04-09": 128000,
     # "gpt-4o-realtime-preview-2024-10-01
-    "gpt-4-turbo": 8192,
+    "gpt-4-turbo": 128000,
     "gpt-4o-2024-05-13": 128000,
     # "o1-mini
     # "o1-mini-2024-09-12