From a027014a7c474ccf40ee33a86df72d22b74f431e Mon Sep 17 00:00:00 2001 From: cthomas Date: Thu, 27 Feb 2025 14:21:06 -0800 Subject: [PATCH] feat: add setting for max context window size and persist for ADE (#1124) --- letta/constants.py | 1 - letta/server/server.py | 6 +++++- letta/settings.py | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/letta/constants.py b/letta/constants.py index 468afa4c..b6fcb973 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -27,7 +27,6 @@ TOOL_CALL_ID_MAX_LEN = 29 # minimum context window size MIN_CONTEXT_WINDOW = 4096 -DEFAULT_CONTEXT_WINDOW_SIZE = 32000 # embeddings MAX_EMBEDDING_DIM = 4096 # maximum supported embeding size - do NOT change or else DBs will need to be reset diff --git a/letta/server/server.py b/letta/server/server.py index 2fe0b5d5..d4687314 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -983,6 +983,10 @@ class SyncServer(Server): warnings.warn(f"An error occurred while listing LLM models for provider {provider}: {e}") llm_models.extend(self.get_local_llm_configs()) + + # respect global maximum + for llm_config in llm_models: + llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit) return llm_models def list_embedding_models(self) -> List[EmbeddingConfig]: @@ -1028,7 +1032,7 @@ class SyncServer(Server): raise ValueError(f"Context window limit ({context_window_limit}) is greater than maximum of ({llm_config.context_window})") llm_config.context_window = context_window_limit else: - llm_config.context_window = min(llm_config.context_window, constants.DEFAULT_CONTEXT_WINDOW_SIZE) + llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit) return llm_config diff --git a/letta/settings.py b/letta/settings.py index d69c0777..2a214846 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -50,6 +50,8 @@ class ModelSettings(BaseSettings): model_config = SettingsConfigDict(env_file=".env", extra="ignore") + global_max_context_window_limit: int = 32000 + # env_prefix='my_prefix_' # when we use /completions APIs (instead of /chat/completions), we need to specify a model wrapper