diff --git a/letta/constants.py b/letta/constants.py index 468afa4c..b6fcb973 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -27,7 +27,6 @@ TOOL_CALL_ID_MAX_LEN = 29 # minimum context window size MIN_CONTEXT_WINDOW = 4096 -DEFAULT_CONTEXT_WINDOW_SIZE = 32000 # embeddings MAX_EMBEDDING_DIM = 4096 # maximum supported embeding size - do NOT change or else DBs will need to be reset diff --git a/letta/server/server.py b/letta/server/server.py index 2fe0b5d5..d4687314 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -983,6 +983,10 @@ class SyncServer(Server): warnings.warn(f"An error occurred while listing LLM models for provider {provider}: {e}") llm_models.extend(self.get_local_llm_configs()) + + # respect global maximum + for llm_config in llm_models: + llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit) return llm_models def list_embedding_models(self) -> List[EmbeddingConfig]: @@ -1028,7 +1032,7 @@ class SyncServer(Server): raise ValueError(f"Context window limit ({context_window_limit}) is greater than maximum of ({llm_config.context_window})") llm_config.context_window = context_window_limit else: - llm_config.context_window = min(llm_config.context_window, constants.DEFAULT_CONTEXT_WINDOW_SIZE) + llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit) return llm_config diff --git a/letta/settings.py b/letta/settings.py index d69c0777..2a214846 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -50,6 +50,8 @@ class ModelSettings(BaseSettings): model_config = SettingsConfigDict(env_file=".env", extra="ignore") + global_max_context_window_limit: int = 32000 + # env_prefix='my_prefix_' # when we use /completions APIs (instead of /chat/completions), we need to specify a model wrapper