diff --git a/letta/constants.py b/letta/constants.py index ef4eafc2..1a59137e 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -27,6 +27,7 @@ TOOL_CALL_ID_MAX_LEN = 29 # minimum context window size MIN_CONTEXT_WINDOW = 4096 +DEFAULT_CONTEXT_WINDOW_SIZE = 32000 # embeddings MAX_EMBEDDING_DIM = 4096 # maximum supported embeding size - do NOT change or else DBs will need to be reset diff --git a/letta/llm_api/google_vertex.py b/letta/llm_api/google_vertex.py index 7b551af2..9530211f 100644 --- a/letta/llm_api/google_vertex.py +++ b/letta/llm_api/google_vertex.py @@ -1,7 +1,5 @@ import uuid -from typing import List, Optional, Tuple - -import requests +from typing import List, Optional from letta.constants import NON_USER_MSG_PREFIX from letta.local_llm.json_parser import clean_json_string_extra_backslash diff --git a/letta/server/server.py b/letta/server/server.py index 5c32182a..ac889f55 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -1112,6 +1112,8 @@ class SyncServer(Server): if context_window_limit > llm_config.context_window: raise ValueError(f"Context window limit ({context_window_limit}) is greater than maximum of ({llm_config.context_window})") llm_config.context_window = context_window_limit + else: + llm_config.context_window = min(llm_config.context_window, constants.DEFAULT_CONTEXT_WINDOW_SIZE) return llm_config