From 6ce293ea3c0a57abf626c8a017fb541467bf1a1b Mon Sep 17 00:00:00 2001
From: cthomas <caren@letta.com>
Date: Wed, 12 Feb 2025 18:32:40 -0800
Subject: [PATCH] feat: add vertex support (#2429)

Co-authored-by: Matthew Zhou <mattzh1314@gmail.com>
Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
---
 ...a08_add_stateless_option_for_agentstate.py |  36 ++
 letta/__init__.py                             |   2 +-
 letta/agent.py                                |  22 +-
 letta/client/client.py                        |   5 +
 letta/embeddings.py                           |  21 ++
 letta/functions/helpers.py                    |  29 +-
 letta/llm_api/google_vertex.py                | 328 ++++++++++++++++++
 letta/llm_api/llm_api_tools.py                |  26 ++
 letta/orm/agent.py                            |   8 +-
 letta/schemas/agent.py                        |  15 +-
 letta/schemas/embedding_config.py             |   1 +
 letta/schemas/llm_config.py                   |   1 +
 letta/schemas/message.py                      |  11 -
 letta/schemas/providers.py                    |  45 ++-
 letta/server/rest_api/routers/v1/tools.py     |  17 +-
 letta/server/server.py                        |  14 +-
 letta/services/agent_manager.py               |   5 +
 letta/services/message_manager.py             | 151 ++++----
 letta/settings.py                             |   8 +
 letta/utils.py                                |  17 +
 poetry.lock                                   | 272 ++++++++++-----
 pyproject.toml                                |   5 +-
 .../llm_model_configs/gemini-vertex.json      |   7 +
 tests/helpers/utils.py                        |   4 +
 tests/integration_test_agent_tool_graph.py    |  14 +-
 tests/integration_test_multi_agent.py         | 328 ++++++++++++++++++
 tests/test_base_functions.py                  |  49 ---
 tests/test_managers.py                        |  13 +-
 tests/test_model_letta_performance.py         |  12 +
 tests/test_providers.py                       |  11 +
 tests/utils.py                                |   2 +-
 31 files changed, 1226 insertions(+), 253 deletions(-)
 create mode 100644 alembic/versions/7980d239ea08_add_stateless_option_for_agentstate.py
 create mode 100644 letta/llm_api/google_vertex.py
 create mode 100644 tests/configs/llm_model_configs/gemini-vertex.json
 create mode 100644 tests/integration_test_multi_agent.py

diff --git a/alembic/versions/7980d239ea08_add_stateless_option_for_agentstate.py b/alembic/versions/7980d239ea08_add_stateless_option_for_agentstate.py
new file mode 100644
index 00000000..9693940d
--- /dev/null
+++ b/alembic/versions/7980d239ea08_add_stateless_option_for_agentstate.py
@@ -0,0 +1,36 @@
+"""Add message_buffer_autoclear option for AgentState
+
+Revision ID: 7980d239ea08
+Revises: dfafcf8210ca
+Create Date: 2025-02-12 14:02:00.918226
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "7980d239ea08"
+down_revision: Union[str, None] = "dfafcf8210ca"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Add agents.message_buffer_autoclear: created nullable first so existing rows can be backfilled."""
+    op.add_column("agents", sa.Column("message_buffer_autoclear", sa.Boolean(), nullable=True))
+
+    # Backfill existing rows to set message_buffer_autoclear to False where it's NULL
+    op.execute("UPDATE agents SET message_buffer_autoclear = false WHERE message_buffer_autoclear IS NULL")
+
+    # Now, enforce nullable=False after backfilling
+    op.alter_column("agents", "message_buffer_autoclear", nullable=False)
+
+
+def downgrade() -> None:
+    """Revert the migration by dropping the agents.message_buffer_autoclear column."""
+    op.drop_column("agents", "message_buffer_autoclear")
+    # NOTE: values stored in the column are discarded; re-running upgrade() backfills every row with false.
diff --git a/letta/__init__.py b/letta/__init__.py
index 4fd77b52..f4868bb3 100644
--- a/letta/__init__.py
+++ b/letta/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.6.24"
+__version__ = "0.6.25"
 
 # import clients
 from letta.client.client import LocalClient, RESTClient, create_client
diff --git a/letta/agent.py b/letta/agent.py
index 9a8ce758..5202bac2 100644
--- a/letta/agent.py
+++ b/letta/agent.py
@@ -61,6 +61,7 @@ from letta.utils import (
     get_utc_time,
     json_dumps,
     json_loads,
+    log_telemetry,
     parse_json,
     printd,
     validate_function_response,
@@ -306,7 +307,7 @@ class Agent(BaseAgent):
         last_function_failed: bool = False,
     ) -> ChatCompletionResponse:
         """Get response from LLM API with robust retry mechanism."""
-
+        log_telemetry(self.logger, "_get_ai_reply start")
         allowed_tool_names = self.tool_rules_solver.get_allowed_tool_names(last_function_response=self.last_function_response)
         agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]
 
@@ -337,6 +338,7 @@ class Agent(BaseAgent):
 
         for attempt in range(1, empty_response_retry_limit + 1):
             try:
+                log_telemetry(self.logger, "_get_ai_reply create start")
                 response = create(
                     llm_config=self.agent_state.llm_config,
                     messages=message_sequence,
@@ -349,6 +351,7 @@ class Agent(BaseAgent):
                     stream=stream,
                     stream_interface=self.interface,
                 )
+                log_telemetry(self.logger, "_get_ai_reply create finish")
 
                 # These bottom two are retryable
                 if len(response.choices) == 0 or response.choices[0] is None:
@@ -360,12 +363,13 @@ class Agent(BaseAgent):
                         raise RuntimeError("Finish reason was length (maximum context length)")
                     else:
                         raise ValueError(f"Bad finish reason from API: {response.choices[0].finish_reason}")
-
+                log_telemetry(self.logger, "_get_ai_reply finish")
                 return response
 
             except ValueError as ve:
                 if attempt >= empty_response_retry_limit:
                     warnings.warn(f"Retry limit reached. Final error: {ve}")
+                    log_telemetry(self.logger, "_get_ai_reply finish ValueError")
                     raise Exception(f"Retries exhausted and no valid response received. Final error: {ve}")
                 else:
                     delay = min(backoff_factor * (2 ** (attempt - 1)), max_delay)
@@ -374,8 +378,10 @@ class Agent(BaseAgent):
 
             except Exception as e:
                 # For non-retryable errors, exit immediately
+                log_telemetry(self.logger, "_get_ai_reply finish generic Exception")
                 raise e
 
+        log_telemetry(self.logger, "_get_ai_reply finish catch-all exception")
         raise Exception("Retries exhausted and no valid response received.")
         raise Exception("Retries exhausted and no valid response received.")
 
     def _handle_ai_response(
@@ -388,7 +394,7 @@ class Agent(BaseAgent):
         response_message_id: Optional[str] = None,
     ) -> Tuple[List[Message], bool, bool]:
         """Handles parsing and function execution"""
-
+        log_telemetry(self.logger, "_handle_ai_response start")
         # Hacky failsafe for now to make sure we didn't implement the streaming Message ID creation incorrectly
         if response_message_id is not None:
             assert response_message_id.startswith("message-"), response_message_id
@@ -506,7 +512,13 @@ class Agent(BaseAgent):
             self.interface.function_message(f"Running {function_name}({function_args})", msg_obj=messages[-1])
             try:
                 # handle tool execution (sandbox) and state updates
+                log_telemetry(
+                    self.logger, "_handle_ai_response execute tool start", function_name=function_name, function_args=function_args
+                )
                 function_response, sandbox_run_result = self.execute_tool_and_persist_state(function_name, function_args, target_letta_tool)
+                log_telemetry(
+                    self.logger, "_handle_ai_response execute tool finish", function_name=function_name, function_args=function_args
+                )
 
                 if sandbox_run_result and sandbox_run_result.status == "error":
                     messages = self._handle_function_error_response(
@@ -597,6 +609,7 @@ class Agent(BaseAgent):
         elif self.tool_rules_solver.is_terminal_tool(function_name):
             heartbeat_request = False
 
+        log_telemetry(self.logger, "_handle_ai_response finish")
         return messages, heartbeat_request, function_failed
 
     def step(
@@ -684,6 +697,9 @@ class Agent(BaseAgent):
             else:
                 break
 
+        if self.agent_state.message_buffer_autoclear:
+            self.agent_manager.trim_all_in_context_messages_except_system(self.agent_state.id, actor=self.user)
+
         return LettaUsageStatistics(**total_usage.model_dump(), step_count=step_count)
 
     def inner_step(
diff --git a/letta/client/client.py b/letta/client/client.py
index 485cc6f9..ed7a3220 100644
--- a/letta/client/client.py
+++ b/letta/client/client.py
@@ -73,6 +73,7 @@ class AbstractClient(object):
         metadata: Optional[Dict] = {"human:": DEFAULT_HUMAN, "persona": DEFAULT_PERSONA},
         description: Optional[str] = None,
         tags: Optional[List[str]] = None,
+        message_buffer_autoclear: bool = False,
     ) -> AgentState:
         raise NotImplementedError
 
@@ -540,6 +541,7 @@ class RESTClient(AbstractClient):
         description: Optional[str] = None,
         initial_message_sequence: Optional[List[Message]] = None,
         tags: Optional[List[str]] = None,
+        message_buffer_autoclear: bool = False,
     ) -> AgentState:
         """Create an agent
 
@@ -600,6 +602,7 @@ class RESTClient(AbstractClient):
             "initial_message_sequence": initial_message_sequence,
             "tags": tags,
             "include_base_tools": include_base_tools,
+            "message_buffer_autoclear": message_buffer_autoclear,
         }
 
         # Only add name if it's not None
@@ -2353,6 +2356,7 @@ class LocalClient(AbstractClient):
         description: Optional[str] = None,
         initial_message_sequence: Optional[List[Message]] = None,
         tags: Optional[List[str]] = None,
+        message_buffer_autoclear: bool = False,
     ) -> AgentState:
         """Create an agent
 
@@ -2404,6 +2408,7 @@ class LocalClient(AbstractClient):
             "embedding_config": embedding_config if embedding_config else self._default_embedding_config,
             "initial_message_sequence": initial_message_sequence,
             "tags": tags,
+            "message_buffer_autoclear": message_buffer_autoclear,
         }
 
         # Only add name if it's not None
diff --git a/letta/embeddings.py b/letta/embeddings.py
index 6541cea3..e8d1f54d 100644
--- a/letta/embeddings.py
+++ b/letta/embeddings.py
@@ -188,6 +188,19 @@ class GoogleEmbeddings:
         return response_json["embedding"]["values"]
 
 
+class GoogleVertexEmbeddings:
+    """Text-embedding wrapper around a google-genai client running in Vertex AI mode (project + region)."""
+    def __init__(self, model: str, project_id: str, region: str):
+        from google import genai
+
+        self.client = genai.Client(vertexai=True, project=project_id, location=region, http_options={"api_version": "v1"})
+        self.model = model
+
+    def get_text_embedding(self, text: str):
+        response = self.client.models.embed_content(model=self.model, contents=text)  # embeddings live under client.models
+        return response.embeddings[0].values  # one input -> one ContentEmbedding; its vector is in .values
+
+
 def query_embedding(embedding_model, query_text: str):
     """Generate padded embedding for querying database"""
     query_vec = embedding_model.get_text_embedding(query_text)
@@ -267,5 +280,13 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
         )
         return model
 
+    elif endpoint_type == "google_vertex":
+        model = GoogleVertexEmbeddings(
+            model=config.embedding_model,
+            project_id=model_settings.google_cloud_project,
+            region=model_settings.google_cloud_location,
+        )
+        return model
+
     else:
         raise ValueError(f"Unknown endpoint type {endpoint_type}")
diff --git a/letta/functions/helpers.py b/letta/functions/helpers.py
index 92b75e49..ef42b4c9 100644
--- a/letta/functions/helpers.py
+++ b/letta/functions/helpers.py
@@ -17,6 +17,7 @@ from letta.schemas.message import Message, MessageCreate
 from letta.schemas.user import User
 from letta.server.rest_api.utils import get_letta_server
 from letta.settings import settings
+from letta.utils import log_telemetry
 
 
 # TODO: This is kind of hacky, as this is used to search up the action later on composio's side
@@ -341,10 +342,16 @@ async def async_send_message_with_retries(
     timeout: int,
     logging_prefix: Optional[str] = None,
 ) -> str:
-
     logging_prefix = logging_prefix or "[async_send_message_with_retries]"
+    log_telemetry(sender_agent.logger, f"async_send_message_with_retries start", target_agent_id=target_agent_id)
+
     for attempt in range(1, max_retries + 1):
         try:
+            log_telemetry(
+                sender_agent.logger,
+                f"async_send_message_with_retries -> asyncio wait for send_message_to_agent_no_stream start",
+                target_agent_id=target_agent_id,
+            )
             response = await asyncio.wait_for(
                 send_message_to_agent_no_stream(
                     server=server,
@@ -354,15 +361,24 @@ async def async_send_message_with_retries(
                 ),
                 timeout=timeout,
             )
+            log_telemetry(
+                sender_agent.logger,
+                f"async_send_message_with_retries -> asyncio wait for send_message_to_agent_no_stream finish",
+                target_agent_id=target_agent_id,
+            )
 
             # Then parse out the assistant message
             assistant_message = parse_letta_response_for_assistant_message(target_agent_id, response)
             if assistant_message:
                 sender_agent.logger.info(f"{logging_prefix} - {assistant_message}")
+                log_telemetry(
+                    sender_agent.logger, f"async_send_message_with_retries finish with assistant message", target_agent_id=target_agent_id
+                )
                 return assistant_message
             else:
                 msg = f"(No response from agent {target_agent_id})"
                 sender_agent.logger.info(f"{logging_prefix} - {msg}")
+                log_telemetry(sender_agent.logger, f"async_send_message_with_retries finish no response", target_agent_id=target_agent_id)
                 return msg
 
         except asyncio.TimeoutError:
@@ -380,6 +396,12 @@ async def async_send_message_with_retries(
             await asyncio.sleep(backoff)
         else:
             sender_agent.logger.error(f"{logging_prefix} - Fatal error: {error_msg}")
+            log_telemetry(
+                sender_agent.logger,
+                f"async_send_message_with_retries finish fatal error",
+                target_agent_id=target_agent_id,
+                error_msg=error_msg,
+            )
             raise Exception(error_msg)
 
 
@@ -468,6 +490,7 @@ def fire_and_forget_send_to_agent(
 
 
 async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent", message: str, tags: List[str]) -> List[str]:
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async start", message=message, tags=tags)
     server = get_letta_server()
 
     augmented_message = (
@@ -477,7 +500,9 @@ async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent",
     )
 
     # Retrieve up to 100 matching agents
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async listing agents start", message=message, tags=tags)
     matching_agents = server.agent_manager.list_agents(actor=sender_agent.user, tags=tags, match_all_tags=True, limit=100)
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async  listing agents finish", message=message, tags=tags)
 
     # Create a system message
     messages = [MessageCreate(role=MessageRole.system, content=augmented_message, name=sender_agent.agent_state.name)]
@@ -504,4 +529,6 @@ async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent",
             final.append(str(r))
         else:
             final.append(r)
+
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async finish", message=message, tags=tags)
     return final
diff --git a/letta/llm_api/google_vertex.py b/letta/llm_api/google_vertex.py
new file mode 100644
index 00000000..9530211f
--- /dev/null
+++ b/letta/llm_api/google_vertex.py
@@ -0,0 +1,328 @@
+import uuid
+from typing import List, Optional
+
+from letta.constants import NON_USER_MSG_PREFIX
+from letta.local_llm.json_parser import clean_json_string_extra_backslash
+from letta.local_llm.utils import count_tokens
+from letta.schemas.openai.chat_completion_request import Tool
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
+from letta.utils import get_tool_call_id, get_utc_time, json_dumps
+
+
+def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
+    """Google AI API requires all function call returns are immediately followed by a 'model' role message.
+
+    In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
+    so there is no natural follow-up 'model' role message.
+
+    To satisfy the Google AI API restrictions, we add a dummy 'yield' message
+    with role == 'model' that is placed in between any function output
+    (role == 'tool' or 'function') and the user message (role == 'user') that directly follows it.
+    """
+    dummy_yield_message = {"role": "model", "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}]}
+    messages_with_padding = []
+    for i, message in enumerate(messages):
+        messages_with_padding.append(message)
+        # Pad only when a 'tool'/'function' message is directly followed by a 'user' message
+        if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
+            messages_with_padding.append(dummy_yield_message)  # NOTE: the same dict object is reused for every insertion
+
+    return messages_with_padding
+
+
+# TODO use pydantic model as input
+def to_google_ai(openai_message_dict: dict) -> dict:
+    """Convert one OpenAI-style message dict (user / assistant / tool) to Google AI form; other roles raise ValueError."""
+    # TODO supports "parts" as part of multimodal support
+    assert not isinstance(openai_message_dict["content"], list), "Multi-part content is message not yet supported"
+    if openai_message_dict["role"] == "user":
+        return {
+            "role": "user",
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    elif openai_message_dict["role"] == "assistant":
+        return {
+            "role": "model",  # NOTE: Google AI names the assistant role 'model'
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    elif openai_message_dict["role"] == "tool":
+        return {
+            "role": "function",  # NOTE: Google AI names the tool-result role 'function'
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    else:
+        raise ValueError(f"Unsupported conversion (OpenAI -> Google AI) from role {openai_message_dict['role']}")
+
+
+# TODO convert return type to pydantic
+def convert_tools_to_google_ai_format(tools: List[Tool], inner_thoughts_in_kwargs: Optional[bool] = True) -> List[dict]:
+    """Convert OpenAI-style tools to one Google AI functionDeclarations entry without modifying the input tools.
+
+    OpenAI style:
+      "tools": [{
+        "type": "function",
+        "function": {
+            "name": "find_movies",
+            "description": "find ....",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                 PARAM: {
+                   "type": PARAM_TYPE,  # eg "string"
+                   "description": PARAM_DESCRIPTION,
+                 },
+                 ...
+              },
+              "required": List[str],
+            }
+        }
+      }]
+
+    Google AI style:
+      "tools": [{
+        "functionDeclarations": [{
+          "name": "find_movies",
+          "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
+          "parameters": {
+            "type": "OBJECT",
+            "properties": {
+              "location": {
+                "type": "STRING",
+                "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
+              },
+              "description": {
+                "type": "STRING",
+                "description": "Any kind of description including category or genre, title words, attributes, etc."
+              }
+            },
+            "required": ["description"]
+          }
+        }, {
+          "name": "find_theaters",
+          ...
+    """
+    import copy
+
+    function_list = [
+        dict(
+            name=t.function.name,
+            description=t.function.description,
+            parameters=copy.deepcopy(t.function.parameters),  # copy: the edits below must not leak into the Tool's schema
+        )
+        for t in tools
+    ]
+
+    # Correct casing + add inner thoughts if needed
+    for func in function_list:
+        func["parameters"]["type"] = "OBJECT"
+        for param_fields in func["parameters"]["properties"].values():
+            param_fields["type"] = param_fields["type"].upper()
+        if inner_thoughts_in_kwargs:
+            from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
+            func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
+                "type": "STRING",
+                "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
+            }
+            func["parameters"].setdefault("required", []).append(INNER_THOUGHTS_KWARG)  # schema may omit "required"
+
+    return [{"functionDeclarations": function_list}]
+
+
+def convert_google_ai_response_to_chatcompletion(
+    response,
+    model: str,  # Required since not returned
+    input_messages: Optional[List[dict]] = None,  # Required if the API doesn't return UsageMetadata
+    pull_inner_thoughts_from_args: Optional[bool] = True,
+) -> ChatCompletionResponse:
+    """Google AI API response format is not the same as ChatCompletion, requires unpacking
+
+    Example:
+    {
+      "candidates": [
+        {
+          "content": {
+            "parts": [
+              {
+                "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
+              }
+            ]
+          }
+        }
+      ],
+      "usageMetadata": {
+        "promptTokenCount": 9,
+        "candidatesTokenCount": 27,
+        "totalTokenCount": 36
+      }
+    }
+    """
+    try:
+        choices = []
+        index = 0
+        for candidate in response.candidates:
+            content = candidate.content
+
+            role = content.role
+            assert role == "model", f"Unknown role in response: {role}"
+
+            parts = content.parts
+            # TODO support parts / multimodal
+            # TODO support parallel tool calling natively
+            # TODO Alternative here is to throw away everything else except for the first part
+            for response_message in parts:
+                # Convert the actual message style to OpenAI style
+                if response_message.function_call:
+                    function_call = response_message.function_call
+                    function_name = function_call.name
+                    function_args = function_call.args
+                    assert isinstance(function_args, dict), function_args
+
+                    # NOTE: this also involves stripping the inner monologue out of the function
+                    if pull_inner_thoughts_from_args:
+                        from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+
+                        assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
+                        inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                        assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                    else:
+                        inner_thoughts = None
+
+                    # Google AI API doesn't generate tool call IDs
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                        tool_calls=[
+                            ToolCall(
+                                id=get_tool_call_id(),
+                                type="function",
+                                function=FunctionCall(
+                                    name=function_name,
+                                    arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                ),
+                            )
+                        ],
+                    )
+
+                else:
+
+                    # Inner thoughts are the content by default
+                    inner_thoughts = response_message.text
+
+                    # Plain-text part: there is no tool call, so the text becomes the assistant content
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                    )
+
+                # Google AI API uses different finish reason strings than OpenAI
+                # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
+                #   see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
+                # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
+                #   see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
+                finish_reason = candidate.finish_reason.value
+                if finish_reason == "STOP":
+                    openai_finish_reason = (
+                        "function_call"
+                        if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
+                        else "stop"
+                    )
+                elif finish_reason == "MAX_TOKENS":
+                    openai_finish_reason = "length"
+                elif finish_reason == "SAFETY":
+                    openai_finish_reason = "content_filter"
+                elif finish_reason == "RECITATION":
+                    openai_finish_reason = "content_filter"
+                else:
+                    raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
+
+                choices.append(
+                    Choice(
+                        finish_reason=openai_finish_reason,
+                        index=index,
+                        message=openai_response_message,
+                    )
+                )
+                index += 1
+
+        # NOTE: one Choice is appended per part (not per candidate), so a multi-part candidate yields
+        # several choices with consecutive indices; nothing here enforces a single choice.
+
+        # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
+        #  "usageMetadata": {
+        #     "promptTokenCount": 9,
+        #     "candidatesTokenCount": 27,
+        #     "totalTokenCount": 36
+        #   }
+        if response.usage_metadata:
+            usage = UsageStatistics(
+                prompt_tokens=response.usage_metadata.prompt_token_count,
+                completion_tokens=response.usage_metadata.candidates_token_count,
+                total_tokens=response.usage_metadata.total_token_count,
+            )
+        else:
+            # Count it ourselves. NOTE(review): openai_response_message is only the last message built above (unbound if there were no parts)
+            assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
+            prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
+            completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
+            total_tokens = prompt_tokens + completion_tokens
+            usage = UsageStatistics(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+            )
+
+        response_id = str(uuid.uuid4())
+        return ChatCompletionResponse(
+            id=response_id,
+            choices=choices,
+            model=model,  # NOTE: Google API doesn't pass back model in the response
+            created=get_utc_time(),
+            usage=usage,
+        )
+    except KeyError as e:
+        raise e  # NOTE(review): the KeyError is re-raised as-is, so this try/except adds no handling
+
+
+# TODO convert 'data' type to pydantic
+def google_vertex_chat_completions_request(
+    model: str,
+    project_id: str,
+    region: str,
+    contents: List[dict],
+    config: dict,
+    add_postfunc_model_messages: bool = True,
+    # NOTE: Google AI API doesn't support mixing parts 'text' and 'function',
+    # so there's no clean way to put inner thoughts in the same message as a function call
+    inner_thoughts_in_kwargs: bool = True,
+) -> ChatCompletionResponse:
+    """Send one non-streaming generate_content request through Vertex AI and convert the reply.
+
+    Builds a google-genai client in Vertex AI mode for `project_id` / `region`, optionally pads
+    `contents` via add_dummy_model_messages, calls `client.models.generate_content` with `config`,
+    and returns the result converted to a ChatCompletionResponse. `inner_thoughts_in_kwargs` decides
+    whether inner thoughts are pulled out of the tool-call arguments during conversion.
+    Conversion errors are printed and re-raised. See https://ai.google.dev/docs/function_calling
+    """
+
+    from google import genai
+
+    client = genai.Client(vertexai=True, project=project_id, location=region, http_options={"api_version": "v1"})
+    # insert dummy 'model' messages between function returns and the user messages that follow them
+    if add_postfunc_model_messages:
+        contents = add_dummy_model_messages(contents)
+
+    # make request to client
+    # (the raw response is deliberately not echoed to stdout: it carries model/user content)
+    response = client.models.generate_content(model=model, contents=contents, config=config)
+
+    # convert back response
+    try:
+        return convert_google_ai_response_to_chatcompletion(
+            response=response,
+            model=model,
+            input_messages=contents,
+            pull_inner_thoughts_from_args=inner_thoughts_in_kwargs,
+        )
+    except Exception as conversion_error:
+        print(f"Error during response conversion: {conversion_error}")
+        raise conversion_error
diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py
index 77ba4839..65bdc1f1 100644
--- a/letta/llm_api/llm_api_tools.py
+++ b/letta/llm_api/llm_api_tools.py
@@ -252,6 +252,32 @@ def create(
             inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
         )
 
+    elif llm_config.model_endpoint_type == "google_vertex":
+        from letta.llm_api.google_vertex import google_vertex_chat_completions_request
+
+        if stream:
+            raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+        if not use_tool_naming:
+            raise NotImplementedError("Only tool calling supported on Google Vertex AI API requests")
+
+        if functions is not None:
+            tools = [{"type": "function", "function": f} for f in functions]
+            tools = [Tool(**t) for t in tools]
+            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
+        else:
+            tools = None
+
+        config = {"tools": tools, "temperature": llm_config.temperature, "max_output_tokens": llm_config.max_tokens}
+
+        return google_vertex_chat_completions_request(
+            model=llm_config.model,
+            project_id=model_settings.google_cloud_project,
+            region=model_settings.google_cloud_location,
+            contents=[m.to_google_ai_dict() for m in messages],
+            config=config,
+            inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
+        )
+
     elif llm_config.model_endpoint_type == "anthropic":
         if not use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")
diff --git a/letta/orm/agent.py b/letta/orm/agent.py
index a4d08f71..07b3917b 100644
--- a/letta/orm/agent.py
+++ b/letta/orm/agent.py
@@ -1,7 +1,7 @@
 import uuid
 from typing import TYPE_CHECKING, List, Optional
 
-from sqlalchemy import JSON, Index, String
+from sqlalchemy import JSON, Boolean, Index, String
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from letta.orm.block import Block
@@ -62,6 +62,11 @@ class Agent(SqlalchemyBase, OrganizationMixin):
     # Tool rules
     tool_rules: Mapped[Optional[List[ToolRule]]] = mapped_column(ToolRulesColumn, doc="the tool rules for this agent.")
 
+    # Stateless
+    message_buffer_autoclear: Mapped[bool] = mapped_column(
+        Boolean, doc="If set to True, the agent will not remember previous messages. Not recommended unless you have an advanced use case."
+    )
+
     # relationships
     organization: Mapped["Organization"] = relationship("Organization", back_populates="agents")
     tool_exec_environment_variables: Mapped[List["AgentEnvironmentVariable"]] = relationship(
@@ -146,6 +151,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
             "project_id": self.project_id,
             "template_id": self.template_id,
             "base_template_id": self.base_template_id,
+            "message_buffer_autoclear": self.message_buffer_autoclear,
         }
 
         return self.__pydantic_model__(**state)
diff --git a/letta/schemas/agent.py b/letta/schemas/agent.py
index 9269742d..032b9aab 100644
--- a/letta/schemas/agent.py
+++ b/letta/schemas/agent.py
@@ -43,7 +43,6 @@ class AgentState(OrmMetadataBase, validate_assignment=True):
         system (str): The system prompt used by the agent.
         llm_config (LLMConfig): The LLM configuration used by the agent.
         embedding_config (EmbeddingConfig): The embedding configuration used by the agent.
-
     """
 
     __id_prefix__ = "agent"
@@ -85,6 +84,12 @@ class AgentState(OrmMetadataBase, validate_assignment=True):
     template_id: Optional[str] = Field(None, description="The id of the template the agent belongs to.")
     base_template_id: Optional[str] = Field(None, description="The base template id of the agent.")
 
+    # An advanced configuration that makes it so this agent does not remember any previous messages
+    message_buffer_autoclear: bool = Field(
+        False,
+        description="If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.",
+    )
+
     def get_agent_env_vars_as_dict(self) -> Dict[str, str]:
         # Get environment variables for this agent specifically
         per_agent_env_vars = {}
@@ -146,6 +151,10 @@ class CreateAgent(BaseModel, validate_assignment=True):  #
     project_id: Optional[str] = Field(None, description="The id of the project the agent belongs to.")
     template_id: Optional[str] = Field(None, description="The id of the template the agent belongs to.")
     base_template_id: Optional[str] = Field(None, description="The base template id of the agent.")
+    message_buffer_autoclear: bool = Field(
+        False,
+        description="If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.",
+    )
 
     @field_validator("name")
     @classmethod
@@ -216,6 +225,10 @@ class UpdateAgent(BaseModel):
     project_id: Optional[str] = Field(None, description="The id of the project the agent belongs to.")
     template_id: Optional[str] = Field(None, description="The id of the template the agent belongs to.")
     base_template_id: Optional[str] = Field(None, description="The base template id of the agent.")
+    message_buffer_autoclear: Optional[bool] = Field(
+        None,
+        description="If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.",
+    )
 
     class Config:
         extra = "ignore"  # Ignores extra fields
diff --git a/letta/schemas/embedding_config.py b/letta/schemas/embedding_config.py
index c0a569a7..25162d0b 100644
--- a/letta/schemas/embedding_config.py
+++ b/letta/schemas/embedding_config.py
@@ -26,6 +26,7 @@ class EmbeddingConfig(BaseModel):
         "bedrock",
         "cohere",
         "google_ai",
+        "google_vertex",
         "azure",
         "groq",
         "ollama",
diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index e3877389..8e44b25e 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -25,6 +25,7 @@ class LLMConfig(BaseModel):
         "anthropic",
         "cohere",
         "google_ai",
+        "google_vertex",
         "azure",
         "groq",
         "ollama",
diff --git a/letta/schemas/message.py b/letta/schemas/message.py
index 722a749b..f86e7c15 100644
--- a/letta/schemas/message.py
+++ b/letta/schemas/message.py
@@ -570,19 +570,12 @@ class Message(BaseMessage):
                 "role": "user",
             }
 
-            # Optional field, do not include if null
-            if self.name is not None:
-                anthropic_message["name"] = self.name
-
         elif self.role == "user":
             assert all([v is not None for v in [self.text, self.role]]), vars(self)
             anthropic_message = {
                 "content": self.text,
                 "role": self.role,
             }
-            # Optional field, do not include if null
-            if self.name is not None:
-                anthropic_message["name"] = self.name
 
         elif self.role == "assistant":
             assert self.tool_calls is not None or self.text is not None
@@ -624,10 +617,6 @@ class Message(BaseMessage):
             # TODO support multi-modal
             anthropic_message["content"] = content
 
-            # Optional fields, do not include if null
-            if self.name is not None:
-                anthropic_message["name"] = self.name
-
         elif self.role == "tool":
             # NOTE: Anthropic uses role "user" for "tool" responses
             assert all([v is not None for v in [self.role, self.tool_call_id]]), vars(self)
diff --git a/letta/schemas/providers.py b/letta/schemas/providers.py
index e9678759..621958cc 100644
--- a/letta/schemas/providers.py
+++ b/letta/schemas/providers.py
@@ -327,7 +327,7 @@ class LMStudioOpenAIProvider(OpenAIProvider):
                     embedding_endpoint_type="openai",
                     embedding_endpoint=self.base_url,
                     embedding_dim=context_window_size,
-                    embedding_chunk_size=300,
+                    embedding_chunk_size=300,  # NOTE: max is 2048
                     handle=self.get_handle(model_name),
                 ),
             )
@@ -737,6 +737,45 @@ class GoogleAIProvider(Provider):
         return google_ai_get_model_context_window(self.base_url, self.api_key, model_name)
 
 
+class GoogleVertexProvider(Provider):
+    # Lists the LLM and embedding models offered through the Vertex AI endpoint of one GCP project/region.
+    name: str = "google_vertex"
+    google_cloud_project: str = Field(..., description="GCP project ID for the Google Vertex API.")
+    google_cloud_location: str = Field(..., description="GCP region for the Google Vertex API.")
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        """Return one LLMConfig per entry of GOOGLE_MODEL_TO_CONTEXT_LENGTH."""
+        from letta.llm_api.google_constants import GOOGLE_MODEL_TO_CONTEXT_LENGTH
+        # Every config shares the same endpoint URL, built from the configured region and project.
+        endpoint = f"https://{self.google_cloud_location}-aiplatform.googleapis.com/v1/projects/{self.google_cloud_project}/locations/{self.google_cloud_location}"
+        return [
+            LLMConfig(
+                model=model_name,
+                model_endpoint_type="google_vertex",
+                model_endpoint=endpoint,
+                context_window=window,
+                handle=self.get_handle(model_name),
+            )
+            for model_name, window in GOOGLE_MODEL_TO_CONTEXT_LENGTH.items()
+        ]
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        """Return one EmbeddingConfig per entry of GOOGLE_EMBEDING_MODEL_TO_DIM."""
+        from letta.llm_api.google_constants import GOOGLE_EMBEDING_MODEL_TO_DIM
+        endpoint = f"https://{self.google_cloud_location}-aiplatform.googleapis.com/v1/projects/{self.google_cloud_project}/locations/{self.google_cloud_location}"
+        return [
+            EmbeddingConfig(
+                embedding_model=model_name,
+                embedding_endpoint_type="google_vertex",
+                embedding_endpoint=endpoint,
+                embedding_dim=dim,
+                embedding_chunk_size=300,  # NOTE: max is 2048
+                handle=self.get_handle(model_name, is_embedding=True),
+            )
+            for model_name, dim in GOOGLE_EMBEDING_MODEL_TO_DIM.items()
+        ]
+
+
 class AzureProvider(Provider):
     name: str = "azure"
     latest_api_version: str = "2024-09-01-preview"  # https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
@@ -792,8 +831,8 @@ class AzureProvider(Provider):
                     embedding_endpoint=model_endpoint,
                     embedding_dim=768,
                     embedding_chunk_size=300,  # NOTE: max is 2048
-                    handle=self.get_handle(model_name, is_embedding=True),
-                )
+                    handle=self.get_handle(model_name),
+                ),
             )
         return configs
 
diff --git a/letta/server/rest_api/routers/v1/tools.py b/letta/server/rest_api/routers/v1/tools.py
index 912503fe..58588d77 100644
--- a/letta/server/rest_api/routers/v1/tools.py
+++ b/letta/server/rest_api/routers/v1/tools.py
@@ -124,6 +124,10 @@ def upsert_tool(
         # Log the error and raise a conflict exception
         print(f"Unique constraint violation occurred: {e}")
         raise HTTPException(status_code=409, detail=str(e))
+    except LettaToolCreateError as e:
+        # HTTP 400 == Bad Request
+        print(f"Error occurred during tool upsert: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
     except Exception as e:
         # Catch other unexpected errors and raise an internal server error
         print(f"Unexpected error occurred: {e}")
@@ -140,8 +144,17 @@ def modify_tool(
     """
     Update an existing tool
     """
-    actor = server.user_manager.get_user_or_default(user_id=user_id)
-    return server.tool_manager.update_tool_by_id(tool_id=tool_id, tool_update=request, actor=actor)
+    try:
+        actor = server.user_manager.get_user_or_default(user_id=user_id)
+        return server.tool_manager.update_tool_by_id(tool_id=tool_id, tool_update=request, actor=actor)
+    except LettaToolCreateError as e:
+        # HTTP 400 == Bad Request
+        print(f"Error occurred during tool update: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        # Catch other unexpected errors and raise an internal server error
+        print(f"Unexpected error occurred: {e}")
+        raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {str(e)}")
 
 
 @router.post("/add-base-tools", response_model=List[Tool], operation_id="add_base_tools")
diff --git a/letta/server/server.py b/letta/server/server.py
index 4a02b74e..5c32182a 100644
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -47,6 +47,7 @@ from letta.schemas.providers import (
     AnthropicProvider,
     AzureProvider,
     GoogleAIProvider,
+    GoogleVertexProvider,
     GroqProvider,
     LettaProvider,
     LMStudioOpenAIProvider,
@@ -352,6 +353,13 @@ class SyncServer(Server):
                     api_key=model_settings.gemini_api_key,
                 )
             )
+        if model_settings.google_cloud_location and model_settings.google_cloud_project:
+            self._enabled_providers.append(
+                GoogleVertexProvider(
+                    google_cloud_project=model_settings.google_cloud_project,
+                    google_cloud_location=model_settings.google_cloud_location,
+                )
+            )
         if model_settings.azure_api_key and model_settings.azure_base_url:
             assert model_settings.azure_api_version, "AZURE_API_VERSION is required"
             self._enabled_providers.append(
@@ -875,14 +883,12 @@ class SyncServer(Server):
         # TODO: Thread actor directly through this function, since the top level caller most likely already retrieved the user
 
         actor = self.user_manager.get_user_or_default(user_id=user_id)
-        start_date = self.message_manager.get_message_by_id(after, actor=actor).created_at if after else None
-        end_date = self.message_manager.get_message_by_id(before, actor=actor).created_at if before else None
 
         records = self.message_manager.list_messages_for_agent(
             agent_id=agent_id,
             actor=actor,
-            start_date=start_date,
-            end_date=end_date,
+            after=after,
+            before=before,
             limit=limit,
             ascending=not reverse,
         )
diff --git a/letta/services/agent_manager.py b/letta/services/agent_manager.py
index 3c965386..917ff968 100644
--- a/letta/services/agent_manager.py
+++ b/letta/services/agent_manager.py
@@ -123,6 +123,7 @@ class AgentManager:
             project_id=agent_create.project_id,
             template_id=agent_create.template_id,
             base_template_id=agent_create.base_template_id,
+            message_buffer_autoclear=agent_create.message_buffer_autoclear,
         )
 
         # If there are provided environment variables, add them in
@@ -185,6 +186,7 @@ class AgentManager:
         project_id: Optional[str] = None,
         template_id: Optional[str] = None,
         base_template_id: Optional[str] = None,
+        message_buffer_autoclear: bool = False,
     ) -> PydanticAgentState:
         """Create a new agent."""
         with self.session_maker() as session:
@@ -202,6 +204,7 @@ class AgentManager:
                 "project_id": project_id,
                 "template_id": template_id,
                 "base_template_id": base_template_id,
+                "message_buffer_autoclear": message_buffer_autoclear,
             }
 
             # Create the new agent using SqlalchemyBase.create
@@ -263,6 +266,7 @@ class AgentManager:
                 "project_id",
                 "template_id",
                 "base_template_id",
+                "message_buffer_autoclear",
             }
             for field in scalar_fields:
                 value = getattr(agent_update, field, None)
@@ -494,6 +498,7 @@ class AgentManager:
     @enforce_types
     def trim_all_in_context_messages_except_system(self, agent_id: str, actor: PydanticUser) -> PydanticAgentState:
         message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
+        # TODO: message_ids[0] is assumed to be the system message; nothing here verifies that.
         new_messages = [message_ids[0]]  # 0 is system message
         return self._set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
 
diff --git a/letta/services/message_manager.py b/letta/services/message_manager.py
index ac00ca15..01eccb53 100644
--- a/letta/services/message_manager.py
+++ b/letta/services/message_manager.py
@@ -1,6 +1,8 @@
-from datetime import datetime
-from typing import Dict, List, Optional
+from typing import List, Optional
 
+from sqlalchemy import and_, or_
+
+from letta.orm.agent import Agent as AgentModel
 from letta.orm.errors import NoResultFound
 from letta.orm.message import Message as MessageModel
 from letta.schemas.enums import MessageRole
@@ -127,44 +129,21 @@ class MessageManager:
     def list_user_messages_for_agent(
         self,
         agent_id: str,
-        actor: Optional[PydanticUser] = None,
-        before: Optional[str] = None,
+        actor: PydanticUser,
         after: Optional[str] = None,
-        start_date: Optional[datetime] = None,
-        end_date: Optional[datetime] = None,
-        limit: Optional[int] = 50,
-        filters: Optional[Dict] = None,
+        before: Optional[str] = None,
         query_text: Optional[str] = None,
+        limit: Optional[int] = 50,
         ascending: bool = True,
     ) -> List[PydanticMessage]:
-        """List user messages with flexible filtering and pagination options.
-
-        Args:
-            before: Cursor-based pagination - return records before this ID (exclusive)
-            after: Cursor-based pagination - return records after this ID (exclusive)
-            start_date: Filter records created after this date
-            end_date: Filter records created before this date
-            limit: Maximum number of records to return
-            filters: Additional filters to apply
-            query_text: Optional text to search for in message content
-
-        Returns:
-            List[PydanticMessage] - List of messages matching the criteria
-        """
-        message_filters = {"role": "user"}
-        if filters:
-            message_filters.update(filters)
-
         return self.list_messages_for_agent(
             agent_id=agent_id,
             actor=actor,
-            before=before,
             after=after,
-            start_date=start_date,
-            end_date=end_date,
-            limit=limit,
-            filters=message_filters,
+            before=before,
             query_text=query_text,
+            role=MessageRole.user,  # the only difference from list_messages_for_agent: user-role messages only
+            limit=limit,
             ascending=ascending,
         )
 
@@ -172,48 +151,94 @@ class MessageManager:
     def list_messages_for_agent(
         self,
         agent_id: str,
-        actor: Optional[PydanticUser] = None,
-        before: Optional[str] = None,
+        actor: PydanticUser,
         after: Optional[str] = None,
-        start_date: Optional[datetime] = None,
-        end_date: Optional[datetime] = None,
-        limit: Optional[int] = 50,
-        filters: Optional[Dict] = None,
+        before: Optional[str] = None,
         query_text: Optional[str] = None,
+        role: Optional[MessageRole] = None,  # when set, only messages with this role are returned
+        limit: Optional[int] = 50,
         ascending: bool = True,
     ) -> List[PydanticMessage]:
-        """List messages with flexible filtering and pagination options.
+        """
+        List messages for an agent by querying the Message table directly.
+
+        This function filters by the agent_id and paginates using (created_at, id) as the cursor;
+        the after/before message IDs are first resolved to their (created_at, id) pairs.
+        If query_text is provided, it keeps messages whose text contains it (case-insensitive ILIKE; `%` and `_` in the query are not escaped).
+        If role is provided, it will filter messages by the specified role.
 
         Args:
-            before: Cursor-based pagination - return records before this ID (exclusive)
-            after: Cursor-based pagination - return records after this ID (exclusive)
-            start_date: Filter records created after this date
-            end_date: Filter records created before this date
-            limit: Maximum number of records to return
-            filters: Additional filters to apply
-            query_text: Optional text to search for in message content
+            agent_id: The ID of the agent whose messages are queried.
+            actor: The user performing the action (used for permission checks).
+            after: A message ID; if provided, only messages with a greater (created_at, id) than this message are returned, regardless of `ascending`.
+            before: A message ID; if provided, only messages with a smaller (created_at, id) than this message are returned, regardless of `ascending`.
+            query_text: Optional string to partially match the message text content.
+            role: Optional MessageRole to filter messages by role.
+            limit: Maximum number of messages to return.
+            ascending: If True, sort by (created_at, id) ascending; if False, sort descending.
 
         Returns:
-            List[PydanticMessage] - List of messages matching the criteria
+            List[PydanticMessage]: A list of messages (converted via .to_pydantic()).
+
+        Raises:
+            NoResultFound: If the after/before message ID matches no message (the lookup is by message ID only, not restricted to agent_id).
         """
         with self.session_maker() as session:
-            # Start with base filters
-            message_filters = {"agent_id": agent_id}
-            if actor:
-                message_filters.update({"organization_id": actor.organization_id})
-            if filters:
-                message_filters.update(filters)
+            # Permission check: raise if the agent doesn't exist or actor is not allowed.
+            AgentModel.read(db_session=session, identifier=agent_id, actor=actor)
 
-            results = MessageModel.list(
-                db_session=session,
-                before=before,
-                after=after,
-                start_date=start_date,
-                end_date=end_date,
-                limit=limit,
-                query_text=query_text,
-                ascending=ascending,
-                **message_filters,
-            )
+            # Build a query that directly filters the Message table by agent_id.
+            query = session.query(MessageModel).filter(MessageModel.agent_id == agent_id)
 
+            # If query_text is provided, filter messages by partial match on text.
+            if query_text:
+                query = query.filter(MessageModel.text.ilike(f"%{query_text}%"))
+
+            # If role is provided, filter messages by role.
+            if role:
+                query = query.filter(MessageModel.role == role.value)  # Enum.value ensures comparison is against the string value
+
+            # Apply 'after' pagination if specified.
+            if after:
+                after_ref = session.query(MessageModel.created_at, MessageModel.id).filter(MessageModel.id == after).limit(1).one_or_none()
+                if not after_ref:
+                    raise NoResultFound(f"No message found with id '{after}' for agent '{agent_id}'.")
+                query = query.filter(
+                    or_(
+                        MessageModel.created_at > after_ref.created_at,
+                        and_(
+                            MessageModel.created_at == after_ref.created_at,
+                            MessageModel.id > after_ref.id,
+                        ),
+                    )
+                )
+
+            # Apply 'before' pagination if specified.
+            if before:
+                before_ref = (
+                    session.query(MessageModel.created_at, MessageModel.id).filter(MessageModel.id == before).limit(1).one_or_none()
+                )
+                if not before_ref:
+                    raise NoResultFound(f"No message found with id '{before}' for agent '{agent_id}'.")
+                query = query.filter(
+                    or_(
+                        MessageModel.created_at < before_ref.created_at,
+                        and_(
+                            MessageModel.created_at == before_ref.created_at,
+                            MessageModel.id < before_ref.id,
+                        ),
+                    )
+                )
+
+            # Apply ordering based on the ascending flag.
+            if ascending:
+                query = query.order_by(MessageModel.created_at.asc(), MessageModel.id.asc())
+            else:
+                query = query.order_by(MessageModel.created_at.desc(), MessageModel.id.desc())
+
+            # Limit the number of results.
+            query = query.limit(limit)
+
+            # Execute and convert each Message to its Pydantic representation.
+            results = query.all()
             return [msg.to_pydantic() for msg in results]
diff --git a/letta/settings.py b/letta/settings.py
index 667f7242..4e9f0d0b 100644
--- a/letta/settings.py
+++ b/letta/settings.py
@@ -86,6 +86,11 @@ class ModelSettings(BaseSettings):
     # google ai
     gemini_api_key: Optional[str] = None
     gemini_base_url: str = "https://generativelanguage.googleapis.com/"
+
+    # google vertex
+    google_cloud_project: Optional[str] = None
+    google_cloud_location: Optional[str] = None
+
     # together
     together_api_key: Optional[str] = None
 
@@ -151,6 +156,9 @@ class Settings(BaseSettings):
     multi_agent_send_message_timeout: int = 20 * 60
     multi_agent_concurrent_sends: int = 15
 
+    # telemetry logging
+    verbose_telemetry_logging: bool = False
+
     @property
     def letta_pg_uri(self) -> str:
         if self.pg_uri:
diff --git a/letta/utils.py b/letta/utils.py
index 171391e3..d0893bab 100644
--- a/letta/utils.py
+++ b/letta/utils.py
@@ -16,6 +16,7 @@ import uuid
 from contextlib import contextmanager
 from datetime import datetime, timedelta, timezone
 from functools import wraps
+from logging import Logger
 from typing import Any, Coroutine, List, Union, _GenericAlias, get_args, get_origin, get_type_hints
 from urllib.parse import urljoin, urlparse
 
@@ -1150,3 +1151,19 @@ def run_async_task(coro: Coroutine[Any, Any, Any]) -> Any:
     except RuntimeError:
         # If no event loop is running, create a new one
         return asyncio.run(coro)
+
+
+def log_telemetry(logger: Logger, event: str, **kwargs):
+    """
+    Log a telemetry event with a UTC timestamp at INFO level, only when settings.verbose_telemetry_logging is enabled.
+
+    :param logger: The logger that receives the record.
+    :param event: A string describing the event.
+    :param kwargs: Extra key-value metadata; pairs whose value is None are left out.
+    """
+    from letta.settings import settings
+
+    if settings.verbose_telemetry_logging:
+        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S,%f UTC")  # %f renders microseconds
+        parts = [f"EVENT: {event}", *(f"{key}={value}" for key, value in kwargs.items() if value is not None)]
+        logger.info(f"[{timestamp}] " + " | ".join(parts))  # joining avoids a dangling " | " when there is no metadata
diff --git a/poetry.lock b/poetry.lock
index ff140e9c..b7eb803e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
 
 [[package]]
 name = "aiohappyeyeballs"
@@ -539,6 +539,17 @@ files = [
     {file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724"},
 ]
 
+[[package]]
+name = "cachetools"
+version = "5.5.1"
+description = "Extensible memoizing collections and decorators"
+optional = true
+python-versions = ">=3.7"
+files = [
+    {file = "cachetools-5.5.1-py3-none-any.whl", hash = "sha256:b76651fdc3b24ead3c648bbdeeb940c1b04d365b38b4af66788f9ec4a81d42bb"},
+    {file = "cachetools-5.5.1.tar.gz", hash = "sha256:70f238fbba50383ef62e55c6aff6d9673175fe59f7c6782c7a0b9e38f4a9df95"},
+]
+
 [[package]]
 name = "certifi"
 version = "2025.1.31"
@@ -828,7 +839,6 @@ optional = false
 python-versions = "<4,>=3.9"
 files = [
     {file = "composio_langchain-0.6.19-py3-none-any.whl", hash = "sha256:d0811956fe22bfa20d08828edca1757523730a6a02e6021e8ce3509c926c7f9b"},
-    {file = "composio_langchain-0.6.19.tar.gz", hash = "sha256:17b8c7ee042c0cf2c154772d742fe19e9d79a7e9e2a32d382d6f722b2104d671"},
 ]
 
 [package.dependencies]
@@ -1606,6 +1616,47 @@ benchmarks = ["httplib2", "httpx", "requests", "urllib3"]
 dev = ["dpkt", "pytest", "requests"]
 examples = ["oauth2"]
 
+[[package]]
+name = "google-auth"
+version = "2.38.0"
+description = "Google Authentication Library"
+optional = true
+python-versions = ">=3.7"
+files = [
+    {file = "google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a"},
+    {file = "google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4"},
+]
+
+[package.dependencies]
+cachetools = ">=2.0.0,<6.0"
+pyasn1-modules = ">=0.2.1"
+rsa = ">=3.1.4,<5"
+
+[package.extras]
+aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"]
+enterprise-cert = ["cryptography", "pyopenssl"]
+pyjwt = ["cryptography (>=38.0.3)", "pyjwt (>=2.0)"]
+pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"]
+reauth = ["pyu2f (>=0.1.5)"]
+requests = ["requests (>=2.20.0,<3.0.0.dev0)"]
+
+[[package]]
+name = "google-genai"
+version = "1.2.0"
+description = "GenAI Python SDK"
+optional = true
+python-versions = ">=3.9"
+files = [
+    {file = "google_genai-1.2.0-py3-none-any.whl", hash = "sha256:609d61bee73f1a6ae5b47e9c7dd4b469d50318f050c5ceacf835b0f80f79d2d9"},
+]
+
+[package.dependencies]
+google-auth = ">=2.14.1,<3.0.0dev"
+pydantic = ">=2.0.0,<3.0.0dev"
+requests = ">=2.28.1,<3.0.0dev"
+typing-extensions = ">=4.11.0,<5.0.0dev"
+websockets = ">=13.0,<15.0dev"
+
 [[package]]
 name = "greenlet"
 version = "3.1.1"
@@ -2481,13 +2532,13 @@ tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<10"
 
 [[package]]
 name = "langchain-core"
-version = "0.3.34"
+version = "0.3.35"
 description = "Building applications with LLMs through composability"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "langchain_core-0.3.34-py3-none-any.whl", hash = "sha256:a057ebeddd2158d3be14bde341b25640ddf958b6989bd6e47160396f5a8202ae"},
-    {file = "langchain_core-0.3.34.tar.gz", hash = "sha256:26504cf1e8e6c310adad907b890d4e3c147581cfa7434114f6dc1134fe4bc6d3"},
+    {file = "langchain_core-0.3.35-py3-none-any.whl", hash = "sha256:81a4097226e180fa6c64e2d2ab38dcacbbc23b64fc109fb15622910fe8951670"},
+    {file = "langchain_core-0.3.35.tar.gz", hash = "sha256:328688228ece259da734417d477994a69cf8202dea9ed4271f2d792e3575c6fc"},
 ]
 
 [package.dependencies]
@@ -2576,13 +2627,13 @@ pytest = ["pytest (>=7.0.0)", "rich (>=13.9.4,<14.0.0)"]
 
 [[package]]
 name = "letta-client"
-version = "0.1.28"
+version = "0.1.31"
 description = ""
 optional = false
 python-versions = "<4.0,>=3.8"
 files = [
-    {file = "letta_client-0.1.28-py3-none-any.whl", hash = "sha256:ace0c95a7429d2335ff7221aacaef9db7220ab5a4e5d87c6af7d6adbb86362aa"},
-    {file = "letta_client-0.1.28.tar.gz", hash = "sha256:bdb41aa9a6def43f0e7a8c1ccc3b48d6028f332ee73804d59330596b7f96c4a9"},
+    {file = "letta_client-0.1.31-py3-none-any.whl", hash = "sha256:323b4cce482fb38fb701268804163132e102c6b23262dfee2080aa36a4127a53"},
+    {file = "letta_client-0.1.31.tar.gz", hash = "sha256:68247baf20ed6a472e3e5b6d9b0e3912387f4e0c3ee12e3e8eb9a9d1dd3063c3"},
 ]
 
 [package.dependencies]
@@ -3376,13 +3427,13 @@ files = [
 
 [[package]]
 name = "openai"
-version = "1.61.1"
+version = "1.62.0"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "openai-1.61.1-py3-none-any.whl", hash = "sha256:72b0826240ce26026ac2cd17951691f046e5be82ad122d20a8e1b30ca18bd11e"},
-    {file = "openai-1.61.1.tar.gz", hash = "sha256:ce1851507218209961f89f3520e06726c0aa7d0512386f0f977e3ac3e4f2472e"},
+    {file = "openai-1.62.0-py3-none-any.whl", hash = "sha256:dcb7f9fb4fbc3f27e3ffd2d7bf045be9211510d7fafefcef7ad2302cb27484e0"},
+    {file = "openai-1.62.0.tar.gz", hash = "sha256:ef3f6864ae2f75fa6296bc9811acf684b95557fcb611fe95734215a8b9150b43"},
 ]
 
 [package.dependencies]
@@ -4030,7 +4081,6 @@ files = [
     {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"},
     {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"},
     {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"},
-    {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"},
     {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"},
     {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"},
     {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"},
@@ -4090,7 +4140,6 @@ files = [
     {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
     {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
     {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
-    {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"},
     {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
     {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
     {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
@@ -4192,6 +4241,31 @@ files = [
 [package.extras]
 test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"]
 
+[[package]]
+name = "pyasn1"
+version = "0.6.1"
+description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"},
+    {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"},
+]
+
+[[package]]
+name = "pyasn1-modules"
+version = "0.4.1"
+description = "A collection of ASN.1-based protocols modules"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd"},
+    {file = "pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c"},
+]
+
+[package.dependencies]
+pyasn1 = ">=0.4.6,<0.7.0"
+
 [[package]]
 name = "pycparser"
 version = "2.22"
@@ -4443,13 +4517,13 @@ files = [
 
 [[package]]
 name = "pyright"
-version = "1.1.393"
+version = "1.1.394"
 description = "Command line wrapper for pyright"
 optional = true
 python-versions = ">=3.7"
 files = [
-    {file = "pyright-1.1.393-py3-none-any.whl", hash = "sha256:8320629bb7a44ca90944ba599390162bf59307f3d9fb6e27da3b7011b8c17ae5"},
-    {file = "pyright-1.1.393.tar.gz", hash = "sha256:aeeb7ff4e0364775ef416a80111613f91a05c8e01e58ecfefc370ca0db7aed9c"},
+    {file = "pyright-1.1.394-py3-none-any.whl", hash = "sha256:5f74cce0a795a295fb768759bbeeec62561215dea657edcaab48a932b031ddbb"},
+    {file = "pyright-1.1.394.tar.gz", hash = "sha256:56f2a3ab88c5214a451eb71d8f2792b7700434f841ea219119ade7f42ca93608"},
 ]
 
 [package.dependencies]
@@ -5191,6 +5265,20 @@ files = [
     {file = "rpds_py-0.22.3.tar.gz", hash = "sha256:e32fee8ab45d3c2db6da19a5323bc3362237c8b653c70194414b892fd06a080d"},
 ]
 
+[[package]]
+name = "rsa"
+version = "4.9"
+description = "Pure-Python RSA implementation"
+optional = true
+python-versions = ">=3.6,<4"
+files = [
+    {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"},
+    {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"},
+]
+
+[package.dependencies]
+pyasn1 = ">=0.1.3"
+
 [[package]]
 name = "scramp"
 version = "1.4.5"
@@ -5855,83 +5943,80 @@ test = ["websockets"]
 
 [[package]]
 name = "websockets"
-version = "12.0"
+version = "14.2"
 description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
 optional = true
-python-versions = ">=3.8"
+python-versions = ">=3.9"
 files = [
-    {file = "websockets-12.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374"},
-    {file = "websockets-12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be"},
-    {file = "websockets-12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb809e816916a3b210bed3c82fb88eaf16e8afcf9c115ebb2bacede1797d2547"},
-    {file = "websockets-12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c588f6abc13f78a67044c6b1273a99e1cf31038ad51815b3b016ce699f0d75c2"},
-    {file = "websockets-12.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5aa9348186d79a5f232115ed3fa9020eab66d6c3437d72f9d2c8ac0c6858c558"},
-    {file = "websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6350b14a40c95ddd53e775dbdbbbc59b124a5c8ecd6fbb09c2e52029f7a9f480"},
-    {file = "websockets-12.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:70ec754cc2a769bcd218ed8d7209055667b30860ffecb8633a834dde27d6307c"},
-    {file = "websockets-12.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6e96f5ed1b83a8ddb07909b45bd94833b0710f738115751cdaa9da1fb0cb66e8"},
-    {file = "websockets-12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4d87be612cbef86f994178d5186add3d94e9f31cc3cb499a0482b866ec477603"},
-    {file = "websockets-12.0-cp310-cp310-win32.whl", hash = "sha256:befe90632d66caaf72e8b2ed4d7f02b348913813c8b0a32fae1cc5fe3730902f"},
-    {file = "websockets-12.0-cp310-cp310-win_amd64.whl", hash = "sha256:363f57ca8bc8576195d0540c648aa58ac18cf85b76ad5202b9f976918f4219cf"},
-    {file = "websockets-12.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5d873c7de42dea355d73f170be0f23788cf3fa9f7bed718fd2830eefedce01b4"},
-    {file = "websockets-12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3f61726cae9f65b872502ff3c1496abc93ffbe31b278455c418492016e2afc8f"},
-    {file = "websockets-12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed2fcf7a07334c77fc8a230755c2209223a7cc44fc27597729b8ef5425aa61a3"},
-    {file = "websockets-12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e332c210b14b57904869ca9f9bf4ca32f5427a03eeb625da9b616c85a3a506c"},
-    {file = "websockets-12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5693ef74233122f8ebab026817b1b37fe25c411ecfca084b29bc7d6efc548f45"},
-    {file = "websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e9e7db18b4539a29cc5ad8c8b252738a30e2b13f033c2d6e9d0549b45841c04"},
-    {file = "websockets-12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e2df67b8014767d0f785baa98393725739287684b9f8d8a1001eb2839031447"},
-    {file = "websockets-12.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bea88d71630c5900690fcb03161ab18f8f244805c59e2e0dc4ffadae0a7ee0ca"},
-    {file = "websockets-12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dff6cdf35e31d1315790149fee351f9e52978130cef6c87c4b6c9b3baf78bc53"},
-    {file = "websockets-12.0-cp311-cp311-win32.whl", hash = "sha256:3e3aa8c468af01d70332a382350ee95f6986db479ce7af14d5e81ec52aa2b402"},
-    {file = "websockets-12.0-cp311-cp311-win_amd64.whl", hash = "sha256:25eb766c8ad27da0f79420b2af4b85d29914ba0edf69f547cc4f06ca6f1d403b"},
-    {file = "websockets-12.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0e6e2711d5a8e6e482cacb927a49a3d432345dfe7dea8ace7b5790df5932e4df"},
-    {file = "websockets-12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dbcf72a37f0b3316e993e13ecf32f10c0e1259c28ffd0a85cee26e8549595fbc"},
-    {file = "websockets-12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12743ab88ab2af1d17dd4acb4645677cb7063ef4db93abffbf164218a5d54c6b"},
-    {file = "websockets-12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b645f491f3c48d3f8a00d1fce07445fab7347fec54a3e65f0725d730d5b99cb"},
-    {file = "websockets-12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9893d1aa45a7f8b3bc4510f6ccf8db8c3b62120917af15e3de247f0780294b92"},
-    {file = "websockets-12.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f38a7b376117ef7aff996e737583172bdf535932c9ca021746573bce40165ed"},
-    {file = "websockets-12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f764ba54e33daf20e167915edc443b6f88956f37fb606449b4a5b10ba42235a5"},
-    {file = "websockets-12.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1e4b3f8ea6a9cfa8be8484c9221ec0257508e3a1ec43c36acdefb2a9c3b00aa2"},
-    {file = "websockets-12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9fdf06fd06c32205a07e47328ab49c40fc1407cdec801d698a7c41167ea45113"},
-    {file = "websockets-12.0-cp312-cp312-win32.whl", hash = "sha256:baa386875b70cbd81798fa9f71be689c1bf484f65fd6fb08d051a0ee4e79924d"},
-    {file = "websockets-12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ae0a5da8f35a5be197f328d4727dbcfafa53d1824fac3d96cdd3a642fe09394f"},
-    {file = "websockets-12.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5f6ffe2c6598f7f7207eef9a1228b6f5c818f9f4d53ee920aacd35cec8110438"},
-    {file = "websockets-12.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9edf3fc590cc2ec20dc9d7a45108b5bbaf21c0d89f9fd3fd1685e223771dc0b2"},
-    {file = "websockets-12.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8572132c7be52632201a35f5e08348137f658e5ffd21f51f94572ca6c05ea81d"},
-    {file = "websockets-12.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:604428d1b87edbf02b233e2c207d7d528460fa978f9e391bd8aaf9c8311de137"},
-    {file = "websockets-12.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1a9d160fd080c6285e202327aba140fc9a0d910b09e423afff4ae5cbbf1c7205"},
-    {file = "websockets-12.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87b4aafed34653e465eb77b7c93ef058516cb5acf3eb21e42f33928616172def"},
-    {file = "websockets-12.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b2ee7288b85959797970114deae81ab41b731f19ebcd3bd499ae9ca0e3f1d2c8"},
-    {file = "websockets-12.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7fa3d25e81bfe6a89718e9791128398a50dec6d57faf23770787ff441d851967"},
-    {file = "websockets-12.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a571f035a47212288e3b3519944f6bf4ac7bc7553243e41eac50dd48552b6df7"},
-    {file = "websockets-12.0-cp38-cp38-win32.whl", hash = "sha256:3c6cc1360c10c17463aadd29dd3af332d4a1adaa8796f6b0e9f9df1fdb0bad62"},
-    {file = "websockets-12.0-cp38-cp38-win_amd64.whl", hash = "sha256:1bf386089178ea69d720f8db6199a0504a406209a0fc23e603b27b300fdd6892"},
-    {file = "websockets-12.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ab3d732ad50a4fbd04a4490ef08acd0517b6ae6b77eb967251f4c263011a990d"},
-    {file = "websockets-12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1d9697f3337a89691e3bd8dc56dea45a6f6d975f92e7d5f773bc715c15dde28"},
-    {file = "websockets-12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1df2fbd2c8a98d38a66f5238484405b8d1d16f929bb7a33ed73e4801222a6f53"},
-    {file = "websockets-12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23509452b3bc38e3a057382c2e941d5ac2e01e251acce7adc74011d7d8de434c"},
-    {file = "websockets-12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e5fc14ec6ea568200ea4ef46545073da81900a2b67b3e666f04adf53ad452ec"},
-    {file = "websockets-12.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46e71dbbd12850224243f5d2aeec90f0aaa0f2dde5aeeb8fc8df21e04d99eff9"},
-    {file = "websockets-12.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b81f90dcc6c85a9b7f29873beb56c94c85d6f0dac2ea8b60d995bd18bf3e2aae"},
-    {file = "websockets-12.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a02413bc474feda2849c59ed2dfb2cddb4cd3d2f03a2fedec51d6e959d9b608b"},
-    {file = "websockets-12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bbe6013f9f791944ed31ca08b077e26249309639313fff132bfbf3ba105673b9"},
-    {file = "websockets-12.0-cp39-cp39-win32.whl", hash = "sha256:cbe83a6bbdf207ff0541de01e11904827540aa069293696dd528a6640bd6a5f6"},
-    {file = "websockets-12.0-cp39-cp39-win_amd64.whl", hash = "sha256:fc4e7fa5414512b481a2483775a8e8be7803a35b30ca805afa4998a84f9fd9e8"},
-    {file = "websockets-12.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:248d8e2446e13c1d4326e0a6a4e9629cb13a11195051a73acf414812700badbd"},
-    {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44069528d45a933997a6fef143030d8ca8042f0dfaad753e2906398290e2870"},
-    {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4e37d36f0d19f0a4413d3e18c0d03d0c268ada2061868c1e6f5ab1a6d575077"},
-    {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d829f975fc2e527a3ef2f9c8f25e553eb7bc779c6665e8e1d52aa22800bb38b"},
-    {file = "websockets-12.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2c71bd45a777433dd9113847af751aae36e448bc6b8c361a566cb043eda6ec30"},
-    {file = "websockets-12.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0bee75f400895aef54157b36ed6d3b308fcab62e5260703add87f44cee9c82a6"},
-    {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:423fc1ed29f7512fceb727e2d2aecb952c46aa34895e9ed96071821309951123"},
-    {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27a5e9964ef509016759f2ef3f2c1e13f403725a5e6a1775555994966a66e931"},
-    {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3181df4583c4d3994d31fb235dc681d2aaad744fbdbf94c4802485ececdecf2"},
-    {file = "websockets-12.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b067cb952ce8bf40115f6c19f478dc71c5e719b7fbaa511359795dfd9d1a6468"},
-    {file = "websockets-12.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:00700340c6c7ab788f176d118775202aadea7602c5cc6be6ae127761c16d6b0b"},
-    {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e469d01137942849cff40517c97a30a93ae79917752b34029f0ec72df6b46399"},
-    {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffefa1374cd508d633646d51a8e9277763a9b78ae71324183693959cf94635a7"},
-    {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0cab91b3956dfa9f512147860783a1829a8d905ee218a9837c18f683239611"},
-    {file = "websockets-12.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2cb388a5bfb56df4d9a406783b7f9dbefb888c09b71629351cc6b036e9259370"},
-    {file = "websockets-12.0-py3-none-any.whl", hash = "sha256:dc284bbc8d7c78a6c69e0c7325ab46ee5e40bb4d50e494d8131a07ef47500e9e"},
-    {file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"},
+    {file = "websockets-14.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e8179f95323b9ab1c11723e5d91a89403903f7b001828161b480a7810b334885"},
+    {file = "websockets-14.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d8c3e2cdb38f31d8bd7d9d28908005f6fa9def3324edb9bf336d7e4266fd397"},
+    {file = "websockets-14.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:714a9b682deb4339d39ffa674f7b674230227d981a37d5d174a4a83e3978a610"},
+    {file = "websockets-14.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2e53c72052f2596fb792a7acd9704cbc549bf70fcde8a99e899311455974ca3"},
+    {file = "websockets-14.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fbd68850c837e57373d95c8fe352203a512b6e49eaae4c2f4088ef8cf21980"},
+    {file = "websockets-14.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b27ece32f63150c268593d5fdb82819584831a83a3f5809b7521df0685cd5d8"},
+    {file = "websockets-14.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4daa0faea5424d8713142b33825fff03c736f781690d90652d2c8b053345b0e7"},
+    {file = "websockets-14.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:bc63cee8596a6ec84d9753fd0fcfa0452ee12f317afe4beae6b157f0070c6c7f"},
+    {file = "websockets-14.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a570862c325af2111343cc9b0257b7119b904823c675b22d4ac547163088d0d"},
+    {file = "websockets-14.2-cp310-cp310-win32.whl", hash = "sha256:75862126b3d2d505e895893e3deac0a9339ce750bd27b4ba515f008b5acf832d"},
+    {file = "websockets-14.2-cp310-cp310-win_amd64.whl", hash = "sha256:cc45afb9c9b2dc0852d5c8b5321759cf825f82a31bfaf506b65bf4668c96f8b2"},
+    {file = "websockets-14.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3bdc8c692c866ce5fefcaf07d2b55c91d6922ac397e031ef9b774e5b9ea42166"},
+    {file = "websockets-14.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c93215fac5dadc63e51bcc6dceca72e72267c11def401d6668622b47675b097f"},
+    {file = "websockets-14.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1c9b6535c0e2cf8a6bf938064fb754aaceb1e6a4a51a80d884cd5db569886910"},
+    {file = "websockets-14.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a52a6d7cf6938e04e9dceb949d35fbdf58ac14deea26e685ab6368e73744e4c"},
+    {file = "websockets-14.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9f05702e93203a6ff5226e21d9b40c037761b2cfb637187c9802c10f58e40473"},
+    {file = "websockets-14.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22441c81a6748a53bfcb98951d58d1af0661ab47a536af08920d129b4d1c3473"},
+    {file = "websockets-14.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd9b868d78b194790e6236d9cbc46d68aba4b75b22497eb4ab64fa640c3af56"},
+    {file = "websockets-14.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1a5a20d5843886d34ff8c57424cc65a1deda4375729cbca4cb6b3353f3ce4142"},
+    {file = "websockets-14.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:34277a29f5303d54ec6468fb525d99c99938607bc96b8d72d675dee2b9f5bf1d"},
+    {file = "websockets-14.2-cp311-cp311-win32.whl", hash = "sha256:02687db35dbc7d25fd541a602b5f8e451a238ffa033030b172ff86a93cb5dc2a"},
+    {file = "websockets-14.2-cp311-cp311-win_amd64.whl", hash = "sha256:862e9967b46c07d4dcd2532e9e8e3c2825e004ffbf91a5ef9dde519ee2effb0b"},
+    {file = "websockets-14.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1f20522e624d7ffbdbe259c6b6a65d73c895045f76a93719aa10cd93b3de100c"},
+    {file = "websockets-14.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:647b573f7d3ada919fd60e64d533409a79dcf1ea21daeb4542d1d996519ca967"},
+    {file = "websockets-14.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6af99a38e49f66be5a64b1e890208ad026cda49355661549c507152113049990"},
+    {file = "websockets-14.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:091ab63dfc8cea748cc22c1db2814eadb77ccbf82829bac6b2fbe3401d548eda"},
+    {file = "websockets-14.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b374e8953ad477d17e4851cdc66d83fdc2db88d9e73abf755c94510ebddceb95"},
+    {file = "websockets-14.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a39d7eceeea35db85b85e1169011bb4321c32e673920ae9c1b6e0978590012a3"},
+    {file = "websockets-14.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0a6f3efd47ffd0d12080594f434faf1cd2549b31e54870b8470b28cc1d3817d9"},
+    {file = "websockets-14.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:065ce275e7c4ffb42cb738dd6b20726ac26ac9ad0a2a48e33ca632351a737267"},
+    {file = "websockets-14.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e9d0e53530ba7b8b5e389c02282f9d2aa47581514bd6049d3a7cffe1385cf5fe"},
+    {file = "websockets-14.2-cp312-cp312-win32.whl", hash = "sha256:20e6dd0984d7ca3037afcb4494e48c74ffb51e8013cac71cf607fffe11df7205"},
+    {file = "websockets-14.2-cp312-cp312-win_amd64.whl", hash = "sha256:44bba1a956c2c9d268bdcdf234d5e5ff4c9b6dc3e300545cbe99af59dda9dcce"},
+    {file = "websockets-14.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6f1372e511c7409a542291bce92d6c83320e02c9cf392223272287ce55bc224e"},
+    {file = "websockets-14.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4da98b72009836179bb596a92297b1a61bb5a830c0e483a7d0766d45070a08ad"},
+    {file = "websockets-14.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8a86a269759026d2bde227652b87be79f8a734e582debf64c9d302faa1e9f03"},
+    {file = "websockets-14.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86cf1aaeca909bf6815ea714d5c5736c8d6dd3a13770e885aafe062ecbd04f1f"},
+    {file = "websockets-14.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9b0f6c3ba3b1240f602ebb3971d45b02cc12bd1845466dd783496b3b05783a5"},
+    {file = "websockets-14.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:669c3e101c246aa85bc8534e495952e2ca208bd87994650b90a23d745902db9a"},
+    {file = "websockets-14.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eabdb28b972f3729348e632ab08f2a7b616c7e53d5414c12108c29972e655b20"},
+    {file = "websockets-14.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2066dc4cbcc19f32c12a5a0e8cc1b7ac734e5b64ac0a325ff8353451c4b15ef2"},
+    {file = "websockets-14.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ab95d357cd471df61873dadf66dd05dd4709cae001dd6342edafc8dc6382f307"},
+    {file = "websockets-14.2-cp313-cp313-win32.whl", hash = "sha256:a9e72fb63e5f3feacdcf5b4ff53199ec8c18d66e325c34ee4c551ca748623bbc"},
+    {file = "websockets-14.2-cp313-cp313-win_amd64.whl", hash = "sha256:b439ea828c4ba99bb3176dc8d9b933392a2413c0f6b149fdcba48393f573377f"},
+    {file = "websockets-14.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7cd5706caec1686c5d233bc76243ff64b1c0dc445339bd538f30547e787c11fe"},
+    {file = "websockets-14.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ec607328ce95a2f12b595f7ae4c5d71bf502212bddcea528290b35c286932b12"},
+    {file = "websockets-14.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:da85651270c6bfb630136423037dd4975199e5d4114cae6d3066641adcc9d1c7"},
+    {file = "websockets-14.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ecadc7ce90accf39903815697917643f5b7cfb73c96702318a096c00aa71f5"},
+    {file = "websockets-14.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1979bee04af6a78608024bad6dfcc0cc930ce819f9e10342a29a05b5320355d0"},
+    {file = "websockets-14.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dddacad58e2614a24938a50b85969d56f88e620e3f897b7d80ac0d8a5800258"},
+    {file = "websockets-14.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:89a71173caaf75fa71a09a5f614f450ba3ec84ad9fca47cb2422a860676716f0"},
+    {file = "websockets-14.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6af6a4b26eea4fc06c6818a6b962a952441e0e39548b44773502761ded8cc1d4"},
+    {file = "websockets-14.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:80c8efa38957f20bba0117b48737993643204645e9ec45512579132508477cfc"},
+    {file = "websockets-14.2-cp39-cp39-win32.whl", hash = "sha256:2e20c5f517e2163d76e2729104abc42639c41cf91f7b1839295be43302713661"},
+    {file = "websockets-14.2-cp39-cp39-win_amd64.whl", hash = "sha256:b4c8cef610e8d7c70dea92e62b6814a8cd24fbd01d7103cc89308d2bfe1659ef"},
+    {file = "websockets-14.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d7d9cafbccba46e768be8a8ad4635fa3eae1ffac4c6e7cb4eb276ba41297ed29"},
+    {file = "websockets-14.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:c76193c1c044bd1e9b3316dcc34b174bbf9664598791e6fb606d8d29000e070c"},
+    {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd475a974d5352390baf865309fe37dec6831aafc3014ffac1eea99e84e83fc2"},
+    {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2c6c0097a41968b2e2b54ed3424739aab0b762ca92af2379f152c1aef0187e1c"},
+    {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d7ff794c8b36bc402f2e07c0b2ceb4a2424147ed4785ff03e2a7af03711d60a"},
+    {file = "websockets-14.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dec254fcabc7bd488dab64846f588fc5b6fe0d78f641180030f8ea27b76d72c3"},
+    {file = "websockets-14.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:bbe03eb853e17fd5b15448328b4ec7fb2407d45fb0245036d06a3af251f8e48f"},
+    {file = "websockets-14.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a3c4aa3428b904d5404a0ed85f3644d37e2cb25996b7f096d77caeb0e96a3b42"},
+    {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:577a4cebf1ceaf0b65ffc42c54856214165fb8ceeba3935852fc33f6b0c55e7f"},
+    {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad1c1d02357b7665e700eca43a31d52814ad9ad9b89b58118bdabc365454b574"},
+    {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f390024a47d904613577df83ba700bd189eedc09c57af0a904e5c39624621270"},
+    {file = "websockets-14.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3c1426c021c38cf92b453cdf371228d3430acd775edee6bac5a4d577efc72365"},
+    {file = "websockets-14.2-py3-none-any.whl", hash = "sha256:7a6ceec4ea84469f15cf15807a747e9efe57e369c384fa86e022b3bea679b79b"},
+    {file = "websockets-14.2.tar.gz", hash = "sha256:5059ed9c54945efb321f097084b4c7e52c246f2c869815876a69d1efc4ad6eb5"},
 ]
 
 [[package]]
@@ -6485,17 +6570,18 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\
 cffi = ["cffi (>=1.11)"]
 
 [extras]
-all = ["autoflake", "black", "datasets", "docker", "fastapi", "isort", "langchain", "langchain-community", "locust", "pexpect", "pg8000", "pgvector", "pre-commit", "psycopg2", "psycopg2-binary", "pyright", "pytest-asyncio", "pytest-order", "uvicorn", "websockets", "wikipedia"]
+all = ["autoflake", "black", "datasets", "docker", "fastapi", "isort", "langchain", "langchain-community", "locust", "pexpect", "pg8000", "pgvector", "pre-commit", "psycopg2", "psycopg2-binary", "pyright", "pytest-asyncio", "pytest-order", "uvicorn", "wikipedia"]
 bedrock = []
 cloud-tool-sandbox = ["e2b-code-interpreter"]
 dev = ["autoflake", "black", "datasets", "isort", "locust", "pexpect", "pre-commit", "pyright", "pytest-asyncio", "pytest-order"]
 external-tools = ["docker", "langchain", "langchain-community", "wikipedia"]
+google = ["google-genai"]
 postgres = ["pg8000", "pgvector", "psycopg2", "psycopg2-binary"]
 qdrant = ["qdrant-client"]
-server = ["fastapi", "uvicorn", "websockets"]
+server = ["fastapi", "uvicorn"]
 tests = ["wikipedia"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = "<3.14,>=3.10"
-content-hash = "c7fc4c28d463efcb2c555d3592a4dce11e36cd179513376ee23087b7784682e4"
+content-hash = "bb1df03a109d017d6fa9e060616cf113721b1bb6407ca5ecea5a1b8a6eb5c4de"
diff --git a/pyproject.toml b/pyproject.toml
index 07696ce4..ef98d2f5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "letta"
-version = "0.6.24"
+version = "0.6.25"
 packages = [
     {include = "letta"},
 ]
@@ -27,7 +27,6 @@ prettytable = "^3.9.0"
 pgvector = { version = "^0.2.3", optional = true }
 pre-commit = {version = "^3.5.0", optional = true }
 pg8000 = {version = "^1.30.3", optional = true}
-websockets = {version = "^12.0", optional = true}
 docstring-parser = ">=0.16,<0.17"
 httpx = "^0.28.0"
 numpy = "^1.26.2"
@@ -79,6 +78,7 @@ e2b-code-interpreter = {version = "^1.0.3", optional = true}
 anthropic = "^0.43.0"
 letta_client = "^0.1.23"
 openai = "^1.60.0"
+google-genai = {version = "^1.1.0", optional = true}
 faker = "^36.1.0"
 colorama = "^0.4.6"
 
@@ -93,6 +93,7 @@ external-tools = ["docker", "langchain", "wikipedia", "langchain-community"]
 tests = ["wikipedia"]
 all = ["pgvector", "pg8000", "psycopg2-binary", "psycopg2", "pytest", "pytest-asyncio", "pexpect", "black", "pre-commit", "datasets", "pyright", "pytest-order", "autoflake", "isort", "websockets", "fastapi", "uvicorn", "docker", "langchain", "wikipedia", "langchain-community", "locust"]
 bedrock = ["boto3"]
+google = ["google-genai"]
 
 [tool.poetry.group.dev.dependencies]
 black = "^24.4.2"
diff --git a/tests/configs/llm_model_configs/gemini-vertex.json b/tests/configs/llm_model_configs/gemini-vertex.json
new file mode 100644
index 00000000..a9a1f2af
--- /dev/null
+++ b/tests/configs/llm_model_configs/gemini-vertex.json
@@ -0,0 +1,7 @@
+{
+    "model": "gemini-2.0-pro-exp-02-05", 
+    "model_endpoint_type": "google_vertex", 
+    "model_endpoint": "https://us-central1-aiplatform.googleapis.com/v1/projects/memgpt-428419/locations/us-central1", 
+    "context_window": 2097152, 
+    "put_inner_thoughts_in_kwargs": true
+}
diff --git a/tests/helpers/utils.py b/tests/helpers/utils.py
index 167a39ee..f4868fda 100644
--- a/tests/helpers/utils.py
+++ b/tests/helpers/utils.py
@@ -151,3 +151,7 @@ def comprehensive_agent_checks(agent: AgentState, request: Union[CreateAgent, Up
         assert all(
             any(rule.tool_name == req_rule.tool_name for rule in agent.tool_rules) for req_rule in request.tool_rules
         ), f"Tool rules mismatch: {agent.tool_rules} != {request.tool_rules}"
+
+    # Assert message_buffer_autoclear
+    if not request.message_buffer_autoclear is None:
+        assert agent.message_buffer_autoclear == request.message_buffer_autoclear
diff --git a/tests/integration_test_agent_tool_graph.py b/tests/integration_test_agent_tool_graph.py
index 025f751b..97b8709f 100644
--- a/tests/integration_test_agent_tool_graph.py
+++ b/tests/integration_test_agent_tool_graph.py
@@ -186,7 +186,7 @@ def test_check_tool_rules_with_different_models(mock_e2b_api_key_none):
     client = create_client()
 
     config_files = [
-        "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json",
+        "tests/configs/llm_model_configs/claude-3-5-sonnet.json",
         "tests/configs/llm_model_configs/openai-gpt-3.5-turbo.json",
         "tests/configs/llm_model_configs/openai-gpt-4o.json",
     ]
@@ -247,7 +247,7 @@ def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none):
     tools = [t1, t2]
 
     # Make agent state
-    anthropic_config_file = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json"
+    anthropic_config_file = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
     for i in range(3):
         agent_uuid = str(uuid.uuid4())
         agent_state = setup_agent(
@@ -299,7 +299,7 @@ def test_agent_no_structured_output_with_one_child_tool(mock_e2b_api_key_none):
     tools = [send_message, archival_memory_search, archival_memory_insert]
 
     config_files = [
-        "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json",
+        "tests/configs/llm_model_configs/claude-3-5-sonnet.json",
         "tests/configs/llm_model_configs/openai-gpt-4o.json",
     ]
 
@@ -383,7 +383,7 @@ def test_agent_conditional_tool_easy(mock_e2b_api_key_none):
     ]
     tools = [flip_coin_tool, reveal_secret]
 
-    config_file = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json"
+    config_file = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
     agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
     response = client.user_message(agent_id=agent_state.id, message="flip a coin until you get the secret word")
 
@@ -455,7 +455,7 @@ def test_agent_conditional_tool_hard(mock_e2b_api_key_none):
 
     # Setup agent with all tools
     tools = [play_game_tool, flip_coin_tool, reveal_secret]
-    config_file = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json"
+    config_file = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
     agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
 
     # Ask agent to try to get all secret words
@@ -681,7 +681,7 @@ def test_init_tool_rule_always_fails_one_tool():
     )
 
     # Set up agent with the tool rule
-    claude_config = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json"
+    claude_config = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
     agent_state = setup_agent(client, claude_config, agent_uuid, tool_rules=[tool_rule], tool_ids=[bad_tool.id], include_base_tools=False)
 
     # Start conversation
@@ -710,7 +710,7 @@ def test_init_tool_rule_always_fails_multiple_tools():
     )
 
     # Set up agent with the tool rule
-    claude_config = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json"
+    claude_config = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
     agent_state = setup_agent(client, claude_config, agent_uuid, tool_rules=[tool_rule], tool_ids=[bad_tool.id], include_base_tools=True)
 
     # Start conversation
diff --git a/tests/integration_test_multi_agent.py b/tests/integration_test_multi_agent.py
new file mode 100644
index 00000000..d0b0edb3
--- /dev/null
+++ b/tests/integration_test_multi_agent.py
@@ -0,0 +1,328 @@
+import json
+
+import pytest
+
+from letta import LocalClient, create_client
+from letta.functions.functions import derive_openai_json_schema, parse_source_code
+from letta.orm import Base
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.letta_message import SystemMessage, ToolReturnMessage
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.memory import ChatMemory
+from letta.schemas.tool import Tool
+from tests.helpers.utils import retry_until_success
+from tests.utils import wait_for_incoming_message
+
+
+@pytest.fixture(autouse=True)
+def truncate_database():
+    """Delete all rows from every ORM table (reverse dependency order) before each test."""
+    from letta.server.server import db_context
+    with db_context() as db_session:
+        for orm_table in Base.metadata.sorted_tables[::-1]:  # reversed to avoid FK issues
+            db_session.execute(orm_table.delete())
+        db_session.commit()
+
+
+@pytest.fixture(scope="function")
+def client():
+    """Yield a new client defaulting to the gpt-4o LLM config and the OpenAI embedding config."""
+    letta_client = create_client()
+    letta_client.set_default_llm_config(LLMConfig.default_config("gpt-4o"))
+    letta_client.set_default_embedding_config(EmbeddingConfig.default_config(provider="openai"))
+    yield letta_client
+
+
+@pytest.fixture(scope="function")
+def agent_obj(client: LocalClient):
+    """Yield a loaded agent that has the `send_message_to_agent_and_wait_for_reply` tool attached."""
+    reply_tool_id = client.get_tool_id(name="send_message_to_agent_and_wait_for_reply")
+    sender_state = client.create_agent(tool_ids=[reply_tool_id])
+
+    loaded_sender = client.server.load_agent(agent_id=sender_state.id, actor=client.user)
+    # No teardown here: this fixture does not delete the agent it created
+    # (the autouse `truncate_database` fixture empties the tables ahead of each test).
+    yield loaded_sender
+
+
+@pytest.fixture(scope="function")
+def other_agent_obj(client: LocalClient):
+    """Yield a second loaded agent, created without multi-agent tools; delete it on teardown."""
+    recipient_state = client.create_agent(include_multi_agent_tools=False)
+    recipient = client.server.load_agent(agent_id=recipient_state.id, actor=client.user)
+
+    yield recipient
+    # Teardown: runs after the requesting test has finished with the agent
+    client.delete_agent(recipient.agent_state.id)
+
+
+@pytest.fixture
+def roll_dice_tool(client):
+    """Create (or update) a trivial `roll_dice` tool on the server and yield the stored tool."""
+    def roll_dice():
+        """
+        Rolls a 6 sided die.
+
+        Returns:
+            str: The roll result.
+        """
+        return "Rolled a 5!"
+
+    # Build a not-yet-named Tool straight from the inner function's source text
+    dice_tool = Tool(
+        description="test_description",
+        tags=["test"],
+        source_code=parse_source_code(roll_dice),
+        source_type="python",
+    )
+
+    # Derive the JSON schema from the source, then take the tool name from that schema
+    schema = derive_openai_json_schema(source_code=dice_tool.source_code, name=dice_tool.name)
+    dice_tool.json_schema = schema
+    dice_tool.name = schema["name"]
+
+    # Persist the tool and hand the stored record to the test
+    stored_tool = client.server.tool_manager.create_or_update_tool(dice_tool, actor=client.user)
+    yield stored_tool
+
+
+@retry_until_success(max_attempts=3, sleep_time_seconds=2)
+def test_send_message_to_agent(client, agent_obj, other_agent_obj):
+    """Prompt one agent to pass a secret word to another, then check both sides saw the exchange."""
+    secret_word = "banana"
+    sender_id = agent_obj.agent_state.id
+    recipient_id = other_agent_obj.agent_state.id
+
+    # Encourage the agent to send a message to the other agent_obj with the secret string
+    client.send_message(
+        agent_id=sender_id,
+        role="user",
+        message=f"Use your tool to send a message to another agent with id {recipient_id} to share the secret word: {secret_word}!",
+    )
+
+    # Scan the recipient's messages in reverse; the first SystemMessage found must contain the secret word
+    # (if there is no SystemMessage at all, this loop asserts nothing)
+    recipient_messages = client.get_messages(recipient_id)
+    for m in reversed(recipient_messages):
+        print(f"\n\n {recipient_id} -> {m.model_dump_json(indent=4)}")
+        if isinstance(m, SystemMessage):
+            assert secret_word in m.content
+            break
+
+    # Search the sender agent for the response from another agent
+    in_context_messages = agent_obj.agent_manager.get_in_context_messages(agent_id=sender_id, actor=agent_obj.user)
+    target_snippet = f"{recipient_id} said:"
+    found = any(target_snippet in (m.text or "") for m in in_context_messages)
+
+    # Join outside the f-string: reusing quotes / backslashes inside an f-string expression is a SyntaxError before Python 3.12
+    in_context_text = "\n".join([m.text or "" for m in in_context_messages[1:]])
+    print(f"In context messages of the sender agent (without system):\n\n{in_context_text}")
+    if not found:
+        raise Exception(f"Was not able to find an instance of the target snippet: {target_snippet}")
+
+    # Test that the agent can still receive messages fine
+    response = client.send_message(agent_id=sender_id, role="user", message="So what did the other agent say?")
+    print(response.messages)
+
+
+@retry_until_success(max_attempts=3, sleep_time_seconds=2)
+def test_send_message_to_agents_with_tags_simple(client):
+    """A manager broadcasts a secret word by tag; the tool response must cover exactly the matching workers."""
+    worker_tags = ["worker", "user-456"]
+    decoy_tags = ["worker", "user-123"]
+    secret_word = "banana"
+
+    # Clean up first from possibly failed tests
+    prev_worker_agents = client.server.agent_manager.list_agents(client.user, tags=worker_tags, match_all_tags=True)
+    for agent in prev_worker_agents:
+        client.delete_agent(agent.id)
+
+    # Create "manager" agent
+    send_message_to_agents_matching_all_tags_tool_id = client.get_tool_id(name="send_message_to_agents_matching_all_tags")
+    manager_agent_state = client.create_agent(tool_ids=[send_message_to_agents_matching_all_tags_tool_id])
+    manager_agent = client.server.load_agent(agent_id=manager_agent_state.id, actor=client.user)
+
+    # Create 3 non-matching worker agents (These should NOT get the message); tracked separately so they get deleted too
+    decoy_agents = []
+    for _ in range(3):
+        decoy_agent_state = client.create_agent(include_multi_agent_tools=False, tags=decoy_tags)
+        decoy_agent = client.server.load_agent(agent_id=decoy_agent_state.id, actor=client.user)
+        decoy_agents.append(decoy_agent)
+
+    # Create 3 worker agents that should get the message
+    worker_agents = []
+    for _ in range(3):
+        worker_agent_state = client.create_agent(include_multi_agent_tools=False, tags=worker_tags)
+        worker_agent = client.server.load_agent(agent_id=worker_agent_state.id, actor=client.user)
+        worker_agents.append(worker_agent)
+
+    # Encourage the manager to broadcast the secret word to the agents carrying the matching tags
+    response = client.send_message(
+        agent_id=manager_agent.agent_state.id,
+        role="user",
+        message=f"Send a message to all agents with tags {worker_tags} informing them of the secret word: {secret_word}!",
+    )
+
+    for m in response.messages:
+        if isinstance(m, ToolReturnMessage):
+            # NOTE(review): eval() executes text produced by a tool call; ast.literal_eval would be safer if it is always a literal
+            tool_response = eval(json.loads(m.tool_return)["message"])
+            print(f"\n\nManager agent tool response: \n{tool_response}\n\n")
+            assert len(tool_response) == len(worker_agents)
+
+            # We can break after this, the ToolReturnMessage after is not related
+            break
+
+    # Conversation search the worker agents
+    for agent in worker_agents:
+        messages = client.get_messages(agent.agent_state.id)
+        # Check for the presence of system message
+        for m in reversed(messages):
+            print(f"\n\n {agent.agent_state.id} -> {m.model_dump_json(indent=4)}")
+            if isinstance(m, SystemMessage):
+                assert secret_word in m.content
+                break
+
+    # Test that the agent can still receive messages fine
+    response = client.send_message(agent_id=manager_agent.agent_state.id, role="user", message="So what did the other agents say?")
+    print("Manager agent followup message: \n\n" + "\n".join([str(m) for m in response.messages]))
+
+    # Clean up agents (matching workers and non-matching decoys alike)
+    client.delete_agent(manager_agent_state.id)
+    for agent in worker_agents + decoy_agents:
+        client.delete_agent(agent.agent_state.id)
+
+
+@retry_until_success(max_attempts=3, sleep_time_seconds=2)
+def test_send_message_to_agents_with_tags_complex_tool_use(client, roll_dice_tool):
+    """A manager asks tagged workers (each holding the roll_dice tool) to roll; one reply per worker is expected."""
+    worker_tags = ["dice-rollers"]
+
+    # Clean up first from possibly failed tests
+    prev_worker_agents = client.server.agent_manager.list_agents(client.user, tags=worker_tags, match_all_tags=True)
+    for agent in prev_worker_agents:
+        client.delete_agent(agent.id)
+
+    # Create "manager" agent
+    send_message_to_agents_matching_all_tags_tool_id = client.get_tool_id(name="send_message_to_agents_matching_all_tags")
+    manager_agent_state = client.create_agent(tool_ids=[send_message_to_agents_matching_all_tags_tool_id])
+    manager_agent = client.server.load_agent(agent_id=manager_agent_state.id, actor=client.user)
+
+    # Create 2 worker agents, each equipped with the roll_dice tool
+    worker_agents = []
+    for _ in range(2):
+        worker_agent_state = client.create_agent(include_multi_agent_tools=False, tags=worker_tags, tool_ids=[roll_dice_tool.id])
+        worker_agent = client.server.load_agent(agent_id=worker_agent_state.id, actor=client.user)
+        worker_agents.append(worker_agent)
+
+    # Encourage the manager to ask every tagged worker to roll a dice
+    broadcast_message = f"Send a message to all agents with tags {worker_tags} asking them to roll a dice for you!"
+    response = client.send_message(
+        agent_id=manager_agent.agent_state.id,
+        role="user",
+        message=broadcast_message,
+    )
+
+    for m in response.messages:
+        if isinstance(m, ToolReturnMessage):
+            # NOTE(review): eval() executes text produced by a tool call; ast.literal_eval would be safer if it is always a literal
+            tool_response = eval(json.loads(m.tool_return)["message"])
+            print(f"\n\nManager agent tool response: \n{tool_response}\n\n")
+            assert len(tool_response) == len(worker_agents)
+            # We can break after this, the ToolReturnMessage after is not related
+            break
+
+    # Test that the agent can still receive messages fine
+    response = client.send_message(agent_id=manager_agent.agent_state.id, role="user", message="So what did the other agents say?")
+    print("Manager agent followup message: \n\n" + "\n".join([str(m) for m in response.messages]))
+
+    # Clean up agents
+    client.delete_agent(manager_agent_state.id)
+    for agent in worker_agents:
+        client.delete_agent(agent.agent_state.id)
+
+
+@retry_until_success(max_attempts=3, sleep_time_seconds=2)
+def test_send_message_to_sub_agents_auto_clear_message_buffer(client):
+    """Workers created with `message_buffer_autoclear=True` should end with one message id and updated memory."""
+    broadcast_tool_id = client.get_tool_id(name="send_message_to_agents_matching_all_tags")
+    manager_agent_state = client.create_agent(name="manager", tool_ids=[broadcast_tool_id])
+    manager_agent = client.server.load_agent(agent_id=manager_agent_state.id, actor=client.user)
+
+    # Create 2 worker agents with message_buffer_autoclear enabled
+    worker_tags = ["banana-boys"]
+    worker_agents = []
+    for idx in range(2):
+        state = client.create_agent(
+            name=f"worker_{idx}", include_multi_agent_tools=False, tags=worker_tags, message_buffer_autoclear=True
+        )
+        worker_agents.append(client.server.load_agent(agent_id=state.id, actor=client.user))
+
+    # Ask the manager to have every tagged worker append the topic of the day to its core memory
+    broadcast_message = f"Using your tool named `send_message_to_agents_matching_all_tags`, instruct all agents with tags {worker_tags} to `core_memory_append` the topic of the day: bananas!"
+    client.send_message(
+        agent_id=manager_agent.agent_state.id,
+        role="user",
+        message=broadcast_message,
+    )
+
+    for worker in worker_agents:
+        # Reload the worker so the assertions look at its current agent state
+        reloaded_state = client.server.load_agent(agent_id=worker.agent_state.id, actor=client.user).agent_state
+        # the worker's message_ids should hold exactly one entry
+        assert len(reloaded_state.message_ids) == 1
+        # check that banana made it in
+        assert "banana" in reloaded_state.memory.compile().lower()
+
+
+@retry_until_success(max_attempts=3, sleep_time_seconds=2)
+def test_agents_async_simple(client):
+    """
+    Test two agents exchanging messages via `send_message_to_agent_async` to come up with a poem.
+    The exchange is started by prompting one of the agents (charles).
+    """
+    # Cleanup from potentially failed previous runs (deletes every agent visible to this user)
+    existing_agents = client.server.agent_manager.list_agents(client.user)
+    for agent in existing_agents:
+        client.delete_agent(agent.id)
+
+    # Create two agents that both carry the `send_message_to_agent_async` tool
+    send_message_to_agent_async_tool_id = client.get_tool_id(name="send_message_to_agent_async")
+    memory_a = ChatMemory(
+        human="Chad - I'm interested in hearing poem.",
+        persona="You are an AI agent that can communicate with your agent buddy using `send_message_to_agent_async`, who has some great poem ideas (so I've heard).",
+    )
+    charles_state = client.create_agent(name="charles", memory=memory_a, tool_ids=[send_message_to_agent_async_tool_id])
+    charles = client.server.load_agent(agent_id=charles_state.id, actor=client.user)
+
+    memory_b = ChatMemory(
+        human="No human - you are to only communicate with the other AI agent.",
+        persona="You are an AI agent that can communicate with your agent buddy using `send_message_to_agent_async`, who is interested in great poem ideas.",
+    )
+    sarah_state = client.create_agent(name="sarah", memory=memory_b, tool_ids=[send_message_to_agent_async_tool_id])
+
+    # Kick off the exchange: prompt charles to ask sarah for a poem idea
+    initial_prompt = f"I want you to talk to the other agent with ID {sarah_state.id} using `send_message_to_agent_async`. Specifically, I want you to ask him for a poem idea, and then craft a poem for me."
+    client.send_message(
+        agent_id=charles.agent_state.id,
+        role="user",
+        message=initial_prompt,
+    )
+
+    found_in_charles = wait_for_incoming_message(
+        client=client,
+        agent_id=charles_state.id,
+        substring="[Incoming message from agent with ID",
+        max_wait_seconds=10,
+        sleep_interval=0.5,
+    )
+    assert found_in_charles, "Charles never received the system message from Sarah (timed out)."
+
+    found_in_sarah = wait_for_incoming_message(
+        client=client,
+        agent_id=sarah_state.id,
+        substring="[Incoming message from agent with ID",
+        max_wait_seconds=10,
+        sleep_interval=0.5,
+    )
+    assert found_in_sarah, "Sarah never received the system message from Charles (timed out)."
diff --git a/tests/test_base_functions.py b/tests/test_base_functions.py
index 79926aaf..8b133638 100644
--- a/tests/test_base_functions.py
+++ b/tests/test_base_functions.py
@@ -1,17 +1,9 @@
-import json
-
 import pytest
 
 import letta.functions.function_sets.base as base_functions
 from letta import LocalClient, create_client
-from letta.functions.functions import derive_openai_json_schema, parse_source_code
 from letta.schemas.embedding_config import EmbeddingConfig
-from letta.schemas.letta_message import SystemMessage, ToolReturnMessage
 from letta.schemas.llm_config import LLMConfig
-from letta.schemas.memory import ChatMemory
-from letta.schemas.tool import Tool
-from tests.helpers.utils import retry_until_success
-from tests.utils import wait_for_incoming_message
 
 
 @pytest.fixture(scope="function")
@@ -35,47 +27,6 @@ def agent_obj(client: LocalClient):
     # client.delete_agent(agent_obj.agent_state.id)
 
 
-@pytest.fixture(scope="function")
-def other_agent_obj(client: LocalClient):
-    """Create another test agent that we can call functions on"""
-    agent_state = client.create_agent(include_multi_agent_tools=False)
-
-    other_agent_obj = client.server.load_agent(agent_id=agent_state.id, actor=client.user)
-    yield other_agent_obj
-
-    client.delete_agent(other_agent_obj.agent_state.id)
-
-
-@pytest.fixture
-def roll_dice_tool(client):
-    def roll_dice():
-        """
-        Rolls a 6 sided die.
-
-        Returns:
-            str: The roll result.
-        """
-        return "Rolled a 5!"
-
-    # Set up tool details
-    source_code = parse_source_code(roll_dice)
-    source_type = "python"
-    description = "test_description"
-    tags = ["test"]
-
-    tool = Tool(description=description, tags=tags, source_code=source_code, source_type=source_type)
-    derived_json_schema = derive_openai_json_schema(source_code=tool.source_code, name=tool.name)
-
-    derived_name = derived_json_schema["name"]
-    tool.json_schema = derived_json_schema
-    tool.name = derived_name
-
-    tool = client.server.tool_manager.create_or_update_tool(tool, actor=client.user)
-
-    # Yield the created tool
-    yield tool
-
-
 def query_in_search_results(search_results, query):
     for result in search_results:
         if query.lower() in result["content"].lower():
diff --git a/tests/test_managers.py b/tests/test_managers.py
index 43ffbaa7..cce3e449 100644
--- a/tests/test_managers.py
+++ b/tests/test_managers.py
@@ -447,6 +447,7 @@ def comprehensive_test_agent_fixture(server: SyncServer, default_user, print_too
         tool_rules=[InitToolRule(tool_name=print_tool.name)],
         initial_message_sequence=[MessageCreate(role=MessageRole.user, content="hello world")],
         tool_exec_environment_variables={"test_env_var_key_a": "test_env_var_value_a", "test_env_var_key_b": "test_env_var_value_b"},
+        message_buffer_autoclear=True,
     )
     created_agent = server.agent_manager.create_agent(
         create_agent_request,
@@ -601,6 +602,7 @@ def test_update_agent(server: SyncServer, comprehensive_test_agent_fixture, othe
         message_ids=["10", "20"],
         metadata={"train_key": "train_value"},
         tool_exec_environment_variables={"test_env_var_key_a": "a", "new_tool_exec_key": "n"},
+        message_buffer_autoclear=False,
     )
 
     last_updated_timestamp = agent.updated_at
@@ -1971,17 +1973,6 @@ def test_message_listing_text_search(server: SyncServer, hello_world_message_fix
     assert len(search_results) == 0
 
 
-def test_message_listing_date_range_filtering(server: SyncServer, hello_world_message_fixture, default_user, sarah_agent):
-    """Test filtering messages by date range"""
-    create_test_messages(server, hello_world_message_fixture, default_user)
-    now = datetime.utcnow()
-
-    date_results = server.message_manager.list_user_messages_for_agent(
-        agent_id=sarah_agent.id, actor=default_user, start_date=now - timedelta(minutes=1), end_date=now + timedelta(minutes=1), limit=10
-    )
-    assert len(date_results) > 0
-
-
 # ======================================================================================================================
 # Block Manager Tests
 # ======================================================================================================================
diff --git a/tests/test_model_letta_performance.py b/tests/test_model_letta_performance.py
index bcc5c5f6..369552c6 100644
--- a/tests/test_model_letta_performance.py
+++ b/tests/test_model_letta_performance.py
@@ -303,6 +303,18 @@ def test_gemini_pro_15_edit_core_memory():
     print(f"Got successful response from client: \n\n{response}")
 
 
+# ======================================================================================================================
+# GOOGLE VERTEX TESTS
+# ======================================================================================================================
+@pytest.mark.vertex_basic
+@retry_until_success(max_attempts=1, sleep_time_seconds=2)
+def test_vertex_gemini_pro_20_returns_valid_first_message():
+    """Run the first-response validity check against the gemini-vertex.json LLM config."""
+    config_path = os.path.join(llm_config_dir, "gemini-vertex.json")
+    first_response = check_first_response_is_valid_for_llm_endpoint(config_path)
+    print(f"Got successful response from client: \n\n{first_response}")
+
+
 # ======================================================================================================================
 # TOGETHER TESTS
 # ======================================================================================================================
diff --git a/tests/test_providers.py b/tests/test_providers.py
index a575fba5..5dd99fbe 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -5,6 +5,7 @@ from letta.schemas.providers import (
     AnthropicProvider,
     AzureProvider,
     GoogleAIProvider,
+    GoogleVertexProvider,
     GroqProvider,
     MistralProvider,
     OllamaProvider,
@@ -66,6 +67,16 @@ def test_googleai():
     provider.list_embedding_models()
 
 
+def test_google_vertex():
+    """Smoke test (no assertions): list and print the LLM and embedding models of the Vertex provider."""
+    provider = GoogleVertexProvider(google_cloud_project=os.getenv("GCP_PROJECT_ID"), google_cloud_location=os.getenv("GCP_REGION"))
+    llm_models = provider.list_llm_models()
+    print(llm_models)
+    print([llm.model for llm in llm_models])
+    vertex_embedding_models = provider.list_embedding_models()
+    print([emb.embedding_model for emb in vertex_embedding_models])
+
+
 def test_mistral():
     provider = MistralProvider(api_key=os.getenv("MISTRAL_API_KEY"))
     models = provider.list_llm_models()
diff --git a/tests/utils.py b/tests/utils.py
index 46d83ed7..e16cd15a 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -164,7 +164,7 @@ def wait_for_incoming_message(
     deadline = time.time() + max_wait_seconds
 
     while time.time() < deadline:
-        messages = client.server.message_manager.list_messages_for_agent(agent_id=agent_id)
+        messages = client.server.message_manager.list_messages_for_agent(agent_id=agent_id, actor=client.user)
         # Check for the system message containing `substring`
         if any(message.role == MessageRole.system and substring in (message.text or "") for message in messages):
             return True