chore: enable vertex experimental (#2053)

Co-authored-by: Caren Thomas <carenthomas@gmail.com>
2025-05-07 14:52:26 -07:00
parent 6336c9dca4
commit abd3fb3204
5 changed files with 27 additions and 17 deletions
--- a/letta/llm_api/google_ai_client.py
+++ b/letta/llm_api/google_ai_client.py
@@ -165,10 +165,12 @@ class GoogleAIClient(LLMClientBase):
                        # NOTE: this also involves stripping the inner monologue out of the function
                        if llm_config.put_inner_thoughts_in_kwargs:
-                            from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+                            from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX
-                            assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
+                            assert (
-                            inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                                INNER_THOUGHTS_KWARG_VERTEX in function_args
                            ), f"Couldn't find inner thoughts in function args:\n{function_call}"
                            inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                            assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                        else:
                            inner_thoughts = None
@@ -380,13 +382,13 @@ class GoogleAIClient(LLMClientBase):
            # Add inner thoughts
            if llm_config.put_inner_thoughts_in_kwargs:
-                from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
+                from letta.local_llm.constants import INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_VERTEX
-                func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
+                func["parameters"]["properties"][INNER_THOUGHTS_KWARG_VERTEX] = {
                    "type": "string",
                    "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
                }
-                func["parameters"]["required"].append(INNER_THOUGHTS_KWARG)
+                func["parameters"]["required"].append(INNER_THOUGHTS_KWARG_VERTEX)
        return [{"functionDeclarations": function_list}]
--- a/letta/llm_api/google_vertex_client.py
+++ b/letta/llm_api/google_vertex_client.py
@@ -168,10 +168,12 @@ class GoogleVertexClient(GoogleAIClient):
                        # NOTE: this also involves stripping the inner monologue out of the function
                        if llm_config.put_inner_thoughts_in_kwargs:
-                            from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+                            from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX
-                            assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
+                            assert (
-                            inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                                INNER_THOUGHTS_KWARG_VERTEX in function_args
                            ), f"Couldn't find inner thoughts in function args:\n{function_call}"
                            inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                            assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                        else:
                            inner_thoughts = None
--- a/letta/local_llm/constants.py
+++ b/letta/local_llm/constants.py
@@ -26,6 +26,7 @@ DEFAULT_WRAPPER = ChatMLInnerMonologueWrapper
 DEFAULT_WRAPPER_NAME = "chatml"
 INNER_THOUGHTS_KWARG = "inner_thoughts"
 INNER_THOUGHTS_KWARG_VERTEX = "thinking"
 INNER_THOUGHTS_KWARG_DESCRIPTION = "Deep inner monologue private to you only."
 INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST = f"Deep inner monologue private to you only. Think before you act, so always generate arg '{INNER_THOUGHTS_KWARG}' first before any other arg."
 INNER_THOUGHTS_CLI_SYMBOL = "💭"
--- a/letta/schemas/message.py
+++ b/letta/schemas/message.py
@@ -16,7 +16,7 @@ from pydantic import BaseModel, Field, field_validator
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, TOOL_CALL_ID_MAX_LEN
 from letta.helpers.datetime_helpers import get_utc_time, is_utc_datetime
 from letta.helpers.json_helpers import json_dumps
-from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_VERTEX
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_base import OrmMetadataBase
 from letta.schemas.letta_message import (
@@ -914,9 +914,9 @@ class Message(BaseMessage):
                        function_args = {"args": function_args}
                    if put_inner_thoughts_in_kwargs and text_content is not None:
-                        assert "inner_thoughts" not in function_args, function_args
+                        assert INNER_THOUGHTS_KWARG not in function_args, function_args
                        assert len(self.tool_calls) == 1
-                        function_args[INNER_THOUGHTS_KWARG] = text_content
+                        function_args[INNER_THOUGHTS_KWARG_VERTEX] = text_content
                    parts.append(
                        {
--- a/letta/server/rest_api/routers/v1/agents.py
+++ b/letta/server/rest_api/routers/v1/agents.py
@@ -631,12 +631,17 @@ async def send_message(
    # TODO: This is redundant, remove soon
    agent = server.agent_manager.get_agent_by_id(agent_id, actor)
-    if (
+    if all(
        (
            settings.use_experimental,
            not agent.enable_sleeptime,
            not agent.multi_agent_group,
            not agent.agent_type == AgentType.sleeptime_agent,
        )
    ) and (
        # LLM Model Check: (1) Anthropic or (2) Google Vertex + Flag
        agent.llm_config.model_endpoint_type == "anthropic"
-        and not agent.enable_sleeptime
+        or (agent.llm_config.model_endpoint_type == "google_vertex" and settings.use_vertex_async_loop_experimental)
        and not agent.multi_agent_group
        and not agent.agent_type == AgentType.sleeptime_agent
        and settings.use_experimental
    ):
        experimental_agent = LettaAgent(
            agent_id=agent_id,