From f076964bd1db84609344b0d8d1284c5b082b5fa6 Mon Sep 17 00:00:00 2001
From: Sarah Wooders <sarahwooders@gmail.com>
Date: Sat, 17 May 2025 19:17:08 -0700
Subject: [PATCH] feat: support together in new agent loop and add tests
 (#2231)

---
 letta/llm_api/llm_client.py                                | 2 +-
 letta/llm_api/openai_client.py                             | 4 +++-
 letta/server/rest_api/routers/v1/agents.py                 | 7 ++++---
 .../llm_model_configs/together-qwen-2.5-72b-instruct.json  | 7 +++++++
 tests/integration_test_send_message.py                     | 1 +
 5 files changed, 16 insertions(+), 5 deletions(-)
 create mode 100644 tests/configs/llm_model_configs/together-qwen-2.5-72b-instruct.json

diff --git a/letta/llm_api/llm_client.py b/letta/llm_api/llm_client.py
index 63adbcc2..7372b68a 100644
--- a/letta/llm_api/llm_client.py
+++ b/letta/llm_api/llm_client.py
@@ -51,7 +51,7 @@ class LLMClient:
                     put_inner_thoughts_first=put_inner_thoughts_first,
                     actor=actor,
                 )
-            case ProviderType.openai:
+            case ProviderType.openai | ProviderType.together:
                 from letta.llm_api.openai_client import OpenAIClient
 
                 return OpenAIClient(
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index 61089bbf..150def39 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -22,7 +22,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_st
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.log import get_logger
-from letta.schemas.enums import ProviderCategory
+from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
@@ -113,6 +113,8 @@ class OpenAIClient(LLMClientBase):
             from letta.services.provider_manager import ProviderManager
 
             api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
+        if llm_config.model_endpoint_type == ProviderType.together:
+            api_key = model_settings.together_api_key or os.environ.get("TOGETHER_API_KEY")
 
         if not api_key:
             api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py
index fbfc67cd..6c121f30 100644
--- a/letta/server/rest_api/routers/v1/agents.py
+++ b/letta/server/rest_api/routers/v1/agents.py
@@ -635,7 +635,7 @@ async def send_message(
     agent_eligible = not agent.enable_sleeptime and not agent.multi_agent_group and agent.agent_type != AgentType.sleeptime_agent
     experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
     feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
-    model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai"]
+    model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together"]
 
     if agent_eligible and feature_enabled and model_compatible:
         experimental_agent = LettaAgent(
@@ -695,7 +695,8 @@ async def send_message_streaming(
     agent_eligible = not agent.enable_sleeptime and not agent.multi_agent_group and agent.agent_type != AgentType.sleeptime_agent
     experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
     feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
-    model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai"]
+    model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together"]
+    model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai"]
 
     if agent_eligible and feature_enabled and model_compatible:
         experimental_agent = LettaAgent(
@@ -706,7 +707,7 @@ async def send_message_streaming(
             passage_manager=server.passage_manager,
             actor=actor,
         )
-        if request.stream_tokens:
+        if request.stream_tokens and model_compatible_token_streaming:
             result = StreamingResponse(
                 experimental_agent.step_stream(request.messages, max_steps=10, use_assistant_message=request.use_assistant_message),
                 media_type="text/event-stream",
diff --git a/tests/configs/llm_model_configs/together-qwen-2.5-72b-instruct.json b/tests/configs/llm_model_configs/together-qwen-2.5-72b-instruct.json
new file mode 100644
index 00000000..18dd9774
--- /dev/null
+++ b/tests/configs/llm_model_configs/together-qwen-2.5-72b-instruct.json
@@ -0,0 +1,7 @@
+{
+    "context_window": 16000,
+    "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
+    "model_endpoint_type": "together",
+    "model_endpoint": "https://api.together.ai/v1",
+    "model_wrapper": "chatml"
+}
diff --git a/tests/integration_test_send_message.py b/tests/integration_test_send_message.py
index e1784820..afaf7959 100644
--- a/tests/integration_test_send_message.py
+++ b/tests/integration_test_send_message.py
@@ -135,6 +135,7 @@ all_configs = [
     "gemini-1.5-pro.json",
     "gemini-2.5-flash-vertex.json",
     "gemini-2.5-pro-vertex.json",
+    "together-qwen-2.5-72b-instruct.json",
 ]
 requested = os.getenv("LLM_CONFIG_FILE")
 filenames = [requested] if requested else all_configs