feat: support together in new agent loop and add tests (#2231)

2025-05-17 19:17:08 -07:00
parent 42d3ce6d09
commit 65f8db2efd
5 changed files with 16 additions and 5 deletions
--- a/letta/llm_api/llm_client.py
+++ b/letta/llm_api/llm_client.py
@@ -51,7 +51,7 @@ class LLMClient:
                    put_inner_thoughts_first=put_inner_thoughts_first,
                    actor=actor,
                )
-            case ProviderType.openai:
+            case ProviderType.openai | ProviderType.together:
                from letta.llm_api.openai_client import OpenAIClient

                return OpenAIClient(
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -22,7 +22,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_st
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.log import get_logger
-from letta.schemas.enums import ProviderCategory
+from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
@@ -113,6 +113,8 @@ class OpenAIClient(LLMClientBase):
            from letta.services.provider_manager import ProviderManager

            api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
+        if llm_config.model_endpoint_type == ProviderType.together:
+            api_key = model_settings.together_api_key or os.environ.get("TOGETHER_API_KEY")

        if not api_key:
            api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
--- a/letta/server/rest_api/routers/v1/agents.py
+++ b/letta/server/rest_api/routers/v1/agents.py
@@ -635,7 +635,7 @@ async def send_message(
    agent_eligible = not agent.enable_sleeptime and not agent.multi_agent_group and agent.agent_type != AgentType.sleeptime_agent
    experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
    feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
-    model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai"]
+    model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together"]

    if agent_eligible and feature_enabled and model_compatible:
        experimental_agent = LettaAgent(
@@ -695,7 +695,8 @@ async def send_message_streaming(
    agent_eligible = not agent.enable_sleeptime and not agent.multi_agent_group and agent.agent_type != AgentType.sleeptime_agent
    experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
    feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
-    model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai"]
+    model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together"]
+    model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai"]

    if agent_eligible and feature_enabled and model_compatible:
        experimental_agent = LettaAgent(
@@ -706,7 +707,7 @@ async def send_message_streaming(
            passage_manager=server.passage_manager,
            actor=actor,
        )
-        if request.stream_tokens:
+        if request.stream_tokens and model_compatible_token_streaming:
            result = StreamingResponse(
                experimental_agent.step_stream(request.messages, max_steps=10, use_assistant_message=request.use_assistant_message),
                media_type="text/event-stream",
--- a/tests/configs/llm_model_configs/together-qwen-2.5-72b-instruct.json
+++ b/tests/configs/llm_model_configs/together-qwen-2.5-72b-instruct.json
@@ -0,0 +1,7 @@
+{
+    "context_window": 16000,
+    "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
+    "model_endpoint_type": "together",
+    "model_endpoint": "https://api.together.ai/v1",
+    "model_wrapper": "chatml"
+}
--- a/tests/integration_test_send_message.py
+++ b/tests/integration_test_send_message.py
@@ -135,6 +135,7 @@ all_configs = [
    "gemini-1.5-pro.json",
    "gemini-2.5-flash-vertex.json",
    "gemini-2.5-pro-vertex.json",
+    "together-qwen-2.5-72b-instruct.json",
 ]
 requested = os.getenv("LLM_CONFIG_FILE")
 filenames = [requested] if requested else all_configs