From f076964bd1db84609344b0d8d1284c5b082b5fa6 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Sat, 17 May 2025 19:17:08 -0700 Subject: [PATCH] feat: support together in new agent loop and add tests (#2231) --- letta/llm_api/llm_client.py | 2 +- letta/llm_api/openai_client.py | 4 +++- letta/server/rest_api/routers/v1/agents.py | 7 ++++--- .../llm_model_configs/together-qwen-2.5-72b-instruct.json | 7 +++++++ tests/integration_test_send_message.py | 1 + 5 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 tests/configs/llm_model_configs/together-qwen-2.5-72b-instruct.json diff --git a/letta/llm_api/llm_client.py b/letta/llm_api/llm_client.py index 63adbcc2..7372b68a 100644 --- a/letta/llm_api/llm_client.py +++ b/letta/llm_api/llm_client.py @@ -51,7 +51,7 @@ class LLMClient: put_inner_thoughts_first=put_inner_thoughts_first, actor=actor, ) - case ProviderType.openai: + case ProviderType.openai | ProviderType.together: from letta.llm_api.openai_client import OpenAIClient return OpenAIClient( diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py index 61089bbf..150def39 100644 --- a/letta/llm_api/openai_client.py +++ b/letta/llm_api/openai_client.py @@ -22,7 +22,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_st from letta.llm_api.llm_client_base import LLMClientBase from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST from letta.log import get_logger -from letta.schemas.enums import ProviderCategory +from letta.schemas.enums import ProviderCategory, ProviderType from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message as PydanticMessage from letta.schemas.openai.chat_completion_request import ChatCompletionRequest @@ -113,6 +113,8 @@ class OpenAIClient(LLMClientBase): from letta.services.provider_manager import ProviderManager api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor) + if llm_config.model_endpoint_type == ProviderType.together: + api_key = model_settings.together_api_key or os.environ.get("TOGETHER_API_KEY") if not api_key: api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY") diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py index fbfc67cd..6c121f30 100644 --- a/letta/server/rest_api/routers/v1/agents.py +++ b/letta/server/rest_api/routers/v1/agents.py @@ -635,7 +635,7 @@ async def send_message( agent_eligible = not agent.enable_sleeptime and not agent.multi_agent_group and agent.agent_type != AgentType.sleeptime_agent experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false" feature_enabled = settings.use_experimental or experimental_header.lower() == "true" - model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai"] + model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together"] if agent_eligible and feature_enabled and model_compatible: experimental_agent = LettaAgent( @@ -695,7 +695,8 @@ async def send_message_streaming( agent_eligible = not agent.enable_sleeptime and not agent.multi_agent_group and agent.agent_type != AgentType.sleeptime_agent experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false" feature_enabled = settings.use_experimental or experimental_header.lower() == "true" - model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai"] + model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together"] + model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai"] if agent_eligible and feature_enabled and model_compatible: experimental_agent = LettaAgent( @@ -706,7 +707,7 @@ async def send_message_streaming( passage_manager=server.passage_manager, actor=actor, ) - if request.stream_tokens: + if request.stream_tokens and model_compatible_token_streaming: result = StreamingResponse( experimental_agent.step_stream(request.messages, max_steps=10, use_assistant_message=request.use_assistant_message), media_type="text/event-stream", diff --git a/tests/configs/llm_model_configs/together-qwen-2.5-72b-instruct.json b/tests/configs/llm_model_configs/together-qwen-2.5-72b-instruct.json new file mode 100644 index 00000000..18dd9774 --- /dev/null +++ b/tests/configs/llm_model_configs/together-qwen-2.5-72b-instruct.json @@ -0,0 +1,7 @@ +{ + "context_window": 16000, + "model": "Qwen/Qwen2.5-72B-Instruct-Turbo", + "model_endpoint_type": "together", + "model_endpoint": "https://api.together.ai/v1", + "model_wrapper": "chatml" +} diff --git a/tests/integration_test_send_message.py b/tests/integration_test_send_message.py index e1784820..afaf7959 100644 --- a/tests/integration_test_send_message.py +++ b/tests/integration_test_send_message.py @@ -135,6 +135,7 @@ all_configs = [ "gemini-1.5-pro.json", "gemini-2.5-flash-vertex.json", "gemini-2.5-pro-vertex.json", + "together-qwen-2.5-72b-instruct.json", ] requested = os.getenv("LLM_CONFIG_FILE") filenames = [requested] if requested else all_configs