chore: deprecate old agent messaging (#9120)
This commit is contained in:
committed by
Caren Thomas
parent
cd7e80acc3
commit
3fdf2b6c79
@@ -9559,12 +9559,12 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/v1/groups/{group_id}/messages": {
|
"/v1/groups/{group_id}/messages/{message_id}": {
|
||||||
"post": {
|
"patch": {
|
||||||
"tags": ["groups"],
|
"tags": ["groups"],
|
||||||
"summary": "Send Group Message",
|
"summary": "Modify Group Message",
|
||||||
"description": "Process a user message and return the group's response.\nThis endpoint accepts a message from a user and processes it through through agents in the group based on the specified pattern",
|
"description": "Update the details of a message associated with an agent.",
|
||||||
"operationId": "send_group_message",
|
"operationId": "modify_group_message",
|
||||||
"deprecated": true,
|
"deprecated": true,
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
@@ -9581,6 +9581,21 @@
|
|||||||
"title": "Group Id"
|
"title": "Group Id"
|
||||||
},
|
},
|
||||||
"description": "The ID of the group in the format 'group-<uuid4>'"
|
"description": "The ID of the group in the format 'group-<uuid4>'"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "message_id",
|
||||||
|
"in": "path",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 44,
|
||||||
|
"maxLength": 44,
|
||||||
|
"pattern": "^message-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
|
||||||
|
"description": "The ID of the message in the format 'message-<uuid4>'",
|
||||||
|
"examples": ["message-123e4567-e89b-42d3-8456-426614174000"],
|
||||||
|
"title": "Message Id"
|
||||||
|
},
|
||||||
|
"description": "The ID of the message in the format 'message-<uuid4>'"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
@@ -9588,7 +9603,21 @@
|
|||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/LettaRequest"
|
"anyOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/UpdateSystemMessage"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/UpdateUserMessage"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/UpdateReasoningMessage"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/UpdateAssistantMessage"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Request"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -9599,7 +9628,58 @@
|
|||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/LettaResponse"
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/SystemMessage"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/UserMessage"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/ReasoningMessage"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/HiddenReasoningMessage"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/ToolCallMessage"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/ToolReturnMessage"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/AssistantMessage"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/ApprovalRequestMessage"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/ApprovalResponseMessage"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/SummaryMessage"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/EventMessage"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"discriminator": {
|
||||||
|
"propertyName": "message_type",
|
||||||
|
"mapping": {
|
||||||
|
"system_message": "#/components/schemas/SystemMessage",
|
||||||
|
"user_message": "#/components/schemas/UserMessage",
|
||||||
|
"reasoning_message": "#/components/schemas/ReasoningMessage",
|
||||||
|
"hidden_reasoning_message": "#/components/schemas/HiddenReasoningMessage",
|
||||||
|
"tool_call_message": "#/components/schemas/ToolCallMessage",
|
||||||
|
"tool_return_message": "#/components/schemas/ToolReturnMessage",
|
||||||
|
"assistant_message": "#/components/schemas/AssistantMessage",
|
||||||
|
"approval_request_message": "#/components/schemas/ApprovalRequestMessage",
|
||||||
|
"approval_response_message": "#/components/schemas/ApprovalResponseMessage",
|
||||||
|
"summary_message": "#/components/schemas/SummaryMessage",
|
||||||
|
"event_message": "#/components/schemas/EventMessage"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"title": "Response Modify Group Message"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -9615,7 +9695,9 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
|
},
|
||||||
|
"/v1/groups/{group_id}/messages": {
|
||||||
"get": {
|
"get": {
|
||||||
"tags": ["groups"],
|
"tags": ["groups"],
|
||||||
"summary": "List Group Messages",
|
"summary": "List Group Messages",
|
||||||
@@ -9790,203 +9872,6 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/v1/groups/{group_id}/messages/stream": {
|
|
||||||
"post": {
|
|
||||||
"tags": ["groups"],
|
|
||||||
"summary": "Send Group Message Streaming",
|
|
||||||
"description": "Process a user message and return the group's responses.\nThis endpoint accepts a message from a user and processes it through agents in the group based on the specified pattern.\nIt will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.",
|
|
||||||
"operationId": "send_group_message_streaming",
|
|
||||||
"deprecated": true,
|
|
||||||
"parameters": [
|
|
||||||
{
|
|
||||||
"name": "group_id",
|
|
||||||
"in": "path",
|
|
||||||
"required": true,
|
|
||||||
"schema": {
|
|
||||||
"type": "string",
|
|
||||||
"minLength": 42,
|
|
||||||
"maxLength": 42,
|
|
||||||
"pattern": "^group-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
|
|
||||||
"description": "The ID of the group in the format 'group-<uuid4>'",
|
|
||||||
"examples": ["group-123e4567-e89b-42d3-8456-426614174000"],
|
|
||||||
"title": "Group Id"
|
|
||||||
},
|
|
||||||
"description": "The ID of the group in the format 'group-<uuid4>'"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"requestBody": {
|
|
||||||
"required": true,
|
|
||||||
"content": {
|
|
||||||
"application/json": {
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/components/schemas/LettaStreamingRequest"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"responses": {
|
|
||||||
"200": {
|
|
||||||
"description": "Successful response",
|
|
||||||
"content": {
|
|
||||||
"application/json": {
|
|
||||||
"schema": {}
|
|
||||||
},
|
|
||||||
"text/event-stream": {
|
|
||||||
"description": "Server-Sent Events stream"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"422": {
|
|
||||||
"description": "Validation Error",
|
|
||||||
"content": {
|
|
||||||
"application/json": {
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/components/schemas/HTTPValidationError"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"/v1/groups/{group_id}/messages/{message_id}": {
|
|
||||||
"patch": {
|
|
||||||
"tags": ["groups"],
|
|
||||||
"summary": "Modify Group Message",
|
|
||||||
"description": "Update the details of a message associated with an agent.",
|
|
||||||
"operationId": "modify_group_message",
|
|
||||||
"deprecated": true,
|
|
||||||
"parameters": [
|
|
||||||
{
|
|
||||||
"name": "group_id",
|
|
||||||
"in": "path",
|
|
||||||
"required": true,
|
|
||||||
"schema": {
|
|
||||||
"type": "string",
|
|
||||||
"minLength": 42,
|
|
||||||
"maxLength": 42,
|
|
||||||
"pattern": "^group-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
|
|
||||||
"description": "The ID of the group in the format 'group-<uuid4>'",
|
|
||||||
"examples": ["group-123e4567-e89b-42d3-8456-426614174000"],
|
|
||||||
"title": "Group Id"
|
|
||||||
},
|
|
||||||
"description": "The ID of the group in the format 'group-<uuid4>'"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "message_id",
|
|
||||||
"in": "path",
|
|
||||||
"required": true,
|
|
||||||
"schema": {
|
|
||||||
"type": "string",
|
|
||||||
"minLength": 44,
|
|
||||||
"maxLength": 44,
|
|
||||||
"pattern": "^message-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
|
|
||||||
"description": "The ID of the message in the format 'message-<uuid4>'",
|
|
||||||
"examples": ["message-123e4567-e89b-42d3-8456-426614174000"],
|
|
||||||
"title": "Message Id"
|
|
||||||
},
|
|
||||||
"description": "The ID of the message in the format 'message-<uuid4>'"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"requestBody": {
|
|
||||||
"required": true,
|
|
||||||
"content": {
|
|
||||||
"application/json": {
|
|
||||||
"schema": {
|
|
||||||
"anyOf": [
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/UpdateSystemMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/UpdateUserMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/UpdateReasoningMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/UpdateAssistantMessage"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"title": "Request"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"responses": {
|
|
||||||
"200": {
|
|
||||||
"description": "Successful Response",
|
|
||||||
"content": {
|
|
||||||
"application/json": {
|
|
||||||
"schema": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/SystemMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/UserMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/ReasoningMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/HiddenReasoningMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/ToolCallMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/ToolReturnMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/AssistantMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/ApprovalRequestMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/ApprovalResponseMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/SummaryMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/EventMessage"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"discriminator": {
|
|
||||||
"propertyName": "message_type",
|
|
||||||
"mapping": {
|
|
||||||
"system_message": "#/components/schemas/SystemMessage",
|
|
||||||
"user_message": "#/components/schemas/UserMessage",
|
|
||||||
"reasoning_message": "#/components/schemas/ReasoningMessage",
|
|
||||||
"hidden_reasoning_message": "#/components/schemas/HiddenReasoningMessage",
|
|
||||||
"tool_call_message": "#/components/schemas/ToolCallMessage",
|
|
||||||
"tool_return_message": "#/components/schemas/ToolReturnMessage",
|
|
||||||
"assistant_message": "#/components/schemas/AssistantMessage",
|
|
||||||
"approval_request_message": "#/components/schemas/ApprovalRequestMessage",
|
|
||||||
"approval_response_message": "#/components/schemas/ApprovalResponseMessage",
|
|
||||||
"summary_message": "#/components/schemas/SummaryMessage",
|
|
||||||
"event_message": "#/components/schemas/EventMessage"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"title": "Response Modify Group Message"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"422": {
|
|
||||||
"description": "Validation Error",
|
|
||||||
"content": {
|
|
||||||
"application/json": {
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/components/schemas/HTTPValidationError"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"/v1/groups/{group_id}/reset-messages": {
|
"/v1/groups/{group_id}/reset-messages": {
|
||||||
"patch": {
|
"patch": {
|
||||||
"tags": ["groups"],
|
"tags": ["groups"],
|
||||||
|
|||||||
@@ -1,18 +1,15 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import json
|
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
||||||
from typing import TYPE_CHECKING, List
|
from typing import TYPE_CHECKING, List
|
||||||
|
|
||||||
from letta.functions.helpers import (
|
from letta.functions.helpers import (
|
||||||
|
_send_message_to_agents_matching_tags_async,
|
||||||
_send_message_to_all_agents_in_group_async,
|
_send_message_to_all_agents_in_group_async,
|
||||||
execute_send_message_to_agent,
|
execute_send_message_to_agent,
|
||||||
extract_send_message_from_steps_messages,
|
|
||||||
fire_and_forget_send_to_agent,
|
fire_and_forget_send_to_agent,
|
||||||
)
|
)
|
||||||
from letta.schemas.enums import MessageRole
|
from letta.schemas.enums import MessageRole
|
||||||
from letta.schemas.message import MessageCreate
|
from letta.schemas.message import MessageCreate
|
||||||
from letta.server.rest_api.dependencies import get_letta_server
|
from letta.server.rest_api.dependencies import get_letta_server
|
||||||
from letta.settings import settings
|
|
||||||
|
|
||||||
|
|
||||||
def send_message_to_agent_and_wait_for_reply(self: "Agent", message: str, other_agent_id: str) -> str:
|
def send_message_to_agent_and_wait_for_reply(self: "Agent", message: str, other_agent_id: str) -> str:
|
||||||
@@ -67,46 +64,11 @@ def send_message_to_agents_matching_tags(self: "Agent", message: str, match_all:
|
|||||||
if not matching_agents:
|
if not matching_agents:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def process_agent(agent_id: str) -> str:
|
# Prepare the message
|
||||||
"""Loads an agent, formats the message, and executes .step()"""
|
messages = [MessageCreate(role=MessageRole.system, content=augmented_message, name=self.agent_state.name)]
|
||||||
actor = self.user # Ensure correct actor context
|
|
||||||
agent = server.load_agent(agent_id=agent_id, interface=None, actor=actor)
|
|
||||||
|
|
||||||
# Prepare the message
|
# Use async helper for parallel message sending
|
||||||
messages = [MessageCreate(role=MessageRole.system, content=augmented_message, name=self.agent_state.name)]
|
return asyncio.run(_send_message_to_agents_matching_tags_async(self, server, messages, matching_agents))
|
||||||
|
|
||||||
# Run .step() and return the response
|
|
||||||
usage_stats = agent.step(
|
|
||||||
input_messages=messages,
|
|
||||||
chaining=True,
|
|
||||||
max_chaining_steps=None,
|
|
||||||
stream=False,
|
|
||||||
skip_verify=True,
|
|
||||||
metadata=None,
|
|
||||||
put_inner_thoughts_first=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
send_messages = extract_send_message_from_steps_messages(usage_stats.steps_messages, logger=agent.logger)
|
|
||||||
response_data = {
|
|
||||||
"agent_id": agent_id,
|
|
||||||
"response_messages": send_messages if send_messages else ["<no response>"],
|
|
||||||
}
|
|
||||||
|
|
||||||
return json.dumps(response_data, indent=2)
|
|
||||||
|
|
||||||
# Use ThreadPoolExecutor for parallel execution
|
|
||||||
results = []
|
|
||||||
with ThreadPoolExecutor(max_workers=settings.multi_agent_concurrent_sends) as executor:
|
|
||||||
future_to_agent = {executor.submit(process_agent, agent_state.id): agent_state for agent_state in matching_agents}
|
|
||||||
|
|
||||||
for future in as_completed(future_to_agent):
|
|
||||||
try:
|
|
||||||
results.append(future.result()) # Collect results
|
|
||||||
except Exception as e:
|
|
||||||
# Log or handle failure for specific agents if needed
|
|
||||||
self.logger.exception(f"Error processing agent {future_to_agent[future]}: {e}")
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def send_message_to_all_agents_in_group(self: "Agent", message: str) -> List[str]:
|
def send_message_to_all_agents_in_group(self: "Agent", message: str) -> List[str]:
|
||||||
|
|||||||
@@ -1557,23 +1557,6 @@ async def send_message(
|
|||||||
# Create a copy of agent state with the overridden llm_config
|
# Create a copy of agent state with the overridden llm_config
|
||||||
agent = agent.model_copy(update={"llm_config": override_llm_config})
|
agent = agent.model_copy(update={"llm_config": override_llm_config})
|
||||||
|
|
||||||
agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
|
|
||||||
model_compatible = agent.llm_config.model_endpoint_type in [
|
|
||||||
"anthropic",
|
|
||||||
"openai",
|
|
||||||
"together",
|
|
||||||
"google_ai",
|
|
||||||
"google_vertex",
|
|
||||||
"bedrock",
|
|
||||||
"ollama",
|
|
||||||
"azure",
|
|
||||||
"xai",
|
|
||||||
"zai",
|
|
||||||
"groq",
|
|
||||||
"deepseek",
|
|
||||||
"chatgpt_oauth",
|
|
||||||
]
|
|
||||||
|
|
||||||
# Create a new run for execution tracking
|
# Create a new run for execution tracking
|
||||||
if settings.track_agent_run:
|
if settings.track_agent_run:
|
||||||
runs_manager = RunManager()
|
runs_manager = RunManager()
|
||||||
@@ -1597,32 +1580,17 @@ async def send_message(
|
|||||||
|
|
||||||
run_update_metadata = None
|
run_update_metadata = None
|
||||||
try:
|
try:
|
||||||
result = None
|
agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
|
||||||
if agent_eligible and model_compatible:
|
result = await agent_loop.step(
|
||||||
agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
|
request.messages,
|
||||||
result = await agent_loop.step(
|
max_steps=request.max_steps,
|
||||||
request.messages,
|
run_id=run.id if run else None,
|
||||||
max_steps=request.max_steps,
|
use_assistant_message=request.use_assistant_message,
|
||||||
run_id=run.id if run else None,
|
request_start_timestamp_ns=request_start_timestamp_ns,
|
||||||
use_assistant_message=request.use_assistant_message,
|
include_return_message_types=request.include_return_message_types,
|
||||||
request_start_timestamp_ns=request_start_timestamp_ns,
|
client_tools=request.client_tools,
|
||||||
include_return_message_types=request.include_return_message_types,
|
include_compaction_messages=request.include_compaction_messages,
|
||||||
client_tools=request.client_tools,
|
)
|
||||||
include_compaction_messages=request.include_compaction_messages,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
result = await server.send_message_to_agent(
|
|
||||||
agent_id=agent_id,
|
|
||||||
actor=actor,
|
|
||||||
input_messages=request.messages,
|
|
||||||
stream_steps=False,
|
|
||||||
stream_tokens=False,
|
|
||||||
# Support for AssistantMessage
|
|
||||||
use_assistant_message=request.use_assistant_message,
|
|
||||||
assistant_message_tool_name=request.assistant_message_tool_name,
|
|
||||||
assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
|
|
||||||
include_return_message_types=request.include_return_message_types,
|
|
||||||
)
|
|
||||||
run_status = result.stop_reason.stop_reason.run_status
|
run_status = result.stop_reason.stop_reason.run_status
|
||||||
return result
|
return result
|
||||||
except PendingApprovalError as e:
|
except PendingApprovalError as e:
|
||||||
@@ -1844,47 +1812,16 @@ async def _process_message_background(
|
|||||||
# Create a copy of agent state with the overridden llm_config
|
# Create a copy of agent state with the overridden llm_config
|
||||||
agent = agent.model_copy(update={"llm_config": override_llm_config})
|
agent = agent.model_copy(update={"llm_config": override_llm_config})
|
||||||
|
|
||||||
agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
|
agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
|
||||||
model_compatible = agent.llm_config.model_endpoint_type in [
|
result = await agent_loop.step(
|
||||||
"anthropic",
|
messages,
|
||||||
"openai",
|
max_steps=max_steps,
|
||||||
"together",
|
run_id=run_id,
|
||||||
"google_ai",
|
use_assistant_message=use_assistant_message,
|
||||||
"google_vertex",
|
request_start_timestamp_ns=request_start_timestamp_ns,
|
||||||
"bedrock",
|
include_return_message_types=include_return_message_types,
|
||||||
"ollama",
|
include_compaction_messages=include_compaction_messages,
|
||||||
"azure",
|
)
|
||||||
"xai",
|
|
||||||
"zai",
|
|
||||||
"groq",
|
|
||||||
"deepseek",
|
|
||||||
]
|
|
||||||
if agent_eligible and model_compatible:
|
|
||||||
agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
|
|
||||||
result = await agent_loop.step(
|
|
||||||
messages,
|
|
||||||
max_steps=max_steps,
|
|
||||||
run_id=run_id,
|
|
||||||
use_assistant_message=use_assistant_message,
|
|
||||||
request_start_timestamp_ns=request_start_timestamp_ns,
|
|
||||||
include_return_message_types=include_return_message_types,
|
|
||||||
include_compaction_messages=include_compaction_messages,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
result = await server.send_message_to_agent(
|
|
||||||
agent_id=agent_id,
|
|
||||||
actor=actor,
|
|
||||||
input_messages=messages,
|
|
||||||
stream_steps=False,
|
|
||||||
stream_tokens=False,
|
|
||||||
metadata={"run_id": run_id},
|
|
||||||
# Support for AssistantMessage
|
|
||||||
use_assistant_message=use_assistant_message,
|
|
||||||
assistant_message_tool_name=assistant_message_tool_name,
|
|
||||||
assistant_message_tool_kwarg=assistant_message_tool_kwarg,
|
|
||||||
include_return_message_types=include_return_message_types,
|
|
||||||
)
|
|
||||||
|
|
||||||
runs_manager = RunManager()
|
runs_manager = RunManager()
|
||||||
from letta.schemas.enums import RunStatus
|
from letta.schemas.enums import RunStatus
|
||||||
from letta.schemas.letta_stop_reason import StopReasonType
|
from letta.schemas.letta_stop_reason import StopReasonType
|
||||||
@@ -2170,33 +2107,10 @@ async def preview_model_request(
|
|||||||
agent = await server.agent_manager.get_agent_by_id_async(
|
agent = await server.agent_manager.get_agent_by_id_async(
|
||||||
agent_id, actor, include_relationships=["multi_agent_group", "memory", "sources"]
|
agent_id, actor, include_relationships=["multi_agent_group", "memory", "sources"]
|
||||||
)
|
)
|
||||||
agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
|
agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
|
||||||
model_compatible = agent.llm_config.model_endpoint_type in [
|
return await agent_loop.build_request(
|
||||||
"anthropic",
|
input_messages=request.messages,
|
||||||
"openai",
|
)
|
||||||
"together",
|
|
||||||
"google_ai",
|
|
||||||
"google_vertex",
|
|
||||||
"bedrock",
|
|
||||||
"ollama",
|
|
||||||
"azure",
|
|
||||||
"xai",
|
|
||||||
"zai",
|
|
||||||
"groq",
|
|
||||||
"deepseek",
|
|
||||||
"chatgpt_oauth",
|
|
||||||
]
|
|
||||||
|
|
||||||
if agent_eligible and model_compatible:
|
|
||||||
agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
|
|
||||||
return await agent_loop.build_request(
|
|
||||||
input_messages=request.messages,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_403_FORBIDDEN,
|
|
||||||
detail="Payload inspection is not currently supported for this agent configuration.",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class CompactionRequest(BaseModel):
|
class CompactionRequest(BaseModel):
|
||||||
@@ -2225,53 +2139,31 @@ async def summarize_messages(
|
|||||||
|
|
||||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||||
agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
|
agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
|
||||||
agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
|
|
||||||
model_compatible = agent.llm_config.model_endpoint_type in [
|
|
||||||
"anthropic",
|
|
||||||
"openai",
|
|
||||||
"together",
|
|
||||||
"google_ai",
|
|
||||||
"google_vertex",
|
|
||||||
"bedrock",
|
|
||||||
"ollama",
|
|
||||||
"azure",
|
|
||||||
"xai",
|
|
||||||
"zai",
|
|
||||||
"groq",
|
|
||||||
"deepseek",
|
|
||||||
"chatgpt_oauth",
|
|
||||||
]
|
|
||||||
|
|
||||||
if agent_eligible and model_compatible:
|
agent_loop = LettaAgentV3(agent_state=agent, actor=actor)
|
||||||
agent_loop = LettaAgentV3(agent_state=agent, actor=actor)
|
in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
|
||||||
in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
|
compaction_settings = request.compaction_settings if request else None
|
||||||
compaction_settings = request.compaction_settings if request else None
|
num_messages_before = len(in_context_messages)
|
||||||
num_messages_before = len(in_context_messages)
|
summary_message, messages, summary = await agent_loop.compact(
|
||||||
summary_message, messages, summary = await agent_loop.compact(
|
messages=in_context_messages,
|
||||||
messages=in_context_messages,
|
compaction_settings=compaction_settings,
|
||||||
compaction_settings=compaction_settings,
|
use_summary_role=True,
|
||||||
use_summary_role=True,
|
)
|
||||||
)
|
num_messages_after = len(messages)
|
||||||
num_messages_after = len(messages)
|
|
||||||
|
|
||||||
# update the agent state
|
# update the agent state
|
||||||
logger.info(f"Summarized {num_messages_before} messages to {num_messages_after}")
|
logger.info(f"Summarized {num_messages_before} messages to {num_messages_after}")
|
||||||
if num_messages_before <= num_messages_after:
|
if num_messages_before <= num_messages_after:
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
|
||||||
detail="Summarization failed to reduce the number of messages. You may need to use a different CompactionSettings (e.g. using `all` mode).",
|
|
||||||
)
|
|
||||||
await agent_loop._checkpoint_messages(run_id=None, step_id=None, new_messages=[summary_message], in_context_messages=messages)
|
|
||||||
return CompactionResponse(
|
|
||||||
summary=summary,
|
|
||||||
num_messages_before=num_messages_before,
|
|
||||||
num_messages_after=num_messages_after,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_403_FORBIDDEN,
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
detail="Summarization is not currently supported for this agent configuration. Please contact Letta support.",
|
detail="Summarization failed to reduce the number of messages. You may need to use a different CompactionSettings (e.g. using `all` mode).",
|
||||||
)
|
)
|
||||||
|
await agent_loop._checkpoint_messages(run_id=None, step_id=None, new_messages=[summary_message], in_context_messages=messages)
|
||||||
|
return CompactionResponse(
|
||||||
|
summary=summary,
|
||||||
|
num_messages_before=num_messages_before,
|
||||||
|
num_messages_after=num_messages_after,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class CaptureMessagesRequest(BaseModel):
|
class CaptureMessagesRequest(BaseModel):
|
||||||
|
|||||||
@@ -470,29 +470,6 @@ async def compact_conversation(
|
|||||||
# Get the agent state
|
# Get the agent state
|
||||||
agent = await server.agent_manager.get_agent_by_id_async(conversation.agent_id, actor, include_relationships=["multi_agent_group"])
|
agent = await server.agent_manager.get_agent_by_id_async(conversation.agent_id, actor, include_relationships=["multi_agent_group"])
|
||||||
|
|
||||||
# Check eligibility
|
|
||||||
agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
|
|
||||||
model_compatible = agent.llm_config.model_endpoint_type in [
|
|
||||||
"anthropic",
|
|
||||||
"openai",
|
|
||||||
"together",
|
|
||||||
"google_ai",
|
|
||||||
"google_vertex",
|
|
||||||
"bedrock",
|
|
||||||
"ollama",
|
|
||||||
"azure",
|
|
||||||
"xai",
|
|
||||||
"zai",
|
|
||||||
"groq",
|
|
||||||
"deepseek",
|
|
||||||
]
|
|
||||||
|
|
||||||
if not (agent_eligible and model_compatible):
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_403_FORBIDDEN,
|
|
||||||
detail="Summarization is not currently supported for this agent configuration. Please contact Letta support.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get in-context messages for this conversation
|
# Get in-context messages for this conversation
|
||||||
in_context_messages = await conversation_manager.get_messages_for_conversation(
|
in_context_messages = await conversation_manager.get_messages_for_conversation(
|
||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ from pydantic import Field
|
|||||||
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
|
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
|
||||||
from letta.schemas.group import Group, GroupBase, GroupCreate, GroupUpdate, ManagerType
|
from letta.schemas.group import Group, GroupBase, GroupCreate, GroupUpdate, ManagerType
|
||||||
from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion
|
from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion
|
||||||
from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
|
|
||||||
from letta.schemas.letta_response import LettaResponse
|
from letta.schemas.letta_response import LettaResponse
|
||||||
from letta.schemas.message import BaseMessage
|
from letta.schemas.message import BaseMessage
|
||||||
from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server
|
from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server
|
||||||
@@ -128,77 +127,6 @@ async def delete_group(
|
|||||||
return JSONResponse(status_code=status.HTTP_200_OK, content={"message": f"Group id={group_id} successfully deleted"})
|
return JSONResponse(status_code=status.HTTP_200_OK, content={"message": f"Group id={group_id} successfully deleted"})
|
||||||
|
|
||||||
|
|
||||||
@router.post(
|
|
||||||
"/{group_id}/messages",
|
|
||||||
response_model=LettaResponse,
|
|
||||||
operation_id="send_group_message",
|
|
||||||
deprecated=True,
|
|
||||||
)
|
|
||||||
async def send_group_message(
|
|
||||||
group_id: GroupId,
|
|
||||||
server: SyncServer = Depends(get_letta_server),
|
|
||||||
request: LettaRequest = Body(...),
|
|
||||||
headers: HeaderParams = Depends(get_headers),
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
Process a user message and return the group's response.
|
|
||||||
This endpoint accepts a message from a user and processes it through through agents in the group based on the specified pattern
|
|
||||||
"""
|
|
||||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
|
||||||
result = await server.send_group_message_to_agent(
|
|
||||||
group_id=group_id,
|
|
||||||
actor=actor,
|
|
||||||
input_messages=request.messages,
|
|
||||||
stream_steps=False,
|
|
||||||
stream_tokens=False,
|
|
||||||
# Support for AssistantMessage
|
|
||||||
use_assistant_message=request.use_assistant_message,
|
|
||||||
assistant_message_tool_name=request.assistant_message_tool_name,
|
|
||||||
assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
|
|
||||||
)
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
@router.post(
|
|
||||||
"/{group_id}/messages/stream",
|
|
||||||
response_model=None,
|
|
||||||
operation_id="send_group_message_streaming",
|
|
||||||
deprecated=True,
|
|
||||||
responses={
|
|
||||||
200: {
|
|
||||||
"description": "Successful response",
|
|
||||||
"content": {
|
|
||||||
"text/event-stream": {"description": "Server-Sent Events stream"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
},
|
|
||||||
)
|
|
||||||
async def send_group_message_streaming(
|
|
||||||
group_id: GroupId,
|
|
||||||
server: SyncServer = Depends(get_letta_server),
|
|
||||||
request: LettaStreamingRequest = Body(...),
|
|
||||||
headers: HeaderParams = Depends(get_headers),
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
Process a user message and return the group's responses.
|
|
||||||
This endpoint accepts a message from a user and processes it through agents in the group based on the specified pattern.
|
|
||||||
It will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.
|
|
||||||
"""
|
|
||||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
|
||||||
result = await server.send_group_message_to_agent(
|
|
||||||
group_id=group_id,
|
|
||||||
actor=actor,
|
|
||||||
input_messages=request.messages,
|
|
||||||
stream_steps=True,
|
|
||||||
stream_tokens=request.stream_tokens,
|
|
||||||
# Support for AssistantMessage
|
|
||||||
use_assistant_message=request.use_assistant_message,
|
|
||||||
assistant_message_tool_name=request.assistant_message_tool_name,
|
|
||||||
assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
|
|
||||||
)
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
GroupMessagesResponse = Annotated[
|
GroupMessagesResponse = Annotated[
|
||||||
List[LettaMessageUnion], Field(json_schema_extra={"type": "array", "items": {"$ref": "#/components/schemas/LettaMessageUnion"}})
|
List[LettaMessageUnion], Field(json_schema_extra={"type": "array", "items": {"$ref": "#/components/schemas/LettaMessageUnion"}})
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1754,244 +1754,3 @@ class SyncServer(object):
|
|||||||
raise LettaInvalidArgumentError(f"Failed to write MCP config file {mcp_config_path}")
|
raise LettaInvalidArgumentError(f"Failed to write MCP config file {mcp_config_path}")
|
||||||
|
|
||||||
return list(current_mcp_servers.values())
|
return list(current_mcp_servers.values())
|
||||||
|
|
||||||
@trace_method
|
|
||||||
async def send_message_to_agent(
|
|
||||||
self,
|
|
||||||
agent_id: str,
|
|
||||||
actor: User,
|
|
||||||
# role: MessageRole,
|
|
||||||
input_messages: List[MessageCreate],
|
|
||||||
stream_steps: bool,
|
|
||||||
stream_tokens: bool,
|
|
||||||
# related to whether or not we return `LettaMessage`s or `Message`s
|
|
||||||
chat_completion_mode: bool = False,
|
|
||||||
# Support for AssistantMessage
|
|
||||||
use_assistant_message: bool = True,
|
|
||||||
assistant_message_tool_name: str = constants.DEFAULT_MESSAGE_TOOL,
|
|
||||||
assistant_message_tool_kwarg: str = constants.DEFAULT_MESSAGE_TOOL_KWARG,
|
|
||||||
metadata: Optional[dict] = None,
|
|
||||||
request_start_timestamp_ns: Optional[int] = None,
|
|
||||||
include_return_message_types: Optional[List[MessageType]] = None,
|
|
||||||
) -> Union[StreamingResponse, LettaResponse]:
|
|
||||||
"""Split off into a separate function so that it can be imported in the /chat/completion proxy."""
|
|
||||||
# TODO: @charles is this the correct way to handle?
|
|
||||||
include_final_message = True
|
|
||||||
|
|
||||||
if not stream_steps and stream_tokens:
|
|
||||||
raise HTTPException(status_code=400, detail="stream_steps must be 'true' if stream_tokens is 'true'")
|
|
||||||
|
|
||||||
# For streaming response
|
|
||||||
try:
|
|
||||||
# TODO: move this logic into server.py
|
|
||||||
|
|
||||||
# Get the generator object off of the agent's streaming interface
|
|
||||||
# This will be attached to the POST SSE request used under-the-hood
|
|
||||||
letta_agent = self.load_agent(agent_id=agent_id, actor=actor)
|
|
||||||
|
|
||||||
# Disable token streaming if not OpenAI or Anthropic
|
|
||||||
# TODO: cleanup this logic
|
|
||||||
llm_config = letta_agent.agent_state.llm_config
|
|
||||||
# supports_token_streaming = ["openai", "anthropic", "xai", "deepseek"]
|
|
||||||
supports_token_streaming = ["openai", "anthropic", "deepseek", "chatgpt_oauth"] # TODO re-enable xAI once streaming is patched
|
|
||||||
if stream_tokens and (llm_config.model_endpoint_type not in supports_token_streaming):
|
|
||||||
logger.warning(
|
|
||||||
f"Token streaming is only supported for models with type {' or '.join(supports_token_streaming)} in the model_endpoint: agent has endpoint type {llm_config.model_endpoint_type} and {llm_config.model_endpoint}. Setting stream_tokens to False."
|
|
||||||
)
|
|
||||||
stream_tokens = False
|
|
||||||
|
|
||||||
# Create a new interface per request
|
|
||||||
letta_agent.interface = StreamingServerInterface(
|
|
||||||
# multi_step=True, # would we ever want to disable this?
|
|
||||||
use_assistant_message=use_assistant_message,
|
|
||||||
assistant_message_tool_name=assistant_message_tool_name,
|
|
||||||
assistant_message_tool_kwarg=assistant_message_tool_kwarg,
|
|
||||||
inner_thoughts_in_kwargs=(
|
|
||||||
llm_config.put_inner_thoughts_in_kwargs if llm_config.put_inner_thoughts_in_kwargs is not None else False
|
|
||||||
),
|
|
||||||
# inner_thoughts_kwarg=INNER_THOUGHTS_KWARG,
|
|
||||||
)
|
|
||||||
streaming_interface = letta_agent.interface
|
|
||||||
if not isinstance(streaming_interface, StreamingServerInterface):
|
|
||||||
raise LettaInvalidArgumentError(
|
|
||||||
f"Agent has wrong type of interface: {type(streaming_interface)}", argument_name="interface"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Enable token-streaming within the request if desired
|
|
||||||
streaming_interface.streaming_mode = stream_tokens
|
|
||||||
# "chatcompletion mode" does some remapping and ignores inner thoughts
|
|
||||||
streaming_interface.streaming_chat_completion_mode = chat_completion_mode
|
|
||||||
|
|
||||||
# streaming_interface.allow_assistant_message = stream
|
|
||||||
# streaming_interface.function_call_legacy_mode = stream
|
|
||||||
|
|
||||||
# Allow AssistantMessage is desired by client
|
|
||||||
# streaming_interface.use_assistant_message = use_assistant_message
|
|
||||||
# streaming_interface.assistant_message_tool_name = assistant_message_tool_name
|
|
||||||
# streaming_interface.assistant_message_tool_kwarg = assistant_message_tool_kwarg
|
|
||||||
|
|
||||||
# Related to JSON buffer reader
|
|
||||||
# streaming_interface.inner_thoughts_in_kwargs = (
|
|
||||||
# llm_config.put_inner_thoughts_in_kwargs if llm_config.put_inner_thoughts_in_kwargs is not None else False
|
|
||||||
# )
|
|
||||||
|
|
||||||
# Offload the synchronous message_func to a separate thread
|
|
||||||
streaming_interface.stream_start()
|
|
||||||
task = safe_create_task(
|
|
||||||
asyncio.to_thread(
|
|
||||||
self.send_messages,
|
|
||||||
actor=actor,
|
|
||||||
agent_id=agent_id,
|
|
||||||
input_messages=input_messages,
|
|
||||||
interface=streaming_interface,
|
|
||||||
metadata=metadata,
|
|
||||||
),
|
|
||||||
label="send_messages_thread",
|
|
||||||
)
|
|
||||||
|
|
||||||
if stream_steps:
|
|
||||||
# return a stream
|
|
||||||
return StreamingResponse(
|
|
||||||
sse_async_generator(
|
|
||||||
streaming_interface.get_generator(),
|
|
||||||
usage_task=task,
|
|
||||||
finish_message=include_final_message,
|
|
||||||
request_start_timestamp_ns=request_start_timestamp_ns,
|
|
||||||
llm_config=llm_config,
|
|
||||||
),
|
|
||||||
media_type="text/event-stream",
|
|
||||||
)
|
|
||||||
|
|
||||||
else:
|
|
||||||
# buffer the stream, then return the list
|
|
||||||
generated_stream = []
|
|
||||||
async for message in streaming_interface.get_generator():
|
|
||||||
assert (
|
|
||||||
isinstance(message, LettaMessage)
|
|
||||||
or isinstance(message, LegacyLettaMessage)
|
|
||||||
or isinstance(message, MessageStreamStatus)
|
|
||||||
), type(message)
|
|
||||||
generated_stream.append(message)
|
|
||||||
if message == MessageStreamStatus.done:
|
|
||||||
break
|
|
||||||
|
|
||||||
# Get rid of the stream status messages
|
|
||||||
filtered_stream = [d for d in generated_stream if not isinstance(d, MessageStreamStatus)]
|
|
||||||
|
|
||||||
# Apply message type filtering if specified
|
|
||||||
if include_return_message_types is not None:
|
|
||||||
filtered_stream = [msg for msg in filtered_stream if msg.message_type in include_return_message_types]
|
|
||||||
|
|
||||||
usage = await task
|
|
||||||
|
|
||||||
# By default the stream will be messages of type LettaMessage or LettaLegacyMessage
|
|
||||||
# If we want to convert these to Message, we can use the attached IDs
|
|
||||||
# NOTE: we will need to de-duplicate the Messsage IDs though (since Assistant->Inner+Func_Call)
|
|
||||||
# TODO: eventually update the interface to use `Message` and `MessageChunk` (new) inside the deque instead
|
|
||||||
return LettaResponse(
|
|
||||||
messages=filtered_stream,
|
|
||||||
stop_reason=LettaStopReason(stop_reason=StopReasonType.end_turn.value),
|
|
||||||
usage=usage,
|
|
||||||
)
|
|
||||||
|
|
||||||
except HTTPException:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
logger.exception(f"Error sending message to agent: {e}")
|
|
||||||
raise HTTPException(status_code=500, detail=f"{e}")
|
|
||||||
|
|
||||||
@trace_method
|
|
||||||
async def send_group_message_to_agent(
|
|
||||||
self,
|
|
||||||
group_id: str,
|
|
||||||
actor: User,
|
|
||||||
input_messages: Union[List[Message], List[MessageCreate]],
|
|
||||||
stream_steps: bool,
|
|
||||||
stream_tokens: bool,
|
|
||||||
chat_completion_mode: bool = False,
|
|
||||||
# Support for AssistantMessage
|
|
||||||
use_assistant_message: bool = True,
|
|
||||||
assistant_message_tool_name: str = constants.DEFAULT_MESSAGE_TOOL,
|
|
||||||
assistant_message_tool_kwarg: str = constants.DEFAULT_MESSAGE_TOOL_KWARG,
|
|
||||||
metadata: Optional[dict] = None,
|
|
||||||
) -> Union[StreamingResponse, LettaResponse]:
|
|
||||||
include_final_message = True
|
|
||||||
if not stream_steps and stream_tokens:
|
|
||||||
raise LettaInvalidArgumentError("stream_steps must be 'true' if stream_tokens is 'true'", argument_name="stream_steps")
|
|
||||||
|
|
||||||
group = await self.group_manager.retrieve_group_async(group_id=group_id, actor=actor)
|
|
||||||
agent_state_id = group.manager_agent_id or (group.agent_ids[0] if len(group.agent_ids) > 0 else None)
|
|
||||||
agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=agent_state_id, actor=actor) if agent_state_id else None
|
|
||||||
letta_multi_agent = load_multi_agent(group=group, agent_state=agent_state, actor=actor)
|
|
||||||
|
|
||||||
llm_config = letta_multi_agent.agent_state.llm_config
|
|
||||||
supports_token_streaming = ["openai", "anthropic", "deepseek", "chatgpt_oauth"]
|
|
||||||
if stream_tokens and (llm_config.model_endpoint_type not in supports_token_streaming):
|
|
||||||
logger.warning(
|
|
||||||
f"Token streaming is only supported for models with type {' or '.join(supports_token_streaming)} in the model_endpoint: agent has endpoint type {llm_config.model_endpoint_type} and {llm_config.model_endpoint}. Setting stream_tokens to False."
|
|
||||||
)
|
|
||||||
stream_tokens = False
|
|
||||||
|
|
||||||
# Create a new interface per request
|
|
||||||
letta_multi_agent.interface = StreamingServerInterface(
|
|
||||||
use_assistant_message=use_assistant_message,
|
|
||||||
assistant_message_tool_name=assistant_message_tool_name,
|
|
||||||
assistant_message_tool_kwarg=assistant_message_tool_kwarg,
|
|
||||||
inner_thoughts_in_kwargs=(
|
|
||||||
llm_config.put_inner_thoughts_in_kwargs if llm_config.put_inner_thoughts_in_kwargs is not None else False
|
|
||||||
),
|
|
||||||
)
|
|
||||||
streaming_interface = letta_multi_agent.interface
|
|
||||||
if not isinstance(streaming_interface, StreamingServerInterface):
|
|
||||||
raise LettaInvalidArgumentError(f"Agent has wrong type of interface: {type(streaming_interface)}", argument_name="interface")
|
|
||||||
streaming_interface.streaming_mode = stream_tokens
|
|
||||||
streaming_interface.streaming_chat_completion_mode = chat_completion_mode
|
|
||||||
if metadata and hasattr(streaming_interface, "metadata"):
|
|
||||||
streaming_interface.metadata = metadata
|
|
||||||
|
|
||||||
streaming_interface.stream_start()
|
|
||||||
task = safe_create_task(
|
|
||||||
asyncio.to_thread(
|
|
||||||
letta_multi_agent.step,
|
|
||||||
input_messages=input_messages,
|
|
||||||
chaining=self.chaining,
|
|
||||||
max_chaining_steps=self.max_chaining_steps,
|
|
||||||
),
|
|
||||||
label="multi_agent_step_thread",
|
|
||||||
)
|
|
||||||
|
|
||||||
if stream_steps:
|
|
||||||
# return a stream
|
|
||||||
return StreamingResponse(
|
|
||||||
sse_async_generator(
|
|
||||||
streaming_interface.get_generator(),
|
|
||||||
usage_task=task,
|
|
||||||
finish_message=include_final_message,
|
|
||||||
),
|
|
||||||
media_type="text/event-stream",
|
|
||||||
)
|
|
||||||
|
|
||||||
else:
|
|
||||||
# buffer the stream, then return the list
|
|
||||||
generated_stream = []
|
|
||||||
async for message in streaming_interface.get_generator():
|
|
||||||
assert (
|
|
||||||
isinstance(message, LettaMessage) or isinstance(message, LegacyLettaMessage) or isinstance(message, MessageStreamStatus)
|
|
||||||
), type(message)
|
|
||||||
generated_stream.append(message)
|
|
||||||
if message == MessageStreamStatus.done:
|
|
||||||
break
|
|
||||||
|
|
||||||
# Get rid of the stream status messages
|
|
||||||
filtered_stream = [d for d in generated_stream if not isinstance(d, MessageStreamStatus)]
|
|
||||||
usage = await task
|
|
||||||
|
|
||||||
# By default the stream will be messages of type LettaMessage or LettaLegacyMessage
|
|
||||||
# If we want to convert these to Message, we can use the attached IDs
|
|
||||||
# NOTE: we will need to de-duplicate the Messsage IDs though (since Assistant->Inner+Func_Call)
|
|
||||||
# TODO: eventually update the interface to use `Message` and `MessageChunk` (new) inside the deque instead
|
|
||||||
return LettaResponse(
|
|
||||||
messages=filtered_stream,
|
|
||||||
stop_reason=LettaStopReason(stop_reason=StopReasonType.end_turn.value),
|
|
||||||
usage=usage,
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -111,8 +111,6 @@ class StreamingService:
|
|||||||
# Create a copy of agent state with the overridden llm_config
|
# Create a copy of agent state with the overridden llm_config
|
||||||
agent = agent.model_copy(update={"llm_config": override_llm_config})
|
agent = agent.model_copy(update={"llm_config": override_llm_config})
|
||||||
|
|
||||||
agent_eligible = self._is_agent_eligible(agent)
|
|
||||||
model_compatible = self._is_model_compatible(agent)
|
|
||||||
model_compatible_token_streaming = self._is_token_streaming_compatible(agent)
|
model_compatible_token_streaming = self._is_token_streaming_compatible(agent)
|
||||||
|
|
||||||
# Attempt to acquire conversation lock if conversation_id is provided
|
# Attempt to acquire conversation lock if conversation_id is provided
|
||||||
@@ -133,68 +131,40 @@ class StreamingService:
|
|||||||
run = await self._create_run(agent_id, request, run_type, actor, conversation_id=conversation_id)
|
run = await self._create_run(agent_id, request, run_type, actor, conversation_id=conversation_id)
|
||||||
await redis_client.set(f"{REDIS_RUN_ID_PREFIX}:{agent_id}", run.id if run else None)
|
await redis_client.set(f"{REDIS_RUN_ID_PREFIX}:{agent_id}", run.id if run else None)
|
||||||
|
|
||||||
if agent_eligible and model_compatible:
|
# use agent loop for streaming
|
||||||
# use agent loop for streaming
|
agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
|
||||||
agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
|
|
||||||
|
|
||||||
# create the base stream with error handling
|
# create the base stream with error handling
|
||||||
raw_stream = self._create_error_aware_stream(
|
raw_stream = self._create_error_aware_stream(
|
||||||
agent_loop=agent_loop,
|
agent_loop=agent_loop,
|
||||||
messages=request.messages,
|
messages=request.messages,
|
||||||
max_steps=request.max_steps,
|
max_steps=request.max_steps,
|
||||||
stream_tokens=request.stream_tokens and model_compatible_token_streaming,
|
stream_tokens=request.stream_tokens and model_compatible_token_streaming,
|
||||||
run_id=run.id if run else None,
|
run_id=run.id if run else None,
|
||||||
use_assistant_message=request.use_assistant_message,
|
use_assistant_message=request.use_assistant_message,
|
||||||
request_start_timestamp_ns=request_start_timestamp_ns,
|
request_start_timestamp_ns=request_start_timestamp_ns,
|
||||||
include_return_message_types=request.include_return_message_types,
|
include_return_message_types=request.include_return_message_types,
|
||||||
actor=actor,
|
actor=actor,
|
||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
client_tools=request.client_tools,
|
client_tools=request.client_tools,
|
||||||
include_compaction_messages=request.include_compaction_messages,
|
include_compaction_messages=request.include_compaction_messages,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# handle background streaming if requested
|
# handle background streaming if requested
|
||||||
if request.background and settings.track_agent_run:
|
if request.background and settings.track_agent_run:
|
||||||
if isinstance(redis_client, NoopAsyncRedisClient):
|
if isinstance(redis_client, NoopAsyncRedisClient):
|
||||||
raise LettaServiceUnavailableError(
|
raise LettaServiceUnavailableError(
|
||||||
f"Background streaming requires Redis to be running. "
|
f"Background streaming requires Redis to be running. "
|
||||||
f"Please ensure Redis is properly configured. "
|
f"Please ensure Redis is properly configured. "
|
||||||
f"LETTA_REDIS_HOST: {settings.redis_host}, LETTA_REDIS_PORT: {settings.redis_port}",
|
f"LETTA_REDIS_HOST: {settings.redis_host}, LETTA_REDIS_PORT: {settings.redis_port}",
|
||||||
service_name="redis",
|
service_name="redis",
|
||||||
)
|
|
||||||
|
|
||||||
# Wrap the agent loop stream with cancellation awareness for background task
|
|
||||||
background_stream = raw_stream
|
|
||||||
if settings.enable_cancellation_aware_streaming and run:
|
|
||||||
background_stream = cancellation_aware_stream_wrapper(
|
|
||||||
stream_generator=raw_stream,
|
|
||||||
run_manager=self.runs_manager,
|
|
||||||
run_id=run.id,
|
|
||||||
actor=actor,
|
|
||||||
cancellation_event=get_cancellation_event_for_run(run.id),
|
|
||||||
)
|
|
||||||
|
|
||||||
safe_create_task(
|
|
||||||
create_background_stream_processor(
|
|
||||||
stream_generator=background_stream,
|
|
||||||
redis_client=redis_client,
|
|
||||||
run_id=run.id,
|
|
||||||
run_manager=self.server.run_manager,
|
|
||||||
actor=actor,
|
|
||||||
conversation_id=conversation_id,
|
|
||||||
),
|
|
||||||
label=f"background_stream_processor_{run.id}",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
raw_stream = redis_sse_stream_generator(
|
# Wrap the agent loop stream with cancellation awareness for background task
|
||||||
redis_client=redis_client,
|
background_stream = raw_stream
|
||||||
run_id=run.id,
|
if settings.enable_cancellation_aware_streaming and run:
|
||||||
)
|
background_stream = cancellation_aware_stream_wrapper(
|
||||||
|
|
||||||
# wrap client stream with cancellation awareness if enabled and tracking runs
|
|
||||||
stream = raw_stream
|
|
||||||
if settings.enable_cancellation_aware_streaming and settings.track_agent_run and run and not request.background:
|
|
||||||
stream = cancellation_aware_stream_wrapper(
|
|
||||||
stream_generator=raw_stream,
|
stream_generator=raw_stream,
|
||||||
run_manager=self.runs_manager,
|
run_manager=self.runs_manager,
|
||||||
run_id=run.id,
|
run_id=run.id,
|
||||||
@@ -202,29 +172,43 @@ class StreamingService:
|
|||||||
cancellation_event=get_cancellation_event_for_run(run.id),
|
cancellation_event=get_cancellation_event_for_run(run.id),
|
||||||
)
|
)
|
||||||
|
|
||||||
# conditionally wrap with keepalive based on request parameter
|
safe_create_task(
|
||||||
if request.include_pings and settings.enable_keepalive:
|
create_background_stream_processor(
|
||||||
stream = add_keepalive_to_stream(stream, keepalive_interval=settings.keepalive_interval, run_id=run.id)
|
stream_generator=background_stream,
|
||||||
|
redis_client=redis_client,
|
||||||
|
run_id=run.id,
|
||||||
|
run_manager=self.server.run_manager,
|
||||||
|
actor=actor,
|
||||||
|
conversation_id=conversation_id,
|
||||||
|
),
|
||||||
|
label=f"background_stream_processor_{run.id}",
|
||||||
|
)
|
||||||
|
|
||||||
result = StreamingResponseWithStatusCode(
|
raw_stream = redis_sse_stream_generator(
|
||||||
stream,
|
redis_client=redis_client,
|
||||||
media_type="text/event-stream",
|
run_id=run.id,
|
||||||
)
|
)
|
||||||
else:
|
|
||||||
# fallback to non-agent-loop streaming
|
# wrap client stream with cancellation awareness if enabled and tracking runs
|
||||||
result = await self.server.send_message_to_agent(
|
stream = raw_stream
|
||||||
agent_id=agent_id,
|
if settings.enable_cancellation_aware_streaming and settings.track_agent_run and run and not request.background:
|
||||||
|
stream = cancellation_aware_stream_wrapper(
|
||||||
|
stream_generator=raw_stream,
|
||||||
|
run_manager=self.runs_manager,
|
||||||
|
run_id=run.id,
|
||||||
actor=actor,
|
actor=actor,
|
||||||
input_messages=request.messages,
|
cancellation_event=get_cancellation_event_for_run(run.id),
|
||||||
stream_steps=True,
|
|
||||||
stream_tokens=request.stream_tokens,
|
|
||||||
use_assistant_message=request.use_assistant_message,
|
|
||||||
assistant_message_tool_name=request.assistant_message_tool_name,
|
|
||||||
assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
|
|
||||||
request_start_timestamp_ns=request_start_timestamp_ns,
|
|
||||||
include_return_message_types=request.include_return_message_types,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# conditionally wrap with keepalive based on request parameter
|
||||||
|
if request.include_pings and settings.enable_keepalive:
|
||||||
|
stream = add_keepalive_to_stream(stream, keepalive_interval=settings.keepalive_interval, run_id=run.id)
|
||||||
|
|
||||||
|
result = StreamingResponseWithStatusCode(
|
||||||
|
stream,
|
||||||
|
media_type="text/event-stream",
|
||||||
|
)
|
||||||
|
|
||||||
# update run status to running before returning
|
# update run status to running before returning
|
||||||
if settings.track_agent_run and run:
|
if settings.track_agent_run and run:
|
||||||
# refetch run since it may have been updated by another service
|
# refetch run since it may have been updated by another service
|
||||||
@@ -499,30 +483,6 @@ class StreamingService:
|
|||||||
|
|
||||||
return error_aware_stream()
|
return error_aware_stream()
|
||||||
|
|
||||||
def _is_agent_eligible(self, agent: AgentState) -> bool:
|
|
||||||
"""Check if agent is eligible for streaming."""
|
|
||||||
return agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
|
|
||||||
|
|
||||||
def _is_model_compatible(self, agent: AgentState) -> bool:
|
|
||||||
"""Check if agent's model is compatible with streaming."""
|
|
||||||
return agent.llm_config.model_endpoint_type in [
|
|
||||||
"anthropic",
|
|
||||||
"openai",
|
|
||||||
"together",
|
|
||||||
"google_ai",
|
|
||||||
"google_vertex",
|
|
||||||
"bedrock",
|
|
||||||
"ollama",
|
|
||||||
"azure",
|
|
||||||
"xai",
|
|
||||||
"zai",
|
|
||||||
"groq",
|
|
||||||
"deepseek",
|
|
||||||
"chatgpt_oauth",
|
|
||||||
"minimax",
|
|
||||||
"openrouter",
|
|
||||||
]
|
|
||||||
|
|
||||||
def _is_token_streaming_compatible(self, agent: AgentState) -> bool:
|
def _is_token_streaming_compatible(self, agent: AgentState) -> bool:
|
||||||
"""Check if agent's model supports token-level streaming."""
|
"""Check if agent's model supports token-level streaming."""
|
||||||
base_compatible = agent.llm_config.model_endpoint_type in [
|
base_compatible = agent.llm_config.model_endpoint_type in [
|
||||||
|
|||||||
@@ -69,12 +69,9 @@ def test_multi_agent_large(server, default_user, roll_dice_tool, num_workers):
|
|||||||
actor=default_user,
|
actor=default_user,
|
||||||
)
|
)
|
||||||
|
|
||||||
manager_agent = server.load_agent(agent_id=manager_agent_state.id, actor=default_user)
|
|
||||||
|
|
||||||
# Create N worker agents
|
# Create N worker agents
|
||||||
worker_agents = []
|
|
||||||
for idx in tqdm(range(num_workers)):
|
for idx in tqdm(range(num_workers)):
|
||||||
worker_agent_state = server.create_agent(
|
server.create_agent(
|
||||||
CreateAgent(
|
CreateAgent(
|
||||||
name=f"worker-{idx}",
|
name=f"worker-{idx}",
|
||||||
tool_ids=[roll_dice_tool.id],
|
tool_ids=[roll_dice_tool.id],
|
||||||
@@ -86,13 +83,11 @@ def test_multi_agent_large(server, default_user, roll_dice_tool, num_workers):
|
|||||||
),
|
),
|
||||||
actor=default_user,
|
actor=default_user,
|
||||||
)
|
)
|
||||||
worker_agent = server.load_agent(agent_id=worker_agent_state.id, actor=default_user)
|
|
||||||
worker_agents.append(worker_agent)
|
|
||||||
|
|
||||||
# Manager sends broadcast message
|
# Manager sends broadcast message
|
||||||
broadcast_message = f"Send a message to all agents with tags {worker_tags} asking them to roll a dice for you!"
|
broadcast_message = f"Send a message to all agents with tags {worker_tags} asking them to roll a dice for you!"
|
||||||
server.send_messages(
|
server.send_messages(
|
||||||
actor=default_user,
|
actor=default_user,
|
||||||
agent_id=manager_agent.agent_state.id,
|
agent_id=manager_agent_state.id,
|
||||||
input_messages=[MessageCreate(role="user", content=broadcast_message)],
|
input_messages=[MessageCreate(role="user", content=broadcast_message)],
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,20 +1,14 @@
|
|||||||
import asyncio
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from letta.config import LettaConfig
|
from letta.config import LettaConfig
|
||||||
from letta.schemas.agent import CreateAgent
|
from letta.schemas.agent import CreateAgent
|
||||||
from letta.schemas.block import CreateBlock
|
from letta.schemas.block import CreateBlock
|
||||||
from letta.schemas.group import (
|
from letta.schemas.group import (
|
||||||
DynamicManager,
|
|
||||||
DynamicManagerUpdate,
|
DynamicManagerUpdate,
|
||||||
GroupCreate,
|
GroupCreate,
|
||||||
GroupUpdate,
|
GroupUpdate,
|
||||||
ManagerType,
|
ManagerType,
|
||||||
RoundRobinManagerUpdate,
|
|
||||||
SupervisorManager,
|
|
||||||
)
|
)
|
||||||
from letta.schemas.message import MessageCreate
|
|
||||||
from letta.server.server import SyncServer
|
from letta.server.server import SyncServer
|
||||||
|
|
||||||
|
|
||||||
@@ -125,30 +119,6 @@ async def manager_agent(server, default_user):
|
|||||||
yield agent_scooby
|
yield agent_scooby
|
||||||
|
|
||||||
|
|
||||||
async def test_empty_group(server, default_user):
|
|
||||||
group = await server.group_manager.create_group_async(
|
|
||||||
group=GroupCreate(
|
|
||||||
description="This is a group chat between best friends all like to hang out together. In their free time they like to solve mysteries.",
|
|
||||||
agent_ids=[],
|
|
||||||
),
|
|
||||||
actor=default_user,
|
|
||||||
)
|
|
||||||
with pytest.raises(ValueError, match="Empty group"):
|
|
||||||
await server.send_group_message_to_agent(
|
|
||||||
group_id=group.id,
|
|
||||||
actor=default_user,
|
|
||||||
input_messages=[
|
|
||||||
MessageCreate(
|
|
||||||
role="user",
|
|
||||||
content="what is everyone up to for the holidays?",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
stream_steps=False,
|
|
||||||
stream_tokens=False,
|
|
||||||
)
|
|
||||||
await server.group_manager.delete_group_async(group_id=group.id, actor=default_user)
|
|
||||||
|
|
||||||
|
|
||||||
async def test_modify_group_pattern(server, default_user, four_participant_agents, manager_agent):
|
async def test_modify_group_pattern(server, default_user, four_participant_agents, manager_agent):
|
||||||
group = await server.group_manager.create_group_async(
|
group = await server.group_manager.create_group_async(
|
||||||
group=GroupCreate(
|
group=GroupCreate(
|
||||||
@@ -195,243 +165,3 @@ async def test_list_agent_groups(server, default_user, four_participant_agents):
|
|||||||
|
|
||||||
await server.group_manager.delete_group_async(group_id=group_a.id, actor=default_user)
|
await server.group_manager.delete_group_async(group_id=group_a.id, actor=default_user)
|
||||||
await server.group_manager.delete_group_async(group_id=group_b.id, actor=default_user)
|
await server.group_manager.delete_group_async(group_id=group_b.id, actor=default_user)
|
||||||
|
|
||||||
|
|
||||||
async def test_round_robin(server, default_user, four_participant_agents):
|
|
||||||
description = (
|
|
||||||
"This is a group chat between best friends all like to hang out together. In their free time they like to solve mysteries."
|
|
||||||
)
|
|
||||||
group = await server.group_manager.create_group_async(
|
|
||||||
group=GroupCreate(
|
|
||||||
description=description,
|
|
||||||
agent_ids=[agent.id for agent in four_participant_agents],
|
|
||||||
),
|
|
||||||
actor=default_user,
|
|
||||||
)
|
|
||||||
|
|
||||||
# verify group creation
|
|
||||||
assert group.manager_type == ManagerType.round_robin
|
|
||||||
assert group.description == description
|
|
||||||
assert group.agent_ids == [agent.id for agent in four_participant_agents]
|
|
||||||
assert group.max_turns is None
|
|
||||||
assert group.manager_agent_id is None
|
|
||||||
assert group.termination_token is None
|
|
||||||
|
|
||||||
try:
|
|
||||||
server.group_manager.reset_messages(group_id=group.id, actor=default_user)
|
|
||||||
response = await server.send_group_message_to_agent(
|
|
||||||
group_id=group.id,
|
|
||||||
actor=default_user,
|
|
||||||
input_messages=[
|
|
||||||
MessageCreate(
|
|
||||||
role="user",
|
|
||||||
content="what is everyone up to for the holidays?",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
stream_steps=False,
|
|
||||||
stream_tokens=False,
|
|
||||||
)
|
|
||||||
assert response.usage.step_count == len(group.agent_ids)
|
|
||||||
assert len(response.messages) == response.usage.step_count * 2
|
|
||||||
for i, message in enumerate(response.messages):
|
|
||||||
assert message.message_type == "reasoning_message" if i % 2 == 0 else "assistant_message"
|
|
||||||
assert message.name == four_participant_agents[i // 2].name
|
|
||||||
|
|
||||||
for agent_id in group.agent_ids:
|
|
||||||
agent_messages = await server.get_agent_recall(
|
|
||||||
user_id=default_user.id,
|
|
||||||
agent_id=agent_id,
|
|
||||||
group_id=group.id,
|
|
||||||
reverse=True,
|
|
||||||
return_message_object=False,
|
|
||||||
)
|
|
||||||
assert len(agent_messages) == len(group.agent_ids) + 2 # add one for user message, one for reasoning message
|
|
||||||
|
|
||||||
# TODO: filter this to return a clean conversation history
|
|
||||||
messages = server.group_manager.list_group_messages(
|
|
||||||
group_id=group.id,
|
|
||||||
actor=default_user,
|
|
||||||
)
|
|
||||||
assert len(messages) == (len(group.agent_ids) + 2) * len(group.agent_ids)
|
|
||||||
|
|
||||||
max_turns = 3
|
|
||||||
group = await server.group_manager.modify_group_async(
|
|
||||||
group_id=group.id,
|
|
||||||
group_update=GroupUpdate(
|
|
||||||
agent_ids=[agent.id for agent in four_participant_agents][::-1],
|
|
||||||
manager_config=RoundRobinManagerUpdate(
|
|
||||||
max_turns=max_turns,
|
|
||||||
),
|
|
||||||
),
|
|
||||||
actor=default_user,
|
|
||||||
)
|
|
||||||
assert group.manager_type == ManagerType.round_robin
|
|
||||||
assert group.description == description
|
|
||||||
assert group.agent_ids == [agent.id for agent in four_participant_agents][::-1]
|
|
||||||
assert group.max_turns == max_turns
|
|
||||||
assert group.manager_agent_id is None
|
|
||||||
assert group.termination_token is None
|
|
||||||
|
|
||||||
server.group_manager.reset_messages(group_id=group.id, actor=default_user)
|
|
||||||
|
|
||||||
response = await server.send_group_message_to_agent(
|
|
||||||
group_id=group.id,
|
|
||||||
actor=default_user,
|
|
||||||
input_messages=[
|
|
||||||
MessageCreate(
|
|
||||||
role="user",
|
|
||||||
content="when should we plan our next adventure?",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
stream_steps=False,
|
|
||||||
stream_tokens=False,
|
|
||||||
)
|
|
||||||
assert response.usage.step_count == max_turns
|
|
||||||
assert len(response.messages) == max_turns * 2
|
|
||||||
|
|
||||||
for i, message in enumerate(response.messages):
|
|
||||||
assert message.message_type == "reasoning_message" if i % 2 == 0 else "assistant_message"
|
|
||||||
assert message.name == four_participant_agents[::-1][i // 2].name
|
|
||||||
|
|
||||||
for i in range(len(group.agent_ids)):
|
|
||||||
agent_messages = await server.get_agent_recall(
|
|
||||||
user_id=default_user.id,
|
|
||||||
agent_id=group.agent_ids[i],
|
|
||||||
group_id=group.id,
|
|
||||||
reverse=True,
|
|
||||||
return_message_object=False,
|
|
||||||
)
|
|
||||||
expected_message_count = max_turns + 1 if i >= max_turns else max_turns + 2
|
|
||||||
assert len(agent_messages) == expected_message_count
|
|
||||||
|
|
||||||
finally:
|
|
||||||
await server.group_manager.delete_group_async(group_id=group.id, actor=default_user)
|
|
||||||
|
|
||||||
|
|
||||||
async def test_supervisor(server, default_user, four_participant_agents):
    """Supervisor pattern: one manager agent fans a user message out to every
    participant in the group and aggregates the replies into a single answer.
    """
    # The agent that will act as the group's supervisor.
    supervisor = await server.create_agent_async(
        request=CreateAgent(
            name="shaggy",
            memory_blocks=[
                CreateBlock(
                    label="persona",
                    value="You are a puppy operations agent for Letta and you help run multi-agent group chats. Your role is to supervise the group, sending messages and aggregating the responses.",
                ),
                CreateBlock(
                    label="human",
                    value="",
                ),
            ],
            model="openai/gpt-4o-mini",
            embedding="openai/text-embedding-3-small",
        ),
        actor=default_user,
    )

    group = await server.group_manager.create_group_async(
        group=GroupCreate(
            description="This is a group chat between best friends all like to hang out together. In their free time they like to solve mysteries.",
            agent_ids=[agent.id for agent in four_participant_agents],
            manager_config=SupervisorManager(
                manager_agent_id=supervisor.id,
            ),
        ),
        actor=default_user,
    )
    try:
        response = await server.send_group_message_to_agent(
            group_id=group.id,
            actor=default_user,
            input_messages=[
                MessageCreate(
                    role="user",
                    content="ask everyone what they like to do for fun and then come up with an activity for everyone to do together.",
                ),
            ],
            stream_steps=False,
            stream_tokens=False,
        )

        # Two supervisor steps: fan-out tool call, then the summary.
        assert response.usage.step_count == 2
        assert len(response.messages) == 5

        # Expected sequence: reasoning -> tool call -> tool return
        # -> reasoning -> assistant summary. (Safe to unpack: length
        # was asserted above.)
        reasoning, tool_call, tool_return, summary_reasoning, summary = response.messages
        assert reasoning.message_type == "reasoning_message"
        assert tool_call.message_type == "tool_call_message"
        assert tool_call.tool_call.name == "send_message_to_all_agents_in_group"
        assert tool_return.message_type == "tool_return_message"
        # NOTE(review): tool_return is a string produced by our own tooling;
        # eval() is tolerable only because the input is trusted test output.
        assert len(eval(tool_return.tool_return)) == len(four_participant_agents)
        assert summary_reasoning.message_type == "reasoning_message"
        assert summary.message_type == "assistant_message"

    finally:
        await server.group_manager.delete_group_async(group_id=group.id, actor=default_user)
        server.agent_manager.delete_agent(agent_id=supervisor.id, actor=default_user)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.flaky(max_runs=2)
async def test_dynamic_group_chat(server, default_user, manager_agent, four_participant_agents):
    """Dynamic pattern: the manager agent picks which participant speaks next.

    Also verifies that group creation rejects duplicate agent ids and
    duplicate agent names.
    """
    description = (
        "This is a group chat between best friends all like to hang out together. In their free time they like to solve mysteries."
    )

    # A group listing the same agent twice must be rejected.
    with pytest.raises(ValueError, match="Duplicate agent ids"):
        await server.group_manager.create_group_async(
            group=GroupCreate(
                description=description,
                agent_ids=[agent.id for agent in four_participant_agents] + [four_participant_agents[0].id],
                manager_config=DynamicManager(
                    manager_agent_id=manager_agent.id,
                ),
            ),
            actor=default_user,
        )

    # A group containing two distinct agents that share a name must also be
    # rejected. Use the async creation path so the event loop is not blocked
    # (and for consistency with the other tests in this module).
    duplicate_agent_shaggy = await server.create_agent_async(
        request=CreateAgent(
            name="shaggy",
            model="openai/gpt-4o-mini",
            embedding="openai/text-embedding-3-small",
        ),
        actor=default_user,
    )
    try:
        with pytest.raises(ValueError, match="Duplicate agent names"):
            await server.group_manager.create_group_async(
                group=GroupCreate(
                    description=description,
                    agent_ids=[agent.id for agent in four_participant_agents] + [duplicate_agent_shaggy.id],
                    manager_config=DynamicManager(
                        manager_agent_id=manager_agent.id,
                    ),
                ),
                actor=default_user,
            )
    finally:
        # Clean up the throwaway agent even if the expected error was not
        # raised above (previously it leaked on assertion failure).
        server.agent_manager.delete_agent(agent_id=duplicate_agent_shaggy.id, actor=default_user)

    group = await server.group_manager.create_group_async(
        group=GroupCreate(
            description=description,
            agent_ids=[agent.id for agent in four_participant_agents],
            manager_config=DynamicManager(
                manager_agent_id=manager_agent.id,
            ),
        ),
        actor=default_user,
    )
    try:
        response = await server.send_group_message_to_agent(
            group_id=group.id,
            actor=default_user,
            input_messages=[
                MessageCreate(role="user", content="what is everyone up to for the holidays?"),
            ],
            stream_steps=False,
            stream_tokens=False,
        )
        # NOTE(review): dynamic orchestration appears to take two steps per
        # participant, each step emitting two messages — confirm against the
        # dynamic manager implementation if these counts drift.
        assert response.usage.step_count == len(four_participant_agents) * 2
        assert len(response.messages) == response.usage.step_count * 2

    finally:
        await server.group_manager.delete_group_async(group_id=group.id, actor=default_user)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user