feat: add fixes to anthropic and bump version (#2427)

This commit is contained in:
Sarah Wooders
2025-02-11 23:24:51 -08:00
committed by GitHub
22 changed files with 821 additions and 401 deletions

View File

@@ -1,5 +1,4 @@
__version__ = "0.6.23"
__version__ = "0.6.24"
# import clients
from letta.client.client import LocalClient, RESTClient, create_client

View File

@@ -447,8 +447,6 @@ class Agent(BaseAgent):
function_call = (
response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
)
# Get the name of the function
function_name = function_call.name
self.logger.info(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
@@ -461,7 +459,9 @@ class Agent(BaseAgent):
if not target_letta_tool:
error_msg = f"No function named {function_name}"
function_response = "None" # more like "never ran?"
messages = self._handle_function_error_response(error_msg, tool_call_id, function_name, function_args, function_response, messages)
messages = self._handle_function_error_response(
error_msg, tool_call_id, function_name, function_args, function_response, messages
)
return messages, False, True # force a heartbeat to allow agent to handle error
# Failure case 2: function name is OK, but function args are bad JSON
@@ -471,7 +471,9 @@ class Agent(BaseAgent):
except Exception:
error_msg = f"Error parsing JSON for function '{function_name}' arguments: {function_call.arguments}"
function_response = "None" # more like "never ran?"
messages = self._handle_function_error_response(error_msg, tool_call_id, function_name, function_args, function_response, messages)
messages = self._handle_function_error_response(
error_msg, tool_call_id, function_name, function_args, function_response, messages
)
return messages, False, True # force a heartbeat to allow agent to handle error
# Check if inner thoughts is in the function call arguments (possible apparently if you are using Azure)

View File

@@ -17,48 +17,45 @@ logger = get_logger(__name__)
def _sse_post(url: str, data: dict, headers: dict) -> Generator[Union[LettaStreamingResponse, ChatCompletionChunk], None, None]:
with httpx.Client() as client:
"""
Sends an SSE POST request and yields parsed response chunks.
"""
# TODO: Please note his is a very generous timeout for e2b reasons
with httpx.Client(timeout=httpx.Timeout(5 * 60.0, read=5 * 60.0)) as client:
with connect_sse(client, method="POST", url=url, json=data, headers=headers) as event_source:
# Inspect for errors before iterating (see https://github.com/florimondmanca/httpx-sse/pull/12)
# Check for immediate HTTP errors before processing the SSE stream
if not event_source.response.is_success:
# handle errors
pass
logger.warning("Caught error before iterating SSE request:", vars(event_source.response))
logger.warning(event_source.response.read().decode("utf-8"))
response_bytes = event_source.response.read()
logger.warning(f"SSE request error: {vars(event_source.response)}")
logger.warning(response_bytes.decode("utf-8"))
try:
response_bytes = event_source.response.read()
response_dict = json.loads(response_bytes.decode("utf-8"))
# e.g.: This model's maximum context length is 8192 tokens. However, your messages resulted in 8198 tokens (7450 in the messages, 748 in the functions). Please reduce the length of the messages or functions.
if (
"error" in response_dict
and "message" in response_dict["error"]
and OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING in response_dict["error"]["message"]
):
logger.error(response_dict["error"]["message"])
raise LLMError(response_dict["error"]["message"])
error_message = response_dict.get("error", {}).get("message", "")
if OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING in error_message:
logger.error(error_message)
raise LLMError(error_message)
except LLMError:
raise
except:
logger.error(f"Failed to parse SSE message, throwing SSE HTTP error up the stack")
except Exception:
logger.error("Failed to parse SSE message, raising HTTP error")
event_source.response.raise_for_status()
try:
for sse in event_source.iter_sse():
# if sse.data == OPENAI_SSE_DONE:
# print("finished")
# break
if sse.data in [status.value for status in MessageStreamStatus]:
# break
if sse.data in {status.value for status in MessageStreamStatus}:
yield MessageStreamStatus(sse.data)
if sse.data == MessageStreamStatus.done.value:
# We received the [DONE], so stop reading the stream.
break
else:
chunk_data = json.loads(sse.data)
if "reasoning" in chunk_data:
yield ReasoningMessage(**chunk_data)
elif "message_type" in chunk_data and chunk_data["message_type"] == "assistant_message":
elif chunk_data.get("message_type") == "assistant_message":
yield AssistantMessage(**chunk_data)
elif "tool_call" in chunk_data:
yield ToolCallMessage(**chunk_data)
@@ -67,33 +64,31 @@ def _sse_post(url: str, data: dict, headers: dict) -> Generator[Union[LettaStrea
elif "step_count" in chunk_data:
yield LettaUsageStatistics(**chunk_data)
elif chunk_data.get("object") == get_args(ChatCompletionChunk.__annotations__["object"])[0]:
yield ChatCompletionChunk(**chunk_data) # Add your processing logic for chat chunks here
yield ChatCompletionChunk(**chunk_data)
else:
raise ValueError(f"Unknown message type in chunk_data: {chunk_data}")
except SSEError as e:
logger.error("Caught an error while iterating the SSE stream:", str(e))
if "application/json" in str(e): # Check if the error is because of JSON response
# TODO figure out a better way to catch the error other than re-trying with a POST
response = client.post(url=url, json=data, headers=headers) # Make the request again to get the JSON response
if response.headers["Content-Type"].startswith("application/json"):
error_details = response.json() # Parse the JSON to get the error message
logger.error("Request:", vars(response.request))
logger.error("POST Error:", error_details)
logger.error("Original SSE Error:", str(e))
logger.error(f"SSE stream error: {e}")
if "application/json" in str(e):
response = client.post(url=url, json=data, headers=headers)
if response.headers.get("Content-Type", "").startswith("application/json"):
error_details = response.json()
logger.error(f"POST Error: {error_details}")
else:
logger.error("Failed to retrieve JSON error message via retry.")
else:
logger.error("SSEError not related to 'application/json' content type.")
# Optionally re-raise the exception if you need to propagate it
raise e
except Exception as e:
if event_source.response.request is not None:
logger.error("HTTP Request:", vars(event_source.response.request))
if event_source.response is not None:
logger.error("HTTP Status:", event_source.response.status_code)
logger.error("HTTP Headers:", event_source.response.headers)
logger.error("Exception message:", str(e))
logger.error(f"Unexpected exception: {e}")
if event_source.response.request:
logger.error(f"HTTP Request: {vars(event_source.response.request)}")
if event_source.response:
logger.error(f"HTTP Status: {event_source.response.status_code}")
logger.error(f"HTTP Headers: {event_source.response.headers}")
raise e

View File

@@ -51,9 +51,6 @@ BASE_TOOLS = ["send_message", "conversation_search", "archival_memory_insert", "
BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
# Multi agent tools
MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_all_tags", "send_message_to_agent_async"]
MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES = 3
MULTI_AGENT_SEND_MESSAGE_TIMEOUT = 20 * 60
MULTI_AGENT_CONCURRENT_SENDS = 15
# The name of the tool used to send message to the user
# May not be relevant in cases where the agent has multiple ways to message to user (send_imessage, send_discord_mesasge, ...)

View File

@@ -19,6 +19,8 @@ from anthropic.types.beta import (
from letta.errors import BedrockError, BedrockPermissionError
from letta.llm_api.aws_bedrock import get_bedrock_client
from letta.llm_api.helpers import add_inner_thoughts_to_functions
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
from letta.schemas.message import Message as _Message
from letta.schemas.message import MessageRole as _MessageRole
@@ -513,9 +515,23 @@ def convert_anthropic_stream_event_to_chatcompletion(
def _prepare_anthropic_request(
data: ChatCompletionRequest,
inner_thoughts_xml_tag: Optional[str] = "thinking",
# if true, prefix fill the generation with the thinking tag
prefix_fill: bool = True,
# if true, put COT inside the tool calls instead of inside the content
put_inner_thoughts_in_kwargs: bool = False,
) -> dict:
"""Prepare the request data for Anthropic API format."""
# convert the tools
# if needed, put inner thoughts as a kwarg for all tools
if data.tools and put_inner_thoughts_in_kwargs:
functions = add_inner_thoughts_to_functions(
functions=[t.function.model_dump() for t in data.tools],
inner_thoughts_key=INNER_THOUGHTS_KWARG,
inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
)
data.tools = [Tool(function=f) for f in functions]
# convert the tools to Anthropic's payload format
anthropic_tools = None if data.tools is None else convert_tools_to_anthropic_format(data.tools)
# pydantic -> dict
@@ -529,11 +545,25 @@ def _prepare_anthropic_request(
data.pop("tools")
data.pop("tool_choice", None)
elif anthropic_tools is not None:
# TODO eventually enable parallel tool use
data["tools"] = anthropic_tools
if len(anthropic_tools) == 1:
# tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
if put_inner_thoughts_in_kwargs:
if len(anthropic_tools) == 1:
data["tool_choice"] = {
"type": "tool",
"name": anthropic_tools[0]["name"],
"disable_parallel_tool_use": True,
}
else:
data["tool_choice"] = {
"type": "any",
"disable_parallel_tool_use": True,
}
else:
data["tool_choice"] = {
"type": "tool",
"name": anthropic_tools[0]["name"],
"type": "auto",
"disable_parallel_tool_use": True,
}
@@ -548,8 +578,21 @@ def _prepare_anthropic_request(
message["content"] = None
# Convert to Anthropic format
msg_objs = [_Message.dict_to_message(user_id=None, agent_id=None, openai_message_dict=m) for m in data["messages"]]
data["messages"] = [m.to_anthropic_dict(inner_thoughts_xml_tag=inner_thoughts_xml_tag) for m in msg_objs]
msg_objs = [
_Message.dict_to_message(
user_id=None,
agent_id=None,
openai_message_dict=m,
)
for m in data["messages"]
]
data["messages"] = [
m.to_anthropic_dict(
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
)
for m in msg_objs
]
# Ensure first message is user
if data["messages"][0]["role"] != "user":
@@ -558,6 +601,16 @@ def _prepare_anthropic_request(
# Handle alternating messages
data["messages"] = merge_tool_results_into_user_messages(data["messages"])
# Handle prefix fill (not compatible with inner-thouguhts-in-kwargs)
# https://docs.anthropic.com/en/api/messages#body-messages
# NOTE: cannot prefill with tools for opus:
# Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
if prefix_fill and not put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
data["messages"].append(
# Start the thinking process for the assistant
{"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
)
# Validate max_tokens
assert "max_tokens" in data, data
@@ -571,6 +624,7 @@ def _prepare_anthropic_request(
def anthropic_chat_completions_request(
data: ChatCompletionRequest,
inner_thoughts_xml_tag: Optional[str] = "thinking",
put_inner_thoughts_in_kwargs: bool = False,
betas: List[str] = ["tools-2024-04-04"],
) -> ChatCompletionResponse:
"""https://docs.anthropic.com/claude/docs/tool-use"""
@@ -580,7 +634,11 @@ def anthropic_chat_completions_request(
anthropic_client = anthropic.Anthropic(api_key=anthropic_override_key)
elif model_settings.anthropic_api_key:
anthropic_client = anthropic.Anthropic()
data = _prepare_anthropic_request(data, inner_thoughts_xml_tag)
data = _prepare_anthropic_request(
data=data,
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
)
response = anthropic_client.beta.messages.create(
**data,
betas=betas,
@@ -611,6 +669,7 @@ def anthropic_bedrock_chat_completions_request(
def anthropic_chat_completions_request_stream(
data: ChatCompletionRequest,
inner_thoughts_xml_tag: Optional[str] = "thinking",
put_inner_thoughts_in_kwargs: bool = False,
betas: List[str] = ["tools-2024-04-04"],
) -> Generator[ChatCompletionChunkResponse, None, None]:
"""Stream chat completions from Anthropic API.
@@ -618,7 +677,11 @@ def anthropic_chat_completions_request_stream(
Similar to OpenAI's streaming, but using Anthropic's native streaming support.
See: https://docs.anthropic.com/claude/reference/messages-streaming
"""
data = _prepare_anthropic_request(data, inner_thoughts_xml_tag)
data = _prepare_anthropic_request(
data=data,
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
)
anthropic_override_key = ProviderManager().get_anthropic_override_key()
if anthropic_override_key:
@@ -666,6 +729,7 @@ def anthropic_chat_completions_process_stream(
chat_completion_request: ChatCompletionRequest,
stream_interface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None,
inner_thoughts_xml_tag: Optional[str] = "thinking",
put_inner_thoughts_in_kwargs: bool = False,
create_message_id: bool = True,
create_message_datetime: bool = True,
betas: List[str] = ["tools-2024-04-04"],
@@ -743,6 +807,7 @@ def anthropic_chat_completions_process_stream(
anthropic_chat_completions_request_stream(
data=chat_completion_request,
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
betas=betas,
)
):

View File

@@ -111,7 +111,6 @@ def create(
# streaming?
stream: bool = False,
stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
max_tokens: Optional[int] = None,
model_settings: Optional[dict] = None, # TODO: eventually pass from server
) -> ChatCompletionResponse:
"""Return response to chat completion with backoff"""
@@ -157,7 +156,7 @@ def create(
else:
function_call = "required"
data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens)
data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming)
if stream: # Client requested token streaming
data.stream = True
assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
@@ -212,7 +211,7 @@ def create(
# For Azure, this model_endpoint is required to be configured via env variable, so users don't need to provide it in the LLM config
llm_config.model_endpoint = model_settings.azure_base_url
chat_completion_request = build_openai_chat_completions_request(
llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens
llm_config, messages, user_id, functions, function_call, use_tool_naming
)
response = azure_openai_chat_completions_request(
@@ -248,7 +247,7 @@ def create(
data=dict(
contents=[m.to_google_ai_dict() for m in messages],
tools=tools,
generation_config={"temperature": llm_config.temperature},
generation_config={"temperature": llm_config.temperature, "max_output_tokens": llm_config.max_tokens},
),
inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
)
@@ -268,7 +267,7 @@ def create(
messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
tools=([{"type": "function", "function": f} for f in functions] if functions else None),
tool_choice=tool_call,
max_tokens=1024, # TODO make dynamic
max_tokens=llm_config.max_tokens, # Note: max_tokens is required for Anthropic API
temperature=llm_config.temperature,
stream=stream,
)
@@ -279,14 +278,21 @@ def create(
response = anthropic_chat_completions_process_stream(
chat_completion_request=chat_completion_request,
put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
stream_interface=stream_interface,
)
return response
# Client did not request token streaming (expect a blocking backend response)
return anthropic_chat_completions_request(
data=chat_completion_request,
)
else:
# Client did not request token streaming (expect a blocking backend response)
response = anthropic_chat_completions_request(
data=chat_completion_request,
put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
)
if llm_config.put_inner_thoughts_in_kwargs:
response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
return response
# elif llm_config.model_endpoint_type == "cohere":
# if stream:
@@ -416,7 +422,7 @@ def create(
tool_choice=tool_call,
# user=str(user_id),
# NOTE: max_tokens is required for Anthropic API
max_tokens=1024, # TODO make dynamic
max_tokens=llm_config.max_tokens,
),
)

View File

@@ -7,6 +7,7 @@ from openai import OpenAI
from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
from letta.log import get_logger
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as _Message
from letta.schemas.message import MessageRole as _MessageRole
@@ -26,7 +27,7 @@ from letta.schemas.openai.embedding_response import EmbeddingResponse
from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
from letta.utils import get_tool_call_id, smart_urljoin
OPENAI_SSE_DONE = "[DONE]"
logger = get_logger(__name__)
def openai_get_model_list(
@@ -93,7 +94,6 @@ def build_openai_chat_completions_request(
functions: Optional[list],
function_call: Optional[str],
use_tool_naming: bool,
max_tokens: Optional[int],
) -> ChatCompletionRequest:
if functions and llm_config.put_inner_thoughts_in_kwargs:
# Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
@@ -130,7 +130,7 @@ def build_openai_chat_completions_request(
tools=[Tool(type="function", function=f) for f in functions] if functions else None,
tool_choice=tool_choice,
user=str(user_id),
max_completion_tokens=max_tokens,
max_completion_tokens=llm_config.max_tokens,
temperature=llm_config.temperature,
)
else:
@@ -140,7 +140,7 @@ def build_openai_chat_completions_request(
functions=functions,
function_call=function_call,
user=str(user_id),
max_completion_tokens=max_tokens,
max_completion_tokens=llm_config.max_tokens,
temperature=llm_config.temperature,
)
# https://platform.openai.com/docs/guides/text-generation/json-mode
@@ -354,9 +354,10 @@ def openai_chat_completions_process_stream(
except Exception as e:
if stream_interface:
stream_interface.stream_end()
print(f"Parsing ChatCompletion stream failed with error:\n{str(e)}")
logger.error(f"Parsing ChatCompletion stream failed with error:\n{str(e)}")
raise e
finally:
logger.info(f"Finally ending streaming interface.")
if stream_interface:
stream_interface.stream_end()

View File

@@ -15,6 +15,7 @@ class LLMConfig(BaseModel):
context_window (int): The context window size for the model.
put_inner_thoughts_in_kwargs (bool): Puts `inner_thoughts` as a kwarg in the function call if this is set to True. This helps with function calling performance and also the generation of inner thoughts.
temperature (float): The temperature to use when generating text with the model. A higher temperature will result in more random text.
max_tokens (int): The maximum number of tokens to generate.
"""
# TODO: 🤮 don't default to a vendor! bug city!
@@ -51,6 +52,10 @@ class LLMConfig(BaseModel):
0.7,
description="The temperature to use when generating text with the model. A higher temperature will result in more random text.",
)
max_tokens: Optional[int] = Field(
1024,
description="The maximum number of tokens to generate. If not set, the model will use its default value.",
)
# FIXME hack to silence pydantic protected namespace warning
model_config = ConfigDict(protected_namespaces=())

View File

@@ -542,7 +542,11 @@ class Message(BaseMessage):
return openai_message
def to_anthropic_dict(self, inner_thoughts_xml_tag="thinking") -> dict:
def to_anthropic_dict(
self,
inner_thoughts_xml_tag="thinking",
put_inner_thoughts_in_kwargs: bool = False,
) -> dict:
"""
Convert to an Anthropic message dictionary
@@ -586,26 +590,38 @@ class Message(BaseMessage):
"role": self.role,
}
content = []
if self.text is not None:
# COT / reasoning / thinking
if self.text is not None and not put_inner_thoughts_in_kwargs:
content.append(
{
"type": "text",
"text": add_xml_tag(string=self.text, xml_tag=inner_thoughts_xml_tag),
}
)
# Tool calling
if self.tool_calls is not None:
for tool_call in self.tool_calls:
if put_inner_thoughts_in_kwargs:
tool_call_input = add_inner_thoughts_to_tool_call(
tool_call,
inner_thoughts=self.text,
inner_thoughts_key=INNER_THOUGHTS_KWARG,
).model_dump()
else:
tool_call_input = json.loads(tool_call.function.arguments)
content.append(
{
"type": "tool_use",
"id": tool_call.id,
"name": tool_call.function.name,
"input": json.loads(tool_call.function.arguments),
"input": tool_call_input,
}
)
# If the only content was text, unpack it back into a singleton
# TODO
# TODO support multi-modal
anthropic_message["content"] = content
# Optional fields, do not include if null

View File

@@ -347,6 +347,15 @@ class AnthropicProvider(Provider):
configs = []
for model in models:
# We set this to false by default, because Anthropic can
# natively support <thinking> tags inside of content fields
# However, putting COT inside of tool calls can make it more
# reliable for tool calling (no chance of a non-tool call step)
# Since tool_choice_type 'any' doesn't work with in-content COT
# NOTE For Haiku, it can be flaky if we don't enable this by default
inner_thoughts_in_kwargs = True if "haiku" in model["name"] else False
configs.append(
LLMConfig(
model=model["name"],
@@ -354,6 +363,7 @@ class AnthropicProvider(Provider):
model_endpoint=self.base_url,
context_window=model["context_window"],
handle=self.get_handle(model["name"]),
put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
)
)
return configs

View File

@@ -41,7 +41,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
def __init__(
self,
multi_step: bool = True,
timeout: int = 150,
timeout: int = 3 * 60,
# The following are placeholders for potential expansions; they
# remain if you need to differentiate between actual "assistant messages"
# vs. tool calls. By default, they are set for the "send_message" tool usage.
@@ -55,6 +55,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
# Parsing state for incremental function-call data
self.current_function_name = ""
self.current_function_arguments = []
self.current_json_parse_result = {}
# Internal chunk buffer and event for async notification
self._chunks = deque()
@@ -85,6 +86,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
try:
await asyncio.wait_for(self._event.wait(), timeout=self.timeout)
except asyncio.TimeoutError:
logger.warning("Chat completions interface timed out! Please check that this is intended.")
break
while self._chunks:
@@ -105,7 +107,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
self,
item: ChatCompletionChunk,
):
"""
"""m
Add an item (a LettaMessage, status marker, or partial chunk)
to the queue and signal waiting consumers.
"""
@@ -156,6 +158,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
Called externally with a ChatCompletionChunkResponse. Transforms
it if necessary, then enqueues partial messages for streaming back.
"""
# print("RECEIVED CHUNK...")
processed_chunk = self._process_chunk_to_openai_style(chunk)
if processed_chunk is not None:
self._push_to_buffer(processed_chunk)
@@ -216,37 +219,43 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
combined_args = "".join(self.current_function_arguments)
parsed_args = OptimisticJSONParser().parse(combined_args)
# If we can see a "message" field, return it as partial content
if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]:
return ChatCompletionChunk(
id=chunk.id,
object=chunk.object,
created=chunk.created.timestamp(),
model=chunk.model,
choices=[
Choice(
index=choice.index,
delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
finish_reason=None,
)
],
)
# If the parsed result is different
# This is an edge case we need to consider. E.g. if the last streamed token is '}', we shouldn't stream that out
if parsed_args != self.current_json_parse_result:
self.current_json_parse_result = parsed_args
# If we can see a "message" field, return it as partial content
if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]:
return ChatCompletionChunk(
id=chunk.id,
object=chunk.object,
created=chunk.created.timestamp(),
model=chunk.model,
choices=[
Choice(
index=choice.index,
delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
finish_reason=None,
)
],
)
# If there's a finish reason, pass that along
if choice.finish_reason is not None:
return ChatCompletionChunk(
id=chunk.id,
object=chunk.object,
created=chunk.created.timestamp(),
model=chunk.model,
choices=[
Choice(
index=choice.index,
delta=ChoiceDelta(),
finish_reason=self.FINISH_REASON_STR,
)
],
)
# only emit a final chunk if finish_reason == "stop"
if choice.finish_reason == "stop":
return ChatCompletionChunk(
id=chunk.id,
object=chunk.object,
created=chunk.created.timestamp(),
model=chunk.model,
choices=[
Choice(
index=choice.index,
delta=ChoiceDelta(), # no partial text here
finish_reason="stop",
)
],
)
return None

View File

@@ -436,11 +436,15 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
# inner thoughts
if message_delta.content is not None:
processed_chunk = ReasoningMessage(
id=message_id,
date=message_date,
reasoning=message_delta.content,
)
if message_delta.content == "":
print("skipping empty content")
processed_chunk = None
else:
processed_chunk = ReasoningMessage(
id=message_id,
date=message_date,
reasoning=message_delta.content,
)
# tool calls
elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
@@ -496,15 +500,24 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
if tool_call.function.name:
tool_call_delta["name"] = tool_call.function.name
processed_chunk = ToolCallMessage(
id=message_id,
date=message_date,
tool_call=ToolCallDelta(
name=tool_call_delta.get("name"),
arguments=tool_call_delta.get("arguments"),
tool_call_id=tool_call_delta.get("id"),
),
)
# We might end up with a no-op, in which case we should omit
if (
tool_call_delta.get("name") is None
and tool_call_delta.get("arguments") in [None, ""]
and tool_call_delta.get("id") is None
):
processed_chunk = None
print("skipping empty chunk...")
else:
processed_chunk = ToolCallMessage(
id=message_id,
date=message_date,
tool_call=ToolCallDelta(
name=tool_call_delta.get("name"),
arguments=tool_call_delta.get("arguments"),
tool_call_id=tool_call_delta.get("id"),
),
)
elif self.inner_thoughts_in_kwargs and tool_call.function:
processed_chunk = None
@@ -525,11 +538,12 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
self.function_id_buffer += tool_call.id
if tool_call.function.arguments:
if chunk.model.startswith("claude-"):
updates_main_json = tool_call.function.arguments
updates_inner_thoughts = ""
else: # OpenAI
updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
# if chunk.model.startswith("claude-"):
# updates_main_json = tool_call.function.arguments
# updates_inner_thoughts = ""
# else: # OpenAI
# updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
# If we have inner thoughts, we should output them as a chunk
if updates_inner_thoughts:
@@ -787,15 +801,24 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
if tool_call.function.name:
tool_call_delta["name"] = tool_call.function.name
processed_chunk = ToolCallMessage(
id=message_id,
date=message_date,
tool_call=ToolCallDelta(
name=tool_call_delta.get("name"),
arguments=tool_call_delta.get("arguments"),
tool_call_id=tool_call_delta.get("id"),
),
)
# We might end up with a no-op, in which case we should omit
if (
tool_call_delta.get("name") is None
and tool_call_delta.get("arguments") in [None, ""]
and tool_call_delta.get("id") is None
):
processed_chunk = None
print("skipping empty chunk...")
else:
processed_chunk = ToolCallMessage(
id=message_id,
date=message_date,
tool_call=ToolCallDelta(
name=tool_call_delta.get("name"),
arguments=tool_call_delta.get("arguments"),
tool_call_id=tool_call_delta.get("id"),
),
)
elif choice.finish_reason is not None:
# skip if there's a finish

View File

@@ -9,6 +9,7 @@ from fastapi import Header
from pydantic import BaseModel
from letta.errors import ContextWindowExceededError, RateLimitExceededError
from letta.log import get_logger
from letta.schemas.usage import LettaUsageStatistics
from letta.server.rest_api.interface import StreamingServerInterface
@@ -24,10 +25,14 @@ SSE_FINISH_MSG = "[DONE]" # mimic openai
SSE_ARTIFICIAL_DELAY = 0.1
logger = get_logger(__name__)
def sse_formatter(data: Union[dict, str]) -> str:
"""Prefix with 'data: ', and always include double newlines"""
assert type(data) in [dict, str], f"Expected type dict or str, got type {type(data)}"
data_str = json.dumps(data, separators=(",", ":")) if isinstance(data, dict) else data
# print(f"data: {data_str}\n\n")
return f"data: {data_str}\n\n"
@@ -62,23 +67,29 @@ async def sse_async_generator(
usage = await usage_task
# Double-check the type
if not isinstance(usage, LettaUsageStatistics):
raise ValueError(f"Expected LettaUsageStatistics, got {type(usage)}")
err_msg = f"Expected LettaUsageStatistics, got {type(usage)}"
logger.error(err_msg)
raise ValueError(err_msg)
yield sse_formatter(usage.model_dump())
except ContextWindowExceededError as e:
log_error_to_sentry(e)
logger.error(f"ContextWindowExceededError error: {e}")
yield sse_formatter({"error": f"Stream failed: {e}", "code": str(e.code.value) if e.code else None})
except RateLimitExceededError as e:
log_error_to_sentry(e)
logger.error(f"RateLimitExceededError error: {e}")
yield sse_formatter({"error": f"Stream failed: {e}", "code": str(e.code.value) if e.code else None})
except Exception as e:
log_error_to_sentry(e)
yield sse_formatter({"error": f"Stream failed (internal error occured)"})
logger.error(f"Caught unexpected Exception: {e}")
yield sse_formatter({"error": f"Stream failed (internal error occurred)"})
except Exception as e:
log_error_to_sentry(e)
logger.error(f"Caught unexpected Exception: {e}")
yield sse_formatter({"error": "Stream failed (decoder encountered an error)"})
finally:

View File

@@ -477,39 +477,39 @@ class AgentManager:
)
message = self.message_manager.create_message(message, actor=actor)
message_ids = [message.id] + agent_state.message_ids[1:] # swap index 0 (system)
return self.set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
return self._set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
else:
return agent_state
@enforce_types
def set_in_context_messages(self, agent_id: str, message_ids: List[str], actor: PydanticUser) -> PydanticAgentState:
def _set_in_context_messages(self, agent_id: str, message_ids: List[str], actor: PydanticUser) -> PydanticAgentState:
return self.update_agent(agent_id=agent_id, agent_update=UpdateAgent(message_ids=message_ids), actor=actor)
@enforce_types
def trim_older_in_context_messages(self, num: int, agent_id: str, actor: PydanticUser) -> PydanticAgentState:
message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
new_messages = [message_ids[0]] + message_ids[num:] # 0 is system message
return self.set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
return self._set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
@enforce_types
def trim_all_in_context_messages_except_system(self, agent_id: str, actor: PydanticUser) -> PydanticAgentState:
message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
new_messages = [message_ids[0]] # 0 is system message
return self.set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
return self._set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
@enforce_types
def prepend_to_in_context_messages(self, messages: List[PydanticMessage], agent_id: str, actor: PydanticUser) -> PydanticAgentState:
message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
new_messages = self.message_manager.create_many_messages(messages, actor=actor)
message_ids = [message_ids[0]] + [m.id for m in new_messages] + message_ids[1:]
return self.set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
return self._set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
@enforce_types
def append_to_in_context_messages(self, messages: List[PydanticMessage], agent_id: str, actor: PydanticUser) -> PydanticAgentState:
messages = self.message_manager.create_many_messages(messages, actor=actor)
message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids or []
message_ids += [m.id for m in messages]
return self.set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
return self._set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
@enforce_types
def reset_messages(self, agent_id: str, actor: PydanticUser, add_default_initial_messages: bool = False) -> PydanticAgentState:

394
poetry.lock generated
View File

@@ -1,14 +1,14 @@
# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "aiohappyeyeballs"
version = "2.4.4"
version = "2.4.6"
description = "Happy Eyeballs for asyncio"
optional = false
python-versions = ">=3.8"
python-versions = ">=3.9"
files = [
{file = "aiohappyeyeballs-2.4.4-py3-none-any.whl", hash = "sha256:a980909d50efcd44795c4afeca523296716d50cd756ddca6af8c65b996e27de8"},
{file = "aiohappyeyeballs-2.4.4.tar.gz", hash = "sha256:5fdd7d87889c63183afc18ce9271f9b0a7d32c2303e394468dd45d514a757745"},
{file = "aiohappyeyeballs-2.4.6-py3-none-any.whl", hash = "sha256:147ec992cf873d74f5062644332c539fcd42956dc69453fe5204195e560517e1"},
{file = "aiohappyeyeballs-2.4.6.tar.gz", hash = "sha256:9b05052f9042985d32ecbe4b59a77ae19c006a78f1344d7fdad69d28ded3d0b0"},
]
[[package]]
@@ -828,6 +828,7 @@ optional = false
python-versions = "<4,>=3.9"
files = [
{file = "composio_langchain-0.6.19-py3-none-any.whl", hash = "sha256:d0811956fe22bfa20d08828edca1757523730a6a02e6021e8ce3509c926c7f9b"},
{file = "composio_langchain-0.6.19.tar.gz", hash = "sha256:17b8c7ee042c0cf2c154772d742fe19e9d79a7e9e2a32d382d6f722b2104d671"},
]
[package.dependencies]
@@ -854,40 +855,42 @@ yaml = ["PyYAML"]
[[package]]
name = "cryptography"
version = "44.0.0"
version = "44.0.1"
description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
optional = false
python-versions = "!=3.9.0,!=3.9.1,>=3.7"
files = [
{file = "cryptography-44.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:84111ad4ff3f6253820e6d3e58be2cc2a00adb29335d4cacb5ab4d4d34f2a123"},
{file = "cryptography-44.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15492a11f9e1b62ba9d73c210e2416724633167de94607ec6069ef724fad092"},
{file = "cryptography-44.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831c3c4d0774e488fdc83a1923b49b9957d33287de923d58ebd3cec47a0ae43f"},
{file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb"},
{file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b"},
{file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543"},
{file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:60eb32934076fa07e4316b7b2742fa52cbb190b42c2df2863dbc4230a0a9b385"},
{file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e"},
{file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e"},
{file = "cryptography-44.0.0-cp37-abi3-win32.whl", hash = "sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053"},
{file = "cryptography-44.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:abc998e0c0eee3c8a1904221d3f67dcfa76422b23620173e28c11d3e626c21bd"},
{file = "cryptography-44.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:660cb7312a08bc38be15b696462fa7cc7cd85c3ed9c576e81f4dc4d8b2b31591"},
{file = "cryptography-44.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1923cb251c04be85eec9fda837661c67c1049063305d6be5721643c22dd4e2b7"},
{file = "cryptography-44.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:404fdc66ee5f83a1388be54300ae978b2efd538018de18556dde92575e05defc"},
{file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289"},
{file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7"},
{file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c"},
{file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9abcc2e083cbe8dde89124a47e5e53ec38751f0d7dfd36801008f316a127d7ba"},
{file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64"},
{file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285"},
{file = "cryptography-44.0.0-cp39-abi3-win32.whl", hash = "sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417"},
{file = "cryptography-44.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:708ee5f1bafe76d041b53a4f95eb28cdeb8d18da17e597d46d7833ee59b97ede"},
{file = "cryptography-44.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37d76e6863da3774cd9db5b409a9ecfd2c71c981c38788d3fcfaf177f447b731"},
{file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:f677e1268c4e23420c3acade68fac427fffcb8d19d7df95ed7ad17cdef8404f4"},
{file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f5e7cb1e5e56ca0933b4873c0220a78b773b24d40d186b6738080b73d3d0a756"},
{file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:8b3e6eae66cf54701ee7d9c83c30ac0a1e3fa17be486033000f2a73a12ab507c"},
{file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:be4ce505894d15d5c5037167ffb7f0ae90b7be6f2a98f9a5c3442395501c32fa"},
{file = "cryptography-44.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:62901fb618f74d7d81bf408c8719e9ec14d863086efe4185afd07c352aee1d2c"},
{file = "cryptography-44.0.0.tar.gz", hash = "sha256:cd4e834f340b4293430701e772ec543b0fbe6c2dea510a5286fe0acabe153a02"},
{file = "cryptography-44.0.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf688f615c29bfe9dfc44312ca470989279f0e94bb9f631f85e3459af8efc009"},
{file = "cryptography-44.0.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd7c7e2d71d908dc0f8d2027e1604102140d84b155e658c20e8ad1304317691f"},
{file = "cryptography-44.0.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:887143b9ff6bad2b7570da75a7fe8bbf5f65276365ac259a5d2d5147a73775f2"},
{file = "cryptography-44.0.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:322eb03ecc62784536bc173f1483e76747aafeb69c8728df48537eb431cd1911"},
{file = "cryptography-44.0.1-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:21377472ca4ada2906bc313168c9dc7b1d7ca417b63c1c3011d0c74b7de9ae69"},
{file = "cryptography-44.0.1-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:df978682c1504fc93b3209de21aeabf2375cb1571d4e61907b3e7a2540e83026"},
{file = "cryptography-44.0.1-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:eb3889330f2a4a148abead555399ec9a32b13b7c8ba969b72d8e500eb7ef84cd"},
{file = "cryptography-44.0.1-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:8e6a85a93d0642bd774460a86513c5d9d80b5c002ca9693e63f6e540f1815ed0"},
{file = "cryptography-44.0.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6f76fdd6fd048576a04c5210d53aa04ca34d2ed63336d4abd306d0cbe298fddf"},
{file = "cryptography-44.0.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6c8acf6f3d1f47acb2248ec3ea261171a671f3d9428e34ad0357148d492c7864"},
{file = "cryptography-44.0.1-cp37-abi3-win32.whl", hash = "sha256:24979e9f2040c953a94bf3c6782e67795a4c260734e5264dceea65c8f4bae64a"},
{file = "cryptography-44.0.1-cp37-abi3-win_amd64.whl", hash = "sha256:fd0ee90072861e276b0ff08bd627abec29e32a53b2be44e41dbcdf87cbee2b00"},
{file = "cryptography-44.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:a2d8a7045e1ab9b9f803f0d9531ead85f90c5f2859e653b61497228b18452008"},
{file = "cryptography-44.0.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8272f257cf1cbd3f2e120f14c68bff2b6bdfcc157fafdee84a1b795efd72862"},
{file = "cryptography-44.0.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e8d181e90a777b63f3f0caa836844a1182f1f265687fac2115fcf245f5fbec3"},
{file = "cryptography-44.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:436df4f203482f41aad60ed1813811ac4ab102765ecae7a2bbb1dbb66dcff5a7"},
{file = "cryptography-44.0.1-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4f422e8c6a28cf8b7f883eb790695d6d45b0c385a2583073f3cec434cc705e1a"},
{file = "cryptography-44.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:72198e2b5925155497a5a3e8c216c7fb3e64c16ccee11f0e7da272fa93b35c4c"},
{file = "cryptography-44.0.1-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:2a46a89ad3e6176223b632056f321bc7de36b9f9b93b2cc1cccf935a3849dc62"},
{file = "cryptography-44.0.1-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:53f23339864b617a3dfc2b0ac8d5c432625c80014c25caac9082314e9de56f41"},
{file = "cryptography-44.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:888fcc3fce0c888785a4876ca55f9f43787f4c5c1cc1e2e0da71ad481ff82c5b"},
{file = "cryptography-44.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:00918d859aa4e57db8299607086f793fa7813ae2ff5a4637e318a25ef82730f7"},
{file = "cryptography-44.0.1-cp39-abi3-win32.whl", hash = "sha256:9b336599e2cb77b1008cb2ac264b290803ec5e8e89d618a5e978ff5eb6f715d9"},
{file = "cryptography-44.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:e403f7f766ded778ecdb790da786b418a9f2394f36e8cc8b796cc056ab05f44f"},
{file = "cryptography-44.0.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1f9a92144fa0c877117e9748c74501bea842f93d21ee00b0cf922846d9d0b183"},
{file = "cryptography-44.0.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:610a83540765a8d8ce0f351ce42e26e53e1f774a6efb71eb1b41eb01d01c3d12"},
{file = "cryptography-44.0.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:5fed5cd6102bb4eb843e3315d2bf25fede494509bddadb81e03a859c1bc17b83"},
{file = "cryptography-44.0.1-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:f4daefc971c2d1f82f03097dc6f216744a6cd2ac0f04c68fb935ea2ba2a0d420"},
{file = "cryptography-44.0.1-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:94f99f2b943b354a5b6307d7e8d19f5c423a794462bde2bf310c770ba052b1c4"},
{file = "cryptography-44.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d9c5b9f698a83c8bd71e0f4d3f9f839ef244798e5ffe96febfa9714717db7af7"},
{file = "cryptography-44.0.1.tar.gz", hash = "sha256:f51f5705ab27898afda1aaa430f34ad90dc117421057782022edf0600bec5f14"},
]
[package.dependencies]
@@ -900,7 +903,7 @@ nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2)"]
pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"]
sdist = ["build (>=1.0.0)"]
ssh = ["bcrypt (>=3.1.5)"]
test = ["certifi (>=2024)", "cryptography-vectors (==44.0.0)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"]
test = ["certifi (>=2024)", "cryptography-vectors (==44.0.1)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"]
test-randomorder = ["pytest-randomly"]
[[package]]
@@ -1128,13 +1131,13 @@ files = [
[[package]]
name = "e2b"
version = "1.0.6"
version = "1.1.0"
description = "E2B SDK that give agents cloud environments"
optional = true
python-versions = "<4.0,>=3.8"
python-versions = "<4.0,>=3.9"
files = [
{file = "e2b-1.0.6-py3-none-any.whl", hash = "sha256:4ae6e00d46e6b0b9ab05388c408f9155488ee9f022c5a6fd47939f492ccf3b58"},
{file = "e2b-1.0.6.tar.gz", hash = "sha256:e35d47f5581565060a5c18e4cb839cf61de310d275fa0a6589d8fc8bf65957a7"},
{file = "e2b-1.1.0-py3-none-any.whl", hash = "sha256:5d99c675e155cf124f457d77f91c4cb32b286d241ca6cd37ac8d6c0711fc272e"},
{file = "e2b-1.1.0.tar.gz", hash = "sha256:bd054fbaa9baed48919500ba853bdb72c750b04e0bac8365bde75cdfbdf80d18"},
]
[package.dependencies]
@@ -1190,6 +1193,20 @@ files = [
[package.extras]
tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"]
[[package]]
name = "faker"
version = "36.1.0"
description = "Faker is a Python package that generates fake data for you."
optional = false
python-versions = ">=3.9"
files = [
{file = "Faker-36.1.0-py3-none-any.whl", hash = "sha256:aa0b93487d3adf7cd89953d172e3df896cb7b35d8a5222c0da873edbe2f7adf5"},
{file = "faker-36.1.0.tar.gz", hash = "sha256:f40510350aecfe006f45cb3f8879b35e861367cf347f51a7f2ca2c0571fdcc0b"},
]
[package.dependencies]
tzdata = "*"
[[package]]
name = "fastapi"
version = "0.115.8"
@@ -1963,13 +1980,13 @@ files = [
[[package]]
name = "identify"
version = "2.6.6"
version = "2.6.7"
description = "File identification library for Python"
optional = true
python-versions = ">=3.9"
files = [
{file = "identify-2.6.6-py2.py3-none-any.whl", hash = "sha256:cbd1810bce79f8b671ecb20f53ee0ae8e86ae84b557de31d89709dc2a48ba881"},
{file = "identify-2.6.6.tar.gz", hash = "sha256:7bec12768ed44ea4761efb47806f0a41f86e7c0a5fdf5950d4648c90eca7e251"},
{file = "identify-2.6.7-py2.py3-none-any.whl", hash = "sha256:155931cb617a401807b09ecec6635d6c692d180090a1cedca8ef7d58ba5b6aa0"},
{file = "identify-2.6.7.tar.gz", hash = "sha256:3fa266b42eba321ee0b2bb0936a6a6b9e36a1351cbb69055b3082f4193035684"},
]
[package.extras]
@@ -2393,23 +2410,23 @@ test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout"
[[package]]
name = "langchain"
version = "0.3.17"
version = "0.3.18"
description = "Building applications with LLMs through composability"
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "langchain-0.3.17-py3-none-any.whl", hash = "sha256:4d6d3cf454cc261a5017fd1fa5014cffcc7aeaccd0ec0530fc10c5f71e6e97a0"},
{file = "langchain-0.3.17.tar.gz", hash = "sha256:cef56f0a7c8369f35f1fa2690ecf0caa4504a36a5383de0eb29b8a5e26f625a0"},
{file = "langchain-0.3.18-py3-none-any.whl", hash = "sha256:1a6e629f02a25962aa5b16932e8f073248104a66804ed5af1f78618ad7c1d38d"},
{file = "langchain-0.3.18.tar.gz", hash = "sha256:311ac227a995545ff7c3f74c7767930c5349edef0b39f19d3105b86d39316b69"},
]
[package.dependencies]
aiohttp = ">=3.8.3,<4.0.0"
async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""}
langchain-core = ">=0.3.33,<0.4.0"
langchain-text-splitters = ">=0.3.3,<0.4.0"
langchain-core = ">=0.3.34,<1.0.0"
langchain-text-splitters = ">=0.3.6,<1.0.0"
langsmith = ">=0.1.17,<0.4"
numpy = [
{version = ">=1.22.4,<2", markers = "python_version < \"3.12\""},
{version = ">=1.26.4,<2", markers = "python_version < \"3.12\""},
{version = ">=1.26.2,<3", markers = "python_version >= \"3.12\""},
]
pydantic = ">=2.7.4,<3.0.0"
@@ -2418,26 +2435,42 @@ requests = ">=2,<3"
SQLAlchemy = ">=1.4,<3"
tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<10"
[package.extras]
anthropic = ["langchain-anthropic"]
aws = ["langchain-aws"]
cohere = ["langchain-cohere"]
community = ["langchain-community"]
deepseek = ["langchain-deepseek"]
fireworks = ["langchain-fireworks"]
google-genai = ["langchain-google-genai"]
google-vertexai = ["langchain-google-vertexai"]
groq = ["langchain-groq"]
huggingface = ["langchain-huggingface"]
mistralai = ["langchain-mistralai"]
ollama = ["langchain-ollama"]
openai = ["langchain-openai"]
together = ["langchain-together"]
[[package]]
name = "langchain-community"
version = "0.3.16"
version = "0.3.17"
description = "Community contributed LangChain integrations."
optional = true
python-versions = "<4.0,>=3.9"
files = [
{file = "langchain_community-0.3.16-py3-none-any.whl", hash = "sha256:a702c577b048d48882a46708bb3e08ca9aec79657c421c3241a305409040c0d6"},
{file = "langchain_community-0.3.16.tar.gz", hash = "sha256:825709bc328e294942b045d0b7f55053e8e88f7f943576306d778cf56417126c"},
{file = "langchain_community-0.3.17-py3-none-any.whl", hash = "sha256:13bbd87d681b0df67bafa294321613b13ac524f173c92f11048d40c74e585f0b"},
{file = "langchain_community-0.3.17.tar.gz", hash = "sha256:d8547a3d4f8307950be88ca638cd6ab1abe2440d0012e401a172ba4a39aa8044"},
]
[package.dependencies]
aiohttp = ">=3.8.3,<4.0.0"
dataclasses-json = ">=0.5.7,<0.7"
httpx-sse = ">=0.4.0,<0.5.0"
langchain = ">=0.3.16,<0.4.0"
langchain-core = ">=0.3.32,<0.4.0"
httpx-sse = ">=0.4.0,<1.0.0"
langchain = ">=0.3.18,<1.0.0"
langchain-core = ">=0.3.34,<1.0.0"
langsmith = ">=0.1.125,<0.4"
numpy = [
{version = ">=1.22.4,<2", markers = "python_version < \"3.12\""},
{version = ">=1.26.4,<2", markers = "python_version < \"3.12\""},
{version = ">=1.26.2,<3", markers = "python_version >= \"3.12\""},
]
pydantic-settings = ">=2.4.0,<3.0.0"
@@ -2448,13 +2481,13 @@ tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<10"
[[package]]
name = "langchain-core"
version = "0.3.33"
version = "0.3.34"
description = "Building applications with LLMs through composability"
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "langchain_core-0.3.33-py3-none-any.whl", hash = "sha256:269706408a2223f863ff1f9616f31903a5712403199d828b50aadbc4c28b553a"},
{file = "langchain_core-0.3.33.tar.gz", hash = "sha256:b5dd93a4e7f8198d2fc6048723b0bfecf7aaf128b0d268cbac19c34c1579b953"},
{file = "langchain_core-0.3.34-py3-none-any.whl", hash = "sha256:a057ebeddd2158d3be14bde341b25640ddf958b6989bd6e47160396f5a8202ae"},
{file = "langchain_core-0.3.34.tar.gz", hash = "sha256:26504cf1e8e6c310adad907b890d4e3c147581cfa7434114f6dc1134fe4bc6d3"},
]
[package.dependencies]
@@ -2471,33 +2504,33 @@ typing-extensions = ">=4.7"
[[package]]
name = "langchain-openai"
version = "0.3.3"
version = "0.3.5"
description = "An integration package connecting OpenAI and LangChain"
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "langchain_openai-0.3.3-py3-none-any.whl", hash = "sha256:979ef0d9eca9a34d7c39cd9d0f66d1d38f2f10a5a8c723bbc7e7a8275259c71a"},
{file = "langchain_openai-0.3.3.tar.gz", hash = "sha256:aaaee691f145d4ed3035fe23dce69e3212c8de7e208e650c1ce292960287725c"},
{file = "langchain_openai-0.3.5-py3-none-any.whl", hash = "sha256:137a7514f11afeab26e5fc1eda3c2b96fbbb18a96d963ba256faecceb189ea71"},
{file = "langchain_openai-0.3.5.tar.gz", hash = "sha256:40cd5649b93b1af20a20e1cbee5a47628a77e15114a11f9b3f2ab08c7d1302bf"},
]
[package.dependencies]
langchain-core = ">=0.3.33,<0.4.0"
langchain-core = ">=0.3.34,<1.0.0"
openai = ">=1.58.1,<2.0.0"
tiktoken = ">=0.7,<1"
[[package]]
name = "langchain-text-splitters"
version = "0.3.5"
version = "0.3.6"
description = "LangChain text splitting utilities"
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "langchain_text_splitters-0.3.5-py3-none-any.whl", hash = "sha256:8c9b059827438c5fa8f327b4df857e307828a5ec815163c9b5c9569a3e82c8ee"},
{file = "langchain_text_splitters-0.3.5.tar.gz", hash = "sha256:11cb7ca3694e5bdd342bc16d3875b7f7381651d4a53cbb91d34f22412ae16443"},
{file = "langchain_text_splitters-0.3.6-py3-none-any.whl", hash = "sha256:e5d7b850f6c14259ea930be4a964a65fa95d9df7e1dbdd8bad8416db72292f4e"},
{file = "langchain_text_splitters-0.3.6.tar.gz", hash = "sha256:c537972f4b7c07451df431353a538019ad9dadff7a1073ea363946cea97e1bee"},
]
[package.dependencies]
langchain-core = ">=0.3.29,<0.4.0"
langchain-core = ">=0.3.34,<1.0.0"
[[package]]
name = "langchainhub"
@@ -2517,13 +2550,13 @@ types-requests = ">=2.31.0.2,<3.0.0.0"
[[package]]
name = "langsmith"
version = "0.3.6"
version = "0.3.8"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "langsmith-0.3.6-py3-none-any.whl", hash = "sha256:f1784472a3bf8d6fe418e914e4d07043ecb1e578aa5fc9e1f116d738dc56d013"},
{file = "langsmith-0.3.6.tar.gz", hash = "sha256:ed2f26fbdf095c588cb1fcc1f98c2dd0de452c76f8496d5ff0557031ecbca095"},
{file = "langsmith-0.3.8-py3-none-any.whl", hash = "sha256:fbb9dd97b0f090219447fca9362698d07abaeda1da85aa7cc6ec6517b36581b1"},
{file = "langsmith-0.3.8.tar.gz", hash = "sha256:97f9bebe0b7cb0a4f278e6ff30ae7d5ededff3883b014442ec6d7d575b02a0f1"},
]
[package.dependencies]
@@ -2543,13 +2576,13 @@ pytest = ["pytest (>=7.0.0)", "rich (>=13.9.4,<14.0.0)"]
[[package]]
name = "letta-client"
version = "0.1.25"
version = "0.1.28"
description = ""
optional = false
python-versions = "<4.0,>=3.8"
files = [
{file = "letta_client-0.1.25-py3-none-any.whl", hash = "sha256:6da0f1415608ed731f025e805c7626637beca1e69a16899caa3992b5b2806452"},
{file = "letta_client-0.1.25.tar.gz", hash = "sha256:bdb33a76b2e0cf05cb3ffffac044fb2f9f53bf3818a43f7b74f51e827d1fcab7"},
{file = "letta_client-0.1.28-py3-none-any.whl", hash = "sha256:ace0c95a7429d2335ff7221aacaef9db7220ab5a4e5d87c6af7d6adbb86362aa"},
{file = "letta_client-0.1.28.tar.gz", hash = "sha256:bdb41aa9a6def43f0e7a8c1ccc3b48d6028f332ee73804d59330596b7f96c4a9"},
]
[package.dependencies]
@@ -2561,34 +2594,53 @@ typing_extensions = ">=4.0.0"
[[package]]
name = "llama-cloud"
version = "0.1.6"
version = "0.1.12"
description = ""
optional = false
python-versions = "<4,>=3.8"
files = [
{file = "llama_cloud-0.1.6-py3-none-any.whl", hash = "sha256:43595081e03ff552fd18d9553fcaada897ff267456c0f89f4cb098b927dc4dc7"},
{file = "llama_cloud-0.1.6.tar.gz", hash = "sha256:21200f6fdd46e08455d34b136f645ce6b8c3800e0ae13d8077913171a921da5a"},
{file = "llama_cloud-0.1.12-py3-none-any.whl", hash = "sha256:de1b4f89afc3cf3adf86ca9a6eb2b8de3f131b20fd25a5647b5a162e6bf2ed1b"},
{file = "llama_cloud-0.1.12.tar.gz", hash = "sha256:d51d26cc4c542398a3490813bc791f7504a40298225e62ed918951bf57266e2a"},
]
[package.dependencies]
certifi = ">=2024.7.4"
httpx = ">=0.20.0"
pydantic = ">=1.10"
[[package]]
name = "llama-cloud-services"
version = "0.6.1"
description = "Tailored SDK clients for LlamaCloud services."
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "llama_cloud_services-0.6.1-py3-none-any.whl", hash = "sha256:0427c98284bbfedbdf1686d29729d04b13e13f72017e184057892c8583c2b195"},
{file = "llama_cloud_services-0.6.1.tar.gz", hash = "sha256:92c7ee4fcc80adaa60f26c0da805182fa56d771fff11e9abb873f9ddb11b5e37"},
]
[package.dependencies]
click = ">=8.1.7,<9.0.0"
llama-cloud = ">=0.1.11,<0.2.0"
llama-index-core = ">=0.11.0"
pydantic = "!=2.10"
python-dotenv = ">=1.0.1,<2.0.0"
[[package]]
name = "llama-index"
version = "0.12.16"
version = "0.12.17"
description = "Interface between LLMs and your data"
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "llama_index-0.12.16-py3-none-any.whl", hash = "sha256:c94d0cf6735219d97d91e2eca5bcfac89ec1583990917f934b075d5a45686cf6"},
{file = "llama_index-0.12.16.tar.gz", hash = "sha256:4fd5f5b94eb3f8dd470bb8cc0e1b985d931e8f31473266ef69855488fd8ae3f2"},
{file = "llama_index-0.12.17-py3-none-any.whl", hash = "sha256:d8938e5e6e5ff78b6865f7890a01d1a40818a5df798555ee6eb7f2c5ab65aeb0"},
{file = "llama_index-0.12.17.tar.gz", hash = "sha256:761a2dad3eb74bd5242ecf8fd28337c0c8745fc8d39d2f9f9b18bf733ad679f4"},
]
[package.dependencies]
llama-index-agent-openai = ">=0.4.0,<0.5.0"
llama-index-cli = ">=0.4.0,<0.5.0"
llama-index-core = ">=0.12.16,<0.13.0"
llama-index-core = ">=0.12.17,<0.13.0"
llama-index-embeddings-openai = ">=0.3.0,<0.4.0"
llama-index-indices-managed-llama-cloud = ">=0.4.0"
llama-index-llms-openai = ">=0.3.0,<0.4.0"
@@ -2601,13 +2653,13 @@ nltk = ">3.8.1"
[[package]]
name = "llama-index-agent-openai"
version = "0.4.3"
version = "0.4.5"
description = "llama-index agent openai integration"
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "llama_index_agent_openai-0.4.3-py3-none-any.whl", hash = "sha256:5d1fbb6831113e609296e457b0a4d1c08c9267acca219eb78cb702bd76a0744d"},
{file = "llama_index_agent_openai-0.4.3.tar.gz", hash = "sha256:ff1f4a13ba417cb4b9cfbc2ffa9f162bdbdda9b87d6645d512cbde2061f55412"},
{file = "llama_index_agent_openai-0.4.5-py3-none-any.whl", hash = "sha256:3fcadce03420a1974e6cf5ecd8e58337652df2f81d5f30033b3b32a576dc790a"},
{file = "llama_index_agent_openai-0.4.5.tar.gz", hash = "sha256:c09be43e01b3d5b2d8859814fcdabd000769ab1b54958a7025b3ce391147b005"},
]
[package.dependencies]
@@ -2633,13 +2685,13 @@ llama-index-llms-openai = ">=0.3.0,<0.4.0"
[[package]]
name = "llama-index-core"
version = "0.12.16.post1"
version = "0.12.17"
description = "Interface between LLMs and your data"
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "llama_index_core-0.12.16.post1-py3-none-any.whl", hash = "sha256:95904a44f25e122a45963541c56a50c4daf2ffaf062d1a3224c84a6dc9e6801f"},
{file = "llama_index_core-0.12.16.post1.tar.gz", hash = "sha256:8fed0554ae71b6c1f80b53164723af28c887951eef7aa1b44ba6c8103c0efb2c"},
{file = "llama_index_core-0.12.17-py3-none-any.whl", hash = "sha256:867ec650a1f9eba9f6d65005045a68bc13bae8d65763e32029d9610360c03979"},
{file = "llama_index_core-0.12.17.tar.gz", hash = "sha256:2e8fb457983978af19db1ceba71d440f6891279525c5e7eb2ec73a6b727be113"},
]
[package.dependencies]
@@ -2683,32 +2735,32 @@ openai = ">=1.1.0"
[[package]]
name = "llama-index-indices-managed-llama-cloud"
version = "0.6.3"
version = "0.6.4"
description = "llama-index indices llama-cloud integration"
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "llama_index_indices_managed_llama_cloud-0.6.3-py3-none-any.whl", hash = "sha256:7f125602f624a2d321b6a4130cd98df35eb8c15818a159390755b2c13068f4ce"},
{file = "llama_index_indices_managed_llama_cloud-0.6.3.tar.gz", hash = "sha256:f09e4182cbc2a2bd75ae85cebb1681075247f0d91b931b094cac4315386ce87a"},
{file = "llama_index_indices_managed_llama_cloud-0.6.4-py3-none-any.whl", hash = "sha256:d7e85844a2e343dacebdef424decab3f5fd6361e25b3ff2bdcfb18607c1a49c5"},
{file = "llama_index_indices_managed_llama_cloud-0.6.4.tar.gz", hash = "sha256:0b45973cb2dc9702122006019bfb556dcabba31b0bdf79afc7b376ca8143df03"},
]
[package.dependencies]
llama-cloud = ">=0.1.5"
llama-cloud = ">=0.1.8,<0.2.0"
llama-index-core = ">=0.12.0,<0.13.0"
[[package]]
name = "llama-index-llms-openai"
version = "0.3.18"
version = "0.3.19"
description = "llama-index llms openai integration"
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "llama_index_llms_openai-0.3.18-py3-none-any.whl", hash = "sha256:e2e78ab94fafda8ac99fbfea1b19c5ba4e49d292557d2bdd9c7cc4b445f8745f"},
{file = "llama_index_llms_openai-0.3.18.tar.gz", hash = "sha256:81807ba318bac28aca67873228c55242c5fe55f8beba35d23828af6e03b1b234"},
{file = "llama_index_llms_openai-0.3.19-py3-none-any.whl", hash = "sha256:ad3c4a8c86aef181eba6b34cfff995a7c288d6bd5b99207438e25c051d80532d"},
{file = "llama_index_llms_openai-0.3.19.tar.gz", hash = "sha256:2e2dad70e7a9cb7a1519be1af4ba60c651a0039bc88888332a17922be00b0299"},
]
[package.dependencies]
llama-index-core = ">=0.12.4,<0.13.0"
llama-index-core = ">=0.12.17,<0.13.0"
openai = ">=1.58.1,<2.0.0"
[[package]]
@@ -2760,13 +2812,13 @@ llama-index-program-openai = ">=0.3.0,<0.4.0"
[[package]]
name = "llama-index-readers-file"
version = "0.4.4"
version = "0.4.5"
description = "llama-index readers file integration"
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "llama_index_readers_file-0.4.4-py3-none-any.whl", hash = "sha256:01589a4895e2d4abad30294c9b0d2813520ee1f5164922ad92f11e64a1d65d6c"},
{file = "llama_index_readers_file-0.4.4.tar.gz", hash = "sha256:e076b3fa1e68eea1594d47cec1f64b384fb6067f2697ca8aae22b4a21ad27ca7"},
{file = "llama_index_readers_file-0.4.5-py3-none-any.whl", hash = "sha256:704ac6b549f0ec59c0bd796007fceced2fff89a44b03d7ee36bce2d26b39e526"},
{file = "llama_index_readers_file-0.4.5.tar.gz", hash = "sha256:3ce5c8ad7f285bb7ff828c5b2e20088856ac65cf96640287eca770b69a21df88"},
]
[package.dependencies]
@@ -2796,29 +2848,27 @@ llama-parse = ">=0.5.0"
[[package]]
name = "llama-parse"
version = "0.5.20"
version = "0.6.1"
description = "Parse files into RAG-Optimized formats."
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "llama_parse-0.5.20-py3-none-any.whl", hash = "sha256:9617edb3428d3218ea01f1708f0b6105f3ffef142fedbeb8c98d50082c37e226"},
{file = "llama_parse-0.5.20.tar.gz", hash = "sha256:649e256431d3753025b9a320bb03b76849ce4b5a1121394c803df543e6c1006f"},
{file = "llama_parse-0.6.1-py3-none-any.whl", hash = "sha256:5f96c2951bc3ad514b67bb6886c99224f567d08290fc016e5c8de22c2df60e90"},
{file = "llama_parse-0.6.1.tar.gz", hash = "sha256:bd848d3ab7460f70f9e9acaef057fb14ae45f976bdf91830db86a8c40883ef34"},
]
[package.dependencies]
click = ">=8.1.7,<9.0.0"
llama-index-core = ">=0.11.0"
pydantic = "!=2.10"
llama-cloud-services = ">=0.6.1"
[[package]]
name = "locust"
version = "2.32.8"
version = "2.32.9"
description = "Developer-friendly load testing framework"
optional = true
python-versions = ">=3.9"
files = [
{file = "locust-2.32.8-py3-none-any.whl", hash = "sha256:782ccc25e576c4af328ca40a12803b556f6ccc3ad3b073b8074e47b52049ae4b"},
{file = "locust-2.32.8.tar.gz", hash = "sha256:45904026bbe26471876e3f39ecab5403512491638d3974ed159b83e32e2c0f92"},
{file = "locust-2.32.9-py3-none-any.whl", hash = "sha256:d9447c26d2bbaec5a0ace7cadefa1a31820ed392234257b309965a43d5e8d26f"},
{file = "locust-2.32.9.tar.gz", hash = "sha256:4c297afa5cdc3de15dfa79279576e5f33c1d69dd70006b51d079dcbd212201cc"},
]
[package.dependencies]
@@ -3980,6 +4030,7 @@ files = [
{file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"},
{file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"},
{file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"},
{file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"},
{file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"},
{file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"},
{file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"},
@@ -4039,6 +4090,7 @@ files = [
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
{file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"},
{file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
@@ -4359,13 +4411,13 @@ tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"]
[[package]]
name = "pypdf"
version = "5.2.0"
version = "5.3.0"
description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files"
optional = false
python-versions = ">=3.8"
files = [
{file = "pypdf-5.2.0-py3-none-any.whl", hash = "sha256:d107962ec45e65e3bd10c1d9242bdbbedaa38193c9e3a6617bd6d996e5747b19"},
{file = "pypdf-5.2.0.tar.gz", hash = "sha256:7c38e68420f038f2c4998fd9d6717b6db4f6cef1642e9cf384d519c9cf094663"},
{file = "pypdf-5.3.0-py3-none-any.whl", hash = "sha256:d7b6db242f5f8fdb4990ae11815c394b8e1b955feda0befcce862efd8559c181"},
{file = "pypdf-5.3.0.tar.gz", hash = "sha256:08393660dfea25b27ec6fe863fb2f2248e6270da5103fae49e9dea8178741951"},
]
[package.dependencies]
@@ -5280,68 +5332,68 @@ files = [
[[package]]
name = "sqlalchemy"
version = "2.0.37"
version = "2.0.38"
description = "Database Abstraction Library"
optional = false
python-versions = ">=3.7"
files = [
{file = "SQLAlchemy-2.0.37-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da36c3b0e891808a7542c5c89f224520b9a16c7f5e4d6a1156955605e54aef0e"},
{file = "SQLAlchemy-2.0.37-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e7402ff96e2b073a98ef6d6142796426d705addd27b9d26c3b32dbaa06d7d069"},
{file = "SQLAlchemy-2.0.37-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6f5d254a22394847245f411a2956976401e84da4288aa70cbcd5190744062c1"},
{file = "SQLAlchemy-2.0.37-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41296bbcaa55ef5fdd32389a35c710133b097f7b2609d8218c0eabded43a1d84"},
{file = "SQLAlchemy-2.0.37-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bedee60385c1c0411378cbd4dc486362f5ee88deceea50002772912d798bb00f"},
{file = "SQLAlchemy-2.0.37-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6c67415258f9f3c69867ec02fea1bf6508153709ecbd731a982442a590f2b7e4"},
{file = "SQLAlchemy-2.0.37-cp310-cp310-win32.whl", hash = "sha256:650dcb70739957a492ad8acff65d099a9586b9b8920e3507ca61ec3ce650bb72"},
{file = "SQLAlchemy-2.0.37-cp310-cp310-win_amd64.whl", hash = "sha256:93d1543cd8359040c02b6614421c8e10cd7a788c40047dbc507ed46c29ae5636"},
{file = "SQLAlchemy-2.0.37-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:78361be6dc9073ed17ab380985d1e45e48a642313ab68ab6afa2457354ff692c"},
{file = "SQLAlchemy-2.0.37-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b661b49d0cb0ab311a189b31e25576b7ac3e20783beb1e1817d72d9d02508bf5"},
{file = "SQLAlchemy-2.0.37-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d57bafbab289e147d064ffbd5cca2d7b1394b63417c0636cea1f2e93d16eb9e8"},
{file = "SQLAlchemy-2.0.37-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fa2c0913f02341d25fb858e4fb2031e6b0813494cca1ba07d417674128ce11b"},
{file = "SQLAlchemy-2.0.37-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9df21b8d9e5c136ea6cde1c50d2b1c29a2b5ff2b1d610165c23ff250e0704087"},
{file = "SQLAlchemy-2.0.37-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db18ff6b8c0f1917f8b20f8eca35c28bbccb9f83afa94743e03d40203ed83de9"},
{file = "SQLAlchemy-2.0.37-cp311-cp311-win32.whl", hash = "sha256:46954173612617a99a64aee103bcd3f078901b9a8dcfc6ae80cbf34ba23df989"},
{file = "SQLAlchemy-2.0.37-cp311-cp311-win_amd64.whl", hash = "sha256:7b7e772dc4bc507fdec4ee20182f15bd60d2a84f1e087a8accf5b5b7a0dcf2ba"},
{file = "SQLAlchemy-2.0.37-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2952748ecd67ed3b56773c185e85fc084f6bdcdec10e5032a7c25a6bc7d682ef"},
{file = "SQLAlchemy-2.0.37-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3151822aa1db0eb5afd65ccfafebe0ef5cda3a7701a279c8d0bf17781a793bb4"},
{file = "SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eaa8039b6d20137a4e02603aba37d12cd2dde7887500b8855356682fc33933f4"},
{file = "SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1cdba1f73b64530c47b27118b7053b8447e6d6f3c8104e3ac59f3d40c33aa9fd"},
{file = "SQLAlchemy-2.0.37-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1b2690456528a87234a75d1a1644cdb330a6926f455403c8e4f6cad6921f9098"},
{file = "SQLAlchemy-2.0.37-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf5ae8a9dcf657fd72144a7fd01f243236ea39e7344e579a121c4205aedf07bb"},
{file = "SQLAlchemy-2.0.37-cp312-cp312-win32.whl", hash = "sha256:ea308cec940905ba008291d93619d92edaf83232ec85fbd514dcb329f3192761"},
{file = "SQLAlchemy-2.0.37-cp312-cp312-win_amd64.whl", hash = "sha256:635d8a21577341dfe4f7fa59ec394b346da12420b86624a69e466d446de16aff"},
{file = "SQLAlchemy-2.0.37-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8c4096727193762e72ce9437e2a86a110cf081241919ce3fab8e89c02f6b6658"},
{file = "SQLAlchemy-2.0.37-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e4fb5ac86d8fe8151966814f6720996430462e633d225497566b3996966b9bdb"},
{file = "SQLAlchemy-2.0.37-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e56a139bfe136a22c438478a86f8204c1eb5eed36f4e15c4224e4b9db01cb3e4"},
{file = "SQLAlchemy-2.0.37-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f95fc8e3f34b5f6b3effb49d10ac97c569ec8e32f985612d9b25dd12d0d2e94"},
{file = "SQLAlchemy-2.0.37-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c505edd429abdfe3643fa3b2e83efb3445a34a9dc49d5f692dd087be966020e0"},
{file = "SQLAlchemy-2.0.37-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:12b0f1ec623cccf058cf21cb544f0e74656618165b083d78145cafde156ea7b6"},
{file = "SQLAlchemy-2.0.37-cp313-cp313-win32.whl", hash = "sha256:293f9ade06b2e68dd03cfb14d49202fac47b7bb94bffcff174568c951fbc7af2"},
{file = "SQLAlchemy-2.0.37-cp313-cp313-win_amd64.whl", hash = "sha256:d70f53a0646cc418ca4853da57cf3ddddbccb8c98406791f24426f2dd77fd0e2"},
{file = "SQLAlchemy-2.0.37-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:44f569d0b1eb82301b92b72085583277316e7367e038d97c3a1a899d9a05e342"},
{file = "SQLAlchemy-2.0.37-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2eae3423e538c10d93ae3e87788c6a84658c3ed6db62e6a61bb9495b0ad16bb"},
{file = "SQLAlchemy-2.0.37-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfff7be361048244c3aa0f60b5e63221c5e0f0e509f4e47b8910e22b57d10ae7"},
{file = "SQLAlchemy-2.0.37-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:5bc3339db84c5fb9130ac0e2f20347ee77b5dd2596ba327ce0d399752f4fce39"},
{file = "SQLAlchemy-2.0.37-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:84b9f23b0fa98a6a4b99d73989350a94e4a4ec476b9a7dfe9b79ba5939f5e80b"},
{file = "SQLAlchemy-2.0.37-cp37-cp37m-win32.whl", hash = "sha256:51bc9cfef83e0ac84f86bf2b10eaccb27c5a3e66a1212bef676f5bee6ef33ebb"},
{file = "SQLAlchemy-2.0.37-cp37-cp37m-win_amd64.whl", hash = "sha256:8e47f1af09444f87c67b4f1bb6231e12ba6d4d9f03050d7fc88df6d075231a49"},
{file = "SQLAlchemy-2.0.37-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6b788f14c5bb91db7f468dcf76f8b64423660a05e57fe277d3f4fad7b9dcb7ce"},
{file = "SQLAlchemy-2.0.37-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:521ef85c04c33009166777c77e76c8a676e2d8528dc83a57836b63ca9c69dcd1"},
{file = "SQLAlchemy-2.0.37-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75311559f5c9881a9808eadbeb20ed8d8ba3f7225bef3afed2000c2a9f4d49b9"},
{file = "SQLAlchemy-2.0.37-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cce918ada64c956b62ca2c2af59b125767097ec1dca89650a6221e887521bfd7"},
{file = "SQLAlchemy-2.0.37-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9d087663b7e1feabea8c578d6887d59bb00388158e8bff3a76be11aa3f748ca2"},
{file = "SQLAlchemy-2.0.37-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:cf95a60b36997dad99692314c4713f141b61c5b0b4cc5c3426faad570b31ca01"},
{file = "SQLAlchemy-2.0.37-cp38-cp38-win32.whl", hash = "sha256:d75ead7dd4d255068ea0f21492ee67937bd7c90964c8f3c2bea83c7b7f81b95f"},
{file = "SQLAlchemy-2.0.37-cp38-cp38-win_amd64.whl", hash = "sha256:74bbd1d0a9bacf34266a7907d43260c8d65d31d691bb2356f41b17c2dca5b1d0"},
{file = "SQLAlchemy-2.0.37-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:648ec5acf95ad59255452ef759054f2176849662af4521db6cb245263ae4aa33"},
{file = "SQLAlchemy-2.0.37-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:35bd2df269de082065d4b23ae08502a47255832cc3f17619a5cea92ce478b02b"},
{file = "SQLAlchemy-2.0.37-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f581d365af9373a738c49e0c51e8b18e08d8a6b1b15cc556773bcd8a192fa8b"},
{file = "SQLAlchemy-2.0.37-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82df02816c14f8dc9f4d74aea4cb84a92f4b0620235daa76dde002409a3fbb5a"},
{file = "SQLAlchemy-2.0.37-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:94b564e38b344d3e67d2e224f0aec6ba09a77e4582ced41e7bfd0f757d926ec9"},
{file = "SQLAlchemy-2.0.37-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:955a2a765aa1bd81aafa69ffda179d4fe3e2a3ad462a736ae5b6f387f78bfeb8"},
{file = "SQLAlchemy-2.0.37-cp39-cp39-win32.whl", hash = "sha256:03f0528c53ca0b67094c4764523c1451ea15959bbf0a8a8a3096900014db0278"},
{file = "SQLAlchemy-2.0.37-cp39-cp39-win_amd64.whl", hash = "sha256:4b12885dc85a2ab2b7d00995bac6d967bffa8594123b02ed21e8eb2205a7584b"},
{file = "SQLAlchemy-2.0.37-py3-none-any.whl", hash = "sha256:a8998bf9f8658bd3839cbc44ddbe982955641863da0c1efe5b00c1ab4f5c16b1"},
{file = "sqlalchemy-2.0.37.tar.gz", hash = "sha256:12b28d99a9c14eaf4055810df1001557176716de0167b91026e648e65229bffb"},
{file = "SQLAlchemy-2.0.38-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5e1d9e429028ce04f187a9f522818386c8b076723cdbe9345708384f49ebcec6"},
{file = "SQLAlchemy-2.0.38-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b87a90f14c68c925817423b0424381f0e16d80fc9a1a1046ef202ab25b19a444"},
{file = "SQLAlchemy-2.0.38-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:402c2316d95ed90d3d3c25ad0390afa52f4d2c56b348f212aa9c8d072a40eee5"},
{file = "SQLAlchemy-2.0.38-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6493bc0eacdbb2c0f0d260d8988e943fee06089cd239bd7f3d0c45d1657a70e2"},
{file = "SQLAlchemy-2.0.38-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0561832b04c6071bac3aad45b0d3bb6d2c4f46a8409f0a7a9c9fa6673b41bc03"},
{file = "SQLAlchemy-2.0.38-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:49aa2cdd1e88adb1617c672a09bf4ebf2f05c9448c6dbeba096a3aeeb9d4d443"},
{file = "SQLAlchemy-2.0.38-cp310-cp310-win32.whl", hash = "sha256:64aa8934200e222f72fcfd82ee71c0130a9c07d5725af6fe6e919017d095b297"},
{file = "SQLAlchemy-2.0.38-cp310-cp310-win_amd64.whl", hash = "sha256:c57b8e0841f3fce7b703530ed70c7c36269c6d180ea2e02e36b34cb7288c50c7"},
{file = "SQLAlchemy-2.0.38-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bf89e0e4a30714b357f5d46b6f20e0099d38b30d45fa68ea48589faf5f12f62d"},
{file = "SQLAlchemy-2.0.38-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8455aa60da49cb112df62b4721bd8ad3654a3a02b9452c783e651637a1f21fa2"},
{file = "SQLAlchemy-2.0.38-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f53c0d6a859b2db58332e0e6a921582a02c1677cc93d4cbb36fdf49709b327b2"},
{file = "SQLAlchemy-2.0.38-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3c4817dff8cef5697f5afe5fec6bc1783994d55a68391be24cb7d80d2dbc3a6"},
{file = "SQLAlchemy-2.0.38-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9cea5b756173bb86e2235f2f871b406a9b9d722417ae31e5391ccaef5348f2c"},
{file = "SQLAlchemy-2.0.38-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:40e9cdbd18c1f84631312b64993f7d755d85a3930252f6276a77432a2b25a2f3"},
{file = "SQLAlchemy-2.0.38-cp311-cp311-win32.whl", hash = "sha256:cb39ed598aaf102251483f3e4675c5dd6b289c8142210ef76ba24aae0a8f8aba"},
{file = "SQLAlchemy-2.0.38-cp311-cp311-win_amd64.whl", hash = "sha256:f9d57f1b3061b3e21476b0ad5f0397b112b94ace21d1f439f2db472e568178ae"},
{file = "SQLAlchemy-2.0.38-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:12d5b06a1f3aeccf295a5843c86835033797fea292c60e72b07bcb5d820e6dd3"},
{file = "SQLAlchemy-2.0.38-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e036549ad14f2b414c725349cce0772ea34a7ab008e9cd67f9084e4f371d1f32"},
{file = "SQLAlchemy-2.0.38-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee3bee874cb1fadee2ff2b79fc9fc808aa638670f28b2145074538d4a6a5028e"},
{file = "SQLAlchemy-2.0.38-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e185ea07a99ce8b8edfc788c586c538c4b1351007e614ceb708fd01b095ef33e"},
{file = "SQLAlchemy-2.0.38-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b79ee64d01d05a5476d5cceb3c27b5535e6bb84ee0f872ba60d9a8cd4d0e6579"},
{file = "SQLAlchemy-2.0.38-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:afd776cf1ebfc7f9aa42a09cf19feadb40a26366802d86c1fba080d8e5e74bdd"},
{file = "SQLAlchemy-2.0.38-cp312-cp312-win32.whl", hash = "sha256:a5645cd45f56895cfe3ca3459aed9ff2d3f9aaa29ff7edf557fa7a23515a3725"},
{file = "SQLAlchemy-2.0.38-cp312-cp312-win_amd64.whl", hash = "sha256:1052723e6cd95312f6a6eff9a279fd41bbae67633415373fdac3c430eca3425d"},
{file = "SQLAlchemy-2.0.38-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ecef029b69843b82048c5b347d8e6049356aa24ed644006c9a9d7098c3bd3bfd"},
{file = "SQLAlchemy-2.0.38-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c8bcad7fc12f0cc5896d8e10fdf703c45bd487294a986903fe032c72201596b"},
{file = "SQLAlchemy-2.0.38-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a0ef3f98175d77180ffdc623d38e9f1736e8d86b6ba70bff182a7e68bed7727"},
{file = "SQLAlchemy-2.0.38-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b0ac78898c50e2574e9f938d2e5caa8fe187d7a5b69b65faa1ea4648925b096"},
{file = "SQLAlchemy-2.0.38-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9eb4fa13c8c7a2404b6a8e3772c17a55b1ba18bc711e25e4d6c0c9f5f541b02a"},
{file = "SQLAlchemy-2.0.38-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5dba1cdb8f319084f5b00d41207b2079822aa8d6a4667c0f369fce85e34b0c86"},
{file = "SQLAlchemy-2.0.38-cp313-cp313-win32.whl", hash = "sha256:eae27ad7580529a427cfdd52c87abb2dfb15ce2b7a3e0fc29fbb63e2ed6f8120"},
{file = "SQLAlchemy-2.0.38-cp313-cp313-win_amd64.whl", hash = "sha256:b335a7c958bc945e10c522c069cd6e5804f4ff20f9a744dd38e748eb602cbbda"},
{file = "SQLAlchemy-2.0.38-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:40310db77a55512a18827488e592965d3dec6a3f1e3d8af3f8243134029daca3"},
{file = "SQLAlchemy-2.0.38-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d3043375dd5bbcb2282894cbb12e6c559654c67b5fffb462fda815a55bf93f7"},
{file = "SQLAlchemy-2.0.38-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70065dfabf023b155a9c2a18f573e47e6ca709b9e8619b2e04c54d5bcf193178"},
{file = "SQLAlchemy-2.0.38-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:c058b84c3b24812c859300f3b5abf300daa34df20d4d4f42e9652a4d1c48c8a4"},
{file = "SQLAlchemy-2.0.38-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:0398361acebb42975deb747a824b5188817d32b5c8f8aba767d51ad0cc7bb08d"},
{file = "SQLAlchemy-2.0.38-cp37-cp37m-win32.whl", hash = "sha256:a2bc4e49e8329f3283d99840c136ff2cd1a29e49b5624a46a290f04dff48e079"},
{file = "SQLAlchemy-2.0.38-cp37-cp37m-win_amd64.whl", hash = "sha256:9cd136184dd5f58892f24001cdce986f5d7e96059d004118d5410671579834a4"},
{file = "SQLAlchemy-2.0.38-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:665255e7aae5f38237b3a6eae49d2358d83a59f39ac21036413fab5d1e810578"},
{file = "SQLAlchemy-2.0.38-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:92f99f2623ff16bd4aaf786ccde759c1f676d39c7bf2855eb0b540e1ac4530c8"},
{file = "SQLAlchemy-2.0.38-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa498d1392216fae47eaf10c593e06c34476ced9549657fca713d0d1ba5f7248"},
{file = "SQLAlchemy-2.0.38-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9afbc3909d0274d6ac8ec891e30210563b2c8bdd52ebbda14146354e7a69373"},
{file = "SQLAlchemy-2.0.38-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:57dd41ba32430cbcc812041d4de8d2ca4651aeefad2626921ae2a23deb8cd6ff"},
{file = "SQLAlchemy-2.0.38-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:3e35d5565b35b66905b79ca4ae85840a8d40d31e0b3e2990f2e7692071b179ca"},
{file = "SQLAlchemy-2.0.38-cp38-cp38-win32.whl", hash = "sha256:f0d3de936b192980209d7b5149e3c98977c3810d401482d05fb6d668d53c1c63"},
{file = "SQLAlchemy-2.0.38-cp38-cp38-win_amd64.whl", hash = "sha256:3868acb639c136d98107c9096303d2d8e5da2880f7706f9f8c06a7f961961149"},
{file = "SQLAlchemy-2.0.38-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:07258341402a718f166618470cde0c34e4cec85a39767dce4e24f61ba5e667ea"},
{file = "SQLAlchemy-2.0.38-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a826f21848632add58bef4f755a33d45105d25656a0c849f2dc2df1c71f6f50"},
{file = "SQLAlchemy-2.0.38-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:386b7d136919bb66ced64d2228b92d66140de5fefb3c7df6bd79069a269a7b06"},
{file = "SQLAlchemy-2.0.38-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f2951dc4b4f990a4b394d6b382accb33141d4d3bd3ef4e2b27287135d6bdd68"},
{file = "SQLAlchemy-2.0.38-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8bf312ed8ac096d674c6aa9131b249093c1b37c35db6a967daa4c84746bc1bc9"},
{file = "SQLAlchemy-2.0.38-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6db316d6e340f862ec059dc12e395d71f39746a20503b124edc255973977b728"},
{file = "SQLAlchemy-2.0.38-cp39-cp39-win32.whl", hash = "sha256:c09a6ea87658695e527104cf857c70f79f14e9484605e205217aae0ec27b45fc"},
{file = "SQLAlchemy-2.0.38-cp39-cp39-win_amd64.whl", hash = "sha256:12f5c9ed53334c3ce719155424dc5407aaa4f6cadeb09c5b627e06abb93933a1"},
{file = "SQLAlchemy-2.0.38-py3-none-any.whl", hash = "sha256:63178c675d4c80def39f1febd625a6333f44c0ba269edd8a468b156394b27753"},
{file = "sqlalchemy-2.0.38.tar.gz", hash = "sha256:e5a4d82bdb4bf1ac1285a68eab02d253ab73355d9f0fe725a97e1e0fa689decb"},
]
[package.dependencies]
@@ -5756,13 +5808,13 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)",
[[package]]
name = "virtualenv"
version = "20.29.1"
version = "20.29.2"
description = "Virtual Python Environment builder"
optional = true
python-versions = ">=3.8"
files = [
{file = "virtualenv-20.29.1-py3-none-any.whl", hash = "sha256:4e4cb403c0b0da39e13b46b1b2476e505cb0046b25f242bee80f62bf990b2779"},
{file = "virtualenv-20.29.1.tar.gz", hash = "sha256:b8b8970138d32fb606192cb97f6cd4bb644fa486be9308fb9b63f81091b5dc35"},
{file = "virtualenv-20.29.2-py3-none-any.whl", hash = "sha256:febddfc3d1ea571bdb1dc0f98d7b45d24def7428214d4fb73cc486c9568cce6a"},
{file = "virtualenv-20.29.2.tar.gz", hash = "sha256:fdaabebf6d03b5ba83ae0a02cfe96f48a716f4fae556461d180825866f75b728"},
]
[package.dependencies]
@@ -6446,4 +6498,4 @@ tests = ["wikipedia"]
[metadata]
lock-version = "2.0"
python-versions = "<3.14,>=3.10"
content-hash = "36eb749e2733dad52b29f8032aa0d3808b82093cb5c2d1bdbfa706688781f746"
content-hash = "c7fc4c28d463efcb2c555d3592a4dce11e36cd179513376ee23087b7784682e4"

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "letta"
version = "0.6.23"
version = "0.6.24"
packages = [
{include = "letta"},
]
@@ -79,6 +79,7 @@ e2b-code-interpreter = {version = "^1.0.3", optional = true}
anthropic = "^0.43.0"
letta_client = "^0.1.23"
openai = "^1.60.0"
faker = "^36.1.0"
colorama = "^0.4.6"
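The new faker dependency above backs the conversation-generation helpers in the performance test added later in this commit; a minimal sketch of the usage, assuming only stock Faker providers:

    from faker import Faker

    fake = Faker()
    question = f"Where does {fake.name()} live?"  # e.g. "Where does Jane Smith live?"
    answer = fake.address()                       # a fake multi-line street address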

View File

@@ -0,0 +1,8 @@
{
"model": "claude-3-5-sonnet-20241022",
"model_endpoint_type": "anthropic",
"model_endpoint": "https://api.anthropic.com/v1",
"model_wrapper": null,
"context_window": 200000,
"put_inner_thoughts_in_kwargs": true
}
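This new config file is read by the summarizer/performance tests via LLM_CONFIG_DIR and passed straight into LLMConfig; a minimal sketch of that loading pattern, with the literal filename as an assumption (the tests resolve it as os.path.join(LLM_CONFIG_DIR, "claude-3-5-sonnet.json")):

    import json

    from letta.schemas.llm_config import LLMConfig

    # Hypothetical path; the tests build it from LLM_CONFIG_DIR instead.
    with open("claude-3-5-sonnet.json", "r") as f:
        llm_config = LLMConfig(**json.load(f))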

View File

@@ -1,9 +0,0 @@
{
"context_window": 200000,
"model": "claude-3-5-sonnet-20241022",
"model_endpoint_type": "anthropic",
"model_endpoint": "https://api.anthropic.com/v1",
"context_window": 200000,
"model_wrapper": null,
"put_inner_thoughts_in_kwargs": true
}
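The deleted file above declared "context_window" twice; Python's json module (like most parsers) silently keeps the last occurrence of a duplicate key, so the file parsed fine but carried a misleading redundant field. A quick demonstration of that last-key-wins behavior:

    import json

    # Duplicate keys do not raise; the final occurrence wins.
    print(json.loads('{"context_window": 1, "context_window": 200000}'))
    # -> {'context_window': 200000}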

View File

@@ -5,101 +5,139 @@ import uuid
import pytest
from dotenv import load_dotenv
from openai import AsyncOpenAI
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
from letta import RESTClient, create_client
from letta import create_client
from letta.client.streaming import _sse_post
from letta.schemas.agent import AgentState
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import MessageStreamStatus
from letta.schemas.llm_config import LLMConfig
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, UserMessage
from letta.schemas.usage import LettaUsageStatistics
# --- Server Management --- #
def run_server():
def _run_server():
"""Starts the Letta server in a background thread."""
load_dotenv()
# _reset_config()
from letta.server.rest_api.app import start_server
print("Starting server...")
start_server(debug=True)
@pytest.fixture(
scope="module",
)
def client():
# get URL from environment
server_url = os.getenv("LETTA_SERVER_URL")
if server_url is None:
# run server in thread
server_url = "http://localhost:8283"
print("Starting server thread")
thread = threading.Thread(target=run_server, daemon=True)
@pytest.fixture(scope="session")
def server_url():
"""Ensures a server is running and returns its base URL."""
url = os.getenv("LETTA_SERVER_URL", "http://localhost:8283")
if not os.getenv("LETTA_SERVER_URL"):
thread = threading.Thread(target=_run_server, daemon=True)
thread.start()
time.sleep(5)
print("Running client tests with server:", server_url)
# create user via admin client
client = create_client(base_url=server_url, token=None) # This yields control back to the test function
time.sleep(5) # Allow server startup time
return url
# --- Client Setup --- #
@pytest.fixture(scope="session")
def client(server_url):
"""Creates a REST client for testing."""
client = create_client(base_url=server_url, token=None)
client.set_default_llm_config(LLMConfig.default_config("gpt-4o-mini"))
client.set_default_embedding_config(EmbeddingConfig.default_config(provider="openai"))
yield client
# Fixture for test agent
@pytest.fixture(scope="module")
def agent_state(client: RESTClient):
agent_state = client.create_agent(name=f"test_client_{str(uuid.uuid4())}")
yield agent_state
# delete agent
@pytest.fixture(scope="function")
def roll_dice_tool(client):
def roll_dice():
"""
Rolls a 6 sided die.
Returns:
str: The roll result.
"""
return "Rolled a 10!"
tool = client.create_or_update_tool(func=roll_dice)
# Yield the created tool
yield tool
@pytest.fixture(scope="function")
def agent(client, roll_dice_tool):
"""Creates an agent and ensures cleanup after tests."""
agent_state = client.create_agent(name=f"test_client_{uuid.uuid4()}", tool_ids=[roll_dice_tool.id])
yield agent_state
client.delete_agent(agent_state.id)
def test_voice_streaming(mock_e2b_api_key_none, client: RESTClient, agent_state: AgentState):
"""
Test voice streaming for chat completions using the streaming API.
This test ensures the SSE (Server-Sent Events) response from the voice streaming endpoint
adheres to the expected structure and contains valid data for each type of chunk.
"""
# Prepare the chat completion request with streaming enabled
request = ChatCompletionRequest(
# --- Helper Functions --- #
def _get_chat_request(agent_id, message, stream=True):
"""Returns a chat completion request with streaming enabled."""
return ChatCompletionRequest(
model="gpt-4o-mini",
messages=[UserMessage(content="Tell me something interesting about bananas.")],
user=agent_state.id,
stream=True,
messages=[UserMessage(content=message)],
user=agent_id,
stream=stream,
)
# Perform a POST request to the voice/chat/completions endpoint and collect the streaming response
def _assert_valid_chunk(chunk, idx, chunks):
"""Validates the structure of each streaming chunk."""
if isinstance(chunk, ChatCompletionChunk):
assert chunk.choices, "Each ChatCompletionChunk should have at least one choice."
elif isinstance(chunk, LettaUsageStatistics):
assert chunk.completion_tokens > 0, "Completion tokens must be > 0."
assert chunk.prompt_tokens > 0, "Prompt tokens must be > 0."
assert chunk.total_tokens > 0, "Total tokens must be > 0."
assert chunk.step_count == 1, "Step count must be 1."
elif isinstance(chunk, MessageStreamStatus):
assert chunk == MessageStreamStatus.done, "Stream should end with 'done' status."
assert idx == len(chunks) - 1, "The last chunk must be 'done'."
else:
pytest.fail(f"Unexpected chunk type: {chunk}")
# --- Test Cases --- #
@pytest.mark.parametrize("message", ["Tell me something interesting about bananas."])
def test_chat_completions_streaming(mock_e2b_api_key_none, client, agent, message):
"""Tests chat completion streaming via SSE."""
request = _get_chat_request(agent.id, message)
response = _sse_post(
f"{client.base_url}/openai/{client.api_prefix}/chat/completions", request.model_dump(exclude_none=True), client.headers
)
# Convert the streaming response into a list of chunks for processing
chunks = list(response)
for idx, chunk in enumerate(chunks):
if isinstance(chunk, ChatCompletionChunk):
# Assert that the chunk has at least one choice (a response from the model)
assert len(chunk.choices) > 0, "Each ChatCompletionChunk should have at least one choice."
_assert_valid_chunk(chunk, idx, chunks)
elif isinstance(chunk, LettaUsageStatistics):
# Assert that the usage statistics contain valid token counts
assert chunk.completion_tokens > 0, "Completion tokens should be greater than 0 in LettaUsageStatistics."
assert chunk.prompt_tokens > 0, "Prompt tokens should be greater than 0 in LettaUsageStatistics."
assert chunk.total_tokens > 0, "Total tokens should be greater than 0 in LettaUsageStatistics."
assert chunk.step_count == 1, "Step count in LettaUsageStatistics should always be 1 for a single request."
elif isinstance(chunk, MessageStreamStatus):
# Assert that the stream ends with a 'done' status
assert chunk == MessageStreamStatus.done, "The last chunk should indicate the stream has completed."
assert idx == len(chunks) - 1, "The 'done' status must be the last chunk in the stream."
else:
# Fail the test if an unexpected chunk type is encountered
pytest.fail(f"Unexpected chunk type: {chunk}", pytrace=True)
@pytest.mark.asyncio
@pytest.mark.parametrize("message", ["Tell me something interesting about bananas.", "Roll a dice!"])
async def test_chat_completions_streaming_async(client, agent, message):
"""Tests chat completion streaming using the Async OpenAI client."""
request = _get_chat_request(agent.id, message)
async_client = AsyncOpenAI(base_url=f"{client.base_url}/openai/{client.api_prefix}", max_retries=0)
stream = await async_client.chat.completions.create(**request.model_dump(exclude_none=True))
async with stream:
async for chunk in stream:
if isinstance(chunk, ChatCompletionChunk):
assert chunk.choices, "Each ChatCompletionChunk should have at least one choice."
assert chunk.choices[0].delta.content, f"Chunk at index 0 has no content: {chunk.model_dump_json(indent=4)}"
else:
pytest.fail(f"Unexpected chunk type: {chunk}")
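The async test above drives Letta's OpenAI-compatible route with the stock AsyncOpenAI client; a synchronous sketch of the same idea, where the base URL, dummy API key, and model name are assumptions rather than values from this commit:

    from openai import OpenAI

    # Hypothetical endpoint; mirrors f"{client.base_url}/openai/{client.api_prefix}" from the test.
    client = OpenAI(base_url="http://localhost:8283/openai/v1", api_key="dummy", max_retries=0)
    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Tell me something interesting about bananas."}],
        stream=True,
    )
    for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")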

View File

@@ -0,0 +1,196 @@
import datetime
import json
import math
import os
import random
import uuid
import pytest
from faker import Faker
from tqdm import tqdm
from letta import create_client
from letta.orm import Base
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message
from letta.services.agent_manager import AgentManager
from letta.services.message_manager import MessageManager
from tests.integration_test_summarizer import LLM_CONFIG_DIR
@pytest.fixture(autouse=True)
def truncate_database():
from letta.server.server import db_context
with db_context() as session:
for table in reversed(Base.metadata.sorted_tables): # Reverse to avoid FK issues
session.execute(table.delete()) # Truncate table
session.commit()
@pytest.fixture(scope="function")
def client():
filename = os.path.join(LLM_CONFIG_DIR, "claude-3-5-sonnet.json")
config_data = json.load(open(filename, "r"))
llm_config = LLMConfig(**config_data)
client = create_client()
client.set_default_llm_config(llm_config)
client.set_default_embedding_config(EmbeddingConfig.default_config(provider="openai"))
yield client
def generate_tool_call_id():
"""Generates a unique tool call ID."""
return "toolu_" + uuid.uuid4().hex[:24]
def generate_timestamps(base_time):
"""Creates a sequence of timestamps for user, assistant, and tool messages."""
user_time = base_time
send_time = user_time + datetime.timedelta(seconds=random.randint(2, 5))
tool_time = send_time + datetime.timedelta(seconds=random.randint(1, 3))
next_group_time = tool_time + datetime.timedelta(seconds=random.randint(5, 10))
return user_time, send_time, tool_time, next_group_time
def get_conversation_pair():
fake = Faker()
return f"Where does {fake.name()} live?", f"{fake.address()}"
def create_user_message(agent_id, organization_id, message_text, timestamp):
"""Creates a user message dictionary."""
return {
"role": "user",
"content": [
{
"type": "text",
"text": json.dumps(
{"type": "user_message", "message": message_text, "time": timestamp.strftime("%Y-%m-%d %I:%M:%S %p PST-0800")}, indent=2
),
}
],
"organization_id": organization_id,
"agent_id": agent_id,
"model": None,
"name": None,
"tool_calls": None,
"tool_call_id": None,
}
def create_send_message(agent_id, organization_id, assistant_text, tool_call_id, timestamp):
"""Creates an assistant message dictionary."""
return {
"role": "assistant",
"content": [{"type": "text", "text": f"Assistant reply generated at {timestamp.strftime('%Y-%m-%d %I:%M:%S %p PST-0800')}."}],
"organization_id": organization_id,
"agent_id": agent_id,
"model": "claude-3-5-haiku-20241022",
"name": None,
"tool_calls": [
{
"id": tool_call_id,
"function": {
"name": "send_message",
"arguments": json.dumps(
{"message": assistant_text, "time": timestamp.strftime("%Y-%m-%d %I:%M:%S %p PST-0800")}, indent=2
),
},
"type": "function",
}
],
"tool_call_id": None,
}
def create_tool_message(agent_id, organization_id, tool_call_id, timestamp):
"""Creates a tool response message dictionary."""
return {
"role": "tool",
"content": [
{
"type": "text",
"text": json.dumps(
{"status": "OK", "message": "None", "time": timestamp.strftime("%Y-%m-%d %I:%M:%S %p PST-0800")}, indent=2
),
}
],
"organization_id": organization_id,
"agent_id": agent_id,
"model": "claude-3-5-haiku-20241022",
"name": "send_message",
"tool_calls": None,
"tool_call_id": tool_call_id,
}
@pytest.mark.parametrize("num_messages", [1000])
def test_many_messages_performance(client, num_messages):
"""Main test function to generate messages and insert them into the database."""
message_manager = MessageManager()
agent_manager = AgentManager()
actor = client.user
start_time = datetime.datetime.now()
last_event_time = start_time # Track last event time
def log_event(event):
nonlocal last_event_time
now = datetime.datetime.now()
total_elapsed = (now - start_time).total_seconds()
step_elapsed = (now - last_event_time).total_seconds()
print(f"[+{total_elapsed:.3f}s | Δ{step_elapsed:.3f}s] {event}")
last_event_time = now # Update last event time
log_event(f"Starting test with {num_messages} messages")
agent_state = client.create_agent(name="manager")
log_event(f"Created agent with ID {agent_state.id}")
message_group_size = 3
num_groups = math.ceil((num_messages - 4) / message_group_size)
base_time = datetime.datetime(2025, 2, 10, 16, 3, 22)
current_time = base_time
organization_id = "org-00000000-0000-4000-8000-000000000000"
all_messages = []
for _ in tqdm(range(num_groups)):
user_text, assistant_text = get_conversation_pair()
tool_call_id = generate_tool_call_id()
user_time, send_time, tool_time, current_time = generate_timestamps(current_time)
new_messages = [
Message(**create_user_message(agent_state.id, organization_id, user_text, user_time)),
Message(**create_send_message(agent_state.id, organization_id, assistant_text, tool_call_id, send_time)),
Message(**create_tool_message(agent_state.id, organization_id, tool_call_id, tool_time)),
]
all_messages.extend(new_messages)
log_event(f"Finished generating {len(all_messages)} messages")
message_manager.create_many_messages(all_messages, actor=actor)
log_event("Inserted messages into the database")
agent_manager._set_in_context_messages(
agent_id=agent_state.id, message_ids=agent_state.message_ids + [m.id for m in all_messages], actor=client.user
)
log_event("Updated agent context with messages")
messages = message_manager.list_messages_for_agent(agent_id=agent_state.id, actor=client.user, limit=1000000000)
log_event(f"Retrieved {len(messages)} messages from the database")
assert len(messages) >= num_groups * message_group_size
response = client.send_message(
agent_id=agent_state.id,
role="user",
message="What have we been talking about?",
)
log_event("Sent message to agent and received response")
assert response
log_event("Test completed successfully")

View File

@@ -54,7 +54,8 @@ def roll_dice_tool(client):
yield tool
def test_multi_agent_large(client, roll_dice_tool):
@pytest.mark.parametrize("num_workers", [50])
def test_multi_agent_large(client, roll_dice_tool, num_workers):
manager_tags = ["manager"]
worker_tags = ["helpers"]
@@ -72,7 +73,6 @@ def test_multi_agent_large(client, roll_dice_tool):
# Create 3 worker agents
worker_agents = []
num_workers = 50
for idx in tqdm(range(num_workers)):
worker_agent_state = client.create_agent(
name=f"worker-{idx}", include_multi_agent_tools=False, tags=worker_tags, tool_ids=[roll_dice_tool.id]

View File

@@ -1,21 +1,18 @@
import pytest
import httpx
import pytest
from dotenv import load_dotenv
from letta.embeddings import GoogleEmbeddings # Adjust the import based on your module structure
from dotenv import load_dotenv
load_dotenv()
import os
import pytest
import threading
import time
import uuid
import pytest
from letta_client import CreateBlock
from letta_client import Letta as LettaSDKClient
from letta_client import MessageCreate
import threading
SERVER_PORT = 8283
@@ -93,7 +90,6 @@ def test_archival_insert_text_embedding_004(client: LettaSDKClient):
)
print(res.messages)
# Retrieve the archival messages through the agent messaging API.
archived_messages = client.agents.messages.create(
agent_id=agent.id,
@@ -102,8 +98,8 @@ def test_archival_insert_text_embedding_004(client: LettaSDKClient):
print(archived_messages.messages)
# Assert that the archival message is present.
assert (
any(message.status == "success" for message in archived_messages.messages if message.message_type == "tool_return_message")
assert any(
message.status == "success" for message in archived_messages.messages if message.message_type == "tool_return_message"
), f"Archival message '{archival_message}' not found. Archived messages: {archived_messages}"
# Cleanup: Delete the agent.
@@ -141,7 +137,6 @@ def test_archival_insert_embedding_001(client: LettaSDKClient):
messages=[MessageCreate(role="user", content=f"archive : {archival_message}")],
)
# Retrieve the archival messages through the agent messaging API.
archived_messages = client.agents.messages.create(
agent_id=agent.id,
@@ -149,8 +144,8 @@ def test_archival_insert_embedding_001(client: LettaSDKClient):
)
# Assert that the archival message is present.
assert(
any(message.status == "success" for message in archived_messages.messages if message.message_type == "tool_return_message")
assert any(
message.status == "success" for message in archived_messages.messages if message.message_type == "tool_return_message"
), f"Archival message '{archival_message}' not found. Archived messages: {archived_messages}"
# Cleanup: Delete the agent.