chore: update version 0.8.5

2025-06-19 10:43:46 -07:00
parent 951ea59b30 08ef32ace2
commit 1ebb58cdf1
30 changed files with 313 additions and 7878 deletions
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -11,20 +11,25 @@ assignees: ''
 A clear and concise description of what the bug is.

 **Please describe your setup**
- [ ] How did you install letta?
-  - `pip install letta`? `pip install letta-nightly`? `git clone`?
+- [ ] How are you running Letta?  
+  - Docker 
+  - pip (legacy) 
+  - From source 
+  - Desktop  
 - [ ] Describe your setup
  - What's your OS (Windows/MacOS/Linux)?
-  - How are you running `letta`? (`cmd.exe`/Powershell/Anaconda Shell/Terminal)
+  - What is your `docker run ...` command (if applicable)?

 **Screenshots**
 If applicable, add screenshots to help explain your problem.

 **Additional context**
 Add any other context about the problem here.
+- What model are you using?
+
+**Agent File (optional)**
+Please attach your `.af` file, as this helps with reproducing issues. 

-**Letta Config**
-Please attach your `~/.letta/config` file or copy paste it below.

 ---

--- a/.github/workflows/notify-letta-cloud.yml
+++ b/.github/workflows/notify-letta-cloud.yml
@@ -1,19 +0,0 @@
-name: Notify Letta Cloud
-
-on:
-  push:
-    branches:
-      - main
-
-jobs:
-  notify:
-    runs-on: ubuntu-latest
-    if: ${{ !contains(github.event.head_commit.message, '[sync-skip]') }}
-    steps:
-      - name: Trigger repository_dispatch
-        run: |
-          curl -X POST \
-            -H "Authorization: token ${{ secrets.SYNC_PAT }}" \
-            -H "Accept: application/vnd.github.v3+json" \
-            https://api.github.com/repos/letta-ai/letta-cloud/dispatches \
-            -d '{"event_type":"oss-update"}'
--- a/.github/workflows/send-message-integration-tests.yaml
+++ b/.github/workflows/send-message-integration-tests.yaml
@@ -0,0 +1,155 @@
+name: Send Message SDK Tests
+on:
+  pull_request_target:
+    # branches: [main] # TODO: uncomment before merge
+    types: [labeled]
+    paths:
+      - 'letta/**'
+
+jobs:
+  send-messages:
+    # Only run when the "safe to test" label is applied
+    if: contains(github.event.pull_request.labels.*.name, 'safe to test')
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    strategy:
+      fail-fast: false
+      matrix:
+        config_file:
+          - "openai-gpt-4o-mini.json"
+          - "azure-gpt-4o-mini.json"
+          - "claude-3-5-sonnet.json"
+          - "claude-3-7-sonnet.json"
+          - "claude-3-7-sonnet-extended.json"
+          - "gemini-pro.json"
+          - "gemini-vertex.json"
+    services:
+      qdrant:
+        image: qdrant/qdrant
+        ports:
+          - 6333:6333
+      postgres:
+        image: pgvector/pgvector:pg17
+        ports:
+          - 5432:5432
+        env:
+          POSTGRES_HOST_AUTH_METHOD: trust
+          POSTGRES_DB: postgres
+          POSTGRES_USER: postgres
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+    
+    steps:
+      # Ensure secrets don't leak
+      - name: Configure git to hide secrets
+        run: |
+          git config --global core.logAllRefUpdates false
+          git config --global log.hideCredentials true
+      - name: Set up secret masking
+        run: |
+          # Automatically mask any environment variable ending with _KEY
+          for var in $(env | grep '_KEY=' | cut -d= -f1); do
+            value="${!var}"
+            if [[ -n "$value" ]]; then
+              # Mask the full value
+              echo "::add-mask::$value"
+              
+              # Also mask partial values (first and last several characters)
+              # This helps when only parts of keys appear in logs
+              if [[ ${#value} -gt 8 ]]; then
+                echo "::add-mask::${value:0:8}"
+                echo "::add-mask::${value:(-8)}"
+              fi
+              
+              # Also mask with common formatting changes
+              # Some logs might add quotes or other characters
+              echo "::add-mask::\"$value\""
+              echo "::add-mask::$value\""
+              echo "::add-mask::\"$value"
+              
+              echo "Masked secret: $var (length: ${#value})"
+            fi
+          done
+
+      # Check out base repository code, not the PR's code (for security)
+      - name: Checkout base repository
+        uses: actions/checkout@v4 # No ref specified means it uses base branch
+      
+      # Only extract relevant files from the PR (for security, specifically prevent modification of workflow files)
+      - name: Extract PR schema files
+        run: |
+          # Fetch PR without checking it out
+          git fetch origin pull/${{ github.event.pull_request.number }}/head:pr-${{ github.event.pull_request.number }}
+          
+          # Extract ONLY the letta/ directory from the PR; everything else (including workflow files) stays at the base revision
+          git checkout pr-${{ github.event.pull_request.number }} -- letta/
+      - name: Set up python 3.12
+        id: setup-python
+        uses: actions/setup-python@v5
+        with:
+          python-version: 3.12
+      - name: Load cached Poetry Binary
+        id: cached-poetry-binary
+        uses: actions/cache@v4
+        with:
+          path: ~/.local
+          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-1.8.3
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          version: 1.8.3
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+      - name: Load cached venv
+        id: cached-poetry-dependencies
+        uses: actions/cache@v4
+        with:
+          path: .venv
+          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}${{ inputs.install-args || '-E dev -E postgres -E external-tools -E tests -E cloud-tool-sandbox -E google' }}
+          # Restore cache with this prefix if not exact match with key
+          # Note cache-hit returns false in this case, so the below step will run
+          restore-keys: |
+            venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-
+      - name: Install dependencies
+        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
+        shell: bash
+        run: poetry install --no-interaction --no-root ${{ inputs.install-args || '-E dev -E postgres -E external-tools -E tests -E cloud-tool-sandbox -E google' }}
+      - name: Install letta packages via Poetry
+        run: |
+          poetry run pip install --upgrade letta-client letta
+      - name: Migrate database
+        env:
+          LETTA_PG_PORT: 5432
+          LETTA_PG_USER: postgres
+          LETTA_PG_PASSWORD: postgres
+          LETTA_PG_DB: postgres
+          LETTA_PG_HOST: localhost
+        run: |
+          psql -h localhost -U postgres -d postgres -c 'CREATE EXTENSION vector'
+          poetry run alembic upgrade head
+      - name: Run integration tests for ${{ matrix.config_file }}
+        env:
+          LLM_CONFIG_FILE: ${{ matrix.config_file }}
+          LETTA_PG_PORT: 5432
+          LETTA_PG_USER: postgres
+          LETTA_PG_PASSWORD: postgres
+          LETTA_PG_DB: postgres
+          LETTA_PG_HOST: localhost
+          LETTA_SERVER_PASS: test_server_token
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
+          AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          COMPOSIO_API_KEY: ${{ secrets.COMPOSIO_API_KEY }}
+          DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
+          GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
+          GOOGLE_CLOUD_LOCATION: ${{ secrets.GOOGLE_CLOUD_LOCATION }}
+        run: |
+          poetry run pytest \
+            -s -vv \
+            tests/integration_test_send_message.py \
+            --maxfail=1 --durations=10
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -28,7 +28,7 @@ First, install Poetry using [the official instructions here](https://python-poet
 Once Poetry is installed, navigate to the letta directory and install the Letta project with Poetry:
 ```shell
 cd letta
-poetry shell
+eval $(poetry env activate)
 poetry install --all-extras
 ```
 #### Setup PostgreSQL environment (optional)
--- a/README.md
+++ b/README.md
@@ -8,26 +8,13 @@

 <div align="center">
 <h1>Letta (previously MemGPT)</h1>
-
-**☄️ New release: Letta Agent Development Environment (_read more [here](#-access-the-ade-agent-development-environment)_) ☄️**
-
-<p align="center">
-  <picture>
-    <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/letta-ai/letta/refs/heads/main/assets/example_ade_screenshot.png">
-    <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/letta-ai/letta/refs/heads/main/assets/example_ade_screenshot_light.png">
-    <img alt="Letta logo" src="https://raw.githubusercontent.com/letta-ai/letta/refs/heads/main/assets/example_ade_screenshot.png" width="800">
-  </picture>
-</p>
-
---
-
 <h3>

 [Homepage](https://letta.com) // [Documentation](https://docs.letta.com) // [ADE](https://docs.letta.com/agent-development-environment) // [Letta Cloud](https://forms.letta.com/early-access)

 </h3>

-**👾 Letta** is an open source framework for building stateful LLM applications. You can use Letta to build **stateful agents** with advanced reasoning capabilities and transparent long-term memory. The Letta framework is white box and model-agnostic.
+**👾 Letta** is an open source framework for building **stateful agents** with advanced reasoning capabilities and transparent long-term memory. The Letta framework is white box and model-agnostic.

 [![Discord](https://img.shields.io/discord/1161736243340640419?label=Discord&logo=discord&logoColor=5865F2&style=flat-square&color=5865F2)](https://discord.gg/letta)
 [![Twitter Follow](https://img.shields.io/badge/Follow-%40Letta__AI-1DA1F2?style=flat-square&logo=x&logoColor=white)](https://twitter.com/Letta_AI)
@@ -157,7 +144,7 @@ No, the data in your Letta server database stays on your machine. The Letta ADE

 > _"Do I have to use your ADE? Can I build my own?"_

-The ADE is built on top of the (fully open source) Letta server and Letta Agents API. You can build your own application like the ADE on top of the REST API (view the documention [here](https://docs.letta.com/api-reference)).
+The ADE is built on top of the (fully open source) Letta server and Letta Agents API. You can build your own application like the ADE on top of the REST API (view the documentation [here](https://docs.letta.com/api-reference)).

 > _"Can I interact with Letta agents via the CLI?"_

--- a/dev-compose.yaml
+++ b/dev-compose.yaml
@@ -28,7 +28,6 @@ services:
      - "8083:8083"
      - "8283:8283"
    environment:
-      - SERPAPI_API_KEY=${SERPAPI_API_KEY}
      - LETTA_PG_DB=${LETTA_PG_DB:-letta}
      - LETTA_PG_USER=${LETTA_PG_USER:-letta}
      - LETTA_PG_PASSWORD=${LETTA_PG_PASSWORD:-letta}
--- a/examples/docs/example.py
+++ b/examples/docs/example.py
@@ -8,6 +8,7 @@ If you're using Letta Cloud, replace 'baseURL' with 'token'
 See: https://docs.letta.com/api-reference/overview

 Execute this script using `poetry run python3 example.py`
+This will install `letta_client` and other dependencies.
 """
 client = Letta(
    base_url="http://localhost:8283",
--- a/examples/mcp_example.py
+++ b/examples/mcp_example.py
@@ -2,22 +2,33 @@ from pprint import pprint

 from letta_client import Letta

+# Connect to Letta server
 client = Letta(base_url="http://localhost:8283")

+# Use the "everything" mcp server:
+# https://github.com/modelcontextprotocol/servers/tree/main/src/everything
 mcp_server_name = "everything"
 mcp_tool_name = "echo"

+# List all McpTool belonging to the "everything" mcp server.
 mcp_tools = client.tools.list_mcp_tools_by_server(
    mcp_server_name=mcp_server_name,
 )
+
+# We can see that "echo" is one of the tools, but it's not
+# a letta tool that can be attached to an agent (it has no tool id).
 for tool in mcp_tools:
    pprint(tool)

+# Create a Tool (with a tool id) using the server and tool names.
 mcp_tool = client.tools.add_mcp_tool(
    mcp_server_name=mcp_server_name,
    mcp_tool_name=mcp_tool_name
 )

+# Create an agent with the tool, using tool.id -- note that
+# this is the ONLY tool in the agent, you typically want to
+# also include the default tools.
 agent = client.agents.create(
    memory_blocks=[
        {
@@ -31,6 +42,7 @@ agent = client.agents.create(
 )
 print(f"Created agent id {agent.id}")

+# Ask the agent to call the tool.
 response = client.agents.messages.create(
    agent_id=agent.id,
    messages=[
--- a/examples/notebooks/Customizing
+++ b/examples/notebooks/Customizing
@@ -253,15 +253,18 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
   "id": "7808912f-831b-4cdc-8606-40052eb809b4",
   "metadata": {},
   "outputs": [],
   "source": [
-    "from typing import Optional, List\n",
+    "from typing import Optional, List, TYPE_CHECKING\n",
    "import json\n",
    "\n",
-    "def task_queue_push(self: \"Agent\", task_description: str):\n",
+    "if TYPE_CHECKING:\n",
+    "    from letta import AgentState\n",
+    "\n",
+    "def task_queue_push(agent_state: \"AgentState\", task_description: str):\n",
    "    \"\"\"\n",
    "    Push to a task queue stored in core memory. \n",
    "\n",
@@ -273,12 +276,12 @@
    "        does not produce a response.\n",
    "    \"\"\"\n",
    "    import json\n",
-    "    tasks = json.loads(self.memory.get_block(\"tasks\").value)\n",
+    "    tasks = json.loads(agent_state.memory.get_block(\"tasks\").value)\n",
    "    tasks.append(task_description)\n",
-    "    self.memory.update_block_value(\"tasks\", json.dumps(tasks))\n",
+    "    agent_state.memory.update_block_value(\"tasks\", json.dumps(tasks))\n",
    "    return None\n",
    "\n",
-    "def task_queue_pop(self: \"Agent\"):\n",
+    "def task_queue_pop(agent_state: \"AgentState\"):\n",
    "    \"\"\"\n",
    "    Get the next task from the task queue \n",
    "\n",
@@ -288,12 +291,12 @@
    "        None (the task queue is empty)\n",
    "    \"\"\"\n",
    "    import json\n",
-    "    tasks = json.loads(self.memory.get_block(\"tasks\").value)\n",
+    "    tasks = json.loads(agent_state.memory.get_block(\"tasks\").value)\n",
    "    if len(tasks) == 0: \n",
    "        return None\n",
    "    task = tasks[0]\n",
    "    print(\"CURRENT TASKS: \", tasks)\n",
-    "    self.memory.update_block_value(\"tasks\", json.dumps(tasks[1:]))\n",
+    "    agent_state.memory.update_block_value(\"tasks\", json.dumps(tasks[1:]))\n",
    "    return task\n",
    "\n",
    "push_task_tool = client.tools.upsert_from_function(func=task_queue_push)\n",
@@ -310,7 +313,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
   "id": "135fcf3e-59c4-4da3-b86b-dbffb21aa343",
   "metadata": {},
   "outputs": [],
@@ -336,10 +339,12 @@
    "        ),\n",
    "        CreateBlock(\n",
    "            label=\"tasks\",\n",
-    "            value=\"\",\n",
+    "            value=\"[]\",\n",
    "        ),\n",
    "    ],\n",
    "    tool_ids=[push_task_tool.id, pop_task_tool.id],\n",
+    "    model=\"letta/letta-free\",\n",
+    "    embedding=\"letta/letta-free\",\n",
    ")"
   ]
  },
--- a/letta/__init__.py
+++ b/letta/__init__.py
@@ -1,6 +1,6 @@
 import os

-__version__ = "0.7.14"
+__version__ = "0.8.5"

 if os.environ.get("LETTA_VERSION"):
    __version__ = os.environ["LETTA_VERSION"]
@@ -9,7 +9,7 @@ if os.environ.get("LETTA_VERSION"):
 # import clients
 from letta.client.client import RESTClient

-# # imports for easier access
+# imports for easier access
 from letta.schemas.agent import AgentState
 from letta.schemas.block import Block
 from letta.schemas.embedding_config import EmbeddingConfig
--- a/letta/__main__.py
+++ b/letta/__main__.py
@@ -1,3 +0,0 @@
-from .main import app
-
-app()
--- a/letta/embeddings.py
+++ b/letta/embeddings.py
@@ -235,7 +235,9 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None

    if endpoint_type == "openai":
        return OpenAIEmbeddings(
-            api_key=model_settings.openai_api_key, model=config.embedding_model, base_url=model_settings.openai_api_base
+            api_key=model_settings.openai_api_key,
+            model=config.embedding_model,
+            base_url=model_settings.openai_api_base,
        )

    elif endpoint_type == "azure":
--- a/letta/functions/function_sets/base.py
+++ b/letta/functions/function_sets/base.py
@@ -46,7 +46,7 @@ def conversation_search(self: "Agent", query: str, page: Optional[int] = 0) -> O
    count = RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE
    # TODO: add paging by page number. currently cursor only works with strings.
    # original: start=page * count
-    messages = self.message_manager.list_user_messages_for_agent(
+    messages = self.message_manager.list_messages_for_agent(
        agent_id=self.agent_state.id,
        actor=self.user,
        query_text=query,
--- a/letta/llm_api/anthropic.py
+++ b/letta/llm_api/anthropic.py
@@ -55,6 +55,18 @@ BASE_URL = "https://api.anthropic.com/v1"
 # https://docs.anthropic.com/claude/docs/models-overview
 # Sadly hardcoded
 MODEL_LIST = [
+    {
+        "name": "claude-opus-4-20250514",
+        "context_window": 200000,
+    },
+    {
+        "name": "claude-sonnet-4-20250514",
+        "context_window": 200000,
+    },
+    {
+        "name": "claude-3-5-haiku-20241022",
+        "context_window": 200000,
+    },
    ## Opus
    {
        "name": "claude-3-opus-20240229",
--- a/letta/llm_api/anthropic_client.py
+++ b/letta/llm_api/anthropic_client.py
@@ -243,7 +243,8 @@ class AnthropicClient(LLMClientBase):
        # Move 'system' to the top level
        if messages[0].role != "system":
            raise RuntimeError(f"First message is not a system message, instead has role {messages[0].role}")
-        data["system"] = messages[0].content if isinstance(messages[0].content, str) else messages[0].content[0].text
+        system_content = messages[0].content if isinstance(messages[0].content, str) else messages[0].content[0].text
+        data["system"] = self._add_cache_control_to_system_message(system_content)
        data["messages"] = [
            m.to_anthropic_dict(
                inner_thoughts_xml_tag=inner_thoughts_xml_tag,
@@ -492,6 +493,22 @@ class AnthropicClient(LLMClientBase):

        return chat_completion_response

+    def _add_cache_control_to_system_message(self, system_content):
+        """Return system content with an ephemeral cache_control marker; the input is never mutated."""
+        if isinstance(system_content, str):
+            # For string content, convert to list format with cache control
+            return [{"type": "text", "text": system_content, "cache_control": {"type": "ephemeral"}}]
+        elif isinstance(system_content, list):
+            # Shallow-copy the list, then rebuild (not mutate) the last text block so the caller's dicts stay untouched
+            cached_content = system_content.copy()
+            for i in range(len(cached_content) - 1, -1, -1):
+                if cached_content[i].get("type") == "text":
+                    cached_content[i] = {**cached_content[i], "cache_control": {"type": "ephemeral"}}
+                    break
+            return cached_content
+
+        return system_content
+

 def convert_tools_to_anthropic_format(tools: List[OpenAITool]) -> List[dict]:
    """See: https://docs.anthropic.com/claude/docs/tool-use
--- a/letta/llm_api/aws_bedrock.py
+++ b/letta/llm_api/aws_bedrock.py
@@ -3,14 +3,19 @@ from typing import Any, Dict, List, Optional

 from anthropic import AnthropicBedrock

+from letta.log import get_logger
 from letta.settings import model_settings

+logger = get_logger(__name__)
+

 def has_valid_aws_credentials() -> bool:
    """
    Check if AWS credentials are properly configured.
    """
-    valid_aws_credentials = os.getenv("AWS_ACCESS_KEY") and os.getenv("AWS_SECRET_ACCESS_KEY") and os.getenv("AWS_REGION")
+    valid_aws_credentials = all(
+        os.getenv(name) for name in ("AWS_ACCESS_KEY", "AWS_SECRET_ACCESS_KEY", "AWS_REGION")  # unset or empty => invalid
+    )
    return valid_aws_credentials


@@ -24,6 +29,7 @@ def get_bedrock_client(
    """
    import boto3

+    logger.debug(f"Getting Bedrock client for {model_settings.aws_region}")
    sts_client = boto3.client(
        "sts",
        aws_access_key_id=access_key or model_settings.aws_access_key,
@@ -55,12 +61,13 @@ def bedrock_get_model_list(region_name: str) -> List[dict]:
    """
    import boto3

+    logger.debug(f"Getting model list for {region_name}")
    try:
        bedrock = boto3.client("bedrock", region_name=region_name)
        response = bedrock.list_inference_profiles()
        return response["inferenceProfileSummaries"]
    except Exception as e:
-        print(f"Error getting model list: {str(e)}")
+        logger.exception("Error getting model list: %s", e)
        raise e


@@ -71,6 +78,7 @@ def bedrock_get_model_details(region_name: str, model_id: str) -> Dict[str, Any]
    import boto3
    from botocore.exceptions import ClientError

+    logger.debug(f"Getting model details for {model_id}")
    try:
        bedrock = boto3.client("bedrock", region_name=region_name)
        response = bedrock.get_foundation_model(modelIdentifier=model_id)
--- a/letta/orm/sqlalchemy_base.py
+++ b/letta/orm/sqlalchemy_base.py
@@ -490,16 +490,21 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
        Raises:
            NoResultFound: if the object is not found
        """
+        from letta.settings import settings
+
        identifiers = [] if identifier is None else [identifier]
        query, query_conditions = cls._read_multiple_preprocess(identifiers, actor, access, access_type, check_is_deleted, **kwargs)
        if query is None:
            raise NoResultFound(f"{cls.__name__} not found with identifier {identifier}")
-        await db_session.execute(text("SET LOCAL enable_seqscan = OFF"))
+
+        if settings.letta_pg_uri_no_default:
+            await db_session.execute(text("SET LOCAL enable_seqscan = OFF"))
        try:
            result = await db_session.execute(query)
            item = result.scalar_one_or_none()
        finally:
-            await db_session.execute(text("SET LOCAL enable_seqscan = ON"))
+            if settings.letta_pg_uri_no_default:
+                await db_session.execute(text("SET LOCAL enable_seqscan = ON"))

        if item is None:
            raise NoResultFound(f"{cls.__name__} not found with {', '.join(query_conditions if query_conditions else ['no conditions'])}")
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -75,7 +75,8 @@ class LLMConfig(BaseModel):
        description="The reasoning effort to use when generating text reasoning models",
    )
    max_reasoning_tokens: int = Field(
-        0, description="Configurable thinking budget for extended thinking, only used if enable_reasoner is True. Minimum value is 1024."
+        0,
+        description="Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.",
    )

    # FIXME hack to silence pydantic protected namespace warning
--- a/letta/schemas/providers.py
+++ b/letta/schemas/providers.py
@@ -299,7 +299,7 @@ class OpenAIProvider(Provider):

            # for openai, filter models
            if self.base_url == "https://api.openai.com/v1":
-                allowed_types = ["gpt-4", "o1", "o3"]
+                allowed_types = ["gpt-4", "o1", "o3", "o4"]
                # NOTE: o1-mini and o1-preview do not support tool calling
                # NOTE: o1-pro is only available in Responses API
                disallowed_types = ["transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro"]
--- a/letta/server/rest_api/routers/openai/chat_completions/chat_completions.py
+++ b/letta/server/rest_api/routers/openai/chat_completions/chat_completions.py
@@ -30,9 +30,7 @@ logger = get_logger(__name__)
    responses={
        200: {
            "description": "Successful response",
-            "content": {
-                "text/event-stream": {"description": "Server-Sent Events stream"},
-            },
+            "content": {"text/event-stream": {}},
        }
    },
 )
--- a/letta/server/rest_api/routers/v1/tools.py
+++ b/letta/server/rest_api/routers/v1/tools.py
@@ -101,6 +101,21 @@ async def list_tools(
        raise HTTPException(status_code=500, detail=str(e))


+@router.get("/count", response_model=int, operation_id="count_tools")
+def count_tools(
+    server: SyncServer = Depends(get_letta_server),
+    actor_id: Optional[str] = Header(None, alias="user_id"),
+) -> int:
+    """
+    Get a count of all tools available to agents belonging to the org of the user
+    """
+    try:
+        return server.tool_manager.size(actor=server.user_manager.get_user_or_default(user_id=actor_id))
+    except Exception as e:
+        print(f"Error occurred: {e}")
+        raise HTTPException(status_code=500, detail=str(e)) from e  # chain the root cause for server-side tracebacks
+
+
 @router.post("/", response_model=Tool, operation_id="create_tool")
 async def create_tool(
    request: ToolCreate = Body(...),
--- a/letta/server/rest_api/routers/v1/voice.py
+++ b/letta/server/rest_api/routers/v1/voice.py
@@ -25,9 +25,7 @@ logger = get_logger(__name__)
    responses={
        200: {
            "description": "Successful response",
-            "content": {
-                "text/event-stream": {"description": "Server-Sent Events stream"},
-            },
+            "content": {"text/event-stream": {}},
        }
    },
 )
--- a/letta/services/agent_manager.py
+++ b/letta/services/agent_manager.py
@@ -2635,7 +2635,7 @@ class AgentManager:
        agent_state = await self.rebuild_system_prompt_async(agent_id=agent_id, actor=actor, force=True)
        calculator = ContextWindowCalculator()

-        if os.getenv("LETTA_ENVIRONMENT") == "PRODUCTION" or agent_state.llm_config.model_endpoint_type == "anthropic":
+        if os.getenv("LETTA_ENVIRONMENT") == "PRODUCTION" and agent_state.llm_config.model_endpoint_type == "anthropic":
            anthropic_client = LLMClient.create(provider_type=ProviderType.anthropic, actor=actor)
            model = agent_state.llm_config.model if agent_state.llm_config.model_endpoint_type == "anthropic" else None

--- a/letta/services/user_manager.py
+++ b/letta/services/user_manager.py
@@ -13,6 +13,7 @@ from letta.otel.tracing import trace_method
 from letta.schemas.user import User as PydanticUser
 from letta.schemas.user import UserUpdate
 from letta.server.db import db_registry
+from letta.settings import settings
 from letta.utils import enforce_types

 logger = get_logger(__name__)
@@ -157,13 +158,15 @@ class UserManager:
        """Fetch a user by ID asynchronously."""
        async with db_registry.async_session() as session:
            # Turn off seqscan to force use pk index
-            await session.execute(text("SET LOCAL enable_seqscan = OFF"))
+            if settings.letta_pg_uri_no_default:
+                await session.execute(text("SET LOCAL enable_seqscan = OFF"))
            try:
                stmt = select(UserModel).where(UserModel.id == actor_id)
                result = await session.execute(stmt)
                user = result.scalar_one_or_none()
            finally:
-                await session.execute(text("SET LOCAL enable_seqscan = ON"))
+                if settings.letta_pg_uri_no_default:
+                    await session.execute(text("SET LOCAL enable_seqscan = ON"))

            if not user:
                raise NoResultFound(f"User not found with id={actor_id}")
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "letta"
-version = "0.7.14"
+version = "0.8.5"
 packages = [
    {include = "letta"},
 ]
@@ -98,6 +98,7 @@ granian = {version = "^2.3.2", extras = ["uvloop", "reload"], optional = true}
 redis = {version = "^6.2.0", optional = true}
 structlog = "^25.4.0"
 certifi = "^2025.6.15"
+aiosqlite = "^0.21.0"


 [tool.poetry.extras]
@@ -114,6 +115,7 @@ google = ["google-genai"]
 desktop = ["pgvector", "pg8000", "psycopg2-binary", "psycopg2", "pyright", "websockets", "fastapi", "uvicorn", "docker", "langchain", "wikipedia", "langchain-community", "locust"]
 all = ["pgvector", "pg8000", "psycopg2-binary", "psycopg2", "pytest", "pytest-asyncio", "pexpect", "black", "pre-commit", "pyright", "pytest-order", "autoflake", "isort", "websockets", "fastapi", "uvicorn", "docker", "langchain", "wikipedia", "langchain-community", "locust", "uvloop", "granian", "redis"]

+
 [tool.poetry.group.dev.dependencies]
 black = "^24.4.2"
 ipykernel = "^6.29.5"
--- a/scripts/docker-compose.yml
+++ b/scripts/docker-compose.yml
@@ -0,0 +1,32 @@
+version: '3.7'
+services:
+  redis:
+    image: redis:alpine
+    container_name: redis
+    healthcheck:
+      test: ['CMD-SHELL', 'redis-cli ping | grep PONG']
+      interval: 1s
+      timeout: 3s
+      retries: 5
+    ports:
+      - '6379:6379'
+    volumes:
+      - ./data/redis:/data
+    command: redis-server --appendonly yes
+  postgres:
+    image: ankane/pgvector
+    container_name: postgres
+    healthcheck:
+      test: ['CMD-SHELL', 'pg_isready -U postgres']
+      interval: 1s
+      timeout: 3s
+      retries: 5
+    ports:
+      - '5432:5432'
+    environment:
+      POSTGRES_USER: postgres
+      POSTGRES_PASSWORD: postgres
+      POSTGRES_DB: letta
+    volumes:
+      - ./data/postgres:/var/lib/postgresql/data
+      - ./scripts/postgres-db-init/init.sql:/docker-entrypoint-initdb.d/init.sql
--- a/tests/integration_test_sleeptime_agent.py
+++ b/tests/integration_test_sleeptime_agent.py
@@ -156,6 +156,7 @@ async def test_sleeptime_group_chat(server, actor):

    # 6. Verify run status after sleep
    time.sleep(2)
+
    for run_id in run_ids:
        job = server.job_manager.get_job_by_id(job_id=run_id, actor=actor)
        assert job.status == JobStatus.running or job.status == JobStatus.completed
--- a/tests/test_base_functions.py
+++ b/tests/test_base_functions.py
@@ -151,6 +151,7 @@ def test_archival(agent_obj):
 def test_recall(server, agent_obj, default_user):
    """Test that an agent can recall messages using a keyword via conversation search."""
    keyword = "banana"
+    "".join(reversed(keyword))

    # Send messages
    for msg in ["hello", keyword, "tell me a fun fact"]:
--- a/tests/test_tool_sandbox/restaurant_management_system/adjust_menu_prices.py
+++ b/tests/test_tool_sandbox/restaurant_management_system/adjust_menu_prices.py
@@ -8,10 +8,9 @@ def adjust_menu_prices(percentage: float) -> str:
        str: A formatted string summarizing the price adjustments.
    """
    import cowsay
-    from tqdm import tqdm
-
    from core.menu import Menu, MenuItem  # Import a class from the codebase
    from core.utils import format_currency  # Use a utility function to test imports
+    from tqdm import tqdm

    if not isinstance(percentage, (int, float)):
        raise TypeError("percentage must be a number")