feat: Add built in code interpreter tool (#2252)

This commit is contained in:
Matthew Zhou
2025-05-20 07:01:40 +08:00
committed by GitHub
parent 0a54b998a8
commit 6e5ab8b151
13 changed files with 337 additions and 528 deletions

View File

@@ -18,6 +18,7 @@ from letta.constants import (
LETTA_CORE_TOOL_MODULE_NAME,
LETTA_MULTI_AGENT_TOOL_MODULE_NAME,
LLM_MAX_TOKENS,
READ_ONLY_BLOCK_EDIT_ERROR,
REQ_HEARTBEAT_MESSAGE,
SEND_MESSAGE_TOOL_NAME,
)

View File

@@ -442,6 +442,7 @@ class LettaAgent(BaseAgent):
ToolType.LETTA_MULTI_AGENT_CORE,
ToolType.LETTA_SLEEPTIME_CORE,
ToolType.LETTA_VOICE_SLEEPTIME_CORE,
ToolType.LETTA_BUILTIN,
}
or (t.tool_type == ToolType.LETTA_MULTI_AGENT_CORE and t.name == "send_message_to_agents_matching_tags")
or (t.tool_type == ToolType.EXTERNAL_COMPOSIO)

View File

@@ -19,6 +19,7 @@ MCP_TOOL_TAG_NAME_PREFIX = "mcp" # full format, mcp:server_name
LETTA_CORE_TOOL_MODULE_NAME = "letta.functions.function_sets.base"
LETTA_MULTI_AGENT_TOOL_MODULE_NAME = "letta.functions.function_sets.multi_agent"
LETTA_VOICE_TOOL_MODULE_NAME = "letta.functions.function_sets.voice"
LETTA_BUILTIN_TOOL_MODULE_NAME = "letta.functions.function_sets.builtin"
# String in the error message for when the context window is too large
@@ -83,9 +84,19 @@ BASE_VOICE_SLEEPTIME_TOOLS = [
]
# Multi agent tools
MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_tags", "send_message_to_agent_async"]
# Built in tools
BUILTIN_TOOLS = ["run_code"]
# Set of all built-in Letta tools
LETTA_TOOL_SET = set(
BASE_TOOLS + BASE_MEMORY_TOOLS + MULTI_AGENT_TOOLS + BASE_SLEEPTIME_TOOLS + BASE_VOICE_SLEEPTIME_TOOLS + BASE_VOICE_SLEEPTIME_CHAT_TOOLS
BASE_TOOLS
+ BASE_MEMORY_TOOLS
+ MULTI_AGENT_TOOLS
+ BASE_SLEEPTIME_TOOLS
+ BASE_VOICE_SLEEPTIME_TOOLS
+ BASE_VOICE_SLEEPTIME_CHAT_TOOLS
+ BUILTIN_TOOLS
)
# The name of the tool used to send message to the user

View File

@@ -0,0 +1,15 @@
from typing import Literal
def run_code(code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
    """
    Execute a code snippet inside a sandbox and return its output.

    Supported languages: Python, Javascript, Typescript, R, and Java.

    Args:
        code (str): The source code to execute.
        language (Literal["python", "js", "ts", "r", "java"]): Language the
            snippet is written in.

    Returns:
        str: Combined execution output — stdout, stderr, and any error traces.
    """
    # Stub: the real implementation is dispatched server-side via
    # LettaBuiltinToolExecutor; this module only supplies the JSON schema.
    raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")

View File

@@ -8,6 +8,7 @@ class ToolType(str, Enum):
LETTA_MULTI_AGENT_CORE = "letta_multi_agent_core"
LETTA_SLEEPTIME_CORE = "letta_sleeptime_core"
LETTA_VOICE_SLEEPTIME_CORE = "letta_voice_sleeptime_core"
LETTA_BUILTIN = "letta_builtin"
EXTERNAL_COMPOSIO = "external_composio"
EXTERNAL_LANGCHAIN = "external_langchain"
# TODO is "external" the right name here? Since as of now, MCP is local / doesn't support remote?

View File

@@ -5,6 +5,7 @@ from pydantic import Field, model_validator
from letta.constants import (
COMPOSIO_TOOL_TAG_NAME,
FUNCTION_RETURN_CHAR_LIMIT,
LETTA_BUILTIN_TOOL_MODULE_NAME,
LETTA_CORE_TOOL_MODULE_NAME,
LETTA_MULTI_AGENT_TOOL_MODULE_NAME,
LETTA_VOICE_TOOL_MODULE_NAME,
@@ -104,6 +105,9 @@ class Tool(BaseTool):
elif self.tool_type in {ToolType.LETTA_VOICE_SLEEPTIME_CORE}:
# If it's letta voice tool, we generate the json_schema on the fly here
self.json_schema = get_json_schema_from_module(module_name=LETTA_VOICE_TOOL_MODULE_NAME, function_name=self.name)
elif self.tool_type in {ToolType.LETTA_BUILTIN}:
# If it's a letta builtin tool, we generate the json_schema on the fly here
self.json_schema = get_json_schema_from_module(module_name=LETTA_BUILTIN_TOOL_MODULE_NAME, function_name=self.name)
# At this point, we need to validate that at least json_schema is populated
if not self.json_schema:

View File

@@ -189,7 +189,7 @@ async def upsert_base_tools(
Upsert base tools
"""
actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
return await server.tool_manager.upsert_base_tools_async(actor=actor)
return server.tool_manager.upsert_base_tools(actor=actor)
@router.post("/run", response_model=ToolReturnMessage, operation_id="run_tool_from_source")

View File

@@ -11,6 +11,7 @@ from letta.schemas.user import User
from letta.services.tool_executor.tool_executor import (
ExternalComposioToolExecutor,
ExternalMCPToolExecutor,
LettaBuiltinToolExecutor,
LettaCoreToolExecutor,
LettaMultiAgentToolExecutor,
SandboxToolExecutor,
@@ -28,6 +29,7 @@ class ToolExecutorFactory:
ToolType.LETTA_MEMORY_CORE: LettaCoreToolExecutor,
ToolType.LETTA_SLEEPTIME_CORE: LettaCoreToolExecutor,
ToolType.LETTA_MULTI_AGENT_CORE: LettaMultiAgentToolExecutor,
ToolType.LETTA_BUILTIN: LettaBuiltinToolExecutor,
ToolType.EXTERNAL_COMPOSIO: ExternalComposioToolExecutor,
ToolType.EXTERNAL_MCP: ExternalMCPToolExecutor,
}
@@ -100,7 +102,7 @@ class ToolExecutionManager:
try:
executor = ToolExecutorFactory.get_executor(tool.tool_type)
# TODO: Extend this async model to composio
if isinstance(executor, (SandboxToolExecutor, ExternalComposioToolExecutor)):
if isinstance(executor, (SandboxToolExecutor, ExternalComposioToolExecutor, LettaBuiltinToolExecutor)):
result = await executor.execute(function_name, function_args, self.agent_state, tool, self.actor)
else:
result = executor.execute(function_name, function_args, self.agent_state, tool, self.actor)

View File

@@ -1,7 +1,7 @@
import math
import traceback
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional
from typing import Any, Dict, Literal, Optional
from letta.constants import (
COMPOSIO_ENTITY_ENV_VAR_KEY,
@@ -674,3 +674,48 @@ class SandboxToolExecutor(ToolExecutor):
func_return=error_message,
stderr=[stderr],
)
class LettaBuiltinToolExecutor(ToolExecutor):
    """Executor for built in Letta tools."""

    async def execute(
        self,
        function_name: str,
        function_args: dict,
        agent_state: AgentState,
        tool: Tool,
        actor: User,
        sandbox_config: Optional[SandboxConfig] = None,
        sandbox_env_vars: Optional[Dict[str, Any]] = None,
    ) -> ToolExecutionResult:
        """Dispatch a built-in tool call to its local implementation.

        Args:
            function_name: Name of the built-in tool being invoked.
            function_args: Keyword arguments parsed from the tool call.
            agent_state: State of the calling agent (unused here).
            tool: Tool record being executed (unused here).
            actor: User on whose behalf the tool runs (unused here).
            sandbox_config: Ignored; built-ins manage their own sandbox.
            sandbox_env_vars: Ignored; built-ins manage their own sandbox.

        Returns:
            ToolExecutionResult with status "success" and the function's
            string return value.

        Raises:
            ValueError: If function_name is not a known built-in.
        """
        # Map of built-in tool names to their async implementations.
        function_map = {
            "run_code": self.run_code,
        }

        if function_name not in function_map:
            raise ValueError(f"Unknown function: {function_name}")

        # Execute the appropriate function
        function_args_copy = function_args.copy()  # Make a copy to avoid modifying the original
        function_response = await function_map[function_name](**function_args_copy)

        return ToolExecutionResult(
            status="success",
            func_return=function_response,
        )

    async def run_code(self, code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
        """Run *code* in an E2B cloud sandbox and return the stringified result.

        Raises:
            ValueError: If no E2B API key is configured.
        """
        from e2b_code_interpreter import AsyncSandbox

        if tool_settings.e2b_api_key is None:
            raise ValueError("E2B_API_KEY is not set")

        sbx = await AsyncSandbox.create(api_key=tool_settings.e2b_api_key)
        try:
            params = {"code": code}
            if language != "python":
                # Leave empty for python
                params["language"] = language
            res = await sbx.run_code(**params)
        finally:
            # Tear the sandbox down explicitly: the original leaked it, leaving
            # the remote sandbox running (and billing) until its idle timeout.
            await sbx.kill()
        return str(res)

View File

@@ -9,6 +9,7 @@ from letta.constants import (
BASE_TOOLS,
BASE_VOICE_SLEEPTIME_CHAT_TOOLS,
BASE_VOICE_SLEEPTIME_TOOLS,
BUILTIN_TOOLS,
LETTA_TOOL_SET,
MCP_TOOL_TAG_NAME_PREFIX,
MULTI_AGENT_TOOLS,
@@ -307,7 +308,7 @@ class ToolManager:
def upsert_base_tools(self, actor: PydanticUser) -> List[PydanticTool]:
"""Add default tools in base.py and multi_agent.py"""
functions_to_schema = {}
module_names = ["base", "multi_agent", "voice"]
module_names = ["base", "multi_agent", "voice", "builtin"]
for module_name in module_names:
full_module_name = f"letta.functions.function_sets.{module_name}"
@@ -343,67 +344,8 @@ class ToolManager:
elif name in BASE_VOICE_SLEEPTIME_TOOLS or name in BASE_VOICE_SLEEPTIME_CHAT_TOOLS:
tool_type = ToolType.LETTA_VOICE_SLEEPTIME_CORE
tags = [tool_type.value]
else:
raise ValueError(
f"Tool name {name} is not in the list of base tool names: {BASE_TOOLS + BASE_MEMORY_TOOLS + MULTI_AGENT_TOOLS + BASE_SLEEPTIME_TOOLS + BASE_VOICE_SLEEPTIME_TOOLS + BASE_VOICE_SLEEPTIME_CHAT_TOOLS}"
)
# create to tool
tools.append(
self.create_or_update_tool(
PydanticTool(
name=name,
tags=tags,
source_type="python",
tool_type=tool_type,
return_char_limit=BASE_FUNCTION_RETURN_CHAR_LIMIT,
),
actor=actor,
)
)
# TODO: Delete any base tools that are stale
return tools
@enforce_types
async def upsert_base_tools_async(self, actor: PydanticUser) -> List[PydanticTool]:
"""Add default tools in base.py and multi_agent.py"""
functions_to_schema = {}
module_names = ["base", "multi_agent", "voice"]
for module_name in module_names:
full_module_name = f"letta.functions.function_sets.{module_name}"
try:
module = importlib.import_module(full_module_name)
except Exception as e:
# Handle other general exceptions
raise e
try:
# Load the function set
functions_to_schema.update(load_function_set(module))
except ValueError as e:
err = f"Error loading function set '{module_name}': {e}"
warnings.warn(err)
# create tool in db
tools = []
for name, schema in functions_to_schema.items():
if name in LETTA_TOOL_SET:
if name in BASE_TOOLS:
tool_type = ToolType.LETTA_CORE
tags = [tool_type.value]
elif name in BASE_MEMORY_TOOLS:
tool_type = ToolType.LETTA_MEMORY_CORE
tags = [tool_type.value]
elif name in MULTI_AGENT_TOOLS:
tool_type = ToolType.LETTA_MULTI_AGENT_CORE
tags = [tool_type.value]
elif name in BASE_SLEEPTIME_TOOLS:
tool_type = ToolType.LETTA_SLEEPTIME_CORE
tags = [tool_type.value]
elif name in BASE_VOICE_SLEEPTIME_TOOLS or name in BASE_VOICE_SLEEPTIME_CHAT_TOOLS:
tool_type = ToolType.LETTA_VOICE_SLEEPTIME_CORE
elif name in BUILTIN_TOOLS:
tool_type = ToolType.LETTA_BUILTIN
tags = [tool_type.value]
else:
raise ValueError(

506
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,189 @@
import json
import os
import threading
import time
import uuid
from typing import List
import pytest
import requests
from dotenv import load_dotenv
from letta_client import AsyncLetta, Letta, MessageCreate
from letta_client.types import ToolReturnMessage
from letta.schemas.agent import AgentState
from letta.schemas.llm_config import LLMConfig
from letta.settings import settings
# ------------------------------
# Fixtures
# ------------------------------
@pytest.fixture(scope="module")
def server_url() -> str:
    """
    Yield the base URL of the Letta server.

    When LETTA_SERVER_URL is unset, a server is launched in a daemon thread
    and the health endpoint is polled until it's accepting connections (or a
    timeout elapses). The experimental-features flag is enabled for the
    duration of the module and restored afterwards.
    """

    def _serve() -> None:
        load_dotenv()
        from letta.server.rest_api.app import start_server

        start_server(debug=True)

    base_url: str = os.getenv("LETTA_SERVER_URL", "http://localhost:8283")

    if not os.getenv("LETTA_SERVER_URL"):
        threading.Thread(target=_serve, daemon=True).start()

        # Poll until the server responds (or give up after the deadline).
        timeout_seconds = 30
        deadline = time.time() + timeout_seconds
        reachable = False
        while time.time() < deadline:
            try:
                if requests.get(base_url + "/v1/health").status_code < 500:
                    reachable = True
                    break
            except requests.exceptions.RequestException:
                pass
            time.sleep(0.1)
        if not reachable:
            raise RuntimeError(f"Could not reach {base_url} within {timeout_seconds}s")

    previous_flag = settings.use_experimental
    settings.use_experimental = True
    yield base_url
    settings.use_experimental = previous_flag
@pytest.fixture(scope="module")
def client(server_url: str) -> Letta:
    """Yield a synchronous Letta REST client bound to the test server."""
    yield Letta(base_url=server_url)
@pytest.fixture(scope="function")
def async_client(server_url: str) -> AsyncLetta:
    """Yield an asynchronous Letta REST client bound to the test server."""
    yield AsyncLetta(base_url=server_url)
@pytest.fixture(scope="module")
def agent_state(client: Letta) -> AgentState:
    """
    Create a 'supervisor' agent wired with the send_message and run_code
    tools, yield it to the tests, and delete it when the module finishes.
    """
    client.tools.upsert_base_tools()

    tool_ids = [
        client.tools.list(name="send_message")[0].id,
        client.tools.list(name="run_code")[0].id,
    ]

    agent = client.agents.create(
        name="supervisor",
        include_base_tools=False,
        tool_ids=tool_ids,
        model="openai/gpt-4o",
        embedding="letta/letta-free",
        tags=["supervisor"],
    )
    yield agent
    client.agents.delete(agent.id)
# ------------------------------
# Helper Functions and Constants
# ------------------------------
def get_llm_config(filename: str, llm_config_dir: str = "tests/configs/llm_model_configs") -> LLMConfig:
    """Load an LLMConfig from a JSON file.

    Args:
        filename: Name of the JSON config file.
        llm_config_dir: Directory containing the LLM config files.

    Returns:
        The parsed LLMConfig.
    """
    path = os.path.join(llm_config_dir, filename)
    # Use a context manager so the file handle is closed deterministically;
    # the original json.load(open(...)) leaked the handle.
    with open(path, "r") as f:
        config_data = json.load(f)
    return LLMConfig(**config_data)
USER_MESSAGE_OTID = str(uuid.uuid4())
all_configs = [
"openai-gpt-4o-mini.json",
]
requested = os.getenv("LLM_CONFIG_FILE")
filenames = [requested] if requested else all_configs
TESTED_LLM_CONFIGS: List[LLMConfig] = [get_llm_config(fn) for fn in filenames]
TEST_LANGUAGES = ["Python", "Javascript", "Typescript"]
EXPECTED_INTEGER_PARTITION_OUTPUT = "190569292"
# Reference implementation in Python, to embed in the user prompt
REFERENCE_CODE = """\
def reference_partition(n):
partitions = [1] + [0] * (n + 1)
for k in range(1, n + 1):
for i in range(k, n + 1):
partitions[i] += partitions[i - k]
return partitions[n]
"""
def reference_partition(n: int) -> int:
    """Return p(n), the number of integer partitions of n.

    Local twin of REFERENCE_CODE, used to compute the expected test result.
    """
    # ways[total] accumulates partition counts as parts 1..n are admitted.
    ways = [1] + [0] * (n + 1)
    for part in range(1, n + 1):
        for total in range(part, n + 1):
            ways[total] += ways[total - part]
    return ways[n]
# ------------------------------
# Test Cases
# ------------------------------
@pytest.mark.parametrize("language", TEST_LANGUAGES, ids=TEST_LANGUAGES)
@pytest.mark.parametrize("llm_config", TESTED_LLM_CONFIGS, ids=[c.model for c in TESTED_LLM_CONFIGS])
def test_run_code(
    client: Letta,
    agent_state: AgentState,
    llm_config: LLMConfig,
    language: str,
) -> None:
    """
    Ask the agent to translate a reference Python partition-counting function
    into *language*, run it via the run_code tool, and verify that p(100)
    appears in a tool return.
    """
    # NOTE(review): llm_config is parametrized but never applied to the agent — confirm intent.
    expected = str(reference_partition(100))

    prompt = (
        "Here is a Python reference implementation:\n\n"
        f"{REFERENCE_CODE}\n"
        f"Please translate and execute this code in {language} to compute p(100), "
        "and return **only** the result with no extra formatting."
    )

    response = client.agents.messages.create(
        agent_id=agent_state.id,
        messages=[MessageCreate(role="user", content=prompt, otid=USER_MESSAGE_OTID)],
    )

    tool_returns = [m for m in response.messages if isinstance(m, ToolReturnMessage)]
    assert tool_returns, f"No ToolReturnMessage found for language: {language}"

    returns = [m.tool_return for m in tool_returns]
    assert any(expected in ret for ret in returns), (
        f"For language={language!r}, expected to find '{expected}' in tool_return, " f"but got {returns!r}"
    )

View File

@@ -24,7 +24,9 @@ from letta.constants import (
BASE_TOOLS,
BASE_VOICE_SLEEPTIME_CHAT_TOOLS,
BASE_VOICE_SLEEPTIME_TOOLS,
BUILTIN_TOOLS,
LETTA_TOOL_EXECUTION_DIR,
LETTA_TOOL_SET,
MCP_TOOL_TAG_NAME_PREFIX,
MULTI_AGENT_TOOLS,
)
@@ -2401,16 +2403,8 @@ async def test_delete_tool_by_id(server: SyncServer, print_tool, default_user, e
def test_upsert_base_tools(server: SyncServer, default_user):
tools = server.tool_manager.upsert_base_tools(actor=default_user)
expected_tool_names = sorted(
set(
BASE_TOOLS
+ BASE_MEMORY_TOOLS
+ MULTI_AGENT_TOOLS
+ BASE_SLEEPTIME_TOOLS
+ BASE_VOICE_SLEEPTIME_TOOLS
+ BASE_VOICE_SLEEPTIME_CHAT_TOOLS
)
)
expected_tool_names = sorted(LETTA_TOOL_SET)
assert sorted([t.name for t in tools]) == expected_tool_names
# Call it again to make sure it doesn't create duplicates
@@ -2431,6 +2425,8 @@ def test_upsert_base_tools(server: SyncServer, default_user):
assert t.tool_type == ToolType.LETTA_VOICE_SLEEPTIME_CORE
elif t.name in BASE_VOICE_SLEEPTIME_CHAT_TOOLS:
assert t.tool_type == ToolType.LETTA_VOICE_SLEEPTIME_CORE
elif t.name in BUILTIN_TOOLS:
assert t.tool_type == ToolType.LETTA_BUILTIN
else:
pytest.fail(f"The tool name is unrecognized as a base tool: {t.name}")
assert t.source_code is None