feat: Add built in code interpreter tool (#2252)

This commit is contained in:
Matthew Zhou
2025-05-20 07:01:40 +08:00
committed by GitHub
parent 0a54b998a8
commit 6e5ab8b151
13 changed files with 337 additions and 528 deletions

View File

@@ -18,6 +18,7 @@ from letta.constants import (
LETTA_CORE_TOOL_MODULE_NAME,
LETTA_MULTI_AGENT_TOOL_MODULE_NAME,
LLM_MAX_TOKENS,
READ_ONLY_BLOCK_EDIT_ERROR,
REQ_HEARTBEAT_MESSAGE,
SEND_MESSAGE_TOOL_NAME,
)

View File

@@ -442,6 +442,7 @@ class LettaAgent(BaseAgent):
ToolType.LETTA_MULTI_AGENT_CORE,
ToolType.LETTA_SLEEPTIME_CORE,
ToolType.LETTA_VOICE_SLEEPTIME_CORE,
ToolType.LETTA_BUILTIN,
}
or (t.tool_type == ToolType.LETTA_MULTI_AGENT_CORE and t.name == "send_message_to_agents_matching_tags")
or (t.tool_type == ToolType.EXTERNAL_COMPOSIO)

View File

@@ -19,6 +19,7 @@ MCP_TOOL_TAG_NAME_PREFIX = "mcp" # full format, mcp:server_name
LETTA_CORE_TOOL_MODULE_NAME = "letta.functions.function_sets.base"
LETTA_MULTI_AGENT_TOOL_MODULE_NAME = "letta.functions.function_sets.multi_agent"
LETTA_VOICE_TOOL_MODULE_NAME = "letta.functions.function_sets.voice"
LETTA_BUILTIN_TOOL_MODULE_NAME = "letta.functions.function_sets.builtin"
# String in the error message for when the context window is too large
@@ -83,9 +84,19 @@ BASE_VOICE_SLEEPTIME_TOOLS = [
]
# Multi agent tools
MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_tags", "send_message_to_agent_async"]
# Built in tools
BUILTIN_TOOLS = ["run_code"]
# Set of all built-in Letta tools
LETTA_TOOL_SET = set(
BASE_TOOLS + BASE_MEMORY_TOOLS + MULTI_AGENT_TOOLS + BASE_SLEEPTIME_TOOLS + BASE_VOICE_SLEEPTIME_TOOLS + BASE_VOICE_SLEEPTIME_CHAT_TOOLS
BASE_TOOLS
+ BASE_MEMORY_TOOLS
+ MULTI_AGENT_TOOLS
+ BASE_SLEEPTIME_TOOLS
+ BASE_VOICE_SLEEPTIME_TOOLS
+ BASE_VOICE_SLEEPTIME_CHAT_TOOLS
+ BUILTIN_TOOLS
)
# The name of the tool used to send message to the user

View File

@@ -0,0 +1,15 @@
from typing import Literal
def run_code(code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
    """
    Execute a code snippet inside a sandbox and return its output.

    Supported languages: Python, Javascript, Typescript, R, and Java.

    Args:
        code (str): The source code to execute.
        language (Literal["python", "js", "ts", "r", "java"]): Language the
            snippet is written in.

    Returns:
        str: Combined execution output — stdout, stderr, and any error traces.
    """
    # Stub: the real implementation is dispatched server-side via
    # LettaBuiltinToolExecutor; this module only supplies the JSON schema.
    raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")

View File

@@ -8,6 +8,7 @@ class ToolType(str, Enum):
LETTA_MULTI_AGENT_CORE = "letta_multi_agent_core"
LETTA_SLEEPTIME_CORE = "letta_sleeptime_core"
LETTA_VOICE_SLEEPTIME_CORE = "letta_voice_sleeptime_core"
LETTA_BUILTIN = "letta_builtin"
EXTERNAL_COMPOSIO = "external_composio"
EXTERNAL_LANGCHAIN = "external_langchain"
# TODO is "external" the right name here? Since as of now, MCP is local / doesn't support remote?

View File

@@ -5,6 +5,7 @@ from pydantic import Field, model_validator
from letta.constants import (
COMPOSIO_TOOL_TAG_NAME,
FUNCTION_RETURN_CHAR_LIMIT,
LETTA_BUILTIN_TOOL_MODULE_NAME,
LETTA_CORE_TOOL_MODULE_NAME,
LETTA_MULTI_AGENT_TOOL_MODULE_NAME,
LETTA_VOICE_TOOL_MODULE_NAME,
@@ -104,6 +105,9 @@ class Tool(BaseTool):
elif self.tool_type in {ToolType.LETTA_VOICE_SLEEPTIME_CORE}:
# If it's letta voice tool, we generate the json_schema on the fly here
self.json_schema = get_json_schema_from_module(module_name=LETTA_VOICE_TOOL_MODULE_NAME, function_name=self.name)
elif self.tool_type in {ToolType.LETTA_BUILTIN}:
# If it's a letta builtin tool, we generate the json_schema on the fly here
self.json_schema = get_json_schema_from_module(module_name=LETTA_BUILTIN_TOOL_MODULE_NAME, function_name=self.name)
# At this point, we need to validate that at least json_schema is populated
if not self.json_schema:

View File

@@ -189,7 +189,7 @@ async def upsert_base_tools(
Upsert base tools
"""
actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
return await server.tool_manager.upsert_base_tools_async(actor=actor)
return server.tool_manager.upsert_base_tools(actor=actor)
@router.post("/run", response_model=ToolReturnMessage, operation_id="run_tool_from_source")

View File

@@ -11,6 +11,7 @@ from letta.schemas.user import User
from letta.services.tool_executor.tool_executor import (
ExternalComposioToolExecutor,
ExternalMCPToolExecutor,
LettaBuiltinToolExecutor,
LettaCoreToolExecutor,
LettaMultiAgentToolExecutor,
SandboxToolExecutor,
@@ -28,6 +29,7 @@ class ToolExecutorFactory:
ToolType.LETTA_MEMORY_CORE: LettaCoreToolExecutor,
ToolType.LETTA_SLEEPTIME_CORE: LettaCoreToolExecutor,
ToolType.LETTA_MULTI_AGENT_CORE: LettaMultiAgentToolExecutor,
ToolType.LETTA_BUILTIN: LettaBuiltinToolExecutor,
ToolType.EXTERNAL_COMPOSIO: ExternalComposioToolExecutor,
ToolType.EXTERNAL_MCP: ExternalMCPToolExecutor,
}
@@ -100,7 +102,7 @@ class ToolExecutionManager:
try:
executor = ToolExecutorFactory.get_executor(tool.tool_type)
# TODO: Extend this async model to composio
if isinstance(executor, (SandboxToolExecutor, ExternalComposioToolExecutor)):
if isinstance(executor, (SandboxToolExecutor, ExternalComposioToolExecutor, LettaBuiltinToolExecutor)):
result = await executor.execute(function_name, function_args, self.agent_state, tool, self.actor)
else:
result = executor.execute(function_name, function_args, self.agent_state, tool, self.actor)

View File

@@ -1,7 +1,7 @@
import math
import traceback
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional
from typing import Any, Dict, Literal, Optional
from letta.constants import (
COMPOSIO_ENTITY_ENV_VAR_KEY,
@@ -674,3 +674,48 @@ class SandboxToolExecutor(ToolExecutor):
func_return=error_message,
stderr=[stderr],
)
class LettaBuiltinToolExecutor(ToolExecutor):
    """Executor for built in Letta tools."""

    async def execute(
        self,
        function_name: str,
        function_args: dict,
        agent_state: AgentState,
        tool: Tool,
        actor: User,
        sandbox_config: Optional[SandboxConfig] = None,
        sandbox_env_vars: Optional[Dict[str, Any]] = None,
    ) -> ToolExecutionResult:
        """Dispatch a built-in tool call to its local implementation.

        Args:
            function_name: Name of the built-in tool being invoked.
            function_args: Keyword arguments parsed from the tool call.
            agent_state: State of the calling agent (unused here).
            tool: Tool record being executed (unused here).
            actor: User on whose behalf the tool runs (unused here).
            sandbox_config: Ignored; built-ins manage their own sandbox.
            sandbox_env_vars: Ignored; built-ins manage their own sandbox.

        Returns:
            ToolExecutionResult with status "success" and the function's
            string return value.

        Raises:
            ValueError: If function_name is not a known built-in.
        """
        # Map of built-in tool names to their async implementations.
        function_map = {
            "run_code": self.run_code,
        }

        if function_name not in function_map:
            raise ValueError(f"Unknown function: {function_name}")

        # Execute the appropriate function
        function_args_copy = function_args.copy()  # Make a copy to avoid modifying the original
        function_response = await function_map[function_name](**function_args_copy)

        return ToolExecutionResult(
            status="success",
            func_return=function_response,
        )

    async def run_code(self, code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
        """Run *code* in an E2B cloud sandbox and return the stringified result.

        Raises:
            ValueError: If no E2B API key is configured.
        """
        from e2b_code_interpreter import AsyncSandbox

        if tool_settings.e2b_api_key is None:
            raise ValueError("E2B_API_KEY is not set")

        sbx = await AsyncSandbox.create(api_key=tool_settings.e2b_api_key)
        try:
            params = {"code": code}
            if language != "python":
                # Leave empty for python
                params["language"] = language
            res = await sbx.run_code(**params)
        finally:
            # Tear the sandbox down explicitly: the original leaked it, leaving
            # the remote sandbox running (and billing) until its idle timeout.
            await sbx.kill()
        return str(res)

View File

@@ -9,6 +9,7 @@ from letta.constants import (
BASE_TOOLS,
BASE_VOICE_SLEEPTIME_CHAT_TOOLS,
BASE_VOICE_SLEEPTIME_TOOLS,
BUILTIN_TOOLS,
LETTA_TOOL_SET,
MCP_TOOL_TAG_NAME_PREFIX,
MULTI_AGENT_TOOLS,
@@ -307,7 +308,7 @@ class ToolManager:
def upsert_base_tools(self, actor: PydanticUser) -> List[PydanticTool]:
"""Add default tools in base.py and multi_agent.py"""
functions_to_schema = {}
module_names = ["base", "multi_agent", "voice"]
module_names = ["base", "multi_agent", "voice", "builtin"]
for module_name in module_names:
full_module_name = f"letta.functions.function_sets.{module_name}"
@@ -343,67 +344,8 @@ class ToolManager:
elif name in BASE_VOICE_SLEEPTIME_TOOLS or name in BASE_VOICE_SLEEPTIME_CHAT_TOOLS:
tool_type = ToolType.LETTA_VOICE_SLEEPTIME_CORE
tags = [tool_type.value]
else:
raise ValueError(
f"Tool name {name} is not in the list of base tool names: {BASE_TOOLS + BASE_MEMORY_TOOLS + MULTI_AGENT_TOOLS + BASE_SLEEPTIME_TOOLS + BASE_VOICE_SLEEPTIME_TOOLS + BASE_VOICE_SLEEPTIME_CHAT_TOOLS}"
)
# create to tool
tools.append(
self.create_or_update_tool(
PydanticTool(
name=name,
tags=tags,
source_type="python",
tool_type=tool_type,
return_char_limit=BASE_FUNCTION_RETURN_CHAR_LIMIT,
),
actor=actor,
)
)
# TODO: Delete any base tools that are stale
return tools
@enforce_types
async def upsert_base_tools_async(self, actor: PydanticUser) -> List[PydanticTool]:
"""Add default tools in base.py and multi_agent.py"""
functions_to_schema = {}
module_names = ["base", "multi_agent", "voice"]
for module_name in module_names:
full_module_name = f"letta.functions.function_sets.{module_name}"
try:
module = importlib.import_module(full_module_name)
except Exception as e:
# Handle other general exceptions
raise e
try:
# Load the function set
functions_to_schema.update(load_function_set(module))
except ValueError as e:
err = f"Error loading function set '{module_name}': {e}"
warnings.warn(err)
# create tool in db
tools = []
for name, schema in functions_to_schema.items():
if name in LETTA_TOOL_SET:
if name in BASE_TOOLS:
tool_type = ToolType.LETTA_CORE
tags = [tool_type.value]
elif name in BASE_MEMORY_TOOLS:
tool_type = ToolType.LETTA_MEMORY_CORE
tags = [tool_type.value]
elif name in MULTI_AGENT_TOOLS:
tool_type = ToolType.LETTA_MULTI_AGENT_CORE
tags = [tool_type.value]
elif name in BASE_SLEEPTIME_TOOLS:
tool_type = ToolType.LETTA_SLEEPTIME_CORE
tags = [tool_type.value]
elif name in BASE_VOICE_SLEEPTIME_TOOLS or name in BASE_VOICE_SLEEPTIME_CHAT_TOOLS:
tool_type = ToolType.LETTA_VOICE_SLEEPTIME_CORE
elif name in BUILTIN_TOOLS:
tool_type = ToolType.LETTA_BUILTIN
tags = [tool_type.value]
else:
raise ValueError(

506
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,189 @@
import json
import os
import threading
import time
import uuid
from typing import List
import pytest
import requests
from dotenv import load_dotenv
from letta_client import AsyncLetta, Letta, MessageCreate
from letta_client.types import ToolReturnMessage
from letta.schemas.agent import AgentState
from letta.schemas.llm_config import LLMConfig
from letta.settings import settings
# ------------------------------
# Fixtures
# ------------------------------
@pytest.fixture(scope="module")
def server_url() -> str:
    """
    Yield the base URL of the Letta server.

    When LETTA_SERVER_URL is unset, a server is launched in a daemon thread
    and the health endpoint is polled until it's accepting connections (or a
    timeout elapses). The experimental-features flag is enabled for the
    duration of the module and restored afterwards.
    """

    def _serve() -> None:
        load_dotenv()
        from letta.server.rest_api.app import start_server

        start_server(debug=True)

    base_url: str = os.getenv("LETTA_SERVER_URL", "http://localhost:8283")

    if not os.getenv("LETTA_SERVER_URL"):
        threading.Thread(target=_serve, daemon=True).start()

        # Poll until the server responds (or give up after the deadline).
        timeout_seconds = 30
        deadline = time.time() + timeout_seconds
        reachable = False
        while time.time() < deadline:
            try:
                if requests.get(base_url + "/v1/health").status_code < 500:
                    reachable = True
                    break
            except requests.exceptions.RequestException:
                pass
            time.sleep(0.1)
        if not reachable:
            raise RuntimeError(f"Could not reach {base_url} within {timeout_seconds}s")

    previous_flag = settings.use_experimental
    settings.use_experimental = True
    yield base_url
    settings.use_experimental = previous_flag
@pytest.fixture(scope="module")
def client(server_url: str) -> Letta:
    """Yield a synchronous Letta REST client bound to the test server."""
    yield Letta(base_url=server_url)
@pytest.fixture(scope="function")
def async_client(server_url: str) -> AsyncLetta:
    """Yield an asynchronous Letta REST client bound to the test server."""
    yield AsyncLetta(base_url=server_url)
@pytest.fixture(scope="module")
def agent_state(client: Letta) -> AgentState:
    """
    Create a 'supervisor' agent wired with the send_message and run_code
    tools, yield it to the tests, and delete it when the module finishes.
    """
    client.tools.upsert_base_tools()

    tool_ids = [
        client.tools.list(name="send_message")[0].id,
        client.tools.list(name="run_code")[0].id,
    ]

    agent = client.agents.create(
        name="supervisor",
        include_base_tools=False,
        tool_ids=tool_ids,
        model="openai/gpt-4o",
        embedding="letta/letta-free",
        tags=["supervisor"],
    )
    yield agent
    client.agents.delete(agent.id)
# ------------------------------
# Helper Functions and Constants
# ------------------------------
def get_llm_config(filename: str, llm_config_dir: str = "tests/configs/llm_model_configs") -> LLMConfig:
    """Load an LLMConfig from a JSON file.

    Args:
        filename: Name of the JSON config file.
        llm_config_dir: Directory containing the LLM config files.

    Returns:
        The parsed LLMConfig.
    """
    path = os.path.join(llm_config_dir, filename)
    # Use a context manager so the file handle is closed deterministically;
    # the original json.load(open(...)) leaked the handle.
    with open(path, "r") as f:
        config_data = json.load(f)
    return LLMConfig(**config_data)
USER_MESSAGE_OTID = str(uuid.uuid4())
all_configs = [
"openai-gpt-4o-mini.json",
]
requested = os.getenv("LLM_CONFIG_FILE")
filenames = [requested] if requested else all_configs
TESTED_LLM_CONFIGS: List[LLMConfig] = [get_llm_config(fn) for fn in filenames]
TEST_LANGUAGES = ["Python", "Javascript", "Typescript"]
EXPECTED_INTEGER_PARTITION_OUTPUT = "190569292"
# Reference implementation in Python, to embed in the user prompt
REFERENCE_CODE = """\
def reference_partition(n):
partitions = [1] + [0] * (n + 1)
for k in range(1, n + 1):
for i in range(k, n + 1):
partitions[i] += partitions[i - k]
return partitions[n]
"""
def reference_partition(n: int) -> int:
    """Return p(n), the number of integer partitions of n.

    Local twin of REFERENCE_CODE, used to compute the expected test result.
    """
    # ways[total] accumulates partition counts as parts 1..n are admitted.
    ways = [1] + [0] * (n + 1)
    for part in range(1, n + 1):
        for total in range(part, n + 1):
            ways[total] += ways[total - part]
    return ways[n]
# ------------------------------
# Test Cases
# ------------------------------
@pytest.mark.parametrize("language", TEST_LANGUAGES, ids=TEST_LANGUAGES)
@pytest.mark.parametrize("llm_config", TESTED_LLM_CONFIGS, ids=[c.model for c in TESTED_LLM_CONFIGS])
def test_run_code(
    client: Letta,
    agent_state: AgentState,
    llm_config: LLMConfig,
    language: str,
) -> None:
    """
    Ask the agent to translate a reference Python partition-counting function
    into *language*, run it via the run_code tool, and verify that p(100)
    appears in a tool return.
    """
    # NOTE(review): llm_config is parametrized but never applied to the agent — confirm intent.
    expected = str(reference_partition(100))

    prompt = (
        "Here is a Python reference implementation:\n\n"
        f"{REFERENCE_CODE}\n"
        f"Please translate and execute this code in {language} to compute p(100), "
        "and return **only** the result with no extra formatting."
    )

    response = client.agents.messages.create(
        agent_id=agent_state.id,
        messages=[MessageCreate(role="user", content=prompt, otid=USER_MESSAGE_OTID)],
    )

    tool_returns = [m for m in response.messages if isinstance(m, ToolReturnMessage)]
    assert tool_returns, f"No ToolReturnMessage found for language: {language}"

    returns = [m.tool_return for m in tool_returns]
    assert any(expected in ret for ret in returns), (
        f"For language={language!r}, expected to find '{expected}' in tool_return, " f"but got {returns!r}"
    )

View File

@@ -24,7 +24,9 @@ from letta.constants import (
BASE_TOOLS,
BASE_VOICE_SLEEPTIME_CHAT_TOOLS,
BASE_VOICE_SLEEPTIME_TOOLS,
BUILTIN_TOOLS,
LETTA_TOOL_EXECUTION_DIR,
LETTA_TOOL_SET,
MCP_TOOL_TAG_NAME_PREFIX,
MULTI_AGENT_TOOLS,
)
@@ -2401,16 +2403,8 @@ async def test_delete_tool_by_id(server: SyncServer, print_tool, default_user, e
def test_upsert_base_tools(server: SyncServer, default_user):
tools = server.tool_manager.upsert_base_tools(actor=default_user)
expected_tool_names = sorted(
set(
BASE_TOOLS
+ BASE_MEMORY_TOOLS
+ MULTI_AGENT_TOOLS
+ BASE_SLEEPTIME_TOOLS
+ BASE_VOICE_SLEEPTIME_TOOLS
+ BASE_VOICE_SLEEPTIME_CHAT_TOOLS
)
)
expected_tool_names = sorted(LETTA_TOOL_SET)
assert sorted([t.name for t in tools]) == expected_tool_names
# Call it again to make sure it doesn't create duplicates
@@ -2431,6 +2425,8 @@ def test_upsert_base_tools(server: SyncServer, default_user):
assert t.tool_type == ToolType.LETTA_VOICE_SLEEPTIME_CORE
elif t.name in BASE_VOICE_SLEEPTIME_CHAT_TOOLS:
assert t.tool_type == ToolType.LETTA_VOICE_SLEEPTIME_CORE
elif t.name in BUILTIN_TOOLS:
assert t.tool_type == ToolType.LETTA_BUILTIN
else:
pytest.fail(f"The tool name is unrecognized as a base tool: {t.name}")
assert t.source_code is None