feat: Ungate file upload for simple MIME types even without Mistral API key (#2898)

This commit is contained in:
Matthew Zhou
2025-06-18 15:11:30 -07:00
committed by GitHub
parent a1a203c943
commit 698d99a66e
21 changed files with 95 additions and 76 deletions

View File

@@ -30,7 +30,7 @@ agent_state = client.agents.create(
model="openai/gpt-4o-mini",
context_window_limit=8000,
# embedding model & endpoint configuration (cannot be changed)
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
# system instructions for the agent (defaults to `memgpt_chat`)
system=gpt_system.get_system_text("memgpt_chat"),
# whether to include base letta tools (default: True)

View File

@@ -19,7 +19,7 @@ agent_state = client.agents.create(
],
# set automatic defaults for LLM/embedding config
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
)
print(f"Created agent with name {agent_state.name} and unique ID {agent_state.id}")

View File

@@ -21,7 +21,7 @@ agent = client.agents.create(
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
)
print(f"Created agent with name {agent.name}")
@@ -120,7 +120,7 @@ for chunk in stream:
agent_copy = client.agents.create(
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
)
block = client.agents.blocks.retrieve(agent.id, block_label="human")
agent_copy = client.agents.blocks.attach(agent_copy.id, block.id)

View File

@@ -46,7 +46,7 @@ agent_state = client.agents.create(
],
# set automatic defaults for LLM/embedding config
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
# create the agent with an additional tool
tool_ids=[tool.id],
tool_rules=[
@@ -89,7 +89,7 @@ agent_state = client.agents.create(
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
include_base_tools=False,
tool_ids=[tool.id, send_message_tool.id],
)

View File

@@ -26,7 +26,7 @@ agent = client.agents.create(
}
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
tool_ids=[mcp_tool.id]
)
print(f"Created agent id {agent.id}")

View File

@@ -30,7 +30,7 @@ source_name = "employee_handbook"
source = client.sources.create(
name=source_name,
description="Provides reference information for the employee handbook",
embedding="openai/text-embedding-ada-002" # must match agent
embedding="openai/text-embedding-3-small" # must match agent
)
# attach the source to the agent
client.agents.sources.attach(

View File

@@ -65,7 +65,7 @@ DEFAULT_EMBEDDING_CHUNK_SIZE = 300
# tokenizers
EMBEDDING_TO_TOKENIZER_MAP = {
"text-embedding-ada-002": "cl100k_base",
"text-embedding-3-small": "cl100k_base",
}
EMBEDDING_TO_TOKENIZER_DEFAULT = "cl100k_base"

View File

@@ -63,6 +63,14 @@ class EmbeddingConfig(BaseModel):
embedding_dim=1536,
embedding_chunk_size=300,
)
if model_name == "text-embedding-3-small" and provider == "openai":
return cls(
embedding_model="text-embedding-3-small",
embedding_endpoint_type="openai",
embedding_endpoint="https://api.openai.com/v1",
embedding_dim=2000,
embedding_chunk_size=300,
)
elif model_name == "letta":
return cls(
embedding_endpoint="https://embeddings.memgpt.ai",

View File

@@ -21,9 +21,14 @@ from letta.server.server import SyncServer
from letta.services.file_processor.chunker.llama_index_chunker import LlamaIndexChunker
from letta.services.file_processor.embedder.openai_embedder import OpenAIEmbedder
from letta.services.file_processor.file_processor import FileProcessor
from letta.services.file_processor.file_types import get_allowed_media_types, get_extension_to_mime_type_map, register_mime_types
from letta.services.file_processor.file_types import (
get_allowed_media_types,
get_extension_to_mime_type_map,
is_simple_text_mime_type,
register_mime_types,
)
from letta.services.file_processor.parser.mistral_parser import MistralFileParser
from letta.settings import model_settings, settings
from letta.settings import settings
from letta.utils import safe_create_task, sanitize_filename
logger = get_logger(__name__)
@@ -228,20 +233,26 @@ async def upload_file_to_source(
agent_states = await server.source_manager.list_attached_agents(source_id=source_id, actor=actor)
# NEW: Cloud based file processing
if settings.mistral_api_key and model_settings.openai_api_key:
logger.info("Running experimental cloud based file processing...")
safe_create_task(
load_file_to_source_cloud(server, agent_states, content, file, job, source_id, actor),
logger=logger,
label="file_processor.process",
)
else:
# create background tasks
safe_create_task(
load_file_to_source_async(server, source_id=source.id, filename=file.filename, job_id=job.id, bytes=content, actor=actor),
logger=logger,
label="load_file_to_source_async",
# Determine file's MIME type
file_mime_type = mimetypes.guess_type(file.filename)[0] or "application/octet-stream"
# Check if it's a simple text file
is_simple_file = is_simple_text_mime_type(file_mime_type)
# For complex files, require Mistral API key
if not is_simple_file and not settings.mistral_api_key:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Mistral API key is required to process this file type {file_mime_type}. Please configure your Mistral API key to upload complex file formats.",
)
# Use cloud processing for all files (simple files always, complex files with Mistral key)
logger.info("Running experimental cloud based file processing...")
safe_create_task(
load_file_to_source_cloud(server, agent_states, content, file, job, source_id, actor),
logger=logger,
label="file_processor.process",
)
safe_create_task(sleeptime_document_ingest_async(server, source_id, actor), logger=logger, label="sleeptime_document_ingest_async")
return job

View File

@@ -16,7 +16,7 @@ class OpenAIEmbedder:
"""OpenAI-based embedding generation"""
def __init__(self, embedding_config: Optional[EmbeddingConfig] = None):
self.embedding_config = embedding_config or EmbeddingConfig.default_config(provider="openai")
self.embedding_config = embedding_config or EmbeddingConfig.default_config(model_name="text-embedding-3-small", provider="openai")
# TODO: Unify to global OpenAI client
self.client = openai.AsyncOpenAI(api_key=model_settings.openai_api_key)

View File

@@ -20,11 +20,10 @@ class MistralFileParser(FileParser):
async def extract_text(self, content: bytes, mime_type: str) -> OCRResponse:
"""Extract text using Mistral OCR or shortcut for plain text."""
try:
logger.info(f"Extracting text using Mistral OCR model: {self.model}")
# TODO: Kind of hacky...we try to exit early here?
# TODO: Create our internal file parser representation we return instead of OCRResponse
if is_simple_text_mime_type(mime_type):
logger.info(f"Extracting text directly (no Mistral): {self.model}")
text = content.decode("utf-8", errors="replace")
return OCRResponse(
model=self.model,
@@ -43,6 +42,7 @@ class MistralFileParser(FileParser):
base64_encoded_content = base64.b64encode(content).decode("utf-8")
document_url = f"data:{mime_type};base64,{base64_encoded_content}"
logger.info(f"Extracting text using Mistral OCR model: {self.model}")
async with Mistral(api_key=settings.mistral_api_key) as mistral:
ocr_response = await mistral.ocr.process_async(
model="mistral-ocr-latest", document={"type": "document_url", "document_url": document_url}, include_image_base64=False

View File

@@ -7,7 +7,7 @@ asked to use the provided documents to answer the question. Similar to Liu et al
we evaluate reader accuracy as the number of retrieved documents K increases. In our evaluation setup, both
the fixed-context baselines and Letta use the same retriever, which selects the top K documents
using Faiss efficient similarity search (Johnson et al., 2019) (which corresponds to
approximate nearest neighbor search) on OpenAI's text-embedding-ada-002 embeddings. In
approximate nearest neighbor search) on OpenAI's text-embedding-3-small embeddings. In
Letta, the entire document set is loaded into archival storage, and the retriever naturally emerges
via the archival storage search functionality (which performs embedding-based similarity search).
In the fixed-context baselines, the top-K documents are fetched using the retriever independently

View File

@@ -76,7 +76,7 @@ async def test_sleeptime_group_chat(server, actor):
],
# model="openai/gpt-4o-mini",
model="anthropic/claude-3-5-sonnet-20240620",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
enable_sleeptime=True,
),
actor=actor,
@@ -190,7 +190,7 @@ async def test_sleeptime_group_chat_v2(server, actor):
],
# model="openai/gpt-4o-mini",
model="anthropic/claude-3-5-sonnet-20240620",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
enable_sleeptime=True,
),
actor=actor,
@@ -310,7 +310,7 @@ async def test_sleeptime_removes_redundant_information(server, actor):
),
],
model="anthropic/claude-3-5-sonnet-20240620",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
enable_sleeptime=True,
),
actor=actor,
@@ -389,7 +389,7 @@ async def test_sleeptime_edit(server, actor):
),
],
model="anthropic/claude-3-5-sonnet-20240620",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
enable_sleeptime=True,
),
actor=actor,

View File

@@ -190,7 +190,7 @@ def voice_agent(server, actor, roll_dice_tool):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
enable_sleeptime=True,
tool_ids=[roll_dice_tool.id, run_code_tool.id],
),
@@ -279,7 +279,7 @@ async def test_model_compatibility(model, message, server, server_url, actor, ro
),
],
model=model,
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
enable_sleeptime=True,
tool_ids=[roll_dice_tool.id, run_code_tool.id],
),

View File

@@ -1,7 +1,7 @@
from conftest import create_test_module
AGENTS_CREATE_PARAMS = [
("caren_agent", {"name": "caren", "model": "openai/gpt-4o-mini", "embedding": "openai/text-embedding-ada-002"}, {}, None),
("caren_agent", {"name": "caren", "model": "openai/gpt-4o-mini", "embedding": "openai/text-embedding-3-small"}, {}, None),
]
AGENTS_MODIFY_PARAMS = [

View File

@@ -87,7 +87,7 @@ def create_test_module(
agent = client.agents.create(
name="caren_agent",
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
)
# Add finalizer to ensure cleanup happens in the right order

View File

@@ -57,7 +57,7 @@ def test_letta_run_create_new_agent(swap_letta_config):
# Optional: Embedding model selection
try:
child.expect("Select embedding model:", timeout=20)
child.sendline("text-embedding-ada-002")
child.sendline("text-embedding-3-small")
except (pexpect.TIMEOUT, pexpect.EOF):
print("[WARNING] Embedding model selection step was skipped.")

View File

@@ -66,7 +66,7 @@ def participant_agents(server, actor):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
),
actor=actor,
)
@@ -80,7 +80,7 @@ def participant_agents(server, actor):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
),
actor=actor,
)
@@ -94,7 +94,7 @@ def participant_agents(server, actor):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
),
actor=actor,
)
@@ -108,7 +108,7 @@ def participant_agents(server, actor):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
),
actor=actor,
)
@@ -137,7 +137,7 @@ def manager_agent(server, actor):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
),
actor=actor,
)
@@ -350,7 +350,7 @@ async def test_supervisor(server, actor, participant_agents):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
),
actor=actor,
)
@@ -420,7 +420,7 @@ async def test_dynamic_group_chat(server, actor, manager_agent, participant_agen
request=CreateAgent(
name="shaggy",
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
),
actor=actor,
)

View File

@@ -48,7 +48,7 @@ def agent(client: LettaSDKClient):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
)
yield agent_state
@@ -74,7 +74,7 @@ def test_shared_blocks(client: LettaSDKClient):
],
block_ids=[block.id],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
)
agent_state2 = client.agents.create(
name="agent2",
@@ -86,7 +86,7 @@ def test_shared_blocks(client: LettaSDKClient):
],
block_ids=[block.id],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
)
# update memory
@@ -132,7 +132,7 @@ def test_read_only_block(client: LettaSDKClient):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
)
# make sure agent cannot update read-only block
@@ -175,7 +175,7 @@ def test_add_and_manage_tags_for_agent(client: LettaSDKClient):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
)
assert len(agent.tags) == 0
@@ -227,7 +227,7 @@ def test_agent_tags(client: LettaSDKClient):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
tags=["test", "agent1", "production"],
)
@@ -239,7 +239,7 @@ def test_agent_tags(client: LettaSDKClient):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
tags=["test", "agent2", "development"],
)
@@ -251,7 +251,7 @@ def test_agent_tags(client: LettaSDKClient):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
tags=["test", "agent3", "production"],
)
@@ -556,7 +556,7 @@ def test_agent_creation(client: LettaSDKClient):
name=f"test_agent_{str(uuid.uuid4())}",
memory_blocks=[sleeptime_persona_block, mindy_block],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
tool_ids=[tool1.id, tool2.id],
include_base_tools=False,
tags=["test"],
@@ -595,7 +595,7 @@ def test_many_blocks(client: LettaSDKClient):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
include_base_tools=False,
tags=["test"],
)
@@ -612,7 +612,7 @@ def test_many_blocks(client: LettaSDKClient):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
include_base_tools=False,
tags=["test"],
)

View File

@@ -330,7 +330,7 @@ def agent_id(server, user_id, base_tools):
tool_ids=[t.id for t in base_tools],
memory_blocks=[],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
),
actor=actor,
)
@@ -350,7 +350,7 @@ def other_agent_id(server, user_id, base_tools):
tool_ids=[t.id for t in base_tools],
memory_blocks=[],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
),
actor=actor,
)
@@ -523,7 +523,7 @@ def test_delete_agent_same_org(server: SyncServer, org_id: str, user: User):
name="nonexistent_tools_agent",
memory_blocks=[],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
),
actor=user,
)
@@ -577,7 +577,7 @@ async def test_read_local_llm_configs(server: SyncServer, user: User, event_loop
request=CreateAgent(
model="caren/my-custom-model",
context_window_limit=context_window_override,
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
),
actor=user,
)
@@ -914,7 +914,7 @@ async def test_memory_rebuild_count(server, user, disable_e2b_api_key, base_tool
CreateBlock(label="persona", value="My name is Alice."),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
),
actor=actor,
)
@@ -965,7 +965,7 @@ def test_add_nonexisting_tool(server: SyncServer, user_id: str, base_tools):
CreateBlock(label="persona", value="My name is Alice."),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
include_base_tools=True,
),
actor=actor,
@@ -982,7 +982,7 @@ def test_default_tool_rules(server: SyncServer, user_id: str, base_tools, base_m
tool_ids=[t.id for t in base_tools + base_memory_tools],
memory_blocks=[],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
include_base_tools=False,
),
actor=actor,
@@ -1005,7 +1005,7 @@ def test_add_remove_tools_update_agent(server: SyncServer, user_id: str, base_to
CreateBlock(label="persona", value="My name is Alice."),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
include_base_tools=False,
),
actor=actor,
@@ -1035,7 +1035,7 @@ def test_add_remove_tools_update_agent(server: SyncServer, user_id: str, base_to
"embedding_config": {
"embedding_endpoint_type": "openai",
"embedding_endpoint": "https://api.openai.com/v1",
"embedding_model": "text-embedding-ada-002",
"embedding_model": "text-embedding-3-small",
"embedding_dim": 1536,
"embedding_chunk_size": 300,
"azure_endpoint": None,
@@ -1086,7 +1086,7 @@ async def test_messages_with_provider_override(server: SyncServer, user_id: str,
memory_blocks=[],
model="caren-anthropic/claude-3-5-sonnet-20240620",
context_window_limit=100000,
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
),
actor=actor,
)

View File

@@ -68,7 +68,7 @@ def agent_state(client: LettaSDKClient):
),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
tool_ids=[open_file_tool.id, close_file_tool.id, search_files_tool.id, grep_tool.id],
)
yield agent_state
@@ -85,7 +85,7 @@ def test_auto_attach_detach_files_tools(client: LettaSDKClient):
CreateBlock(label="human", value="username: sarah"),
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-ada-002",
embedding="openai/text-embedding-3-small",
)
# Helper function to get file tools from agent
@@ -106,14 +106,14 @@ def test_auto_attach_detach_files_tools(client: LettaSDKClient):
assert_no_file_tools(agent)
# Create and attach first source
source_1 = client.sources.create(name="test_source", embedding="openai/text-embedding-ada-002")
source_1 = client.sources.create(name="test_source", embedding="openai/text-embedding-3-small")
assert len(client.sources.list()) == 1
agent = client.agents.sources.attach(source_id=source_1.id, agent_id=agent.id)
assert_file_tools_present(agent, set(FILES_TOOLS))
# Create and attach second source
source_2 = client.sources.create(name="another_test_source", embedding="openai/text-embedding-ada-002")
source_2 = client.sources.create(name="another_test_source", embedding="openai/text-embedding-3-small")
assert len(client.sources.list()) == 2
agent = client.agents.sources.attach(source_id=source_2.id, agent_id=agent.id)
@@ -152,7 +152,7 @@ def test_file_upload_creates_source_blocks_correctly(
expected_label_regex: str,
):
# Create a new source
source = client.sources.create(name="test_source", embedding="openai/text-embedding-ada-002")
source = client.sources.create(name="test_source", embedding="openai/text-embedding-3-small")
assert len(client.sources.list()) == 1
# Attach
@@ -196,7 +196,7 @@ def test_file_upload_creates_source_blocks_correctly(
def test_attach_existing_files_creates_source_blocks_correctly(client: LettaSDKClient, agent_state: AgentState):
# Create a new source
source = client.sources.create(name="test_source", embedding="openai/text-embedding-ada-002")
source = client.sources.create(name="test_source", embedding="openai/text-embedding-3-small")
assert len(client.sources.list()) == 1
# Load files into the source
@@ -240,7 +240,7 @@ def test_attach_existing_files_creates_source_blocks_correctly(client: LettaSDKC
def test_delete_source_removes_source_blocks_correctly(client: LettaSDKClient, agent_state: AgentState):
# Create a new source
source = client.sources.create(name="test_source", embedding="openai/text-embedding-ada-002")
source = client.sources.create(name="test_source", embedding="openai/text-embedding-3-small")
assert len(client.sources.list()) == 1
# Attach
@@ -279,7 +279,7 @@ def test_delete_source_removes_source_blocks_correctly(client: LettaSDKClient, a
def test_agent_uses_open_close_file_correctly(client: LettaSDKClient, agent_state: AgentState):
# Create a new source
source = client.sources.create(name="test_source", embedding="openai/text-embedding-ada-002")
source = client.sources.create(name="test_source", embedding="openai/text-embedding-3-small")
sources_list = client.sources.list()
assert len(sources_list) == 1
@@ -388,7 +388,7 @@ def test_agent_uses_open_close_file_correctly(client: LettaSDKClient, agent_stat
def test_agent_uses_search_files_correctly(client: LettaSDKClient, agent_state: AgentState):
# Create a new source
source = client.sources.create(name="test_source", embedding="openai/text-embedding-ada-002")
source = client.sources.create(name="test_source", embedding="openai/text-embedding-3-small")
sources_list = client.sources.list()
assert len(sources_list) == 1
@@ -440,7 +440,7 @@ def test_agent_uses_search_files_correctly(client: LettaSDKClient, agent_state:
def test_agent_uses_grep_correctly(client: LettaSDKClient, agent_state: AgentState):
# Create a new source
source = client.sources.create(name="test_source", embedding="openai/text-embedding-ada-002")
source = client.sources.create(name="test_source", embedding="openai/text-embedding-3-small")
sources_list = client.sources.list()
assert len(sources_list) == 1
@@ -490,7 +490,7 @@ def test_agent_uses_grep_correctly(client: LettaSDKClient, agent_state: AgentSta
def test_view_ranges_have_metadata(client: LettaSDKClient, agent_state: AgentState):
# Create a new source
source = client.sources.create(name="test_source", embedding="openai/text-embedding-ada-002")
source = client.sources.create(name="test_source", embedding="openai/text-embedding-3-small")
sources_list = client.sources.list()
assert len(sources_list) == 1