diff --git a/letta/schemas/agent.py b/letta/schemas/agent.py
index 939df981..799ca473 100644
--- a/letta/schemas/agent.py
+++ b/letta/schemas/agent.py
@@ -347,20 +347,24 @@ def get_prompt_template_for_agent_type(agent_type: Optional[AgentType] = None):
"{% for block in file_blocks %}"
f"\n"
"<{{ block.label }}>\n"
+ "{% if block.description %}"
"\n"
"{{ block.description }}\n"
"\n"
+ "{% endif %}"
""
"{% if block.read_only %}\n- read_only=true{% endif %}\n"
"- chars_current={{ block.value|length }}\n"
"- chars_limit={{ block.limit }}\n"
"\n"
+ "{% if block.value %}"
"\n"
f"{CORE_MEMORY_LINE_NUMBER_WARNING}\n"
"{% for line in block.value.split('\\n') %}"
- "Line {{ loop.index }}: {{ line }}\n"
+ "{{ loop.index }}: {{ line }}\n"
"{% endfor %}"
"\n"
+ "{% endif %}"
"{{ block.label }}>\n"
"\n"
"{% if not loop.last %}\n{% endif %}"
@@ -399,17 +403,21 @@ def get_prompt_template_for_agent_type(agent_type: Optional[AgentType] = None):
"{% for block in file_blocks %}"
f"\n"
"<{{ block.label }}>\n"
+ "{% if block.description %}"
"\n"
"{{ block.description }}\n"
"\n"
+ "{% endif %}"
""
"{% if block.read_only %}\n- read_only=true{% endif %}\n"
"- chars_current={{ block.value|length }}\n"
"- chars_limit={{ block.limit }}\n"
"\n"
+ "{% if block.value %}"
"\n"
"{{ block.value }}\n"
"\n"
+ "{% endif %}"
"{{ block.label }}>\n"
"\n"
"{% if not loop.last %}\n{% endif %}"
diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py
index 68bbc8ef..15496985 100644
--- a/letta/server/rest_api/routers/v1/agents.py
+++ b/letta/server/rest_api/routers/v1/agents.py
@@ -323,17 +323,7 @@ async def attach_source(
agent_state = await server.agent_manager.attach_missing_files_tools_async(agent_state=agent_state, actor=actor)
files = await server.file_manager.list_files(source_id, actor, include_content=True)
- texts = []
- file_ids = []
- file_names = []
- for f in files:
- texts.append(f.content if f.content else "")
- file_ids.append(f.id)
- file_names.append(f.file_name)
-
- await server.insert_files_into_context_window(
- agent_state=agent_state, texts=texts, file_ids=file_ids, file_names=file_names, actor=actor
- )
+ await server.insert_files_into_context_window(agent_state=agent_state, file_metadata_with_content=files, actor=actor)
if agent_state.enable_sleeptime:
source = await server.source_manager.get_source_by_id(source_id=source_id)
diff --git a/letta/server/server.py b/letta/server/server.py
index ba3665f4..fb11fc2d 100644
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -43,6 +43,7 @@ from letta.schemas.embedding_config import EmbeddingConfig
# openai schemas
from letta.schemas.enums import JobStatus, MessageStreamStatus, ProviderCategory, ProviderType
from letta.schemas.environment_variables import SandboxEnvironmentVariableCreate
+from letta.schemas.file import FileMetadata
from letta.schemas.group import GroupCreate, ManagerType, SleeptimeManager, VoiceSleeptimeManager
from letta.schemas.job import Job, JobUpdate
from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage, MessageType, ToolReturnMessage
@@ -82,6 +83,7 @@ from letta.server.rest_api.utils import sse_async_generator
from letta.services.agent_manager import AgentManager
from letta.services.block_manager import BlockManager
from letta.services.file_manager import FileManager
+from letta.services.file_processor.chunker.line_chunker import LineChunker
from letta.services.files_agents_manager import FileAgentManager
from letta.services.group_manager import GroupManager
from letta.services.helpers.tool_execution_helper import prepare_local_sandbox
@@ -827,8 +829,6 @@ class SyncServer(Server):
self,
request: CreateAgent,
actor: User,
- # interface
- interface: Union[AgentInterface, None] = None,
) -> AgentState:
if request.llm_config is None:
if request.model is None:
@@ -868,6 +868,16 @@ class SyncServer(Server):
)
log_event(name="end create_agent db")
+ log_event(name="start insert_files_into_context_window db")
+ if request.source_ids:
+ for source_id in request.source_ids:
+ files = await self.file_manager.list_files(source_id, actor, include_content=True)
+ await self.insert_files_into_context_window(agent_state=main_agent, file_metadata_with_content=files, actor=actor)
+
+ main_agent = await self.agent_manager.refresh_file_blocks(agent_state=main_agent, actor=actor)
+ main_agent = await self.agent_manager.attach_missing_files_tools_async(agent_state=main_agent, actor=actor)
+ log_event(name="end insert_files_into_context_window db")
+
if request.enable_sleeptime:
if request.agent_type == AgentType.voice_convo_agent:
main_agent = await self.create_voice_sleeptime_agent_async(main_agent=main_agent, actor=actor)
@@ -1371,15 +1381,23 @@ class SyncServer(Server):
)
await self.agent_manager.delete_agent_async(agent_id=sleeptime_agent_state.id, actor=actor)
- async def _upsert_file_to_agent(self, agent_id: str, text: str, file_id: str, file_name: str, actor: User) -> List[str]:
+ async def _upsert_file_to_agent(self, agent_id: str, file_metadata_with_content: FileMetadata, actor: User) -> List[str]:
"""
Internal method to create or update a file <-> agent association
Returns:
List of file names that were closed due to LRU eviction
"""
+ # TODO: Maybe have LineChunker object be on the server level?
+ # NOTE(review): content looks optional on FileMetadata - confirm. None becomes an empty view instead of reaching the chunker.
+ content_lines = [] if file_metadata_with_content.content is None else LineChunker().chunk_text(file_metadata=file_metadata_with_content)
+ visible_content = "\n".join(content_lines)
file_agent, closed_files = await self.file_agent_manager.attach_file(
- agent_id=agent_id, file_id=file_id, file_name=file_name, actor=actor, visible_content=text
+ agent_id=agent_id,
+ file_id=file_metadata_with_content.id,
+ file_name=file_metadata_with_content.file_name,
+ actor=actor,
+ visible_content=visible_content,
)
return closed_files
@@ -1397,7 +1415,7 @@ class SyncServer(Server):
logger.info(f"File {file_id} already removed from agent {agent_id}, skipping...")
async def insert_file_into_context_windows(
- self, source_id: str, text: str, file_id: str, file_name: str, actor: User, agent_states: Optional[List[AgentState]] = None
+ self, source_id: str, file_metadata_with_content: FileMetadata, actor: User, agent_states: Optional[List[AgentState]] = None
) -> List[AgentState]:
"""
Insert the uploaded document into the context window of all agents
@@ -1414,7 +1432,7 @@ class SyncServer(Server):
# Collect any files that were closed due to LRU eviction during bulk attach
all_closed_files = await asyncio.gather(
- *(self._upsert_file_to_agent(agent_state.id, text, file_id, file_name, actor) for agent_state in agent_states)
+ *(self._upsert_file_to_agent(agent_state.id, file_metadata_with_content, actor) for agent_state in agent_states)
)
# Flatten and log if any files were closed
closed_files = [file for closed_list in all_closed_files for file in closed_list]
@@ -1424,7 +1442,7 @@ class SyncServer(Server):
return agent_states
async def insert_files_into_context_window(
- self, agent_state: AgentState, texts: List[str], file_ids: List[str], file_names: List[str], actor: User
+ self, agent_state: AgentState, file_metadata_with_content: List[FileMetadata], actor: User
) -> None:
"""
Insert the uploaded documents into the context window of an agent
@@ -1432,15 +1450,9 @@ class SyncServer(Server):
"""
logger.info(f"Inserting documents into context window for agent_state: {agent_state.id}")
- if len(texts) != len(file_ids):
- raise ValueError(f"Mismatch between number of texts ({len(texts)}) and file ids ({len(file_ids)})")
-
# Collect any files that were closed due to LRU eviction during bulk insert
all_closed_files = await asyncio.gather(
- *(
- self._upsert_file_to_agent(agent_state.id, text, file_id, file_name, actor)
- for text, file_id, file_name in zip(texts, file_ids, file_names)
- )
+ *(self._upsert_file_to_agent(agent_state.id, file_metadata, actor) for file_metadata in file_metadata_with_content)
)
# Flatten and log if any files were closed
closed_files = [file for closed_list in all_closed_files for file in closed_list]
diff --git a/letta/services/agent_manager.py b/letta/services/agent_manager.py
index d8a6c1e5..0542f3c1 100644
--- a/letta/services/agent_manager.py
+++ b/letta/services/agent_manager.py
@@ -1704,6 +1704,13 @@ class AgentManager:
return agent_state
+ @trace_method
+ @enforce_types
+ async def refresh_file_blocks(self, agent_state: PydanticAgentState, actor: PydanticUser) -> PydanticAgentState:
+ """Re-list this agent's files as blocks, store the non-None ones on `agent_state.memory.file_blocks`, and return the same state."""
+ agent_state.memory.file_blocks = [blk for blk in await self.file_agent_manager.list_files_for_agent(agent_id=agent_state.id, actor=actor, return_as_blocks=True) if blk is not None]
+ return agent_state
+
# ======================================================================================================================
# Source Management
# ======================================================================================================================
diff --git a/letta/services/file_processor/chunker/line_chunker.py b/letta/services/file_processor/chunker/line_chunker.py
index 9bf65bea..4a13f444 100644
--- a/letta/services/file_processor/chunker/line_chunker.py
+++ b/letta/services/file_processor/chunker/line_chunker.py
@@ -99,10 +99,11 @@ class LineChunker:
return [line for line in lines if line.strip()]
def chunk_text(
- self, text: str, file_metadata: FileMetadata, start: Optional[int] = None, end: Optional[int] = None, add_metadata: bool = True
+ self, file_metadata: FileMetadata, start: Optional[int] = None, end: Optional[int] = None, add_metadata: bool = True
) -> List[str]:
"""Content-aware text chunking based on file type"""
strategy = self._determine_chunking_strategy(file_metadata)
+ text = file_metadata.content
# Apply the appropriate chunking strategy
if strategy == ChunkingStrategy.DOCUMENTATION:
diff --git a/letta/services/file_processor/file_processor.py b/letta/services/file_processor/file_processor.py
index 36cb8b7c..e88083e7 100644
--- a/letta/services/file_processor/file_processor.py
+++ b/letta/services/file_processor/file_processor.py
@@ -75,21 +75,14 @@ class FileProcessor:
# update file with raw text
raw_markdown_text = "".join([page.markdown for page in ocr_response.pages])
- file_metadata = await self.file_manager.upsert_file_content(file_id=file_metadata.id, text=raw_markdown_text, actor=self.actor)
file_metadata = await self.file_manager.update_file_status(
file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.EMBEDDING
)
-
- # Insert to agent context window
- # TODO: Rethink this line chunking mechanism
- content_lines = self.line_chunker.chunk_text(text=raw_markdown_text, file_metadata=file_metadata)
- visible_content = "\n".join(content_lines)
+ file_metadata = await self.file_manager.upsert_file_content(file_id=file_metadata.id, text=raw_markdown_text, actor=self.actor)
await server.insert_file_into_context_windows(
source_id=source_id,
- text=visible_content,
- file_id=file_metadata.id,
- file_name=file_metadata.file_name,
+ file_metadata_with_content=file_metadata,
actor=self.actor,
agent_states=agent_states,
)
diff --git a/letta/services/files_agents_manager.py b/letta/services/files_agents_manager.py
index 5ae19a1b..7b5905cd 100644
--- a/letta/services/files_agents_manager.py
+++ b/letta/services/files_agents_manager.py
@@ -208,10 +208,7 @@ class FileAgentManager:
@enforce_types
@trace_method
async def list_files_for_agent(
- self,
- agent_id: str,
- actor: PydanticUser,
- is_open_only: bool = False,
+ self, agent_id: str, actor: PydanticUser, is_open_only: bool = False, return_as_blocks: bool = False
) -> List[PydanticFileAgent]:
"""Return associations for *agent_id* (filtering by `is_open` if asked)."""
async with db_registry.async_session() as session:
@@ -223,7 +220,11 @@ class FileAgentManager:
conditions.append(FileAgentModel.is_open.is_(True))
rows = (await session.execute(select(FileAgentModel).where(and_(*conditions)))).scalars().all()
- return [r.to_pydantic() for r in rows]
+
+ if return_as_blocks:
+ return [r.to_pydantic_block() for r in rows]
+ else:
+ return [r.to_pydantic() for r in rows]
@enforce_types
@trace_method
diff --git a/letta/services/tool_executor/files_tool_executor.py b/letta/services/tool_executor/files_tool_executor.py
index add7780e..95044e43 100644
--- a/letta/services/tool_executor/files_tool_executor.py
+++ b/letta/services/tool_executor/files_tool_executor.py
@@ -124,7 +124,7 @@ class LettaFileToolExecutor(ToolExecutor):
# TODO: Inefficient, maybe we can pre-compute this
# TODO: This is also not the best way to split things - would be cool to have "content aware" splitting
# TODO: Split code differently from large text blurbs
- content_lines = LineChunker().chunk_text(text=file.content, file_metadata=file, start=start, end=end)
+ content_lines = LineChunker().chunk_text(file_metadata=file, start=start, end=end)
visible_content = "\n".join(content_lines)
# Efficiently handle LRU eviction and file opening in a single transaction
@@ -304,7 +304,7 @@ class LettaFileToolExecutor(ToolExecutor):
# Use LineChunker to get all lines with proper formatting
chunker = LineChunker()
- formatted_lines = chunker.chunk_text(file.content, file_metadata=file)
+ formatted_lines = chunker.chunk_text(file_metadata=file)
# Remove metadata header
if formatted_lines and formatted_lines[0].startswith("[Viewing"):
diff --git a/tests/data/long_test.txt b/tests/data/long_test.txt
index 618bcc4c..f4fb0ccd 100644
--- a/tests/data/long_test.txt
+++ b/tests/data/long_test.txt
@@ -1,4 +1,4 @@
-testEnrico Letta (Italian: [enˈriːko ˈlɛtta]; born 20 August 1966) is an Italian politician who served as Prime Minister of Italy from April 2013 to February 2014, leading a grand coalition of centre-left and centre-right parties.[1] He was the leader of the Democratic Party (PD) from March 2021 to March 2023.[2]
+Enrico Letta (Italian: [enˈriːko ˈlɛtta]; born 20 August 1966) is an Italian politician who served as Prime Minister of Italy from April 2013 to February 2014, leading a grand coalition of centre-left and centre-right parties.[1] He was the leader of the Democratic Party (PD) from March 2021 to March 2023.[2]
After working as an academic, Letta entered politics in 1998 when he was appointed to the Cabinet as Minister for the Community Policies, a role he held until 1999 when he was promoted to become Minister of Industry, Commerce, and Crafts. In 2001, he left the Cabinet upon his election to the Chamber of Deputies. From 2006 to 2008, he was appointed Secretary of the Council of Ministers.[3] In 2007, Letta was one of the senior founding members of the Democratic Party, and in 2009 was elected as its Deputy Secretary.[4]
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 96010e9a..50d03e5f 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -170,7 +170,10 @@ def test_together():
)
models = provider.list_llm_models()
assert len(models) > 0
- assert models[0].handle == f"{provider.name}/{models[0].model}"
+ # Handle may be different from raw model name due to LLM_HANDLE_OVERRIDES
+ assert models[0].handle.startswith(f"{provider.name}/")
+ # Verify the handle is properly constructed via get_handle method
+ assert models[0].handle == provider.get_handle(models[0].model)
# TODO: We don't have embedding models on together for CI
# embedding_models = provider.list_embedding_models()
@@ -187,7 +190,10 @@ async def test_together_async():
)
models = await provider.list_llm_models_async()
assert len(models) > 0
- assert models[0].handle == f"{provider.name}/{models[0].model}"
+ # Handle may be different from raw model name due to LLM_HANDLE_OVERRIDES
+ assert models[0].handle.startswith(f"{provider.name}/")
+ # Verify the handle is properly constructed via get_handle method
+ assert models[0].handle == provider.get_handle(models[0].model)
# TODO: We don't have embedding models on together for CI
# embedding_models = provider.list_embedding_models()
diff --git a/tests/test_sources.py b/tests/test_sources.py
index 42a289de..d1c23e0e 100644
--- a/tests/test_sources.py
+++ b/tests/test_sources.py
@@ -182,6 +182,7 @@ def test_file_upload_creates_source_blocks_correctly(
blocks = agent_state.memory.file_blocks
assert len(blocks) == 1
assert any(expected_value in b.value for b in blocks)
+ assert any(b.value.startswith("[Viewing file start") for b in blocks)
assert any(re.fullmatch(expected_label_regex, b.label) for b in blocks)
# Remove file from source
@@ -226,6 +227,7 @@ def test_attach_existing_files_creates_source_blocks_correctly(client: LettaSDKC
blocks = agent_state.memory.file_blocks
assert len(blocks) == 1
assert any("test" in b.value for b in blocks)
+ assert any(b.value.startswith("[Viewing file start") for b in blocks)
assert any(re.fullmatch(r"test_[a-z0-9]+\.txt", b.label) for b in blocks)
# Detach the source
@@ -547,6 +549,60 @@ def test_agent_uses_grep_correctly_advanced(client: LettaSDKClient, agent_state:
assert "513:" in tool_return_message.tool_return
+def test_create_agent_with_source_ids_creates_source_blocks_correctly(client: LettaSDKClient):
+ """Test that creating an agent with source_ids parameter correctly creates source blocks."""
+ # Create a new source
+ source = client.sources.create(name="test_source", embedding="openai/text-embedding-3-small")
+ assert len(client.sources.list()) == 1
+
+ # Upload a file to the source before attaching
+ file_path = "tests/data/long_test.txt"
+ with open(file_path, "rb") as f:
+ job = client.sources.files.upload(source_id=source.id, file=f)
+
+ deadline = time.monotonic() + 600  # bounded wait (limit chosen arbitrarily): a stuck job must fail the test, not hang the run
+ while job.status not in ("completed", "failed") and time.monotonic() < deadline:
+ time.sleep(1)
+ job = client.jobs.retrieve(job_id=job.id)
+ print("Waiting for file upload job to complete...", job.status)
+
+ if job.status != "completed":
+ pytest.fail(f"File upload job did not complete (status={job.status}). Check error logs.")
+
+ # Get uploaded files to verify
+ files = client.sources.files.list(source_id=source.id, limit=1)
+ assert len(files) == 1
+ assert files[0].source_id == source.id
+
+ # Create agent with source_ids parameter
+ temp_agent_state = client.agents.create(
+ name="test_agent_with_sources",
+ memory_blocks=[
+ CreateBlock(
+ label="human",
+ value="username: sarah",
+ ),
+ ],
+ model="openai/gpt-4o-mini",
+ embedding="openai/text-embedding-3-small",
+ source_ids=[source.id], # Attach source during creation
+ )
+
+ # Verify agent was created successfully
+ assert temp_agent_state is not None
+ assert temp_agent_state.name == "test_agent_with_sources"
+
+ # Check that source blocks were created correctly
+ blocks = temp_agent_state.memory.file_blocks
+ assert len(blocks) == 1
+ assert any(b.value.startswith("[Viewing file start (out of 554 chunks)]") for b in blocks)
+ assert any(re.fullmatch(r"long_test_[a-z0-9]+\.txt", b.label) for b in blocks)
+
+ # Verify file tools were automatically attached
+ file_tools = {tool.name for tool in temp_agent_state.tools if tool.tool_type == ToolType.LETTA_FILES_CORE}
+ assert file_tools == set(FILES_TOOLS)
+
+
def test_view_ranges_have_metadata(client: LettaSDKClient, agent_state: AgentState):
# Create a new source
source = client.sources.create(name="test_source", embedding="openai/text-embedding-3-small")
diff --git a/tests/test_tool_rule_solver.py b/tests/test_tool_rule_solver.py
index bdff9e2e..d319b575 100644
--- a/tests/test_tool_rule_solver.py
+++ b/tests/test_tool_rule_solver.py
@@ -250,17 +250,6 @@ def test_required_before_exit_tool_rule_mixed_with_other_tools():
assert solver.get_uncalled_required_tools() == [], "Should return empty list after required tool is called"
-def test_required_before_exit_tool_rule_is_terminal():
- """Test that required-before-exit tools are considered terminal tools."""
- required_rule = RequiredBeforeExitToolRule(tool_name=SAVE_TOOL)
- terminal_rule = TerminalToolRule(tool_name=END_TOOL)
- solver = ToolRulesSolver(tool_rules=[required_rule, terminal_rule])
-
- assert solver.is_terminal_tool(SAVE_TOOL) is True, "Required-before-exit tool should be considered terminal"
- assert solver.is_terminal_tool(END_TOOL) is True, "Regular terminal tool should still be considered terminal"
- assert solver.is_terminal_tool(START_TOOL) is False, "Non-terminal tool should not be considered terminal"
-
-
def test_required_before_exit_tool_rule_clear_history():
"""Test that clearing history resets the required tools state."""
required_rule = RequiredBeforeExitToolRule(tool_name=SAVE_TOOL)