fix(core): handle None message_ids in context window calculator (#9330)

* fix(core): always create system message even with _init_with_no_messages

  When _init_with_no_messages=True (used by agent import flows), the agent was created with message_ids=None. If subsequent message initialization failed, this left orphaned agents that crash when the context window is calculated (TypeError on message_ids[1:]).

  Now the system message is always generated and persisted, even when skipping the rest of the initial message sequence. This ensures every agent has at least message_ids=[system_message_id].

  Fixes Datadog issue 773a24ea-eeb3-11f0-8f9f-da7ad0900000

  🐾 Generated with [Letta Code](https://letta.com)

  Co-Authored-By: Letta <noreply@letta.com>

* fix(core): clean up placeholder messages during import and add test

  Delete placeholder system messages after imported messages are successfully created (not before), so agents retain their safety-net system message if import fails. Also adds a test verifying that _init_with_no_messages=True still produces a valid context window.

  🐾 Generated with [Letta Code](https://letta.com)

  Co-Authored-By: Letta <noreply@letta.com>

* fix(core): add descriptive error for empty message_ids in get_system_message

  🐾 Generated with [Letta Code](https://letta.com)

  Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: Letta <noreply@letta.com>
2026-02-18 18:40:26 -08:00
parent e8d5922ff9
commit e65795b5f1
3 changed files with 59 additions and 11 deletions
--- a/letta/services/agent_manager.py
+++ b/letta/services/agent_manager.py
@@ -24,6 +24,8 @@ from letta.constants import (
    INCLUDE_MODEL_KEYWORDS_BASE_TOOL_RULES,
    RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE,
 )
+
+from letta.errors import LettaAgentNotFoundError, LettaError, LettaInvalidArgumentError
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.log import get_logger
@@ -598,21 +600,27 @@ class AgentManager:
                    result.tool_exec_environment_variables = env_vars
                    result.secrets = env_vars

-                # initial message sequence (skip if _init_with_no_messages is True)
+                # initial message sequence (skip non-system messages if _init_with_no_messages is True)
                if not _init_with_no_messages:
                    init_messages = await self._generate_initial_message_sequence_async(
                        actor,
                        agent_state=result,
                        supplied_initial_message_sequence=agent_create.initial_message_sequence,
                    )
+                else:
+                    all_messages = await initialize_message_sequence_async(
+                        agent_state=result, memory_edit_timestamp=get_utc_time(), include_initial_boot_message=True
+                    )
+                    init_messages = [
+                        PydanticMessage.dict_to_message(
+                            agent_id=result.id, model=result.llm_config.model, openai_message_dict=all_messages[0]
+                        )
+                    ]
+
                result.message_ids = [msg.id for msg in init_messages]
                new_agent.message_ids = [msg.id for msg in init_messages]
                await new_agent.update_async(session, no_refresh=True)
-                else:
-                    init_messages = []

-        # Only create messages if we initialized with messages
-        if not _init_with_no_messages:
        await self.message_manager.create_many_messages_async(
            pydantic_msgs=init_messages, actor=actor, project_id=result.project_id, template_id=result.template_id
        )
@@ -1320,6 +1328,11 @@ class AgentManager:
    @trace_method
    def get_system_message(self, agent_id: str, actor: PydanticUser) -> PydanticMessage:
        message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
+        if not message_ids:
+            raise LettaError(
+                message=f"Agent {agent_id} has no in-context messages. "
+                "This typically means the agent's system message was not initialized correctly.",
+            )
        return self.message_manager.get_message_by_id(message_id=message_ids[0], actor=actor)

    @enforce_types
@@ -1327,6 +1340,11 @@ class AgentManager:
    @trace_method
    async def get_system_message_async(self, agent_id: str, actor: PydanticUser) -> PydanticMessage:
        agent = await self.get_agent_by_id_async(agent_id=agent_id, include_relationships=[], actor=actor)
+        if not agent.message_ids:
+            raise LettaError(
+                message=f"Agent {agent_id} has no in-context messages. "
+                "This typically means the agent's system message was not initialized correctly.",
+            )
        return await self.message_manager.get_message_by_id_async(message_id=agent.message_ids[0], actor=actor)

    # TODO: This is duplicated below
--- a/letta/services/agent_serialization_manager.py
+++ b/letta/services/agent_serialization_manager.py
@@ -755,6 +755,10 @@ class AgentSerializationManager:
                agent_db_id = file_to_db_ids[agent_schema.id]
                message_file_to_db_ids = {}

+                # Save placeholder message IDs so we can clean them up after successful import
+                agent_state = await self.agent_manager.get_agent_by_id_async(agent_db_id, actor)
+                placeholder_message_ids = list(agent_state.message_ids) if agent_state.message_ids else []
+
                # Create messages for this agent
                messages = []
                for message_schema in agent_schema.messages:
@@ -780,6 +784,10 @@ class AgentSerializationManager:
                # Update agent with the correct message_ids
                await self.agent_manager.update_message_ids_async(agent_id=agent_db_id, message_ids=in_context_db_ids, actor=actor)

+                # Clean up placeholder messages now that import succeeded
+                for placeholder_id in placeholder_message_ids:
+                    await self.message_manager.delete_message_by_id_async(message_id=placeholder_id, actor=actor)
+
            # 8. Create file-agent relationships (depends on agents and files)
            for agent_schema in schema.agents:
                if agent_schema.files_agents:
--- a/tests/test_agent_serialization_v2.py
+++ b/tests/test_agent_serialization_v2.py
@@ -1538,7 +1538,29 @@ class TestAgentFileEdgeCases:
        imported_agent_id = next(db_id for file_id, db_id in result.id_mappings.items() if file_id == "agent-0")
        imported_agent = await server.agent_manager.get_agent_by_id_async(imported_agent_id, other_user)

-        assert len(imported_agent.message_ids) == 0
+        assert len(imported_agent.message_ids) == 1
+
+    async def test_init_with_no_messages_still_has_system_message(self, server, default_user):
+        """Test that _init_with_no_messages=True still creates a system message so context window doesn't crash."""
+        create_agent_request = CreateAgent(
+            name="partially_initialized_agent",
+            system="Test system prompt",
+            llm_config=LLMConfig.default_config("gpt-4o-mini"),
+            embedding_config=EmbeddingConfig.default_config(provider="openai"),
+            initial_message_sequence=[],
+        )
+
+        agent_state = await server.agent_manager.create_agent_async(
+            agent_create=create_agent_request,
+            actor=default_user,
+            _init_with_no_messages=True,
+        )
+
+        assert agent_state.message_ids is not None
+        assert len(agent_state.message_ids) == 1
+
+        context_window = await server.agent_manager.get_context_window(agent_id=agent_state.id, actor=default_user)
+        assert context_window is not None

    async def test_large_agent_file(self, server, agent_serialization_manager, default_user, other_user, weather_tool):
        """Test handling of larger agent files with many messages."""