feat: add new letta error message stream response type (#6192)

This commit is contained in:
cthomas
2025-11-19 16:11:22 -08:00
committed by Caren Thomas
parent 1d71468ab2
commit 1be2f61f05
11 changed files with 204 additions and 69 deletions

View File

@@ -615,6 +615,7 @@ paths:
- $ref: '#/components/schemas/SummaryMessage'
- $ref: '#/components/schemas/EventMessage'
- $ref: '#/components/schemas/LettaPing'
- $ref: '#/components/schemas/LettaErrorMessage'
- $ref: '#/components/schemas/LettaStopReason'
- $ref: '#/components/schemas/LettaUsageStatistics'
/v1/agents/{agent_id}/messages/cancel:

View File

@@ -14373,6 +14373,9 @@
{
"$ref": "#/components/schemas/LettaPing"
},
{
"$ref": "#/components/schemas/LettaErrorMessage"
},
{
"$ref": "#/components/schemas/LettaStopReason"
},
@@ -29007,6 +29010,41 @@
"required": ["agent_id"],
"title": "LettaBatchRequest"
},
"LettaErrorMessage": {
"properties": {
"message_type": {
"type": "string",
"const": "error_message",
"title": "Message Type",
"description": "The type of the message.",
"default": "error_message"
},
"run_id": {
"type": "string",
"title": "Run ID",
"description": "The ID of the run."
},
"error_type": {
"type": "string",
"title": "Error Type",
"description": "The type of error."
},
"message": {
"type": "string",
"title": "Message",
"description": "The error message."
},
"detail": {
"type": "string",
"title": "Detail",
"description": "An optional error detail."
}
},
"type": "object",
"required": ["message_type", "run_id", "error_type", "message"],
"title": "LettaErrorMessage",
"description": "Error messages are used to notify the client of an error that occurred during the agent's execution."
},
"LettaImage": {
"properties": {
"type": {
@@ -29480,6 +29518,9 @@
{
"$ref": "#/components/schemas/LettaPing"
},
{
"$ref": "#/components/schemas/LettaErrorMessage"
},
{
"$ref": "#/components/schemas/LettaStopReason"
},
@@ -29495,6 +29536,7 @@
"approval_request_message": "#/components/schemas/ApprovalRequestMessage",
"approval_response_message": "#/components/schemas/ApprovalResponseMessage",
"assistant_message": "#/components/schemas/AssistantMessage",
"error_message": "#/components/schemas/LettaErrorMessage",
"hidden_reasoning_message": "#/components/schemas/HiddenReasoningMessage",
"ping": "#/components/schemas/LettaPing",
"reasoning_message": "#/components/schemas/ReasoningMessage",

View File

@@ -28,7 +28,7 @@ from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import JobStatus
from letta.schemas.file import FileMetadata
from letta.schemas.job import Job
from letta.schemas.letta_message import LettaMessage, LettaPing
from letta.schemas.letta_message import LettaErrorMessage, LettaMessage, LettaPing
from letta.schemas.letta_stop_reason import LettaStopReason
from letta.schemas.llm_config import LLMConfig
from letta.schemas.memory import ArchivalMemorySummary, BasicBlockMemory, ChatMemory, Memory, RecallMemorySummary

View File

@@ -28,7 +28,7 @@ from letta.local_llm.constants import INNER_THOUGHTS_KWARG
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState
from letta.schemas.enums import MessageRole
from letta.schemas.letta_message import ApprovalReturn, LettaMessage, MessageType
from letta.schemas.letta_message import ApprovalReturn, LettaErrorMessage, LettaMessage, MessageType
from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
from letta.schemas.letta_response import LettaResponse
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
@@ -319,14 +319,13 @@ class LettaAgentV3(LettaAgentV2):
yield f"data: {self.stop_reason.model_dump_json()}\n\n"
# Mid-stream error: yield error event to client in SSE format
error_chunk = {
"error": {
"type": "internal_error",
"message": "An error occurred during agent execution.",
"detail": str(e),
}
}
yield f"event: error\ndata: {json.dumps(error_chunk)}\n\n"
error_message = LettaErrorMessage(
run_id=run_id,
error_type="internal_error",
message="An error occurred during agent execution.",
detail=str(e),
)
yield f"event: error\ndata: {error_message.model_dump_json()}\n\n"
# Return immediately - don't fall through to finish chunks
# This prevents sending end_turn finish chunks after an error
@@ -360,15 +359,16 @@ class LettaAgentV3(LettaAgentV2):
if self.stop_reason is None:
self.stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
yield f"data: {self.stop_reason.model_dump_json()}\n\n"
# Send error event
error_chunk = {
"error": {
"type": "cleanup_error",
"message": "An error occurred during stream finalization.",
"detail": str(cleanup_error),
}
}
yield f"event: error\ndata: {json.dumps(error_chunk)}\n\n"
error_message = LettaErrorMessage(
run_id=run_id,
error_type="cleanup_error",
message="An error occurred during stream finalization.",
detail=str(cleanup_error),
)
yield f"event: error\ndata: {error_message.model_dump_json()}\n\n"
# Note: we don't send finish chunks here since we already errored
@trace_method

View File

@@ -368,6 +368,24 @@ class LettaPing(LettaMessage):
)
class LettaErrorMessage(BaseModel):
    """
    Streamed notification that an error occurred during the agent's
    execution, emitted mid SSE stream so clients can surface the failure
    without waiting for the stream to close.

    Args:
        run_id (str): The ID of the run
        error_type (str): The type of error
        message (str): The error message
        detail (Optional[str]): An optional error detail
    """

    # Literal discriminator value used by the streaming-response union.
    message_type: Literal["error_message"] = "error_message"
    run_id: str
    error_type: str
    message: str
    detail: Optional[str] = None
class SummaryMessage(LettaMessage):
"""
A message representing a summary of the conversation. Sent to the LLM as a user or system message depending on the provider.
@@ -458,6 +476,44 @@ def create_letta_ping_schema():
}
def create_letta_error_message_schema():
    """Return the hand-written OpenAPI/JSON-schema fragment for ``LettaErrorMessage``.

    The fragment is injected into the generated OpenAPI document (alongside
    the other ``create_letta_*_schema`` helpers) so SDK generators see the
    streamed error message type.

    Returns:
        dict: JSON-schema object describing the error message payload.
    """

    def _string_field(title, description):
        # Every non-discriminator field of LettaErrorMessage is a plain
        # string on the wire; this keeps the property table compact.
        return {"type": "string", "title": title, "description": description}

    properties = {
        "message_type": {
            "type": "string",
            "const": "error_message",
            "title": "Message Type",
            "description": "The type of the message.",
            "default": "error_message",
        },
        "run_id": _string_field("Run ID", "The ID of the run."),
        "error_type": _string_field("Error Type", "The type of error."),
        "message": _string_field("Message", "The error message."),
        "detail": _string_field("Detail", "An optional error detail."),
    }

    return {
        "properties": properties,
        "type": "object",
        "required": ["message_type", "run_id", "error_type", "message"],
        "title": "LettaErrorMessage",
        "description": "Error messages are used to notify the client of an error that occurred during the agent's execution.",
    }
# --------------------------
# Message Update API Schemas
# --------------------------

View File

@@ -13,6 +13,7 @@ from letta.schemas.letta_message import (
ApprovalResponseMessage,
AssistantMessage,
HiddenReasoningMessage,
LettaErrorMessage,
LettaMessage,
LettaMessageUnion,
LettaPing,
@@ -201,6 +202,7 @@ class LettaStreamingResponse(RootModel):
ApprovalRequestMessage,
ApprovalResponseMessage,
LettaPing,
LettaErrorMessage,
LettaStopReason,
LettaUsageStatistics,
] = Field(..., discriminator="message_type")

View File

@@ -63,7 +63,7 @@ from letta.jobs.scheduler import start_scheduler_with_leader_election
from letta.log import get_logger
from letta.orm.errors import DatabaseTimeoutError, ForeignKeyConstraintViolationError, NoResultFound, UniqueConstraintViolationError
from letta.otel.tracing import get_trace_id
from letta.schemas.letta_message import create_letta_message_union_schema, create_letta_ping_schema
from letta.schemas.letta_message import create_letta_error_message_schema, create_letta_message_union_schema, create_letta_ping_schema
from letta.schemas.letta_message_content import (
create_letta_assistant_message_content_union_schema,
create_letta_message_content_union_schema,
@@ -110,6 +110,7 @@ def generate_openapi_schema(app: FastAPI):
letta_docs["components"]["schemas"]["LettaAssistantMessageContentUnion"] = create_letta_assistant_message_content_union_schema()
letta_docs["components"]["schemas"]["LettaUserMessageContentUnion"] = create_letta_user_message_content_union_schema()
letta_docs["components"]["schemas"]["LettaPing"] = create_letta_ping_schema()
letta_docs["components"]["schemas"]["LettaErrorMessage"] = create_letta_error_message_schema()
# Update the app's schema with our modified version
app.openapi_schema = letta_docs

View File

@@ -9,7 +9,8 @@ from typing import AsyncIterator, Dict, List, Optional
from letta.data_sources.redis_client import AsyncRedisClient
from letta.log import get_logger
from letta.schemas.enums import RunStatus
from letta.schemas.letta_stop_reason import StopReasonType
from letta.schemas.letta_message import LettaErrorMessage
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
from letta.schemas.run import RunUpdate
from letta.schemas.user import User
from letta.server.rest_api.streaming_response import RunCancelledException
@@ -266,8 +267,14 @@ async def create_background_stream_processor(
saw_done = True
else:
# No stop_reason and no terminal - this is an error condition
error_chunk = {"error": "Stream ended unexpectedly without stop_reason", "code": "STREAM_INCOMPLETE"}
await writer.write_chunk(run_id=run_id, data=f"event: error\ndata: {json.dumps(error_chunk)}\n\n", is_complete=False)
error_message = LettaErrorMessage(
run_id=run_id,
error_type="stream_incomplete",
message="Stream ended unexpectedly without stop_reason.",
detail=None,
)
yield f"data: {LettaStopReason(stop_reason=StopReasonType.error).model_dump_json()}\n\n"
yield f"event: error\ndata: {error_message.model_dump_json()}\n\n"
await writer.write_chunk(run_id=run_id, data="data: [DONE]\n\n", is_complete=True)
saw_error = True
saw_done = True
@@ -284,8 +291,17 @@ async def create_background_stream_processor(
except Exception as e:
logger.error(f"Error processing stream for run {run_id}: {e}")
# Write error chunk
error_chunk = {"error": str(e), "code": "INTERNAL_SERVER_ERROR"}
await writer.write_chunk(run_id=run_id, data=f"event: error\ndata: {json.dumps(error_chunk)}\n\n", is_complete=False)
stop_reason = StopReasonType.error.value
error_message = LettaErrorMessage(
run_id=run_id,
error_type="internal_error",
message="An unknown error occurred with the LLM streaming request.",
detail=str(e),
)
await writer.write_chunk(
run_id=run_id, data=f"data: {LettaStopReason(stop_reason=stop_reason).model_dump_json()}\n\n", is_complete=False
)
await writer.write_chunk(run_id=run_id, data=f"event: error\ndata: {error_message.model_dump_json()}\n\n", is_complete=False)
await writer.write_chunk(run_id=run_id, data="data: [DONE]\n\n", is_complete=True)
saw_error = True
saw_done = True

View File

@@ -282,6 +282,7 @@ async def delete_run(
{"$ref": "#/components/schemas/ApprovalRequestMessage"},
{"$ref": "#/components/schemas/ApprovalResponseMessage"},
{"$ref": "#/components/schemas/LettaPing"},
{"$ref": "#/components/schemas/LettaErrorMessage"},
{"$ref": "#/components/schemas/LettaStopReason"},
{"$ref": "#/components/schemas/LettaUsageStatistics"},
]

View File

@@ -27,7 +27,7 @@ from letta.otel.metric_registry import MetricRegistry
from letta.schemas.agent import AgentState
from letta.schemas.enums import AgentType, MessageStreamStatus, RunStatus
from letta.schemas.job import LettaRequestConfig
from letta.schemas.letta_message import AssistantMessage, MessageType
from letta.schemas.letta_message import AssistantMessage, LettaErrorMessage, MessageType
from letta.schemas.letta_message_content import TextContent
from letta.schemas.letta_request import LettaStreamingRequest
from letta.schemas.letta_response import LettaResponse
@@ -331,19 +331,20 @@ class StreamingService:
f"Stream for run {run_id} ended without terminal event. "
f"Agent stop_reason: {agent_loop.stop_reason}. Emitting error + [DONE]."
)
error_chunk = {
"error": {
"type": "stream_incomplete",
"message": "Stream ended unexpectedly without a terminal event.",
"detail": None,
}
}
yield f"event: error\ndata: {json.dumps(error_chunk)}\n\n"
stop_reason = LettaStopReason(stop_reason=StopReasonType.error)
error_message = LettaErrorMessage(
run_id=run_id,
error_type="stream_incomplete",
message="Stream ended unexpectedly without a terminal event.",
detail=None,
)
yield f"data: {stop_reason.model_dump_json()}\n\n"
yield f"event: error\ndata: {error_message.model_dump_json()}\n\n"
yield "data: [DONE]\n\n"
saw_error = True
saw_done = True
run_status = RunStatus.failed
stop_reason = StopReasonType.error
else:
# set run status after successful completion
if agent_loop.stop_reason and agent_loop.stop_reason.stop_reason.value == "cancelled":
@@ -354,60 +355,72 @@ class StreamingService:
except LLMTimeoutError as e:
run_status = RunStatus.failed
error_data = {"error": {"type": "llm_timeout", "message": "The LLM request timed out. Please try again.", "detail": str(e)}}
stop_reason = StopReasonType.llm_api_error
logger.error(f"Run {run_id} stopped with LLM timeout error: {e}, error_data: {error_data}")
yield f"event: error\ndata: {json.dumps(error_data)}\n\n"
stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error)
error_message = LettaErrorMessage(
run_id=run_id,
error_type="llm_timeout",
message="The LLM request timed out. Please try again.",
detail=str(e),
)
logger.error(f"Run {run_id} stopped with LLM timeout error: {e}, error_data: {error_message.model_dump()}")
yield f"data: {stop_reason.model_dump_json()}\n\n"
yield f"event: error\ndata: {error_message.model_dump_json()}\n\n"
# Send [DONE] marker to properly close the stream
yield "data: [DONE]\n\n"
except LLMRateLimitError as e:
run_status = RunStatus.failed
error_data = {
"error": {
"type": "llm_rate_limit",
"message": "Rate limit exceeded for LLM model provider. Please wait before making another request.",
"detail": str(e),
}
}
stop_reason = StopReasonType.llm_api_error
logger.warning(f"Run {run_id} stopped with LLM rate limit error: {e}, error_data: {error_data}")
yield f"event: error\ndata: {json.dumps(error_data)}\n\n"
stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error)
error_message = LettaErrorMessage(
run_id=run_id,
error_type="llm_rate_limit",
message="Rate limit exceeded for LLM model provider. Please wait before making another request.",
detail=str(e),
)
logger.warning(f"Run {run_id} stopped with LLM rate limit error: {e}, error_data: {error_message.model_dump()}")
yield f"data: {stop_reason.model_dump_json()}\n\n"
yield f"event: error\ndata: {error_message.model_dump_json()}\n\n"
# Send [DONE] marker to properly close the stream
yield "data: [DONE]\n\n"
except LLMAuthenticationError as e:
run_status = RunStatus.failed
error_data = {
"error": {
"type": "llm_authentication",
"message": "Authentication failed with the LLM model provider.",
"detail": str(e),
}
}
logger.warning(f"Run {run_id} stopped with LLM authentication error: {e}, error_data: {error_data}")
stop_reason = StopReasonType.llm_api_error
yield f"event: error\ndata: {json.dumps(error_data)}\n\n"
stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error)
error_message = LettaErrorMessage(
run_id=run_id,
error_type="llm_authentication",
message="Authentication failed with the LLM model provider.",
detail=str(e),
)
yield f"data: {stop_reason.model_dump_json()}\n\n"
yield f"event: error\ndata: {error_message.model_dump_json()}\n\n"
# Send [DONE] marker to properly close the stream
yield "data: [DONE]\n\n"
except LLMError as e:
run_status = RunStatus.failed
error_data = {"error": {"type": "llm_error", "message": "An error occurred with the LLM request.", "detail": str(e)}}
logger.error(f"Run {run_id} stopped with LLM error: {e}, error_data: {error_data}")
stop_reason = StopReasonType.llm_api_error
yield f"event: error\ndata: {json.dumps(error_data)}\n\n"
stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error)
error_message = LettaErrorMessage(
run_id=run_id,
error_type="llm_error",
message="An error occurred with the LLM request.",
detail=str(e),
)
yield f"data: {stop_reason.model_dump_json()}\n\n"
yield f"event: error\ndata: {error_message.model_dump_json()}\n\n"
# Send [DONE] marker to properly close the stream
yield "data: [DONE]\n\n"
except Exception as e:
run_status = RunStatus.failed
error_data = {
"error": {
"type": "internal_error",
"message": "An unknown error occurred with the LLM streaming request.",
"detail": str(e),
}
}
logger.error(f"Run {run_id} stopped with unknown error: {e}, error_data: {error_data}")
stop_reason = StopReasonType.error
yield f"event: error\ndata: {json.dumps(error_data)}\n\n"
stop_reason = LettaStopReason(stop_reason=StopReasonType.error)
error_message = LettaErrorMessage(
run_id=run_id,
error_type="internal_error",
message="An unknown error occurred with the LLM streaming request.",
detail=str(e),
)
yield f"data: {stop_reason.model_dump_json()}\n\n"
yield f"event: error\ndata: {error_message.model_dump_json()}\n\n"
# Send [DONE] marker to properly close the stream
yield "data: [DONE]\n\n"
# Capture for Sentry but don't re-raise to allow stream to complete gracefully

View File

@@ -608,6 +608,9 @@ def test_approve_and_follow_up_with_error(
messages = accumulate_chunks(response)
assert messages is not None
print("\n\nmessages:\n\n")
for m in messages:
print(m)
stop_reason_message = [m for m in messages if m.message_type == "stop_reason"][0]
assert stop_reason_message
assert stop_reason_message.stop_reason == "invalid_llm_response"