chore: continued logging cleanup and bugfixes (#5882)
* Gracefully handle MCP errors and runs/Temporal errors, and fix OTel exporter bugs * Move error handling to managers * Remove migrated error handling from routers * Change logger.error calls to logger.warning
This commit is contained in:
@@ -23,7 +23,6 @@ from letta.server.rest_api.streaming_response import (
|
|||||||
cancellation_aware_stream_wrapper,
|
cancellation_aware_stream_wrapper,
|
||||||
)
|
)
|
||||||
from letta.server.server import SyncServer
|
from letta.server.server import SyncServer
|
||||||
from letta.services.lettuce import LettuceClient
|
|
||||||
from letta.services.run_manager import RunManager
|
from letta.services.run_manager import RunManager
|
||||||
from letta.settings import settings
|
from letta.settings import settings
|
||||||
|
|
||||||
@@ -150,26 +149,7 @@ async def retrieve_run(
|
|||||||
"""
|
"""
|
||||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||||
runs_manager = RunManager()
|
runs_manager = RunManager()
|
||||||
|
return await runs_manager.get_run_with_status(run_id=run_id, actor=actor)
|
||||||
run = await runs_manager.get_run_by_id(run_id=run_id, actor=actor)
|
|
||||||
|
|
||||||
use_lettuce = run.metadata and run.metadata.get("lettuce")
|
|
||||||
if use_lettuce and run.status not in [RunStatus.completed, RunStatus.failed, RunStatus.cancelled]:
|
|
||||||
lettuce_client = await LettuceClient.create()
|
|
||||||
status = await lettuce_client.get_status(run_id=run_id)
|
|
||||||
|
|
||||||
# Map the status to our enum
|
|
||||||
run_status = run.status
|
|
||||||
if status == "RUNNING":
|
|
||||||
run_status = RunStatus.running
|
|
||||||
elif status == "COMPLETED":
|
|
||||||
run_status = RunStatus.completed
|
|
||||||
elif status == "FAILED":
|
|
||||||
run_status = RunStatus.failed
|
|
||||||
elif status == "CANCELLED":
|
|
||||||
run_status = RunStatus.cancelled
|
|
||||||
run.status = run_status
|
|
||||||
return run
|
|
||||||
|
|
||||||
|
|
||||||
RunMessagesResponse = Annotated[
|
RunMessagesResponse = Annotated[
|
||||||
|
|||||||
@@ -38,8 +38,8 @@ class AsyncBaseMCPClient:
|
|||||||
raise e
|
raise e
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# MCP connection failures are often due to user misconfiguration, not system errors
|
# MCP connection failures are often due to user misconfiguration, not system errors
|
||||||
# Log at info level to help with debugging without triggering Sentry alerts
|
# Log as warning for visibility in monitoring
|
||||||
logger.info(
|
logger.warning(
|
||||||
f"Connecting to MCP server failed. Please review your server config: {self.server_config.model_dump_json(indent=4)}. Error: {str(e)}"
|
f"Connecting to MCP server failed. Please review your server config: {self.server_config.model_dump_json(indent=4)}. Error: {str(e)}"
|
||||||
)
|
)
|
||||||
if hasattr(self.server_config, "server_url") and self.server_config.server_url:
|
if hasattr(self.server_config, "server_url") and self.server_config.server_url:
|
||||||
@@ -78,7 +78,13 @@ class AsyncBaseMCPClient:
|
|||||||
|
|
||||||
async def execute_tool(self, tool_name: str, tool_args: dict) -> Tuple[str, bool]:
|
async def execute_tool(self, tool_name: str, tool_args: dict) -> Tuple[str, bool]:
|
||||||
self._check_initialized()
|
self._check_initialized()
|
||||||
result = await self.session.call_tool(tool_name, tool_args)
|
try:
|
||||||
|
result = await self.session.call_tool(tool_name, tool_args)
|
||||||
|
except Exception as e:
|
||||||
|
if e.__class__.__name__ == "McpError":
|
||||||
|
logger.warning(f"MCP tool '{tool_name}' execution failed: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
parsed_content = []
|
parsed_content = []
|
||||||
for content_piece in result.content:
|
for content_piece in result.content:
|
||||||
if isinstance(content_piece, TextContent):
|
if isinstance(content_piece, TextContent):
|
||||||
|
|||||||
@@ -97,6 +97,34 @@ class RunManager:
|
|||||||
raise NoResultFound(f"Run with id {run_id} not found")
|
raise NoResultFound(f"Run with id {run_id} not found")
|
||||||
return run.to_pydantic()
|
return run.to_pydantic()
|
||||||
|
|
||||||
|
@enforce_types
|
||||||
|
async def get_run_with_status(self, run_id: str, actor: PydanticUser) -> PydanticRun:
|
||||||
|
"""Get a run by its ID and update status from Lettuce if applicable."""
|
||||||
|
run = await self.get_run_by_id(run_id=run_id, actor=actor)
|
||||||
|
|
||||||
|
use_lettuce = run.metadata and run.metadata.get("lettuce")
|
||||||
|
if use_lettuce and run.status not in [RunStatus.completed, RunStatus.failed, RunStatus.cancelled]:
|
||||||
|
try:
|
||||||
|
from letta.services.lettuce_client import LettuceClient
|
||||||
|
|
||||||
|
lettuce_client = await LettuceClient.create()
|
||||||
|
status = await lettuce_client.get_status(run_id=run_id)
|
||||||
|
|
||||||
|
# Map the status to our enum
|
||||||
|
if status == "RUNNING":
|
||||||
|
run.status = RunStatus.running
|
||||||
|
elif status == "COMPLETED":
|
||||||
|
run.status = RunStatus.completed
|
||||||
|
elif status == "FAILED":
|
||||||
|
run.status = RunStatus.failed
|
||||||
|
elif status == "CANCELLED":
|
||||||
|
run.status = RunStatus.cancelled
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to get status from Lettuce for run {run_id}: {str(e)}")
|
||||||
|
# Return run with current status from DB if Lettuce fails
|
||||||
|
|
||||||
|
return run
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
async def list_runs(
|
async def list_runs(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ receivers:
|
|||||||
filelog:
|
filelog:
|
||||||
include:
|
include:
|
||||||
- /root/.letta/logs/Letta.log
|
- /root/.letta/logs/Letta.log
|
||||||
|
multiline:
|
||||||
|
line_start_pattern: '^[\{\[]|^[0-9]{4}-[0-9]{2}-[0-9]{2}'
|
||||||
operators:
|
operators:
|
||||||
# Parse JSON logs (skip non-JSON lines)
|
# Parse JSON logs (skip non-JSON lines)
|
||||||
- type: json_parser
|
- type: json_parser
|
||||||
@@ -19,8 +21,14 @@ receivers:
|
|||||||
layout_type: gotime
|
layout_type: gotime
|
||||||
layout: '2006-01-02T15:04:05.999999Z07:00'
|
layout: '2006-01-02T15:04:05.999999Z07:00'
|
||||||
on_error: send
|
on_error: send
|
||||||
|
if: 'attributes.timestamp != nil'
|
||||||
|
|
||||||
processors:
|
processors:
|
||||||
|
resource:
|
||||||
|
attributes:
|
||||||
|
- key: environment
|
||||||
|
value: ${env:LETTA_ENVIRONMENT}
|
||||||
|
action: upsert
|
||||||
memory_limiter:
|
memory_limiter:
|
||||||
check_interval: 1s
|
check_interval: 1s
|
||||||
limit_mib: 1024
|
limit_mib: 1024
|
||||||
@@ -64,7 +72,7 @@ service:
|
|||||||
exporters: [clickhouse]
|
exporters: [clickhouse]
|
||||||
logs:
|
logs:
|
||||||
receivers: [filelog]
|
receivers: [filelog]
|
||||||
processors: [memory_limiter, batch]
|
processors: [resource, memory_limiter, batch]
|
||||||
exporters: [clickhouse]
|
exporters: [clickhouse]
|
||||||
metrics:
|
metrics:
|
||||||
receivers: [otlp]
|
receivers: [otlp]
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ receivers:
|
|||||||
filelog:
|
filelog:
|
||||||
include:
|
include:
|
||||||
- /root/.letta/logs/Letta.log
|
- /root/.letta/logs/Letta.log
|
||||||
|
multiline:
|
||||||
|
line_start_pattern: '^[\{\[]|^[0-9]{4}-[0-9]{2}-[0-9]{2}'
|
||||||
operators:
|
operators:
|
||||||
# Parse JSON logs (skip non-JSON lines)
|
# Parse JSON logs (skip non-JSON lines)
|
||||||
- type: json_parser
|
- type: json_parser
|
||||||
@@ -19,8 +21,14 @@ receivers:
|
|||||||
layout_type: gotime
|
layout_type: gotime
|
||||||
layout: '2006-01-02T15:04:05.999999Z07:00'
|
layout: '2006-01-02T15:04:05.999999Z07:00'
|
||||||
on_error: send
|
on_error: send
|
||||||
|
if: 'attributes.timestamp != nil'
|
||||||
|
|
||||||
processors:
|
processors:
|
||||||
|
resource:
|
||||||
|
attributes:
|
||||||
|
- key: environment
|
||||||
|
value: ${env:LETTA_ENVIRONMENT}
|
||||||
|
action: upsert
|
||||||
memory_limiter:
|
memory_limiter:
|
||||||
check_interval: 1s
|
check_interval: 1s
|
||||||
limit_mib: 1024
|
limit_mib: 1024
|
||||||
@@ -65,7 +73,7 @@ service:
|
|||||||
exporters: [clickhouse]
|
exporters: [clickhouse]
|
||||||
logs:
|
logs:
|
||||||
receivers: [filelog]
|
receivers: [filelog]
|
||||||
processors: [memory_limiter, batch]
|
processors: [resource, memory_limiter, batch]
|
||||||
exporters: [clickhouse]
|
exporters: [clickhouse]
|
||||||
metrics:
|
metrics:
|
||||||
receivers: [otlp]
|
receivers: [otlp]
|
||||||
|
|||||||
Reference in New Issue
Block a user