feat: track metrics for runs in db

This commit is contained in:
Andy Li
2025-08-06 15:46:50 -07:00
committed by GitHub
parent 8faa8711bc
commit ca6f474c4e
9 changed files with 202 additions and 95 deletions

View File

@@ -25,11 +25,8 @@ from anthropic.types.beta import (
)
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
from letta.local_llm.constants import INNER_THOUGHTS_KWARG
from letta.log import get_logger
from letta.otel.context import get_ctx_attributes
from letta.otel.metric_registry import MetricRegistry
from letta.schemas.letta_message import (
AssistantMessage,
HiddenReasoningMessage,
@@ -133,28 +130,12 @@ class AnthropicStreamingInterface:
self,
stream: AsyncStream[BetaRawMessageStreamEvent],
ttft_span: Optional["Span"] = None,
provider_request_start_timestamp_ns: int | None = None,
) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
prev_message_type = None
message_index = 0
first_chunk = True
try:
async with stream:
async for event in stream:
# TODO (cliandy): reconsider in stream cancellations
# await cancellation_token.check_and_raise_if_cancelled()
if first_chunk and ttft_span is not None and provider_request_start_timestamp_ns is not None:
now = get_utc_timestamp_ns()
ttft_ns = now - provider_request_start_timestamp_ns
ttft_span.add_event(
name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ns_to_ms(ttft_ns)}
)
metric_attributes = get_ctx_attributes()
if isinstance(event, BetaRawMessageStartEvent):
metric_attributes["model.name"] = event.message.model
MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
first_chunk = False
# TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
if isinstance(event, BetaRawContentBlockStartEvent):
content = event.content_block

View File

@@ -7,12 +7,9 @@ from openai import AsyncStream
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
from letta.llm_api.openai_client import is_openai_reasoning_model
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
from letta.log import get_logger
from letta.otel.context import get_ctx_attributes
from letta.otel.metric_registry import MetricRegistry
from letta.schemas.letta_message import AssistantMessage, LettaMessage, ReasoningMessage, ToolCallDelta, ToolCallMessage
from letta.schemas.letta_message_content import OmittedReasoningContent, TextContent
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
@@ -35,7 +32,6 @@ class OpenAIStreamingInterface:
def __init__(
self,
use_assistant_message: bool = False,
put_inner_thoughts_in_kwarg: bool = False,
is_openai_proxy: bool = False,
messages: Optional[list] = None,
tools: Optional[list] = None,
@@ -107,7 +103,6 @@ class OpenAIStreamingInterface:
self,
stream: AsyncStream[ChatCompletionChunk],
ttft_span: Optional["Span"] = None,
provider_request_start_timestamp_ns: int | None = None,
) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
"""
Iterates over the OpenAI stream, yielding SSE events.
@@ -125,29 +120,11 @@ class OpenAIStreamingInterface:
tool_dicts = [tool["function"] if isinstance(tool, dict) and "function" in tool else tool for tool in self.tools]
self.fallback_input_tokens += num_tokens_from_functions(tool_dicts)
first_chunk = True
try:
async with stream:
prev_message_type = None
message_index = 0
async for chunk in stream:
# TODO (cliandy): reconsider in stream cancellations
# await cancellation_token.check_and_raise_if_cancelled()
if first_chunk and ttft_span is not None and provider_request_start_timestamp_ns is not None:
now = get_utc_timestamp_ns()
ttft_ns = now - provider_request_start_timestamp_ns
ttft_span.add_event(
name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ns_to_ms(ttft_ns)}
)
metric_attributes = get_ctx_attributes()
metric_attributes["model.name"] = chunk.model
MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
if self.is_openai_proxy:
self.fallback_output_tokens += count_tokens(chunk.model_dump_json())
first_chunk = False
if not self.model or not self.message_id:
self.model = chunk.model
self.message_id = chunk.id