fix: add LLMCallType enum and ensure call_type is set on all provider traces (#9258)
Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
committed by
Caren Thomas
parent
96c4b7175e
commit
eaf64fb510
@@ -2,6 +2,7 @@ from abc import ABC, abstractmethod
|
|||||||
from typing import AsyncGenerator
|
from typing import AsyncGenerator
|
||||||
|
|
||||||
from letta.llm_api.llm_client_base import LLMClientBase
|
from letta.llm_api.llm_client_base import LLMClientBase
|
||||||
|
from letta.schemas.enums import LLMCallType
|
||||||
from letta.schemas.letta_message import LettaMessage
|
from letta.schemas.letta_message import LettaMessage
|
||||||
from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
|
from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
|
||||||
from letta.schemas.llm_config import LLMConfig
|
from letta.schemas.llm_config import LLMConfig
|
||||||
@@ -24,6 +25,7 @@ class LettaLLMAdapter(ABC):
|
|||||||
self,
|
self,
|
||||||
llm_client: LLMClientBase,
|
llm_client: LLMClientBase,
|
||||||
llm_config: LLMConfig,
|
llm_config: LLMConfig,
|
||||||
|
call_type: LLMCallType,
|
||||||
agent_id: str | None = None,
|
agent_id: str | None = None,
|
||||||
agent_tags: list[str] | None = None,
|
agent_tags: list[str] | None = None,
|
||||||
run_id: str | None = None,
|
run_id: str | None = None,
|
||||||
@@ -32,6 +34,7 @@ class LettaLLMAdapter(ABC):
|
|||||||
) -> None:
|
) -> None:
|
||||||
self.llm_client: LLMClientBase = llm_client
|
self.llm_client: LLMClientBase = llm_client
|
||||||
self.llm_config: LLMConfig = llm_config
|
self.llm_config: LLMConfig = llm_config
|
||||||
|
self.call_type: LLMCallType = call_type
|
||||||
self.agent_id: str | None = agent_id
|
self.agent_id: str | None = agent_id
|
||||||
self.agent_tags: list[str] | None = agent_tags
|
self.agent_tags: list[str] | None = agent_tags
|
||||||
self.run_id: str | None = run_id
|
self.run_id: str | None = run_id
|
||||||
|
|||||||
@@ -127,7 +127,7 @@ class LettaLLMRequestAdapter(LettaLLMAdapter):
|
|||||||
agent_id=self.agent_id,
|
agent_id=self.agent_id,
|
||||||
agent_tags=self.agent_tags,
|
agent_tags=self.agent_tags,
|
||||||
run_id=self.run_id,
|
run_id=self.run_id,
|
||||||
call_type="agent_step",
|
call_type=self.call_type,
|
||||||
org_id=self.org_id,
|
org_id=self.org_id,
|
||||||
user_id=self.user_id,
|
user_id=self.user_id,
|
||||||
llm_config=self.llm_config.model_dump() if self.llm_config else None,
|
llm_config=self.llm_config.model_dump() if self.llm_config else None,
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInt
|
|||||||
from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
|
from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
|
||||||
from letta.llm_api.llm_client_base import LLMClientBase
|
from letta.llm_api.llm_client_base import LLMClientBase
|
||||||
from letta.otel.tracing import log_attributes, safe_json_dumps, trace_method
|
from letta.otel.tracing import log_attributes, safe_json_dumps, trace_method
|
||||||
from letta.schemas.enums import ProviderType
|
from letta.schemas.enums import LLMCallType, ProviderType
|
||||||
from letta.schemas.letta_message import LettaMessage
|
from letta.schemas.letta_message import LettaMessage
|
||||||
from letta.schemas.llm_config import LLMConfig
|
from letta.schemas.llm_config import LLMConfig
|
||||||
from letta.schemas.provider_trace import ProviderTrace
|
from letta.schemas.provider_trace import ProviderTrace
|
||||||
@@ -30,13 +30,14 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
|
|||||||
self,
|
self,
|
||||||
llm_client: LLMClientBase,
|
llm_client: LLMClientBase,
|
||||||
llm_config: LLMConfig,
|
llm_config: LLMConfig,
|
||||||
|
call_type: LLMCallType,
|
||||||
agent_id: str | None = None,
|
agent_id: str | None = None,
|
||||||
agent_tags: list[str] | None = None,
|
agent_tags: list[str] | None = None,
|
||||||
run_id: str | None = None,
|
run_id: str | None = None,
|
||||||
org_id: str | None = None,
|
org_id: str | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(llm_client, llm_config, agent_id=agent_id, agent_tags=agent_tags, run_id=run_id, org_id=org_id, user_id=user_id)
|
super().__init__(llm_client, llm_config, call_type=call_type, agent_id=agent_id, agent_tags=agent_tags, run_id=run_id, org_id=org_id, user_id=user_id)
|
||||||
self.interface: OpenAIStreamingInterface | AnthropicStreamingInterface | None = None
|
self.interface: OpenAIStreamingInterface | AnthropicStreamingInterface | None = None
|
||||||
|
|
||||||
async def invoke_llm(
|
async def invoke_llm(
|
||||||
@@ -205,7 +206,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
|
|||||||
agent_id=self.agent_id,
|
agent_id=self.agent_id,
|
||||||
agent_tags=self.agent_tags,
|
agent_tags=self.agent_tags,
|
||||||
run_id=self.run_id,
|
run_id=self.run_id,
|
||||||
call_type="agent_step",
|
call_type=self.call_type,
|
||||||
org_id=self.org_id,
|
org_id=self.org_id,
|
||||||
user_id=self.user_id,
|
user_id=self.user_id,
|
||||||
llm_config=self.llm_config.model_dump() if self.llm_config else None,
|
llm_config=self.llm_config.model_dump() if self.llm_config else None,
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from typing import AsyncGenerator
|
|||||||
|
|
||||||
from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
|
from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
|
||||||
from letta.helpers.datetime_helpers import get_utc_timestamp_ns
|
from letta.helpers.datetime_helpers import get_utc_timestamp_ns
|
||||||
|
from letta.schemas.enums import LLMCallType
|
||||||
from letta.schemas.letta_message import LettaMessage
|
from letta.schemas.letta_message import LettaMessage
|
||||||
from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, TextContent
|
from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, TextContent
|
||||||
from letta.schemas.usage import normalize_cache_tokens, normalize_reasoning_tokens
|
from letta.schemas.usage import normalize_cache_tokens, normalize_reasoning_tokens
|
||||||
@@ -45,7 +46,7 @@ class SimpleLLMRequestAdapter(LettaLLMRequestAdapter):
|
|||||||
agent_id=self.agent_id,
|
agent_id=self.agent_id,
|
||||||
agent_tags=self.agent_tags,
|
agent_tags=self.agent_tags,
|
||||||
run_id=self.run_id,
|
run_id=self.run_id,
|
||||||
call_type="agent_step",
|
call_type=LLMCallType.agent_step,
|
||||||
org_id=self.org_id,
|
org_id=self.org_id,
|
||||||
user_id=self.user_id,
|
user_id=self.user_id,
|
||||||
llm_config=self.llm_config.model_dump() if self.llm_config else None,
|
llm_config=self.llm_config.model_dump() if self.llm_config else None,
|
||||||
|
|||||||
@@ -254,7 +254,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
|
|||||||
agent_id=self.agent_id,
|
agent_id=self.agent_id,
|
||||||
agent_tags=self.agent_tags,
|
agent_tags=self.agent_tags,
|
||||||
run_id=self.run_id,
|
run_id=self.run_id,
|
||||||
call_type="agent_step",
|
call_type=self.call_type,
|
||||||
org_id=self.org_id,
|
org_id=self.org_id,
|
||||||
user_id=self.user_id,
|
user_id=self.user_id,
|
||||||
llm_config=self.llm_config.model_dump() if self.llm_config else None,
|
llm_config=self.llm_config.model_dump() if self.llm_config else None,
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from letta.log import get_logger
|
|||||||
from letta.orm.errors import NoResultFound
|
from letta.orm.errors import NoResultFound
|
||||||
from letta.prompts.gpt_system import get_system_text
|
from letta.prompts.gpt_system import get_system_text
|
||||||
from letta.schemas.block import Block, BlockUpdate
|
from letta.schemas.block import Block, BlockUpdate
|
||||||
from letta.schemas.enums import MessageRole
|
from letta.schemas.enums import LLMCallType, MessageRole
|
||||||
from letta.schemas.letta_message_content import TextContent
|
from letta.schemas.letta_message_content import TextContent
|
||||||
from letta.schemas.message import Message, MessageCreate
|
from letta.schemas.message import Message, MessageCreate
|
||||||
from letta.schemas.user import User
|
from letta.schemas.user import User
|
||||||
@@ -92,7 +92,7 @@ class EphemeralSummaryAgent(BaseAgent):
|
|||||||
telemetry_manager=TelemetryManager(),
|
telemetry_manager=TelemetryManager(),
|
||||||
agent_id=self.agent_id,
|
agent_id=self.agent_id,
|
||||||
agent_tags=agent_state.tags,
|
agent_tags=agent_state.tags,
|
||||||
call_type="summarization",
|
call_type=LLMCallType.summarization,
|
||||||
)
|
)
|
||||||
response_data = await llm_client.request_async_with_telemetry(request_data, agent_state.llm_config)
|
response_data = await llm_client.request_async_with_telemetry(request_data, agent_state.llm_config)
|
||||||
response = await llm_client.convert_response_to_chat_completion(response_data, messages, agent_state.llm_config)
|
response = await llm_client.convert_response_to_chat_completion(response_data, messages, agent_state.llm_config)
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ from letta.otel.context import get_ctx_attributes
|
|||||||
from letta.otel.metric_registry import MetricRegistry
|
from letta.otel.metric_registry import MetricRegistry
|
||||||
from letta.otel.tracing import log_event, trace_method, tracer
|
from letta.otel.tracing import log_event, trace_method, tracer
|
||||||
from letta.schemas.agent import AgentState, UpdateAgent
|
from letta.schemas.agent import AgentState, UpdateAgent
|
||||||
from letta.schemas.enums import JobStatus, ProviderType, StepStatus, ToolType
|
from letta.schemas.enums import JobStatus, LLMCallType, ProviderType, StepStatus, ToolType
|
||||||
from letta.schemas.letta_message import MessageType
|
from letta.schemas.letta_message import MessageType
|
||||||
from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
|
from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
|
||||||
from letta.schemas.letta_response import LettaResponse
|
from letta.schemas.letta_response import LettaResponse
|
||||||
@@ -420,7 +420,7 @@ class LettaAgent(BaseAgent):
|
|||||||
agent_id=self.agent_id,
|
agent_id=self.agent_id,
|
||||||
agent_tags=agent_state.tags,
|
agent_tags=agent_state.tags,
|
||||||
run_id=self.current_run_id,
|
run_id=self.current_run_id,
|
||||||
call_type="agent_step",
|
call_type=LLMCallType.agent_step,
|
||||||
org_id=self.actor.organization_id,
|
org_id=self.actor.organization_id,
|
||||||
user_id=self.actor.id,
|
user_id=self.actor.id,
|
||||||
llm_config=self.agent_state.llm_config.model_dump() if self.agent_state.llm_config else None,
|
llm_config=self.agent_state.llm_config.model_dump() if self.agent_state.llm_config else None,
|
||||||
@@ -774,7 +774,7 @@ class LettaAgent(BaseAgent):
|
|||||||
agent_id=self.agent_id,
|
agent_id=self.agent_id,
|
||||||
agent_tags=agent_state.tags,
|
agent_tags=agent_state.tags,
|
||||||
run_id=self.current_run_id,
|
run_id=self.current_run_id,
|
||||||
call_type="agent_step",
|
call_type=LLMCallType.agent_step,
|
||||||
org_id=self.actor.organization_id,
|
org_id=self.actor.organization_id,
|
||||||
user_id=self.actor.id,
|
user_id=self.actor.id,
|
||||||
llm_config=self.agent_state.llm_config.model_dump() if self.agent_state.llm_config else None,
|
llm_config=self.agent_state.llm_config.model_dump() if self.agent_state.llm_config else None,
|
||||||
@@ -1252,7 +1252,7 @@ class LettaAgent(BaseAgent):
|
|||||||
agent_id=self.agent_id,
|
agent_id=self.agent_id,
|
||||||
agent_tags=agent_state.tags,
|
agent_tags=agent_state.tags,
|
||||||
run_id=self.current_run_id,
|
run_id=self.current_run_id,
|
||||||
call_type="agent_step",
|
call_type=LLMCallType.agent_step,
|
||||||
org_id=self.actor.organization_id,
|
org_id=self.actor.organization_id,
|
||||||
user_id=self.actor.id,
|
user_id=self.actor.id,
|
||||||
llm_config=self.agent_state.llm_config.model_dump() if self.agent_state.llm_config else None,
|
llm_config=self.agent_state.llm_config.model_dump() if self.agent_state.llm_config else None,
|
||||||
@@ -1486,7 +1486,7 @@ class LettaAgent(BaseAgent):
|
|||||||
agent_tags=agent_state.tags,
|
agent_tags=agent_state.tags,
|
||||||
run_id=self.current_run_id,
|
run_id=self.current_run_id,
|
||||||
step_id=step_metrics.id,
|
step_id=step_metrics.id,
|
||||||
call_type="agent_step",
|
call_type=LLMCallType.agent_step,
|
||||||
)
|
)
|
||||||
response = await llm_client.request_async_with_telemetry(request_data, agent_state.llm_config)
|
response = await llm_client.request_async_with_telemetry(request_data, agent_state.llm_config)
|
||||||
|
|
||||||
@@ -1559,7 +1559,7 @@ class LettaAgent(BaseAgent):
|
|||||||
agent_tags=agent_state.tags,
|
agent_tags=agent_state.tags,
|
||||||
run_id=self.current_run_id,
|
run_id=self.current_run_id,
|
||||||
step_id=step_id,
|
step_id=step_id,
|
||||||
call_type="agent_step",
|
call_type=LLMCallType.agent_step,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Attempt LLM request with telemetry wrapper
|
# Attempt LLM request with telemetry wrapper
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ from letta.log import get_logger
|
|||||||
from letta.otel.tracing import log_event, trace_method, tracer
|
from letta.otel.tracing import log_event, trace_method, tracer
|
||||||
from letta.prompts.prompt_generator import PromptGenerator
|
from letta.prompts.prompt_generator import PromptGenerator
|
||||||
from letta.schemas.agent import AgentState, UpdateAgent
|
from letta.schemas.agent import AgentState, UpdateAgent
|
||||||
from letta.schemas.enums import AgentType, MessageStreamStatus, RunStatus, StepStatus
|
from letta.schemas.enums import AgentType, LLMCallType, MessageStreamStatus, RunStatus, StepStatus
|
||||||
from letta.schemas.letta_message import LettaMessage, MessageType
|
from letta.schemas.letta_message import LettaMessage, MessageType
|
||||||
from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
|
from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
|
||||||
from letta.schemas.letta_request import ClientToolSchema
|
from letta.schemas.letta_request import ClientToolSchema
|
||||||
@@ -158,6 +158,8 @@ class LettaAgentV2(BaseAgentV2):
|
|||||||
llm_adapter=LettaLLMRequestAdapter(
|
llm_adapter=LettaLLMRequestAdapter(
|
||||||
llm_client=self.llm_client,
|
llm_client=self.llm_client,
|
||||||
llm_config=self.agent_state.llm_config,
|
llm_config=self.agent_state.llm_config,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
|
agent_id=self.agent_state.id,
|
||||||
agent_tags=self.agent_state.tags,
|
agent_tags=self.agent_state.tags,
|
||||||
org_id=self.actor.organization_id,
|
org_id=self.actor.organization_id,
|
||||||
user_id=self.actor.id,
|
user_id=self.actor.id,
|
||||||
@@ -216,6 +218,7 @@ class LettaAgentV2(BaseAgentV2):
|
|||||||
llm_adapter=LettaLLMRequestAdapter(
|
llm_adapter=LettaLLMRequestAdapter(
|
||||||
llm_client=self.llm_client,
|
llm_client=self.llm_client,
|
||||||
llm_config=self.agent_state.llm_config,
|
llm_config=self.agent_state.llm_config,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
agent_id=self.agent_state.id,
|
agent_id=self.agent_state.id,
|
||||||
agent_tags=self.agent_state.tags,
|
agent_tags=self.agent_state.tags,
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
@@ -305,6 +308,7 @@ class LettaAgentV2(BaseAgentV2):
|
|||||||
llm_adapter = LettaLLMStreamAdapter(
|
llm_adapter = LettaLLMStreamAdapter(
|
||||||
llm_client=self.llm_client,
|
llm_client=self.llm_client,
|
||||||
llm_config=self.agent_state.llm_config,
|
llm_config=self.agent_state.llm_config,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
agent_id=self.agent_state.id,
|
agent_id=self.agent_state.id,
|
||||||
agent_tags=self.agent_state.tags,
|
agent_tags=self.agent_state.tags,
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
@@ -315,6 +319,7 @@ class LettaAgentV2(BaseAgentV2):
|
|||||||
llm_adapter = LettaLLMRequestAdapter(
|
llm_adapter = LettaLLMRequestAdapter(
|
||||||
llm_client=self.llm_client,
|
llm_client=self.llm_client,
|
||||||
llm_config=self.agent_state.llm_config,
|
llm_config=self.agent_state.llm_config,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
agent_id=self.agent_state.id,
|
agent_id=self.agent_state.id,
|
||||||
agent_tags=self.agent_state.tags,
|
agent_tags=self.agent_state.tags,
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ from letta.helpers.tool_execution_helper import enable_strict_mode
|
|||||||
from letta.local_llm.constants import INNER_THOUGHTS_KWARG
|
from letta.local_llm.constants import INNER_THOUGHTS_KWARG
|
||||||
from letta.otel.tracing import trace_method
|
from letta.otel.tracing import trace_method
|
||||||
from letta.schemas.agent import AgentState
|
from letta.schemas.agent import AgentState
|
||||||
from letta.schemas.enums import MessageRole
|
from letta.schemas.enums import LLMCallType, MessageRole
|
||||||
from letta.schemas.letta_message import (
|
from letta.schemas.letta_message import (
|
||||||
ApprovalReturn,
|
ApprovalReturn,
|
||||||
CompactionStats,
|
CompactionStats,
|
||||||
@@ -209,6 +209,7 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
llm_adapter=SimpleLLMRequestAdapter(
|
llm_adapter=SimpleLLMRequestAdapter(
|
||||||
llm_client=self.llm_client,
|
llm_client=self.llm_client,
|
||||||
llm_config=self.agent_state.llm_config,
|
llm_config=self.agent_state.llm_config,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
agent_id=self.agent_state.id,
|
agent_id=self.agent_state.id,
|
||||||
agent_tags=self.agent_state.tags,
|
agent_tags=self.agent_state.tags,
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
@@ -356,6 +357,7 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
llm_adapter = SimpleLLMStreamAdapter(
|
llm_adapter = SimpleLLMStreamAdapter(
|
||||||
llm_client=self.llm_client,
|
llm_client=self.llm_client,
|
||||||
llm_config=self.agent_state.llm_config,
|
llm_config=self.agent_state.llm_config,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
agent_id=self.agent_state.id,
|
agent_id=self.agent_state.id,
|
||||||
agent_tags=self.agent_state.tags,
|
agent_tags=self.agent_state.tags,
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
@@ -366,6 +368,7 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
llm_adapter = SimpleLLMRequestAdapter(
|
llm_adapter = SimpleLLMRequestAdapter(
|
||||||
llm_client=self.llm_client,
|
llm_client=self.llm_client,
|
||||||
llm_config=self.agent_state.llm_config,
|
llm_config=self.agent_state.llm_config,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
agent_id=self.agent_state.id,
|
agent_id=self.agent_state.id,
|
||||||
agent_tags=self.agent_state.tags,
|
agent_tags=self.agent_state.tags,
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ from letta.local_llm.constants import INNER_THOUGHTS_KWARG
|
|||||||
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
|
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
|
||||||
from letta.orm.user import User
|
from letta.orm.user import User
|
||||||
from letta.otel.tracing import log_event, trace_method
|
from letta.otel.tracing import log_event, trace_method
|
||||||
from letta.schemas.enums import ProviderCategory
|
from letta.schemas.enums import LLMCallType, ProviderCategory
|
||||||
from letta.schemas.llm_config import LLMConfig
|
from letta.schemas.llm_config import LLMConfig
|
||||||
from letta.schemas.message import Message
|
from letta.schemas.message import Message
|
||||||
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
|
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
|
||||||
@@ -245,7 +245,7 @@ def create(
|
|||||||
request_json=prepare_openai_payload(data),
|
request_json=prepare_openai_payload(data),
|
||||||
response_json=response.model_json_schema(),
|
response_json=response.model_json_schema(),
|
||||||
step_id=step_id,
|
step_id=step_id,
|
||||||
call_type="agent_step",
|
call_type=LLMCallType.agent_step,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
|
|||||||
from letta.errors import ErrorCode, LLMConnectionError, LLMError
|
from letta.errors import ErrorCode, LLMConnectionError, LLMError
|
||||||
from letta.otel.tracing import log_event, trace_method
|
from letta.otel.tracing import log_event, trace_method
|
||||||
from letta.schemas.embedding_config import EmbeddingConfig
|
from letta.schemas.embedding_config import EmbeddingConfig
|
||||||
from letta.schemas.enums import AgentType, ProviderCategory
|
from letta.schemas.enums import AgentType, LLMCallType, ProviderCategory
|
||||||
from letta.schemas.llm_config import LLMConfig
|
from letta.schemas.llm_config import LLMConfig
|
||||||
from letta.schemas.message import Message
|
from letta.schemas.message import Message
|
||||||
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
|
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
|
||||||
@@ -229,6 +229,7 @@ class LLMClientBase:
|
|||||||
request_json=request_data,
|
request_json=request_data,
|
||||||
response_json=response_data,
|
response_json=response_data,
|
||||||
step_id=step_id,
|
step_id=step_id,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
log_event(name="llm_response_received", attributes=response_data)
|
log_event(name="llm_response_received", attributes=response_data)
|
||||||
@@ -262,6 +263,7 @@ class LLMClientBase:
|
|||||||
request_json=request_data,
|
request_json=request_data,
|
||||||
response_json=response_data,
|
response_json=response_data,
|
||||||
step_id=step_id,
|
step_id=step_id,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -96,6 +96,14 @@ class ProviderCategory(str, Enum):
|
|||||||
byok = "byok"
|
byok = "byok"
|
||||||
|
|
||||||
|
|
||||||
|
class LLMCallType(str, Enum):
    """Type of LLM call for telemetry tracking.

    The enum mixes in ``str``, so every member compares equal to its plain
    string value (``LLMCallType.agent_step == "agent_step"``). The member
    values are the same strings that call sites previously passed as raw
    literals for ``call_type``.
    """

    # Passed by the LLM adapters, the agent classes and the LLM client
    # call sites touched in this change.
    agent_step = "agent_step"
    # Passed by EphemeralSummaryAgent and simple_summary.
    summarization = "summarization"
    # Passed by generate_tool_from_prompt.
    tool_generation = "tool_generation"
|
||||||
|
|
||||||
|
|
||||||
class MessageRole(str, Enum):
|
class MessageRole(str, Enum):
|
||||||
assistant = "assistant"
|
assistant = "assistant"
|
||||||
user = "user"
|
user = "user"
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ from letta.log import get_logger
|
|||||||
from letta.orm.errors import UniqueConstraintViolationError
|
from letta.orm.errors import UniqueConstraintViolationError
|
||||||
from letta.orm.mcp_oauth import OAuthSessionStatus
|
from letta.orm.mcp_oauth import OAuthSessionStatus
|
||||||
from letta.prompts.gpt_system import get_system_text
|
from letta.prompts.gpt_system import get_system_text
|
||||||
from letta.schemas.enums import AgentType, MessageRole, ToolType
|
from letta.schemas.enums import AgentType, LLMCallType, MessageRole, ToolType
|
||||||
from letta.schemas.letta_message import ToolReturnMessage
|
from letta.schemas.letta_message import ToolReturnMessage
|
||||||
from letta.schemas.letta_message_content import TextContent
|
from letta.schemas.letta_message_content import TextContent
|
||||||
from letta.schemas.mcp import UpdateSSEMCPServer, UpdateStdioMCPServer, UpdateStreamableHTTPMCPServer
|
from letta.schemas.mcp import UpdateSSEMCPServer, UpdateStdioMCPServer, UpdateStreamableHTTPMCPServer
|
||||||
@@ -956,7 +956,7 @@ async def generate_tool_from_prompt(
|
|||||||
|
|
||||||
llm_client.set_telemetry_context(
|
llm_client.set_telemetry_context(
|
||||||
telemetry_manager=TelemetryManager(),
|
telemetry_manager=TelemetryManager(),
|
||||||
call_type="tool_generation",
|
call_type=LLMCallType.tool_generation,
|
||||||
)
|
)
|
||||||
response_data = await llm_client.request_async_with_telemetry(request_data, llm_config)
|
response_data = await llm_client.request_async_with_telemetry(request_data, llm_config)
|
||||||
response = await llm_client.convert_response_to_chat_completion(response_data, input_messages, llm_config)
|
response = await llm_client.convert_response_to_chat_completion(response_data, input_messages, llm_config)
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ from letta.llm_api.llm_client import LLMClient
|
|||||||
from letta.log import get_logger
|
from letta.log import get_logger
|
||||||
from letta.otel.tracing import trace_method
|
from letta.otel.tracing import trace_method
|
||||||
from letta.prompts import gpt_summarize
|
from letta.prompts import gpt_summarize
|
||||||
from letta.schemas.enums import AgentType, MessageRole, ProviderType
|
from letta.schemas.enums import AgentType, LLMCallType, MessageRole, ProviderType
|
||||||
from letta.schemas.letta_message_content import TextContent
|
from letta.schemas.letta_message_content import TextContent
|
||||||
from letta.schemas.llm_config import LLMConfig
|
from letta.schemas.llm_config import LLMConfig
|
||||||
from letta.schemas.message import Message, MessageCreate
|
from letta.schemas.message import Message, MessageCreate
|
||||||
@@ -482,7 +482,7 @@ async def simple_summary(
|
|||||||
agent_tags=agent_tags,
|
agent_tags=agent_tags,
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
step_id=step_id,
|
step_id=step_id,
|
||||||
call_type="summarization",
|
call_type=LLMCallType.summarization,
|
||||||
org_id=actor.organization_id if actor else None,
|
org_id=actor.organization_id if actor else None,
|
||||||
user_id=actor.id if actor else None,
|
user_id=actor.id if actor else None,
|
||||||
compaction_settings=compaction_settings,
|
compaction_settings=compaction_settings,
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import pytest
|
|||||||
from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
|
from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
|
||||||
from letta.errors import ContextWindowExceededError, LLMConnectionError, LLMServerError
|
from letta.errors import ContextWindowExceededError, LLMConnectionError, LLMServerError
|
||||||
from letta.llm_api.anthropic_client import AnthropicClient
|
from letta.llm_api.anthropic_client import AnthropicClient
|
||||||
|
from letta.schemas.enums import LLMCallType
|
||||||
from letta.schemas.llm_config import LLMConfig
|
from letta.schemas.llm_config import LLMConfig
|
||||||
|
|
||||||
|
|
||||||
@@ -42,7 +43,7 @@ async def test_letta_llm_stream_adapter_converts_anthropic_streaming_api_status_
|
|||||||
|
|
||||||
llm_client = AnthropicClient()
|
llm_client = AnthropicClient()
|
||||||
llm_config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000)
|
llm_config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000)
|
||||||
adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config)
|
adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config, call_type=LLMCallType.agent_step)
|
||||||
|
|
||||||
gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True)
|
gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True)
|
||||||
with pytest.raises(LLMServerError):
|
with pytest.raises(LLMServerError):
|
||||||
@@ -83,7 +84,7 @@ async def test_letta_llm_stream_adapter_converts_anthropic_413_request_too_large
|
|||||||
|
|
||||||
llm_client = AnthropicClient()
|
llm_client = AnthropicClient()
|
||||||
llm_config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000)
|
llm_config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000)
|
||||||
adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config)
|
adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config, call_type=LLMCallType.agent_step)
|
||||||
|
|
||||||
gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True)
|
gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True)
|
||||||
with pytest.raises(ContextWindowExceededError):
|
with pytest.raises(ContextWindowExceededError):
|
||||||
@@ -117,7 +118,7 @@ async def test_letta_llm_stream_adapter_converts_httpx_read_error(monkeypatch):
|
|||||||
|
|
||||||
llm_client = AnthropicClient()
|
llm_client = AnthropicClient()
|
||||||
llm_config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000)
|
llm_config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000)
|
||||||
adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config)
|
adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config, call_type=LLMCallType.agent_step)
|
||||||
|
|
||||||
gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True)
|
gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True)
|
||||||
with pytest.raises(LLMConnectionError):
|
with pytest.raises(LLMConnectionError):
|
||||||
@@ -151,7 +152,7 @@ async def test_letta_llm_stream_adapter_converts_httpx_write_error(monkeypatch):
|
|||||||
|
|
||||||
llm_client = AnthropicClient()
|
llm_client = AnthropicClient()
|
||||||
llm_config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000)
|
llm_config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000)
|
||||||
adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config)
|
adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config, call_type=LLMCallType.agent_step)
|
||||||
|
|
||||||
gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True)
|
gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True)
|
||||||
with pytest.raises(LLMConnectionError):
|
with pytest.raises(LLMConnectionError):
|
||||||
|
|||||||
@@ -198,6 +198,7 @@ class TestAdapterTelemetryAttributes:
|
|||||||
"""Verify base LettaLLMAdapter has telemetry attributes."""
|
"""Verify base LettaLLMAdapter has telemetry attributes."""
|
||||||
from letta.adapters.letta_llm_adapter import LettaLLMAdapter
|
from letta.adapters.letta_llm_adapter import LettaLLMAdapter
|
||||||
from letta.llm_api.llm_client import LLMClient
|
from letta.llm_api.llm_client import LLMClient
|
||||||
|
from letta.schemas.enums import LLMCallType
|
||||||
|
|
||||||
mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True)
|
mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True)
|
||||||
|
|
||||||
@@ -212,6 +213,7 @@ class TestAdapterTelemetryAttributes:
|
|||||||
adapter = TestAdapter(
|
adapter = TestAdapter(
|
||||||
llm_client=mock_client,
|
llm_client=mock_client,
|
||||||
llm_config=mock_llm_config,
|
llm_config=mock_llm_config,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
agent_id=agent_id,
|
agent_id=agent_id,
|
||||||
agent_tags=agent_tags,
|
agent_tags=agent_tags,
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
@@ -220,11 +222,13 @@ class TestAdapterTelemetryAttributes:
|
|||||||
assert adapter.agent_id == agent_id
|
assert adapter.agent_id == agent_id
|
||||||
assert adapter.agent_tags == agent_tags
|
assert adapter.agent_tags == agent_tags
|
||||||
assert adapter.run_id == run_id
|
assert adapter.run_id == run_id
|
||||||
|
assert adapter.call_type == LLMCallType.agent_step
|
||||||
|
|
||||||
def test_request_adapter_inherits_telemetry_attributes(self, mock_llm_config):
|
def test_request_adapter_inherits_telemetry_attributes(self, mock_llm_config):
|
||||||
"""Verify LettaLLMRequestAdapter inherits telemetry attributes."""
|
"""Verify LettaLLMRequestAdapter inherits telemetry attributes."""
|
||||||
from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
|
from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
|
||||||
from letta.llm_api.llm_client import LLMClient
|
from letta.llm_api.llm_client import LLMClient
|
||||||
|
from letta.schemas.enums import LLMCallType
|
||||||
|
|
||||||
mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True)
|
mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True)
|
||||||
|
|
||||||
@@ -235,6 +239,7 @@ class TestAdapterTelemetryAttributes:
|
|||||||
adapter = LettaLLMRequestAdapter(
|
adapter = LettaLLMRequestAdapter(
|
||||||
llm_client=mock_client,
|
llm_client=mock_client,
|
||||||
llm_config=mock_llm_config,
|
llm_config=mock_llm_config,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
agent_id=agent_id,
|
agent_id=agent_id,
|
||||||
agent_tags=agent_tags,
|
agent_tags=agent_tags,
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
@@ -248,6 +253,7 @@ class TestAdapterTelemetryAttributes:
|
|||||||
"""Verify LettaLLMStreamAdapter inherits telemetry attributes."""
|
"""Verify LettaLLMStreamAdapter inherits telemetry attributes."""
|
||||||
from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
|
from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
|
||||||
from letta.llm_api.llm_client import LLMClient
|
from letta.llm_api.llm_client import LLMClient
|
||||||
|
from letta.schemas.enums import LLMCallType
|
||||||
|
|
||||||
mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True)
|
mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True)
|
||||||
|
|
||||||
@@ -258,6 +264,7 @@ class TestAdapterTelemetryAttributes:
|
|||||||
adapter = LettaLLMStreamAdapter(
|
adapter = LettaLLMStreamAdapter(
|
||||||
llm_client=mock_client,
|
llm_client=mock_client,
|
||||||
llm_config=mock_llm_config,
|
llm_config=mock_llm_config,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
agent_id=agent_id,
|
agent_id=agent_id,
|
||||||
agent_tags=agent_tags,
|
agent_tags=agent_tags,
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
@@ -272,13 +279,14 @@ class TestAdapterTelemetryAttributes:
|
|||||||
from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
|
from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
|
||||||
from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
|
from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
|
||||||
from letta.llm_api.llm_client import LLMClient
|
from letta.llm_api.llm_client import LLMClient
|
||||||
|
from letta.schemas.enums import LLMCallType
|
||||||
|
|
||||||
mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True)
|
mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True)
|
||||||
|
|
||||||
request_adapter = LettaLLMRequestAdapter(llm_client=mock_client, llm_config=mock_llm_config)
|
request_adapter = LettaLLMRequestAdapter(llm_client=mock_client, llm_config=mock_llm_config, call_type=LLMCallType.agent_step)
|
||||||
stream_adapter = LettaLLMStreamAdapter(llm_client=mock_client, llm_config=mock_llm_config)
|
stream_adapter = LettaLLMStreamAdapter(llm_client=mock_client, llm_config=mock_llm_config, call_type=LLMCallType.agent_step)
|
||||||
|
|
||||||
for attr in ["agent_id", "agent_tags", "run_id"]:
|
for attr in ["agent_id", "agent_tags", "run_id", "call_type"]:
|
||||||
assert hasattr(request_adapter, attr), f"LettaLLMRequestAdapter missing {attr}"
|
assert hasattr(request_adapter, attr), f"LettaLLMRequestAdapter missing {attr}"
|
||||||
assert hasattr(stream_adapter, attr), f"LettaLLMStreamAdapter missing {attr}"
|
assert hasattr(stream_adapter, attr), f"LettaLLMStreamAdapter missing {attr}"
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ from letta.errors import LLMAuthenticationError
|
|||||||
from letta.llm_api.anthropic_client import AnthropicClient
|
from letta.llm_api.anthropic_client import AnthropicClient
|
||||||
from letta.llm_api.google_ai_client import GoogleAIClient
|
from letta.llm_api.google_ai_client import GoogleAIClient
|
||||||
from letta.llm_api.openai_client import OpenAIClient
|
from letta.llm_api.openai_client import OpenAIClient
|
||||||
from letta.schemas.enums import AgentType, MessageRole
|
from letta.schemas.enums import AgentType, LLMCallType, MessageRole
|
||||||
from letta.schemas.letta_message_content import TextContent
|
from letta.schemas.letta_message_content import TextContent
|
||||||
from letta.schemas.llm_config import LLMConfig
|
from letta.schemas.llm_config import LLMConfig
|
||||||
from letta.schemas.message import Message
|
from letta.schemas.message import Message
|
||||||
@@ -156,6 +156,7 @@ async def test_openai_usage_via_adapter():
|
|||||||
adapter = SimpleLLMRequestAdapter(
|
adapter = SimpleLLMRequestAdapter(
|
||||||
llm_client=client,
|
llm_client=client,
|
||||||
llm_config=llm_config,
|
llm_config=llm_config,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
)
|
)
|
||||||
|
|
||||||
messages = _build_simple_messages("Say hello in exactly 5 words.")
|
messages = _build_simple_messages("Say hello in exactly 5 words.")
|
||||||
@@ -209,6 +210,7 @@ async def test_anthropic_usage_via_adapter():
|
|||||||
adapter = SimpleLLMRequestAdapter(
|
adapter = SimpleLLMRequestAdapter(
|
||||||
llm_client=client,
|
llm_client=client,
|
||||||
llm_config=llm_config,
|
llm_config=llm_config,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Anthropic requires a system message first
|
# Anthropic requires a system message first
|
||||||
@@ -262,6 +264,7 @@ async def test_gemini_usage_via_adapter():
|
|||||||
adapter = SimpleLLMRequestAdapter(
|
adapter = SimpleLLMRequestAdapter(
|
||||||
llm_client=client,
|
llm_client=client,
|
||||||
llm_config=llm_config,
|
llm_config=llm_config,
|
||||||
|
call_type=LLMCallType.agent_step,
|
||||||
)
|
)
|
||||||
|
|
||||||
messages = _build_simple_messages("Say hello in exactly 5 words.")
|
messages = _build_simple_messages("Say hello in exactly 5 words.")
|
||||||
@@ -307,7 +310,7 @@ async def test_openai_prefix_caching_via_adapter():
|
|||||||
llm_config = LLMConfig.default_config("gpt-4o-mini")
|
llm_config = LLMConfig.default_config("gpt-4o-mini")
|
||||||
|
|
||||||
# First request - should populate the cache
|
# First request - should populate the cache
|
||||||
adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config)
|
adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config, call_type=LLMCallType.agent_step)
|
||||||
messages1 = [
|
messages1 = [
|
||||||
Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]),
|
Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]),
|
||||||
Message(role=MessageRole.user, content=[TextContent(text="What is 2+2?")]),
|
Message(role=MessageRole.user, content=[TextContent(text="What is 2+2?")]),
|
||||||
@@ -323,7 +326,7 @@ async def test_openai_prefix_caching_via_adapter():
|
|||||||
print(f"Request 1 - prompt={adapter1.usage.prompt_tokens}, cached={adapter1.usage.cached_input_tokens}")
|
print(f"Request 1 - prompt={adapter1.usage.prompt_tokens}, cached={adapter1.usage.cached_input_tokens}")
|
||||||
|
|
||||||
# Second request - same system prompt, should hit cache
|
# Second request - same system prompt, should hit cache
|
||||||
adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config)
|
adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config, call_type=LLMCallType.agent_step)
|
||||||
messages2 = [
|
messages2 = [
|
||||||
Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]),
|
Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]),
|
||||||
Message(role=MessageRole.user, content=[TextContent(text="What is 3+3?")]),
|
Message(role=MessageRole.user, content=[TextContent(text="What is 3+3?")]),
|
||||||
@@ -368,7 +371,7 @@ async def test_anthropic_prefix_caching_via_adapter():
|
|||||||
)
|
)
|
||||||
|
|
||||||
# First request
|
# First request
|
||||||
adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config)
|
adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config, call_type=LLMCallType.agent_step)
|
||||||
messages1 = [
|
messages1 = [
|
||||||
Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]),
|
Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]),
|
||||||
Message(role=MessageRole.user, content=[TextContent(text="What is 2+2?")]),
|
Message(role=MessageRole.user, content=[TextContent(text="What is 2+2?")]),
|
||||||
@@ -386,7 +389,7 @@ async def test_anthropic_prefix_caching_via_adapter():
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Second request
|
# Second request
|
||||||
adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config)
|
adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config, call_type=LLMCallType.agent_step)
|
||||||
messages2 = [
|
messages2 = [
|
||||||
Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]),
|
Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]),
|
||||||
Message(role=MessageRole.user, content=[TextContent(text="What is 3+3?")]),
|
Message(role=MessageRole.user, content=[TextContent(text="What is 3+3?")]),
|
||||||
@@ -435,7 +438,7 @@ async def test_gemini_prefix_caching_via_adapter():
|
|||||||
)
|
)
|
||||||
|
|
||||||
# First request
|
# First request
|
||||||
adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config)
|
adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config, call_type=LLMCallType.agent_step)
|
||||||
messages1 = [
|
messages1 = [
|
||||||
Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]),
|
Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]),
|
||||||
Message(role=MessageRole.user, content=[TextContent(text="What is 2+2?")]),
|
Message(role=MessageRole.user, content=[TextContent(text="What is 2+2?")]),
|
||||||
@@ -451,7 +454,7 @@ async def test_gemini_prefix_caching_via_adapter():
|
|||||||
print(f"Request 1 - prompt={adapter1.usage.prompt_tokens}, cached={adapter1.usage.cached_input_tokens}")
|
print(f"Request 1 - prompt={adapter1.usage.prompt_tokens}, cached={adapter1.usage.cached_input_tokens}")
|
||||||
|
|
||||||
# Second request
|
# Second request
|
||||||
adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config)
|
adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config, call_type=LLMCallType.agent_step)
|
||||||
messages2 = [
|
messages2 = [
|
||||||
Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]),
|
Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]),
|
||||||
Message(role=MessageRole.user, content=[TextContent(text="What is 3+3?")]),
|
Message(role=MessageRole.user, content=[TextContent(text="What is 3+3?")]),
|
||||||
|
|||||||
Reference in New Issue
Block a user