feat: dump stack trace on segfault (#6121)
* dump stack trace on segfault
* log tokenizer selection and token-counting calls
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
import faulthandler
|
||||||
import importlib.util
|
import importlib.util
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
@@ -10,6 +11,9 @@ from pathlib import Path
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import uvicorn
|
import uvicorn
|
||||||
|
|
||||||
|
# Enable Python fault handler to get stack traces on segfaults
|
||||||
|
faulthandler.enable()
|
||||||
from fastapi import FastAPI, Request
|
from fastapi import FastAPI, Request
|
||||||
from fastapi.exceptions import RequestValidationError
|
from fastapi.exceptions import RequestValidationError
|
||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
|
|||||||
@@ -3239,8 +3239,18 @@ class AgentManager:
|
|||||||
model = agent_state.llm_config.model if agent_state.llm_config.model_endpoint_type == "anthropic" else None
|
model = agent_state.llm_config.model if agent_state.llm_config.model_endpoint_type == "anthropic" else None
|
||||||
|
|
||||||
token_counter = AnthropicTokenCounter(anthropic_client, model) # noqa
|
token_counter = AnthropicTokenCounter(anthropic_client, model) # noqa
|
||||||
|
logger.info(
|
||||||
|
f"Using AnthropicTokenCounter for agent_id={agent_id}, model={model}, "
|
||||||
|
f"model_endpoint_type={agent_state.llm_config.model_endpoint_type}, "
|
||||||
|
f"environment={settings.environment}"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
token_counter = TiktokenCounter(agent_state.llm_config.model)
|
token_counter = TiktokenCounter(agent_state.llm_config.model)
|
||||||
|
logger.info(
|
||||||
|
f"Using TiktokenCounter for agent_id={agent_id}, model={agent_state.llm_config.model}, "
|
||||||
|
f"model_endpoint_type={agent_state.llm_config.model_endpoint_type}, "
|
||||||
|
f"environment={settings.environment}"
|
||||||
|
)
|
||||||
|
|
||||||
return await calculator.calculate_context_window(
|
return await calculator.calculate_context_window(
|
||||||
agent_state=agent_state,
|
agent_state=agent_state,
|
||||||
|
|||||||
@@ -90,9 +90,24 @@ class TiktokenCounter(TokenCounter):
|
|||||||
ttl_s=3600, # cache for 1 hour
|
ttl_s=3600, # cache for 1 hour
|
||||||
)
|
)
|
||||||
async def count_text_tokens(self, text: str) -> int:
    """Count the tokens in ``text``, logging diagnostics around the call.

    Args:
        text: The text to tokenize. A falsy value (empty string or None)
            short-circuits to 0 without calling the tokenizer.

    Returns:
        The value returned by ``count_tokens(text)``, or 0 for falsy input.

    Raises:
        Exception: Whatever ``count_tokens`` raises is logged (with its
            traceback) and re-raised unchanged.
    """
    from letta.log import get_logger

    # NOTE(review): assumes get_logger returns a standard logging.Logger
    # (lazy %-style args and .exception are used below) -- confirm.
    logger = get_logger(__name__)

    if not text:
        return 0

    text_length = len(text)
    # Only the first 100 characters are logged to keep log lines bounded.
    text_preview = text[:100] + "..." if text_length > 100 else text
    # Lazy %-formatting: the message is only rendered when DEBUG is enabled.
    logger.debug(
        "TiktokenCounter.count_text_tokens: model=%s, text_length=%s, preview=%r",
        self.model,
        text_length,
        text_preview,
    )

    try:
        result = count_tokens(text)
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # which a plain logger.error call would drop.
        logger.exception(
            "TiktokenCounter.count_text_tokens: FAILED with %s: %s, text_length=%s",
            type(e).__name__,
            e,
            text_length,
        )
        raise

    logger.debug("TiktokenCounter.count_text_tokens: completed successfully, tokens=%s", result)
    return result
|
||||||
|
|
||||||
@trace_method
|
@trace_method
|
||||||
@async_redis_cache(
|
@async_redis_cache(
|
||||||
@@ -102,11 +117,28 @@ class TiktokenCounter(TokenCounter):
|
|||||||
ttl_s=3600, # cache for 1 hour
|
ttl_s=3600, # cache for 1 hour
|
||||||
)
|
)
|
||||||
async def count_message_tokens(self, messages: List[Dict[str, Any]]) -> int:
    """Count the tokens in ``messages``, logging diagnostics around the call.

    Args:
        messages: Message dicts to count. An empty or None value
            short-circuits to 0 without calling the tokenizer.

    Returns:
        The value returned by ``num_tokens_from_messages`` for
        ``self.model``, or 0 for empty input.

    Raises:
        Exception: Any error from importing or calling
            ``num_tokens_from_messages`` is logged (with its traceback)
            and re-raised unchanged.
    """
    import logging

    from letta.log import get_logger

    # NOTE(review): assumes get_logger returns a standard logging.Logger
    # (isEnabledFor, lazy %-style args and .exception are used below) -- confirm.
    logger = get_logger(__name__)

    if not messages:
        return 0

    num_messages = len(messages)

    # Stringifying every message's content is proportional to the total
    # payload size, so only pay for it when the DEBUG line will be emitted.
    if logger.isEnabledFor(logging.DEBUG):
        total_content_length = sum(len(str(m.get("content", ""))) for m in messages)
        logger.debug(
            "TiktokenCounter.count_message_tokens: model=%s, num_messages=%s, total_content_length=%s",
            self.model,
            num_messages,
            total_content_length,
        )

    try:
        # Imported inside the try so an import failure is logged as well.
        from letta.local_llm.utils import num_tokens_from_messages

        result = num_tokens_from_messages(messages=messages, model=self.model)
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # which a plain logger.error call would drop.
        logger.exception(
            "TiktokenCounter.count_message_tokens: FAILED with %s: %s, num_messages=%s",
            type(e).__name__,
            e,
            num_messages,
        )
        raise

    logger.debug("TiktokenCounter.count_message_tokens: completed successfully, tokens=%s", result)
    return result
|
||||||
|
|
||||||
@trace_method
|
@trace_method
|
||||||
@async_redis_cache(
|
@async_redis_cache(
|
||||||
|
|||||||
Reference in New Issue
Block a user