feat: Extract more text from messages [LET-4154] (#4359)
* Extract more message types
* Add extra tests covering messages with multiple content parts
This commit is contained in:
@@ -17,6 +17,14 @@ class MessageContentType(str, Enum):
|
||||
class MessageContent(BaseModel):
    """Base model for a single typed part of a message's content."""

    type: MessageContentType = Field(..., description="The type of the message.")

    def to_text(self) -> Optional[str]:
        """Extract text representation from this content type.

        The base implementation has no text to offer and always returns
        None.

        Returns:
            Text representation of the content, None if no text available.
        """
        return None
|
||||
|
||||
|
||||
# -------------------------------
|
||||
# Text Content
|
||||
@@ -27,6 +35,10 @@ class TextContent(MessageContent):
|
||||
type: Literal[MessageContentType.text] = Field(default=MessageContentType.text, description="The type of the message.")
|
||||
text: str = Field(..., description="The text content of the message.")
|
||||
|
||||
def to_text(self) -> str:
    """Return the text content.

    Returns:
        The value of ``self.text``, unchanged.
    """
    return self.text
|
||||
|
||||
|
||||
# -------------------------------
|
||||
# Image Content
|
||||
@@ -172,6 +184,13 @@ class ToolCallContent(MessageContent):
|
||||
..., description="The parameters being passed to the tool, structured as a dictionary of parameter names to values."
|
||||
)
|
||||
|
||||
def to_text(self) -> str:
    """Return a text representation of the tool call.

    The arguments in ``self.input`` are rendered as indented JSON and
    wrapped as ``Tool call: <name>(<json>)``.

    Returns:
        A string describing the tool name and its arguments.
    """
    import json

    # ensure_ascii=False keeps non-ASCII argument values readable instead of
    # turning them into \uXXXX escapes. default=str is a deliberate
    # best-effort fallback: a value that is not JSON-native (e.g. a date) is
    # stringified rather than raising TypeError and aborting the extraction.
    input_str = json.dumps(self.input, indent=2, ensure_ascii=False, default=str)
    return f"Tool call: {self.name}({input_str})"
|
||||
|
||||
|
||||
class ToolReturnContent(MessageContent):
|
||||
type: Literal[MessageContentType.tool_return] = Field(
|
||||
@@ -181,6 +200,11 @@ class ToolReturnContent(MessageContent):
|
||||
content: str = Field(..., description="The content returned by the tool execution.")
|
||||
is_error: bool = Field(..., description="Indicates whether the tool execution resulted in an error.")
|
||||
|
||||
def to_text(self) -> str:
    """Return the tool return content with a status prefix.

    Returns:
        ``self.content`` prefixed with "Tool error: " when ``self.is_error``
        is truthy, otherwise with "Tool result: ".
    """
    prefix = "Tool error: " if self.is_error else "Tool result: "
    return f"{prefix}{self.content}"
|
||||
|
||||
|
||||
class ReasoningContent(MessageContent):
|
||||
type: Literal[MessageContentType.reasoning] = Field(
|
||||
@@ -190,6 +214,10 @@ class ReasoningContent(MessageContent):
|
||||
reasoning: str = Field(..., description="The intermediate reasoning or thought process content.")
|
||||
signature: Optional[str] = Field(default=None, description="A unique identifier for this reasoning step.")
|
||||
|
||||
def to_text(self) -> str:
    """Return the reasoning content.

    Returns:
        The value of ``self.reasoning``, unchanged; ``self.signature`` is
        not included.
    """
    return self.reasoning
|
||||
|
||||
|
||||
class RedactedReasoningContent(MessageContent):
|
||||
type: Literal[MessageContentType.redacted_reasoning] = Field(
|
||||
|
||||
@@ -40,7 +40,6 @@ class MessageManager:
|
||||
Returns:
|
||||
Concatenated text content from the message
|
||||
"""
|
||||
# TODO: Make this much more complex/extend to beyond text content
|
||||
if not message.content:
|
||||
return ""
|
||||
|
||||
@@ -48,14 +47,12 @@ class MessageManager:
|
||||
if isinstance(message.content, str):
|
||||
return message.content
|
||||
|
||||
# handle list of content items
|
||||
# handle list of content items using the to_text() method
|
||||
text_parts = []
|
||||
for content_item in message.content:
|
||||
if isinstance(content_item, TextContent):
|
||||
text_parts.append(content_item.text)
|
||||
elif hasattr(content_item, "text"):
|
||||
# handle other content types that might have text
|
||||
text_parts.append(content_item.text)
|
||||
text = content_item.to_text()
|
||||
if text: # only add non-None text
|
||||
text_parts.append(text)
|
||||
|
||||
return " ".join(text_parts)
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ from letta.config import LettaConfig
|
||||
from letta.helpers.tpuf_client import TurbopufferClient, should_use_tpuf, should_use_tpuf_for_messages
|
||||
from letta.schemas.embedding_config import EmbeddingConfig
|
||||
from letta.schemas.enums import MessageRole, TagMatchMode, VectorDBProvider
|
||||
from letta.schemas.letta_message_content import TextContent
|
||||
from letta.schemas.letta_message_content import ReasoningContent, TextContent, ToolCallContent, ToolReturnContent
|
||||
from letta.schemas.message import Message as PydanticMessage
|
||||
from letta.schemas.passage import Passage
|
||||
from letta.server.server import SyncServer
|
||||
@@ -886,6 +886,26 @@ class TestTurbopufferMessagesIntegration:
|
||||
text5 = manager._extract_message_text(msg5)
|
||||
assert text5 == ""
|
||||
|
||||
# Test 6: Mixed content types with to_text() methods
|
||||
msg6 = PydanticMessage(
|
||||
role=MessageRole.assistant,
|
||||
content=[
|
||||
TextContent(text="User said:"),
|
||||
ToolCallContent(id="call-123", name="search", input={"query": "test"}),
|
||||
ToolReturnContent(tool_call_id="call-123", content="Found 5 results", is_error=False),
|
||||
ReasoningContent(is_native=True, reasoning="I should help the user", signature="step-1"),
|
||||
],
|
||||
agent_id="test-agent",
|
||||
)
|
||||
text6 = manager._extract_message_text(msg6)
|
||||
expected_parts = [
|
||||
"User said:",
|
||||
'Tool call: search({\n "query": "test"\n})',
|
||||
"Tool result: Found 5 results",
|
||||
"I should help the user",
|
||||
]
|
||||
assert text6 == " ".join(expected_parts)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skipif(not settings.tpuf_api_key, reason="Turbopuffer API key not configured")
|
||||
async def test_message_embedding_without_config(self, server, default_user, sarah_agent, enable_message_embedding):
|
||||
|
||||
Reference in New Issue
Block a user