Compare commits
10 Commits
54369a7c12
...
1d1adb261a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1d1adb261a | ||
|
|
08d3c26732 | ||
|
|
2b3cfb1797 | ||
|
|
9af8e94fc9 | ||
|
|
93337ce680 | ||
|
|
c79de81cea | ||
|
|
4cb2f21c65 | ||
|
|
9eb7ae39a2 | ||
|
|
aeeec41859 | ||
|
|
a5bac26556 |
@@ -57,7 +57,7 @@ RUN set -eux; \
|
||||
esac; \
|
||||
apt-get update && \
|
||||
# Install curl, Python, and PostgreSQL client libraries
|
||||
apt-get install -y curl python3 python3-venv libpq-dev redis-server && \
|
||||
apt-get install -y curl python3 python3-venv libpq-dev redis-server git && \
|
||||
# Install Node.js
|
||||
curl -fsSL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - && \
|
||||
apt-get install -y nodejs && \
|
||||
@@ -71,6 +71,9 @@ RUN set -eux; \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Configure git to ignore ownership checks for mounted repos (safe.directory fix)
|
||||
RUN git config --global --add safe.directory '*'
|
||||
|
||||
# Add OpenTelemetry Collector configs
|
||||
COPY otel/otel-collector-config-file.yaml /etc/otel/config-file.yaml
|
||||
COPY otel/otel-collector-config-clickhouse.yaml /etc/otel/config-clickhouse.yaml
|
||||
|
||||
@@ -50,6 +50,10 @@ services:
|
||||
- CLICKHOUSE_DATABASE=${CLICKHOUSE_DATABASE}
|
||||
- CLICKHOUSE_USERNAME=${CLICKHOUSE_USERNAME}
|
||||
- CLICKHOUSE_PASSWORD=${CLICKHOUSE_PASSWORD}
|
||||
# Git safe.directory config (backup to Dockerfile fix for dubious ownership)
|
||||
- GIT_CONFIG_COUNT=1
|
||||
- GIT_CONFIG_KEY_0=safe.directory
|
||||
- GIT_CONFIG_VALUE_0=*
|
||||
# volumes:
|
||||
# - ./configs/server_config.yaml:/root/.letta/config # config file
|
||||
# - ~/.letta/credentials:/root/.letta/credentials # credentials file
|
||||
|
||||
@@ -233,7 +233,7 @@ letta:
|
||||
# =============================================================================
|
||||
model:
|
||||
# Global settings
|
||||
global_max_context_window_limit: 32000
|
||||
global_max_context_window_limit: 220000
|
||||
inner_thoughts_kwarg: thinking
|
||||
default_prompt_formatter: chatml
|
||||
|
||||
|
||||
@@ -46,3 +46,7 @@ services:
|
||||
- VLLM_API_BASE=${VLLM_API_BASE}
|
||||
- OPENLLM_AUTH_TYPE=${OPENLLM_AUTH_TYPE}
|
||||
- OPENLLM_API_KEY=${OPENLLM_API_KEY}
|
||||
# Git safe.directory config (backup to Dockerfile fix for dubious ownership)
|
||||
- GIT_CONFIG_COUNT=1
|
||||
- GIT_CONFIG_KEY_0=safe.directory
|
||||
- GIT_CONFIG_VALUE_0=*
|
||||
|
||||
@@ -8629,7 +8629,7 @@
|
||||
"schema": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CompactionRequest"
|
||||
"$ref": "#/components/schemas/letta__server__rest_api__routers__v1__agents__CompactionRequest"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
@@ -8855,18 +8855,14 @@
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 42,
|
||||
"pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$",
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'.",
|
||||
"examples": [
|
||||
"default",
|
||||
"conv-123e4567-e89b-42d3-8456-426614174000",
|
||||
"agent-123e4567-e89b-42d3-8456-426614174000"
|
||||
],
|
||||
"minLength": 41,
|
||||
"maxLength": 41,
|
||||
"pattern": "^conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
|
||||
"description": "The ID of the conv in the format 'conv-<uuid4>'",
|
||||
"examples": ["conv-123e4567-e89b-42d3-8456-426614174000"],
|
||||
"title": "Conversation Id"
|
||||
},
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'."
|
||||
"description": "The ID of the conv in the format 'conv-<uuid4>'"
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
@@ -8904,18 +8900,14 @@
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 42,
|
||||
"pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$",
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'.",
|
||||
"examples": [
|
||||
"default",
|
||||
"conv-123e4567-e89b-42d3-8456-426614174000",
|
||||
"agent-123e4567-e89b-42d3-8456-426614174000"
|
||||
],
|
||||
"minLength": 41,
|
||||
"maxLength": 41,
|
||||
"pattern": "^conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
|
||||
"description": "The ID of the conv in the format 'conv-<uuid4>'",
|
||||
"examples": ["conv-123e4567-e89b-42d3-8456-426614174000"],
|
||||
"title": "Conversation Id"
|
||||
},
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'."
|
||||
"description": "The ID of the conv in the format 'conv-<uuid4>'"
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
@@ -8963,18 +8955,14 @@
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 42,
|
||||
"pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$",
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'.",
|
||||
"examples": [
|
||||
"default",
|
||||
"conv-123e4567-e89b-42d3-8456-426614174000",
|
||||
"agent-123e4567-e89b-42d3-8456-426614174000"
|
||||
],
|
||||
"minLength": 41,
|
||||
"maxLength": 41,
|
||||
"pattern": "^conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
|
||||
"description": "The ID of the conv in the format 'conv-<uuid4>'",
|
||||
"examples": ["conv-123e4567-e89b-42d3-8456-426614174000"],
|
||||
"title": "Conversation Id"
|
||||
},
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'."
|
||||
"description": "The ID of the conv in the format 'conv-<uuid4>'"
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
@@ -9003,7 +8991,7 @@
|
||||
"get": {
|
||||
"tags": ["conversations"],
|
||||
"summary": "List Conversation Messages",
|
||||
"description": "List all messages in a conversation.\n\nReturns LettaMessage objects (UserMessage, AssistantMessage, etc.) for all\nmessages in the conversation, with support for cursor-based pagination.\n\nIf conversation_id is an agent ID (starts with \"agent-\"), returns messages\nfrom the agent's default conversation (no conversation isolation).",
|
||||
"description": "List all messages in a conversation.\n\nReturns LettaMessage objects (UserMessage, AssistantMessage, etc.) for all\nmessages in the conversation, with support for cursor-based pagination.\n\n**Agent-direct mode**: Pass conversation_id=\"default\" with agent_id parameter\nto list messages from the agent's default conversation.\n\n**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.",
|
||||
"operationId": "list_conversation_messages",
|
||||
"parameters": [
|
||||
{
|
||||
@@ -9015,7 +9003,7 @@
|
||||
"minLength": 1,
|
||||
"maxLength": 42,
|
||||
"pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$",
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'.",
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), 'default' for agent-direct mode (with agent_id parameter), or an agent ID ('agent-<uuid4>') for backwards compatibility (deprecated).",
|
||||
"examples": [
|
||||
"default",
|
||||
"conv-123e4567-e89b-42d3-8456-426614174000",
|
||||
@@ -9023,7 +9011,25 @@
|
||||
],
|
||||
"title": "Conversation Id"
|
||||
},
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'."
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), 'default' for agent-direct mode (with agent_id parameter), or an agent ID ('agent-<uuid4>') for backwards compatibility (deprecated)."
|
||||
},
|
||||
{
|
||||
"name": "agent_id",
|
||||
"in": "query",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"description": "Agent ID for agent-direct mode with 'default' conversation",
|
||||
"title": "Agent Id"
|
||||
},
|
||||
"description": "Agent ID for agent-direct mode with 'default' conversation"
|
||||
},
|
||||
{
|
||||
"name": "before",
|
||||
@@ -9173,7 +9179,7 @@
|
||||
"post": {
|
||||
"tags": ["conversations"],
|
||||
"summary": "Send Conversation Message",
|
||||
"description": "Send a message to a conversation and get a response.\n\nThis endpoint sends a message to an existing conversation.\nBy default (streaming=true), returns a streaming response (Server-Sent Events).\nSet streaming=false to get a complete JSON response.\n\nIf conversation_id is an agent ID (starts with \"agent-\"), routes to agent-direct\nmode with locking but without conversation-specific features.",
|
||||
"description": "Send a message to a conversation and get a response.\n\nThis endpoint sends a message to an existing conversation.\nBy default (streaming=true), returns a streaming response (Server-Sent Events).\nSet streaming=false to get a complete JSON response.\n\n**Agent-direct mode**: Pass conversation_id=\"default\" with agent_id in request body\nto send messages to the agent's default conversation with locking.\n\n**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.",
|
||||
"operationId": "send_conversation_message",
|
||||
"parameters": [
|
||||
{
|
||||
@@ -9185,7 +9191,7 @@
|
||||
"minLength": 1,
|
||||
"maxLength": 42,
|
||||
"pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$",
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'.",
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), 'default' for agent-direct mode (with agent_id parameter), or an agent ID ('agent-<uuid4>') for backwards compatibility (deprecated).",
|
||||
"examples": [
|
||||
"default",
|
||||
"conv-123e4567-e89b-42d3-8456-426614174000",
|
||||
@@ -9193,7 +9199,7 @@
|
||||
],
|
||||
"title": "Conversation Id"
|
||||
},
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'."
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), 'default' for agent-direct mode (with agent_id parameter), or an agent ID ('agent-<uuid4>') for backwards compatibility (deprecated)."
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
@@ -9238,7 +9244,7 @@
|
||||
"post": {
|
||||
"tags": ["conversations"],
|
||||
"summary": "Retrieve Conversation Stream",
|
||||
"description": "Resume the stream for the most recent active run in a conversation.\n\nThis endpoint allows you to reconnect to an active background stream\nfor a conversation, enabling recovery from network interruptions.\n\nIf conversation_id is an agent ID (starts with \"agent-\"), retrieves the\nstream for the agent's most recent active run.",
|
||||
"description": "Resume the stream for the most recent active run in a conversation.\n\nThis endpoint allows you to reconnect to an active background stream\nfor a conversation, enabling recovery from network interruptions.\n\n**Agent-direct mode**: Pass conversation_id=\"default\" with agent_id in request body\nto retrieve the stream for the agent's most recent active run.\n\n**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.",
|
||||
"operationId": "retrieve_conversation_stream",
|
||||
"parameters": [
|
||||
{
|
||||
@@ -9250,7 +9256,7 @@
|
||||
"minLength": 1,
|
||||
"maxLength": 42,
|
||||
"pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$",
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'.",
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), 'default' for agent-direct mode (with agent_id parameter), or an agent ID ('agent-<uuid4>') for backwards compatibility (deprecated).",
|
||||
"examples": [
|
||||
"default",
|
||||
"conv-123e4567-e89b-42d3-8456-426614174000",
|
||||
@@ -9258,7 +9264,7 @@
|
||||
],
|
||||
"title": "Conversation Id"
|
||||
},
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'."
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), 'default' for agent-direct mode (with agent_id parameter), or an agent ID ('agent-<uuid4>') for backwards compatibility (deprecated)."
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
@@ -9342,7 +9348,7 @@
|
||||
"post": {
|
||||
"tags": ["conversations"],
|
||||
"summary": "Cancel Conversation",
|
||||
"description": "Cancel runs associated with a conversation.\n\nNote: To cancel active runs, Redis is required.\n\nIf conversation_id is an agent ID (starts with \"agent-\"), cancels runs\nfor the agent's default conversation.",
|
||||
"description": "Cancel runs associated with a conversation.\n\nNote: To cancel active runs, Redis is required.\n\n**Agent-direct mode**: Pass conversation_id=\"default\" with agent_id query parameter\nto cancel runs for the agent's default conversation.\n\n**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.",
|
||||
"operationId": "cancel_conversation",
|
||||
"parameters": [
|
||||
{
|
||||
@@ -9354,7 +9360,7 @@
|
||||
"minLength": 1,
|
||||
"maxLength": 42,
|
||||
"pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$",
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'.",
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), 'default' for agent-direct mode (with agent_id parameter), or an agent ID ('agent-<uuid4>') for backwards compatibility (deprecated).",
|
||||
"examples": [
|
||||
"default",
|
||||
"conv-123e4567-e89b-42d3-8456-426614174000",
|
||||
@@ -9362,7 +9368,25 @@
|
||||
],
|
||||
"title": "Conversation Id"
|
||||
},
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'."
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), 'default' for agent-direct mode (with agent_id parameter), or an agent ID ('agent-<uuid4>') for backwards compatibility (deprecated)."
|
||||
},
|
||||
{
|
||||
"name": "agent_id",
|
||||
"in": "query",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"description": "Agent ID for agent-direct mode with 'default' conversation",
|
||||
"title": "Agent Id"
|
||||
},
|
||||
"description": "Agent ID for agent-direct mode with 'default' conversation"
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
@@ -9395,7 +9419,7 @@
|
||||
"post": {
|
||||
"tags": ["conversations"],
|
||||
"summary": "Compact Conversation",
|
||||
"description": "Compact (summarize) a conversation's message history.\n\nThis endpoint summarizes the in-context messages for a specific conversation,\nreducing the message count while preserving important context.\n\nIf conversation_id is an agent ID (starts with \"agent-\"), compacts the\nagent's default conversation messages.",
|
||||
"description": "Compact (summarize) a conversation's message history.\n\nThis endpoint summarizes the in-context messages for a specific conversation,\nreducing the message count while preserving important context.\n\n**Agent-direct mode**: Pass conversation_id=\"default\" with agent_id in request body\nto compact the agent's default conversation messages.\n\n**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.",
|
||||
"operationId": "compact_conversation",
|
||||
"parameters": [
|
||||
{
|
||||
@@ -9407,7 +9431,7 @@
|
||||
"minLength": 1,
|
||||
"maxLength": 42,
|
||||
"pattern": "^(default|conv-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})$",
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'.",
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), 'default' for agent-direct mode (with agent_id parameter), or an agent ID ('agent-<uuid4>') for backwards compatibility (deprecated).",
|
||||
"examples": [
|
||||
"default",
|
||||
"conv-123e4567-e89b-42d3-8456-426614174000",
|
||||
@@ -9415,7 +9439,7 @@
|
||||
],
|
||||
"title": "Conversation Id"
|
||||
},
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), an agent ID ('agent-<uuid4>') for agent-direct messaging, or 'default'."
|
||||
"description": "The conversation identifier. Can be a conversation ID ('conv-<uuid4>'), 'default' for agent-direct mode (with agent_id parameter), or an agent ID ('agent-<uuid4>') for backwards compatibility (deprecated)."
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
@@ -9424,7 +9448,7 @@
|
||||
"schema": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CompactionRequest"
|
||||
"$ref": "#/components/schemas/letta__server__rest_api__routers__v1__conversations__CompactionRequest"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
@@ -28104,14 +28128,14 @@
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high"]
|
||||
"enum": ["low", "medium", "high", "max"]
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Effort",
|
||||
"description": "Effort level for Opus 4.5 model (controls token conservation). Not setting this gives similar performance to 'high'."
|
||||
"description": "Effort level for supported Anthropic models (controls token spending). 'max' is only available on Opus 4.6. Not setting this gives similar performance to 'high'."
|
||||
},
|
||||
"strict": {
|
||||
"type": "boolean",
|
||||
@@ -31460,23 +31484,6 @@
|
||||
"required": ["code"],
|
||||
"title": "CodeInput"
|
||||
},
|
||||
"CompactionRequest": {
|
||||
"properties": {
|
||||
"compaction_settings": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CompactionSettings-Input"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"description": "Optional compaction settings to use for this summarization request. If not provided, the agent's default settings will be used."
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"title": "CompactionRequest"
|
||||
},
|
||||
"CompactionResponse": {
|
||||
"properties": {
|
||||
"summary": {
|
||||
@@ -32611,6 +32618,18 @@
|
||||
"description": "If True, returns token IDs and logprobs for ALL LLM generations in the agent step, not just the last one. Uses SGLang native /generate endpoint. Returns 'turns' field with TurnTokenData for each assistant/tool turn. Required for proper multi-turn RL training with loss masking.",
|
||||
"default": false
|
||||
},
|
||||
"agent_id": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Agent Id",
|
||||
"description": "Agent ID for agent-direct mode with 'default' conversation. Use with conversation_id='default' in the URL path."
|
||||
},
|
||||
"streaming": {
|
||||
"type": "boolean",
|
||||
"title": "Streaming",
|
||||
@@ -43448,6 +43467,18 @@
|
||||
},
|
||||
"RetrieveStreamRequest": {
|
||||
"properties": {
|
||||
"agent_id": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Agent Id",
|
||||
"description": "Agent ID for agent-direct mode with 'default' conversation. Use with conversation_id='default' in the URL path."
|
||||
},
|
||||
"starting_after": {
|
||||
"type": "integer",
|
||||
"title": "Starting After",
|
||||
@@ -51563,6 +51594,52 @@
|
||||
],
|
||||
"title": "ToolSchema"
|
||||
},
|
||||
"letta__server__rest_api__routers__v1__agents__CompactionRequest": {
|
||||
"properties": {
|
||||
"compaction_settings": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CompactionSettings-Input"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"description": "Optional compaction settings to use for this summarization request. If not provided, the agent's default settings will be used."
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"title": "CompactionRequest"
|
||||
},
|
||||
"letta__server__rest_api__routers__v1__conversations__CompactionRequest": {
|
||||
"properties": {
|
||||
"agent_id": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Agent Id",
|
||||
"description": "Agent ID for agent-direct mode with 'default' conversation. Use with conversation_id='default' in the URL path."
|
||||
},
|
||||
"compaction_settings": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CompactionSettings-Input"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"description": "Optional compaction settings to use for this summarization request. If not provided, the agent's default settings will be used."
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"title": "CompactionRequest"
|
||||
},
|
||||
"letta__server__rest_api__routers__v1__tools__ToolExecuteRequest": {
|
||||
"properties": {
|
||||
"args": {
|
||||
|
||||
@@ -5,7 +5,7 @@ try:
|
||||
__version__ = version("letta")
|
||||
except PackageNotFoundError:
|
||||
# Fallback for development installations
|
||||
__version__ = "0.16.5"
|
||||
__version__ = "0.16.6"
|
||||
|
||||
if os.environ.get("LETTA_VERSION"):
|
||||
__version__ = os.environ["LETTA_VERSION"]
|
||||
|
||||
@@ -12,7 +12,7 @@ from letta.helpers.datetime_helpers import get_utc_time
|
||||
from letta.log import get_logger
|
||||
from letta.otel.tracing import trace_method
|
||||
from letta.schemas.agent import AgentState
|
||||
from letta.schemas.enums import MessageRole
|
||||
from letta.schemas.enums import MessageRole, RunStatus
|
||||
from letta.schemas.letta_message import MessageType
|
||||
from letta.schemas.letta_message_content import TextContent
|
||||
from letta.schemas.letta_response import LettaResponse
|
||||
@@ -300,7 +300,31 @@ async def _prepare_in_context_messages_no_persist_async(
|
||||
else:
|
||||
# User is trying to send a regular message
|
||||
if current_in_context_messages and current_in_context_messages[-1].is_approval_request():
|
||||
raise PendingApprovalError(pending_request_id=current_in_context_messages[-1].id)
|
||||
# Check if the run associated with this approval request is still active
|
||||
# If the run was cancelled/failed, the approval is orphaned and should be skipped
|
||||
approval_msg = current_in_context_messages[-1]
|
||||
approval_run_id = approval_msg.run_id
|
||||
is_orphaned_approval = False
|
||||
|
||||
if approval_run_id:
|
||||
try:
|
||||
from letta.services.run_manager import RunManager
|
||||
|
||||
run_manager = RunManager()
|
||||
approval_run = await run_manager.get_run_by_id(run_id=approval_run_id, actor=actor)
|
||||
# Note: completed runs may still have valid approvals (stop_reason=requires_approval)
|
||||
if approval_run.status in [RunStatus.cancelled, RunStatus.failed]:
|
||||
logger.info(
|
||||
f"Skipping orphaned approval request {approval_msg.id} - associated run {approval_run_id} "
|
||||
f"has status {approval_run.status.value}"
|
||||
)
|
||||
is_orphaned_approval = True
|
||||
except Exception as e:
|
||||
# If we can't check the run status, be conservative and raise the error
|
||||
logger.warning(f"Failed to check run status for approval request {approval_msg.id}: {e}")
|
||||
|
||||
if not is_orphaned_approval:
|
||||
raise PendingApprovalError(pending_request_id=approval_msg.id)
|
||||
|
||||
# Create a new user message from the input but don't store it yet
|
||||
new_in_context_messages = await create_input_messages(
|
||||
|
||||
@@ -1096,7 +1096,7 @@ class LettaAgentV3(LettaAgentV2):
|
||||
)
|
||||
# update metrics
|
||||
self._update_global_usage_stats(llm_adapter.usage)
|
||||
self.context_token_estimate = llm_adapter.usage.total_tokens
|
||||
self.context_token_estimate = llm_adapter.usage.prompt_tokens
|
||||
self.logger.info(f"Context token estimate after LLM request: {self.context_token_estimate}")
|
||||
|
||||
# Extract logprobs if present (for RL training)
|
||||
|
||||
@@ -247,7 +247,7 @@ CORE_MEMORY_LINE_NUMBER_WARNING = "# NOTE: Line numbers shown below (with arrows
|
||||
# Constants to do with summarization / conversation length window
|
||||
# The max amount of tokens supported by the underlying model (eg 8k for gpt-4 and Mistral 7B)
|
||||
LLM_MAX_CONTEXT_WINDOW = {
|
||||
"DEFAULT": 30000,
|
||||
"DEFAULT": 220000,
|
||||
# deepseek
|
||||
"deepseek-chat": 64000,
|
||||
"deepseek-reasoner": 64000,
|
||||
@@ -444,7 +444,7 @@ REQ_HEARTBEAT_MESSAGE = f"{NON_USER_MSG_PREFIX}Function called using request_hea
|
||||
FUNC_FAILED_HEARTBEAT_MESSAGE = f"{NON_USER_MSG_PREFIX}Function call failed, returning control"
|
||||
|
||||
|
||||
RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE = 5
|
||||
RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE = 25
|
||||
|
||||
MAX_FILENAME_LENGTH = 255
|
||||
RESERVED_FILENAMES = {"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "LPT1", "LPT2"}
|
||||
|
||||
@@ -371,7 +371,7 @@ def serialize_message_content(message_content: Optional[List[Union[MessageConten
|
||||
for content in message_content:
|
||||
if isinstance(content, MessageContent):
|
||||
if content.type == MessageContentType.image:
|
||||
assert content.source.type == ImageSourceType.letta, f"Invalid image source type: {content.source.type}"
|
||||
assert content.source.type in (ImageSourceType.letta, ImageSourceType.base64), f"Invalid image source type: {content.source.type}"
|
||||
# Sanitize null bytes from message content to prevent PostgreSQL errors
|
||||
serialized_message_content.append(sanitize_null_bytes(content.model_dump(mode="json")))
|
||||
elif isinstance(content, dict):
|
||||
@@ -396,7 +396,7 @@ def deserialize_message_content(data: Optional[List[Dict]]) -> List[MessageConte
|
||||
if content_type == MessageContentType.text:
|
||||
content = TextContent(**item)
|
||||
elif content_type == MessageContentType.image:
|
||||
assert item["source"]["type"] == ImageSourceType.letta, f"Invalid image source type: {item['source']['type']}"
|
||||
assert item["source"]["type"] in (ImageSourceType.letta, ImageSourceType.base64), f"Invalid image source type: {item['source']['type']}"
|
||||
content = ImageContent(**item)
|
||||
elif content_type == MessageContentType.tool_call:
|
||||
content = ToolCallContent(**item)
|
||||
|
||||
@@ -51,6 +51,58 @@ def sanitize_unicode_surrogates(value: Any) -> Any:
|
||||
return value
|
||||
|
||||
|
||||
# Common typographic Unicode characters and the ASCII text they are replaced with.
_UNICODE_TO_ASCII = {
    "\u2014": "--",  # em-dash
    "\u2013": "-",  # en-dash
    "\u2012": "-",  # figure dash
    "\u2010": "-",  # hyphen
    "\u2011": "-",  # non-breaking hyphen
    "\u201c": '"',  # left double quotation mark
    "\u201d": '"',  # right double quotation mark
    "\u2018": "'",  # left single quotation mark
    "\u2019": "'",  # right single quotation mark
    "\u201a": ",",  # single low-9 quotation mark
    "\u201e": '"',  # double low-9 quotation mark
    "\u2026": "...",  # horizontal ellipsis
    "\u00a0": " ",  # non-breaking space
    "\u00ad": "",  # soft hyphen (invisible, strip)
}

# Precomputed table so all replacements happen in a single pass over the string.
# Every replacement value is plain ASCII, so one pass is equivalent to applying
# the replacements one after another.
_UNICODE_TO_ASCII_TABLE = str.maketrans(_UNICODE_TO_ASCII)

# Control characters that are kept because they are ordinary whitespace.
_ALLOWED_WHITESPACE = ("\t", "\n", "\r")


def sanitize_control_characters(value: Any) -> Any:
    """Recursively sanitize strings for strict ASCII-only JSON backends (e.g. Synthetic).

    Replaces common non-ASCII typography (em-dash, curly quotes, ellipsis, etc.)
    with ASCII equivalents, then keeps only printable ASCII (0x20-0x7E) plus
    tab, newline and carriage return. Everything else is dropped: the other
    ASCII control characters (0x00-0x1F), DEL (0x7F), and any remaining
    non-ASCII characters.

    Args:
        value: The value to sanitize.
            - Strings: replaced/stripped as described above
            - Dicts: keys and values are sanitized recursively
            - Lists and tuples: every element is sanitized recursively
            - Other types: returned as-is

    Returns:
        A sanitized copy of ``value`` with the same container structure.
        Note that distinct dict keys may collapse into one key if they only
        differ in characters that are stripped or replaced.
    """
    if isinstance(value, str):
        # Replace known typographic Unicode with ASCII equivalents first
        translated = value.translate(_UNICODE_TO_ASCII_TABLE)
        return "".join(
            char
            for char in translated
            # The lower bound matters: without it every control character
            # (0x00-0x1F) would pass the "<= 0x7E" check and never be stripped.
            if 0x20 <= ord(char) <= 0x7E or char in _ALLOWED_WHITESPACE
        )
    elif isinstance(value, dict):
        return {sanitize_control_characters(k): sanitize_control_characters(v) for k, v in value.items()}
    elif isinstance(value, list):
        return [sanitize_control_characters(item) for item in value]
    elif isinstance(value, tuple):
        return tuple(sanitize_control_characters(item) for item in value)
    else:
        return value
|
||||
|
||||
|
||||
def sanitize_null_bytes(value: Any) -> Any:
|
||||
"""Recursively remove null bytes (0x00) from strings.
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ from letta.errors import (
|
||||
LLMTimeoutError,
|
||||
LLMUnprocessableEntityError,
|
||||
)
|
||||
from letta.helpers.json_helpers import sanitize_unicode_surrogates
|
||||
from letta.helpers.json_helpers import sanitize_control_characters, sanitize_unicode_surrogates
|
||||
from letta.llm_api.error_utils import is_context_window_overflow_message, is_insufficient_credits_message
|
||||
from letta.llm_api.helpers import (
|
||||
add_inner_thoughts_to_functions,
|
||||
@@ -453,7 +453,16 @@ class OpenAIClient(LLMClientBase):
|
||||
request_obj=data,
|
||||
)
|
||||
|
||||
request_data = data.model_dump(exclude_unset=True)
|
||||
request_data = data.model_dump(exclude_unset=True, exclude_none=True)
|
||||
|
||||
# Strip reasoning fields (see streaming build_request_data for explanation)
|
||||
_REASONING_FIELDS = ("reasoning_content", "reasoning_content_signature",
|
||||
"redacted_reasoning_content", "omitted_reasoning_content")
|
||||
if "messages" in request_data:
|
||||
for message in request_data["messages"]:
|
||||
for field in _REASONING_FIELDS:
|
||||
message.pop(field, None)
|
||||
|
||||
return request_data
|
||||
|
||||
@trace_method
|
||||
@@ -639,14 +648,16 @@ class OpenAIClient(LLMClientBase):
|
||||
if not supports_structured_output(llm_config):
|
||||
# Provider doesn't support structured output - ensure strict is False
|
||||
tool.function.strict = False
|
||||
request_data = data.model_dump(exclude_unset=True)
|
||||
request_data = data.model_dump(exclude_unset=True, exclude_none=True)
|
||||
|
||||
# Fireworks uses strict validation (additionalProperties: false) and rejects
|
||||
# reasoning fields that are not in their schema.
|
||||
is_fireworks = llm_config.model_endpoint and "fireworks.ai" in llm_config.model_endpoint
|
||||
if is_fireworks and "messages" in request_data:
|
||||
# Strip reasoning fields that strict backends (Fireworks/Synthetic) reject.
|
||||
# exclude_none handles fields that are None, but reasoning_content has actual
|
||||
# text from previous assistant turns and must be explicitly removed.
|
||||
_REASONING_FIELDS = ("reasoning_content", "reasoning_content_signature",
|
||||
"redacted_reasoning_content", "omitted_reasoning_content")
|
||||
if "messages" in request_data:
|
||||
for message in request_data["messages"]:
|
||||
for field in ("reasoning_content_signature", "redacted_reasoning_content", "omitted_reasoning_content"):
|
||||
for field in _REASONING_FIELDS:
|
||||
message.pop(field, None)
|
||||
|
||||
# If Ollama
|
||||
@@ -676,6 +687,7 @@ class OpenAIClient(LLMClientBase):
|
||||
"""
|
||||
# Sanitize Unicode surrogates to prevent encoding errors
|
||||
request_data = sanitize_unicode_surrogates(request_data)
|
||||
request_data = sanitize_control_characters(request_data)
|
||||
|
||||
client = OpenAI(**self._prepare_client_kwargs(llm_config))
|
||||
# Route based on payload shape: Responses uses 'input', Chat Completions uses 'messages'
|
||||
@@ -701,6 +713,7 @@ class OpenAIClient(LLMClientBase):
|
||||
"""
|
||||
# Sanitize Unicode surrogates to prevent encoding errors
|
||||
request_data = sanitize_unicode_surrogates(request_data)
|
||||
request_data = sanitize_control_characters(request_data)
|
||||
|
||||
kwargs = await self._prepare_client_kwargs_async(llm_config)
|
||||
client = AsyncOpenAI(**kwargs)
|
||||
@@ -920,6 +933,7 @@ class OpenAIClient(LLMClientBase):
|
||||
"""
|
||||
# Sanitize Unicode surrogates to prevent encoding errors
|
||||
request_data = sanitize_unicode_surrogates(request_data)
|
||||
request_data = sanitize_control_characters(request_data)
|
||||
|
||||
kwargs = await self._prepare_client_kwargs_async(llm_config)
|
||||
client = AsyncOpenAI(**kwargs)
|
||||
@@ -954,6 +968,7 @@ class OpenAIClient(LLMClientBase):
|
||||
"""
|
||||
# Sanitize Unicode surrogates to prevent encoding errors
|
||||
request_data = sanitize_unicode_surrogates(request_data)
|
||||
request_data = sanitize_control_characters(request_data)
|
||||
|
||||
kwargs = await self._prepare_client_kwargs_async(llm_config)
|
||||
client = AsyncOpenAI(**kwargs)
|
||||
|
||||
@@ -88,8 +88,7 @@ class LettaRequest(BaseModel):
|
||||
)
|
||||
top_logprobs: Optional[int] = Field(
|
||||
default=None,
|
||||
description="Number of most likely tokens to return at each position (0-20). "
|
||||
"Requires return_logprobs=True.",
|
||||
description="Number of most likely tokens to return at each position (0-20). Requires return_logprobs=True.",
|
||||
)
|
||||
return_token_ids: bool = Field(
|
||||
default=False,
|
||||
@@ -155,6 +154,10 @@ class LettaStreamingRequest(LettaRequest):
|
||||
class ConversationMessageRequest(LettaRequest):
|
||||
"""Request for sending messages to a conversation. Streams by default."""
|
||||
|
||||
agent_id: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Agent ID for agent-direct mode with 'default' conversation. Use with conversation_id='default' in the URL path.",
|
||||
)
|
||||
streaming: bool = Field(
|
||||
default=True,
|
||||
description="If True (default), returns a streaming response (Server-Sent Events). If False, returns a complete JSON response.",
|
||||
@@ -194,6 +197,10 @@ class CreateBatch(BaseModel):
|
||||
|
||||
|
||||
class RetrieveStreamRequest(BaseModel):
|
||||
agent_id: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Agent ID for agent-direct mode with 'default' conversation. Use with conversation_id='default' in the URL path.",
|
||||
)
|
||||
starting_after: int = Field(
|
||||
0, description="Sequence id to use as a cursor for pagination. Response will start streaming after this chunk sequence id"
|
||||
)
|
||||
|
||||
@@ -1442,11 +1442,12 @@ class Message(BaseMessage):
|
||||
tool_return = self.tool_returns[0]
|
||||
if not tool_return.tool_call_id:
|
||||
raise TypeError("OpenAI API requires tool_call_id to be set.")
|
||||
# Convert to text first (replaces images with placeholders), then truncate
|
||||
# Tool message content must be a string per OpenAI Chat Completions spec.
|
||||
# Images are handled in to_openai_dicts_from_list via injected user messages.
|
||||
func_response_text = tool_return_to_text(tool_return.func_response)
|
||||
func_response = truncate_tool_return(func_response_text, tool_return_truncation_chars)
|
||||
openai_content = truncate_tool_return(func_response_text, tool_return_truncation_chars)
|
||||
openai_message = {
|
||||
"content": func_response,
|
||||
"content": openai_content,
|
||||
"role": self.role,
|
||||
"tool_call_id": tool_return.tool_call_id[:max_tool_id_length] if max_tool_id_length else tool_return.tool_call_id,
|
||||
}
|
||||
@@ -1499,16 +1500,52 @@ class Message(BaseMessage):
|
||||
for tr in m.tool_returns:
|
||||
if not tr.tool_call_id:
|
||||
raise TypeError("ToolReturn came back without a tool_call_id.")
|
||||
# Convert multi-modal to text (images → placeholders), then truncate
|
||||
func_response_text = tool_return_to_text(tr.func_response)
|
||||
func_response = truncate_tool_return(func_response_text, tool_return_truncation_chars)
|
||||
# OpenAI Chat Completions: tool message content must be a string.
|
||||
# Images can only go in user messages, so split: text in tool return,
|
||||
# image in a follow-up user message.
|
||||
func_response = tr.func_response
|
||||
image_parts = []
|
||||
if isinstance(func_response, list) and any(
|
||||
isinstance(p, ImageContent) or (isinstance(p, dict) and p.get("type") == "image")
|
||||
for p in func_response
|
||||
):
|
||||
# Extract text for the tool return, collect images for user message
|
||||
text_pieces = []
|
||||
for part in func_response:
|
||||
if isinstance(part, TextContent):
|
||||
text_pieces.append(part.text)
|
||||
elif isinstance(part, ImageContent):
|
||||
image_url = Message._image_source_to_data_url(part)
|
||||
if image_url:
|
||||
image_parts.append({"type": "image_url", "image_url": {"url": image_url}})
|
||||
elif isinstance(part, dict):
|
||||
if part.get("type") == "text":
|
||||
text_pieces.append(part.get("text", ""))
|
||||
elif part.get("type") == "image":
|
||||
image_url = Message._image_dict_to_data_url(part)
|
||||
if image_url:
|
||||
image_parts.append({"type": "image_url", "image_url": {"url": image_url}})
|
||||
else:
|
||||
text_pieces.append(str(part))
|
||||
openai_content = truncate_tool_return("\n".join(text_pieces), tool_return_truncation_chars)
|
||||
else:
|
||||
func_response_text = tool_return_to_text(func_response)
|
||||
openai_content = truncate_tool_return(func_response_text, tool_return_truncation_chars)
|
||||
result.append(
|
||||
{
|
||||
"content": func_response,
|
||||
"content": openai_content,
|
||||
"role": "tool",
|
||||
"tool_call_id": tr.tool_call_id[:max_tool_id_length] if max_tool_id_length else tr.tool_call_id,
|
||||
}
|
||||
)
|
||||
# Inject image as a user message right after the tool return
|
||||
if image_parts:
|
||||
result.append(
|
||||
{
|
||||
"content": [{"type": "text", "text": "[Tool returned image]"}] + image_parts,
|
||||
"role": "user",
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
d = m.to_openai_dict(
|
||||
|
||||
@@ -282,10 +282,10 @@ class AnthropicModelSettings(ModelSettings):
|
||||
description="Soft control for how verbose model output should be, used for GPT-5 models.",
|
||||
)
|
||||
|
||||
# Opus 4.5 effort parameter
|
||||
effort: Optional[Literal["low", "medium", "high"]] = Field(
|
||||
# Effort parameter for Opus 4.5, Opus 4.6, and Sonnet 4.6
|
||||
effort: Optional[Literal["low", "medium", "high", "max"]] = Field(
|
||||
None,
|
||||
description="Effort level for Opus 4.5 model (controls token conservation). Not setting this gives similar performance to 'high'.",
|
||||
description="Effort level for supported Anthropic models (controls token spending). 'max' is only available on Opus 4.6. Not setting this gives similar performance to 'high'.",
|
||||
)
|
||||
|
||||
# Anthropic supports strict mode for tool calling - defaults to False
|
||||
|
||||
@@ -34,7 +34,7 @@ from letta.services.run_manager import RunManager
|
||||
from letta.services.streaming_service import StreamingService
|
||||
from letta.services.summarizer.summarizer_config import CompactionSettings
|
||||
from letta.settings import settings
|
||||
from letta.validators import ConversationId
|
||||
from letta.validators import ConversationId, ConversationIdOrDefault
|
||||
|
||||
router = APIRouter(prefix="/conversations", tags=["conversations"])
|
||||
|
||||
@@ -150,7 +150,8 @@ ConversationMessagesResponse = Annotated[
|
||||
operation_id="list_conversation_messages",
|
||||
)
|
||||
async def list_conversation_messages(
|
||||
conversation_id: ConversationId,
|
||||
conversation_id: ConversationIdOrDefault,
|
||||
agent_id: Optional[str] = Query(None, description="Agent ID for agent-direct mode with 'default' conversation"),
|
||||
server: SyncServer = Depends(get_letta_server),
|
||||
headers: HeaderParams = Depends(get_headers),
|
||||
before: Optional[str] = Query(
|
||||
@@ -175,15 +176,24 @@ async def list_conversation_messages(
|
||||
Returns LettaMessage objects (UserMessage, AssistantMessage, etc.) for all
|
||||
messages in the conversation, with support for cursor-based pagination.
|
||||
|
||||
If conversation_id is an agent ID (starts with "agent-"), returns messages
|
||||
from the agent's default conversation (no conversation isolation).
|
||||
**Agent-direct mode**: Pass conversation_id="default" with agent_id parameter
|
||||
to list messages from the agent's default conversation.
|
||||
|
||||
**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.
|
||||
"""
|
||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||
|
||||
# Agent-direct mode: list agent's default conversation messages
|
||||
if conversation_id.startswith("agent-"):
|
||||
# Agent-direct mode: conversation_id="default" + agent_id param (preferred)
|
||||
# OR conversation_id="agent-*" (backwards compat, deprecated)
|
||||
resolved_agent_id = None
|
||||
if conversation_id == "default" and agent_id:
|
||||
resolved_agent_id = agent_id
|
||||
elif conversation_id.startswith("agent-"):
|
||||
resolved_agent_id = conversation_id
|
||||
|
||||
if resolved_agent_id:
|
||||
return await server.get_agent_recall_async(
|
||||
agent_id=conversation_id,
|
||||
agent_id=resolved_agent_id,
|
||||
after=after,
|
||||
before=before,
|
||||
limit=limit,
|
||||
@@ -324,7 +334,7 @@ async def _send_agent_direct_message(
|
||||
},
|
||||
)
|
||||
async def send_conversation_message(
|
||||
conversation_id: ConversationId,
|
||||
conversation_id: ConversationIdOrDefault,
|
||||
request: ConversationMessageRequest = Body(...),
|
||||
server: SyncServer = Depends(get_letta_server),
|
||||
headers: HeaderParams = Depends(get_headers),
|
||||
@@ -336,22 +346,28 @@ async def send_conversation_message(
|
||||
By default (streaming=true), returns a streaming response (Server-Sent Events).
|
||||
Set streaming=false to get a complete JSON response.
|
||||
|
||||
If conversation_id is an agent ID (starts with "agent-"), routes to agent-direct
|
||||
mode with locking but without conversation-specific features.
|
||||
**Agent-direct mode**: Pass conversation_id="default" with agent_id in request body
|
||||
to send messages to the agent's default conversation with locking.
|
||||
|
||||
**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.
|
||||
"""
|
||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||
|
||||
if not request.messages or len(request.messages) == 0:
|
||||
raise HTTPException(status_code=422, detail="Messages must not be empty")
|
||||
|
||||
# Detect agent-direct mode: conversation_id is actually an agent ID
|
||||
is_agent_direct = conversation_id.startswith("agent-")
|
||||
# Agent-direct mode: conversation_id="default" + agent_id in body (preferred)
|
||||
# OR conversation_id="agent-*" (backwards compat, deprecated)
|
||||
resolved_agent_id = None
|
||||
if conversation_id == "default" and request.agent_id:
|
||||
resolved_agent_id = request.agent_id
|
||||
elif conversation_id.startswith("agent-"):
|
||||
resolved_agent_id = conversation_id
|
||||
|
||||
if is_agent_direct:
|
||||
if resolved_agent_id:
|
||||
# Agent-direct mode: use agent ID, enable locking, skip conversation features
|
||||
agent_id = conversation_id
|
||||
return await _send_agent_direct_message(
|
||||
agent_id=agent_id,
|
||||
agent_id=resolved_agent_id,
|
||||
request=request,
|
||||
server=server,
|
||||
actor=actor,
|
||||
@@ -488,7 +504,7 @@ async def send_conversation_message(
|
||||
},
|
||||
)
|
||||
async def retrieve_conversation_stream(
|
||||
conversation_id: ConversationId,
|
||||
conversation_id: ConversationIdOrDefault,
|
||||
request: RetrieveStreamRequest = Body(None),
|
||||
headers: HeaderParams = Depends(get_headers),
|
||||
server: SyncServer = Depends(get_letta_server),
|
||||
@@ -499,18 +515,28 @@ async def retrieve_conversation_stream(
|
||||
This endpoint allows you to reconnect to an active background stream
|
||||
for a conversation, enabling recovery from network interruptions.
|
||||
|
||||
If conversation_id is an agent ID (starts with "agent-"), retrieves the
|
||||
stream for the agent's most recent active run.
|
||||
**Agent-direct mode**: Pass conversation_id="default" with agent_id in request body
|
||||
to retrieve the stream for the agent's most recent active run.
|
||||
|
||||
**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.
|
||||
"""
|
||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||
runs_manager = RunManager()
|
||||
|
||||
# Agent-direct mode: conversation_id="default" + agent_id in body (preferred)
|
||||
# OR conversation_id="agent-*" (backwards compat, deprecated)
|
||||
resolved_agent_id = None
|
||||
if conversation_id == "default" and request and request.agent_id:
|
||||
resolved_agent_id = request.agent_id
|
||||
elif conversation_id.startswith("agent-"):
|
||||
resolved_agent_id = conversation_id
|
||||
|
||||
# Find the most recent active run
|
||||
if conversation_id.startswith("agent-"):
|
||||
if resolved_agent_id:
|
||||
# Agent-direct mode: find runs by agent_id
|
||||
active_runs = await runs_manager.list_runs(
|
||||
actor=actor,
|
||||
agent_id=conversation_id,
|
||||
agent_id=resolved_agent_id,
|
||||
statuses=[RunStatus.created, RunStatus.running],
|
||||
limit=1,
|
||||
ascending=False,
|
||||
@@ -578,7 +604,8 @@ async def retrieve_conversation_stream(
|
||||
|
||||
@router.post("/{conversation_id}/cancel", operation_id="cancel_conversation")
|
||||
async def cancel_conversation(
|
||||
conversation_id: ConversationId,
|
||||
conversation_id: ConversationIdOrDefault,
|
||||
agent_id: Optional[str] = Query(None, description="Agent ID for agent-direct mode with 'default' conversation"),
|
||||
server: SyncServer = Depends(get_letta_server),
|
||||
headers: HeaderParams = Depends(get_headers),
|
||||
) -> dict:
|
||||
@@ -587,8 +614,10 @@ async def cancel_conversation(
|
||||
|
||||
Note: To cancel active runs, Redis is required.
|
||||
|
||||
If conversation_id is an agent ID (starts with "agent-"), cancels runs
|
||||
for the agent's default conversation.
|
||||
**Agent-direct mode**: Pass conversation_id="default" with agent_id query parameter
|
||||
to cancel runs for the agent's default conversation.
|
||||
|
||||
**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.
|
||||
"""
|
||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||
logger.info(
|
||||
@@ -601,13 +630,20 @@ async def cancel_conversation(
|
||||
if not settings.track_agent_run:
|
||||
raise HTTPException(status_code=400, detail="Agent run tracking is disabled")
|
||||
|
||||
# Agent-direct mode: use agent_id directly, skip conversation lookup
|
||||
if conversation_id.startswith("agent-"):
|
||||
agent_id = conversation_id
|
||||
# Agent-direct mode: conversation_id="default" + agent_id param (preferred)
|
||||
# OR conversation_id="agent-*" (backwards compat, deprecated)
|
||||
resolved_agent_id = None
|
||||
if conversation_id == "default" and agent_id:
|
||||
resolved_agent_id = agent_id
|
||||
elif conversation_id.startswith("agent-"):
|
||||
resolved_agent_id = conversation_id
|
||||
|
||||
if resolved_agent_id:
|
||||
# Agent-direct mode: use agent_id directly, skip conversation lookup
|
||||
# Find active runs for this agent (default conversation has conversation_id=None)
|
||||
runs = await server.run_manager.list_runs(
|
||||
actor=actor,
|
||||
agent_id=agent_id,
|
||||
agent_id=resolved_agent_id,
|
||||
statuses=[RunStatus.created, RunStatus.running],
|
||||
ascending=False,
|
||||
limit=100,
|
||||
@@ -657,6 +693,10 @@ async def cancel_conversation(
|
||||
|
||||
|
||||
class CompactionRequest(BaseModel):
|
||||
agent_id: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Agent ID for agent-direct mode with 'default' conversation. Use with conversation_id='default' in the URL path.",
|
||||
)
|
||||
compaction_settings: Optional[CompactionSettings] = Field(
|
||||
default=None,
|
||||
description="Optional compaction settings to use for this summarization request. If not provided, the agent's default settings will be used.",
|
||||
@@ -671,7 +711,7 @@ class CompactionResponse(BaseModel):
|
||||
|
||||
@router.post("/{conversation_id}/compact", response_model=CompactionResponse, operation_id="compact_conversation")
|
||||
async def compact_conversation(
|
||||
conversation_id: ConversationId,
|
||||
conversation_id: ConversationIdOrDefault,
|
||||
request: Optional[CompactionRequest] = Body(default=None),
|
||||
server: SyncServer = Depends(get_letta_server),
|
||||
headers: HeaderParams = Depends(get_headers),
|
||||
@@ -682,15 +722,24 @@ async def compact_conversation(
|
||||
This endpoint summarizes the in-context messages for a specific conversation,
|
||||
reducing the message count while preserving important context.
|
||||
|
||||
If conversation_id is an agent ID (starts with "agent-"), compacts the
|
||||
agent's default conversation messages.
|
||||
**Agent-direct mode**: Pass conversation_id="default" with agent_id in request body
|
||||
to compact the agent's default conversation messages.
|
||||
|
||||
**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.
|
||||
"""
|
||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||
|
||||
# Agent-direct mode: compact agent's default conversation
|
||||
if conversation_id.startswith("agent-"):
|
||||
agent_id = conversation_id
|
||||
agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
|
||||
# Agent-direct mode: conversation_id="default" + agent_id in body (preferred)
|
||||
# OR conversation_id="agent-*" (backwards compat, deprecated)
|
||||
resolved_agent_id = None
|
||||
if conversation_id == "default" and request and request.agent_id:
|
||||
resolved_agent_id = request.agent_id
|
||||
elif conversation_id.startswith("agent-"):
|
||||
resolved_agent_id = conversation_id
|
||||
|
||||
if resolved_agent_id:
|
||||
# Agent-direct mode: compact agent's default conversation
|
||||
agent = await server.agent_manager.get_agent_by_id_async(resolved_agent_id, actor, include_relationships=["multi_agent_group"])
|
||||
in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
|
||||
agent_loop = LettaAgentV3(agent_state=agent, actor=actor)
|
||||
else:
|
||||
|
||||
@@ -109,7 +109,9 @@ class GitEnabledBlockManager(BlockManager):
|
||||
block = result.scalar_one_or_none()
|
||||
|
||||
if block:
|
||||
# Update existing block
|
||||
# Update existing block only if content changed
|
||||
if block.value == value:
|
||||
return block.to_pydantic()
|
||||
block.value = value
|
||||
if description is not None:
|
||||
block.description = description
|
||||
|
||||
@@ -51,7 +51,7 @@ class MemfsClient:
|
||||
"""
|
||||
self.local_path = local_path or DEFAULT_LOCAL_PATH
|
||||
self.storage = LocalStorageBackend(base_path=self.local_path)
|
||||
self.git = GitOperations(storage=self.storage, redis_client=None)
|
||||
self.git = GitOperations(storage=self.storage)
|
||||
|
||||
logger.info(f"MemfsClient initialized with local storage at {self.local_path}")
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ PATH_VALIDATORS = {primitive_type.value: _create_path_validator_factory(primitiv
|
||||
|
||||
|
||||
def _create_conversation_id_or_default_path_validator_factory():
|
||||
"""Conversation IDs accept the usual primitive format, 'default', or an agent ID."""
|
||||
"""Conversation IDs with support for 'default' and agent IDs (backwards compatibility)."""
|
||||
|
||||
conversation_primitive = PrimitiveType.CONVERSATION.value
|
||||
agent_primitive = PrimitiveType.AGENT.value
|
||||
@@ -59,7 +59,8 @@ def _create_conversation_id_or_default_path_validator_factory():
|
||||
return Path(
|
||||
description=(
|
||||
f"The conversation identifier. Can be a conversation ID ('{conversation_primitive}-<uuid4>'), "
|
||||
f"an agent ID ('{agent_primitive}-<uuid4>') for agent-direct messaging, or 'default'."
|
||||
f"'default' for agent-direct mode (with agent_id parameter), "
|
||||
f"or an agent ID ('{agent_primitive}-<uuid4>') for backwards compatibility (deprecated)."
|
||||
),
|
||||
pattern=conversation_or_agent_or_default_pattern,
|
||||
examples=[
|
||||
@@ -74,10 +75,6 @@ def _create_conversation_id_or_default_path_validator_factory():
|
||||
return factory
|
||||
|
||||
|
||||
# Override conversation ID path validation to also allow 'default' and agent IDs.
|
||||
PATH_VALIDATORS[PrimitiveType.CONVERSATION.value] = _create_conversation_id_or_default_path_validator_factory()
|
||||
|
||||
|
||||
# Type aliases for common ID types
|
||||
# These can be used directly in route handler signatures for cleaner code
|
||||
AgentId = Annotated[str, PATH_VALIDATORS[PrimitiveType.AGENT.value]()]
|
||||
@@ -98,6 +95,10 @@ StepId = Annotated[str, PATH_VALIDATORS[PrimitiveType.STEP.value]()]
|
||||
IdentityId = Annotated[str, PATH_VALIDATORS[PrimitiveType.IDENTITY.value]()]
|
||||
ConversationId = Annotated[str, PATH_VALIDATORS[PrimitiveType.CONVERSATION.value]()]
|
||||
|
||||
# Conversation ID with support for 'default' and agent IDs (for agent-direct mode endpoints)
|
||||
# Backwards compatible - agent-* will be deprecated in favor of conversation_id='default' + agent_id param
|
||||
ConversationIdOrDefault = Annotated[str, _create_conversation_id_or_default_path_validator_factory()()]
|
||||
|
||||
# Infrastructure types
|
||||
McpServerId = Annotated[str, PATH_VALIDATORS[PrimitiveType.MCP_SERVER.value]()]
|
||||
McpOAuthId = Annotated[str, PATH_VALIDATORS[PrimitiveType.MCP_OAUTH.value]()]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "letta"
|
||||
version = "0.16.5"
|
||||
version = "0.16.6"
|
||||
description = "Create LLM agents with long-term memory and custom tools"
|
||||
authors = [
|
||||
{name = "Letta Team", email = "contact@letta.com"},
|
||||
|
||||
@@ -725,6 +725,132 @@ class TestConversationsSDK:
|
||||
if "No active runs" not in str(e):
|
||||
raise
|
||||
|
||||
def test_backwards_compatibility_old_pattern(self, client: Letta, agent, server_url: str):
|
||||
"""Test that the old pattern (agent_id as conversation_id) still works for backwards compatibility."""
|
||||
# OLD PATTERN: conversation_id=agent.id (should still work)
|
||||
# Use raw HTTP requests since SDK might not be up to date
|
||||
|
||||
# Test 1: Send message using old pattern
|
||||
response = requests.post(
|
||||
f"{server_url}/v1/conversations/{agent.id}/messages",
|
||||
json={
|
||||
"messages": [{"role": "user", "content": "Testing old pattern still works"}],
|
||||
"streaming": False,
|
||||
},
|
||||
)
|
||||
assert response.status_code == 200, f"Old pattern should work for sending messages: {response.text}"
|
||||
data = response.json()
|
||||
assert "messages" in data, "Response should contain messages"
|
||||
assert len(data["messages"]) > 0, "Should receive response messages"
|
||||
|
||||
# Test 2: List messages using old pattern
|
||||
response = requests.get(f"{server_url}/v1/conversations/{agent.id}/messages")
|
||||
assert response.status_code == 200, f"Old pattern should work for listing messages: {response.text}"
|
||||
data = response.json()
|
||||
# Response is a list of messages directly
|
||||
assert isinstance(data, list), "Response should be a list of messages"
|
||||
assert len(data) >= 3, "Should have at least system + user + assistant messages"
|
||||
|
||||
# Verify our message is there
|
||||
user_messages = [m for m in data if m.get("message_type") == "user_message"]
|
||||
assert any("Testing old pattern still works" in str(m.get("content", "")) for m in user_messages), "Should find our test message"
|
||||
|
||||
def test_new_pattern_send_message(self, client: Letta, agent, server_url: str):
|
||||
"""Test sending messages using the new pattern: conversation_id='default' + agent_id in body."""
|
||||
# NEW PATTERN: conversation_id='default' + agent_id in request body
|
||||
response = requests.post(
|
||||
f"{server_url}/v1/conversations/default/messages",
|
||||
json={
|
||||
"agent_id": agent.id,
|
||||
"messages": [{"role": "user", "content": "Testing new pattern send message"}],
|
||||
"streaming": False,
|
||||
},
|
||||
)
|
||||
assert response.status_code == 200, f"New pattern should work for sending messages: {response.text}"
|
||||
data = response.json()
|
||||
assert "messages" in data, "Response should contain messages"
|
||||
assert len(data["messages"]) > 0, "Should receive response messages"
|
||||
|
||||
# Verify we got an assistant message
|
||||
assistant_messages = [m for m in data["messages"] if m.get("message_type") == "assistant_message"]
|
||||
assert len(assistant_messages) > 0, "Should receive at least one assistant message"
|
||||
|
||||
def test_new_pattern_list_messages(self, client: Letta, agent, server_url: str):
|
||||
"""Test listing messages using the new pattern: conversation_id='default' + agent_id query param."""
|
||||
# First send a message to populate the conversation
|
||||
requests.post(
|
||||
f"{server_url}/v1/conversations/{agent.id}/messages",
|
||||
json={
|
||||
"messages": [{"role": "user", "content": "Setup message for list test"}],
|
||||
"streaming": False,
|
||||
},
|
||||
)
|
||||
|
||||
# NEW PATTERN: conversation_id='default' + agent_id as query param
|
||||
response = requests.get(
|
||||
f"{server_url}/v1/conversations/default/messages",
|
||||
params={"agent_id": agent.id},
|
||||
)
|
||||
assert response.status_code == 200, f"New pattern should work for listing messages: {response.text}"
|
||||
data = response.json()
|
||||
# Response is a list of messages directly
|
||||
assert isinstance(data, list), "Response should be a list of messages"
|
||||
assert len(data) >= 3, "Should have at least system + user + assistant messages"
|
||||
|
||||
def test_new_pattern_cancel(self, client: Letta, agent, server_url: str):
|
||||
"""Test canceling runs using the new pattern: conversation_id='default' + agent_id query param."""
|
||||
from letta.settings import settings
|
||||
|
||||
if not settings.track_agent_run:
|
||||
pytest.skip("Run tracking disabled - skipping cancel test")
|
||||
|
||||
# NEW PATTERN: conversation_id='default' + agent_id as query param
|
||||
response = requests.post(
|
||||
f"{server_url}/v1/conversations/default/cancel",
|
||||
params={"agent_id": agent.id},
|
||||
)
|
||||
# Returns 200 with results if runs exist, or 409 if no active runs
|
||||
assert response.status_code in [200, 409], f"New pattern should work for cancel: {response.text}"
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
assert isinstance(data, dict), "Cancel should return a dict"
|
||||
|
||||
def test_new_pattern_compact(self, client: Letta, agent, server_url: str):
|
||||
"""Test compacting conversation using the new pattern: conversation_id='default' + agent_id in body."""
|
||||
# Send many messages to have enough for compaction
|
||||
for i in range(10):
|
||||
requests.post(
|
||||
f"{server_url}/v1/conversations/{agent.id}/messages",
|
||||
json={
|
||||
"messages": [{"role": "user", "content": f"Message {i} for compaction test"}],
|
||||
"streaming": False,
|
||||
},
|
||||
)
|
||||
|
||||
# NEW PATTERN: conversation_id='default' + agent_id in request body
|
||||
response = requests.post(
|
||||
f"{server_url}/v1/conversations/default/compact",
|
||||
json={"agent_id": agent.id},
|
||||
)
|
||||
# May return 200 (success) or 400 (not enough messages to compact)
|
||||
assert response.status_code in [200, 400], f"New pattern should accept agent_id parameter: {response.text}"
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
assert "summary" in data, "Response should contain summary"
|
||||
assert "num_messages_before" in data, "Response should contain num_messages_before"
|
||||
assert "num_messages_after" in data, "Response should contain num_messages_after"
|
||||
|
||||
def test_new_pattern_stream_retrieve(self, client: Letta, agent, server_url: str):
|
||||
"""Test retrieving stream using the new pattern: conversation_id='default' + agent_id in body."""
|
||||
# NEW PATTERN: conversation_id='default' + agent_id in request body
|
||||
# Note: This will likely return 400 if no active run exists, which is expected
|
||||
response = requests.post(
|
||||
f"{server_url}/v1/conversations/default/stream",
|
||||
json={"agent_id": agent.id},
|
||||
)
|
||||
# Either 200 (if run exists) or 400 (no active run) are both acceptable
|
||||
assert response.status_code in [200, 400], f"Stream retrieve should accept new pattern: {response.text}"
|
||||
|
||||
|
||||
class TestConversationDelete:
|
||||
"""Tests for the conversation delete endpoint."""
|
||||
|
||||
Reference in New Issue
Block a user