chore: bump 0.16.6 (#3211)
This commit is contained in:
@@ -260,6 +260,7 @@ model:
|
|||||||
base_url: https://generativelanguage.googleapis.com/
|
base_url: https://generativelanguage.googleapis.com/
|
||||||
force_minimum_thinking_budget: false
|
force_minimum_thinking_budget: false
|
||||||
max_retries: 5
|
max_retries: 5
|
||||||
|
timeout_seconds: 600.0
|
||||||
|
|
||||||
# Google Vertex (-> GOOGLE_CLOUD_*)
|
# Google Vertex (-> GOOGLE_CLOUD_*)
|
||||||
# google_cloud:
|
# google_cloud:
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
220
fern/scripts/prepare-openapi.ts
Normal file
220
fern/scripts/prepare-openapi.ts
Normal file
@@ -0,0 +1,220 @@
|
|||||||
|
import * as fs from 'fs';
|
||||||
|
import * as path from 'path';
|
||||||
|
|
||||||
|
import { omit } from 'lodash';
|
||||||
|
import { execSync } from 'child_process';
|
||||||
|
import { merge, isErrorResult } from 'openapi-merge';
|
||||||
|
import type { Swagger } from 'atlassian-openapi';
|
||||||
|
import { RESTRICTED_ROUTE_BASE_PATHS } from '@letta-cloud/sdk-core';
|
||||||
|
|
||||||
|
// Path segments, relative to this script's directory, of the two generated
// OpenAPI documents that get merged below.
const webSpecSegments = [
  '..',
  '..',
  '..',
  'web',
  'autogenerated',
  'letta-web-openapi.json',
];
const agentsSpecSegments = ['..', '..', 'letta', 'server', 'openapi_letta.json'];

const lettaWebOpenAPIPath = path.join(__dirname, ...webSpecSegments);
const lettaAgentsAPIPath = path.join(__dirname, ...agentsSpecSegments);

/**
 * Reads a UTF-8 JSON file from disk and returns it typed as an OpenAPI v3
 * document. The content is not validated; the type is asserted.
 */
function loadOpenAPIDocument(filePath: string): Swagger.SwaggerV3 {
  const rawJson = fs.readFileSync(filePath, 'utf8');
  return JSON.parse(rawJson) as Swagger.SwaggerV3;
}

const lettaWebOpenAPI = loadOpenAPIDocument(lettaWebOpenAPIPath);
const lettaAgentsAPI = loadOpenAPIDocument(lettaAgentsAPIPath);
|
||||||
|
|
||||||
|
// Keep only the agents-API routes that do not start with any restricted base path.
const unrestrictedAgentRoutes = Object.entries(lettaAgentsAPI.paths).filter(
  ([route]) =>
    !RESTRICTED_ROUTE_BASE_PATHS.some((restrictedPrefix) =>
      route.startsWith(restrictedPrefix),
    ),
);
lettaAgentsAPI.paths = Object.fromEntries(unrestrictedAgentRoutes);
|
||||||
|
|
||||||
|
// Agents-API path items keyed by route with a single trailing "/" removed,
// so "/v1/foo/" and "/v1/foo" land on the same key.
const lettaAgentsAPIWithNoEndslash: Swagger.SwaggerV3['paths'] = {};
for (const route of Object.keys(lettaAgentsAPI.paths)) {
  const normalizedRoute = route.endsWith('/') ? route.slice(0, -1) : route;
  lettaAgentsAPIWithNoEndslash[normalizedRoute] = lettaAgentsAPI.paths[route];
}
|
||||||
|
|
||||||
|
// De-duplicate routes: any web-spec route that also exists in the agents spec
// is dropped from the web spec. Routes are compared with a single trailing "/"
// stripped, because the two specs are not consistent about it.
const webOnlyRoutes = Object.entries(lettaWebOpenAPI.paths).filter(
  ([route]) => {
    const normalizedRoute = route.endsWith('/') ? route.slice(0, -1) : route;
    const alreadyInAgentsSpec = lettaAgentsAPIWithNoEndslash[normalizedRoute];
    return !alreadyInAgentsSpec;
  },
);
lettaWebOpenAPI.paths = Object.fromEntries(webOnlyRoutes);
|
||||||
|
|
||||||
|
// [route, response status code] pairs in the web spec whose POST response
// should describe its `agents` property as an array of AgentState references.
const agentStatePathsToOverride: Array<[string, string]> = [
  ['/v1/templates/{project}/{template_version}/agents', '201'],
  ['/v1/agents/search', '200'],
];

for (const [route, responseCode] of agentStatePathsToOverride) {
  // Skip routes (or status codes) that are not present in the web spec.
  const response =
    lettaWebOpenAPI.paths[route]?.post?.responses?.[responseCode];
  if (!response) {
    continue;
  }

  // Work on the live schema object so the change lands in the document itself.
  const contentSchema = response.content['application/json'].schema;
  if (!contentSchema.properties?.agents) {
    continue;
  }

  // Swap the whole inline `agents` schema for an array of shared references.
  contentSchema.properties.agents = {
    type: 'array',
    items: { $ref: '#/components/schemas/AgentState' },
  };
}
|
||||||
|
|
||||||
|
// Walk every operation in the agents spec and drop selected *header*
// parameters: the exact names below plus anything whose name starts with one
// of the listed prefixes. Non-header parameters are always kept.
const hiddenHeaderNames = new Set([
  'user_id',
  'User-Agent',
  'X-Project-Id',
  'X-Letta-Source',
  'X-Stainless-Package-Version',
]);
const hiddenHeaderPrefixes = ['X-Experimental', 'X-Billing'];

const isKeptParameter = (param: Record<string, string>): boolean => {
  if (param.in !== 'header') {
    return true;
  }
  if (hiddenHeaderNames.has(param.name)) {
    return false;
  }
  return !hiddenHeaderPrefixes.some((prefix) => param.name.startsWith(prefix));
};

for (const pathItem of Object.values(lettaAgentsAPI.paths)) {
  for (const method of Object.keys(pathItem)) {
    // @ts-expect-error - path items are not typed as indexable by an arbitrary string
    const operation = pathItem[method];
    if (operation?.parameters) {
      operation.parameters = operation.parameters.filter(
        (param: Record<string, string>) => isKeptParameter(param),
      );
    }
  }
}
|
||||||
|
|
||||||
|
// Merge the two specs, agents API first and then the web API.
const result = merge([{ oas: lettaAgentsAPI }, { oas: lettaWebOpenAPI }]);

// A failed merge is fatal: report the message and type, then exit non-zero.
if (isErrorResult(result)) {
  const { message, type } = result;
  console.error(`${message} (${type})`);
  process.exit(1);
}
|
||||||
|
|
||||||
|
// Stamp the merged document with its top-level metadata. Properties are
// assigned in this order (openapi, info, servers, components, security) so the
// key order of the serialized JSON stays stable.
Object.assign(result.output, {
  openapi: '3.1.0',
  info: {
    title: 'Letta API',
    version: '1.0.0',
  },
  servers: [
    { url: 'https://app.letta.com', description: 'Letta Cloud' },
    { url: 'http://localhost:8283', description: 'Self-hosted' },
  ],
  // Keep the merged components, but replace securitySchemes with bearer auth only.
  components: {
    ...result.output.components,
    securitySchemes: {
      bearerAuth: { type: 'http', scheme: 'bearer' },
    },
  },
  // Append the bearer requirement to whatever security entries the merge produced.
  security: [...(result.output.security || []), { bearerAuth: [] }],
});
|
||||||
|
|
||||||
|
/**
 * Returns a copy of a JSON-like value with every property named `key`
 * removed, at any depth.
 *
 * - Arrays stay arrays; each element is processed recursively.
 * - Primitives and `null` are returned unchanged.
 * - Plain objects are rebuilt from their own enumerable entries, skipping
 *   `key` and recursing into every remaining value.
 *
 * The input is never mutated. Only property *names* are matched: a string
 * element equal to `key` inside an array (e.g. in a `required` list) is left
 * in place.
 *
 * @param obj - The value to clean.
 * @param key - The property name to strip everywhere it appears.
 * @returns A new value of the same shape without any `key` properties.
 */
function deepOmitPreserveArrays(obj: unknown, key: string): unknown {
  if (Array.isArray(obj)) {
    return obj.map((item) => deepOmitPreserveArrays(item, key));
  }

  if (typeof obj !== 'object' || obj === null) {
    return obj;
  }

  // Always recurse into the surviving properties. Stopping as soon as `key`
  // is found on an object would leave nested occurrences under its sibling
  // properties untouched.
  return Object.fromEntries(
    Object.entries(obj)
      .filter(([name]) => name !== key)
      .map(([name, value]) => [name, deepOmitPreserveArrays(value, key)]),
  );
}
|
||||||
|
|
||||||
|
// Strip these property names from the merged components, one pass per name,
// in this order.
for (const fieldName of ['user_id', 'actor_id', 'organization_id']) {
  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
  // @ts-ignore
  result.output.components = deepOmitPreserveArrays(result.output.components, fieldName);
}
|
||||||
|
|
||||||
|
// Write the merged document, indented by two spaces, to openapi.json in the
// directory above this script.
const mergedSpecPath = path.join(__dirname, '..', 'openapi.json');
const mergedSpecJson = JSON.stringify(result.output, null, 2);
fs.writeFileSync(mergedSpecPath, mergedSpecJson);
|
||||||
|
|
||||||
|
/**
 * Runs Prettier (via `npx`) in write mode on the openapi.json file located in
 * the directory above this script, with the child's stdio inherited.
 *
 * On failure the error is logged and the process exits with status 1.
 */
function formatOpenAPIJson() {
  const targetFile = path.join(__dirname, '..', 'openapi.json');
  const prettierCommand = `npx prettier --write "${targetFile}"`;

  try {
    execSync(prettierCommand, { stdio: 'inherit' });
    console.log('Successfully formatted openapi.json with Prettier');
  } catch (error) {
    console.error('Error formatting openapi.json:', error);
    process.exit(1);
  }
}
|
||||||
|
|
||||||
|
formatOpenAPIJson();
|
||||||
@@ -5,7 +5,7 @@ try:
|
|||||||
__version__ = version("letta")
|
__version__ = version("letta")
|
||||||
except PackageNotFoundError:
|
except PackageNotFoundError:
|
||||||
# Fallback for development installations
|
# Fallback for development installations
|
||||||
__version__ = "0.16.5"
|
__version__ = "0.16.6"
|
||||||
|
|
||||||
if os.environ.get("LETTA_VERSION"):
|
if os.environ.get("LETTA_VERSION"):
|
||||||
__version__ = os.environ["LETTA_VERSION"]
|
__version__ = os.environ["LETTA_VERSION"]
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from letta.schemas.letta_message import LettaMessage
|
|||||||
from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
|
from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
|
||||||
from letta.schemas.llm_config import LLMConfig
|
from letta.schemas.llm_config import LLMConfig
|
||||||
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, ChoiceLogprobs, ToolCall
|
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, ChoiceLogprobs, ToolCall
|
||||||
|
from letta.schemas.provider_trace import BillingContext
|
||||||
from letta.schemas.usage import LettaUsageStatistics
|
from letta.schemas.usage import LettaUsageStatistics
|
||||||
from letta.schemas.user import User
|
from letta.schemas.user import User
|
||||||
from letta.services.telemetry_manager import TelemetryManager
|
from letta.services.telemetry_manager import TelemetryManager
|
||||||
@@ -31,6 +32,7 @@ class LettaLLMAdapter(ABC):
|
|||||||
run_id: str | None = None,
|
run_id: str | None = None,
|
||||||
org_id: str | None = None,
|
org_id: str | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
|
billing_context: BillingContext | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.llm_client: LLMClientBase = llm_client
|
self.llm_client: LLMClientBase = llm_client
|
||||||
self.llm_config: LLMConfig = llm_config
|
self.llm_config: LLMConfig = llm_config
|
||||||
@@ -40,6 +42,7 @@ class LettaLLMAdapter(ABC):
|
|||||||
self.run_id: str | None = run_id
|
self.run_id: str | None = run_id
|
||||||
self.org_id: str | None = org_id
|
self.org_id: str | None = org_id
|
||||||
self.user_id: str | None = user_id
|
self.user_id: str | None = user_id
|
||||||
|
self.billing_context: BillingContext | None = billing_context
|
||||||
self.message_id: str | None = None
|
self.message_id: str | None = None
|
||||||
self.request_data: dict | None = None
|
self.request_data: dict | None = None
|
||||||
self.response_data: dict | None = None
|
self.response_data: dict | None = None
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from letta.otel.tracing import log_attributes, safe_json_dumps, trace_method
|
|||||||
from letta.schemas.enums import LLMCallType, ProviderType
|
from letta.schemas.enums import LLMCallType, ProviderType
|
||||||
from letta.schemas.letta_message import LettaMessage
|
from letta.schemas.letta_message import LettaMessage
|
||||||
from letta.schemas.llm_config import LLMConfig
|
from letta.schemas.llm_config import LLMConfig
|
||||||
from letta.schemas.provider_trace import ProviderTrace
|
from letta.schemas.provider_trace import BillingContext, ProviderTrace
|
||||||
from letta.schemas.user import User
|
from letta.schemas.user import User
|
||||||
from letta.settings import settings
|
from letta.settings import settings
|
||||||
from letta.utils import safe_create_task
|
from letta.utils import safe_create_task
|
||||||
@@ -36,6 +36,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
|
|||||||
run_id: str | None = None,
|
run_id: str | None = None,
|
||||||
org_id: str | None = None,
|
org_id: str | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(
|
super().__init__(
|
||||||
llm_client,
|
llm_client,
|
||||||
@@ -46,6 +47,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
|
|||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
org_id=org_id,
|
org_id=org_id,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
|
billing_context=billing_context,
|
||||||
)
|
)
|
||||||
self.interface: OpenAIStreamingInterface | AnthropicStreamingInterface | None = None
|
self.interface: OpenAIStreamingInterface | AnthropicStreamingInterface | None = None
|
||||||
|
|
||||||
|
|||||||
@@ -51,6 +51,7 @@ class SimpleLLMRequestAdapter(LettaLLMRequestAdapter):
|
|||||||
org_id=self.org_id,
|
org_id=self.org_id,
|
||||||
user_id=self.user_id,
|
user_id=self.user_id,
|
||||||
llm_config=self.llm_config.model_dump() if self.llm_config else None,
|
llm_config=self.llm_config.model_dump() if self.llm_config else None,
|
||||||
|
billing_context=self.billing_context,
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
self.response_data = await self.llm_client.request_async_with_telemetry(request_data, self.llm_config)
|
self.response_data = await self.llm_client.request_async_with_telemetry(request_data, self.llm_config)
|
||||||
|
|||||||
@@ -278,6 +278,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
|
|||||||
org_id=self.org_id,
|
org_id=self.org_id,
|
||||||
user_id=self.user_id,
|
user_id=self.user_id,
|
||||||
llm_config=self.llm_config.model_dump() if self.llm_config else None,
|
llm_config=self.llm_config.model_dump() if self.llm_config else None,
|
||||||
|
billing_context=self.billing_context,
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
label="create_provider_trace",
|
label="create_provider_trace",
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from letta.schemas.letta_message_content import TextContent
|
|||||||
from letta.schemas.letta_response import LettaResponse
|
from letta.schemas.letta_response import LettaResponse
|
||||||
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
|
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
|
||||||
from letta.schemas.message import Message, MessageCreate, MessageUpdate
|
from letta.schemas.message import Message, MessageCreate, MessageUpdate
|
||||||
|
from letta.schemas.provider_trace import BillingContext
|
||||||
from letta.schemas.usage import LettaUsageStatistics
|
from letta.schemas.usage import LettaUsageStatistics
|
||||||
from letta.schemas.user import User
|
from letta.schemas.user import User
|
||||||
from letta.services.agent_manager import AgentManager
|
from letta.services.agent_manager import AgentManager
|
||||||
@@ -51,7 +52,11 @@ class BaseAgent(ABC):
|
|||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def step(
|
async def step(
|
||||||
self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS, run_id: Optional[str] = None
|
self,
|
||||||
|
input_messages: List[MessageCreate],
|
||||||
|
max_steps: int = DEFAULT_MAX_STEPS,
|
||||||
|
run_id: Optional[str] = None,
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> LettaResponse:
|
) -> LettaResponse:
|
||||||
"""
|
"""
|
||||||
Main execution loop for the agent.
|
Main execution loop for the agent.
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from letta.schemas.user import User
|
|||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from letta.schemas.letta_request import ClientToolSchema
|
from letta.schemas.letta_request import ClientToolSchema
|
||||||
|
from letta.schemas.provider_trace import BillingContext
|
||||||
|
|
||||||
|
|
||||||
class BaseAgentV2(ABC):
|
class BaseAgentV2(ABC):
|
||||||
@@ -52,6 +53,7 @@ class BaseAgentV2(ABC):
|
|||||||
request_start_timestamp_ns: int | None = None,
|
request_start_timestamp_ns: int | None = None,
|
||||||
client_tools: list["ClientToolSchema"] | None = None,
|
client_tools: list["ClientToolSchema"] | None = None,
|
||||||
include_compaction_messages: bool = False, # Not used in V2, but accepted for API compatibility
|
include_compaction_messages: bool = False, # Not used in V2, but accepted for API compatibility
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> LettaResponse:
|
) -> LettaResponse:
|
||||||
"""
|
"""
|
||||||
Execute the agent loop in blocking mode, returning all messages at once.
|
Execute the agent loop in blocking mode, returning all messages at once.
|
||||||
@@ -76,6 +78,7 @@ class BaseAgentV2(ABC):
|
|||||||
conversation_id: str | None = None,
|
conversation_id: str | None = None,
|
||||||
client_tools: list["ClientToolSchema"] | None = None,
|
client_tools: list["ClientToolSchema"] | None = None,
|
||||||
include_compaction_messages: bool = False, # Not used in V2, but accepted for API compatibility
|
include_compaction_messages: bool = False, # Not used in V2, but accepted for API compatibility
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> AsyncGenerator[LettaMessage | LegacyLettaMessage | MessageStreamStatus, None]:
|
) -> AsyncGenerator[LettaMessage | LegacyLettaMessage | MessageStreamStatus, None]:
|
||||||
"""
|
"""
|
||||||
Execute the agent loop in streaming mode, yielding chunks as they become available.
|
Execute the agent loop in streaming mode, yielding chunks as they become available.
|
||||||
|
|||||||
@@ -192,44 +192,15 @@ async def _prepare_in_context_messages_no_persist_async(
|
|||||||
# Otherwise, include the full list of messages from the conversation
|
# Otherwise, include the full list of messages from the conversation
|
||||||
current_in_context_messages = await message_manager.get_messages_by_ids_async(message_ids=message_ids, actor=actor)
|
current_in_context_messages = await message_manager.get_messages_by_ids_async(message_ids=message_ids, actor=actor)
|
||||||
else:
|
else:
|
||||||
# No messages in conversation yet - compile a new system message for this conversation
|
# No messages in conversation yet (fallback) - compile a new system message
|
||||||
# Each conversation gets its own system message (captures memory state at conversation start)
|
# Normally this is handled at conversation creation time, but this covers
|
||||||
from letta.prompts.prompt_generator import PromptGenerator
|
# edge cases where a conversation exists without a system message.
|
||||||
from letta.services.passage_manager import PassageManager
|
system_message = await conversation_manager.compile_and_save_system_message_for_conversation(
|
||||||
|
|
||||||
num_messages = await message_manager.size_async(actor=actor, agent_id=agent_state.id)
|
|
||||||
passage_manager = PassageManager()
|
|
||||||
num_archival_memories = await passage_manager.agent_passage_size_async(actor=actor, agent_id=agent_state.id)
|
|
||||||
|
|
||||||
system_message_str = await PromptGenerator.compile_system_message_async(
|
|
||||||
system_prompt=agent_state.system,
|
|
||||||
in_context_memory=agent_state.memory,
|
|
||||||
in_context_memory_last_edit=get_utc_time(),
|
|
||||||
timezone=agent_state.timezone,
|
|
||||||
user_defined_variables=None,
|
|
||||||
append_icm_if_missing=True,
|
|
||||||
previous_message_count=num_messages,
|
|
||||||
archival_memory_size=num_archival_memories,
|
|
||||||
sources=agent_state.sources,
|
|
||||||
max_files_open=agent_state.max_files_open,
|
|
||||||
)
|
|
||||||
system_message = Message.dict_to_message(
|
|
||||||
agent_id=agent_state.id,
|
|
||||||
model=agent_state.llm_config.model,
|
|
||||||
openai_message_dict={"role": "system", "content": system_message_str},
|
|
||||||
)
|
|
||||||
|
|
||||||
# Persist the new system message
|
|
||||||
persisted_messages = await message_manager.create_many_messages_async([system_message], actor=actor)
|
|
||||||
system_message = persisted_messages[0]
|
|
||||||
|
|
||||||
# Add it to the conversation tracking
|
|
||||||
await conversation_manager.add_messages_to_conversation(
|
|
||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
agent_id=agent_state.id,
|
agent_id=agent_state.id,
|
||||||
message_ids=[system_message.id],
|
|
||||||
actor=actor,
|
actor=actor,
|
||||||
starting_position=0,
|
agent_state=agent_state,
|
||||||
|
message_manager=message_manager,
|
||||||
)
|
)
|
||||||
|
|
||||||
current_in_context_messages = [system_message]
|
current_in_context_messages = [system_message]
|
||||||
|
|||||||
@@ -48,6 +48,7 @@ from letta.schemas.openai.chat_completion_response import (
|
|||||||
UsageStatisticsCompletionTokenDetails,
|
UsageStatisticsCompletionTokenDetails,
|
||||||
UsageStatisticsPromptTokenDetails,
|
UsageStatisticsPromptTokenDetails,
|
||||||
)
|
)
|
||||||
|
from letta.schemas.provider_trace import BillingContext
|
||||||
from letta.schemas.step import StepProgression
|
from letta.schemas.step import StepProgression
|
||||||
from letta.schemas.step_metrics import StepMetrics
|
from letta.schemas.step_metrics import StepMetrics
|
||||||
from letta.schemas.tool_execution_result import ToolExecutionResult
|
from letta.schemas.tool_execution_result import ToolExecutionResult
|
||||||
@@ -179,6 +180,7 @@ class LettaAgent(BaseAgent):
|
|||||||
request_start_timestamp_ns: int | None = None,
|
request_start_timestamp_ns: int | None = None,
|
||||||
include_return_message_types: list[MessageType] | None = None,
|
include_return_message_types: list[MessageType] | None = None,
|
||||||
dry_run: bool = False,
|
dry_run: bool = False,
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> Union[LettaResponse, dict]:
|
) -> Union[LettaResponse, dict]:
|
||||||
# TODO (cliandy): pass in run_id and use at send_message endpoints for all step functions
|
# TODO (cliandy): pass in run_id and use at send_message endpoints for all step functions
|
||||||
agent_state = await self.agent_manager.get_agent_by_id_async(
|
agent_state = await self.agent_manager.get_agent_by_id_async(
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ from letta.schemas.openai.chat_completion_response import (
|
|||||||
UsageStatisticsCompletionTokenDetails,
|
UsageStatisticsCompletionTokenDetails,
|
||||||
UsageStatisticsPromptTokenDetails,
|
UsageStatisticsPromptTokenDetails,
|
||||||
)
|
)
|
||||||
|
from letta.schemas.provider_trace import BillingContext
|
||||||
from letta.schemas.step import Step, StepProgression
|
from letta.schemas.step import Step, StepProgression
|
||||||
from letta.schemas.step_metrics import StepMetrics
|
from letta.schemas.step_metrics import StepMetrics
|
||||||
from letta.schemas.tool import Tool
|
from letta.schemas.tool import Tool
|
||||||
@@ -185,6 +186,7 @@ class LettaAgentV2(BaseAgentV2):
|
|||||||
request_start_timestamp_ns: int | None = None,
|
request_start_timestamp_ns: int | None = None,
|
||||||
client_tools: list[ClientToolSchema] | None = None,
|
client_tools: list[ClientToolSchema] | None = None,
|
||||||
include_compaction_messages: bool = False, # Not used in V2, but accepted for API compatibility
|
include_compaction_messages: bool = False, # Not used in V2, but accepted for API compatibility
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> LettaResponse:
|
) -> LettaResponse:
|
||||||
"""
|
"""
|
||||||
Execute the agent loop in blocking mode, returning all messages at once.
|
Execute the agent loop in blocking mode, returning all messages at once.
|
||||||
@@ -290,6 +292,7 @@ class LettaAgentV2(BaseAgentV2):
|
|||||||
conversation_id: str | None = None, # Not used in V2, but accepted for API compatibility
|
conversation_id: str | None = None, # Not used in V2, but accepted for API compatibility
|
||||||
client_tools: list[ClientToolSchema] | None = None,
|
client_tools: list[ClientToolSchema] | None = None,
|
||||||
include_compaction_messages: bool = False, # Not used in V2, but accepted for API compatibility
|
include_compaction_messages: bool = False, # Not used in V2, but accepted for API compatibility
|
||||||
|
billing_context: BillingContext | None = None,
|
||||||
) -> AsyncGenerator[str, None]:
|
) -> AsyncGenerator[str, None]:
|
||||||
"""
|
"""
|
||||||
Execute the agent loop in streaming mode, yielding chunks as they become available.
|
Execute the agent loop in streaming mode, yielding chunks as they become available.
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ from letta.agents.helpers import (
|
|||||||
)
|
)
|
||||||
from letta.agents.letta_agent_v2 import LettaAgentV2
|
from letta.agents.letta_agent_v2 import LettaAgentV2
|
||||||
from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX, REQUEST_HEARTBEAT_PARAM
|
from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX, REQUEST_HEARTBEAT_PARAM
|
||||||
from letta.errors import ContextWindowExceededError, LLMError, SystemPromptTokenExceededError
|
from letta.errors import ContextWindowExceededError, LLMEmptyResponseError, LLMError, SystemPromptTokenExceededError
|
||||||
from letta.helpers import ToolRulesSolver
|
from letta.helpers import ToolRulesSolver
|
||||||
from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns
|
from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns
|
||||||
from letta.helpers.tool_execution_helper import enable_strict_mode
|
from letta.helpers.tool_execution_helper import enable_strict_mode
|
||||||
@@ -45,6 +45,7 @@ from letta.schemas.letta_response import LettaResponse, TurnTokenData
|
|||||||
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
|
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
|
||||||
from letta.schemas.message import Message, MessageCreate, ToolReturn
|
from letta.schemas.message import Message, MessageCreate, ToolReturn
|
||||||
from letta.schemas.openai.chat_completion_response import ChoiceLogprobs, ToolCall, ToolCallDenial, UsageStatistics
|
from letta.schemas.openai.chat_completion_response import ChoiceLogprobs, ToolCall, ToolCallDenial, UsageStatistics
|
||||||
|
from letta.schemas.provider_trace import BillingContext
|
||||||
from letta.schemas.step import StepProgression
|
from letta.schemas.step import StepProgression
|
||||||
from letta.schemas.step_metrics import StepMetrics
|
from letta.schemas.step_metrics import StepMetrics
|
||||||
from letta.schemas.tool_execution_result import ToolExecutionResult
|
from letta.schemas.tool_execution_result import ToolExecutionResult
|
||||||
@@ -149,6 +150,7 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
conversation_id: str | None = None,
|
conversation_id: str | None = None,
|
||||||
client_tools: list[ClientToolSchema] | None = None,
|
client_tools: list[ClientToolSchema] | None = None,
|
||||||
include_compaction_messages: bool = False,
|
include_compaction_messages: bool = False,
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> LettaResponse:
|
) -> LettaResponse:
|
||||||
"""
|
"""
|
||||||
Execute the agent loop in blocking mode, returning all messages at once.
|
Execute the agent loop in blocking mode, returning all messages at once.
|
||||||
@@ -232,6 +234,7 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
org_id=self.actor.organization_id,
|
org_id=self.actor.organization_id,
|
||||||
user_id=self.actor.id,
|
user_id=self.actor.id,
|
||||||
|
billing_context=billing_context,
|
||||||
)
|
)
|
||||||
|
|
||||||
credit_task = None
|
credit_task = None
|
||||||
@@ -362,6 +365,7 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
conversation_id: str | None = None,
|
conversation_id: str | None = None,
|
||||||
client_tools: list[ClientToolSchema] | None = None,
|
client_tools: list[ClientToolSchema] | None = None,
|
||||||
include_compaction_messages: bool = False,
|
include_compaction_messages: bool = False,
|
||||||
|
billing_context: BillingContext | None = None,
|
||||||
) -> AsyncGenerator[str, None]:
|
) -> AsyncGenerator[str, None]:
|
||||||
"""
|
"""
|
||||||
Execute the agent loop in streaming mode, yielding chunks as they become available.
|
Execute the agent loop in streaming mode, yielding chunks as they become available.
|
||||||
@@ -419,6 +423,7 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
org_id=self.actor.organization_id,
|
org_id=self.actor.organization_id,
|
||||||
user_id=self.actor.id,
|
user_id=self.actor.id,
|
||||||
|
billing_context=billing_context,
|
||||||
)
|
)
|
||||||
elif use_sglang_native:
|
elif use_sglang_native:
|
||||||
# Use SGLang native adapter for multi-turn RL training
|
# Use SGLang native adapter for multi-turn RL training
|
||||||
@@ -431,6 +436,7 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
org_id=self.actor.organization_id,
|
org_id=self.actor.organization_id,
|
||||||
user_id=self.actor.id,
|
user_id=self.actor.id,
|
||||||
|
billing_context=billing_context,
|
||||||
)
|
)
|
||||||
# Reset turns tracking for this step
|
# Reset turns tracking for this step
|
||||||
self.turns = []
|
self.turns = []
|
||||||
@@ -444,6 +450,7 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
org_id=self.actor.organization_id,
|
org_id=self.actor.organization_id,
|
||||||
user_id=self.actor.id,
|
user_id=self.actor.id,
|
||||||
|
billing_context=billing_context,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -764,7 +771,12 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
# Old behavior: UserMessage with packed JSON
|
# Old behavior: UserMessage with packed JSON
|
||||||
return list(Message.to_letta_messages(summary_message))
|
messages = list(Message.to_letta_messages(summary_message))
|
||||||
|
# Set otid on returned messages (summary Message doesn't have otid set at creation)
|
||||||
|
for i, msg in enumerate(messages):
|
||||||
|
if not msg.otid:
|
||||||
|
msg.otid = Message.generate_otid_from_id(summary_message.id, i)
|
||||||
|
return messages
|
||||||
|
|
||||||
@trace_method
|
@trace_method
|
||||||
async def _step(
|
async def _step(
|
||||||
@@ -990,6 +1002,9 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
|
self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
|
||||||
raise e
|
raise e
|
||||||
|
except LLMEmptyResponseError as e:
|
||||||
|
self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
|
||||||
|
raise e
|
||||||
except LLMError as e:
|
except LLMError as e:
|
||||||
self.stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error.value)
|
self.stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error.value)
|
||||||
raise e
|
raise e
|
||||||
|
|||||||
@@ -134,7 +134,7 @@ def _flatten_model_settings(d: dict, env_vars: dict[str, str]) -> None:
|
|||||||
api_base: yyy -> OPENAI_API_BASE
|
api_base: yyy -> OPENAI_API_BASE
|
||||||
anthropic:
|
anthropic:
|
||||||
api_key: zzz -> ANTHROPIC_API_KEY
|
api_key: zzz -> ANTHROPIC_API_KEY
|
||||||
global_max_context_window_limit: 32000 -> GLOBAL_MAX_CONTEXT_WINDOW_LIMIT
|
global_max_context_window_limit: 128000 -> GLOBAL_MAX_CONTEXT_WINDOW_LIMIT
|
||||||
"""
|
"""
|
||||||
for key, value in d.items():
|
for key, value in d.items():
|
||||||
if isinstance(value, dict):
|
if isinstance(value, dict):
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ DEFAULT_MAX_STEPS = 50
|
|||||||
|
|
||||||
# context window size
|
# context window size
|
||||||
MIN_CONTEXT_WINDOW = 4096
|
MIN_CONTEXT_WINDOW = 4096
|
||||||
DEFAULT_CONTEXT_WINDOW = 32000
|
DEFAULT_CONTEXT_WINDOW = 128000
|
||||||
|
|
||||||
# Summarization trigger threshold (multiplier of context_window limit)
|
# Summarization trigger threshold (multiplier of context_window limit)
|
||||||
# Summarization triggers when step usage > context_window * SUMMARIZATION_TRIGGER_MULTIPLIER
|
# Summarization triggers when step usage > context_window * SUMMARIZATION_TRIGGER_MULTIPLIER
|
||||||
@@ -253,10 +253,10 @@ LLM_MAX_CONTEXT_WINDOW = {
|
|||||||
"deepseek-reasoner": 64000,
|
"deepseek-reasoner": 64000,
|
||||||
# glm (Z.AI)
|
# glm (Z.AI)
|
||||||
"glm-4.5": 128000,
|
"glm-4.5": 128000,
|
||||||
"glm-4.6": 200000,
|
"glm-4.6": 180000,
|
||||||
"glm-4.7": 200000,
|
"glm-4.7": 180000,
|
||||||
"glm-5": 200000,
|
"glm-5": 180000,
|
||||||
"glm-5-code": 200000,
|
"glm-5-code": 180000,
|
||||||
## OpenAI models: https://platform.openai.com/docs/models/overview
|
## OpenAI models: https://platform.openai.com/docs/models/overview
|
||||||
# gpt-5
|
# gpt-5
|
||||||
"gpt-5": 272000,
|
"gpt-5": 272000,
|
||||||
@@ -278,6 +278,8 @@ LLM_MAX_CONTEXT_WINDOW = {
|
|||||||
"gpt-5.2-pro": 272000,
|
"gpt-5.2-pro": 272000,
|
||||||
"gpt-5.2-pro-2025-12-11": 272000,
|
"gpt-5.2-pro-2025-12-11": 272000,
|
||||||
"gpt-5.2-codex": 272000,
|
"gpt-5.2-codex": 272000,
|
||||||
|
# gpt-5.3
|
||||||
|
"gpt-5.3-codex": 272000,
|
||||||
# reasoners
|
# reasoners
|
||||||
"o1": 200000,
|
"o1": 200000,
|
||||||
# "o1-pro": 200000, # responses API only
|
# "o1-pro": 200000, # responses API only
|
||||||
@@ -419,7 +421,7 @@ MAX_ERROR_MESSAGE_CHAR_LIMIT = 1000
|
|||||||
# Default memory limits
|
# Default memory limits
|
||||||
CORE_MEMORY_PERSONA_CHAR_LIMIT: int = 20000
|
CORE_MEMORY_PERSONA_CHAR_LIMIT: int = 20000
|
||||||
CORE_MEMORY_HUMAN_CHAR_LIMIT: int = 20000
|
CORE_MEMORY_HUMAN_CHAR_LIMIT: int = 20000
|
||||||
CORE_MEMORY_BLOCK_CHAR_LIMIT: int = 20000
|
CORE_MEMORY_BLOCK_CHAR_LIMIT: int = 100000
|
||||||
|
|
||||||
# Function return limits
|
# Function return limits
|
||||||
FUNCTION_RETURN_CHAR_LIMIT = 50000 # ~300 words
|
FUNCTION_RETURN_CHAR_LIMIT = 50000 # ~300 words
|
||||||
|
|||||||
@@ -283,6 +283,15 @@ class LLMServerError(LLMError):
|
|||||||
while processing the request."""
|
while processing the request."""
|
||||||
|
|
||||||
|
|
||||||
|
class LLMEmptyResponseError(LLMServerError):
|
||||||
|
"""Error when LLM returns an empty response (no content and no tool calls).
|
||||||
|
|
||||||
|
This is a subclass of LLMServerError to maintain retry behavior, but allows
|
||||||
|
specific handling for empty response cases which may benefit from request
|
||||||
|
modification before retry.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
class LLMTimeoutError(LLMError):
|
class LLMTimeoutError(LLMError):
|
||||||
"""Error when LLM request times out"""
|
"""Error when LLM request times out"""
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ from letta.schemas.letta_message import MessageType
|
|||||||
from letta.schemas.letta_message_content import TextContent
|
from letta.schemas.letta_message_content import TextContent
|
||||||
from letta.schemas.letta_response import LettaResponse
|
from letta.schemas.letta_response import LettaResponse
|
||||||
from letta.schemas.message import Message, MessageCreate
|
from letta.schemas.message import Message, MessageCreate
|
||||||
|
from letta.schemas.provider_trace import BillingContext
|
||||||
from letta.schemas.run import Run
|
from letta.schemas.run import Run
|
||||||
from letta.schemas.user import User
|
from letta.schemas.user import User
|
||||||
from letta.services.agent_manager import AgentManager
|
from letta.services.agent_manager import AgentManager
|
||||||
@@ -69,6 +70,7 @@ class SleeptimeMultiAgentV2(BaseAgent):
|
|||||||
use_assistant_message: bool = True,
|
use_assistant_message: bool = True,
|
||||||
request_start_timestamp_ns: int | None = None,
|
request_start_timestamp_ns: int | None = None,
|
||||||
include_return_message_types: list[MessageType] | None = None,
|
include_return_message_types: list[MessageType] | None = None,
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> LettaResponse:
|
) -> LettaResponse:
|
||||||
run_ids = []
|
run_ids = []
|
||||||
|
|
||||||
@@ -100,6 +102,7 @@ class SleeptimeMultiAgentV2(BaseAgent):
|
|||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
use_assistant_message=use_assistant_message,
|
use_assistant_message=use_assistant_message,
|
||||||
include_return_message_types=include_return_message_types,
|
include_return_message_types=include_return_message_types,
|
||||||
|
billing_context=billing_context,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get last response messages
|
# Get last response messages
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from letta.schemas.letta_request import ClientToolSchema
|
|||||||
from letta.schemas.letta_response import LettaResponse
|
from letta.schemas.letta_response import LettaResponse
|
||||||
from letta.schemas.letta_stop_reason import StopReasonType
|
from letta.schemas.letta_stop_reason import StopReasonType
|
||||||
from letta.schemas.message import Message, MessageCreate
|
from letta.schemas.message import Message, MessageCreate
|
||||||
|
from letta.schemas.provider_trace import BillingContext
|
||||||
from letta.schemas.run import Run, RunUpdate
|
from letta.schemas.run import Run, RunUpdate
|
||||||
from letta.schemas.user import User
|
from letta.schemas.user import User
|
||||||
from letta.services.group_manager import GroupManager
|
from letta.services.group_manager import GroupManager
|
||||||
@@ -47,6 +48,7 @@ class SleeptimeMultiAgentV3(LettaAgentV2):
|
|||||||
request_start_timestamp_ns: int | None = None,
|
request_start_timestamp_ns: int | None = None,
|
||||||
client_tools: list[ClientToolSchema] | None = None,
|
client_tools: list[ClientToolSchema] | None = None,
|
||||||
include_compaction_messages: bool = False,
|
include_compaction_messages: bool = False,
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> LettaResponse:
|
) -> LettaResponse:
|
||||||
self.run_ids = []
|
self.run_ids = []
|
||||||
|
|
||||||
@@ -62,6 +64,7 @@ class SleeptimeMultiAgentV3(LettaAgentV2):
|
|||||||
request_start_timestamp_ns=request_start_timestamp_ns,
|
request_start_timestamp_ns=request_start_timestamp_ns,
|
||||||
client_tools=client_tools,
|
client_tools=client_tools,
|
||||||
include_compaction_messages=include_compaction_messages,
|
include_compaction_messages=include_compaction_messages,
|
||||||
|
billing_context=billing_context,
|
||||||
)
|
)
|
||||||
|
|
||||||
await self.run_sleeptime_agents()
|
await self.run_sleeptime_agents()
|
||||||
@@ -81,6 +84,7 @@ class SleeptimeMultiAgentV3(LettaAgentV2):
|
|||||||
include_return_message_types: list[MessageType] | None = None,
|
include_return_message_types: list[MessageType] | None = None,
|
||||||
client_tools: list[ClientToolSchema] | None = None,
|
client_tools: list[ClientToolSchema] | None = None,
|
||||||
include_compaction_messages: bool = False,
|
include_compaction_messages: bool = False,
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> AsyncGenerator[str, None]:
|
) -> AsyncGenerator[str, None]:
|
||||||
self.run_ids = []
|
self.run_ids = []
|
||||||
|
|
||||||
@@ -99,6 +103,7 @@ class SleeptimeMultiAgentV3(LettaAgentV2):
|
|||||||
request_start_timestamp_ns=request_start_timestamp_ns,
|
request_start_timestamp_ns=request_start_timestamp_ns,
|
||||||
client_tools=client_tools,
|
client_tools=client_tools,
|
||||||
include_compaction_messages=include_compaction_messages,
|
include_compaction_messages=include_compaction_messages,
|
||||||
|
billing_context=billing_context,
|
||||||
):
|
):
|
||||||
yield chunk
|
yield chunk
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from letta.schemas.letta_request import ClientToolSchema
|
|||||||
from letta.schemas.letta_response import LettaResponse
|
from letta.schemas.letta_response import LettaResponse
|
||||||
from letta.schemas.letta_stop_reason import StopReasonType
|
from letta.schemas.letta_stop_reason import StopReasonType
|
||||||
from letta.schemas.message import Message, MessageCreate
|
from letta.schemas.message import Message, MessageCreate
|
||||||
|
from letta.schemas.provider_trace import BillingContext
|
||||||
from letta.schemas.run import Run, RunUpdate
|
from letta.schemas.run import Run, RunUpdate
|
||||||
from letta.schemas.user import User
|
from letta.schemas.user import User
|
||||||
from letta.services.group_manager import GroupManager
|
from letta.services.group_manager import GroupManager
|
||||||
@@ -47,6 +48,7 @@ class SleeptimeMultiAgentV4(LettaAgentV3):
|
|||||||
conversation_id: str | None = None,
|
conversation_id: str | None = None,
|
||||||
client_tools: list[ClientToolSchema] | None = None,
|
client_tools: list[ClientToolSchema] | None = None,
|
||||||
include_compaction_messages: bool = False,
|
include_compaction_messages: bool = False,
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> LettaResponse:
|
) -> LettaResponse:
|
||||||
self.run_ids = []
|
self.run_ids = []
|
||||||
|
|
||||||
@@ -63,6 +65,7 @@ class SleeptimeMultiAgentV4(LettaAgentV3):
|
|||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
client_tools=client_tools,
|
client_tools=client_tools,
|
||||||
include_compaction_messages=include_compaction_messages,
|
include_compaction_messages=include_compaction_messages,
|
||||||
|
billing_context=billing_context,
|
||||||
)
|
)
|
||||||
|
|
||||||
run_ids = await self.run_sleeptime_agents()
|
run_ids = await self.run_sleeptime_agents()
|
||||||
@@ -82,6 +85,7 @@ class SleeptimeMultiAgentV4(LettaAgentV3):
|
|||||||
conversation_id: str | None = None,
|
conversation_id: str | None = None,
|
||||||
client_tools: list[ClientToolSchema] | None = None,
|
client_tools: list[ClientToolSchema] | None = None,
|
||||||
include_compaction_messages: bool = False,
|
include_compaction_messages: bool = False,
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> AsyncGenerator[str, None]:
|
) -> AsyncGenerator[str, None]:
|
||||||
self.run_ids = []
|
self.run_ids = []
|
||||||
|
|
||||||
@@ -101,6 +105,7 @@ class SleeptimeMultiAgentV4(LettaAgentV3):
|
|||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
client_tools=client_tools,
|
client_tools=client_tools,
|
||||||
include_compaction_messages=include_compaction_messages,
|
include_compaction_messages=include_compaction_messages,
|
||||||
|
billing_context=billing_context,
|
||||||
):
|
):
|
||||||
yield chunk
|
yield chunk
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ from anthropic.types.beta import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
|
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
|
||||||
|
from letta.errors import LLMEmptyResponseError
|
||||||
from letta.local_llm.constants import INNER_THOUGHTS_KWARG
|
from letta.local_llm.constants import INNER_THOUGHTS_KWARG
|
||||||
from letta.log import get_logger
|
from letta.log import get_logger
|
||||||
from letta.schemas.letta_message import (
|
from letta.schemas.letta_message import (
|
||||||
@@ -104,6 +105,10 @@ class AnthropicStreamingInterface:
|
|||||||
self.inner_thoughts_complete = False
|
self.inner_thoughts_complete = False
|
||||||
self.put_inner_thoughts_in_kwarg = put_inner_thoughts_in_kwarg
|
self.put_inner_thoughts_in_kwarg = put_inner_thoughts_in_kwarg
|
||||||
|
|
||||||
|
# Track whether any content was produced (text or tool calls)
|
||||||
|
# Used to detect empty responses from models like Opus 4.6
|
||||||
|
self.has_content = False
|
||||||
|
|
||||||
# Buffer to handle partial XML tags across chunks
|
# Buffer to handle partial XML tags across chunks
|
||||||
self.partial_tag_buffer = ""
|
self.partial_tag_buffer = ""
|
||||||
|
|
||||||
@@ -298,9 +303,11 @@ class AnthropicStreamingInterface:
|
|||||||
|
|
||||||
if isinstance(content, BetaTextBlock):
|
if isinstance(content, BetaTextBlock):
|
||||||
self.anthropic_mode = EventMode.TEXT
|
self.anthropic_mode = EventMode.TEXT
|
||||||
|
self.has_content = True # Track that we received text content
|
||||||
# TODO: Can capture citations, etc.
|
# TODO: Can capture citations, etc.
|
||||||
elif isinstance(content, BetaToolUseBlock):
|
elif isinstance(content, BetaToolUseBlock):
|
||||||
self.anthropic_mode = EventMode.TOOL_USE
|
self.anthropic_mode = EventMode.TOOL_USE
|
||||||
|
self.has_content = True # Track that we received tool use content
|
||||||
self.tool_call_id = content.id
|
self.tool_call_id = content.id
|
||||||
self.tool_call_name = content.name
|
self.tool_call_name = content.name
|
||||||
self.inner_thoughts_complete = False
|
self.inner_thoughts_complete = False
|
||||||
@@ -589,8 +596,12 @@ class AnthropicStreamingInterface:
|
|||||||
# message_delta event are *cumulative*." So we assign, not accumulate.
|
# message_delta event are *cumulative*." So we assign, not accumulate.
|
||||||
self.output_tokens = event.usage.output_tokens
|
self.output_tokens = event.usage.output_tokens
|
||||||
elif isinstance(event, BetaRawMessageStopEvent):
|
elif isinstance(event, BetaRawMessageStopEvent):
|
||||||
# Don't do anything here! We don't want to stop the stream.
|
# Check if any content was produced during the stream
|
||||||
pass
|
# Empty responses (no text and no tool calls) should raise an error
|
||||||
|
if not self.has_content:
|
||||||
|
raise LLMEmptyResponseError(
|
||||||
|
message=f"LLM provider returned empty content in streaming response (model: {self.model}, message_id: {self.message_id})"
|
||||||
|
)
|
||||||
elif isinstance(event, BetaRawContentBlockStopEvent):
|
elif isinstance(event, BetaRawContentBlockStopEvent):
|
||||||
# If we're exiting a tool use block and there are still buffered messages,
|
# If we're exiting a tool use block and there are still buffered messages,
|
||||||
# we should flush them now.
|
# we should flush them now.
|
||||||
@@ -837,10 +848,12 @@ class SimpleAnthropicStreamingInterface:
|
|||||||
|
|
||||||
if isinstance(content, BetaTextBlock):
|
if isinstance(content, BetaTextBlock):
|
||||||
self.anthropic_mode = EventMode.TEXT
|
self.anthropic_mode = EventMode.TEXT
|
||||||
|
self.has_content = True # Track that we received text content
|
||||||
# TODO: Can capture citations, etc.
|
# TODO: Can capture citations, etc.
|
||||||
|
|
||||||
elif isinstance(content, BetaToolUseBlock):
|
elif isinstance(content, BetaToolUseBlock):
|
||||||
self.anthropic_mode = EventMode.TOOL_USE
|
self.anthropic_mode = EventMode.TOOL_USE
|
||||||
|
self.has_content = True # Track that we received tool use content
|
||||||
self.tool_call_id = content.id
|
self.tool_call_id = content.id
|
||||||
self.tool_call_name = content.name
|
self.tool_call_name = content.name
|
||||||
|
|
||||||
@@ -1014,8 +1027,12 @@ class SimpleAnthropicStreamingInterface:
|
|||||||
self.output_tokens = event.usage.output_tokens
|
self.output_tokens = event.usage.output_tokens
|
||||||
|
|
||||||
elif isinstance(event, BetaRawMessageStopEvent):
|
elif isinstance(event, BetaRawMessageStopEvent):
|
||||||
# Don't do anything here! We don't want to stop the stream.
|
# Check if any content was produced during the stream
|
||||||
pass
|
# Empty responses (no text and no tool calls) should raise an error
|
||||||
|
if not self.has_content:
|
||||||
|
raise LLMEmptyResponseError(
|
||||||
|
message=f"LLM provider returned empty content in streaming response (model: {self.model}, message_id: {self.message_id})"
|
||||||
|
)
|
||||||
|
|
||||||
elif isinstance(event, BetaRawContentBlockStopEvent):
|
elif isinstance(event, BetaRawContentBlockStopEvent):
|
||||||
self.anthropic_mode = None
|
self.anthropic_mode = None
|
||||||
|
|||||||
@@ -19,6 +19,8 @@ from letta.errors import (
|
|||||||
LLMAuthenticationError,
|
LLMAuthenticationError,
|
||||||
LLMBadRequestError,
|
LLMBadRequestError,
|
||||||
LLMConnectionError,
|
LLMConnectionError,
|
||||||
|
LLMEmptyResponseError,
|
||||||
|
LLMError,
|
||||||
LLMInsufficientCreditsError,
|
LLMInsufficientCreditsError,
|
||||||
LLMNotFoundError,
|
LLMNotFoundError,
|
||||||
LLMPermissionDeniedError,
|
LLMPermissionDeniedError,
|
||||||
@@ -957,6 +959,11 @@ class AnthropicClient(LLMClientBase):
|
|||||||
|
|
||||||
@trace_method
|
@trace_method
|
||||||
def handle_llm_error(self, e: Exception, llm_config: Optional[LLMConfig] = None) -> Exception:
|
def handle_llm_error(self, e: Exception, llm_config: Optional[LLMConfig] = None) -> Exception:
|
||||||
|
# Pass through errors that are already LLMError instances unchanged
|
||||||
|
# This preserves specific error types like LLMEmptyResponseError
|
||||||
|
if isinstance(e, LLMError):
|
||||||
|
return e
|
||||||
|
|
||||||
is_byok = (llm_config.provider_category == ProviderCategory.byok) if llm_config else None
|
is_byok = (llm_config.provider_category == ProviderCategory.byok) if llm_config else None
|
||||||
|
|
||||||
# make sure to check for overflow errors, regardless of error type
|
# make sure to check for overflow errors, regardless of error type
|
||||||
@@ -1278,7 +1285,7 @@ class AnthropicClient(LLMClientBase):
|
|||||||
response.stop_reason,
|
response.stop_reason,
|
||||||
json.dumps(response_data),
|
json.dumps(response_data),
|
||||||
)
|
)
|
||||||
raise LLMServerError(
|
raise LLMEmptyResponseError(
|
||||||
message=f"LLM provider returned empty content in response (ID: {response.id}, model: {response.model}, stop_reason: {response.stop_reason})",
|
message=f"LLM provider returned empty content in response (ID: {response.id}, model: {response.model}, stop_reason: {response.stop_reason})",
|
||||||
code=ErrorCode.INTERNAL_SERVER_ERROR,
|
code=ErrorCode.INTERNAL_SERVER_ERROR,
|
||||||
details={
|
details={
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from letta.llm_api.google_constants import GOOGLE_MODEL_FOR_API_KEY_CHECK
|
|||||||
from letta.llm_api.google_vertex_client import GoogleVertexClient
|
from letta.llm_api.google_vertex_client import GoogleVertexClient
|
||||||
from letta.log import get_logger
|
from letta.log import get_logger
|
||||||
from letta.schemas.llm_config import LLMConfig
|
from letta.schemas.llm_config import LLMConfig
|
||||||
from letta.settings import model_settings, settings
|
from letta.settings import model_settings
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
@@ -18,7 +18,7 @@ class GoogleAIClient(GoogleVertexClient):
|
|||||||
provider_label = "Google AI"
|
provider_label = "Google AI"
|
||||||
|
|
||||||
def _get_client(self, llm_config: Optional[LLMConfig] = None):
|
def _get_client(self, llm_config: Optional[LLMConfig] = None):
|
||||||
timeout_ms = int(settings.llm_request_timeout_seconds * 1000)
|
timeout_ms = int(model_settings.gemini_timeout_seconds * 1000)
|
||||||
api_key = None
|
api_key = None
|
||||||
if llm_config:
|
if llm_config:
|
||||||
api_key, _, _ = self.get_byok_overrides(llm_config)
|
api_key, _, _ = self.get_byok_overrides(llm_config)
|
||||||
@@ -30,7 +30,7 @@ class GoogleAIClient(GoogleVertexClient):
|
|||||||
)
|
)
|
||||||
|
|
||||||
async def _get_client_async(self, llm_config: Optional[LLMConfig] = None):
|
async def _get_client_async(self, llm_config: Optional[LLMConfig] = None):
|
||||||
timeout_ms = int(settings.llm_request_timeout_seconds * 1000)
|
timeout_ms = int(model_settings.gemini_timeout_seconds * 1000)
|
||||||
api_key = None
|
api_key = None
|
||||||
if llm_config:
|
if llm_config:
|
||||||
api_key, _, _ = await self.get_byok_overrides_async(llm_config)
|
api_key, _, _ = await self.get_byok_overrides_async(llm_config)
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ from letta.schemas.enums import AgentType, LLMCallType, ProviderCategory
|
|||||||
from letta.schemas.llm_config import LLMConfig
|
from letta.schemas.llm_config import LLMConfig
|
||||||
from letta.schemas.message import Message
|
from letta.schemas.message import Message
|
||||||
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
|
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
|
||||||
from letta.schemas.provider_trace import ProviderTrace
|
from letta.schemas.provider_trace import BillingContext, ProviderTrace
|
||||||
from letta.schemas.usage import LettaUsageStatistics
|
from letta.schemas.usage import LettaUsageStatistics
|
||||||
from letta.services.telemetry_manager import TelemetryManager
|
from letta.services.telemetry_manager import TelemetryManager
|
||||||
from letta.settings import settings
|
from letta.settings import settings
|
||||||
@@ -48,6 +48,7 @@ class LLMClientBase:
|
|||||||
self._telemetry_user_id: Optional[str] = None
|
self._telemetry_user_id: Optional[str] = None
|
||||||
self._telemetry_compaction_settings: Optional[Dict] = None
|
self._telemetry_compaction_settings: Optional[Dict] = None
|
||||||
self._telemetry_llm_config: Optional[Dict] = None
|
self._telemetry_llm_config: Optional[Dict] = None
|
||||||
|
self._telemetry_billing_context: Optional[BillingContext] = None
|
||||||
|
|
||||||
def set_telemetry_context(
|
def set_telemetry_context(
|
||||||
self,
|
self,
|
||||||
@@ -62,6 +63,7 @@ class LLMClientBase:
|
|||||||
compaction_settings: Optional[Dict] = None,
|
compaction_settings: Optional[Dict] = None,
|
||||||
llm_config: Optional[Dict] = None,
|
llm_config: Optional[Dict] = None,
|
||||||
actor: Optional["User"] = None,
|
actor: Optional["User"] = None,
|
||||||
|
billing_context: Optional[BillingContext] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Set telemetry context for provider trace logging."""
|
"""Set telemetry context for provider trace logging."""
|
||||||
if actor is not None:
|
if actor is not None:
|
||||||
@@ -76,6 +78,7 @@ class LLMClientBase:
|
|||||||
self._telemetry_user_id = user_id
|
self._telemetry_user_id = user_id
|
||||||
self._telemetry_compaction_settings = compaction_settings
|
self._telemetry_compaction_settings = compaction_settings
|
||||||
self._telemetry_llm_config = llm_config
|
self._telemetry_llm_config = llm_config
|
||||||
|
self._telemetry_billing_context = billing_context
|
||||||
|
|
||||||
def extract_usage_statistics(self, response_data: Optional[dict], llm_config: LLMConfig) -> LettaUsageStatistics:
|
def extract_usage_statistics(self, response_data: Optional[dict], llm_config: LLMConfig) -> LettaUsageStatistics:
|
||||||
"""Provider-specific usage parsing hook (override in subclasses). Returns LettaUsageStatistics."""
|
"""Provider-specific usage parsing hook (override in subclasses). Returns LettaUsageStatistics."""
|
||||||
@@ -125,6 +128,7 @@ class LLMClientBase:
|
|||||||
user_id=self._telemetry_user_id,
|
user_id=self._telemetry_user_id,
|
||||||
compaction_settings=self._telemetry_compaction_settings,
|
compaction_settings=self._telemetry_compaction_settings,
|
||||||
llm_config=llm_config.model_dump() if llm_config else self._telemetry_llm_config,
|
llm_config=llm_config.model_dump() if llm_config else self._telemetry_llm_config,
|
||||||
|
billing_context=self._telemetry_billing_context,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -186,6 +190,7 @@ class LLMClientBase:
|
|||||||
user_id=self._telemetry_user_id,
|
user_id=self._telemetry_user_id,
|
||||||
compaction_settings=self._telemetry_compaction_settings,
|
compaction_settings=self._telemetry_compaction_settings,
|
||||||
llm_config=llm_config.model_dump() if llm_config else self._telemetry_llm_config,
|
llm_config=llm_config.model_dump() if llm_config else self._telemetry_llm_config,
|
||||||
|
billing_context=self._telemetry_billing_context,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ def supports_none_reasoning_effort(model: str) -> bool:
|
|||||||
|
|
||||||
Currently, GPT-5.1 and GPT-5.2 models support the 'none' reasoning effort level.
|
Currently, GPT-5.1 and GPT-5.2 models support the 'none' reasoning effort level.
|
||||||
"""
|
"""
|
||||||
return model.startswith("gpt-5.1") or model.startswith("gpt-5.2")
|
return model.startswith("gpt-5.1") or model.startswith("gpt-5.2") or model.startswith("gpt-5.3")
|
||||||
|
|
||||||
|
|
||||||
def is_openai_5_model(model: str) -> bool:
|
def is_openai_5_model(model: str) -> bool:
|
||||||
@@ -389,7 +389,6 @@ class OpenAIClient(LLMClientBase):
|
|||||||
input=openai_messages_list,
|
input=openai_messages_list,
|
||||||
tools=responses_tools,
|
tools=responses_tools,
|
||||||
tool_choice=tool_choice,
|
tool_choice=tool_choice,
|
||||||
max_output_tokens=llm_config.max_tokens,
|
|
||||||
temperature=llm_config.temperature if supports_temperature_param(model) else None,
|
temperature=llm_config.temperature if supports_temperature_param(model) else None,
|
||||||
parallel_tool_calls=llm_config.parallel_tool_calls if tools and supports_parallel_tool_calling(model) else False,
|
parallel_tool_calls=llm_config.parallel_tool_calls if tools and supports_parallel_tool_calling(model) else False,
|
||||||
)
|
)
|
||||||
@@ -397,6 +396,10 @@ class OpenAIClient(LLMClientBase):
|
|||||||
# Handle text configuration (verbosity and response format)
|
# Handle text configuration (verbosity and response format)
|
||||||
text_config_kwargs = {}
|
text_config_kwargs = {}
|
||||||
|
|
||||||
|
# Only set max_output_tokens if explicitly configured
|
||||||
|
if llm_config.max_tokens is not None:
|
||||||
|
data.max_output_tokens = llm_config.max_tokens
|
||||||
|
|
||||||
# Add verbosity control for GPT-5 models
|
# Add verbosity control for GPT-5 models
|
||||||
if supports_verbosity_control(model) and llm_config.verbosity:
|
if supports_verbosity_control(model) and llm_config.verbosity:
|
||||||
text_config_kwargs["verbosity"] = llm_config.verbosity
|
text_config_kwargs["verbosity"] = llm_config.verbosity
|
||||||
@@ -451,7 +454,6 @@ class OpenAIClient(LLMClientBase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
request_data = data.model_dump(exclude_unset=True)
|
request_data = data.model_dump(exclude_unset=True)
|
||||||
# print("responses request data", request_data)
|
|
||||||
return request_data
|
return request_data
|
||||||
|
|
||||||
@trace_method
|
@trace_method
|
||||||
@@ -639,6 +641,14 @@ class OpenAIClient(LLMClientBase):
|
|||||||
tool.function.strict = False
|
tool.function.strict = False
|
||||||
request_data = data.model_dump(exclude_unset=True)
|
request_data = data.model_dump(exclude_unset=True)
|
||||||
|
|
||||||
|
# Fireworks uses strict validation (additionalProperties: false) and rejects
|
||||||
|
# reasoning fields that are not in their schema.
|
||||||
|
is_fireworks = llm_config.model_endpoint and "fireworks.ai" in llm_config.model_endpoint
|
||||||
|
if is_fireworks and "messages" in request_data:
|
||||||
|
for message in request_data["messages"]:
|
||||||
|
for field in ("reasoning_content_signature", "redacted_reasoning_content", "omitted_reasoning_content"):
|
||||||
|
message.pop(field, None)
|
||||||
|
|
||||||
# If Ollama
|
# If Ollama
|
||||||
# if llm_config.handle.startswith("ollama/") and llm_config.enable_reasoner:
|
# if llm_config.handle.startswith("ollama/") and llm_config.enable_reasoner:
|
||||||
# Sadly, reasoning via the OpenAI proxy on Ollama only works for Harmony/gpt-oss
|
# Sadly, reasoning via the OpenAI proxy on Ollama only works for Harmony/gpt-oss
|
||||||
|
|||||||
@@ -68,6 +68,12 @@ class ZAIClient(OpenAIClient):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Z.ai's API uses max_tokens, not max_completion_tokens.
|
||||||
|
# If max_completion_tokens is sent, Z.ai ignores it and falls back to its
|
||||||
|
# default of 65536, silently truncating input to ~137K of the 200K context window.
|
||||||
|
if "max_completion_tokens" in data:
|
||||||
|
data["max_tokens"] = data.pop("max_completion_tokens")
|
||||||
|
|
||||||
# Sanitize empty text content — ZAI rejects empty text blocks
|
# Sanitize empty text content — ZAI rejects empty text blocks
|
||||||
if "messages" in data:
|
if "messages" in data:
|
||||||
for msg in data["messages"]:
|
for msg in data["messages"]:
|
||||||
|
|||||||
@@ -17295,6 +17295,58 @@
|
|||||||
"supports_tool_choice": true,
|
"supports_tool_choice": true,
|
||||||
"supports_vision": true
|
"supports_vision": true
|
||||||
},
|
},
|
||||||
|
"gpt-5.3-chat-latest": {
|
||||||
|
"cache_read_input_token_cost": 1.75e-7,
|
||||||
|
"cache_read_input_token_cost_priority": 3.5e-7,
|
||||||
|
"input_cost_per_token": 1.75e-6,
|
||||||
|
"input_cost_per_token_priority": 3.5e-6,
|
||||||
|
"litellm_provider": "openai",
|
||||||
|
"max_input_tokens": 128000,
|
||||||
|
"max_output_tokens": 16384,
|
||||||
|
"max_tokens": 16384,
|
||||||
|
"mode": "chat",
|
||||||
|
"output_cost_per_token": 1.4e-5,
|
||||||
|
"output_cost_per_token_priority": 2.8e-5,
|
||||||
|
"supported_endpoints": ["/v1/chat/completions", "/v1/responses"],
|
||||||
|
"supported_modalities": ["text", "image"],
|
||||||
|
"supported_output_modalities": ["text"],
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_native_streaming": true,
|
||||||
|
"supports_parallel_function_calling": true,
|
||||||
|
"supports_pdf_input": true,
|
||||||
|
"supports_prompt_caching": true,
|
||||||
|
"supports_reasoning": true,
|
||||||
|
"supports_response_schema": true,
|
||||||
|
"supports_system_messages": true,
|
||||||
|
"supports_tool_choice": true,
|
||||||
|
"supports_vision": true
|
||||||
|
},
|
||||||
|
"gpt-5.3-codex": {
|
||||||
|
"cache_read_input_token_cost": 1.75e-7,
|
||||||
|
"cache_read_input_token_cost_priority": 3.5e-7,
|
||||||
|
"input_cost_per_token": 1.75e-6,
|
||||||
|
"input_cost_per_token_priority": 3.5e-6,
|
||||||
|
"litellm_provider": "openai",
|
||||||
|
"max_input_tokens": 272000,
|
||||||
|
"max_output_tokens": 128000,
|
||||||
|
"max_tokens": 128000,
|
||||||
|
"mode": "responses",
|
||||||
|
"output_cost_per_token": 1.4e-5,
|
||||||
|
"output_cost_per_token_priority": 2.8e-5,
|
||||||
|
"supported_endpoints": ["/v1/responses"],
|
||||||
|
"supported_modalities": ["text", "image"],
|
||||||
|
"supported_output_modalities": ["text"],
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_native_streaming": true,
|
||||||
|
"supports_parallel_function_calling": true,
|
||||||
|
"supports_pdf_input": true,
|
||||||
|
"supports_prompt_caching": true,
|
||||||
|
"supports_reasoning": true,
|
||||||
|
"supports_response_schema": true,
|
||||||
|
"supports_system_messages": false,
|
||||||
|
"supports_tool_choice": true,
|
||||||
|
"supports_vision": true
|
||||||
|
},
|
||||||
"gpt-5-mini": {
|
"gpt-5-mini": {
|
||||||
"cache_read_input_token_cost": 2.5e-8,
|
"cache_read_input_token_cost": 2.5e-8,
|
||||||
"cache_read_input_token_cost_flex": 1.25e-8,
|
"cache_read_input_token_cost_flex": 1.25e-8,
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ class Conversation(SqlalchemyBase, OrganizationMixin):
|
|||||||
"ConversationMessage",
|
"ConversationMessage",
|
||||||
back_populates="conversation",
|
back_populates="conversation",
|
||||||
cascade="all, delete-orphan",
|
cascade="all, delete-orphan",
|
||||||
lazy="selectin",
|
lazy="raise",
|
||||||
)
|
)
|
||||||
isolated_blocks: Mapped[List["Block"]] = relationship(
|
isolated_blocks: Mapped[List["Block"]] = relationship(
|
||||||
"Block",
|
"Block",
|
||||||
|
|||||||
@@ -69,5 +69,5 @@ class ConversationMessage(SqlalchemyBase, OrganizationMixin):
|
|||||||
)
|
)
|
||||||
message: Mapped["Message"] = relationship(
|
message: Mapped["Message"] = relationship(
|
||||||
"Message",
|
"Message",
|
||||||
lazy="selectin",
|
lazy="raise",
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -88,8 +88,7 @@ class LettaRequest(BaseModel):
|
|||||||
)
|
)
|
||||||
top_logprobs: Optional[int] = Field(
|
top_logprobs: Optional[int] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Number of most likely tokens to return at each position (0-20). "
|
description="Number of most likely tokens to return at each position (0-20). Requires return_logprobs=True.",
|
||||||
"Requires return_logprobs=True.",
|
|
||||||
)
|
)
|
||||||
return_token_ids: bool = Field(
|
return_token_ids: bool = Field(
|
||||||
default=False,
|
default=False,
|
||||||
@@ -155,6 +154,10 @@ class LettaStreamingRequest(LettaRequest):
|
|||||||
class ConversationMessageRequest(LettaRequest):
|
class ConversationMessageRequest(LettaRequest):
|
||||||
"""Request for sending messages to a conversation. Streams by default."""
|
"""Request for sending messages to a conversation. Streams by default."""
|
||||||
|
|
||||||
|
agent_id: Optional[str] = Field(
|
||||||
|
default=None,
|
||||||
|
description="Agent ID for agent-direct mode with 'default' conversation. Use with conversation_id='default' in the URL path.",
|
||||||
|
)
|
||||||
streaming: bool = Field(
|
streaming: bool = Field(
|
||||||
default=True,
|
default=True,
|
||||||
description="If True (default), returns a streaming response (Server-Sent Events). If False, returns a complete JSON response.",
|
description="If True (default), returns a streaming response (Server-Sent Events). If False, returns a complete JSON response.",
|
||||||
@@ -194,6 +197,10 @@ class CreateBatch(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class RetrieveStreamRequest(BaseModel):
|
class RetrieveStreamRequest(BaseModel):
|
||||||
|
agent_id: Optional[str] = Field(
|
||||||
|
default=None,
|
||||||
|
description="Agent ID for agent-direct mode with 'default' conversation. Use with conversation_id='default' in the URL path.",
|
||||||
|
)
|
||||||
starting_after: int = Field(
|
starting_after: int = Field(
|
||||||
0, description="Sequence id to use as a cursor for pagination. Response will start streaming after this chunk sequence id"
|
0, description="Sequence id to use as a cursor for pagination. Response will start streaming after this chunk sequence id"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import re
|
||||||
from typing import TYPE_CHECKING, Literal, Optional
|
from typing import TYPE_CHECKING, Literal, Optional
|
||||||
|
|
||||||
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
||||||
@@ -139,7 +140,9 @@ class LLMConfig(BaseModel):
|
|||||||
|
|
||||||
# Set max_tokens defaults based on model (only if not explicitly provided)
|
# Set max_tokens defaults based on model (only if not explicitly provided)
|
||||||
if "max_tokens" not in values:
|
if "max_tokens" not in values:
|
||||||
if model.startswith("gpt-5"): # Covers both gpt-5 and gpt-5.1
|
if re.match(r"^gpt-5\.[23]", model) and "-chat" not in model:
|
||||||
|
values["max_tokens"] = 128000
|
||||||
|
elif model.startswith("gpt-5"):
|
||||||
values["max_tokens"] = 16384
|
values["max_tokens"] = 16384
|
||||||
elif model == "gpt-4.1":
|
elif model == "gpt-4.1":
|
||||||
values["max_tokens"] = 8192
|
values["max_tokens"] = 8192
|
||||||
@@ -299,7 +302,7 @@ class LLMConfig(BaseModel):
|
|||||||
context_window=272000,
|
context_window=272000,
|
||||||
reasoning_effort="none", # Default to "none" for GPT-5.2
|
reasoning_effort="none", # Default to "none" for GPT-5.2
|
||||||
verbosity="medium",
|
verbosity="medium",
|
||||||
max_tokens=16384,
|
max_tokens=128000,
|
||||||
)
|
)
|
||||||
elif model_name == "letta":
|
elif model_name == "letta":
|
||||||
return cls(
|
return cls(
|
||||||
|
|||||||
@@ -95,6 +95,11 @@ class LLMTrace(LettaBase):
|
|||||||
response_json: str = Field(..., description="Full response payload as JSON string")
|
response_json: str = Field(..., description="Full response payload as JSON string")
|
||||||
llm_config_json: str = Field(default="", description="LLM config as JSON string")
|
llm_config_json: str = Field(default="", description="LLM config as JSON string")
|
||||||
|
|
||||||
|
# Billing context
|
||||||
|
billing_plan_type: Optional[str] = Field(default=None, description="Subscription tier (e.g., 'basic', 'standard', 'max', 'enterprise')")
|
||||||
|
billing_cost_source: Optional[str] = Field(default=None, description="Cost source: 'quota' or 'credits'")
|
||||||
|
billing_customer_id: Optional[str] = Field(default=None, description="Customer ID for cross-referencing billing records")
|
||||||
|
|
||||||
# Timestamp
|
# Timestamp
|
||||||
created_at: datetime = Field(default_factory=get_utc_time, description="When the trace was created")
|
created_at: datetime = Field(default_factory=get_utc_time, description="When the trace was created")
|
||||||
|
|
||||||
@@ -128,6 +133,9 @@ class LLMTrace(LettaBase):
|
|||||||
self.request_json,
|
self.request_json,
|
||||||
self.response_json,
|
self.response_json,
|
||||||
self.llm_config_json,
|
self.llm_config_json,
|
||||||
|
self.billing_plan_type or "",
|
||||||
|
self.billing_cost_source or "",
|
||||||
|
self.billing_customer_id or "",
|
||||||
self.created_at,
|
self.created_at,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -162,5 +170,8 @@ class LLMTrace(LettaBase):
|
|||||||
"request_json",
|
"request_json",
|
||||||
"response_json",
|
"response_json",
|
||||||
"llm_config_json",
|
"llm_config_json",
|
||||||
|
"billing_plan_type",
|
||||||
|
"billing_cost_source",
|
||||||
|
"billing_customer_id",
|
||||||
"created_at",
|
"created_at",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -226,8 +226,6 @@ class Memory(BaseModel, validate_assignment=True):
|
|||||||
front_lines = []
|
front_lines = []
|
||||||
if block.description:
|
if block.description:
|
||||||
front_lines.append(f"description: {block.description}")
|
front_lines.append(f"description: {block.description}")
|
||||||
if block.limit is not None:
|
|
||||||
front_lines.append(f"limit: {block.limit}")
|
|
||||||
if getattr(block, "read_only", False):
|
if getattr(block, "read_only", False):
|
||||||
front_lines.append("read_only: true")
|
front_lines.append("read_only: true")
|
||||||
|
|
||||||
@@ -291,7 +289,40 @@ class Memory(BaseModel, validate_assignment=True):
|
|||||||
|
|
||||||
s.write("\n\n<memory_filesystem>\n")
|
s.write("\n\n<memory_filesystem>\n")
|
||||||
|
|
||||||
def _render_tree(node: dict, prefix: str = ""):
|
def _render_tree(node: dict, prefix: str = "", in_system: bool = False, path_parts: tuple[str, ...] = ()):
|
||||||
|
# Render skills/ as concise top-level entries only, using both
|
||||||
|
# current (`skills/<name>`) and legacy (`skills/<name>/SKILL`) labels.
|
||||||
|
if path_parts == ("skills",):
|
||||||
|
skill_entries: list[tuple[str, str]] = []
|
||||||
|
for name, val in node.items():
|
||||||
|
if name == LEAF_KEY:
|
||||||
|
continue
|
||||||
|
|
||||||
|
block = None
|
||||||
|
if isinstance(val, dict):
|
||||||
|
legacy_skill_block = val.get("SKILL")
|
||||||
|
if legacy_skill_block is not None and not isinstance(legacy_skill_block, dict):
|
||||||
|
block = legacy_skill_block
|
||||||
|
elif LEAF_KEY in val and not isinstance(val[LEAF_KEY], dict):
|
||||||
|
block = val[LEAF_KEY]
|
||||||
|
else:
|
||||||
|
block = val
|
||||||
|
|
||||||
|
if block is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
desc = getattr(block, "description", None)
|
||||||
|
desc_line = (desc or "").strip().split("\n")[0].strip()
|
||||||
|
skill_entries.append((name, desc_line))
|
||||||
|
|
||||||
|
skill_entries.sort(key=lambda e: e[0])
|
||||||
|
for i, (name, desc_line) in enumerate(skill_entries):
|
||||||
|
is_last = i == len(skill_entries) - 1
|
||||||
|
connector = "└── " if is_last else "├── "
|
||||||
|
desc_suffix = f" ({desc_line})" if desc_line else ""
|
||||||
|
s.write(f"{prefix}{connector}{name}{desc_suffix}\n")
|
||||||
|
return
|
||||||
|
|
||||||
# Sort: directories first, then files. If a node is both a directory and a
|
# Sort: directories first, then files. If a node is both a directory and a
|
||||||
# leaf (LEAF_KEY present), show both <name>/ and <name>.md.
|
# leaf (LEAF_KEY present), show both <name>/ and <name>.md.
|
||||||
dirs = []
|
dirs = []
|
||||||
@@ -316,9 +347,24 @@ class Memory(BaseModel, validate_assignment=True):
|
|||||||
if is_dir:
|
if is_dir:
|
||||||
s.write(f"{prefix}{connector}{name}/\n")
|
s.write(f"{prefix}{connector}{name}/\n")
|
||||||
extension = " " if is_last else "│ "
|
extension = " " if is_last else "│ "
|
||||||
_render_tree(node[name], prefix + extension)
|
_render_tree(
|
||||||
|
node[name],
|
||||||
|
prefix + extension,
|
||||||
|
in_system=in_system or name == "system",
|
||||||
|
path_parts=(*path_parts, name),
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
s.write(f"{prefix}{connector}{name}.md\n")
|
# For files outside system/, append the block description
|
||||||
|
desc_suffix = ""
|
||||||
|
if not in_system:
|
||||||
|
val = node[name]
|
||||||
|
block = val[LEAF_KEY] if isinstance(val, dict) else val
|
||||||
|
desc = getattr(block, "description", None)
|
||||||
|
if desc:
|
||||||
|
desc_line = desc.strip().split("\n")[0].strip()
|
||||||
|
if desc_line:
|
||||||
|
desc_suffix = f" ({desc_line})"
|
||||||
|
s.write(f"{prefix}{connector}{name}.md{desc_suffix}\n")
|
||||||
|
|
||||||
_render_tree(tree)
|
_render_tree(tree)
|
||||||
s.write("</memory_filesystem>")
|
s.write("</memory_filesystem>")
|
||||||
|
|||||||
@@ -282,10 +282,10 @@ class AnthropicModelSettings(ModelSettings):
|
|||||||
description="Soft control for how verbose model output should be, used for GPT-5 models.",
|
description="Soft control for how verbose model output should be, used for GPT-5 models.",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Opus 4.5 effort parameter
|
# Effort parameter for Opus 4.5, Opus 4.6, and Sonnet 4.6
|
||||||
effort: Optional[Literal["low", "medium", "high"]] = Field(
|
effort: Optional[Literal["low", "medium", "high", "max"]] = Field(
|
||||||
None,
|
None,
|
||||||
description="Effort level for Opus 4.5 model (controls token conservation). Not setting this gives similar performance to 'high'.",
|
description="Effort level for supported Anthropic models (controls token spending). 'max' is only available on Opus 4.6. Not setting this gives similar performance to 'high'.",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Anthropic supports strict mode for tool calling - defaults to False
|
# Anthropic supports strict mode for tool calling - defaults to False
|
||||||
|
|||||||
@@ -3,13 +3,21 @@ from __future__ import annotations
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
from pydantic import Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from letta.helpers.datetime_helpers import get_utc_time
|
from letta.helpers.datetime_helpers import get_utc_time
|
||||||
from letta.schemas.enums import PrimitiveType
|
from letta.schemas.enums import PrimitiveType
|
||||||
from letta.schemas.letta_base import OrmMetadataBase
|
from letta.schemas.letta_base import OrmMetadataBase
|
||||||
|
|
||||||
|
|
||||||
|
class BillingContext(BaseModel):
|
||||||
|
"""Billing context for LLM request cost tracking."""
|
||||||
|
|
||||||
|
plan_type: Optional[str] = Field(None, description="Subscription tier")
|
||||||
|
cost_source: Optional[str] = Field(None, description="Cost source: 'quota' or 'credits'")
|
||||||
|
customer_id: Optional[str] = Field(None, description="Customer ID for billing records")
|
||||||
|
|
||||||
|
|
||||||
class BaseProviderTrace(OrmMetadataBase):
|
class BaseProviderTrace(OrmMetadataBase):
|
||||||
__id_prefix__ = PrimitiveType.PROVIDER_TRACE.value
|
__id_prefix__ = PrimitiveType.PROVIDER_TRACE.value
|
||||||
|
|
||||||
@@ -53,6 +61,8 @@ class ProviderTrace(BaseProviderTrace):
|
|||||||
compaction_settings: Optional[Dict[str, Any]] = Field(None, description="Compaction/summarization settings (summarization calls only)")
|
compaction_settings: Optional[Dict[str, Any]] = Field(None, description="Compaction/summarization settings (summarization calls only)")
|
||||||
llm_config: Optional[Dict[str, Any]] = Field(None, description="LLM configuration used for this call (non-summarization calls only)")
|
llm_config: Optional[Dict[str, Any]] = Field(None, description="LLM configuration used for this call (non-summarization calls only)")
|
||||||
|
|
||||||
|
billing_context: Optional[BillingContext] = Field(None, description="Billing context from request headers")
|
||||||
|
|
||||||
created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the object was created.")
|
created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the object was created.")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ from letta.schemas.providers.base import Provider
|
|||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
|
ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
|
||||||
DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro", "chat"}
|
DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro"}
|
||||||
DEFAULT_EMBEDDING_BATCH_SIZE = 1024
|
DEFAULT_EMBEDDING_BATCH_SIZE = 1024
|
||||||
|
|
||||||
|
|
||||||
@@ -50,10 +50,22 @@ class OpenAIProvider(Provider):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
|
raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _openai_default_max_output_tokens(model_name: str) -> int:
|
||||||
|
"""Return a sensible max-output-tokens default for OpenAI models.
|
||||||
|
|
||||||
|
gpt-5.2* / gpt-5.3* support 128k output tokens, except the
|
||||||
|
`-chat` variants which are capped at 16k.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
|
||||||
|
if re.match(r"^gpt-5\.[23]", model_name) and "-chat" not in model_name:
|
||||||
|
return 128000
|
||||||
|
return 16384
|
||||||
|
|
||||||
def get_default_max_output_tokens(self, model_name: str) -> int:
|
def get_default_max_output_tokens(self, model_name: str) -> int:
|
||||||
"""Get the default max output tokens for OpenAI models (sync fallback)."""
|
"""Get the default max output tokens for OpenAI models (sync fallback)."""
|
||||||
# Simple default for openai
|
return self._openai_default_max_output_tokens(model_name)
|
||||||
return 16384
|
|
||||||
|
|
||||||
async def get_default_max_output_tokens_async(self, model_name: str) -> int:
|
async def get_default_max_output_tokens_async(self, model_name: str) -> int:
|
||||||
"""Get the default max output tokens for OpenAI models.
|
"""Get the default max output tokens for OpenAI models.
|
||||||
@@ -67,8 +79,7 @@ class OpenAIProvider(Provider):
|
|||||||
if max_output is not None:
|
if max_output is not None:
|
||||||
return max_output
|
return max_output
|
||||||
|
|
||||||
# Simple default for openai
|
return self._openai_default_max_output_tokens(model_name)
|
||||||
return 16384
|
|
||||||
|
|
||||||
async def _get_models_async(self) -> list[dict]:
|
async def _get_models_async(self) -> list[dict]:
|
||||||
from letta.llm_api.openai import openai_get_model_list_async
|
from letta.llm_api.openai import openai_get_model_list_async
|
||||||
|
|||||||
@@ -12,12 +12,13 @@ from letta.schemas.providers.openai import OpenAIProvider
|
|||||||
|
|
||||||
# Z.ai model context windows
|
# Z.ai model context windows
|
||||||
# Reference: https://docs.z.ai/
|
# Reference: https://docs.z.ai/
|
||||||
|
# GLM-5 max context window is 200K tokens but max_output_tokens (default 16k) counts against that --> 180k
|
||||||
MODEL_CONTEXT_WINDOWS = {
|
MODEL_CONTEXT_WINDOWS = {
|
||||||
"glm-4.5": 128000,
|
"glm-4.5": 128000,
|
||||||
"glm-4.6": 200000,
|
"glm-4.6": 180000,
|
||||||
"glm-4.7": 200000,
|
"glm-4.7": 180000,
|
||||||
"glm-5": 200000,
|
"glm-5": 180000,
|
||||||
"glm-5-code": 200000,
|
"glm-5-code": 180000,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import uuid
|
|||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from typing import AsyncGenerator
|
from typing import AsyncGenerator
|
||||||
|
|
||||||
from sqlalchemy import NullPool, text
|
from sqlalchemy import NullPool
|
||||||
from sqlalchemy.ext.asyncio import (
|
from sqlalchemy.ext.asyncio import (
|
||||||
AsyncEngine,
|
AsyncEngine,
|
||||||
AsyncSession,
|
AsyncSession,
|
||||||
@@ -88,10 +88,6 @@ class DatabaseRegistry:
|
|||||||
try:
|
try:
|
||||||
async with async_session_factory() as session:
|
async with async_session_factory() as session:
|
||||||
try:
|
try:
|
||||||
result = await session.execute(text("SELECT pg_backend_pid(), current_setting('statement_timeout')"))
|
|
||||||
pid, timeout = result.one()
|
|
||||||
logger.warning(f"[stmt_timeout_debug] pid={pid} statement_timeout={timeout}")
|
|
||||||
await session.rollback()
|
|
||||||
yield session
|
yield session
|
||||||
await session.commit()
|
await session.commit()
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from pydantic import BaseModel
|
|||||||
from letta.errors import LettaInvalidArgumentError
|
from letta.errors import LettaInvalidArgumentError
|
||||||
from letta.otel.tracing import tracer
|
from letta.otel.tracing import tracer
|
||||||
from letta.schemas.enums import PrimitiveType
|
from letta.schemas.enums import PrimitiveType
|
||||||
|
from letta.schemas.provider_trace import BillingContext
|
||||||
from letta.validators import PRIMITIVE_ID_PATTERNS
|
from letta.validators import PRIMITIVE_ID_PATTERNS
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
@@ -30,18 +31,24 @@ class HeaderParams(BaseModel):
|
|||||||
letta_source: Optional[str] = None
|
letta_source: Optional[str] = None
|
||||||
sdk_version: Optional[str] = None
|
sdk_version: Optional[str] = None
|
||||||
experimental_params: Optional[ExperimentalParams] = None
|
experimental_params: Optional[ExperimentalParams] = None
|
||||||
|
billing_context: Optional[BillingContext] = None
|
||||||
|
|
||||||
|
|
||||||
def get_headers(
|
def get_headers(
|
||||||
actor_id: Optional[str] = Header(None, alias="user_id"),
|
actor_id: Optional[str] = Header(None, alias="user_id"),
|
||||||
user_agent: Optional[str] = Header(None, alias="User-Agent"),
|
user_agent: Optional[str] = Header(None, alias="User-Agent"),
|
||||||
project_id: Optional[str] = Header(None, alias="X-Project-Id"),
|
project_id: Optional[str] = Header(None, alias="X-Project-Id"),
|
||||||
letta_source: Optional[str] = Header(None, alias="X-Letta-Source"),
|
letta_source: Optional[str] = Header(None, alias="X-Letta-Source", include_in_schema=False),
|
||||||
sdk_version: Optional[str] = Header(None, alias="X-Stainless-Package-Version"),
|
sdk_version: Optional[str] = Header(None, alias="X-Stainless-Package-Version", include_in_schema=False),
|
||||||
message_async: Optional[str] = Header(None, alias="X-Experimental-Message-Async"),
|
message_async: Optional[str] = Header(None, alias="X-Experimental-Message-Async", include_in_schema=False),
|
||||||
letta_v1_agent: Optional[str] = Header(None, alias="X-Experimental-Letta-V1-Agent"),
|
letta_v1_agent: Optional[str] = Header(None, alias="X-Experimental-Letta-V1-Agent", include_in_schema=False),
|
||||||
letta_v1_agent_message_async: Optional[str] = Header(None, alias="X-Experimental-Letta-V1-Agent-Message-Async"),
|
letta_v1_agent_message_async: Optional[str] = Header(
|
||||||
modal_sandbox: Optional[str] = Header(None, alias="X-Experimental-Modal-Sandbox"),
|
None, alias="X-Experimental-Letta-V1-Agent-Message-Async", include_in_schema=False
|
||||||
|
),
|
||||||
|
modal_sandbox: Optional[str] = Header(None, alias="X-Experimental-Modal-Sandbox", include_in_schema=False),
|
||||||
|
billing_plan_type: Optional[str] = Header(None, alias="X-Billing-Plan-Type", include_in_schema=False),
|
||||||
|
billing_cost_source: Optional[str] = Header(None, alias="X-Billing-Cost-Source", include_in_schema=False),
|
||||||
|
billing_customer_id: Optional[str] = Header(None, alias="X-Billing-Customer-Id", include_in_schema=False),
|
||||||
) -> HeaderParams:
|
) -> HeaderParams:
|
||||||
"""Dependency injection function to extract common headers from requests."""
|
"""Dependency injection function to extract common headers from requests."""
|
||||||
with tracer.start_as_current_span("dependency.get_headers"):
|
with tracer.start_as_current_span("dependency.get_headers"):
|
||||||
@@ -63,6 +70,13 @@ def get_headers(
|
|||||||
letta_v1_agent_message_async=(letta_v1_agent_message_async == "true") if letta_v1_agent_message_async else None,
|
letta_v1_agent_message_async=(letta_v1_agent_message_async == "true") if letta_v1_agent_message_async else None,
|
||||||
modal_sandbox=(modal_sandbox == "true") if modal_sandbox else None,
|
modal_sandbox=(modal_sandbox == "true") if modal_sandbox else None,
|
||||||
),
|
),
|
||||||
|
billing_context=BillingContext(
|
||||||
|
plan_type=billing_plan_type,
|
||||||
|
cost_source=billing_cost_source,
|
||||||
|
customer_id=billing_customer_id,
|
||||||
|
)
|
||||||
|
if any([billing_plan_type, billing_cost_source, billing_customer_id])
|
||||||
|
else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ from letta.schemas.memory import (
|
|||||||
)
|
)
|
||||||
from letta.schemas.message import Message, MessageCreate, MessageCreateType, MessageSearchRequest, MessageSearchResult
|
from letta.schemas.message import Message, MessageCreate, MessageCreateType, MessageSearchRequest, MessageSearchResult
|
||||||
from letta.schemas.passage import Passage
|
from letta.schemas.passage import Passage
|
||||||
|
from letta.schemas.provider_trace import BillingContext
|
||||||
from letta.schemas.run import Run as PydanticRun, RunUpdate
|
from letta.schemas.run import Run as PydanticRun, RunUpdate
|
||||||
from letta.schemas.source import Source
|
from letta.schemas.source import Source
|
||||||
from letta.schemas.tool import Tool
|
from letta.schemas.tool import Tool
|
||||||
@@ -156,7 +157,7 @@ async def list_agents(
|
|||||||
order: Literal["asc", "desc"] = Query(
|
order: Literal["asc", "desc"] = Query(
|
||||||
"desc", description="Sort order for agents by creation time. 'asc' for oldest first, 'desc' for newest first"
|
"desc", description="Sort order for agents by creation time. 'asc' for oldest first, 'desc' for newest first"
|
||||||
),
|
),
|
||||||
order_by: Literal["created_at", "last_run_completion"] = Query("created_at", description="Field to sort by"),
|
order_by: Literal["created_at", "updated_at", "last_run_completion"] = Query("created_at", description="Field to sort by"),
|
||||||
ascending: bool = Query(
|
ascending: bool = Query(
|
||||||
False,
|
False,
|
||||||
description="Whether to sort agents oldest to newest (True) or newest to oldest (False, default)",
|
description="Whether to sort agents oldest to newest (True) or newest to oldest (False, default)",
|
||||||
@@ -1697,6 +1698,7 @@ async def send_message(
|
|||||||
actor=actor,
|
actor=actor,
|
||||||
request=request,
|
request=request,
|
||||||
run_type="send_message",
|
run_type="send_message",
|
||||||
|
billing_context=headers.billing_context,
|
||||||
)
|
)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@@ -1767,6 +1769,7 @@ async def send_message(
|
|||||||
include_return_message_types=request.include_return_message_types,
|
include_return_message_types=request.include_return_message_types,
|
||||||
client_tools=request.client_tools,
|
client_tools=request.client_tools,
|
||||||
include_compaction_messages=request.include_compaction_messages,
|
include_compaction_messages=request.include_compaction_messages,
|
||||||
|
billing_context=headers.billing_context,
|
||||||
)
|
)
|
||||||
run_status = result.stop_reason.stop_reason.run_status
|
run_status = result.stop_reason.stop_reason.run_status
|
||||||
return result
|
return result
|
||||||
@@ -1845,6 +1848,7 @@ async def send_message_streaming(
|
|||||||
actor=actor,
|
actor=actor,
|
||||||
request=request,
|
request=request,
|
||||||
run_type="send_message_streaming",
|
run_type="send_message_streaming",
|
||||||
|
billing_context=headers.billing_context,
|
||||||
)
|
)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
@@ -1868,6 +1872,13 @@ async def cancel_message(
|
|||||||
"""
|
"""
|
||||||
# TODO: WHY DOES THIS CANCEL A LIST OF RUNS?
|
# TODO: WHY DOES THIS CANCEL A LIST OF RUNS?
|
||||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||||
|
logger.info(
|
||||||
|
"[Interrupt] Cancel request received for agent=%s by actor=%s (org=%s), explicit_run_ids=%s",
|
||||||
|
agent_id,
|
||||||
|
actor.id,
|
||||||
|
actor.organization_id,
|
||||||
|
request.run_ids if request else None,
|
||||||
|
)
|
||||||
if not settings.track_agent_run:
|
if not settings.track_agent_run:
|
||||||
raise HTTPException(status_code=400, detail="Agent run tracking is disabled")
|
raise HTTPException(status_code=400, detail="Agent run tracking is disabled")
|
||||||
run_ids = request.run_ids if request else None
|
run_ids = request.run_ids if request else None
|
||||||
@@ -2036,6 +2047,7 @@ async def _process_message_background(
|
|||||||
include_return_message_types: list[MessageType] | None = None,
|
include_return_message_types: list[MessageType] | None = None,
|
||||||
override_model: str | None = None,
|
override_model: str | None = None,
|
||||||
include_compaction_messages: bool = False,
|
include_compaction_messages: bool = False,
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Background task to process the message and update run status."""
|
"""Background task to process the message and update run status."""
|
||||||
request_start_timestamp_ns = get_utc_timestamp_ns()
|
request_start_timestamp_ns = get_utc_timestamp_ns()
|
||||||
@@ -2067,6 +2079,7 @@ async def _process_message_background(
|
|||||||
request_start_timestamp_ns=request_start_timestamp_ns,
|
request_start_timestamp_ns=request_start_timestamp_ns,
|
||||||
include_return_message_types=include_return_message_types,
|
include_return_message_types=include_return_message_types,
|
||||||
include_compaction_messages=include_compaction_messages,
|
include_compaction_messages=include_compaction_messages,
|
||||||
|
billing_context=billing_context,
|
||||||
)
|
)
|
||||||
runs_manager = RunManager()
|
runs_manager = RunManager()
|
||||||
from letta.schemas.enums import RunStatus
|
from letta.schemas.enums import RunStatus
|
||||||
@@ -2235,6 +2248,7 @@ async def send_message_async(
|
|||||||
include_return_message_types=request.include_return_message_types,
|
include_return_message_types=request.include_return_message_types,
|
||||||
override_model=request.override_model,
|
override_model=request.override_model,
|
||||||
include_compaction_messages=request.include_compaction_messages,
|
include_compaction_messages=request.include_compaction_messages,
|
||||||
|
billing_context=headers.billing_context,
|
||||||
),
|
),
|
||||||
label=f"process_message_background_{run.id}",
|
label=f"process_message_background_{run.id}",
|
||||||
)
|
)
|
||||||
@@ -2419,7 +2433,11 @@ async def summarize_messages(
|
|||||||
|
|
||||||
# If mode changed from agent's original settings and prompt not explicitly set in request, then use the default prompt for the new mode
|
# If mode changed from agent's original settings and prompt not explicitly set in request, then use the default prompt for the new mode
|
||||||
# Ex: previously was sliding_window, now is all, so we need to use the default prompt for all mode
|
# Ex: previously was sliding_window, now is all, so we need to use the default prompt for all mode
|
||||||
if "mode" in changed_fields and agent.compaction_settings.mode != request.compaction_settings.mode:
|
if (
|
||||||
|
"mode" in changed_fields
|
||||||
|
and "prompt" not in changed_fields
|
||||||
|
and agent.compaction_settings.mode != request.compaction_settings.mode
|
||||||
|
):
|
||||||
from letta.services.summarizer.summarizer_config import get_default_prompt_for_mode
|
from letta.services.summarizer.summarizer_config import get_default_prompt_for_mode
|
||||||
|
|
||||||
compaction_settings.prompt = get_default_prompt_for_mode(compaction_settings.mode)
|
compaction_settings.prompt = get_default_prompt_for_mode(compaction_settings.mode)
|
||||||
@@ -2439,7 +2457,7 @@ async def summarize_messages(
|
|||||||
logger.warning(f"Summarization failed to reduce the number of messages. {num_messages_before} messages -> {num_messages_after}.")
|
logger.warning(f"Summarization failed to reduce the number of messages. {num_messages_before} messages -> {num_messages_after}.")
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
detail="Summarization failed to reduce the number of messages. You may need to use a different CompactionSettings (e.g. using `all` mode).",
|
detail="Summarization failed to reduce the number of messages. You may not have enough messages to compact or need to use a different CompactionSettings (e.g. using `all` mode).",
|
||||||
)
|
)
|
||||||
await agent_loop._checkpoint_messages(run_id=None, step_id=None, new_messages=[summary_message], in_context_messages=messages)
|
await agent_loop._checkpoint_messages(run_id=None, step_id=None, new_messages=[summary_message], in_context_messages=messages)
|
||||||
return CompactionResponse(
|
return CompactionResponse(
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from typing import Annotated, List, Literal, Optional
|
from typing import Annotated, List, Literal, Optional
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
from fastapi import APIRouter, Body, Depends, HTTPException, Query, status
|
from fastapi import APIRouter, Body, Depends, HTTPException, Query, status
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
@@ -18,6 +19,7 @@ from letta.schemas.job import LettaRequestConfig
|
|||||||
from letta.schemas.letta_message import LettaMessageUnion
|
from letta.schemas.letta_message import LettaMessageUnion
|
||||||
from letta.schemas.letta_request import ConversationMessageRequest, LettaStreamingRequest, RetrieveStreamRequest
|
from letta.schemas.letta_request import ConversationMessageRequest, LettaStreamingRequest, RetrieveStreamRequest
|
||||||
from letta.schemas.letta_response import LettaResponse
|
from letta.schemas.letta_response import LettaResponse
|
||||||
|
from letta.schemas.provider_trace import BillingContext
|
||||||
from letta.schemas.run import Run as PydanticRun
|
from letta.schemas.run import Run as PydanticRun
|
||||||
from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server
|
from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server
|
||||||
from letta.server.rest_api.redis_stream_manager import redis_sse_stream_generator
|
from letta.server.rest_api.redis_stream_manager import redis_sse_stream_generator
|
||||||
@@ -32,7 +34,7 @@ from letta.services.run_manager import RunManager
|
|||||||
from letta.services.streaming_service import StreamingService
|
from letta.services.streaming_service import StreamingService
|
||||||
from letta.services.summarizer.summarizer_config import CompactionSettings
|
from letta.services.summarizer.summarizer_config import CompactionSettings
|
||||||
from letta.settings import settings
|
from letta.settings import settings
|
||||||
from letta.validators import ConversationId
|
from letta.validators import ConversationId, ConversationIdOrDefault
|
||||||
|
|
||||||
router = APIRouter(prefix="/conversations", tags=["conversations"])
|
router = APIRouter(prefix="/conversations", tags=["conversations"])
|
||||||
|
|
||||||
@@ -148,7 +150,8 @@ ConversationMessagesResponse = Annotated[
|
|||||||
operation_id="list_conversation_messages",
|
operation_id="list_conversation_messages",
|
||||||
)
|
)
|
||||||
async def list_conversation_messages(
|
async def list_conversation_messages(
|
||||||
conversation_id: ConversationId,
|
conversation_id: ConversationIdOrDefault,
|
||||||
|
agent_id: Optional[str] = Query(None, description="Agent ID for agent-direct mode with 'default' conversation"),
|
||||||
server: SyncServer = Depends(get_letta_server),
|
server: SyncServer = Depends(get_letta_server),
|
||||||
headers: HeaderParams = Depends(get_headers),
|
headers: HeaderParams = Depends(get_headers),
|
||||||
before: Optional[str] = Query(
|
before: Optional[str] = Query(
|
||||||
@@ -172,8 +175,36 @@ async def list_conversation_messages(
|
|||||||
|
|
||||||
Returns LettaMessage objects (UserMessage, AssistantMessage, etc.) for all
|
Returns LettaMessage objects (UserMessage, AssistantMessage, etc.) for all
|
||||||
messages in the conversation, with support for cursor-based pagination.
|
messages in the conversation, with support for cursor-based pagination.
|
||||||
|
|
||||||
|
**Agent-direct mode**: Pass conversation_id="default" with agent_id parameter
|
||||||
|
to list messages from the agent's default conversation.
|
||||||
|
|
||||||
|
**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.
|
||||||
"""
|
"""
|
||||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||||
|
|
||||||
|
# Agent-direct mode: conversation_id="default" + agent_id param (preferred)
|
||||||
|
# OR conversation_id="agent-*" (backwards compat, deprecated)
|
||||||
|
resolved_agent_id = None
|
||||||
|
if conversation_id == "default" and agent_id:
|
||||||
|
resolved_agent_id = agent_id
|
||||||
|
elif conversation_id.startswith("agent-"):
|
||||||
|
resolved_agent_id = conversation_id
|
||||||
|
|
||||||
|
if resolved_agent_id:
|
||||||
|
return await server.get_agent_recall_async(
|
||||||
|
agent_id=resolved_agent_id,
|
||||||
|
after=after,
|
||||||
|
before=before,
|
||||||
|
limit=limit,
|
||||||
|
group_id=group_id,
|
||||||
|
conversation_id=None, # Default conversation (no isolation)
|
||||||
|
reverse=(order == "desc"),
|
||||||
|
return_message_object=False,
|
||||||
|
include_err=include_err,
|
||||||
|
actor=actor,
|
||||||
|
)
|
||||||
|
|
||||||
return await conversation_manager.list_conversation_messages(
|
return await conversation_manager.list_conversation_messages(
|
||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
actor=actor,
|
actor=actor,
|
||||||
@@ -186,6 +217,108 @@ async def list_conversation_messages(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _send_agent_direct_message(
|
||||||
|
agent_id: str,
|
||||||
|
request: ConversationMessageRequest,
|
||||||
|
server: SyncServer,
|
||||||
|
actor,
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
|
) -> StreamingResponse | LettaResponse:
|
||||||
|
"""
|
||||||
|
Handle agent-direct messaging with locking but without conversation features.
|
||||||
|
|
||||||
|
This is used when the conversation_id in the URL is actually an agent ID,
|
||||||
|
providing a unified endpoint while maintaining agent-level locking.
|
||||||
|
"""
|
||||||
|
redis_client = await get_redis_client()
|
||||||
|
|
||||||
|
# Streaming mode (default)
|
||||||
|
if request.streaming:
|
||||||
|
streaming_request = LettaStreamingRequest(
|
||||||
|
messages=request.messages,
|
||||||
|
streaming=True,
|
||||||
|
stream_tokens=request.stream_tokens,
|
||||||
|
include_pings=request.include_pings,
|
||||||
|
background=request.background,
|
||||||
|
max_steps=request.max_steps,
|
||||||
|
use_assistant_message=request.use_assistant_message,
|
||||||
|
assistant_message_tool_name=request.assistant_message_tool_name,
|
||||||
|
assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
|
||||||
|
include_return_message_types=request.include_return_message_types,
|
||||||
|
override_model=request.override_model,
|
||||||
|
client_tools=request.client_tools,
|
||||||
|
)
|
||||||
|
streaming_service = StreamingService(server)
|
||||||
|
run, result = await streaming_service.create_agent_stream(
|
||||||
|
agent_id=agent_id,
|
||||||
|
actor=actor,
|
||||||
|
request=streaming_request,
|
||||||
|
run_type="send_message",
|
||||||
|
conversation_id=None,
|
||||||
|
should_lock=True,
|
||||||
|
billing_context=billing_context,
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Non-streaming mode with locking
|
||||||
|
agent = await server.agent_manager.get_agent_by_id_async(
|
||||||
|
agent_id,
|
||||||
|
actor,
|
||||||
|
include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools", "tags"],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Handle model override if specified in the request
|
||||||
|
if request.override_model:
|
||||||
|
override_llm_config = await server.get_llm_config_from_handle_async(
|
||||||
|
actor=actor,
|
||||||
|
handle=request.override_model,
|
||||||
|
)
|
||||||
|
agent = agent.model_copy(update={"llm_config": override_llm_config})
|
||||||
|
|
||||||
|
# Acquire lock using agent_id as lock key
|
||||||
|
if not isinstance(redis_client, NoopAsyncRedisClient):
|
||||||
|
await redis_client.acquire_conversation_lock(
|
||||||
|
conversation_id=agent_id,
|
||||||
|
token=str(uuid4()),
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create a run for execution tracking
|
||||||
|
run = None
|
||||||
|
if settings.track_agent_run:
|
||||||
|
runs_manager = RunManager()
|
||||||
|
run = await runs_manager.create_run(
|
||||||
|
pydantic_run=PydanticRun(
|
||||||
|
agent_id=agent_id,
|
||||||
|
background=False,
|
||||||
|
metadata={
|
||||||
|
"run_type": "send_message",
|
||||||
|
},
|
||||||
|
request_config=LettaRequestConfig.from_letta_request(request),
|
||||||
|
),
|
||||||
|
actor=actor,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Set run_id in Redis for cancellation support
|
||||||
|
await redis_client.set(f"{REDIS_RUN_ID_PREFIX}:{agent_id}", run.id if run else None)
|
||||||
|
|
||||||
|
agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
|
||||||
|
return await agent_loop.step(
|
||||||
|
request.messages,
|
||||||
|
max_steps=request.max_steps,
|
||||||
|
run_id=run.id if run else None,
|
||||||
|
use_assistant_message=request.use_assistant_message,
|
||||||
|
include_return_message_types=request.include_return_message_types,
|
||||||
|
client_tools=request.client_tools,
|
||||||
|
conversation_id=None,
|
||||||
|
include_compaction_messages=request.include_compaction_messages,
|
||||||
|
billing_context=billing_context,
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
# Release lock
|
||||||
|
await redis_client.release_conversation_lock(agent_id)
|
||||||
|
|
||||||
|
|
||||||
@router.post(
|
@router.post(
|
||||||
"/{conversation_id}/messages",
|
"/{conversation_id}/messages",
|
||||||
response_model=LettaResponse,
|
response_model=LettaResponse,
|
||||||
@@ -201,7 +334,7 @@ async def list_conversation_messages(
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
async def send_conversation_message(
|
async def send_conversation_message(
|
||||||
conversation_id: ConversationId,
|
conversation_id: ConversationIdOrDefault,
|
||||||
request: ConversationMessageRequest = Body(...),
|
request: ConversationMessageRequest = Body(...),
|
||||||
server: SyncServer = Depends(get_letta_server),
|
server: SyncServer = Depends(get_letta_server),
|
||||||
headers: HeaderParams = Depends(get_headers),
|
headers: HeaderParams = Depends(get_headers),
|
||||||
@@ -212,12 +345,36 @@ async def send_conversation_message(
|
|||||||
This endpoint sends a message to an existing conversation.
|
This endpoint sends a message to an existing conversation.
|
||||||
By default (streaming=true), returns a streaming response (Server-Sent Events).
|
By default (streaming=true), returns a streaming response (Server-Sent Events).
|
||||||
Set streaming=false to get a complete JSON response.
|
Set streaming=false to get a complete JSON response.
|
||||||
|
|
||||||
|
**Agent-direct mode**: Pass conversation_id="default" with agent_id in request body
|
||||||
|
to send messages to the agent's default conversation with locking.
|
||||||
|
|
||||||
|
**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.
|
||||||
"""
|
"""
|
||||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||||
|
|
||||||
if not request.messages or len(request.messages) == 0:
|
if not request.messages or len(request.messages) == 0:
|
||||||
raise HTTPException(status_code=422, detail="Messages must not be empty")
|
raise HTTPException(status_code=422, detail="Messages must not be empty")
|
||||||
|
|
||||||
|
# Agent-direct mode: conversation_id="default" + agent_id in body (preferred)
|
||||||
|
# OR conversation_id="agent-*" (backwards compat, deprecated)
|
||||||
|
resolved_agent_id = None
|
||||||
|
if conversation_id == "default" and request.agent_id:
|
||||||
|
resolved_agent_id = request.agent_id
|
||||||
|
elif conversation_id.startswith("agent-"):
|
||||||
|
resolved_agent_id = conversation_id
|
||||||
|
|
||||||
|
if resolved_agent_id:
|
||||||
|
# Agent-direct mode: use agent ID, enable locking, skip conversation features
|
||||||
|
return await _send_agent_direct_message(
|
||||||
|
agent_id=resolved_agent_id,
|
||||||
|
request=request,
|
||||||
|
server=server,
|
||||||
|
actor=actor,
|
||||||
|
billing_context=headers.billing_context,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Normal conversation mode
|
||||||
conversation = await conversation_manager.get_conversation_by_id(
|
conversation = await conversation_manager.get_conversation_by_id(
|
||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
actor=actor,
|
actor=actor,
|
||||||
@@ -247,6 +404,7 @@ async def send_conversation_message(
|
|||||||
request=streaming_request,
|
request=streaming_request,
|
||||||
run_type="send_conversation_message",
|
run_type="send_conversation_message",
|
||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
|
billing_context=headers.billing_context,
|
||||||
)
|
)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@@ -265,6 +423,10 @@ async def send_conversation_message(
|
|||||||
)
|
)
|
||||||
if conversation.model_settings is not None:
|
if conversation.model_settings is not None:
|
||||||
update_params = conversation.model_settings._to_legacy_config_params()
|
update_params = conversation.model_settings._to_legacy_config_params()
|
||||||
|
# Don't clobber max_tokens with the Pydantic default when the caller
|
||||||
|
# didn't explicitly provide max_output_tokens.
|
||||||
|
if "max_output_tokens" not in conversation.model_settings.model_fields_set:
|
||||||
|
update_params.pop("max_tokens", None)
|
||||||
conversation_llm_config = conversation_llm_config.model_copy(update=update_params)
|
conversation_llm_config = conversation_llm_config.model_copy(update=update_params)
|
||||||
agent = agent.model_copy(update={"llm_config": conversation_llm_config})
|
agent = agent.model_copy(update={"llm_config": conversation_llm_config})
|
||||||
|
|
||||||
@@ -305,6 +467,7 @@ async def send_conversation_message(
|
|||||||
client_tools=request.client_tools,
|
client_tools=request.client_tools,
|
||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
include_compaction_messages=request.include_compaction_messages,
|
include_compaction_messages=request.include_compaction_messages,
|
||||||
|
billing_context=headers.billing_context,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -341,7 +504,7 @@ async def send_conversation_message(
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
async def retrieve_conversation_stream(
|
async def retrieve_conversation_stream(
|
||||||
conversation_id: ConversationId,
|
conversation_id: ConversationIdOrDefault,
|
||||||
request: RetrieveStreamRequest = Body(None),
|
request: RetrieveStreamRequest = Body(None),
|
||||||
headers: HeaderParams = Depends(get_headers),
|
headers: HeaderParams = Depends(get_headers),
|
||||||
server: SyncServer = Depends(get_letta_server),
|
server: SyncServer = Depends(get_letta_server),
|
||||||
@@ -351,18 +514,42 @@ async def retrieve_conversation_stream(
|
|||||||
|
|
||||||
This endpoint allows you to reconnect to an active background stream
|
This endpoint allows you to reconnect to an active background stream
|
||||||
for a conversation, enabling recovery from network interruptions.
|
for a conversation, enabling recovery from network interruptions.
|
||||||
|
|
||||||
|
**Agent-direct mode**: Pass conversation_id="default" with agent_id in request body
|
||||||
|
to retrieve the stream for the agent's most recent active run.
|
||||||
|
|
||||||
|
**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.
|
||||||
"""
|
"""
|
||||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||||
runs_manager = RunManager()
|
runs_manager = RunManager()
|
||||||
|
|
||||||
# Find the most recent active run for this conversation
|
# Agent-direct mode: conversation_id="default" + agent_id in body (preferred)
|
||||||
active_runs = await runs_manager.list_runs(
|
# OR conversation_id="agent-*" (backwards compat, deprecated)
|
||||||
actor=actor,
|
resolved_agent_id = None
|
||||||
conversation_id=conversation_id,
|
if conversation_id == "default" and request and request.agent_id:
|
||||||
statuses=[RunStatus.created, RunStatus.running],
|
resolved_agent_id = request.agent_id
|
||||||
limit=1,
|
elif conversation_id.startswith("agent-"):
|
||||||
ascending=False,
|
resolved_agent_id = conversation_id
|
||||||
)
|
|
||||||
|
# Find the most recent active run
|
||||||
|
if resolved_agent_id:
|
||||||
|
# Agent-direct mode: find runs by agent_id
|
||||||
|
active_runs = await runs_manager.list_runs(
|
||||||
|
actor=actor,
|
||||||
|
agent_id=resolved_agent_id,
|
||||||
|
statuses=[RunStatus.created, RunStatus.running],
|
||||||
|
limit=1,
|
||||||
|
ascending=False,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Normal mode: find runs by conversation_id
|
||||||
|
active_runs = await runs_manager.list_runs(
|
||||||
|
actor=actor,
|
||||||
|
conversation_id=conversation_id,
|
||||||
|
statuses=[RunStatus.created, RunStatus.running],
|
||||||
|
limit=1,
|
||||||
|
ascending=False,
|
||||||
|
)
|
||||||
|
|
||||||
if not active_runs:
|
if not active_runs:
|
||||||
raise LettaInvalidArgumentError("No active runs found for this conversation.")
|
raise LettaInvalidArgumentError("No active runs found for this conversation.")
|
||||||
@@ -417,7 +604,8 @@ async def retrieve_conversation_stream(
|
|||||||
|
|
||||||
@router.post("/{conversation_id}/cancel", operation_id="cancel_conversation")
|
@router.post("/{conversation_id}/cancel", operation_id="cancel_conversation")
|
||||||
async def cancel_conversation(
|
async def cancel_conversation(
|
||||||
conversation_id: ConversationId,
|
conversation_id: ConversationIdOrDefault,
|
||||||
|
agent_id: Optional[str] = Query(None, description="Agent ID for agent-direct mode with 'default' conversation"),
|
||||||
server: SyncServer = Depends(get_letta_server),
|
server: SyncServer = Depends(get_letta_server),
|
||||||
headers: HeaderParams = Depends(get_headers),
|
headers: HeaderParams = Depends(get_headers),
|
||||||
) -> dict:
|
) -> dict:
|
||||||
@@ -425,26 +613,58 @@ async def cancel_conversation(
|
|||||||
Cancel runs associated with a conversation.
|
Cancel runs associated with a conversation.
|
||||||
|
|
||||||
Note: To cancel active runs, Redis is required.
|
Note: To cancel active runs, Redis is required.
|
||||||
|
|
||||||
|
**Agent-direct mode**: Pass conversation_id="default" with agent_id query parameter
|
||||||
|
to cancel runs for the agent's default conversation.
|
||||||
|
|
||||||
|
**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.
|
||||||
"""
|
"""
|
||||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||||
|
logger.info(
|
||||||
|
"[Interrupt] Cancel request received for conversation=%s by actor=%s (org=%s)",
|
||||||
|
conversation_id,
|
||||||
|
actor.id,
|
||||||
|
actor.organization_id,
|
||||||
|
)
|
||||||
|
|
||||||
if not settings.track_agent_run:
|
if not settings.track_agent_run:
|
||||||
raise HTTPException(status_code=400, detail="Agent run tracking is disabled")
|
raise HTTPException(status_code=400, detail="Agent run tracking is disabled")
|
||||||
|
|
||||||
# Verify conversation exists and get agent_id
|
# Agent-direct mode: conversation_id="default" + agent_id param (preferred)
|
||||||
conversation = await conversation_manager.get_conversation_by_id(
|
# OR conversation_id="agent-*" (backwards compat, deprecated)
|
||||||
conversation_id=conversation_id,
|
resolved_agent_id = None
|
||||||
actor=actor,
|
if conversation_id == "default" and agent_id:
|
||||||
)
|
resolved_agent_id = agent_id
|
||||||
|
elif conversation_id.startswith("agent-"):
|
||||||
|
resolved_agent_id = conversation_id
|
||||||
|
|
||||||
|
if resolved_agent_id:
|
||||||
|
# Agent-direct mode: use agent_id directly, skip conversation lookup
|
||||||
|
# Find active runs for this agent (default conversation has conversation_id=None)
|
||||||
|
runs = await server.run_manager.list_runs(
|
||||||
|
actor=actor,
|
||||||
|
agent_id=resolved_agent_id,
|
||||||
|
statuses=[RunStatus.created, RunStatus.running],
|
||||||
|
ascending=False,
|
||||||
|
limit=100,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Verify conversation exists and get agent_id
|
||||||
|
conversation = await conversation_manager.get_conversation_by_id(
|
||||||
|
conversation_id=conversation_id,
|
||||||
|
actor=actor,
|
||||||
|
)
|
||||||
|
agent_id = conversation.agent_id
|
||||||
|
|
||||||
|
# Find active runs for this conversation
|
||||||
|
runs = await server.run_manager.list_runs(
|
||||||
|
actor=actor,
|
||||||
|
statuses=[RunStatus.created, RunStatus.running],
|
||||||
|
ascending=False,
|
||||||
|
conversation_id=conversation_id,
|
||||||
|
limit=100,
|
||||||
|
)
|
||||||
|
|
||||||
# Find active runs for this conversation
|
|
||||||
runs = await server.run_manager.list_runs(
|
|
||||||
actor=actor,
|
|
||||||
statuses=[RunStatus.created, RunStatus.running],
|
|
||||||
ascending=False,
|
|
||||||
conversation_id=conversation_id,
|
|
||||||
limit=100,
|
|
||||||
)
|
|
||||||
run_ids = [run.id for run in runs]
|
run_ids = [run.id for run in runs]
|
||||||
|
|
||||||
if not run_ids:
|
if not run_ids:
|
||||||
@@ -461,7 +681,7 @@ async def cancel_conversation(
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to cancel Lettuce run {run_id}: {e}")
|
logger.error(f"Failed to cancel Lettuce run {run_id}: {e}")
|
||||||
|
|
||||||
await server.run_manager.cancel_run(actor=actor, agent_id=conversation.agent_id, run_id=run_id)
|
await server.run_manager.cancel_run(actor=actor, agent_id=agent_id, run_id=run_id)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
results[run_id] = "failed"
|
results[run_id] = "failed"
|
||||||
logger.error(f"Failed to cancel run {run_id}: {str(e)}")
|
logger.error(f"Failed to cancel run {run_id}: {str(e)}")
|
||||||
@@ -473,6 +693,10 @@ async def cancel_conversation(
|
|||||||
|
|
||||||
|
|
||||||
class CompactionRequest(BaseModel):
|
class CompactionRequest(BaseModel):
|
||||||
|
agent_id: Optional[str] = Field(
|
||||||
|
default=None,
|
||||||
|
description="Agent ID for agent-direct mode with 'default' conversation. Use with conversation_id='default' in the URL path.",
|
||||||
|
)
|
||||||
compaction_settings: Optional[CompactionSettings] = Field(
|
compaction_settings: Optional[CompactionSettings] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Optional compaction settings to use for this summarization request. If not provided, the agent's default settings will be used.",
|
description="Optional compaction settings to use for this summarization request. If not provided, the agent's default settings will be used.",
|
||||||
@@ -487,7 +711,7 @@ class CompactionResponse(BaseModel):
|
|||||||
|
|
||||||
@router.post("/{conversation_id}/compact", response_model=CompactionResponse, operation_id="compact_conversation")
|
@router.post("/{conversation_id}/compact", response_model=CompactionResponse, operation_id="compact_conversation")
|
||||||
async def compact_conversation(
|
async def compact_conversation(
|
||||||
conversation_id: ConversationId,
|
conversation_id: ConversationIdOrDefault,
|
||||||
request: Optional[CompactionRequest] = Body(default=None),
|
request: Optional[CompactionRequest] = Body(default=None),
|
||||||
server: SyncServer = Depends(get_letta_server),
|
server: SyncServer = Depends(get_letta_server),
|
||||||
headers: HeaderParams = Depends(get_headers),
|
headers: HeaderParams = Depends(get_headers),
|
||||||
@@ -497,23 +721,45 @@ async def compact_conversation(
|
|||||||
|
|
||||||
This endpoint summarizes the in-context messages for a specific conversation,
|
This endpoint summarizes the in-context messages for a specific conversation,
|
||||||
reducing the message count while preserving important context.
|
reducing the message count while preserving important context.
|
||||||
|
|
||||||
|
**Agent-direct mode**: Pass conversation_id="default" with agent_id in request body
|
||||||
|
to compact the agent's default conversation messages.
|
||||||
|
|
||||||
|
**Deprecated**: Passing an agent ID as conversation_id still works but will be removed.
|
||||||
"""
|
"""
|
||||||
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||||
|
|
||||||
# Get the conversation to find the agent_id
|
# Agent-direct mode: conversation_id="default" + agent_id in body (preferred)
|
||||||
conversation = await conversation_manager.get_conversation_by_id(
|
# OR conversation_id="agent-*" (backwards compat, deprecated)
|
||||||
conversation_id=conversation_id,
|
resolved_agent_id = None
|
||||||
actor=actor,
|
if conversation_id == "default" and request and request.agent_id:
|
||||||
)
|
resolved_agent_id = request.agent_id
|
||||||
|
elif conversation_id.startswith("agent-"):
|
||||||
|
resolved_agent_id = conversation_id
|
||||||
|
|
||||||
# Get the agent state
|
if resolved_agent_id:
|
||||||
agent = await server.agent_manager.get_agent_by_id_async(conversation.agent_id, actor, include_relationships=["multi_agent_group"])
|
# Agent-direct mode: compact agent's default conversation
|
||||||
|
agent = await server.agent_manager.get_agent_by_id_async(resolved_agent_id, actor, include_relationships=["multi_agent_group"])
|
||||||
|
in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
|
||||||
|
agent_loop = LettaAgentV3(agent_state=agent, actor=actor)
|
||||||
|
else:
|
||||||
|
# Get the conversation to find the agent_id
|
||||||
|
conversation = await conversation_manager.get_conversation_by_id(
|
||||||
|
conversation_id=conversation_id,
|
||||||
|
actor=actor,
|
||||||
|
)
|
||||||
|
|
||||||
# Get in-context messages for this conversation
|
# Get the agent state
|
||||||
in_context_messages = await conversation_manager.get_messages_for_conversation(
|
agent = await server.agent_manager.get_agent_by_id_async(conversation.agent_id, actor, include_relationships=["multi_agent_group"])
|
||||||
conversation_id=conversation_id,
|
|
||||||
actor=actor,
|
# Get in-context messages for this conversation
|
||||||
)
|
in_context_messages = await conversation_manager.get_messages_for_conversation(
|
||||||
|
conversation_id=conversation_id,
|
||||||
|
actor=actor,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create agent loop with conversation context
|
||||||
|
agent_loop = LettaAgentV3(agent_state=agent, actor=actor, conversation_id=conversation_id)
|
||||||
|
|
||||||
if not in_context_messages:
|
if not in_context_messages:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
@@ -521,10 +767,27 @@ async def compact_conversation(
|
|||||||
detail="No in-context messages found for this conversation.",
|
detail="No in-context messages found for this conversation.",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create agent loop with conversation context
|
# Merge request compaction_settings with agent's settings (request overrides agent)
|
||||||
agent_loop = LettaAgentV3(agent_state=agent, actor=actor, conversation_id=conversation_id)
|
if agent.compaction_settings and request and request.compaction_settings:
|
||||||
|
# Start with agent's settings, override with new values from request
|
||||||
|
# Use model_fields_set to get the fields that were changed in the request (want to ignore the defaults that get set automatically)
|
||||||
|
compaction_settings = agent.compaction_settings.copy() # do not mutate original agent compaction settings
|
||||||
|
changed_fields = request.compaction_settings.model_fields_set
|
||||||
|
for field in changed_fields:
|
||||||
|
setattr(compaction_settings, field, getattr(request.compaction_settings, field))
|
||||||
|
|
||||||
compaction_settings = request.compaction_settings if request else None
|
# If mode changed from agent's original settings and prompt not explicitly set in request, then use the default prompt for the new mode
|
||||||
|
# Ex: previously was sliding_window, now is all, so we need to use the default prompt for all mode
|
||||||
|
if (
|
||||||
|
"mode" in changed_fields
|
||||||
|
and "prompt" not in changed_fields
|
||||||
|
and agent.compaction_settings.mode != request.compaction_settings.mode
|
||||||
|
):
|
||||||
|
from letta.services.summarizer.summarizer_config import get_default_prompt_for_mode
|
||||||
|
|
||||||
|
compaction_settings.prompt = get_default_prompt_for_mode(compaction_settings.mode)
|
||||||
|
else:
|
||||||
|
compaction_settings = (request and request.compaction_settings) or agent.compaction_settings
|
||||||
num_messages_before = len(in_context_messages)
|
num_messages_before = len(in_context_messages)
|
||||||
|
|
||||||
# Run compaction
|
# Run compaction
|
||||||
@@ -537,13 +800,11 @@ async def compact_conversation(
|
|||||||
|
|
||||||
# Validate compaction reduced messages
|
# Validate compaction reduced messages
|
||||||
if num_messages_before <= num_messages_after:
|
if num_messages_before <= num_messages_after:
|
||||||
logger.warning(
|
logger.warning(f"Summarization failed to reduce the number of messages. {num_messages_before} messages -> {num_messages_after}.")
|
||||||
f"Summarization failed to reduce the number of messages. {num_messages_before} messages -> {num_messages_after} (only expected if drop_tool_returns is True)."
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
detail="Summarization failed to reduce the number of messages. You may not have enough messages to compact or need to use a different CompactionSettings (e.g. using `all` mode).",
|
||||||
)
|
)
|
||||||
# raise HTTPException(
|
|
||||||
# status_code=status.HTTP_400_BAD_REQUEST,
|
|
||||||
# detail="Summarization failed to reduce the number of messages. You may need to use a different CompactionSettings (e.g. using `all` mode).",
|
|
||||||
# )
|
|
||||||
|
|
||||||
# Checkpoint the messages (this will update the conversation_messages table)
|
# Checkpoint the messages (this will update the conversation_messages table)
|
||||||
await agent_loop._checkpoint_messages(run_id=None, step_id=None, new_messages=[summary_message], in_context_messages=messages)
|
await agent_loop._checkpoint_messages(run_id=None, step_id=None, new_messages=[summary_message], in_context_messages=messages)
|
||||||
|
|||||||
@@ -29,11 +29,23 @@ from starlette.background import BackgroundTask
|
|||||||
|
|
||||||
from letta.log import get_logger
|
from letta.log import get_logger
|
||||||
from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server
|
from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server
|
||||||
|
from letta.services.memory_repo.path_mapping import memory_block_label_from_markdown_path
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
_background_tasks: set[asyncio.Task] = set()
|
_background_tasks: set[asyncio.Task] = set()
|
||||||
|
|
||||||
|
|
||||||
|
def _is_syncable_block_markdown_path(path: str) -> bool:
|
||||||
|
"""Return whether a markdown path should be mirrored into block cache.
|
||||||
|
|
||||||
|
Special-case skills so only skill definitions are mirrored:
|
||||||
|
- sync `skills/{skill_name}/SKILL.md` as label `skills/{skill_name}`
|
||||||
|
- ignore all other markdown under `skills/`
|
||||||
|
"""
|
||||||
|
return memory_block_label_from_markdown_path(path) is not None
|
||||||
|
|
||||||
|
|
||||||
router = APIRouter(prefix="/git", tags=["git"], include_in_schema=False)
|
router = APIRouter(prefix="/git", tags=["git"], include_in_schema=False)
|
||||||
|
|
||||||
# Global storage for the server instance (set during app startup)
|
# Global storage for the server instance (set during app startup)
|
||||||
@@ -100,7 +112,7 @@ async def _sync_after_push(actor_id: str, agent_id: str) -> None:
|
|||||||
expected_labels = set()
|
expected_labels = set()
|
||||||
from letta.services.memory_repo.block_markdown import parse_block_markdown
|
from letta.services.memory_repo.block_markdown import parse_block_markdown
|
||||||
|
|
||||||
md_file_paths = sorted([file_path for file_path in files if file_path.endswith(".md")])
|
md_file_paths = sorted([file_path for file_path in files if _is_syncable_block_markdown_path(file_path)])
|
||||||
nested_md_file_paths = [file_path for file_path in md_file_paths if "/" in file_path[:-3]]
|
nested_md_file_paths = [file_path for file_path in md_file_paths if "/" in file_path[:-3]]
|
||||||
logger.info(
|
logger.info(
|
||||||
"Post-push sync file scan: agent=%s total_files=%d md_files=%d nested_md_files=%d sample_md_paths=%s",
|
"Post-push sync file scan: agent=%s total_files=%d md_files=%d nested_md_files=%d sample_md_paths=%s",
|
||||||
@@ -113,10 +125,12 @@ async def _sync_after_push(actor_id: str, agent_id: str) -> None:
|
|||||||
|
|
||||||
synced = 0
|
synced = 0
|
||||||
for file_path, content in files.items():
|
for file_path, content in files.items():
|
||||||
if not file_path.endswith(".md"):
|
if not _is_syncable_block_markdown_path(file_path):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
label = file_path[:-3]
|
label = memory_block_label_from_markdown_path(file_path)
|
||||||
|
if label is None:
|
||||||
|
continue
|
||||||
expected_labels.add(label)
|
expected_labels.add(label)
|
||||||
|
|
||||||
# Parse frontmatter to extract metadata alongside value
|
# Parse frontmatter to extract metadata alongside value
|
||||||
|
|||||||
@@ -364,6 +364,8 @@ def create_approval_request_message_from_llm_response(
|
|||||||
)
|
)
|
||||||
if pre_computed_assistant_message_id:
|
if pre_computed_assistant_message_id:
|
||||||
approval_message.id = decrement_message_uuid(pre_computed_assistant_message_id)
|
approval_message.id = decrement_message_uuid(pre_computed_assistant_message_id)
|
||||||
|
# Set otid to match streaming interface pattern (index -1 returns id unchanged)
|
||||||
|
approval_message.otid = Message.generate_otid_from_id(approval_message.id, -1)
|
||||||
messages.append(approval_message)
|
messages.append(approval_message)
|
||||||
return messages
|
return messages
|
||||||
|
|
||||||
|
|||||||
@@ -562,6 +562,10 @@ class SyncServer(object):
|
|||||||
# update with model_settings
|
# update with model_settings
|
||||||
if request.model_settings is not None:
|
if request.model_settings is not None:
|
||||||
update_llm_config_params = request.model_settings._to_legacy_config_params()
|
update_llm_config_params = request.model_settings._to_legacy_config_params()
|
||||||
|
# Don't clobber max_tokens with the Pydantic default when the caller
|
||||||
|
# didn't explicitly provide max_output_tokens in the request.
|
||||||
|
if "max_output_tokens" not in request.model_settings.model_fields_set:
|
||||||
|
update_llm_config_params.pop("max_tokens", None)
|
||||||
request.llm_config = request.llm_config.model_copy(update=update_llm_config_params)
|
request.llm_config = request.llm_config.model_copy(update=update_llm_config_params)
|
||||||
|
|
||||||
# Copy parallel_tool_calls from request to llm_config if provided
|
# Copy parallel_tool_calls from request to llm_config if provided
|
||||||
@@ -675,6 +679,12 @@ class SyncServer(object):
|
|||||||
# Get the current agent's llm_config if not already set
|
# Get the current agent's llm_config if not already set
|
||||||
agent = await self.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
|
agent = await self.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
|
||||||
request.llm_config = agent.llm_config.model_copy()
|
request.llm_config = agent.llm_config.model_copy()
|
||||||
|
else:
|
||||||
|
# TODO: Refactor update_agent to accept partial llm_config so we
|
||||||
|
# don't need to fetch the full agent just to preserve max_tokens.
|
||||||
|
if request.max_tokens is None and "max_output_tokens" not in request.model_settings.model_fields_set:
|
||||||
|
agent = await self.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
|
||||||
|
request.llm_config.max_tokens = agent.llm_config.max_tokens
|
||||||
update_llm_config_params = request.model_settings._to_legacy_config_params()
|
update_llm_config_params = request.model_settings._to_legacy_config_params()
|
||||||
# Don't clobber max_tokens with the Pydantic default when the caller
|
# Don't clobber max_tokens with the Pydantic default when the caller
|
||||||
# didn't explicitly provide max_output_tokens in the request.
|
# didn't explicitly provide max_output_tokens in the request.
|
||||||
|
|||||||
@@ -24,8 +24,7 @@ from letta.constants import (
|
|||||||
INCLUDE_MODEL_KEYWORDS_BASE_TOOL_RULES,
|
INCLUDE_MODEL_KEYWORDS_BASE_TOOL_RULES,
|
||||||
RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE,
|
RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE,
|
||||||
)
|
)
|
||||||
|
from letta.errors import LettaError
|
||||||
from letta.errors import LettaAgentNotFoundError, LettaError, LettaInvalidArgumentError
|
|
||||||
from letta.helpers import ToolRulesSolver
|
from letta.helpers import ToolRulesSolver
|
||||||
from letta.helpers.datetime_helpers import get_utc_time
|
from letta.helpers.datetime_helpers import get_utc_time
|
||||||
from letta.log import get_logger
|
from letta.log import get_logger
|
||||||
@@ -789,6 +788,25 @@ class AgentManager:
|
|||||||
agent.agent_type,
|
agent.agent_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Upsert compaction_settings: merge incoming partial update with existing settings
|
||||||
|
if agent_update.compaction_settings is not None:
|
||||||
|
# If mode changed, update the prompt to the default for the new mode
|
||||||
|
changed_fields = agent_update.compaction_settings.model_fields_set
|
||||||
|
if (
|
||||||
|
agent.compaction_settings is not None
|
||||||
|
and "mode" in changed_fields
|
||||||
|
and agent_update.compaction_settings.mode != agent.compaction_settings.mode
|
||||||
|
):
|
||||||
|
from letta.services.summarizer.summarizer_config import get_default_prompt_for_mode
|
||||||
|
|
||||||
|
agent_update.compaction_settings.prompt = get_default_prompt_for_mode(agent_update.compaction_settings.mode)
|
||||||
|
|
||||||
|
# Fill in unchanged fields from existing settings
|
||||||
|
if agent.compaction_settings is not None:
|
||||||
|
for field in agent.compaction_settings.model_fields:
|
||||||
|
if field not in changed_fields:
|
||||||
|
setattr(agent_update.compaction_settings, field, getattr(agent.compaction_settings, field))
|
||||||
|
|
||||||
scalar_updates = {
|
scalar_updates = {
|
||||||
"name": agent_update.name,
|
"name": agent_update.name,
|
||||||
"system": agent_update.system,
|
"system": agent_update.system,
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ if TYPE_CHECKING:
|
|||||||
from sqlalchemy import and_, asc, delete, desc, func, nulls_last, or_, select
|
from sqlalchemy import and_, asc, delete, desc, func, nulls_last, or_, select
|
||||||
|
|
||||||
from letta.errors import LettaInvalidArgumentError
|
from letta.errors import LettaInvalidArgumentError
|
||||||
|
from letta.helpers.datetime_helpers import get_utc_time
|
||||||
from letta.orm.agent import Agent as AgentModel
|
from letta.orm.agent import Agent as AgentModel
|
||||||
from letta.orm.block import Block as BlockModel
|
from letta.orm.block import Block as BlockModel
|
||||||
from letta.orm.blocks_conversations import BlocksConversations
|
from letta.orm.blocks_conversations import BlocksConversations
|
||||||
@@ -29,6 +30,21 @@ from letta.utils import enforce_types
|
|||||||
class ConversationManager:
|
class ConversationManager:
|
||||||
"""Manager class to handle business logic related to Conversations."""
|
"""Manager class to handle business logic related to Conversations."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _serialize_model_settings(model_settings) -> Optional[dict]:
|
||||||
|
"""Serialize model settings for DB storage, stripping max_output_tokens if not explicitly set.
|
||||||
|
|
||||||
|
Uses model_dump() to preserve all fields (including the provider_type discriminator),
|
||||||
|
but removes max_output_tokens when it wasn't explicitly provided by the caller so we
|
||||||
|
don't persist the Pydantic default (4096) and later overwrite the agent's own value.
|
||||||
|
"""
|
||||||
|
if model_settings is None:
|
||||||
|
return None
|
||||||
|
data = model_settings.model_dump()
|
||||||
|
if "max_output_tokens" not in model_settings.model_fields_set:
|
||||||
|
data.pop("max_output_tokens", None)
|
||||||
|
return data
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
@trace_method
|
@trace_method
|
||||||
async def create_conversation(
|
async def create_conversation(
|
||||||
@@ -56,7 +72,7 @@ class ConversationManager:
|
|||||||
summary=conversation_create.summary,
|
summary=conversation_create.summary,
|
||||||
organization_id=actor.organization_id,
|
organization_id=actor.organization_id,
|
||||||
model=conversation_create.model,
|
model=conversation_create.model,
|
||||||
model_settings=conversation_create.model_settings.model_dump() if conversation_create.model_settings else None,
|
model_settings=self._serialize_model_settings(conversation_create.model_settings),
|
||||||
)
|
)
|
||||||
await conversation.create_async(session, actor=actor)
|
await conversation.create_async(session, actor=actor)
|
||||||
|
|
||||||
@@ -73,7 +89,101 @@ class ConversationManager:
|
|||||||
|
|
||||||
pydantic_conversation = conversation.to_pydantic()
|
pydantic_conversation = conversation.to_pydantic()
|
||||||
pydantic_conversation.isolated_block_ids = isolated_block_ids
|
pydantic_conversation.isolated_block_ids = isolated_block_ids
|
||||||
return pydantic_conversation
|
|
||||||
|
# Compile and persist the initial system message for this conversation
|
||||||
|
# This ensures the conversation captures the latest memory block state at creation time
|
||||||
|
await self.compile_and_save_system_message_for_conversation(
|
||||||
|
conversation_id=pydantic_conversation.id,
|
||||||
|
agent_id=agent_id,
|
||||||
|
actor=actor,
|
||||||
|
)
|
||||||
|
|
||||||
|
return pydantic_conversation
|
||||||
|
|
||||||
|
@trace_method
|
||||||
|
async def compile_and_save_system_message_for_conversation(
|
||||||
|
self,
|
||||||
|
conversation_id: str,
|
||||||
|
agent_id: str,
|
||||||
|
actor: PydanticUser,
|
||||||
|
agent_state: Optional["AgentState"] = None,
|
||||||
|
message_manager: Optional[object] = None,
|
||||||
|
) -> PydanticMessage:
|
||||||
|
"""Compile and persist the initial system message for a conversation.
|
||||||
|
|
||||||
|
This recompiles the system prompt with the latest memory block values
|
||||||
|
and metadata, ensuring the conversation starts with an up-to-date
|
||||||
|
system message.
|
||||||
|
|
||||||
|
This is the single source of truth for creating a conversation's system
|
||||||
|
message — used both at conversation creation time and as a fallback
|
||||||
|
when a conversation has no messages yet.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
conversation_id: The conversation to add the system message to
|
||||||
|
agent_id: The agent this conversation belongs to
|
||||||
|
actor: The user performing the action
|
||||||
|
agent_state: Optional pre-loaded agent state (avoids redundant DB load)
|
||||||
|
message_manager: Optional pre-loaded MessageManager instance
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The persisted system message
|
||||||
|
"""
|
||||||
|
# Lazy imports to avoid circular dependencies
|
||||||
|
from letta.prompts.prompt_generator import PromptGenerator
|
||||||
|
from letta.services.message_manager import MessageManager
|
||||||
|
from letta.services.passage_manager import PassageManager
|
||||||
|
|
||||||
|
if message_manager is None:
|
||||||
|
message_manager = MessageManager()
|
||||||
|
|
||||||
|
if agent_state is None:
|
||||||
|
from letta.services.agent_manager import AgentManager
|
||||||
|
|
||||||
|
agent_state = await AgentManager().get_agent_by_id_async(
|
||||||
|
agent_id=agent_id,
|
||||||
|
include_relationships=["memory", "sources"],
|
||||||
|
actor=actor,
|
||||||
|
)
|
||||||
|
|
||||||
|
passage_manager = PassageManager()
|
||||||
|
num_messages = await message_manager.size_async(actor=actor, agent_id=agent_id)
|
||||||
|
num_archival_memories = await passage_manager.agent_passage_size_async(actor=actor, agent_id=agent_id)
|
||||||
|
|
||||||
|
# Compile the system message with current memory state
|
||||||
|
system_message_str = await PromptGenerator.compile_system_message_async(
|
||||||
|
system_prompt=agent_state.system,
|
||||||
|
in_context_memory=agent_state.memory,
|
||||||
|
in_context_memory_last_edit=get_utc_time(),
|
||||||
|
timezone=agent_state.timezone,
|
||||||
|
user_defined_variables=None,
|
||||||
|
append_icm_if_missing=True,
|
||||||
|
previous_message_count=num_messages,
|
||||||
|
archival_memory_size=num_archival_memories,
|
||||||
|
sources=agent_state.sources,
|
||||||
|
max_files_open=agent_state.max_files_open,
|
||||||
|
)
|
||||||
|
|
||||||
|
system_message = PydanticMessage.dict_to_message(
|
||||||
|
agent_id=agent_id,
|
||||||
|
model=agent_state.llm_config.model,
|
||||||
|
openai_message_dict={"role": "system", "content": system_message_str},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Persist the new system message
|
||||||
|
persisted_messages = await message_manager.create_many_messages_async([system_message], actor=actor)
|
||||||
|
system_message = persisted_messages[0]
|
||||||
|
|
||||||
|
# Add it to the conversation tracking at position 0
|
||||||
|
await self.add_messages_to_conversation(
|
||||||
|
conversation_id=conversation_id,
|
||||||
|
agent_id=agent_id,
|
||||||
|
message_ids=[system_message.id],
|
||||||
|
actor=actor,
|
||||||
|
starting_position=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
return system_message
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
@trace_method
|
@trace_method
|
||||||
@@ -133,22 +243,15 @@ class ConversationManager:
|
|||||||
if sort_by == "last_run_completion":
|
if sort_by == "last_run_completion":
|
||||||
# Subquery to get the latest completed_at for each conversation
|
# Subquery to get the latest completed_at for each conversation
|
||||||
latest_run_subquery = (
|
latest_run_subquery = (
|
||||||
select(
|
select(RunModel.conversation_id, func.max(RunModel.completed_at).label("last_run_completion"))
|
||||||
RunModel.conversation_id,
|
|
||||||
func.max(RunModel.completed_at).label("last_run_completion")
|
|
||||||
)
|
|
||||||
.where(RunModel.conversation_id.isnot(None))
|
.where(RunModel.conversation_id.isnot(None))
|
||||||
.group_by(RunModel.conversation_id)
|
.group_by(RunModel.conversation_id)
|
||||||
.subquery()
|
.subquery()
|
||||||
)
|
)
|
||||||
|
|
||||||
# Join conversations with the subquery
|
# Join conversations with the subquery
|
||||||
stmt = (
|
stmt = select(ConversationModel).outerjoin(
|
||||||
select(ConversationModel)
|
latest_run_subquery, ConversationModel.id == latest_run_subquery.c.conversation_id
|
||||||
.outerjoin(
|
|
||||||
latest_run_subquery,
|
|
||||||
ConversationModel.id == latest_run_subquery.c.conversation_id
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
sort_column = latest_run_subquery.c.last_run_completion
|
sort_column = latest_run_subquery.c.last_run_completion
|
||||||
sort_nulls_last = True
|
sort_nulls_last = True
|
||||||
@@ -170,10 +273,12 @@ class ConversationManager:
|
|||||||
|
|
||||||
# Add summary search filter if provided
|
# Add summary search filter if provided
|
||||||
if summary_search:
|
if summary_search:
|
||||||
conditions.extend([
|
conditions.extend(
|
||||||
ConversationModel.summary.isnot(None),
|
[
|
||||||
ConversationModel.summary.contains(summary_search),
|
ConversationModel.summary.isnot(None),
|
||||||
])
|
ConversationModel.summary.contains(summary_search),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
stmt = stmt.where(and_(*conditions))
|
stmt = stmt.where(and_(*conditions))
|
||||||
|
|
||||||
@@ -182,10 +287,7 @@ class ConversationManager:
|
|||||||
# Get the sort value for the cursor conversation
|
# Get the sort value for the cursor conversation
|
||||||
if sort_by == "last_run_completion":
|
if sort_by == "last_run_completion":
|
||||||
cursor_query = (
|
cursor_query = (
|
||||||
select(
|
select(ConversationModel.id, func.max(RunModel.completed_at).label("last_run_completion"))
|
||||||
ConversationModel.id,
|
|
||||||
func.max(RunModel.completed_at).label("last_run_completion")
|
|
||||||
)
|
|
||||||
.outerjoin(RunModel, ConversationModel.id == RunModel.conversation_id)
|
.outerjoin(RunModel, ConversationModel.id == RunModel.conversation_id)
|
||||||
.where(ConversationModel.id == after)
|
.where(ConversationModel.id == after)
|
||||||
.group_by(ConversationModel.id)
|
.group_by(ConversationModel.id)
|
||||||
@@ -198,16 +300,11 @@ class ConversationManager:
|
|||||||
# Cursor is at NULL - if ascending, get non-NULLs or NULLs with greater ID
|
# Cursor is at NULL - if ascending, get non-NULLs or NULLs with greater ID
|
||||||
if ascending:
|
if ascending:
|
||||||
stmt = stmt.where(
|
stmt = stmt.where(
|
||||||
or_(
|
or_(and_(sort_column.is_(None), ConversationModel.id > after_id), sort_column.isnot(None))
|
||||||
and_(sort_column.is_(None), ConversationModel.id > after_id),
|
|
||||||
sort_column.isnot(None)
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# If descending, get NULLs with smaller ID
|
# If descending, get NULLs with smaller ID
|
||||||
stmt = stmt.where(
|
stmt = stmt.where(and_(sort_column.is_(None), ConversationModel.id < after_id))
|
||||||
and_(sort_column.is_(None), ConversationModel.id < after_id)
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
# Cursor is at non-NULL
|
# Cursor is at non-NULL
|
||||||
if ascending:
|
if ascending:
|
||||||
@@ -217,8 +314,8 @@ class ConversationManager:
|
|||||||
sort_column.isnot(None),
|
sort_column.isnot(None),
|
||||||
or_(
|
or_(
|
||||||
sort_column > after_sort_value,
|
sort_column > after_sort_value,
|
||||||
and_(sort_column == after_sort_value, ConversationModel.id > after_id)
|
and_(sort_column == after_sort_value, ConversationModel.id > after_id),
|
||||||
)
|
),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
@@ -227,7 +324,7 @@ class ConversationManager:
|
|||||||
or_(
|
or_(
|
||||||
sort_column.is_(None),
|
sort_column.is_(None),
|
||||||
sort_column < after_sort_value,
|
sort_column < after_sort_value,
|
||||||
and_(sort_column == after_sort_value, ConversationModel.id < after_id)
|
and_(sort_column == after_sort_value, ConversationModel.id < after_id),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
@@ -277,7 +374,11 @@ class ConversationManager:
|
|||||||
for key, value in update_data.items():
|
for key, value in update_data.items():
|
||||||
# model_settings needs to be serialized to dict for the JSON column
|
# model_settings needs to be serialized to dict for the JSON column
|
||||||
if key == "model_settings" and value is not None:
|
if key == "model_settings" and value is not None:
|
||||||
setattr(conversation, key, conversation_update.model_settings.model_dump() if conversation_update.model_settings else value)
|
setattr(
|
||||||
|
conversation,
|
||||||
|
key,
|
||||||
|
self._serialize_model_settings(conversation_update.model_settings) if conversation_update.model_settings else value,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
setattr(conversation, key, value)
|
setattr(conversation, key, value)
|
||||||
|
|
||||||
|
|||||||
@@ -604,6 +604,9 @@ def _apply_pagination(
|
|||||||
if sort_by == "last_run_completion":
|
if sort_by == "last_run_completion":
|
||||||
sort_column = AgentModel.last_run_completion
|
sort_column = AgentModel.last_run_completion
|
||||||
sort_nulls_last = True # TODO: handle this as a query param eventually
|
sort_nulls_last = True # TODO: handle this as a query param eventually
|
||||||
|
elif sort_by == "updated_at":
|
||||||
|
sort_column = AgentModel.updated_at
|
||||||
|
sort_nulls_last = False
|
||||||
else:
|
else:
|
||||||
sort_column = AgentModel.created_at
|
sort_column = AgentModel.created_at
|
||||||
sort_nulls_last = False
|
sort_nulls_last = False
|
||||||
@@ -637,6 +640,9 @@ async def _apply_pagination_async(
|
|||||||
if sort_by == "last_run_completion":
|
if sort_by == "last_run_completion":
|
||||||
sort_column = AgentModel.last_run_completion
|
sort_column = AgentModel.last_run_completion
|
||||||
sort_nulls_last = True # TODO: handle this as a query param eventually
|
sort_nulls_last = True # TODO: handle this as a query param eventually
|
||||||
|
elif sort_by == "updated_at":
|
||||||
|
sort_column = AgentModel.updated_at
|
||||||
|
sort_nulls_last = False
|
||||||
else:
|
else:
|
||||||
sort_column = AgentModel.created_at
|
sort_column = AgentModel.created_at
|
||||||
sort_nulls_last = False
|
sort_nulls_last = False
|
||||||
|
|||||||
@@ -73,7 +73,6 @@ class LLMTraceWriter:
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._client = None
|
self._client = None
|
||||||
self._shutdown = False
|
self._shutdown = False
|
||||||
self._write_lock = asyncio.Lock() # Serialize writes - clickhouse_connect isn't thread-safe
|
|
||||||
|
|
||||||
# Check if ClickHouse is configured - if not, writing is disabled
|
# Check if ClickHouse is configured - if not, writing is disabled
|
||||||
self._enabled = bool(settings.clickhouse_endpoint and settings.clickhouse_password)
|
self._enabled = bool(settings.clickhouse_endpoint and settings.clickhouse_password)
|
||||||
@@ -82,11 +81,7 @@ class LLMTraceWriter:
|
|||||||
atexit.register(self._sync_shutdown)
|
atexit.register(self._sync_shutdown)
|
||||||
|
|
||||||
def _get_client(self):
|
def _get_client(self):
|
||||||
"""Initialize ClickHouse client on first use (lazy loading).
|
"""Initialize ClickHouse client on first use (lazy loading)."""
|
||||||
|
|
||||||
Configures async_insert with wait_for_async_insert=1 for reliable
|
|
||||||
server-side batching with acknowledgment.
|
|
||||||
"""
|
|
||||||
if self._client is not None:
|
if self._client is not None:
|
||||||
return self._client
|
return self._client
|
||||||
|
|
||||||
@@ -108,8 +103,10 @@ class LLMTraceWriter:
|
|||||||
settings={
|
settings={
|
||||||
# Enable server-side batching
|
# Enable server-side batching
|
||||||
"async_insert": 1,
|
"async_insert": 1,
|
||||||
# Wait for acknowledgment (reliable)
|
# Don't wait for server-side flush acknowledgment — fire and forget.
|
||||||
"wait_for_async_insert": 1,
|
# Waiting (value=1) caused each insert to hold an asyncio.Lock for ~1s,
|
||||||
|
# creating unbounded task queues that saturated the event loop under load.
|
||||||
|
"wait_for_async_insert": 0,
|
||||||
# Flush after 1 second if batch not full
|
# Flush after 1 second if batch not full
|
||||||
"async_insert_busy_timeout_ms": 1000,
|
"async_insert_busy_timeout_ms": 1000,
|
||||||
},
|
},
|
||||||
@@ -148,15 +145,15 @@ class LLMTraceWriter:
|
|||||||
row = trace.to_clickhouse_row()
|
row = trace.to_clickhouse_row()
|
||||||
columns = LLMTrace.clickhouse_columns()
|
columns = LLMTrace.clickhouse_columns()
|
||||||
|
|
||||||
# Serialize writes - clickhouse_connect client isn't thread-safe
|
# Run synchronous insert in thread pool. clickhouse-connect supports
|
||||||
async with self._write_lock:
|
# multithreaded use via a thread-safe connection pool:
|
||||||
# Run synchronous insert in thread pool
|
# https://clickhouse.com/docs/integrations/language-clients/python/advanced-usage#multithreaded-multiprocess-and-asyncevent-driven-use-cases
|
||||||
await asyncio.to_thread(
|
await asyncio.to_thread(
|
||||||
client.insert,
|
client.insert,
|
||||||
"llm_traces",
|
"llm_traces",
|
||||||
[row],
|
[row],
|
||||||
column_names=columns,
|
column_names=columns,
|
||||||
)
|
)
|
||||||
return # Success
|
return # Success
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -3,11 +3,11 @@
|
|||||||
File format:
|
File format:
|
||||||
---
|
---
|
||||||
description: "Who I am and how I approach work"
|
description: "Who I am and how I approach work"
|
||||||
limit: 20000
|
|
||||||
---
|
---
|
||||||
My name is Memo. I'm a stateful coding assistant...
|
My name is Memo. I'm a stateful coding assistant...
|
||||||
|
|
||||||
- Frontmatter fields are only rendered when they differ from defaults.
|
- Frontmatter fields are only rendered when they differ from defaults.
|
||||||
|
- ``limit`` is intentionally excluded from frontmatter (deprecated for git-base memory).
|
||||||
- Files without frontmatter are treated as value-only (backward compat).
|
- Files without frontmatter are treated as value-only (backward compat).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -37,12 +37,12 @@ def serialize_block(
|
|||||||
This is used for initial file creation. For updates to existing files,
|
This is used for initial file creation. For updates to existing files,
|
||||||
prefer `merge_frontmatter_with_body` to preserve user formatting.
|
prefer `merge_frontmatter_with_body` to preserve user formatting.
|
||||||
"""
|
"""
|
||||||
# description and limit are always included in frontmatter.
|
# description is always included in frontmatter.
|
||||||
# read_only and metadata are only included when non-default.
|
# read_only and metadata are only included when non-default.
|
||||||
|
# limit is intentionally excluded (deprecated for git-base memory).
|
||||||
front: Dict[str, Any] = {}
|
front: Dict[str, Any] = {}
|
||||||
|
|
||||||
front["description"] = description
|
front["description"] = description
|
||||||
front["limit"] = limit if limit is not None else _get_field_default("limit")
|
|
||||||
|
|
||||||
if read_only != _get_field_default("read_only"):
|
if read_only != _get_field_default("read_only"):
|
||||||
front["read_only"] = read_only
|
front["read_only"] = read_only
|
||||||
@@ -111,7 +111,6 @@ def merge_frontmatter_with_body(
|
|||||||
|
|
||||||
# Desired values
|
# Desired values
|
||||||
desired_description = description
|
desired_description = description
|
||||||
desired_limit = limit if limit is not None else _get_field_default("limit")
|
|
||||||
desired_read_only = read_only
|
desired_read_only = read_only
|
||||||
desired_metadata = metadata if metadata is not None else _get_field_default("metadata")
|
desired_metadata = metadata if metadata is not None else _get_field_default("metadata")
|
||||||
|
|
||||||
@@ -122,8 +121,9 @@ def merge_frontmatter_with_body(
|
|||||||
parsed["description"] = desired_description
|
parsed["description"] = desired_description
|
||||||
changed = True
|
changed = True
|
||||||
|
|
||||||
if "limit" not in parsed or parsed.get("limit") != desired_limit:
|
# Remove limit from frontmatter if it exists (deprecated for git-base memory)
|
||||||
parsed["limit"] = desired_limit
|
if "limit" in parsed:
|
||||||
|
del parsed["limit"]
|
||||||
changed = True
|
changed = True
|
||||||
|
|
||||||
if desired_read_only != _get_field_default("read_only"):
|
if desired_read_only != _get_field_default("read_only"):
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ from letta.schemas.memory_repo import MemoryCommit
|
|||||||
from letta.schemas.user import User as PydanticUser
|
from letta.schemas.user import User as PydanticUser
|
||||||
from letta.services.memory_repo.block_markdown import parse_block_markdown, serialize_block
|
from letta.services.memory_repo.block_markdown import parse_block_markdown, serialize_block
|
||||||
from letta.services.memory_repo.git_operations import GitOperations
|
from letta.services.memory_repo.git_operations import GitOperations
|
||||||
|
from letta.services.memory_repo.path_mapping import memory_block_label_from_markdown_path
|
||||||
from letta.services.memory_repo.storage.local import LocalStorageBackend
|
from letta.services.memory_repo.storage.local import LocalStorageBackend
|
||||||
from letta.utils import enforce_types
|
from letta.utils import enforce_types
|
||||||
|
|
||||||
@@ -133,26 +134,29 @@ class MemfsClient:
|
|||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Convert block files to PydanticBlock (metadata is in frontmatter)
|
# Convert block files to PydanticBlock (metadata is in frontmatter).
|
||||||
|
# skills/{skill_name}/SKILL.md is mapped to block label skills/{skill_name};
|
||||||
|
# other files under skills/ are intentionally ignored.
|
||||||
blocks = []
|
blocks = []
|
||||||
for file_path, content in files.items():
|
for file_path, content in files.items():
|
||||||
if file_path.endswith(".md"):
|
label = memory_block_label_from_markdown_path(file_path)
|
||||||
label = file_path[:-3]
|
if label is None:
|
||||||
|
continue
|
||||||
|
|
||||||
parsed = parse_block_markdown(content)
|
parsed = parse_block_markdown(content)
|
||||||
|
|
||||||
synthetic_uuid = uuid.UUID(hashlib.md5(f"{agent_id}:{label}".encode()).hexdigest())
|
synthetic_uuid = uuid.UUID(hashlib.md5(f"{agent_id}:{label}".encode()).hexdigest())
|
||||||
blocks.append(
|
blocks.append(
|
||||||
PydanticBlock(
|
PydanticBlock(
|
||||||
id=f"block-{synthetic_uuid}",
|
id=f"block-{synthetic_uuid}",
|
||||||
label=label,
|
label=label,
|
||||||
value=parsed["value"],
|
value=parsed["value"],
|
||||||
description=parsed.get("description"),
|
description=parsed.get("description"),
|
||||||
limit=parsed.get("limit", CORE_MEMORY_BLOCK_CHAR_LIMIT),
|
limit=parsed.get("limit", CORE_MEMORY_BLOCK_CHAR_LIMIT),
|
||||||
read_only=parsed.get("read_only", False),
|
read_only=parsed.get("read_only", False),
|
||||||
metadata=parsed.get("metadata", {}),
|
metadata=parsed.get("metadata", {}),
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
return blocks
|
return blocks
|
||||||
|
|
||||||
|
|||||||
29
letta/services/memory_repo/path_mapping.py
Normal file
29
letta/services/memory_repo/path_mapping.py
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
"""Helpers for mapping memory-repo markdown paths to block labels.
|
||||||
|
|
||||||
|
Special handling for skills:
|
||||||
|
- sync `skills/{skill_name}/SKILL.md` as block label `skills/{skill_name}`
|
||||||
|
- ignore all other markdown files under `skills/`
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
def memory_block_label_from_markdown_path(path: str) -> str | None:
|
||||||
|
"""Return block label for a syncable markdown path, else None.
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Non-`.md` files are ignored.
|
||||||
|
- `skills/{skill_name}/SKILL.md` -> `skills/{skill_name}`
|
||||||
|
- Other `skills/**` markdown files are ignored.
|
||||||
|
- All other markdown files map to `path[:-3]`.
|
||||||
|
"""
|
||||||
|
if not path.endswith(".md"):
|
||||||
|
return None
|
||||||
|
|
||||||
|
if path.startswith("skills/"):
|
||||||
|
parts = path.split("/")
|
||||||
|
if len(parts) == 3 and parts[0] == "skills" and parts[1] and parts[2] == "SKILL.md":
|
||||||
|
return f"skills/{parts[1]}"
|
||||||
|
return None
|
||||||
|
|
||||||
|
return path[:-3]
|
||||||
@@ -141,6 +141,9 @@ class ClickhouseProviderTraceBackend(ProviderTraceBackendClient):
|
|||||||
request_json=request_json_str,
|
request_json=request_json_str,
|
||||||
response_json=response_json_str,
|
response_json=response_json_str,
|
||||||
llm_config_json=llm_config_json_str,
|
llm_config_json=llm_config_json_str,
|
||||||
|
billing_plan_type=provider_trace.billing_context.plan_type if provider_trace.billing_context else None,
|
||||||
|
billing_cost_source=provider_trace.billing_context.cost_source if provider_trace.billing_context else None,
|
||||||
|
billing_customer_id=provider_trace.billing_context.customer_id if provider_trace.billing_context else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _extract_usage(self, response_json: dict, provider: str) -> dict:
|
def _extract_usage(self, response_json: dict, provider: str) -> dict:
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ class PostgresProviderTraceBackend(ProviderTraceBackendClient):
|
|||||||
) -> ProviderTrace:
|
) -> ProviderTrace:
|
||||||
"""Write full provider trace to provider_traces table."""
|
"""Write full provider trace to provider_traces table."""
|
||||||
async with db_registry.async_session() as session:
|
async with db_registry.async_session() as session:
|
||||||
provider_trace_model = ProviderTraceModel(**provider_trace.model_dump())
|
provider_trace_model = ProviderTraceModel(**provider_trace.model_dump(exclude={"billing_context"}))
|
||||||
provider_trace_model.organization_id = actor.organization_id
|
provider_trace_model.organization_id = actor.organization_id
|
||||||
|
|
||||||
if provider_trace.request_json:
|
if provider_trace.request_json:
|
||||||
|
|||||||
@@ -638,7 +638,13 @@ class RunManager:
|
|||||||
raise NoResultFound(f"Run with id {run_id} not found")
|
raise NoResultFound(f"Run with id {run_id} not found")
|
||||||
agent_id = run.agent_id
|
agent_id = run.agent_id
|
||||||
|
|
||||||
logger.debug(f"Cancelling run {run_id} for agent {agent_id}")
|
logger.info(
|
||||||
|
"[Interrupt] Processing cancellation for run=%s, agent=%s, current_status=%s, current_stop_reason=%s",
|
||||||
|
run_id,
|
||||||
|
agent_id,
|
||||||
|
run.status if run else "unknown",
|
||||||
|
run.stop_reason if run else "unknown",
|
||||||
|
)
|
||||||
|
|
||||||
# Cancellation should be idempotent: if a run is already terminated, treat this as a no-op.
|
# Cancellation should be idempotent: if a run is already terminated, treat this as a no-op.
|
||||||
# This commonly happens when a run finishes between client request and server handling.
|
# This commonly happens when a run finishes between client request and server handling.
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from letta.errors import (
|
|||||||
LettaInvalidArgumentError,
|
LettaInvalidArgumentError,
|
||||||
LettaServiceUnavailableError,
|
LettaServiceUnavailableError,
|
||||||
LLMAuthenticationError,
|
LLMAuthenticationError,
|
||||||
|
LLMEmptyResponseError,
|
||||||
LLMError,
|
LLMError,
|
||||||
LLMRateLimitError,
|
LLMRateLimitError,
|
||||||
LLMTimeoutError,
|
LLMTimeoutError,
|
||||||
@@ -33,6 +34,7 @@ from letta.schemas.letta_request import ClientToolSchema, LettaStreamingRequest
|
|||||||
from letta.schemas.letta_response import LettaResponse
|
from letta.schemas.letta_response import LettaResponse
|
||||||
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
|
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
|
||||||
from letta.schemas.message import MessageCreate
|
from letta.schemas.message import MessageCreate
|
||||||
|
from letta.schemas.provider_trace import BillingContext
|
||||||
from letta.schemas.run import Run as PydanticRun, RunUpdate
|
from letta.schemas.run import Run as PydanticRun, RunUpdate
|
||||||
from letta.schemas.usage import LettaUsageStatistics
|
from letta.schemas.usage import LettaUsageStatistics
|
||||||
from letta.schemas.user import User
|
from letta.schemas.user import User
|
||||||
@@ -76,6 +78,8 @@ class StreamingService:
|
|||||||
request: LettaStreamingRequest,
|
request: LettaStreamingRequest,
|
||||||
run_type: str = "streaming",
|
run_type: str = "streaming",
|
||||||
conversation_id: Optional[str] = None,
|
conversation_id: Optional[str] = None,
|
||||||
|
should_lock: bool = False,
|
||||||
|
billing_context: "BillingContext | None" = None,
|
||||||
) -> tuple[Optional[PydanticRun], Union[StreamingResponse, LettaResponse]]:
|
) -> tuple[Optional[PydanticRun], Union[StreamingResponse, LettaResponse]]:
|
||||||
"""
|
"""
|
||||||
Create a streaming response for an agent.
|
Create a streaming response for an agent.
|
||||||
@@ -86,6 +90,7 @@ class StreamingService:
|
|||||||
request: The LettaStreamingRequest containing all request parameters
|
request: The LettaStreamingRequest containing all request parameters
|
||||||
run_type: Type of run for tracking
|
run_type: Type of run for tracking
|
||||||
conversation_id: Optional conversation ID for conversation-scoped messaging
|
conversation_id: Optional conversation ID for conversation-scoped messaging
|
||||||
|
should_lock: If True and conversation_id is None, use agent_id as lock key
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (run object or None, streaming response)
|
Tuple of (run object or None, streaming response)
|
||||||
@@ -116,6 +121,10 @@ class StreamingService:
|
|||||||
)
|
)
|
||||||
if conversation.model_settings is not None:
|
if conversation.model_settings is not None:
|
||||||
update_params = conversation.model_settings._to_legacy_config_params()
|
update_params = conversation.model_settings._to_legacy_config_params()
|
||||||
|
# Don't clobber max_tokens with the Pydantic default when the caller
|
||||||
|
# didn't explicitly provide max_output_tokens.
|
||||||
|
if "max_output_tokens" not in conversation.model_settings.model_fields_set:
|
||||||
|
update_params.pop("max_tokens", None)
|
||||||
conversation_llm_config = conversation_llm_config.model_copy(update=update_params)
|
conversation_llm_config = conversation_llm_config.model_copy(update=update_params)
|
||||||
agent = agent.model_copy(update={"llm_config": conversation_llm_config})
|
agent = agent.model_copy(update={"llm_config": conversation_llm_config})
|
||||||
|
|
||||||
@@ -130,12 +139,15 @@ class StreamingService:
|
|||||||
|
|
||||||
model_compatible_token_streaming = self._is_token_streaming_compatible(agent)
|
model_compatible_token_streaming = self._is_token_streaming_compatible(agent)
|
||||||
|
|
||||||
# Attempt to acquire conversation lock if conversation_id is provided
|
# Determine lock key: use conversation_id if provided, else agent_id if should_lock
|
||||||
# This prevents concurrent message processing for the same conversation
|
lock_key = conversation_id if conversation_id else (agent_id if should_lock else None)
|
||||||
|
|
||||||
|
# Attempt to acquire lock if lock_key is set
|
||||||
|
# This prevents concurrent message processing for the same conversation/agent
|
||||||
# Skip locking if Redis is not available (graceful degradation)
|
# Skip locking if Redis is not available (graceful degradation)
|
||||||
if conversation_id and not isinstance(redis_client, NoopAsyncRedisClient):
|
if lock_key and not isinstance(redis_client, NoopAsyncRedisClient):
|
||||||
await redis_client.acquire_conversation_lock(
|
await redis_client.acquire_conversation_lock(
|
||||||
conversation_id=conversation_id,
|
conversation_id=lock_key,
|
||||||
token=str(uuid4()),
|
token=str(uuid4()),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -163,8 +175,10 @@ class StreamingService:
|
|||||||
include_return_message_types=request.include_return_message_types,
|
include_return_message_types=request.include_return_message_types,
|
||||||
actor=actor,
|
actor=actor,
|
||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
|
lock_key=lock_key, # For lock release (may differ from conversation_id)
|
||||||
client_tools=request.client_tools,
|
client_tools=request.client_tools,
|
||||||
include_compaction_messages=request.include_compaction_messages,
|
include_compaction_messages=request.include_compaction_messages,
|
||||||
|
billing_context=billing_context,
|
||||||
)
|
)
|
||||||
|
|
||||||
# handle background streaming if requested
|
# handle background streaming if requested
|
||||||
@@ -195,7 +209,7 @@ class StreamingService:
|
|||||||
run_id=run.id,
|
run_id=run.id,
|
||||||
run_manager=self.server.run_manager,
|
run_manager=self.server.run_manager,
|
||||||
actor=actor,
|
actor=actor,
|
||||||
conversation_id=conversation_id,
|
conversation_id=lock_key, # Use lock_key for lock release
|
||||||
),
|
),
|
||||||
label=f"background_stream_processor_{run.id}",
|
label=f"background_stream_processor_{run.id}",
|
||||||
)
|
)
|
||||||
@@ -251,7 +265,7 @@ class StreamingService:
|
|||||||
if settings.track_agent_run and run and run_status:
|
if settings.track_agent_run and run and run_status:
|
||||||
await self.server.run_manager.update_run_by_id_async(
|
await self.server.run_manager.update_run_by_id_async(
|
||||||
run_id=run.id,
|
run_id=run.id,
|
||||||
conversation_id=conversation_id,
|
conversation_id=lock_key, # Use lock_key for lock release
|
||||||
update=RunUpdate(status=run_status, metadata=run_update_metadata),
|
update=RunUpdate(status=run_status, metadata=run_update_metadata),
|
||||||
actor=actor,
|
actor=actor,
|
||||||
)
|
)
|
||||||
@@ -326,8 +340,10 @@ class StreamingService:
|
|||||||
include_return_message_types: Optional[list[MessageType]],
|
include_return_message_types: Optional[list[MessageType]],
|
||||||
actor: User,
|
actor: User,
|
||||||
conversation_id: Optional[str] = None,
|
conversation_id: Optional[str] = None,
|
||||||
|
lock_key: Optional[str] = None,
|
||||||
client_tools: Optional[list[ClientToolSchema]] = None,
|
client_tools: Optional[list[ClientToolSchema]] = None,
|
||||||
include_compaction_messages: bool = False,
|
include_compaction_messages: bool = False,
|
||||||
|
billing_context: BillingContext | None = None,
|
||||||
) -> AsyncIterator:
|
) -> AsyncIterator:
|
||||||
"""
|
"""
|
||||||
Create a stream with unified error handling.
|
Create a stream with unified error handling.
|
||||||
@@ -356,6 +372,7 @@ class StreamingService:
|
|||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
client_tools=client_tools,
|
client_tools=client_tools,
|
||||||
include_compaction_messages=include_compaction_messages,
|
include_compaction_messages=include_compaction_messages,
|
||||||
|
billing_context=billing_context,
|
||||||
)
|
)
|
||||||
|
|
||||||
async for chunk in stream:
|
async for chunk in stream:
|
||||||
@@ -442,6 +459,21 @@ class StreamingService:
|
|||||||
yield f"event: error\ndata: {error_message.model_dump_json()}\n\n"
|
yield f"event: error\ndata: {error_message.model_dump_json()}\n\n"
|
||||||
# Send [DONE] marker to properly close the stream
|
# Send [DONE] marker to properly close the stream
|
||||||
yield "data: [DONE]\n\n"
|
yield "data: [DONE]\n\n"
|
||||||
|
except LLMEmptyResponseError as e:
|
||||||
|
run_status = RunStatus.failed
|
||||||
|
stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response)
|
||||||
|
error_message = LettaErrorMessage(
|
||||||
|
run_id=run_id,
|
||||||
|
error_type="llm_empty_response",
|
||||||
|
message="LLM returned an empty response.",
|
||||||
|
detail=str(e),
|
||||||
|
)
|
||||||
|
error_data = {"error": error_message.model_dump()}
|
||||||
|
logger.warning(f"Run {run_id} stopped with LLM empty response: {e}, error_data: {error_message.model_dump()}")
|
||||||
|
yield f"data: {stop_reason.model_dump_json()}\n\n"
|
||||||
|
yield f"event: error\ndata: {error_message.model_dump_json()}\n\n"
|
||||||
|
# Send [DONE] marker to properly close the stream
|
||||||
|
yield "data: [DONE]\n\n"
|
||||||
except LLMError as e:
|
except LLMError as e:
|
||||||
run_status = RunStatus.failed
|
run_status = RunStatus.failed
|
||||||
stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error)
|
stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error)
|
||||||
@@ -491,7 +523,7 @@ class StreamingService:
|
|||||||
stop_reason_value = stop_reason.stop_reason if stop_reason else StopReasonType.error.value
|
stop_reason_value = stop_reason.stop_reason if stop_reason else StopReasonType.error.value
|
||||||
await self.runs_manager.update_run_by_id_async(
|
await self.runs_manager.update_run_by_id_async(
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
conversation_id=conversation_id,
|
conversation_id=lock_key, # Use lock_key for lock release
|
||||||
update=RunUpdate(status=run_status, stop_reason=stop_reason_value, metadata=error_data),
|
update=RunUpdate(status=run_status, stop_reason=stop_reason_value, metadata=error_data),
|
||||||
actor=actor,
|
actor=actor,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -96,6 +96,10 @@ async def build_summarizer_llm_config(
|
|||||||
# them just like server.create_agent_async does for agents.
|
# them just like server.create_agent_async does for agents.
|
||||||
if summarizer_config.model_settings is not None:
|
if summarizer_config.model_settings is not None:
|
||||||
update_params = summarizer_config.model_settings._to_legacy_config_params()
|
update_params = summarizer_config.model_settings._to_legacy_config_params()
|
||||||
|
# Don't clobber max_tokens with the Pydantic default when the caller
|
||||||
|
# didn't explicitly provide max_output_tokens.
|
||||||
|
if "max_output_tokens" not in summarizer_config.model_settings.model_fields_set:
|
||||||
|
update_params.pop("max_tokens", None)
|
||||||
return base.model_copy(update=update_params)
|
return base.model_copy(update=update_params)
|
||||||
|
|
||||||
return base
|
return base
|
||||||
|
|||||||
@@ -196,7 +196,7 @@ async def self_summarize_sliding_window(
|
|||||||
return message.tool_calls is not None and len(message.tool_calls) > 0
|
return message.tool_calls is not None and len(message.tool_calls) > 0
|
||||||
return False
|
return False
|
||||||
|
|
||||||
post_summarization_buffer = [system_prompt]
|
post_summarization_buffer = []
|
||||||
while approx_token_count >= goal_tokens and eviction_percentage < 1.0:
|
while approx_token_count >= goal_tokens and eviction_percentage < 1.0:
|
||||||
# more eviction percentage
|
# more eviction percentage
|
||||||
eviction_percentage += 0.10
|
eviction_percentage += 0.10
|
||||||
@@ -217,8 +217,8 @@ async def self_summarize_sliding_window(
|
|||||||
|
|
||||||
# update token count
|
# update token count
|
||||||
logger.info(f"Attempting to compact messages to index {assistant_message_index} messages")
|
logger.info(f"Attempting to compact messages to index {assistant_message_index} messages")
|
||||||
post_summarization_buffer = [system_prompt, *messages[assistant_message_index:]]
|
post_summarization_buffer = list(messages[assistant_message_index:])
|
||||||
approx_token_count = await count_tokens(actor, agent_llm_config, post_summarization_buffer)
|
approx_token_count = await count_tokens(actor, agent_llm_config, [system_prompt, *post_summarization_buffer])
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Compacting messages index 1:{assistant_message_index} messages resulted in {approx_token_count} tokens, goal is {goal_tokens}"
|
f"Compacting messages index 1:{assistant_message_index} messages resulted in {approx_token_count} tokens, goal is {goal_tokens}"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from letta.settings import summarizer_settings
|
|||||||
def get_default_summarizer_model(provider_type: ProviderType) -> str | None:
|
def get_default_summarizer_model(provider_type: ProviderType) -> str | None:
|
||||||
"""Get default model for summarization for given provider type."""
|
"""Get default model for summarization for given provider type."""
|
||||||
summarizer_defaults = {
|
summarizer_defaults = {
|
||||||
ProviderType.anthropic: "anthropic/claude-haiku-4-5-20251001",
|
ProviderType.anthropic: "anthropic/claude-haiku-4-5",
|
||||||
ProviderType.openai: "openai/gpt-5-mini",
|
ProviderType.openai: "openai/gpt-5-mini",
|
||||||
ProviderType.google_ai: "google_ai/gemini-2.5-flash",
|
ProviderType.google_ai: "google_ai/gemini-2.5-flash",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -114,7 +114,7 @@ class SummarizerSettings(BaseSettings):
|
|||||||
class ModelSettings(BaseSettings):
|
class ModelSettings(BaseSettings):
|
||||||
model_config = SettingsConfigDict(env_file=".env", extra="ignore")
|
model_config = SettingsConfigDict(env_file=".env", extra="ignore")
|
||||||
|
|
||||||
global_max_context_window_limit: int = 32000
|
global_max_context_window_limit: int = 128000
|
||||||
|
|
||||||
inner_thoughts_kwarg: str | None = Field(default=INNER_THOUGHTS_KWARG, description="Key used for passing in inner thoughts.")
|
inner_thoughts_kwarg: str | None = Field(default=INNER_THOUGHTS_KWARG, description="Key used for passing in inner thoughts.")
|
||||||
|
|
||||||
@@ -204,6 +204,7 @@ class ModelSettings(BaseSettings):
|
|||||||
gemini_base_url: str = "https://generativelanguage.googleapis.com/"
|
gemini_base_url: str = "https://generativelanguage.googleapis.com/"
|
||||||
gemini_force_minimum_thinking_budget: bool = False
|
gemini_force_minimum_thinking_budget: bool = False
|
||||||
gemini_max_retries: int = 5
|
gemini_max_retries: int = 5
|
||||||
|
gemini_timeout_seconds: float = 600.0
|
||||||
|
|
||||||
# google vertex
|
# google vertex
|
||||||
google_cloud_project: Optional[str] = None
|
google_cloud_project: Optional[str] = None
|
||||||
|
|||||||
@@ -45,30 +45,36 @@ PATH_VALIDATORS = {primitive_type.value: _create_path_validator_factory(primitiv
|
|||||||
|
|
||||||
|
|
||||||
def _create_conversation_id_or_default_path_validator_factory():
|
def _create_conversation_id_or_default_path_validator_factory():
|
||||||
"""Conversation IDs accept the usual primitive format or the special value 'default'."""
|
"""Conversation IDs with support for 'default' and agent IDs (backwards compatibility)."""
|
||||||
|
|
||||||
primitive = PrimitiveType.CONVERSATION.value
|
conversation_primitive = PrimitiveType.CONVERSATION.value
|
||||||
prefix_pattern = PRIMITIVE_ID_PATTERNS[primitive].pattern
|
agent_primitive = PrimitiveType.AGENT.value
|
||||||
# Make the full regex accept either the primitive ID format or 'default'.
|
conversation_pattern = PRIMITIVE_ID_PATTERNS[conversation_primitive].pattern
|
||||||
# `prefix_pattern` already contains the ^...$ anchors.
|
agent_pattern = PRIMITIVE_ID_PATTERNS[agent_primitive].pattern
|
||||||
conversation_or_default_pattern = f"^(default|{prefix_pattern[1:-1]})$"
|
# Make the full regex accept: conversation ID, agent ID, or 'default'.
|
||||||
|
# Patterns already contain ^...$ anchors, so strip them for the alternation.
|
||||||
|
conversation_or_agent_or_default_pattern = f"^(default|{conversation_pattern[1:-1]}|{agent_pattern[1:-1]})$"
|
||||||
|
|
||||||
def factory():
|
def factory():
|
||||||
return Path(
|
return Path(
|
||||||
description=(f"The conversation identifier. Either the special value 'default' or an ID in the format '{primitive}-<uuid4>'"),
|
description=(
|
||||||
pattern=conversation_or_default_pattern,
|
f"The conversation identifier. Can be a conversation ID ('{conversation_primitive}-<uuid4>'), "
|
||||||
examples=["default", f"{primitive}-123e4567-e89b-42d3-8456-426614174000"],
|
f"'default' for agent-direct mode (with agent_id parameter), "
|
||||||
|
f"or an agent ID ('{agent_primitive}-<uuid4>') for backwards compatibility (deprecated)."
|
||||||
|
),
|
||||||
|
pattern=conversation_or_agent_or_default_pattern,
|
||||||
|
examples=[
|
||||||
|
"default",
|
||||||
|
f"{conversation_primitive}-123e4567-e89b-42d3-8456-426614174000",
|
||||||
|
f"{agent_primitive}-123e4567-e89b-42d3-8456-426614174000",
|
||||||
|
],
|
||||||
min_length=1,
|
min_length=1,
|
||||||
max_length=len(primitive) + 1 + 36,
|
max_length=max(len(conversation_primitive), len(agent_primitive)) + 1 + 36,
|
||||||
)
|
)
|
||||||
|
|
||||||
return factory
|
return factory
|
||||||
|
|
||||||
|
|
||||||
# Override conversation ID path validation to also allow the special value 'default'.
|
|
||||||
PATH_VALIDATORS[PrimitiveType.CONVERSATION.value] = _create_conversation_id_or_default_path_validator_factory()
|
|
||||||
|
|
||||||
|
|
||||||
# Type aliases for common ID types
|
# Type aliases for common ID types
|
||||||
# These can be used directly in route handler signatures for cleaner code
|
# These can be used directly in route handler signatures for cleaner code
|
||||||
AgentId = Annotated[str, PATH_VALIDATORS[PrimitiveType.AGENT.value]()]
|
AgentId = Annotated[str, PATH_VALIDATORS[PrimitiveType.AGENT.value]()]
|
||||||
@@ -89,6 +95,10 @@ StepId = Annotated[str, PATH_VALIDATORS[PrimitiveType.STEP.value]()]
|
|||||||
IdentityId = Annotated[str, PATH_VALIDATORS[PrimitiveType.IDENTITY.value]()]
|
IdentityId = Annotated[str, PATH_VALIDATORS[PrimitiveType.IDENTITY.value]()]
|
||||||
ConversationId = Annotated[str, PATH_VALIDATORS[PrimitiveType.CONVERSATION.value]()]
|
ConversationId = Annotated[str, PATH_VALIDATORS[PrimitiveType.CONVERSATION.value]()]
|
||||||
|
|
||||||
|
# Conversation ID with support for 'default' and agent IDs (for agent-direct mode endpoints)
|
||||||
|
# Backwards compatible - agent-* will be deprecated in favor of conversation_id='default' + agent_id param
|
||||||
|
ConversationIdOrDefault = Annotated[str, _create_conversation_id_or_default_path_validator_factory()()]
|
||||||
|
|
||||||
# Infrastructure types
|
# Infrastructure types
|
||||||
McpServerId = Annotated[str, PATH_VALIDATORS[PrimitiveType.MCP_SERVER.value]()]
|
McpServerId = Annotated[str, PATH_VALIDATORS[PrimitiveType.MCP_SERVER.value]()]
|
||||||
McpOAuthId = Annotated[str, PATH_VALIDATORS[PrimitiveType.MCP_OAUTH.value]()]
|
McpOAuthId = Annotated[str, PATH_VALIDATORS[PrimitiveType.MCP_OAUTH.value]()]
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "letta"
|
name = "letta"
|
||||||
version = "0.16.5"
|
version = "0.16.6"
|
||||||
description = "Create LLM agents with long-term memory and custom tools"
|
description = "Create LLM agents with long-term memory and custom tools"
|
||||||
authors = [
|
authors = [
|
||||||
{name = "Letta Team", email = "contact@letta.com"},
|
{name = "Letta Team", email = "contact@letta.com"},
|
||||||
|
|||||||
@@ -2,6 +2,12 @@ import anthropic
|
|||||||
import httpx
|
import httpx
|
||||||
import openai
|
import openai
|
||||||
import pytest
|
import pytest
|
||||||
|
from anthropic.types.beta import (
|
||||||
|
BetaMessage,
|
||||||
|
BetaRawMessageStartEvent,
|
||||||
|
BetaRawMessageStopEvent,
|
||||||
|
BetaUsage,
|
||||||
|
)
|
||||||
from google.genai import errors as google_errors
|
from google.genai import errors as google_errors
|
||||||
|
|
||||||
from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
|
from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
|
||||||
@@ -9,6 +15,7 @@ from letta.errors import (
|
|||||||
ContextWindowExceededError,
|
ContextWindowExceededError,
|
||||||
LLMBadRequestError,
|
LLMBadRequestError,
|
||||||
LLMConnectionError,
|
LLMConnectionError,
|
||||||
|
LLMEmptyResponseError,
|
||||||
LLMInsufficientCreditsError,
|
LLMInsufficientCreditsError,
|
||||||
LLMServerError,
|
LLMServerError,
|
||||||
)
|
)
|
||||||
@@ -287,3 +294,70 @@ def test_openai_client_handle_llm_error_non_credit_api_error():
|
|||||||
result = client.handle_llm_error(error)
|
result = client.handle_llm_error(error)
|
||||||
assert isinstance(result, LLMBadRequestError)
|
assert isinstance(result, LLMBadRequestError)
|
||||||
assert not isinstance(result, LLMInsufficientCreditsError)
|
assert not isinstance(result, LLMInsufficientCreditsError)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_letta_llm_stream_adapter_raises_empty_response_error_for_anthropic(monkeypatch):
|
||||||
|
"""LET-7679: Empty streaming responses (no content blocks) should raise LLMEmptyResponseError.
|
||||||
|
|
||||||
|
This tests the case where Opus 4.6 returns a response with:
|
||||||
|
- BetaRawMessageStartEvent (with usage tokens)
|
||||||
|
- BetaRawMessageStopEvent (end_turn)
|
||||||
|
- NO content blocks in between
|
||||||
|
|
||||||
|
This should raise LLMEmptyResponseError, not complete successfully with stop_reason=end_turn.
|
||||||
|
"""
|
||||||
|
|
||||||
|
class FakeAsyncStream:
|
||||||
|
"""Mimics anthropic.AsyncStream that returns empty content (no content blocks)."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.events = [
|
||||||
|
# Message start with some usage info
|
||||||
|
BetaRawMessageStartEvent(
|
||||||
|
type="message_start",
|
||||||
|
message=BetaMessage(
|
||||||
|
id="msg_test_empty",
|
||||||
|
type="message",
|
||||||
|
role="assistant",
|
||||||
|
content=[], # Empty content
|
||||||
|
model="claude-opus-4-6",
|
||||||
|
stop_reason="end_turn",
|
||||||
|
stop_sequence=None,
|
||||||
|
usage=BetaUsage(input_tokens=1000, output_tokens=26, cache_creation_input_tokens=0, cache_read_input_tokens=0),
|
||||||
|
),
|
||||||
|
),
|
||||||
|
# Message stop immediately after start - no content blocks
|
||||||
|
BetaRawMessageStopEvent(type="message_stop"),
|
||||||
|
]
|
||||||
|
self.index = 0
|
||||||
|
|
||||||
|
async def __aenter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
async def __aexit__(self, exc_type, exc, tb):
|
||||||
|
return None
|
||||||
|
|
||||||
|
def __aiter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
async def __anext__(self):
|
||||||
|
if self.index >= len(self.events):
|
||||||
|
raise StopAsyncIteration
|
||||||
|
event = self.events[self.index]
|
||||||
|
self.index += 1
|
||||||
|
return event
|
||||||
|
|
||||||
|
async def fake_stream_async(self, request_data: dict, llm_config):
|
||||||
|
return FakeAsyncStream()
|
||||||
|
|
||||||
|
monkeypatch.setattr(AnthropicClient, "stream_async", fake_stream_async, raising=True)
|
||||||
|
|
||||||
|
llm_client = AnthropicClient()
|
||||||
|
llm_config = LLMConfig(model="claude-opus-4-6", model_endpoint_type="anthropic", context_window=200000)
|
||||||
|
adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config, call_type=LLMCallType.agent_step)
|
||||||
|
|
||||||
|
gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True)
|
||||||
|
with pytest.raises(LLMEmptyResponseError):
|
||||||
|
async for _ in gen:
|
||||||
|
pass
|
||||||
|
|||||||
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"context_window": 32000,
|
||||||
|
"model": "gpt-5.3-codex",
|
||||||
|
"model_endpoint_type": "openai",
|
||||||
|
"model_endpoint": "https://api.openai.com/v1",
|
||||||
|
"model_wrapper": null,
|
||||||
|
"reasoning_effort": "low"
|
||||||
|
}
|
||||||
@@ -141,7 +141,7 @@ async def create_test_agent(name, actor, test_id: Optional[str] = None, model="a
|
|||||||
model="claude-3-7-sonnet-latest",
|
model="claude-3-7-sonnet-latest",
|
||||||
model_endpoint_type="anthropic",
|
model_endpoint_type="anthropic",
|
||||||
model_endpoint="https://api.anthropic.com/v1",
|
model_endpoint="https://api.anthropic.com/v1",
|
||||||
context_window=32000,
|
context_window=128000,
|
||||||
handle="anthropic/claude-3-7-sonnet-latest",
|
handle="anthropic/claude-3-7-sonnet-latest",
|
||||||
put_inner_thoughts_in_kwargs=True,
|
put_inner_thoughts_in_kwargs=True,
|
||||||
max_tokens=4096,
|
max_tokens=4096,
|
||||||
@@ -193,7 +193,7 @@ async def create_test_batch_item(server, batch_id, agent_id, default_user):
|
|||||||
model="claude-3-7-sonnet-latest",
|
model="claude-3-7-sonnet-latest",
|
||||||
model_endpoint_type="anthropic",
|
model_endpoint_type="anthropic",
|
||||||
model_endpoint="https://api.anthropic.com/v1",
|
model_endpoint="https://api.anthropic.com/v1",
|
||||||
context_window=32000,
|
context_window=128000,
|
||||||
handle="anthropic/claude-3-7-sonnet-latest",
|
handle="anthropic/claude-3-7-sonnet-latest",
|
||||||
put_inner_thoughts_in_kwargs=True,
|
put_inner_thoughts_in_kwargs=True,
|
||||||
max_tokens=4096,
|
max_tokens=4096,
|
||||||
|
|||||||
@@ -62,12 +62,14 @@ class TestConversationsSDK:
|
|||||||
# Create a conversation
|
# Create a conversation
|
||||||
created = client.conversations.create(agent_id=agent.id)
|
created = client.conversations.create(agent_id=agent.id)
|
||||||
|
|
||||||
# Retrieve it (should have empty in_context_message_ids initially)
|
# Retrieve it (should have system message from creation)
|
||||||
retrieved = client.conversations.retrieve(conversation_id=created.id)
|
retrieved = client.conversations.retrieve(conversation_id=created.id)
|
||||||
|
|
||||||
assert retrieved.id == created.id
|
assert retrieved.id == created.id
|
||||||
assert retrieved.agent_id == created.agent_id
|
assert retrieved.agent_id == created.agent_id
|
||||||
assert retrieved.in_context_message_ids == []
|
# Conversation should have 1 system message immediately after creation
|
||||||
|
assert len(retrieved.in_context_message_ids) == 1
|
||||||
|
assert retrieved.in_context_message_ids[0].startswith("message-")
|
||||||
|
|
||||||
# Send a message to the conversation
|
# Send a message to the conversation
|
||||||
list(
|
list(
|
||||||
@@ -566,6 +568,289 @@ class TestConversationsSDK:
|
|||||||
# Should not contain the cursor message
|
# Should not contain the cursor message
|
||||||
assert first_message_id not in [m.id for m in messages_after]
|
assert first_message_id not in [m.id for m in messages_after]
|
||||||
|
|
||||||
|
def test_agent_direct_messaging_via_conversations_endpoint(self, client: Letta, agent):
|
||||||
|
"""Test sending messages using agent ID as conversation_id (agent-direct mode).
|
||||||
|
|
||||||
|
This allows clients to use a unified endpoint pattern without managing conversation IDs.
|
||||||
|
"""
|
||||||
|
# Send a message using the agent ID directly as conversation_id
|
||||||
|
# This should route to agent-direct mode with locking
|
||||||
|
messages = list(
|
||||||
|
client.conversations.messages.create(
|
||||||
|
conversation_id=agent.id, # Using agent ID instead of conversation ID
|
||||||
|
messages=[{"role": "user", "content": "Hello via agent-direct mode!"}],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify we got a response
|
||||||
|
assert len(messages) > 0, "Should receive response messages"
|
||||||
|
|
||||||
|
# Verify we got an assistant message in the response
|
||||||
|
assistant_messages = [m for m in messages if hasattr(m, "message_type") and m.message_type == "assistant_message"]
|
||||||
|
assert len(assistant_messages) > 0, "Should receive at least one assistant message"
|
||||||
|
|
||||||
|
def test_agent_direct_messaging_with_locking(self, client: Letta, agent):
|
||||||
|
"""Test that agent-direct mode properly acquires and releases locks.
|
||||||
|
|
||||||
|
Sequential requests should both succeed if locks are properly released.
|
||||||
|
"""
|
||||||
|
from letta.settings import settings
|
||||||
|
|
||||||
|
# Skip if Redis is not configured
|
||||||
|
if settings.redis_host is None or settings.redis_port is None:
|
||||||
|
pytest.skip("Redis not configured - skipping agent-direct lock test")
|
||||||
|
|
||||||
|
# Send first message via agent-direct mode
|
||||||
|
messages1 = list(
|
||||||
|
client.conversations.messages.create(
|
||||||
|
conversation_id=agent.id,
|
||||||
|
messages=[{"role": "user", "content": "First message"}],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
assert len(messages1) > 0, "First message should succeed"
|
||||||
|
|
||||||
|
# Send second message - should succeed if lock was released
|
||||||
|
messages2 = list(
|
||||||
|
client.conversations.messages.create(
|
||||||
|
conversation_id=agent.id,
|
||||||
|
messages=[{"role": "user", "content": "Second message"}],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
assert len(messages2) > 0, "Second message should succeed after lock released"
|
||||||
|
|
||||||
|
def test_agent_direct_concurrent_requests_blocked(self, client: Letta, agent):
|
||||||
|
"""Test that concurrent requests to agent-direct mode are properly serialized.
|
||||||
|
|
||||||
|
One request should succeed and one should get a 409 CONVERSATION_BUSY error.
|
||||||
|
"""
|
||||||
|
import concurrent.futures
|
||||||
|
|
||||||
|
from letta_client import ConflictError
|
||||||
|
|
||||||
|
from letta.settings import settings
|
||||||
|
|
||||||
|
# Skip if Redis is not configured
|
||||||
|
if settings.redis_host is None or settings.redis_port is None:
|
||||||
|
pytest.skip("Redis not configured - skipping agent-direct lock test")
|
||||||
|
|
||||||
|
results = {"success": 0, "conflict": 0, "other_error": 0}
|
||||||
|
|
||||||
|
def send_message(msg: str):
|
||||||
|
try:
|
||||||
|
messages = list(
|
||||||
|
client.conversations.messages.create(
|
||||||
|
conversation_id=agent.id, # Agent-direct mode
|
||||||
|
messages=[{"role": "user", "content": msg}],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return ("success", messages)
|
||||||
|
except ConflictError:
|
||||||
|
return ("conflict", None)
|
||||||
|
except Exception as e:
|
||||||
|
return ("other_error", str(e))
|
||||||
|
|
||||||
|
# Fire off two messages concurrently
|
||||||
|
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
||||||
|
future1 = executor.submit(send_message, "Concurrent message 1")
|
||||||
|
future2 = executor.submit(send_message, "Concurrent message 2")
|
||||||
|
|
||||||
|
result1 = future1.result()
|
||||||
|
result2 = future2.result()
|
||||||
|
|
||||||
|
# Count results
|
||||||
|
for result_type, _ in [result1, result2]:
|
||||||
|
results[result_type] += 1
|
||||||
|
|
||||||
|
# One should succeed and one should get conflict
|
||||||
|
assert results["success"] == 1, f"Expected 1 success, got {results['success']}"
|
||||||
|
assert results["conflict"] == 1, f"Expected 1 conflict, got {results['conflict']}"
|
||||||
|
assert results["other_error"] == 0, f"Unexpected errors: {results['other_error']}"
|
||||||
|
|
||||||
|
# Now send another message - should succeed since lock is released
|
||||||
|
messages = list(
|
||||||
|
client.conversations.messages.create(
|
||||||
|
conversation_id=agent.id,
|
||||||
|
messages=[{"role": "user", "content": "Message after concurrent requests"}],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
assert len(messages) > 0, "Should be able to send message after concurrent requests complete"
|
||||||
|
|
||||||
|
def test_agent_direct_list_messages(self, client: Letta, agent):
|
||||||
|
"""Test listing messages using agent ID as conversation_id."""
|
||||||
|
# First send a message via agent-direct mode
|
||||||
|
list(
|
||||||
|
client.conversations.messages.create(
|
||||||
|
conversation_id=agent.id,
|
||||||
|
messages=[{"role": "user", "content": "Test message for listing"}],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# List messages using agent ID
|
||||||
|
messages_page = client.conversations.messages.list(conversation_id=agent.id)
|
||||||
|
messages = list(messages_page)
|
||||||
|
|
||||||
|
# Should have messages (at least system + user + assistant)
|
||||||
|
assert len(messages) >= 3, f"Expected at least 3 messages, got {len(messages)}"
|
||||||
|
|
||||||
|
# Verify we can find our test message
|
||||||
|
user_messages = [m for m in messages if hasattr(m, "message_type") and m.message_type == "user_message"]
|
||||||
|
assert any("Test message for listing" in str(m.content) for m in user_messages), "Should find our test message"
|
||||||
|
|
||||||
|
def test_agent_direct_cancel(self, client: Letta, agent):
|
||||||
|
"""Test canceling runs using agent ID as conversation_id."""
|
||||||
|
from letta.settings import settings
|
||||||
|
|
||||||
|
# Skip if run tracking is disabled
|
||||||
|
if not settings.track_agent_run:
|
||||||
|
pytest.skip("Run tracking disabled - skipping cancel test")
|
||||||
|
|
||||||
|
# Start a background request that we can cancel
|
||||||
|
try:
|
||||||
|
# Send a message in background mode
|
||||||
|
stream = client.conversations.messages.create(
|
||||||
|
conversation_id=agent.id,
|
||||||
|
messages=[{"role": "user", "content": "Background message to cancel"}],
|
||||||
|
background=True,
|
||||||
|
)
|
||||||
|
# Consume a bit of the stream to ensure it started
|
||||||
|
next(iter(stream), None)
|
||||||
|
|
||||||
|
# Cancel using agent ID
|
||||||
|
result = client.conversations.cancel(conversation_id=agent.id)
|
||||||
|
|
||||||
|
# Should return results (may be empty if run already completed)
|
||||||
|
assert isinstance(result, dict), "Cancel should return a dict of results"
|
||||||
|
except Exception as e:
|
||||||
|
# If no active runs, that's okay - the run may have completed quickly
|
||||||
|
if "No active runs" not in str(e):
|
||||||
|
raise
|
||||||
|
|
||||||
|
def test_backwards_compatibility_old_pattern(self, client: Letta, agent, server_url: str):
|
||||||
|
"""Test that the old pattern (agent_id as conversation_id) still works for backwards compatibility."""
|
||||||
|
# OLD PATTERN: conversation_id=agent.id (should still work)
|
||||||
|
# Use raw HTTP requests since SDK might not be up to date
|
||||||
|
|
||||||
|
# Test 1: Send message using old pattern
|
||||||
|
response = requests.post(
|
||||||
|
f"{server_url}/v1/conversations/{agent.id}/messages",
|
||||||
|
json={
|
||||||
|
"messages": [{"role": "user", "content": "Testing old pattern still works"}],
|
||||||
|
"streaming": False,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
assert response.status_code == 200, f"Old pattern should work for sending messages: {response.text}"
|
||||||
|
data = response.json()
|
||||||
|
assert "messages" in data, "Response should contain messages"
|
||||||
|
assert len(data["messages"]) > 0, "Should receive response messages"
|
||||||
|
|
||||||
|
# Test 2: List messages using old pattern
|
||||||
|
response = requests.get(f"{server_url}/v1/conversations/{agent.id}/messages")
|
||||||
|
assert response.status_code == 200, f"Old pattern should work for listing messages: {response.text}"
|
||||||
|
data = response.json()
|
||||||
|
# Response is a list of messages directly
|
||||||
|
assert isinstance(data, list), "Response should be a list of messages"
|
||||||
|
assert len(data) >= 3, "Should have at least system + user + assistant messages"
|
||||||
|
|
||||||
|
# Verify our message is there
|
||||||
|
user_messages = [m for m in data if m.get("message_type") == "user_message"]
|
||||||
|
assert any("Testing old pattern still works" in str(m.get("content", "")) for m in user_messages), "Should find our test message"
|
||||||
|
|
||||||
|
def test_new_pattern_send_message(self, client: Letta, agent, server_url: str):
|
||||||
|
"""Test sending messages using the new pattern: conversation_id='default' + agent_id in body."""
|
||||||
|
# NEW PATTERN: conversation_id='default' + agent_id in request body
|
||||||
|
response = requests.post(
|
||||||
|
f"{server_url}/v1/conversations/default/messages",
|
||||||
|
json={
|
||||||
|
"agent_id": agent.id,
|
||||||
|
"messages": [{"role": "user", "content": "Testing new pattern send message"}],
|
||||||
|
"streaming": False,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
assert response.status_code == 200, f"New pattern should work for sending messages: {response.text}"
|
||||||
|
data = response.json()
|
||||||
|
assert "messages" in data, "Response should contain messages"
|
||||||
|
assert len(data["messages"]) > 0, "Should receive response messages"
|
||||||
|
|
||||||
|
# Verify we got an assistant message
|
||||||
|
assistant_messages = [m for m in data["messages"] if m.get("message_type") == "assistant_message"]
|
||||||
|
assert len(assistant_messages) > 0, "Should receive at least one assistant message"
|
||||||
|
|
||||||
|
def test_new_pattern_list_messages(self, client: Letta, agent, server_url: str):
|
||||||
|
"""Test listing messages using the new pattern: conversation_id='default' + agent_id query param."""
|
||||||
|
# First send a message to populate the conversation
|
||||||
|
requests.post(
|
||||||
|
f"{server_url}/v1/conversations/{agent.id}/messages",
|
||||||
|
json={
|
||||||
|
"messages": [{"role": "user", "content": "Setup message for list test"}],
|
||||||
|
"streaming": False,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# NEW PATTERN: conversation_id='default' + agent_id as query param
|
||||||
|
response = requests.get(
|
||||||
|
f"{server_url}/v1/conversations/default/messages",
|
||||||
|
params={"agent_id": agent.id},
|
||||||
|
)
|
||||||
|
assert response.status_code == 200, f"New pattern should work for listing messages: {response.text}"
|
||||||
|
data = response.json()
|
||||||
|
# Response is a list of messages directly
|
||||||
|
assert isinstance(data, list), "Response should be a list of messages"
|
||||||
|
assert len(data) >= 3, "Should have at least system + user + assistant messages"
|
||||||
|
|
||||||
|
def test_new_pattern_cancel(self, client: Letta, agent, server_url: str):
|
||||||
|
"""Test canceling runs using the new pattern: conversation_id='default' + agent_id query param."""
|
||||||
|
from letta.settings import settings
|
||||||
|
|
||||||
|
if not settings.track_agent_run:
|
||||||
|
pytest.skip("Run tracking disabled - skipping cancel test")
|
||||||
|
|
||||||
|
# NEW PATTERN: conversation_id='default' + agent_id as query param
|
||||||
|
response = requests.post(
|
||||||
|
f"{server_url}/v1/conversations/default/cancel",
|
||||||
|
params={"agent_id": agent.id},
|
||||||
|
)
|
||||||
|
# Returns 200 with results if runs exist, or 409 if no active runs
|
||||||
|
assert response.status_code in [200, 409], f"New pattern should work for cancel: {response.text}"
|
||||||
|
if response.status_code == 200:
|
||||||
|
data = response.json()
|
||||||
|
assert isinstance(data, dict), "Cancel should return a dict"
|
||||||
|
|
||||||
|
def test_new_pattern_compact(self, client: Letta, agent, server_url: str):
|
||||||
|
"""Test compacting conversation using the new pattern: conversation_id='default' + agent_id in body."""
|
||||||
|
# Send many messages to have enough for compaction
|
||||||
|
for i in range(10):
|
||||||
|
requests.post(
|
||||||
|
f"{server_url}/v1/conversations/{agent.id}/messages",
|
||||||
|
json={
|
||||||
|
"messages": [{"role": "user", "content": f"Message {i} for compaction test"}],
|
||||||
|
"streaming": False,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# NEW PATTERN: conversation_id='default' + agent_id in request body
|
||||||
|
response = requests.post(
|
||||||
|
f"{server_url}/v1/conversations/default/compact",
|
||||||
|
json={"agent_id": agent.id},
|
||||||
|
)
|
||||||
|
# May return 200 (success) or 400 (not enough messages to compact)
|
||||||
|
assert response.status_code in [200, 400], f"New pattern should accept agent_id parameter: {response.text}"
|
||||||
|
if response.status_code == 200:
|
||||||
|
data = response.json()
|
||||||
|
assert "summary" in data, "Response should contain summary"
|
||||||
|
assert "num_messages_before" in data, "Response should contain num_messages_before"
|
||||||
|
assert "num_messages_after" in data, "Response should contain num_messages_after"
|
||||||
|
|
||||||
|
def test_new_pattern_stream_retrieve(self, client: Letta, agent, server_url: str):
|
||||||
|
"""Test retrieving stream using the new pattern: conversation_id='default' + agent_id in body."""
|
||||||
|
# NEW PATTERN: conversation_id='default' + agent_id in request body
|
||||||
|
# Note: This will likely return 400 if no active run exists, which is expected
|
||||||
|
response = requests.post(
|
||||||
|
f"{server_url}/v1/conversations/default/stream",
|
||||||
|
json={"agent_id": agent.id},
|
||||||
|
)
|
||||||
|
# Either 200 (if run exists) or 400 (no active run) are both acceptable
|
||||||
|
assert response.status_code in [200, 400], f"Stream retrieve should accept new pattern: {response.text}"
|
||||||
|
|
||||||
|
|
||||||
class TestConversationDelete:
|
class TestConversationDelete:
|
||||||
"""Tests for the conversation delete endpoint."""
|
"""Tests for the conversation delete endpoint."""
|
||||||
@@ -834,3 +1119,130 @@ class TestConversationCompact:
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert response.status_code == 404
|
assert response.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
|
class TestConversationSystemMessageRecompilation:
|
||||||
|
"""Tests that verify the system message is recompiled with latest memory state on new conversation creation."""
|
||||||
|
|
||||||
|
def test_new_conversation_recompiles_system_message_with_updated_memory(self, client: Letta, server_url: str):
|
||||||
|
"""Test the full workflow:
|
||||||
|
1. Agent is created
|
||||||
|
2. Send message to agent (through a conversation)
|
||||||
|
3. Modify the memory block -> check system message is NOT updated with the modified value
|
||||||
|
4. Create a new conversation
|
||||||
|
5. Check new conversation system message DOES have the modified value
|
||||||
|
"""
|
||||||
|
unique_marker = f"UNIQUE_MARKER_{uuid.uuid4().hex[:8]}"
|
||||||
|
|
||||||
|
# Step 1: Create an agent with known memory blocks
|
||||||
|
agent = client.agents.create(
|
||||||
|
name=f"test_sys_msg_recompile_{uuid.uuid4().hex[:8]}",
|
||||||
|
model="openai/gpt-4o-mini",
|
||||||
|
embedding="openai/text-embedding-3-small",
|
||||||
|
memory_blocks=[
|
||||||
|
{"label": "human", "value": "The user is a test user."},
|
||||||
|
{"label": "persona", "value": "You are a helpful assistant."},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Step 2: Create a conversation and send a message to it
|
||||||
|
conv1 = client.conversations.create(agent_id=agent.id)
|
||||||
|
|
||||||
|
list(
|
||||||
|
client.conversations.messages.create(
|
||||||
|
conversation_id=conv1.id,
|
||||||
|
messages=[{"role": "user", "content": "Hello, just a quick test."}],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify the conversation has messages including a system message
|
||||||
|
conv1_messages = client.conversations.messages.list(
|
||||||
|
conversation_id=conv1.id,
|
||||||
|
order="asc",
|
||||||
|
)
|
||||||
|
assert len(conv1_messages) >= 3 # system + user + assistant
|
||||||
|
assert conv1_messages[0].message_type == "system_message"
|
||||||
|
|
||||||
|
# Get the original system message content
|
||||||
|
original_system_content = conv1_messages[0].content
|
||||||
|
assert unique_marker not in original_system_content, "Marker should not be in original system message"
|
||||||
|
|
||||||
|
# Step 3: Modify the memory block with a unique marker
|
||||||
|
client.agents.blocks.update(
|
||||||
|
agent_id=agent.id,
|
||||||
|
block_label="human",
|
||||||
|
value=f"The user is a test user. {unique_marker}",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify the block was actually updated
|
||||||
|
updated_block = client.agents.blocks.retrieve(agent_id=agent.id, block_label="human")
|
||||||
|
assert unique_marker in updated_block.value
|
||||||
|
|
||||||
|
# Check that the OLD conversation's system message is NOT updated
|
||||||
|
conv1_messages_after_update = client.conversations.messages.list(
|
||||||
|
conversation_id=conv1.id,
|
||||||
|
order="asc",
|
||||||
|
)
|
||||||
|
old_system_content = conv1_messages_after_update[0].content
|
||||||
|
assert unique_marker not in old_system_content, "Old conversation system message should NOT contain the updated memory value"
|
||||||
|
|
||||||
|
# Step 4: Create a new conversation
|
||||||
|
conv2 = client.conversations.create(agent_id=agent.id)
|
||||||
|
|
||||||
|
# Step 5: Check the new conversation's system message has the updated value
|
||||||
|
# The system message should be compiled at creation time with the latest memory
|
||||||
|
conv2_retrieved = client.conversations.retrieve(conversation_id=conv2.id)
|
||||||
|
assert len(conv2_retrieved.in_context_message_ids) == 1, (
|
||||||
|
f"New conversation should have exactly 1 system message, got {len(conv2_retrieved.in_context_message_ids)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
conv2_messages = client.conversations.messages.list(
|
||||||
|
conversation_id=conv2.id,
|
||||||
|
order="asc",
|
||||||
|
)
|
||||||
|
assert len(conv2_messages) >= 1
|
||||||
|
assert conv2_messages[0].message_type == "system_message"
|
||||||
|
|
||||||
|
new_system_content = conv2_messages[0].content
|
||||||
|
assert unique_marker in new_system_content, (
|
||||||
|
f"New conversation system message should contain the updated memory value '{unique_marker}', "
|
||||||
|
f"but system message content did not include it"
|
||||||
|
)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
client.agents.delete(agent_id=agent.id)
|
||||||
|
|
||||||
|
def test_conversation_creation_initializes_system_message(self, client: Letta, server_url: str):
|
||||||
|
"""Test that creating a conversation immediately initializes it with a system message."""
|
||||||
|
agent = client.agents.create(
|
||||||
|
name=f"test_conv_init_{uuid.uuid4().hex[:8]}",
|
||||||
|
model="openai/gpt-4o-mini",
|
||||||
|
embedding="openai/text-embedding-3-small",
|
||||||
|
memory_blocks=[
|
||||||
|
{"label": "human", "value": "Test user for system message init."},
|
||||||
|
{"label": "persona", "value": "You are a helpful assistant."},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create a conversation (without sending any messages)
|
||||||
|
conversation = client.conversations.create(agent_id=agent.id)
|
||||||
|
|
||||||
|
# Verify the conversation has a system message immediately
|
||||||
|
retrieved = client.conversations.retrieve(conversation_id=conversation.id)
|
||||||
|
assert len(retrieved.in_context_message_ids) == 1, (
|
||||||
|
f"Expected 1 system message after conversation creation, got {len(retrieved.in_context_message_ids)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify the system message content contains memory block values
|
||||||
|
messages = client.conversations.messages.list(
|
||||||
|
conversation_id=conversation.id,
|
||||||
|
order="asc",
|
||||||
|
)
|
||||||
|
assert len(messages) == 1
|
||||||
|
assert messages[0].message_type == "system_message"
|
||||||
|
assert "Test user for system message init." in messages[0].content
|
||||||
|
|
||||||
|
finally:
|
||||||
|
client.agents.delete(agent_id=agent.id)
|
||||||
|
|||||||
@@ -93,7 +93,7 @@ def agent_obj(client: Letta) -> AgentState:
|
|||||||
tool_ids=[send_message_to_agent_tool.id],
|
tool_ids=[send_message_to_agent_tool.id],
|
||||||
model="openai/gpt-4o",
|
model="openai/gpt-4o",
|
||||||
embedding="openai/text-embedding-3-small",
|
embedding="openai/text-embedding-3-small",
|
||||||
context_window_limit=32000,
|
context_window_limit=128000,
|
||||||
)
|
)
|
||||||
yield agent_state_instance
|
yield agent_state_instance
|
||||||
|
|
||||||
@@ -107,7 +107,7 @@ def other_agent_obj(client: Letta) -> AgentState:
|
|||||||
include_multi_agent_tools=False,
|
include_multi_agent_tools=False,
|
||||||
model="openai/gpt-4o",
|
model="openai/gpt-4o",
|
||||||
embedding="openai/text-embedding-3-small",
|
embedding="openai/text-embedding-3-small",
|
||||||
context_window_limit=32000,
|
context_window_limit=128000,
|
||||||
)
|
)
|
||||||
|
|
||||||
yield agent_state_instance
|
yield agent_state_instance
|
||||||
|
|||||||
@@ -366,6 +366,8 @@ async def test_compaction_settings_model_uses_separate_llm_config_for_summarizat
|
|||||||
async def test_create_agent_sets_default_compaction_model_anthropic(server: SyncServer, default_user):
|
async def test_create_agent_sets_default_compaction_model_anthropic(server: SyncServer, default_user):
|
||||||
"""When no compaction_settings provided for Anthropic agent, default haiku model should be set."""
|
"""When no compaction_settings provided for Anthropic agent, default haiku model should be set."""
|
||||||
from letta.schemas.agent import CreateAgent
|
from letta.schemas.agent import CreateAgent
|
||||||
|
from letta.schemas.enums import ProviderType
|
||||||
|
from letta.services.summarizer.summarizer_config import get_default_summarizer_model
|
||||||
|
|
||||||
await server.init_async(init_with_default_org_and_user=True)
|
await server.init_async(init_with_default_org_and_user=True)
|
||||||
|
|
||||||
@@ -384,7 +386,7 @@ async def test_create_agent_sets_default_compaction_model_anthropic(server: Sync
|
|||||||
|
|
||||||
# Should have default haiku model set
|
# Should have default haiku model set
|
||||||
assert agent.compaction_settings is not None
|
assert agent.compaction_settings is not None
|
||||||
assert agent.compaction_settings.model == "anthropic/claude-haiku-4-5-20251001"
|
assert agent.compaction_settings.model == get_default_summarizer_model(ProviderType.anthropic)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -808,6 +810,79 @@ async def test_update_agent_compaction_settings(server: SyncServer, comprehensiv
|
|||||||
assert updated_agent.compaction_settings.prompt_acknowledgement == False
|
assert updated_agent.compaction_settings.prompt_acknowledgement == False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_update_agent_partial_compaction_settings(server: SyncServer, comprehensive_test_agent_fixture, default_user):
|
||||||
|
"""Test that an agent's compaction_settings can be upserted."""
|
||||||
|
from letta.services.summarizer.summarizer_config import get_default_prompt_for_mode
|
||||||
|
|
||||||
|
agent, _ = comprehensive_test_agent_fixture
|
||||||
|
|
||||||
|
# Create new compaction settings
|
||||||
|
original_compaction_settings = agent.compaction_settings.model_copy()
|
||||||
|
|
||||||
|
new_compaction_settings = CompactionSettings(
|
||||||
|
mode="all",
|
||||||
|
prompt_acknowledgement=True,
|
||||||
|
clip_chars=3000,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update agent with compaction settings
|
||||||
|
update_agent_request = UpdateAgent(
|
||||||
|
compaction_settings=new_compaction_settings,
|
||||||
|
)
|
||||||
|
|
||||||
|
updated_agent = await server.agent_manager.update_agent_async(agent.id, update_agent_request, actor=default_user)
|
||||||
|
|
||||||
|
# Verify compaction settings were updated correctly
|
||||||
|
assert updated_agent.compaction_settings is not None
|
||||||
|
assert updated_agent.compaction_settings.model == original_compaction_settings.model
|
||||||
|
assert updated_agent.compaction_settings.model_settings == original_compaction_settings.model_settings
|
||||||
|
assert updated_agent.compaction_settings.sliding_window_percentage == original_compaction_settings.sliding_window_percentage
|
||||||
|
assert updated_agent.compaction_settings.mode == "all"
|
||||||
|
assert updated_agent.compaction_settings.clip_chars == 3000
|
||||||
|
assert updated_agent.compaction_settings.prompt == get_default_prompt_for_mode("all")
|
||||||
|
assert updated_agent.compaction_settings.prompt_acknowledgement == True
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_update_agent_partial_compaction_settings_same_mode(server: SyncServer, comprehensive_test_agent_fixture, default_user):
|
||||||
|
"""Test that if the mode stays the same without a prompt passed in, the prompt is not updated."""
|
||||||
|
|
||||||
|
agent, _ = comprehensive_test_agent_fixture
|
||||||
|
|
||||||
|
update_agent_request = UpdateAgent(
|
||||||
|
compaction_settings=CompactionSettings(mode="sliding_window", prompt="This is a fake prompt."),
|
||||||
|
)
|
||||||
|
updated_agent = await server.agent_manager.update_agent_async(agent.id, update_agent_request, actor=default_user)
|
||||||
|
|
||||||
|
assert updated_agent.compaction_settings is not None
|
||||||
|
assert updated_agent.compaction_settings.prompt == "This is a fake prompt."
|
||||||
|
|
||||||
|
# Create new compaction settings
|
||||||
|
original_compaction_settings = updated_agent.compaction_settings.model_copy()
|
||||||
|
|
||||||
|
new_compaction_settings = CompactionSettings(
|
||||||
|
mode="sliding_window",
|
||||||
|
model="openai/gpt-4o-mini",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update agent with compaction settings
|
||||||
|
update_agent_request = UpdateAgent(
|
||||||
|
compaction_settings=new_compaction_settings,
|
||||||
|
)
|
||||||
|
|
||||||
|
final_agent = await server.agent_manager.update_agent_async(updated_agent.id, update_agent_request, actor=default_user)
|
||||||
|
|
||||||
|
# Verify compaction settings were updated correctly
|
||||||
|
assert final_agent.compaction_settings is not None
|
||||||
|
assert final_agent.compaction_settings.sliding_window_percentage == original_compaction_settings.sliding_window_percentage
|
||||||
|
assert final_agent.compaction_settings.prompt == original_compaction_settings.prompt
|
||||||
|
assert final_agent.compaction_settings.clip_chars == original_compaction_settings.clip_chars
|
||||||
|
assert final_agent.compaction_settings.prompt_acknowledgement == original_compaction_settings.prompt_acknowledgement
|
||||||
|
assert final_agent.compaction_settings.mode == "sliding_window"
|
||||||
|
assert final_agent.compaction_settings.model == "openai/gpt-4o-mini"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_agent_file_defaults_based_on_context_window(server: SyncServer, default_user, default_block):
|
async def test_agent_file_defaults_based_on_context_window(server: SyncServer, default_user, default_block):
|
||||||
"""Test that file-related defaults are set based on the model's context window size"""
|
"""Test that file-related defaults are set based on the model's context window size"""
|
||||||
|
|||||||
@@ -562,7 +562,9 @@ async def test_update_block(server: SyncServer, default_user):
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_update_block_limit(server: SyncServer, default_user):
|
async def test_update_block_limit(server: SyncServer, default_user):
|
||||||
block_manager = BlockManager()
|
block_manager = BlockManager()
|
||||||
block = await block_manager.create_or_update_block_async(PydanticBlock(label="persona", value="Original Content"), actor=default_user)
|
block = await block_manager.create_or_update_block_async(
|
||||||
|
PydanticBlock(label="persona", value="Original Content", limit=20000), actor=default_user
|
||||||
|
)
|
||||||
|
|
||||||
limit = len("Updated Content") * 2000
|
limit = len("Updated Content") * 2000
|
||||||
update_data = BlockUpdate(value="Updated Content" * 2000, description="Updated description")
|
update_data = BlockUpdate(value="Updated Content" * 2000, description="Updated description")
|
||||||
|
|||||||
@@ -355,8 +355,9 @@ async def test_add_messages_to_conversation(
|
|||||||
actor=default_user,
|
actor=default_user,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert len(message_ids) == 1
|
# create_conversation auto-creates a system message at position 0
|
||||||
assert message_ids[0] == hello_world_message_fixture.id
|
assert len(message_ids) == 2
|
||||||
|
assert hello_world_message_fixture.id in message_ids
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -385,8 +386,9 @@ async def test_get_messages_for_conversation(
|
|||||||
actor=default_user,
|
actor=default_user,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert len(messages) == 1
|
# create_conversation auto-creates a system message at position 0
|
||||||
assert messages[0].id == hello_world_message_fixture.id
|
assert len(messages) == 2
|
||||||
|
assert any(m.id == hello_world_message_fixture.id for m in messages)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -430,7 +432,10 @@ async def test_message_ordering_in_conversation(conversation_manager, server: Sy
|
|||||||
actor=default_user,
|
actor=default_user,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert retrieved_ids == [m.id for m in messages]
|
# create_conversation auto-creates a system message at position 0,
|
||||||
|
# so the user messages start at index 1
|
||||||
|
assert len(retrieved_ids) == len(messages) + 1
|
||||||
|
assert retrieved_ids[1:] == [m.id for m in messages]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -489,7 +494,7 @@ async def test_update_in_context_messages(conversation_manager, server: SyncServ
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_empty_conversation_message_ids(conversation_manager, server: SyncServer, sarah_agent, default_user):
|
async def test_empty_conversation_message_ids(conversation_manager, server: SyncServer, sarah_agent, default_user):
|
||||||
"""Test getting message IDs from an empty conversation."""
|
"""Test getting message IDs from a newly created conversation (has auto-created system message)."""
|
||||||
# Create a conversation
|
# Create a conversation
|
||||||
conversation = await conversation_manager.create_conversation(
|
conversation = await conversation_manager.create_conversation(
|
||||||
agent_id=sarah_agent.id,
|
agent_id=sarah_agent.id,
|
||||||
@@ -497,13 +502,14 @@ async def test_empty_conversation_message_ids(conversation_manager, server: Sync
|
|||||||
actor=default_user,
|
actor=default_user,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get message IDs (should be empty)
|
# create_conversation auto-creates a system message at position 0,
|
||||||
|
# so a newly created conversation has exactly one message
|
||||||
message_ids = await conversation_manager.get_message_ids_for_conversation(
|
message_ids = await conversation_manager.get_message_ids_for_conversation(
|
||||||
conversation_id=conversation.id,
|
conversation_id=conversation.id,
|
||||||
actor=default_user,
|
actor=default_user,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert message_ids == []
|
assert len(message_ids) == 1
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -551,9 +557,11 @@ async def test_list_conversation_messages(conversation_manager, server: SyncServ
|
|||||||
actor=default_user,
|
actor=default_user,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert len(letta_messages) == 2
|
# create_conversation auto-creates a system message, so we get 3 total
|
||||||
|
assert len(letta_messages) == 3
|
||||||
# Check message types
|
# Check message types
|
||||||
message_types = [m.message_type for m in letta_messages]
|
message_types = [m.message_type for m in letta_messages]
|
||||||
|
assert "system_message" in message_types
|
||||||
assert "user_message" in message_types
|
assert "user_message" in message_types
|
||||||
assert "assistant_message" in message_types
|
assert "assistant_message" in message_types
|
||||||
|
|
||||||
@@ -902,9 +910,12 @@ async def test_list_conversation_messages_ascending_order(conversation_manager,
|
|||||||
reverse=False,
|
reverse=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
# First message should be "Message 0" (oldest)
|
# create_conversation auto-creates a system message at position 0,
|
||||||
assert len(letta_messages) == 3
|
# so we get 4 messages total (system + 3 user messages)
|
||||||
assert "Message 0" in letta_messages[0].content
|
assert len(letta_messages) == 4
|
||||||
|
# First message is the auto-created system message; "Message 0" is second
|
||||||
|
assert letta_messages[0].message_type == "system_message"
|
||||||
|
assert "Message 0" in letta_messages[1].content
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -949,8 +960,9 @@ async def test_list_conversation_messages_descending_order(conversation_manager,
|
|||||||
reverse=True,
|
reverse=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# First message should be "Message 2" (newest)
|
# create_conversation auto-creates a system message, so 4 total
|
||||||
assert len(letta_messages) == 3
|
# First message should be "Message 2" (newest) in descending order
|
||||||
|
assert len(letta_messages) == 4
|
||||||
assert "Message 2" in letta_messages[0].content
|
assert "Message 2" in letta_messages[0].content
|
||||||
|
|
||||||
|
|
||||||
@@ -1081,7 +1093,8 @@ async def test_list_conversation_messages_no_group_id_returns_all(conversation_m
|
|||||||
actor=default_user,
|
actor=default_user,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert len(all_messages) == 3
|
# create_conversation auto-creates a system message, so 4 total
|
||||||
|
assert len(all_messages) == 4
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -1137,8 +1150,8 @@ async def test_list_conversation_messages_order_with_pagination(conversation_man
|
|||||||
|
|
||||||
# The first messages should be different
|
# The first messages should be different
|
||||||
assert page_asc[0].content != page_desc[0].content
|
assert page_asc[0].content != page_desc[0].content
|
||||||
# In ascending, first should be "Message 0"
|
# In ascending, first is the auto-created system message, second is "Message 0"
|
||||||
assert "Message 0" in page_asc[0].content
|
assert page_asc[0].message_type == "system_message"
|
||||||
# In descending, first should be "Message 4"
|
# In descending, first should be "Message 4"
|
||||||
assert "Message 4" in page_desc[0].content
|
assert "Message 4" in page_desc[0].content
|
||||||
|
|
||||||
|
|||||||
@@ -579,8 +579,11 @@ async def test_server_startup_syncs_base_providers(default_user, default_organiz
|
|||||||
yield item
|
yield item
|
||||||
|
|
||||||
# Mock the Anthropic AsyncAnthropic client
|
# Mock the Anthropic AsyncAnthropic client
|
||||||
|
# NOTE: list() must be a regular (non-async) method that returns an async iterable,
|
||||||
|
# because the real Anthropic SDK's models.list() returns an AsyncPage (which has __aiter__)
|
||||||
|
# directly, and the code uses `async for model in client.models.list()`.
|
||||||
class MockAnthropicModels:
|
class MockAnthropicModels:
|
||||||
async def list(self):
|
def list(self):
|
||||||
return MockAnthropicAsyncPage(mock_anthropic_models["data"])
|
return MockAnthropicAsyncPage(mock_anthropic_models["data"])
|
||||||
|
|
||||||
class MockAsyncAnthropic:
|
class MockAsyncAnthropic:
|
||||||
@@ -877,8 +880,10 @@ async def test_server_startup_handles_api_errors_gracefully(default_user, defaul
|
|||||||
for item in self._items:
|
for item in self._items:
|
||||||
yield item
|
yield item
|
||||||
|
|
||||||
|
# NOTE: The real SDK's models.list() is a regular (non-async) method that
|
||||||
|
# returns an AsyncPaginator (which is async-iterable).
|
||||||
class MockAnthropicModels:
|
class MockAnthropicModels:
|
||||||
async def list(self):
|
def list(self):
|
||||||
return MockAnthropicAsyncPage(mock_anthropic_data)
|
return MockAnthropicAsyncPage(mock_anthropic_data)
|
||||||
|
|
||||||
class MockAsyncAnthropic:
|
class MockAsyncAnthropic:
|
||||||
|
|||||||
11
tests/model_settings/openai-gpt-5.3-chat-latest.json
Normal file
11
tests/model_settings/openai-gpt-5.3-chat-latest.json
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"handle": "openai/gpt-5.3-chat-latest",
|
||||||
|
"model_settings": {
|
||||||
|
"provider_type": "openai",
|
||||||
|
"max_output_tokens": 4096,
|
||||||
|
"parallel_tool_calls": false,
|
||||||
|
"reasoning": {
|
||||||
|
"reasoning_effort": "minimal"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,11 +1,11 @@
|
|||||||
from conftest import create_test_module
|
from conftest import create_test_module
|
||||||
from letta_client import UnprocessableEntityError
|
from letta_client import UnprocessableEntityError
|
||||||
|
|
||||||
from letta.constants import CORE_MEMORY_HUMAN_CHAR_LIMIT, CORE_MEMORY_PERSONA_CHAR_LIMIT
|
from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT
|
||||||
|
|
||||||
BLOCKS_CREATE_PARAMS = [
|
BLOCKS_CREATE_PARAMS = [
|
||||||
("human_block", {"label": "human", "value": "test"}, {"limit": CORE_MEMORY_HUMAN_CHAR_LIMIT}, None),
|
("human_block", {"label": "human", "value": "test"}, {"limit": CORE_MEMORY_BLOCK_CHAR_LIMIT}, None),
|
||||||
("persona_block", {"label": "persona", "value": "test1"}, {"limit": CORE_MEMORY_PERSONA_CHAR_LIMIT}, None),
|
("persona_block", {"label": "persona", "value": "test1"}, {"limit": CORE_MEMORY_BLOCK_CHAR_LIMIT}, None),
|
||||||
]
|
]
|
||||||
|
|
||||||
BLOCKS_UPDATE_PARAMS = [
|
BLOCKS_UPDATE_PARAMS = [
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -44,7 +44,7 @@
|
|||||||
"provider_name": null,
|
"provider_name": null,
|
||||||
"provider_category": null,
|
"provider_category": null,
|
||||||
"model_wrapper": null,
|
"model_wrapper": null,
|
||||||
"context_window": 32000,
|
"context_window": 128000,
|
||||||
"put_inner_thoughts_in_kwargs": false,
|
"put_inner_thoughts_in_kwargs": false,
|
||||||
"handle": "anthropic/claude-3.5-sonnet",
|
"handle": "anthropic/claude-3.5-sonnet",
|
||||||
"temperature": 1.0,
|
"temperature": 1.0,
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -56,7 +56,7 @@
|
|||||||
"provider_name": "openai",
|
"provider_name": "openai",
|
||||||
"provider_category": "base",
|
"provider_category": "base",
|
||||||
"model_wrapper": null,
|
"model_wrapper": null,
|
||||||
"context_window": 32000,
|
"context_window": 128000,
|
||||||
"put_inner_thoughts_in_kwargs": true,
|
"put_inner_thoughts_in_kwargs": true,
|
||||||
"handle": "openai/gpt-4o-mini",
|
"handle": "openai/gpt-4o-mini",
|
||||||
"temperature": 1.0,
|
"temperature": 1.0,
|
||||||
|
|||||||
@@ -55,7 +55,7 @@
|
|||||||
"provider_name": "openai",
|
"provider_name": "openai",
|
||||||
"provider_category": "base",
|
"provider_category": "base",
|
||||||
"model_wrapper": null,
|
"model_wrapper": null,
|
||||||
"context_window": 32000,
|
"context_window": 128000,
|
||||||
"put_inner_thoughts_in_kwargs": true,
|
"put_inner_thoughts_in_kwargs": true,
|
||||||
"handle": "openai/gpt-4.1-mini",
|
"handle": "openai/gpt-4.1-mini",
|
||||||
"temperature": 1.0,
|
"temperature": 1.0,
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ def llm_config():
|
|||||||
model="claude-3-7-sonnet-20250219",
|
model="claude-3-7-sonnet-20250219",
|
||||||
model_endpoint_type="anthropic",
|
model_endpoint_type="anthropic",
|
||||||
model_endpoint="https://api.anthropic.com/v1",
|
model_endpoint="https://api.anthropic.com/v1",
|
||||||
context_window=32000,
|
context_window=128000,
|
||||||
handle="anthropic/claude-sonnet-4-20250514",
|
handle="anthropic/claude-sonnet-4-20250514",
|
||||||
put_inner_thoughts_in_kwargs=False,
|
put_inner_thoughts_in_kwargs=False,
|
||||||
max_tokens=4096,
|
max_tokens=4096,
|
||||||
|
|||||||
@@ -52,8 +52,17 @@ class TestLogContextMiddleware:
|
|||||||
async def get_files(self, agent_id, org_id, ref):
|
async def get_files(self, agent_id, org_id, ref):
|
||||||
assert ref == "HEAD"
|
assert ref == "HEAD"
|
||||||
return {
|
return {
|
||||||
"system/human.md": "---\ndescription: human\nlimit: 20000\n---\nname: sarah",
|
"system/human.md": "---\ndescription: human\n---\nname: sarah",
|
||||||
"system/persona.md": "---\ndescription: persona\nlimit: 20000\n---\nbe helpful",
|
"system/persona.md": "---\ndescription: persona\n---\nbe helpful",
|
||||||
|
"skills/research-helper/SKILL.md": (
|
||||||
|
"---\n"
|
||||||
|
"name: research-helper\n"
|
||||||
|
"description: Search the web and summarize findings.\n"
|
||||||
|
"---\n"
|
||||||
|
"# Research Helper\n\n"
|
||||||
|
"Use this skill to do deep web research and summarize results.\n"
|
||||||
|
),
|
||||||
|
"skills/research-helper/references/details.md": "---\ndescription: nested\n---\nShould not be synced",
|
||||||
}
|
}
|
||||||
|
|
||||||
class DummyMemoryRepoManager:
|
class DummyMemoryRepoManager:
|
||||||
@@ -95,6 +104,12 @@ class TestLogContextMiddleware:
|
|||||||
labels = {call["label"] for call in synced_calls}
|
labels = {call["label"] for call in synced_calls}
|
||||||
assert "system/human" in labels
|
assert "system/human" in labels
|
||||||
assert "system/persona" in labels
|
assert "system/persona" in labels
|
||||||
|
assert "skills/research-helper" in labels
|
||||||
|
assert "skills/research-helper/references/details" not in labels
|
||||||
|
|
||||||
|
by_label = {call["label"]: call for call in synced_calls}
|
||||||
|
assert by_label["skills/research-helper"]["description"] == "Search the web and summarize findings."
|
||||||
|
assert by_label["skills/research-helper"]["value"].startswith("# Research Helper")
|
||||||
|
|
||||||
def test_extracts_actor_id_from_headers(self, client):
|
def test_extracts_actor_id_from_headers(self, client):
|
||||||
response = client.get("/v1/agents/agent-123e4567-e89b-42d3-8456-426614174000", headers={"user_id": "user-abc123"})
|
response = client.get("/v1/agents/agent-123e4567-e89b-42d3-8456-426614174000", headers={"user_id": "user-abc123"})
|
||||||
|
|||||||
@@ -25,9 +25,9 @@ def test_chat_memory_init_and_utils(chat_memory: Memory):
|
|||||||
|
|
||||||
def test_memory_limit_validation(chat_memory: Memory):
|
def test_memory_limit_validation(chat_memory: Memory):
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
ChatMemory(persona="x " * 50000, human="y " * 50000)
|
ChatMemory(persona="x " * 60000, human="y " * 60000)
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
chat_memory.get_block("persona").value = "x " * 50000
|
chat_memory.get_block("persona").value = "x " * 60000
|
||||||
|
|
||||||
|
|
||||||
def test_get_block_not_found(chat_memory: Memory):
|
def test_get_block_not_found(chat_memory: Memory):
|
||||||
@@ -253,3 +253,104 @@ def test_compile_git_memory_filesystem_handles_leaf_directory_collisions():
|
|||||||
assert "system/" in out
|
assert "system/" in out
|
||||||
assert "system.md" in out
|
assert "system.md" in out
|
||||||
assert "human.md" in out
|
assert "human.md" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_compile_git_memory_filesystem_renders_descriptions_for_non_system_files():
|
||||||
|
"""Files outside system/ should render their description in the filesystem tree.
|
||||||
|
|
||||||
|
e.g. `reference/api.md (Contains API specifications)`
|
||||||
|
System files should NOT render descriptions in the tree.
|
||||||
|
"""
|
||||||
|
|
||||||
|
m = Memory(
|
||||||
|
agent_type=AgentType.letta_v1_agent,
|
||||||
|
git_enabled=True,
|
||||||
|
blocks=[
|
||||||
|
Block(label="system/human", value="human data", limit=100, description="The human block"),
|
||||||
|
Block(label="system/persona", value="persona data", limit=100, description="The persona block"),
|
||||||
|
Block(label="reference/api", value="api specs", limit=100, description="Contains API specifications"),
|
||||||
|
Block(label="notes", value="my notes", limit=100, description="Personal notes and reminders"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
out = m.compile()
|
||||||
|
|
||||||
|
# Filesystem tree should exist
|
||||||
|
assert "<memory_filesystem>" in out
|
||||||
|
|
||||||
|
# Non-system files should have descriptions rendered
|
||||||
|
assert "api.md (Contains API specifications)" in out
|
||||||
|
assert "notes.md (Personal notes and reminders)" in out
|
||||||
|
|
||||||
|
# System files should NOT have descriptions in the tree
|
||||||
|
assert "human.md (The human block)" not in out
|
||||||
|
assert "persona.md (The persona block)" not in out
|
||||||
|
# But they should still be in the tree (without description)
|
||||||
|
assert "human.md" in out
|
||||||
|
assert "persona.md" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_compile_git_memory_filesystem_no_description_when_empty():
|
||||||
|
"""Files outside system/ with no description should render without parentheses."""
|
||||||
|
|
||||||
|
m = Memory(
|
||||||
|
agent_type=AgentType.letta_v1_agent,
|
||||||
|
git_enabled=True,
|
||||||
|
blocks=[
|
||||||
|
Block(label="system/human", value="human data", limit=100),
|
||||||
|
Block(label="notes", value="my notes", limit=100),
|
||||||
|
Block(label="reference/api", value="api specs", limit=100, description="API docs"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
out = m.compile()
|
||||||
|
|
||||||
|
# notes.md has no description, so no parentheses
|
||||||
|
assert "notes.md\n" in out or "notes.md\n" in out
|
||||||
|
# reference/api.md has a description
|
||||||
|
assert "api.md (API docs)" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_compile_git_memory_filesystem_condenses_skills_to_top_level_entries():
|
||||||
|
"""skills/ should render as top-level skill entries with description.
|
||||||
|
|
||||||
|
We intentionally avoid showing nested files under skills/ in the system
|
||||||
|
prompt tree to keep context concise.
|
||||||
|
"""
|
||||||
|
|
||||||
|
m = Memory(
|
||||||
|
agent_type=AgentType.letta_v1_agent,
|
||||||
|
git_enabled=True,
|
||||||
|
blocks=[
|
||||||
|
Block(label="system/human", value="human data", limit=100),
|
||||||
|
Block(
|
||||||
|
label="skills/searching-messages",
|
||||||
|
value="# searching messages",
|
||||||
|
limit=100,
|
||||||
|
description="Search past messages to recall context.",
|
||||||
|
),
|
||||||
|
Block(
|
||||||
|
label="skills/creating-skills",
|
||||||
|
value="# creating skills",
|
||||||
|
limit=100,
|
||||||
|
description="Guide for creating effective skills.",
|
||||||
|
),
|
||||||
|
Block(
|
||||||
|
label="skills/creating-skills/references/workflows",
|
||||||
|
value="nested docs",
|
||||||
|
limit=100,
|
||||||
|
description="Nested workflow docs (should not appear)",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
out = m.compile()
|
||||||
|
|
||||||
|
# Condensed top-level skill entries with descriptions.
|
||||||
|
assert "searching-messages (Search past messages to recall context.)" in out
|
||||||
|
assert "creating-skills (Guide for creating effective skills.)" in out
|
||||||
|
|
||||||
|
# Do not show .md suffixes or nested skill docs in tree.
|
||||||
|
assert "searching-messages.md" not in out
|
||||||
|
assert "creating-skills.md" not in out
|
||||||
|
assert "references/workflows" not in out
|
||||||
|
|||||||
@@ -24,6 +24,9 @@ def test_get_headers_user_id_allows_none():
|
|||||||
letta_v1_agent=None,
|
letta_v1_agent=None,
|
||||||
letta_v1_agent_message_async=None,
|
letta_v1_agent_message_async=None,
|
||||||
modal_sandbox=None,
|
modal_sandbox=None,
|
||||||
|
billing_plan_type=None,
|
||||||
|
billing_cost_source=None,
|
||||||
|
billing_customer_id=None,
|
||||||
)
|
)
|
||||||
assert isinstance(headers, HeaderParams)
|
assert isinstance(headers, HeaderParams)
|
||||||
|
|
||||||
@@ -40,6 +43,9 @@ def test_get_headers_user_id_rejects_invalid_format():
|
|||||||
letta_v1_agent=None,
|
letta_v1_agent=None,
|
||||||
letta_v1_agent_message_async=None,
|
letta_v1_agent_message_async=None,
|
||||||
modal_sandbox=None,
|
modal_sandbox=None,
|
||||||
|
billing_plan_type=None,
|
||||||
|
billing_cost_source=None,
|
||||||
|
billing_customer_id=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -54,6 +60,9 @@ def test_get_headers_user_id_accepts_valid_format():
|
|||||||
letta_v1_agent=None,
|
letta_v1_agent=None,
|
||||||
letta_v1_agent_message_async=None,
|
letta_v1_agent_message_async=None,
|
||||||
modal_sandbox=None,
|
modal_sandbox=None,
|
||||||
|
billing_plan_type=None,
|
||||||
|
billing_cost_source=None,
|
||||||
|
billing_customer_id=None,
|
||||||
)
|
)
|
||||||
assert headers.actor_id == "user-123e4567-e89b-42d3-8456-426614174000"
|
assert headers.actor_id == "user-123e4567-e89b-42d3-8456-426614174000"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user