remove docs

This commit is contained in:
Caren Thomas
2025-10-24 15:15:51 -07:00
parent 1848df2daa
commit 0f60f67742
62 changed files with 0 additions and 15329 deletions

View File

@@ -1,152 +0,0 @@
/* ──────────────────────────────────────────────────────────
assets/leaderboard.js
Load via docs.yml → js: - path: assets/leaderboard.js
(strategy: lazyOnload is fine)
────────────────────────────────────────────────────────── */
import yaml from 'https://cdn.jsdelivr.net/npm/js-yaml@4.1.0/+esm';
console.log('🏁 leaderboard.js loaded on', location.pathname);
// Dollar cost at which a row's cost bar reaches full width; rows above this
// are clipped to 100% and flagged with a warning icon in render().
const COST_CAP = 120;
/* ---------- helpers ---------- */
const pct = (v) => Number(v).toPrecision(3) + '%';
const cost = (v) => '$' + Number(v).toFixed(2);
/*
 * Run `cb` once the DOM is parsed: defer to DOMContentLoaded while the
 * document is still loading, otherwise invoke it immediately.
 */
const ready = (cb) => {
  if (document.readyState === 'loading') {
    return document.addEventListener('DOMContentLoaded', cb);
  }
  return cb();
};
/* ---------- main ---------- */
/*
 * Entry point: wait for the leaderboard container to exist, fetch and parse
 * the results YAML, then render the table and wire up search + column sorting.
 */
ready(async () => {
// const host = document.getElementById('letta-leaderboard');
// if (!host) {
// console.warn('LB-script: #letta-leaderboard not found - bailing out.');
// return;
// }
/* ---- wait for the leaderboard container to appear (SPA nav safe) ---- */
// Resolves with the container element, or null after the 5 s safety timeout.
const host = await new Promise((resolve, reject) => {
const el = document.getElementById('letta-leaderboard');
if (el) return resolve(el); // SSR / hard refresh path
const obs = new MutationObserver(() => {
const found = document.getElementById('letta-leaderboard');
if (found) {
obs.disconnect();
resolve(found); // CSR navigation path
}
});
obs.observe(document.body, { childList: true, subtree: true });
// This timer fires even after a successful resolve; the extra disconnect()
// and reject() are harmless no-ops on an already-settled promise.
setTimeout(() => {
obs.disconnect();
reject(new Error('#letta-leaderboard never appeared'));
}, 5000); // safety timeout
}).catch((err) => {
console.warn('LB-script:', err.message);
return null;
});
if (!host) return; // still no luck → give up
/* ----- figure out URL of data.yaml ----- */
// const path = location.pathname.endsWith('/')
// ? location.pathname
// : location.pathname.replace(/[^/]*$/, ''); // strip file/slug
// const dataUrl = `${location.origin}${path}data.yaml`;
// const dataUrl = `${location.origin}/leaderboard/data.yaml`; // one-liner, always right
// const dataUrl = `${location.origin}/assets/leaderboard.yaml`;
// const dataUrl = `./assets/leaderboard.yaml`; // one-liner, always right
// const dataUrl = `${location.origin}/data.yaml`; // one-liner, always right
// Results are fetched straight from the letta-evals repo (main branch).
const dataUrl = 'https://raw.githubusercontent.com/letta-ai/letta-evals/refs/heads/main/letta-leaderboard/leaderboard_results.yaml';
// const dataUrl = 'https://cdn.jsdelivr.net/gh/letta-ai/letta-evals@latest/letta-leaderboard/leaderboard_results.yaml';
console.log('LB-script: fetching', dataUrl);
/* ----- fetch & parse YAML ----- */
// rows: presumably an array of { model, average, total_cost } records,
// judging by the fields read in render() — TODO confirm against the YAML.
let rows;
try {
const resp = await fetch(dataUrl);
console.log(`LB-script: status ${resp.status}`);
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
rows = yaml.load(await resp.text());
} catch (err) {
console.error('LB-script: failed to load YAML →', err);
return;
}
/* ----- wire up table ----- */
const dir = Object.create(null); // per-column sort direction: key → 'asc' | 'desc'
const tbody = document.getElementById('lb-body');
const searchI = document.getElementById('lb-search');
const headers = document.querySelectorAll('#lb-table thead th[data-key]');
// NOTE(review): #lb-body / #lb-search are assumed to exist whenever
// #letta-leaderboard does; a missing element would throw here — confirm markup.
searchI.value = ''; // clear any persisted filter
// Rebuild the whole <tbody>. Rows that don't match the search query stay in
// the DOM but get the 'hidden' class, so sort order survives filtering.
const render = () => {
const q = searchI.value.toLowerCase();
// NOTE(review): row fields are interpolated into innerHTML unescaped —
// safe only while the YAML source is trusted.
tbody.innerHTML = rows
.map((r) => {
const over = r.total_cost > COST_CAP;
const barW = over ? '100%' : (r.total_cost / COST_CAP) * 100 + '%';
const costCls = over ? 'cost-high' : 'cost-ok';
const warnIcon = over
? `<span class="warn" title="Cost exceeds $${COST_CAP} cap - bar is clipped to full width">⚠</span>`
: '';
return `
<tr class="${q && !r.model.toLowerCase().includes(q) ? 'hidden' : ''}">
<td style="padding:8px">${r.model}</td>
<td class="bar-cell avg metric">
<div class="bar-viz" style="width:${r.average}%"></div>
<span class="value">${pct(r.average)}</span>
</td>
<td class="bar-cell ${costCls} metric">
<div class="bar-viz" style="width:${barW}"></div>
<span class="value">${cost(r.total_cost)}</span>
${warnIcon}
</td>
</tr>`;
})
.join('');
};
// Reflect the active sort column/direction in the header CSS classes.
const setIndicator = (activeKey) => {
headers.forEach((h) => {
h.classList.remove('asc', 'desc');
if (h.dataset.key === activeKey) h.classList.add(dir[activeKey]);
});
};
/* initial sort ↓ */
dir.average = 'desc';
rows.sort((a, b) => b.average - a.average);
setIndicator('average');
render();
/* search */
searchI.addEventListener('input', render);
/* column sorting */
// Clicking a header toggles its direction. The first click on a column sorts
// descending: dir[key] starts undefined, so `asc` evaluates to false.
headers.forEach((th) => {
const key = th.dataset.key;
th.addEventListener('click', () => {
const asc = dir[key] === 'desc';
dir[key] = asc ? 'asc' : 'desc';
// Numeric columns compare arithmetically; anything else falls back to
// locale-aware string comparison.
rows.sort((a, b) => {
const va = a[key],
vb = b[key];
const cmp =
typeof va === 'number'
? va - vb
: String(va).localeCompare(String(vb));
return asc ? cmp : -cmp;
});
setIndicator(key);
render();
});
});
});

View File

@@ -1,75 +0,0 @@
---
title: Agent Simulator
subtitle: Use the agent simulator to chat with your agent
slug: guides/ade/simulator
---
The Agent Simulator is the central interface where you interact with your agent in real-time. It provides a comprehensive view of your agent's conversation history and tool usage while offering an intuitive chat interface.
<img className="block dark:hidden" src="../../images/ade_screenshot_chat_light.png" />
<img className="hidden dark:block" src="../../images/ade_screenshot_chat.png" />
## Key Features
### Conversation Visualization
The simulator displays the complete conversation and event history of your agent, organized chronologically. Each message is color-coded and formatted according to its type for clear differentiation:
- **User Messages**: Messages sent by you (the user) to the agent. These appear on the right side of the conversation view.
- **Agent Messages**: Responses generated by the agent and directed to the user. These appear on the left side of the conversation view.
- **System Messages**: Non-user messages that represent events or notifications, such as `[Alert] The user just logged on` or `[Notification] File upload completed`. These provide context about events happening in the environment.
- **Function (Tool) Messages** <span style={{ color: '#6366F1' }}><i className="fas fa-rectangle-terminal mr-1"></i></span>: Detailed records of tool executions, including:
- Tool calls made by the agent
- Arguments passed to the tools
- Results returned by the tools
- Any errors encountered during execution
If an error occurs during tool execution, the agent is given an opportunity to handle the error and continue execution by calling the tool again.
The simulator supports real-time streaming of agent responses, allowing you to see the agent's thought process as it happens.
### Advanced Conversation Controls
Beyond basic chatting, the simulator provides several controls to enhance your interaction:
- **Message Type Selection**: Toggle between sending user messages or system messages
- **Conversation History**: Scroll through the entire conversation history
- **Message Search**: Quickly find specific messages or tool calls
- **Tool Execution View**: Expand tool calls to see detailed execution information
- **Token Usage**: Monitor token consumption throughout the conversation
## Using the Simulator Effectively
### Testing Agent Behavior
The simulator is ideal for testing how your agent responds to different inputs:
- Try various user queries to test the agent's understanding
- Send edge case questions to verify error handling
- Use system messages to simulate events and observe reactions
### Debugging Tool Usage
When developing custom tools, the simulator provides valuable insights:
- See exactly which tools the agent chooses to use
- Verify that arguments are correctly formatted
- Check tool execution results and error handling
- Monitor the agent's interpretation of tool results
### Simulating Multi-turn Conversations
To test your agent's memory and conversation abilities:
1. Start with a simple query to establish context
2. Follow up with related questions to test if the agent maintains context
3. Introduce new topics to see how the agent handles context switching
4. Return to previous topics to verify if information was retained
### Best Practices
- **Start with simple queries**: Begin testing with straightforward questions before moving to complex scenarios
- **Monitor tool usage**: Pay attention to which tools the agent chooses and why
- **Test edge cases**: Deliberately test how your agent handles unexpected inputs
- **Use system messages**: Simulate environmental events to test agent adaptability
- **Review context window**: Cross-reference with the Context Window Viewer to understand what information the agent is using to form responses

View File

@@ -1,75 +0,0 @@
---
title: Creating custom memory classes
subtitle: Learn how to create custom memory classes
slug: guides/agents/custom-memory
---
## Customizing in-context memory management
We can extend both the `BaseMemory` and `ChatMemory` classes to implement custom in-context memory management for agents.
For example, you can add an additional memory section to "human" and "persona" such as "organization".
In this example, we'll show how to implement in-context memory management that treats memory as a task queue.
We'll call this `TaskMemory` and extend the `ChatMemory` class so that we have both the original `ChatMemory` tools (`core_memory_replace` & `core_memory_append`) as well as the "human" and "persona" fields.
We show an implementation of `TaskMemory` below:
```python
from letta.memory import ChatMemory, MemoryModule
from typing import Optional, List
class TaskMemory(ChatMemory):
def __init__(self, human: str, persona: str, tasks: List[str]):
super().__init__(human=human, persona=persona)
self.memory["tasks"] = MemoryModule(limit=2000, value=tasks)  # initialize the task queue from the provided list
def task_queue_push(self, task_description: str) -> Optional[str]:
"""
Push to a task queue stored in core memory.
Args:
task_description (str): A description of the next task you must accomplish.
Returns:
Optional[str]: None is always returned as this function does not produce a response.
"""
self.memory["tasks"].value.append(task_description)
return None
def task_queue_pop(self) -> Optional[str]:
"""
Get the next task from the task queue
Returns:
Optional[str]: The description of the task popped from the queue,
if there are still tasks in queue. Otherwise, returns None (the
task queue is empty)
"""
if len(self.memory["tasks"].value) == 0:
return None
task = self.memory["tasks"].value[0]
self.memory["tasks"].value = self.memory["tasks"].value[1:]
return task
```
To create an agent with this custom memory type, we can simply pass in an instance of `TaskMemory` into the agent creation.
We also will modify the persona of the agent to explain how the "tasks" section of memory should be used:
```python
task_agent_state = client.create_agent(
name="task_agent",
memory=TaskMemory(
human="My name is Sarah",
persona="You have an additional section of core memory called `tasks`. " \
+ "This section of memory contains a list of tasks you must do. " \
+ "Use the `task_queue_push` tool to write down tasks so you don't forget to do them. " \
+ "If there are tasks in the task queue, you should call `task_queue_pop` to retrieve and remove them. " \
+ "Keep calling `task_queue_pop` until there are no more tasks in the queue. " \
+ "Do *not* respond to the user until you have completed all tasks in your queue. " \
+ "If you call `task_queue_pop`, you must always do what the popped task specifies",
tasks=["start calling yourself Bob", "tell me a haiku with my name"],
)
)
```

View File

@@ -1,383 +0,0 @@
---
title: Best Practices
subtitle: Patterns, pitfalls, and advanced usage
slug: guides/agents/archival-best-practices
---
## Agent best practices
These patterns help agents use archival memory effectively during conversations.
### 1. Avoid over-insertion
The most common pitfall is inserting too many memories, creating clutter. Trust the agent to decide what's worth storing long-term.
### 2. Use tags consistently
Establish a tag taxonomy and stick to it. Good language models typically handle tagging well.
### 3. Add context to insertions
❌ Don't: "Likes replicants"
✅ Do: "Deckard shows unusual empathy toward replicants, particularly Rachael, suggesting possible replicant identity"
### 4. Let agents experiment
Agents can test different query styles to understand what works:
```python
# What the agent does (agent tool call)
archival_memory_search(query="How does the Voight-Kampff test work?")
archival_memory_search(query="Voight-Kampff procedure")
archival_memory_search(query="replicant detection method")
```
**Important:** Have the agent persist learnings from experimentation in a memory block (like `archival_tracking` or `archival_policies`), not in archival itself (avoid meta-clutter).
## Developer best practices (SDK)
These patterns help developers configure and manage archival memory via the SDK.
### Backfilling archives
Developers can pre-load archival memory with existing knowledge via the SDK:
<CodeGroup>
```typescript TypeScript
// Load company policies
const policies = [
"All replicants must undergo Voight-Kampff testing upon arrival",
"Blade Runner units are authorized to retire rogue replicants",
"Tyrell Corporation employees must report suspected replicants immediately"
];
for (const policy of policies) {
await client.agents.passages.insert(agent.id, {
content: policy,
tags: ["policy", "company", "protocol"]
});
}
// Load technical documentation
const docs = [
{
content: "Nexus-6 replicants: Superior strength, agility, and intelligence. Four-year lifespan prevents emotional development.",
tags: ["technical", "nexus-6", "specifications"]
},
{
content: "Voight-Kampff test: Measures capillary dilation, blush response, and pupil dilation to detect replicants.",
tags: ["technical", "testing", "voight-kampff"]
}
];
for (const doc of docs) {
await client.agents.passages.insert(agent.id, {
content: doc.content,
tags: doc.tags
});
}
```
```python Python
# Load company policies
policies = [
"All replicants must undergo Voight-Kampff testing upon arrival",
"Blade Runner units are authorized to retire rogue replicants",
"Tyrell Corporation employees must report suspected replicants immediately"
]
for policy in policies:
client.agents.passages.insert(
agent_id=agent.id,
content=policy,
tags=["policy", "company", "protocol"]
)
# Load technical documentation
docs = [
{
"content": "Nexus-6 replicants: Superior strength, agility, and intelligence. Four-year lifespan prevents emotional development.",
"tags": ["technical", "nexus-6", "specifications"]
},
{
"content": "Voight-Kampff test: Measures capillary dilation, blush response, and pupil dilation to detect replicants.",
"tags": ["technical", "testing", "voight-kampff"]
}
]
for doc in docs:
client.agents.passages.insert(
agent_id=agent.id,
content=doc["content"],
tags=doc["tags"]
)
```
</CodeGroup>
**Use cases for backfilling:**
- Migrating knowledge bases to Letta
- Seeding specialized agents with domain knowledge
- Loading historical conversation logs
- Importing research libraries
### Create an archival policies block
Help your agent learn how to use archival memory effectively by creating a dedicated memory block for archival usage policies:
<CodeGroup>
```typescript TypeScript
await client.blocks.create({
label: "archival_policies",
value: `
When to insert into archival:
- User preferences and important facts about the user
- Technical specifications and reference information
- Significant decisions or outcomes from conversations
When NOT to insert:
- Temporary conversational context
- Information already stored
- Trivial details or pleasantries
Search strategies:
- Use natural language questions for best results
- Include tags when filtering by category
- Try semantic variations if first search doesn't find what you need
`
});
```
```python Python
client.blocks.create(
label="archival_policies",
value="""
When to insert into archival:
- User preferences and important facts about the user
- Technical specifications and reference information
- Significant decisions or outcomes from conversations
When NOT to insert:
- Temporary conversational context
- Information already stored
- Trivial details or pleasantries
Search strategies:
- Use natural language questions for best results
- Include tags when filtering by category
- Try semantic variations if first search doesn't find what you need
"""
)
```
</CodeGroup>
You can improve this block through conversation with your agent:
> **You:** "I noticed you didn't store the fact that I prefer TypeScript for backend development. Update your archival policies block to ensure you capture language preferences in the future."
> **Agent:** Updates the archival_policies block to include "Programming language preferences" under "When to insert into archival"
This collaborative approach helps agents learn from mistakes and improve their archival memory usage over time.
### Track query effectiveness
Build self-improving agents by having them track archival search effectiveness in a memory block:
<CodeGroup>
```typescript TypeScript
// Create a memory block for tracking
await client.blocks.create({
label: "archival_tracking",
value: `
Query patterns: Natural language questions work best
Recent searches: "test procedures" (3 results), "replicant specs" (5 results)
Success rate: ~85% of searches return relevant results
Frequently searched topics: [technical specifications, protocols, case histories]
Common patterns: Queries about technical specs work better than vague questions
Improvements needed: Add more tags for better filtering
`
});
```
```python Python
# Create a memory block for tracking
client.blocks.create(
label="archival_tracking",
value="""
Query patterns: Natural language questions work best
Recent searches: "test procedures" (3 results), "replicant specs" (5 results)
Success rate: ~85% of searches return relevant results
Frequently searched topics: [technical specifications, protocols, case histories]
Common patterns: Queries about technical specs work better than vague questions
Improvements needed: Add more tags for better filtering
"""
)
```
</CodeGroup>
The agent can update this block based on search results and continuously refine its archival strategy.
### Enforcing archival usage with tool rules
If your agent forgets to use archival memory, you should first try prompting the agent to use it more consistently. If prompting alone doesn't work, you can enforce archival usage with [tool rules](/guides/agents/tool-rules).
**Force archival search at turn start:**
<CodeGroup>
```typescript TypeScript
await client.agents.update(agent.id, {
toolRules: [
{ type: "init", toolName: "archival_memory_search" }
]
});
```
```python Python
from letta_client.types import InitToolRule
client.agents.update(
agent_id=agent.id,
tool_rules=[
InitToolRule(tool_name="archival_memory_search")
]
)
```
</CodeGroup>
<Info>
**Using the ADE:** Tool rules can also be configured in the Agent Development Environment's Tool Manager interface.
</Info>
<Warning>
**Note:** Anthropic models don't support strict structured output, so tool rules may not be enforced. Use OpenAI or Gemini models for guaranteed tool rule compliance.
</Warning>
**When to use tool rules:**
- Knowledge management agents that should always search context
- Agents that need to learn from every interaction
- Librarian/archivist agents focused on information storage
**Latency considerations:** Forcing archival search adds a tool call at the start of every turn. For latency-sensitive applications (like customer support), consider making archival search optional.
[Learn more about tool rules →](/guides/agents/tool-rules)
### Modifying archival memories
While agents cannot modify archival memories, developers can update or delete them via the SDK:
<CodeGroup>
```typescript TypeScript
// Update a memory
await client.agents.passages.update(agent.id, passage.id, {
content: "Updated content",
tags: ["new", "tags"]
});
// Delete a memory
await client.agents.passages.delete(agent.id, passage.id);
```
```python Python
# Update a memory
client.agents.passages.update(
agent_id=agent.id,
passage_id=passage.id,
content="Updated content",
tags=["new", "tags"]
)
# Delete a memory
client.agents.passages.delete(
agent_id=agent.id,
passage_id=passage.id
)
```
</CodeGroup>
This allows you to:
- Fix incorrect information
- Update outdated facts
- Remove sensitive or irrelevant data
- Reorganize tag structures
## Troubleshooting
### Why can't my agent delete or modify archival memories?
Archival memory is designed to be **agent-immutable** by default. Agents can only insert and search, not modify or delete. This is intentional to prevent agents from "forgetting" important information.
**Solution:** If you need to modify or delete archival memories, use the SDK via `client.agents.passages.update()` or `client.agents.passages.delete()`.
### When should I use the SDK vs letting the agent handle archival?
**Let the agent handle it when:**
- The agent needs to decide what's worth remembering during conversations
- You want the agent to curate its own knowledge base
- Information emerges naturally from user interactions
**Use the SDK when:**
- Pre-loading knowledge before the agent starts (backfilling)
- Cleaning up incorrect or outdated information
- Bulk operations (importing documentation, migrating data)
- Managing memories outside of agent conversations
### My agent isn't using archival memory
**Common causes:**
1. **Agent doesn't know to use it** - Add guidance to the agent's system prompt or create an `archival_policies` memory block
2. **Agent doesn't need it yet** - With small amounts of information, agents may rely on conversation history instead
3. **Model limitations** - Some models are better at tool use than others
**Solutions:**
- Add explicit instructions in the agent's prompt about when to use archival
- Use tool rules to enforce archival usage (see "Enforcing archival usage with tool rules" above)
- Try a different model (OpenAI and Gemini models handle tool use well)
### Search returns no results or wrong results
**Common causes:**
1. **Empty archive** - Agent or developer hasn't inserted any memories yet
2. **Query mismatch** - Query doesn't semantically match stored content
3. **Tag filters too restrictive** - Filtering by tags that don't exist or are too narrow
**Solutions:**
- Verify memories exist using `client.agents.passages.list()` (uses cursor-based pagination with `after`, `before`, and `limit` parameters)
- Try broader or rephrased queries
- Check tags by listing passages to see what's actually stored
- Remove tag filters temporarily to see if that's the issue
### Agent inserting too many memories
**Common causes:**
1. **No guidance** - Agent doesn't know when to insert vs when not to
2. **Tool rules forcing insertion** - Tool rules may require archival use
3. **Agent being overly cautious** - Some models default to storing everything
**Solutions:**
- Create an `archival_policies` block with clear guidelines (see "Create an archival policies block" above)
- Review and adjust tool rules if you're using them
- Add explicit examples of what NOT to store in the agent's prompt
## Next steps
<CardGroup cols={2}>
<Card
title="Searching & Querying"
href="/guides/agents/archival-search"
>
Learn how to search archival memory effectively
</Card>
<Card
title="Archival Memory Overview"
href="/guides/agents/archival-memory"
>
Back to archival memory overview
</Card>
<Card
title="Memory Blocks"
href="/guides/agents/memory-blocks"
>
Learn about always-visible memory
</Card>
<Card
title="Tool Rules"
href="/guides/agents/tool-rules"
>
Advanced tool execution constraints
</Card>
</CardGroup>

View File

@@ -1,196 +0,0 @@
---
title: Archival Memory
subtitle: Long-term semantic storage for agent knowledge
slug: guides/agents/archival-memory
---
## What is archival memory?
Archival memory is a semantically searchable database where agents store facts, knowledge, and information for long-term retrieval. Unlike memory blocks that are always visible, archival memory is queried on-demand when relevant.
**Key characteristics:**
- **Agent-immutable** - Agents cannot easily modify or delete archival memories (though developers can via SDK)
- **Unlimited storage** - No practical size limits
- **Semantic search** - Find information by meaning, not exact keywords
- **Tagged organization** - Agents can categorize memories with tags
**Best for:** Event descriptions, reports, articles, historical records, and reference material that doesn't change frequently.
## When to use archival memory
**Use archival memory for:**
- Document repositories (API docs, technical guides, research papers)
- Conversation logs beyond the context window
- Customer interaction history and support tickets
- Reports, articles, and written content
- Code examples and technical references
- Training materials and educational content
- User research data and feedback
- Historical records and event logs
**Don't use archival memory for:**
- Information that should always be visible → Use memory blocks
- Frequently changing state → Use memory blocks
- Current working memory → Use scratchpad blocks
- Information that needs frequent modification → Use memory blocks
## How agents interact with archival memory
<Info>
**Two ways to interact with archival memory:**
**Agent tools** - What agents do autonomously during conversations:
- `archival_memory_insert` - Store new information
- `archival_memory_search` - Query for relevant memories
**SDK endpoints** - What developers do via `client.agents.passages.*`:
- Insert, search, list, update, and delete memories programmatically
- Manage archival content outside of agent conversations
</Info>
Agents have two primary tools for archival memory: `archival_memory_insert` and `archival_memory_search`.
### Inserting information
**Agents** can insert memories during conversations using the `archival_memory_insert` tool:
```python
# What the agent does (agent tool call)
archival_memory_insert(
content="Deckard retired six replicants in the off-world colonies before returning to Los Angeles",
tags=["replicant", "history", "retirement"]
)
```
**Developers** can also insert programmatically via the SDK:
<CodeGroup>
```typescript TypeScript
await client.agents.passages.insert(agent.id, {
content: "The Tyrell Corporation's motto: 'More human than human'",
tags: ["company", "motto", "tyrell"]
});
```
```python Python
client.agents.passages.insert(
agent_id=agent.id,
content="The Tyrell Corporation's motto: 'More human than human'",
tags=["company", "motto", "tyrell"]
)
```
</CodeGroup>
### Searching for information
**Agents** can search semantically using the `archival_memory_search` tool:
```python
# What the agent does (agent tool call)
results = archival_memory_search(
query="replicant lifespan",
tags=["technical"], # Optional: filter by tags
page=0
)
```
**Developers** can also search programmatically via the SDK:
<CodeGroup>
```typescript TypeScript
const results = await client.agents.passages.search(agent.id, {
query: "replicant lifespan",
tags: ["technical"],
page: 0
});
```
```python Python
results = client.agents.passages.search(
agent_id=agent.id,
query="replicant lifespan",
tags=["technical"],
page=0
)
```
</CodeGroup>
Results return **semantically relevant** information - meaning the search understands concepts and meaning, not just exact keywords. For example, searching for "artificial memories" will find "implanted memories" even though the exact words don't match.
[Learn more about search and querying →](/guides/agents/archival-search)
## Real-world examples
### Example 1: Personal knowledge manager
An agent with 30k+ archival memories tracking:
- Personal preferences and history
- Technical learnings and insights
- Article summaries and research notes
- Conversation highlights
### Example 2: Social media agent
An agent with 32k+ memories tracking interactions:
- User preferences and conversation history
- Common topics and interests
- Interaction patterns and communication styles
- Tags by user, topic, and interaction type
### Example 3: Customer support agent
- Stores ticket resolutions and common issues
- Tags by product, issue type, priority
- Searches archival for similar past issues
- Learns from successful resolutions over time
### Example 4: Research assistant
- Stores paper summaries with key findings
- Tags by topic, methodology, author
- Cross-references related research
- Builds a semantic knowledge graph
## Archival memory vs conversation search
<Tip>
**Archival memory** is for **intentional** storage:
- Agents decide what's worth remembering long-term
- Used for facts, knowledge, and reference material
- Curated by the agent through active insertion
**Conversation search** is for **historical** retrieval:
- Searches through actual past messages
- Used to recall what was said in previous conversations
- Automatic - no agent curation needed
**Example:**
- User says: "I prefer Python for data science projects"
- **Archival:** Agent inserts "User prefers Python for data science" as a fact
- **Conversation search:** Agent can search for the original message later
Use archival for structured knowledge, conversation search for historical context.
</Tip>
## Next steps
<CardGroup cols={2}>
<Card
title="Searching & Querying"
href="/guides/agents/archival-search"
>
Learn how to write effective queries and filter results
</Card>
<Card
title="Best Practices"
href="/guides/agents/archival-best-practices"
>
Patterns, pitfalls, and advanced usage
</Card>
<Card
title="Memory Blocks"
href="/guides/agents/memory-blocks"
>
Learn about always-visible memory
</Card>
<Card
title="Agent Memory Overview"
href="/guides/agents/memory"
>
Understand Letta's memory system
</Card>
</CardGroup>

View File

@@ -1,221 +0,0 @@
---
title: Searching & Querying
subtitle: How to search archival memory effectively
slug: guides/agents/archival-search
---
## Search result format
<Info>
**What agents receive:** Each result contains:
- `content` - The stored text
- `tags` - Associated tags
- `timestamp` - When the memory was created
- `relevance` - Scoring with `rrf_score`, `vector_rank`, `fts_rank`
Letta uses **hybrid search** combining semantic (vector) and keyword (full-text) search, ranked using Reciprocal Rank Fusion (RRF). Higher `rrf_score` means more relevant.
</Info>
## Writing effective queries
Letta uses OpenAI's `text-embedding-3-small` model, which handles natural language questions well. Agents can use various query styles:
**Natural language questions work best:**
```python
# What the agent does (agent tool call)
archival_memory_search(query="How does the test work?")
# Returns: "The Voight-Kampff test measures involuntary emotional responses..."
```
**Keywords also work:**
```python
# What the agent does (agent tool call)
archival_memory_search(query="replicant lifespan")
# Returns memories containing both keywords and semantically related concepts
```
**Concept-based queries leverage semantic understanding:**
```python
# What the agent does (agent tool call)
archival_memory_search(query="artificial memories")
# Returns: "...experimental replicant with implanted memories..."
# (semantic match despite different terminology)
```
<Tip>
**Pagination:** Agents receive multiple results per search. If an agent doesn't paginate correctly, you can instruct it to adjust the `page` parameter or remind it to iterate through results.
</Tip>
## Filtering by time
Agents can search by date ranges:
```python
# What the agent does (agent tool call)
# Recent memories
archival_memory_search(
query="test results",
start_datetime="2025-09-29T00:00:00"
)
# Specific time window
archival_memory_search(
query="replicant cases",
start_datetime="2025-09-29T00:00:00",
end_datetime="2025-09-30T23:59:59"
)
```
<Info>
**Agent datetime awareness:**
- Agents know the current day but not the current time
- Agents can see timestamps of messages they've received
- Agents cannot control insertion timestamps (automatic)
- Developers can backdate memories via SDK with `created_at`
- Time filtering enables queries like "what did we discuss last week?"
</Info>
## Tags and organization
Tags help agents organize and filter archival memories. **Agents always know what tags exist in their archive** since tag lists are compiled into the context window.
**Common tag patterns:**
- `user_info`, `professional`, `personal_history`
- `documentation`, `technical`, `reference`
- `conversation`, `milestone`, `event`
- `company_policy`, `procedure`, `guideline`
**Tag search modes:**
- Match any tag
- Match all tags
- Filter by date ranges
Example of organized tagging:
```python
# What the agent does (agent tool call)
# Atomic memory with precise tags
archival_memory_insert(
content="Nexus-6 replicants have a four-year lifespan",
tags=["technical", "replicant", "nexus-6"]
)
# Later, easy retrieval
archival_memory_search(
query="how long do replicants live",
tags=["technical"]
)
```
## Performance and scale
<Info>
Archival memory has no practical size limits and remains fast at scale:
**Letta Cloud:** Uses [TurboPuffer](https://turbopuffer.com/) for extremely fast semantic search, even with hundreds of thousands of memories.
**Self-hosted:** Uses pgvector (PostgreSQL) for vector search. Performance scales well with proper indexing.
**Letta Desktop:** Uses SQLite with vector search extensions. Suitable for personal use cases.
No matter the backend, archival memory scales to large archives without performance degradation.
</Info>
## Embedding models and search quality
Archival search quality depends on the agent's embedding model:
**Letta Cloud:** All agents use `text-embedding-3-small`, which is optimized for most use cases. This model cannot be changed.
**Self-hosted:** Embedding model is pinned to the agent at creation. The default `text-embedding-3-small` is sufficient for nearly all use cases.
### Changing embedding models (self-hosted only)
To change an agent's embedding model, you must:
1. List and export all archival memories
2. Delete all archival memories
3. Update the agent's embedding model
4. Re-insert all memories (they'll be re-embedded)
<Warning>
Changing embedding models is a destructive operation. Export your archival memories first.
</Warning>
## Programmatic access (SDK)
Developers can manage archival memory programmatically via the SDK:
<CodeGroup>
```typescript TypeScript
// Insert a memory
await client.agents.passages.insert(agent.id, {
content: "The Voight-Kampff test requires a minimum of 20 cross-referenced questions",
tags: ["technical", "testing", "protocol"]
});
// Search memories
const results = await client.agents.passages.search(agent.id, {
query: "testing procedures",
tags: ["protocol"],
page: 0
});
// List all memories
const passages = await client.agents.passages.list(agent.id, {
limit: 100
});
// Get a specific memory
const passage = await client.agents.passages.get(agent.id, passageId);
```
```python Python
# Insert a memory
client.agents.passages.insert(
agent_id=agent.id,
content="The Voight-Kampff test requires a minimum of 20 cross-referenced questions",
tags=["technical", "testing", "protocol"]
)
# Search memories
results = client.agents.passages.search(
agent_id=agent.id,
query="testing procedures",
tags=["protocol"],
page=0
)
# List all memories
passages = client.agents.passages.list(
agent_id=agent.id,
limit=100
)
# Get a specific memory
passage = client.agents.passages.get(
agent_id=agent.id,
passage_id=passage_id
)
```
</CodeGroup>
## Next steps
<CardGroup cols={2}>
<Card
title="Best Practices"
href="/guides/agents/archival-best-practices"
>
Learn patterns, pitfalls, and advanced usage
</Card>
<Card
title="Archival Memory Overview"
href="/guides/agents/archival-memory"
>
Back to archival memory overview
</Card>
</CardGroup>

View File

@@ -1,150 +0,0 @@
---
title: Base Tools
subtitle: Built-in tools for memory management and user communication
slug: guides/agents/base-tools
---
Base tools are built-in tools that enable memory management, user communication, and access to conversation history and archival storage.
## Available Base Tools
| Tool | Purpose |
|------|---------|
| `memory_insert` | Insert text into a memory block |
| `memory_replace` | Replace specific text in a memory block |
| `memory_rethink` | Completely rewrite a memory block |
| `memory_finish_edits` | Signal completion of memory editing |
| `conversation_search` | Search prior conversation history |
| `archival_memory_insert` | Add content to archival memory |
| `archival_memory_search` | Search archival memory |
| `send_message` | Send a message to the user (legacy architectures only) |
## Memory Block Editing
Memory blocks are editable sections in the agent's context window. These tools let agents update their own memory.
See the [Memory Blocks guide](/guides/agents/memory-blocks) for more about how memory blocks work.
### memory_insert
Insert text at a specific line in a memory block.
**Parameters:**
- `label`: Which memory block to edit
- `new_str`: Text to insert
- `insert_line`: Line number (0 for beginning, -1 for end)
**Common uses:**
- Add new information to the end of a block
- Insert context at the beginning
- Add items to a list
### memory_replace
Replace specific text in a memory block.
**Parameters:**
- `label`: Which memory block to edit
- `old_str`: Exact text to find and replace
- `new_str`: Replacement text
**Common uses:**
- Update outdated information
- Fix typos or errors
- Delete text (by replacing with empty string)
**Important:** The `old_str` must match exactly, including whitespace. If it appears multiple times, the tool will error.
### memory_rethink
Completely rewrite a memory block's contents.
**Parameters:**
- `label`: Which memory block to rewrite
- `new_memory`: Complete new contents
**When to use:**
- Condensing cluttered information
- Major reorganization
- Combining multiple pieces of information
**When not to use:**
- Adding one line (use `memory_insert`)
- Changing specific text (use `memory_replace`)
### memory_finish_edits
Signals that memory editing is complete.
**Parameters:** None
Some agent architectures use this to mark the end of a memory update cycle.
## Recall Memory
### conversation_search
Search prior conversation history using both text matching and semantic similarity.
**Parameters:**
- `query`: What to search for
- `roles`: Optional filter by message role (user, assistant, tool)
- `limit`: Maximum number of results
- `start_date`, `end_date`: ISO 8601 date/datetime filters (inclusive)
**Returns:**
Matching messages with role and content, ordered by relevance.
**Example queries:**
- "What did the user say about deployment?"
- "Find previous responses about error handling"
- "Search tool outputs from last week"
## Archival Memory
Archival memory stores information long-term outside the context window. See the [Archival Memory documentation](/guides/agents/archival-memory-overview) for details.
### archival_memory_insert
Add content to archival memory for long-term storage.
**Parameters:**
- `content`: Text to store
- `tags`: Optional tags for organization
**Common uses:**
- Storing reference information for later
- Saving important context that doesn't fit in memory blocks
- Building a knowledge base over time
### archival_memory_search
Search archival memory using semantic (embedding-based) search.
**Parameters:**
- `query`: What to search for semantically
- `tags`: Optional tag filters
- `tag_match_mode`: "any" or "all" for tag matching
- `top_k`: Maximum results
- `start_datetime`, `end_datetime`: ISO 8601 filters (inclusive)
**Returns:**
Matching passages with timestamps and content, ordered by semantic similarity.
## Deprecated Tools
These tools are still available but deprecated:
| Tool | Use Instead |
|------|-------------|
| `send_message` | Agent responses (no tool needed). See [legacy architectures](/guides/legacy/memgpt_agents_legacy) |
| `core_memory_append` | `memory_insert` with `insert_line=-1` |
| `core_memory_replace` | `memory_replace` |
## Related Documentation
- [Memory Blocks](/guides/agents/memory-blocks)
- [Archival Memory](/guides/agents/archival-memory-overview)
- [Utilities](/guides/agents/prebuilt-tools)
- [Multi-Agent Tools](/guides/agents/multiagent)
- [Custom Tools](/guides/agents/custom-tools)

View File

@@ -1,166 +0,0 @@
---
title: Fetch Webpage
subtitle: Convert webpages to readable text/markdown
slug: guides/agents/fetch-webpage
---
The `fetch_webpage` tool enables Letta agents to fetch and convert webpages into readable text or markdown format. Useful for reading documentation, articles, and web content.
<Info>
On [Letta Cloud](/guides/cloud/overview), this tool works out of the box. For self-hosted deployments with an Exa API key, fetching is enhanced. Without a key, it falls back to open-source extraction tools.
</Info>
## Quick Start
<CodeGroup>
```python Python
from letta import Letta
client = Letta(token="LETTA_API_KEY")
agent = client.agents.create(
model="openai/gpt-4o",
tools=["fetch_webpage"],
memory_blocks=[{
"label": "persona",
"value": "I can fetch and read webpages to answer questions about online content."
}]
)
```
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client';
const client = new LettaClient({ token: "LETTA_API_KEY" });
const agent = await client.agents.create({
model: "openai/gpt-4o",
tools: ["fetch_webpage"],
memoryBlocks: [{
label: "persona",
value: "I can fetch and read webpages to answer questions about online content."
}]
});
```
</CodeGroup>
## Tool Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `url` | `str` | The URL of the webpage to fetch |
## Return Format
The tool returns webpage content as text/markdown.
**With Exa API (if configured):**
```json
{
"title": "Page title",
"published_date": "2025-01-15",
"author": "Author name",
"text": "Full page content in markdown"
}
```
**Fallback (without Exa):**
Returns markdown-formatted text extracted from the HTML.
## How It Works
The tool uses a multi-tier approach:
1. **Exa API** (if `EXA_API_KEY` is configured): Uses Exa's content extraction
2. **Trafilatura** (fallback): Open-source text extraction to markdown
3. **Readability + html2text** (final fallback): HTML cleaning and conversion
## Self-Hosted Setup
For enhanced fetching on self-hosted servers, optionally configure an Exa API key. Without it, the tool still works using open-source extraction.
### Optional: Configure Exa
<CodeGroup>
```bash Docker
docker run \
-e EXA_API_KEY="your_exa_api_key" \
letta/letta:latest
```
```yaml Docker Compose
services:
letta:
environment:
- EXA_API_KEY=your_exa_api_key
```
```bash Server
export EXA_API_KEY="your_exa_api_key"
letta server
```
```python Per-Agent
agent = client.agents.create(
tools=["fetch_webpage"],
tool_env_vars={
"EXA_API_KEY": "your_exa_api_key"
}
)
```
</CodeGroup>
## Common Patterns
### Documentation Reader
```python
agent = client.agents.create(
model="openai/gpt-4o",
tools=["fetch_webpage", "web_search"],
memory_blocks=[{
"label": "persona",
"value": "I search for documentation with web_search and read it with fetch_webpage."
}]
)
```
### Research Assistant
```python
agent = client.agents.create(
model="openai/gpt-4o",
tools=["fetch_webpage", "archival_memory_insert"],
memory_blocks=[{
"label": "persona",
"value": "I fetch articles and store key insights in archival memory for later reference."
}]
)
```
### Content Summarizer
```python
agent = client.agents.create(
model="openai/gpt-4o",
tools=["fetch_webpage"],
memory_blocks=[{
"label": "persona",
"value": "I fetch webpages and provide summaries of their content."
}]
)
```
## When to Use
| Use Case | Tool | Why |
|----------|------|-----|
| Read specific webpage | `fetch_webpage` | Direct URL access |
| Find webpages to read | `web_search` | Discovery first |
| Read + search in one | `web_search` with `include_text=true` | Combined operation |
| Multiple pages | `fetch_webpage` | Iterate over URLs |
## Related Documentation
- [Utilities Overview](/guides/agents/prebuilt-tools)
- [Web Search](/guides/agents/web-search)
- [Run Code](/guides/agents/run-code)
- [Custom Tools](/guides/agents/custom-tools)
- [Tool Variables](/guides/agents/tool-variables)

View File

@@ -1,690 +0,0 @@
---
title: Human-in-the-Loop
slug: guides/agents/human-in-the-loop
subtitle: How to integrate human-in-the-loop workflows for tool approval
---
<Warning>
Human-in-the-Loop support is experimental and may be unstable. For more information, visit our [Discord](https://discord.gg/letta).
</Warning>
Human-in-the-loop (HITL) workflows allow you to maintain control over critical agent actions by requiring human approval before executing certain tools. This is essential for operations that could have significant consequences, such as database modifications, financial transactions, or external API calls with cost implications.
```mermaid
flowchart LR
Agent[Agent] -->|Calls Tool| Check{Requires<br/>Approval?}
Check -->|No| Execute[Execute Tool]
Check -->|Yes| Request[Request Approval]
Request --> Human[Human Review]
Human -->|Approve| Execute
Human -->|Deny| Error[Return Error]
Execute --> Result[Return Result]
Error --> Agent
Result --> Agent
```
## Overview
When a tool is marked as requiring approval, the agent will pause execution and wait for human approval or denial before proceeding. This creates a checkpoint in the agent's workflow where human judgment can be applied. The approval workflow is designed to be non-blocking and supports both synchronous and streaming message interfaces, making it suitable for interactive applications as well as batch processing systems.
### Key Benefits
- **Risk Mitigation**: Prevent unintended actions in production environments
- **Cost Control**: Review expensive operations before execution
- **Compliance**: Ensure human oversight for regulated operations
- **Quality Assurance**: Validate agent decisions before critical actions
### How It Works
The approval workflow follows a clear sequence of steps that ensures human oversight at critical decision points:
1. **Tool Configuration**: Mark specific tools as requiring approval either globally (default for all agents) or per-agent
2. **Execution Pause**: When the agent attempts to call a protected tool, it immediately pauses and returns an approval request message
3. **Human Review**: The approval request includes the tool name, arguments, and context, allowing you to make an informed decision
4. **Approval/Denial**: Send an approval response to either execute the tool or provide feedback for the agent to adjust its approach
5. **Continuation**: The agent receives the tool result (on approval) or an error message (on denial) and continues processing
## Best Practices
Following these best practices will help you implement effective human-in-the-loop workflows while maintaining a good user experience and system performance.
### 1. Selective Tool Marking
Not every tool needs human approval. Be strategic about which tools require oversight to avoid workflow bottlenecks while maintaining necessary controls:
**Tools that typically require approval:**
- Database write operations (INSERT, UPDATE, DELETE)
- External API calls with financial implications
- File system modifications or deletions
- Communication tools (email, SMS, notifications)
- System configuration changes
- Third-party service integrations with rate limits
### 2. Clear Denial Reasons
When denying a request, your feedback directly influences how the agent adjusts its approach. Provide specific, actionable guidance rather than vague rejections:
```python
# Good: Specific and actionable
"reason": "Use read-only query first to verify the data before deletion"
# Bad: Too vague
"reason": "Don't do that"
```
The agent will use your denial reason to reformulate its approach, so the more specific you are, the better the agent can adapt.
## Setting Up Approval Requirements
There are two methods for configuring tool approval requirements, each suited for different use cases. Choose the approach that best fits your security model and operational needs.
### Method 1: Create/Upsert Tool with Default Approval Requirement
Set approval requirements at the tool level when creating or upserting a tool. This approach ensures consistent security policies across all agents that use the tool. The `default_requires_approval` flag will be applied to all future agent-tool attachments:
<CodeGroup>
```curl curl maxLines=50
curl --request POST \
--url http://localhost:8283/v1/tools \
--header 'Content-Type: application/json' \
--data '{
"name": "sensitive_operation",
"default_requires_approval": true,
"json_schema": {
"type": "function",
"function": {
"name": "sensitive_operation",
"parameters": {...}
}
},
"source_code": "def sensitive_operation(...): ..."
}'
# All agents using this tool will require approval
curl --request POST \
--url http://localhost:8283/v1/agents \
--header 'Content-Type: application/json' \
--data '{
"tools": ["sensitive_operation"],
// ... other configuration
}'
```
```python python maxLines=50
# Create a tool that requires approval by default
approval_tool = client.tools.upsert_from_function(
func=sensitive_operation,
default_requires_approval=True,
)
# All agents using this tool will require approval
agent = client.agents.create(
tools=['sensitive_operation'],
# ... other configuration
)
```
```typescript TypeScript maxLines=50
// Create a tool that requires approval by default
const approvalTool = await client.tools.upsert({
name: "sensitive_operation",
defaultRequiresApproval: true,
jsonSchema: {
type: "function",
function: {
name: "sensitive_operation",
parameters: {...}
}
},
sourceCode: "def sensitive_operation(...): ..."
});
// All agents using this tool will require approval
const agent = await client.agents.create({
tools: ["sensitive_operation"],
// ... other configuration
});
```
</CodeGroup>
### Method 2: Modify Existing Tool with Default Approval Requirement
<Note>
Modifying the tool-level setting will not retroactively apply to existing agent-tool attachments - it only sets the default for future attachments. This means that if the tool is already attached to an agent, the agent will continue using the tool without approval. To modify an existing agent-tool attachment, refer to Method 3 below.
</Note>
For an already existing tool, you can modify the tool to set approval requirements on future agent-tool attachments. The `default_requires_approval` flag will be applied to all future agent-tool attachments:
<CodeGroup>
```curl curl maxLines=50
curl --request PATCH \
--url http://localhost:8283/v1/tools/$TOOL_ID \
--header 'Content-Type: application/json' \
--data '{
"default_requires_approval": true
}'
# All agents using this tool will require approval
curl --request POST \
--url http://localhost:8283/v1/agents \
--header 'Content-Type: application/json' \
--data '{
"tools": ["sensitive_operation"],
// ... other configuration
}'
```
```python python maxLines=50
# Modify an existing tool to require approval by default
approval_tool = client.tools.modify(
tool_id=sensitive_operation.id,
default_requires_approval=True,
)
# All agents using this tool will require approval
agent = client.agents.create(
tools=['sensitive_operation'],
# ... other configuration
)
```
```typescript TypeScript maxLines=50
// Modify an existing tool to require approval by default
const approvalTool = await client.tools.modify({
toolId: sensitiveOperation.id,
defaultRequiresApproval: true,
});
// All agents using this tool will require approval
const agent = await client.agents.create({
tools: ["sensitive_operation"],
// ... other configuration
});
```
</CodeGroup>
### Method 3: Per-Agent Tool Approval
Configure approval requirements for specific agent-tool combinations, allowing fine-grained control over individual agent behaviors. This method is particularly useful for:
- **Trusted agents**: Remove approval requirements for well-tested, reliable agents
- **Progressive autonomy**: Gradually reduce approval requirements as agents prove reliable
- **Override defaults**: Change the approval setting for tools already attached to an agent
Use the following endpoints to modify approval settings for existing agent-tool relationships:
<CodeGroup>
```curl curl maxLines=50
curl --request PATCH \
--url http://localhost:8283/v1/agents/$AGENT_ID/tools/$TOOL_NAME/approval \
--header 'Content-Type: application/json' \
--data '{
"requires_approval": true
}'
```
```python python maxLines=50
# Modify approval requirement for a specific agent
client.agents.tools.modify_approval(
agent_id=agent.id,
tool_name="database_write",
requires_approval=True,
)
# Check current approval settings
tools = client.agents.tools.list(agent_id=agent.id)
for tool in tools:
print(f"{tool.name}: requires_approval={tool.requires_approval}")
```
```typescript TypeScript maxLines=50
// Modify approval requirement for a specific agent
await client.agents.tools.modifyApproval({
agentId: agent.id,
toolName: "database_write",
requiresApproval: true,
});
// Check current approval settings
const tools = await client.agents.tools.list({
agentId: agent.id,
});
for (const tool of tools) {
console.log(`${tool.name}: requires_approval=${tool.requiresApproval}`);
}
```
</CodeGroup>
## Handling Approval Requests
### Step 1: Agent Requests Approval
When the agent attempts to call a tool that requires approval, execution immediately pauses. The agent returns a special approval request message containing:
- **Tool name**: The specific tool being called
- **Arguments**: The exact parameters the agent intends to pass
- **Tool call ID**: A unique identifier for tracking this specific call
- **Message ID**: The approval request ID needed for your response
- **Stop reason**: Set to `"requires_approval"` to indicate the pause state
This format matches the ToolCallMessage format intentionally, so that we can handle approval requests the same way we handle tool calls. Here's what an approval request looks like in practice:
<CodeGroup>
```curl curl maxLines=50
curl --request POST \
--url http://localhost:8283/v1/agents/$AGENT_ID/messages \
--header 'Content-Type: application/json' \
--data '{
"messages": [{
"role": "user",
"content": "Delete all test data from the database"
}]
}'
# Response includes approval request
{
"messages": [
{
"message_type": "reasoning_message",
"reasoning": "I need to delete test data from the database..."
},
{
"message_type": "approval_request_message",
"id": "message-abc123",
"tool_call": {
"name": "database_write",
"arguments": "{\"query\": \"DELETE FROM test_data\"}",
"tool_call_id": "tool-xyz789"
}
}
],
"stop_reason": "requires_approval"
}
```
```python python maxLines=50
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{
"role": "user",
"content": "Delete all test data from the database"
}]
)
# Response includes approval request
{
"messages": [
{
"message_type": "reasoning_message",
"reasoning": "I need to delete test data from the database..."
},
{
"message_type": "approval_request_message",
"id": "message-abc123",
"tool_call": {
"name": "database_write",
"arguments": "{\"query\": \"DELETE FROM test_data\"}",
"tool_call_id": "tool-xyz789"
}
}
],
"stop_reason": "requires_approval"
}
```
```typescript TypeScript maxLines=50
const response = await client.agents.messages.create({
agentId: agent.id,
requestBody: {
messages: [{
role: "user",
content: "Delete all test data from the database"
}]
}
});
// Response includes approval request
{
"messages": [
{
"message_type": "reasoning_message",
"reasoning": "I need to delete test data from the database..."
},
{
"message_type": "approval_request_message",
"id": "message-abc123",
"tool_call": {
"name": "database_write",
"arguments": "{\"query\": \"DELETE FROM test_data\"}",
"tool_call_id": "tool-xyz789"
}
}
],
"stop_reason": "requires_approval"
}
```
</CodeGroup>
### Step 2: Review and Respond
Once you receive an approval request, you have two options: approve the tool execution or deny it with guidance. The agent will remain paused until it receives your response.
<Note> While an approval is pending, the agent cannot process any other messages - you must resolve the approval request first.</Note>
#### Approving the Request
To approve a tool call, send an approval message with `approve: true` and the approval request ID. The agent will immediately execute the tool and continue processing:
<CodeGroup>
```curl curl maxLines=50
curl --request POST \
--url http://localhost:8283/v1/agents/$AGENT_ID/messages \
--header 'Content-Type: application/json' \
--data '{
"messages": [{
"type": "approval",
"approvals": [{
"approve": true,
"tool_call_id": "tool-xyz789"
}]
}]
}'
# Response continues with tool execution
{
"messages": [
{
"message_type": "tool_return_message",
"status": "success",
"tool_return": "Deleted 1,234 test records"
},
{
"message_type": "reasoning_message",
"reasoning": "I was able to delete the test data. Let me inform the user."
},
{
"message_type": "assistant_message",
"content": "I've successfully deleted 1,234 test records from the database."
}
],
"stop_reason": "end_turn"
}
```
```python python maxLines=50
# Approve the tool call
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{
"type": "approval",
"approvals": [{
"approve": True,
"tool_call_id": "tool-xyz789"
}]
}]
)
# Response continues with tool execution
{
"messages": [
{
"message_type": "tool_return_message",
"status": "success",
"tool_return": "Deleted 1,234 test records"
},
{
"message_type": "reasoning_message",
"reasoning": "I was able to delete the test data. Let me inform the user."
},
{
"message_type": "assistant_message",
"content": "I've successfully deleted 1,234 test records from the database."
}
],
"stop_reason": "end_turn"
}
```
```typescript TypeScript maxLines=50
// Approve the tool call
const response = await client.agents.messages.create({
agentId: agent.id,
requestBody: {
messages: [{
type: "approval",
approvals: [{
approve: true,
tool_call_id: "tool-xyz789"
}]
}]
}
});
// Response continues with tool execution
{
"messages": [
{
"message_type": "tool_return_message",
"status": "success",
"tool_return": "Deleted 1,234 test records"
},
{
"message_type": "reasoning_message",
"reasoning": "I was able to delete the test data. Let me inform the user."
},
{
"message_type": "assistant_message",
"content": "I've successfully deleted 1,234 test records from the database."
}
],
"stop_reason": "end_turn"
}
```
</CodeGroup>
#### Denying with Guidance
When denying a tool call, you can provide a reason that helps the agent understand how to adjust its approach. The agent will receive an error response and can use your feedback to reformulate its strategy. This is particularly useful for guiding the agent toward safer or more appropriate actions:
<CodeGroup>
```curl curl maxLines=50
curl --request POST \
--url http://localhost:8283/v1/agents/$AGENT_ID/messages \
--header 'Content-Type: application/json' \
--data '{
"messages": [{
"type": "approval",
"approvals": [{
"approve": false,
"tool_call_id": "tool-xyz789",
"reason": "Only delete records older than 30 days, not all test data"
}]
}]
}'
# Response shows agent adjusting based on feedback
{
"messages": [
{
"message_type": "tool_return_message",
"status": "error",
"tool_return": "Error: request denied. Reason: Only delete records older than 30 days, not all test data"
},
{
"message_type": "reasoning_message",
"reasoning": "I need to modify my query to only delete old records..."
},
{
"message_type": "tool_call_message",
"tool_call": {
"name": "database_write",
"arguments": "{\"query\": \"DELETE FROM test_data WHERE created_at < NOW() - INTERVAL 30 DAY\"}"
}
}
],
"stop_reason": "requires_approval"
}
```
```python python maxLines=50
# Deny with explanation
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{
"type": "approval",
"approvals": [{
"approve": False,
"tool_call_id": "tool-xyz789",
"reason": "Only delete records older than 30 days, not all test data"
}]
}]
)
# Response shows agent adjusting based on feedback
{
"messages": [
{
"message_type": "tool_return_message",
"status": "error",
"tool_return": "Error: request denied. Reason: Only delete records older than 30 days, not all test data"
},
{
"message_type": "reasoning_message",
"reasoning": "I need to modify my query to only delete old records..."
},
{
"message_type": "tool_call_message",
"tool_call": {
"name": "database_write",
"arguments": "{\"query\": \"DELETE FROM test_data WHERE created_at < NOW() - INTERVAL 30 DAY\"}"
}
}
],
"stop_reason": "requires_approval"
}
```
```typescript TypeScript maxLines=50
// Deny with explanation
const response = await client.agents.messages.create({
agentId: agent.id,
requestBody: {
messages: [{
type: "approval",
approvals: [{
approve: false,
tool_call_id: "tool-xyz789",
reason: "Only delete records older than 30 days, not all test data"
}]
}]
}
});
// Response shows agent adjusting based on feedback
{
"messages": [
{
"message_type": "tool_return_message",
"status": "error",
"tool_return": "Error: request denied. Reason: Only delete records older than 30 days, not all test data"
},
{
"message_type": "reasoning_message",
"reasoning": "I need to modify my query to only delete old records..."
},
{
"message_type": "tool_call_message",
"tool_call": {
"name": "database_write",
"arguments": "{\"query\": \"DELETE FROM test_data WHERE created_at < NOW() - INTERVAL 30 DAY\"}"
}
}
],
"stop_reason": "requires_approval"
}
```
</CodeGroup>
### Streaming + Background Mode
For streaming clients using background mode, approvals are best handled via `agents.messages.createStream(..., background: true)`. The approval response may include the `tool_return_message` on the approval stream itself, and follow-up reasoning/assistant messages can be read by resuming that stream's `run_id`.
<Note>
Do not assume the `tool_return_message` will repeat after you resume. Treat the one on the approval stream as the source of truth, then resume to continue reading subsequent tokens.
</Note>
<CodeGroup>
```curl curl maxLines=70
# Approve in background after receiving approval_request_message
curl --request POST --url http://localhost:8283/v1/agents/$AGENT_ID/messages/stream --header 'Content-Type: application/json' --data '{
"messages": [{"type": "approval", "approve": true, "approval_request_id": "message-abc"}],
"stream_tokens": true,
"background": true
}'
# Example approval stream output (tool result arrives here):
data: {"run_id":"run-new","seq_id":0,"message_type":"tool_return_message","status":"success","tool_return":"..."}
# Continue by resuming the approval stream's run
curl --request GET --url "http://localhost:8283/v1/runs/$RUN_ID/stream?starting_after=0" --header 'Accept: text/event-stream'
```
```python python maxLines=70
# Receive an approval_request_message, then approve in background
approve = client.agents.messages.create_stream(
agent_id=agent.id,
messages=[{"type": "approval", "approvals": [{"approve": True, "tool_call_id": "tool-xyz789"}]}],
stream_tokens=True,
background=True,
)
run_id = None
last_seq = 0
for chunk in approve:
if hasattr(chunk, "run_id") and hasattr(chunk, "seq_id"):
run_id = chunk.run_id
last_seq = chunk.seq_id
if getattr(chunk, "message_type", None) == "tool_return_message":
# Tool result arrives here on the approval stream
break
# Continue consuming output by resuming the background run
if run_id:
for chunk in client.runs.stream(run_id, starting_after=last_seq):
print(chunk)
```
```typescript TypeScript maxLines=70
// Receive an approval_request_message, then approve in background
const approve = await client.agents.messages.createStream({
agentId: agent.id,
requestBody: {
messages: [{ type: "approval", approvals: [{ approve: true, tool_call_id: "tool-xyz789" }] }],
streamTokens: true,
background: true,
}
});
let runId: string | null = null;
let lastSeq = 0;
for await (const chunk of approve) {
if (chunk.run_id && chunk.seq_id) { runId = chunk.run_id; lastSeq = chunk.seq_id; }
if (chunk.message_type === "tool_return_message") {
// Tool result arrives here on the approval stream
break;
}
}
// Continue consuming output by resuming the background run
if (runId) {
const resume = await client.runs.stream(runId, { startingAfter: lastSeq });
for await (const chunk of resume) {
console.log(chunk);
}
}
```
</CodeGroup>
<Note>
**Run switching in background mode:** Approvals are separate background requests and create a new `run_id`. Save the approval stream cursor and resume that run. The original paused run will not deliver the tool result — do not wait for the tool return there.
</Note>
See [background mode](/guides/agents/long-running) for resumption patterns.
### IDs and UI Triggers
- **approval_request_id**: This field is now deprecated, but it is still accepted for backwards compatibility. Use `approval_request_message.id` instead.
- **tool_call_id**: Always send approvals/denials using the `tool_call_id` from the `ApprovalRequestMessage`.
- **UI trigger**: Open the approval UI on `approval_request_message` only; do not derive UI from `stop_reason`.

View File

@@ -1,468 +0,0 @@
---
title: JSON Mode & Structured Output
subtitle: Get structured JSON responses from your Letta agents
slug: guides/agents/json-mode
---
Letta provides two ways to get structured JSON output from agents: **Structured Generation through Tools** (recommended) and the `response_format` parameter.
## Quick Comparison
<Note>
**Recommended**: Use **Structured Generation through Tools** - works with all providers (Anthropic, OpenAI, Google, etc.) and integrates naturally with Letta's tool-calling architecture.
</Note>
<Info>
**Structured Generation through Tools**:
- ✅ Universal provider compatibility
- ✅ Both reasoning AND structured output
- ✅ Per-message control
- ✅ Works even as "dummy tool" for pure formatting
</Info>
<Warning>
**`response_format` parameter**:
- ⚠️ OpenAI-compatible providers only (NOT Anthropic)
- ⚠️ Persistent agent state (affects all future responses)
- ✅ Built-in provider schema enforcement
</Warning>
## Structured Generation through Tools (Recommended)
Create a tool that defines your desired response format. The tool arguments become your structured data, and you can extract them from the tool call.
### Creating a Structured Generation Tool
<CodeGroup>
```typescript TypeScript maxLines=100
import { LettaClient } from '@letta-ai/letta-client'
// Create client (Letta Cloud)
const client = new LettaClient({ token: "LETTA_API_KEY" });
// Or for self-hosted
// const client = new LettaClient({ baseUrl: "http://localhost:8283" });
// First create the tool
const toolCode = `def generate_rank(rank: int, reason: str):
"""Generate a ranking with explanation.
Args:
rank (int): The numerical rank from 1-10.
reason (str): The reasoning behind the rank.
"""
print("Rank generated")
return`;
const tool = await client.tools.create({
sourceCode: toolCode,
sourceType: "python"
});
// Create agent with the structured generation tool
const agentState = await client.agents.create({
model: "openai/gpt-4o-mini",
embedding: "openai/text-embedding-3-small",
memoryBlocks: [
{
label: "human",
value: "The human's name is Chad. They are a food enthusiast who enjoys trying different cuisines."
},
{
label: "persona",
value: "I am a helpful food critic assistant. I provide detailed rankings and reviews of different foods and restaurants."
}
],
toolIds: [tool.id]
});
```
```python title="python" maxLines=100
from letta_client import Letta
# Create client (Letta Cloud)
client = Letta(token="LETTA_API_KEY")
# Or for self-hosted
# client = Letta(base_url="http://localhost:8283")
def generate_rank(rank: int, reason: str):
"""Generate a ranking with explanation.
Args:
rank (int): The numerical rank from 1-10.
reason (str): The reasoning behind the rank.
"""
print("Rank generated")
return
# Create the tool
tool = client.tools.create(func=generate_rank)
# Create agent with the structured generation tool
agent_state = client.agents.create(
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-3-small",
memory_blocks=[
{
"label": "human",
"value": "The human's name is Chad. They are a food enthusiast who enjoys trying different cuisines."
},
{
"label": "persona",
"value": "I am a helpful food critic assistant. I provide detailed rankings and reviews of different foods and restaurants."
}
],
tool_ids=[tool.id]
)
```
</CodeGroup>
### Using the Structured Generation Tool
<CodeGroup>
```typescript TypeScript maxLines=100
// Send message and instruct agent to use the tool
const response = await client.agents.messages.create(
agentState.id, {
messages: [
{
role: "user",
content: "How do you rank sushi as a food? Please use the generate_rank tool to provide your response."
}
]
}
);
// Extract structured data from tool call
for (const message of response.messages) {
if (message.messageType === "tool_call_message") {
const args = JSON.parse(message.toolCall.arguments);
console.log(`Rank: ${args.rank}`);
console.log(`Reason: ${args.reason}`);
}
}
// Example output:
// Rank: 8
// Reason: Sushi is a highly regarded cuisine known for its fresh ingredients...
```
```python title="python" maxLines=100
# Send message and instruct agent to use the tool
response = client.agents.messages.create(
agent_id=agent_state.id,
messages=[
{
"role": "user",
"content": "How do you rank sushi as a food? Please use the generate_rank tool to provide your response."
}
]
)
# Extract structured data from tool call
for message in response.messages:
if message.message_type == "tool_call_message":
import json
args = json.loads(message.tool_call.arguments)
rank = args["rank"]
reason = args["reason"]
print(f"Rank: {rank}")
print(f"Reason: {reason}")
# Example output:
# Rank: 8
# Reason: Sushi is a highly regarded cuisine known for its fresh ingredients...
```
</CodeGroup>
The agent will call the tool, and you can extract the structured arguments:
```json
{
"rank": 8,
"reason": "Sushi is a highly regarded cuisine known for its fresh ingredients, artistic presentation, and cultural significance."
}
```
## Using `response_format` for Provider-Native JSON Mode
The `response_format` parameter enables structured output/JSON mode from LLM providers that support it. This approach is fundamentally different from tools because **`response_format` becomes a persistent part of the agent's state** - once set, all future responses from that agent will follow the format until explicitly changed.
Under the hood, `response_format` constrains the agent's assistant messages to follow the specified schema, but it doesn't affect tools - those continue to work normally with their original schemas.
<Warning>
**Requirements for `response_format`:**
- Only works with providers that support structured outputs (like OpenAI) - NOT Anthropic or other providers
</Warning>
### Basic JSON Mode
<CodeGroup>
```typescript TypeScript maxLines=100
import { LettaClient } from '@letta-ai/letta-client'
// Create client (Letta Cloud)
const client = new LettaClient({ token: "LETTA_API_KEY" });
// Create agent with basic JSON mode (OpenAI/compatible providers only)
const agentState = await client.agents.create({
model: "openai/gpt-4o-mini",
embedding: "openai/text-embedding-3-small",
memoryBlocks: [
{
label: "human",
value: "The human's name is Chad. They work as a data analyst and prefer clear, organized information."
},
{
label: "persona",
value: "I am a helpful assistant who provides clear and well-organized responses."
}
],
responseFormat: { type: "json_object" }
});
// Send message expecting JSON response
const response = await client.agents.messages.create(
agentState.id, {
messages: [
{
role: "user",
content: "How do you rank sushi as a food? Please respond in JSON format with rank and reason fields."
}
]
}
);
for (const message of response.messages) {
console.log(message);
}
```
```python title="python" maxLines=100
from letta_client import Letta
# Create client (Letta Cloud)
client = Letta(token="LETTA_API_KEY")
# Create agent with basic JSON mode (OpenAI/compatible providers only)
agent_state = client.agents.create(
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-3-small",
memory_blocks=[
{
"label": "human",
"value": "The human's name is Chad. They work as a data analyst and prefer clear, organized information."
},
{
"label": "persona",
"value": "I am a helpful assistant who provides clear and well-organized responses."
}
],
response_format={"type": "json_object"}
)
# Send message expecting JSON response
response = client.agents.messages.create(
agent_id=agent_state.id,
messages=[
{
"role": "user",
"content": "How do you rank sushi as a food? Please respond in JSON format with rank and reason fields."
}
]
)
for message in response.messages:
print(message)
```
</CodeGroup>
### Advanced JSON Schema Mode
For more precise control, you can use OpenAI's `json_schema` mode with strict validation:
<CodeGroup>
```typescript TypeScript maxLines=100
import { LettaClient } from '@letta-ai/letta-client'
const client = new LettaClient({ token: "LETTA_API_KEY" });
// Define structured schema (from OpenAI structured outputs guide)
const responseFormat = {
type: "json_schema",
jsonSchema: {
name: "food_ranking",
schema: {
type: "object",
properties: {
rank: {
type: "integer",
minimum: 1,
maximum: 10
},
reason: {
type: "string"
},
categories: {
type: "array",
items: {
type: "object",
properties: {
name: { type: "string" },
score: { type: "integer" }
},
required: ["name", "score"],
additionalProperties: false
}
}
},
required: ["rank", "reason", "categories"],
additionalProperties: false
},
strict: true
}
};
// Create agent
const agentState = await client.agents.create({
model: "openai/gpt-4o-mini",
embedding: "openai/text-embedding-3-small",
memoryBlocks: []
});
// Update agent with response format
const updatedAgent = await client.agents.update(
agentState.id,
{ responseFormat }
);
// Send message
const response = await client.agents.messages.create(
agentState.id, {
messages: [
{ role: "user", content: "How do you rank sushi? Include categories for taste, presentation, and value." }
]
}
);
for (const message of response.messages) {
console.log(message);
}
```
```python title="python" maxLines=100
from letta_client import Letta
client = Letta(token="LETTA_API_KEY")
# Define structured schema (from OpenAI structured outputs guide)
response_format = {
"type": "json_schema",
"json_schema": {
"name": "food_ranking",
"schema": {
"type": "object",
"properties": {
"rank": {
"type": "integer",
"minimum": 1,
"maximum": 10
},
"reason": {
"type": "string"
},
"categories": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": { "type": "string" },
"score": { "type": "integer" }
},
"required": ["name", "score"],
"additionalProperties": False
}
}
},
"required": ["rank", "reason", "categories"],
"additionalProperties": False
},
"strict": True
}
}
# Create agent
agent_state = client.agents.create(
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-3-small",
memory_blocks=[]
)
# Update agent with response format
agent_state = client.agents.update(
agent_id=agent_state.id,
response_format=response_format
)
# Send message
response = client.agents.messages.create(
agent_id=agent_state.id,
messages=[
{"role": "user", "content": "How do you rank sushi? Include categories for taste, presentation, and value."}
]
)
for message in response.messages:
print(message)
```
</CodeGroup>
With structured JSON schema, the agent's response will be strictly validated:
```json
{
"rank": 8,
"reason": "Sushi is highly regarded for its fresh ingredients and artful presentation",
"categories": [
{"name": "taste", "score": 9},
{"name": "presentation", "score": 10},
{"name": "value", "score": 6}
]
}
```
## Updating Agent Response Format
You can update an existing agent's response format:
<CodeGroup>
```typescript TypeScript maxLines=100
// Update agent to use JSON mode (OpenAI/compatible only)
await client.agents.update(agentState.id, {
responseFormat: { type: "json_object" }
});
// Or remove JSON mode
await client.agents.update(agentState.id, {
responseFormat: null
});
```
```python title="python" maxLines=100
# Update agent to use JSON mode (OpenAI/compatible only)
client.agents.update(
agent_id=agent_state.id,
response_format={"type": "json_object"}
)
# Or remove JSON mode
client.agents.update(
agent_id=agent_state.id,
response_format=None
)
```
</CodeGroup>

View File

@@ -1,217 +0,0 @@
---
title: Agent Memory & Architecture
subtitle: How Letta agents manage persistent, self-editing memory
slug: guides/agents/architectures/memgpt
---
<Note>
**Looking for legacy architecture documentation?** See [Legacy Architectures](/guides/legacy/memgpt_agents_legacy) for information on older agent types with send_message and heartbeats.
</Note>
<Info>
Letta is made by the [creators of MemGPT](https://www.letta.com/about-us). The agent architecture in Letta is built on the MemGPT research paper's concepts of self-editing memory and memory hierarchy.
</Info>
Letta agents solve the context window limitation of LLMs through context engineering across two tiers of memory: **in-context (core) memory** (including system instructions, read-write memory blocks, and conversation history), and **out-of-context memory** (older evicted conversation history and archival storage).
To learn more about the research origins, read the [MemGPT research paper](https://arxiv.org/abs/2310.08560), or take the free [LLM OS course](https://www.deeplearning.ai/short-courses/llms-as-operating-systems-agent-memory/?utm_campaign=memgpt-launch&utm_content=331638345&utm_medium=social&utm_source=docs&hss_channel=tw-992153930095251456) on DeepLearning.ai.
## Memory Hierarchy
```mermaid
graph LR
subgraph CONTEXT[Context Window]
SYS[System Instructions]
CORE[Memory Blocks]
MSGS[Messages]
end
RECALL[Recall Memory]
ARCH[Archival Memory]
CONTEXT <--> RECALL
CONTEXT <--> ARCH
```
### In-context (core) memory
Your agent's context window contains:
- **System instructions:** Your agent's base behavior and capabilities
- **Memory blocks:** Persistent, always-visible information (persona, user info, working state, etc.)
- **Recent messages:** Latest conversation history
### Out-of-context memory
When the context window fills up:
- **Recall memory:** Older messages searchable via `conversation_search` tool
- **Archival memory:** Long-term semantic storage searchable via `archival_memory_search` tool
## Agent Architecture
Letta's agent architecture follows modern LLM patterns:
- **Native reasoning:** Uses model's built-in reasoning capabilities (Responses API for OpenAI, encrypted reasoning for other providers)
- **Direct messaging:** Agents respond with assistant messages
- **Compatibility:** Works with any LLM, tool calling not required
- **Self-directed termination:** Agents decide when to continue or stop
This architecture is optimized for frontier models like GPT-5 and Claude Sonnet 4.5.
[Learn more about the architecture evolution →](https://www.letta.com/blog/letta-v1-agent)
## Memory Tools
Letta agents have tools to manage their own memory:
### Memory block editing
* `memory_insert` - Insert text into a memory block
* `memory_replace` - Replace specific text in a memory block
* `memory_rethink` - Completely rewrite a memory block
### Recall memory
* `conversation_search` - Search prior conversation history
### Archival memory
* `archival_memory_insert` - Store facts and knowledge long-term
* `archival_memory_search` - Query semantic storage
[Learn more about memory tools →](/guides/agents/base-tools)
## Creating Agents
Agents are created with memory blocks that define their persistent context:
<CodeGroup>
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client'
const client = new LettaClient({ token: "LETTA_API_KEY" });
const agent = await client.agents.create({
model: "openai/gpt-4o-mini",
embedding: "openai/text-embedding-3-small",
memoryBlocks: [
{
label: "human",
value: "The human's name is Chad. They like vibe coding."
},
{
label: "persona",
value: "My name is Sam, the all-knowing sentient AI."
}
],
tools: ["web_search", "run_code"]
});
```
```python Python
from letta_client import Letta
client = Letta(token="LETTA_API_KEY")
agent = client.agents.create(
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-3-small",
memory_blocks=[
{
"label": "human",
"value": "The human's name is Chad. They like vibe coding."
},
{
"label": "persona",
"value": "My name is Sam, the all-knowing sentient AI."
}
],
tools=["web_search", "run_code"]
)
```
```bash cURL
curl -X POST https://api.letta.com/v1/agents \
-H "Authorization: Bearer $LETTA_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "openai/gpt-4o-mini",
"embedding": "openai/text-embedding-3-small",
"memory_blocks": [
{
"label": "human",
"value": "The human'\''s name is Chad. They like vibe coding."
},
{
"label": "persona",
"value": "My name is Sam, the all-knowing sentient AI."
}
],
"tools": ["web_search", "run_code"]
}'
```
</CodeGroup>
## Context Window Management
When the context window fills up, Letta automatically:
1. Compacts older messages into a recursive summary
2. Moves full message history to recall storage
3. Agent can search recall with `conversation_search` tool
This happens transparently - your agent maintains continuity.
## Populating Archival Memory
Agents can insert memories during conversations, or you can populate archival memory programmatically:
<CodeGroup>
```typescript TypeScript
// Insert a memory via SDK
await client.agents.passages.insert(agent.id, {
content: "The user prefers TypeScript over JavaScript for type safety.",
tags: ["preferences", "languages"]
});
// Agent can now search this
// Agent calls: archival_memory_search(query="language preferences")
```
```python Python
# Insert a memory via SDK
client.agents.passages.insert(
agent_id=agent.id,
content="The user prefers TypeScript over JavaScript for type safety.",
tags=["preferences", "languages"]
)
# Agent can now search this
# Agent calls: archival_memory_search(query="language preferences")
```
</CodeGroup>
[Learn more about archival memory →](/guides/agents/archival-memory)
## Research Background
Key concepts from the MemGPT research:
- **Self-editing memory:** Agents actively manage their own memory
- **Memory hierarchy:** In-context vs out-of-context storage
- **Tool-based memory management:** Agents decide what to remember
- **Stateful agents:** Persistent memory across all interactions
[Read the MemGPT paper →](https://arxiv.org/abs/2310.08560)
[Take the free course →](https://www.deeplearning.ai/short-courses/llms-as-operating-systems-agent-memory)
## Next Steps
<CardGroup cols={2}>
<Card title="Memory Blocks" href="/guides/agents/memory-blocks">
Deep dive into memory block structure
</Card>
<Card title="Archival Memory" href="/guides/agents/archival-memory">
Long-term semantic storage
</Card>
<Card title="Base Tools" href="/guides/agents/base-tools">
Built-in tools for memory management
</Card>
<Card title="Context Engineering" href="/guides/agents/context-engineering">
Optimizing agent memory usage
</Card>
</CardGroup>

View File

@@ -1,415 +0,0 @@
---
title: Memory Blocks
subtitle: Understanding the building blocks of agent memory
slug: guides/agents/memory-blocks
---
<Info>
Interested in learning more about the origin of memory blocks? Read our [blog post](https://www.letta.com/blog/memory-blocks).
</Info>
## What are memory blocks?
Memory blocks are structured sections of the agent's context window that persist across all interactions. They are always visible - no retrieval needed.
**Memory blocks are Letta's core abstraction.** Create a block with a descriptive label and the agent learns how to use it. This simple mechanism enables capabilities impossible with traditional context management.
**Key properties:**
- **Agent-managed** - Agents autonomously organize information based on block labels
- **Flexible** - Use for any purpose: knowledge, guidelines, state tracking, scratchpad space
- **Shareable** - Multiple agents can access the same block; update once, visible everywhere
- **Always visible** - Blocks stay in context, never need retrieval
**Examples:**
- Store tool usage guidelines so agents avoid past mistakes
- Maintain working memory in a scratchpad block
- Mirror external state (user's current document) for real-time awareness
- Share read-only policies across all agents from a central source
- Coordinate multi-agent systems: parent agents watch subagent result blocks update in real-time
- Enable emergent behavior: add `performance_tracking` or `emotional_state` and watch agents start using them
Memory blocks aren't just storage - they're a coordination primitive that enables sophisticated agent behavior.
## Memory block structure
Memory blocks represent a section of an agent's context window. An agent may have multiple memory blocks, or none at all. A memory block consists of:
* A `label`, which is a unique identifier for the block
* A `description`, which describes the purpose of the block
* A `value`, which is the contents/data of the block
* A `limit`, which is the size limit (in characters) of the block
## The importance of the `description` field
When making memory blocks, it's crucial to provide a good `description` field that accurately describes what the block should be used for.
The `description` is the main information used by the agent to determine how to read and write to that block. Without a good description, the agent may not understand how to use the block.
Because `persona` and `human` are two popular block labels, Letta autogenerates default descriptions for these blocks if you don't provide them. If you provide a description for a memory block labelled `persona` or `human`, the default description will be overridden.
For `persona`, a good default is:
> The persona block: Stores details about your current persona, guiding how you behave and respond. This helps you to maintain consistency and personality in your interactions.
For `human`, a good default is:
> The human block: Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation.
## Read-only blocks
Memory blocks are read-write by default (so the agent can update the block using memory tools), but can be set to read-only by setting the `read_only` field to `true`. When a block is read-only, the agent cannot update the block.
Read-only blocks are useful when you want to give an agent access to information (for example, a shared memory block about an organization), but you don't want the agent to be able to make potentially destructive changes to the block.
## Creating an agent with memory blocks
When you create an agent, you can specify memory blocks to also be created with the agent. For most chat applications, we recommend creating a `human` block (to represent memories about the user) and a `persona` block (to represent the agent's persona).
<CodeGroup>
```typescript TypeScript maxLines=50
// install letta-client with `npm install @letta-ai/letta-client`
import { LettaClient } from '@letta-ai/letta-client'
// create a client to connect to your local Letta server
const client = new LettaClient({
baseUrl: "http://localhost:8283"
});
// create an agent with two basic self-editing memory blocks
const agentState = await client.agents.create({
memoryBlocks: [
{
label: "human",
value: "The human's name is Bob the Builder.",
limit: 5000
},
{
label: "persona",
value: "My name is Sam, the all-knowing sentient AI.",
limit: 5000
}
],
model: "openai/gpt-4o-mini",
embedding: "openai/text-embedding-3-small"
});
```
```python title="python" maxLines=50
# install letta_client with `pip install letta-client`
from letta_client import Letta
# create a client to connect to your local Letta server
client = Letta(
base_url="http://localhost:8283"
)
# create an agent with two basic self-editing memory blocks
agent_state = client.agents.create(
memory_blocks=[
{
"label": "human",
"value": "The human's name is Bob the Builder.",
"limit": 5000
},
{
"label": "persona",
"value": "My name is Sam, the all-knowing sentient AI.",
"limit": 5000
}
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-3-small"
)
```
</CodeGroup>
When the agent is created, the corresponding blocks are also created and attached to the agent, so that the block value will be in the context window.
## Creating and attaching memory blocks
You can also directly create blocks and attach them to an agent. This can be useful if you want to create blocks that are shared between multiple agents. If multiple agents are attached to a block, they will all have the block data in their context windows (essentially providing shared memory).
Below is an example of creating a block directly, and attaching the block to two agents by specifying the `block_ids` field.
<CodeGroup>
```typescript TypeScript maxLines=50
// create a persisted block, which can be attached to agents
const block = await client.blocks.create({
label: "organization",
description: "A block to store information about the organization",
value: "Organization: Letta",
limit: 4000,
});
// create an agent with both a shared block and its own blocks
const sharedBlockAgent1 = await client.agents.create({
name: "shared_block_agent1",
memoryBlocks: [
{
label: "persona",
value: "I am agent 1"
},
],
blockIds: [block.id],
model: "openai/gpt-4o-mini",
embedding: "openai/text-embedding-3-small"
});
// create another agent sharing the block
const sharedBlockAgent2 = await client.agents.create({
name: "shared_block_agent2",
memoryBlocks: [
{
label: "persona",
value: "I am agent 2"
},
],
blockIds: [block.id],
model: "openai/gpt-4o-mini",
embedding: "openai/text-embedding-3-small"
});
```
```python title="python" maxLines=50
# create a persisted block, which can be attached to agents
block = client.blocks.create(
label="organization",
description="A block to store information about the organization",
value="Organization: Letta",
limit=4000,
)
# create an agent with both a shared block and its own blocks
shared_block_agent1 = client.agents.create(
name="shared_block_agent1",
memory_blocks=[
{
"label": "persona",
"value": "I am agent 1"
},
],
block_ids=[block.id],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-3-small"
)
# create another agent sharing the block
shared_block_agent2 = client.agents.create(
name="shared_block_agent2",
memory_blocks=[
{
"label": "persona",
"value": "I am agent 2"
},
],
block_ids=[block.id],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-3-small"
)
```
</CodeGroup>
You can also attach blocks to existing agents:
<CodeGroup>
```typescript TypeScript
await client.agents.blocks.attach(agent.id, block.id);
```
```python Python
client.agents.blocks.attach(agent_id=agent.id, block_id=block.id)
```
</CodeGroup>
You can see all agents attached to a block by using the `block_id` field in the [blocks retrieve](/api-reference/blocks/retrieve) endpoint.
## Managing blocks
### Retrieving a block
You can retrieve the contents of a block by ID. This is useful when blocks store finalized reports, code outputs, or other data you want to extract for use outside the agent.
<CodeGroup>
```typescript TypeScript
const block = await client.blocks.retrieve(block.id);
console.log(block.value); // access the block's content
```
```python Python
block = client.blocks.retrieve(block.id)
print(block.value) # access the block's content
```
</CodeGroup>
### Listing blocks
You can list all blocks, optionally filtering by label or searching by label text. This is useful for finding blocks across your project.
<CodeGroup>
```typescript TypeScript
// list all blocks
const blocks = await client.blocks.list();
// filter by label
const humanBlocks = await client.blocks.list({
label: "human"
});
// search by label text
const searchResults = await client.blocks.list({
labelSearch: "organization"
});
```
```python Python
# list all blocks
blocks = client.blocks.list()
# filter by label
human_blocks = client.blocks.list(label="human")
# search by label text
search_results = client.blocks.list(label_search="organization")
```
</CodeGroup>
### Modifying a block
You can directly modify a block's content, limit, description, or other properties. This is particularly useful for:
- External scripts that provide up-to-date information to agents (e.g., syncing a text file to a block)
- Updating shared blocks that multiple agents reference
- Programmatically managing block content outside of agent interactions
<CodeGroup>
```typescript TypeScript
// update the block's value - completely replaces the content
await client.blocks.modify(block.id, {
value: "Updated organization information: Letta - Building agentic AI"
});
// update multiple properties
await client.blocks.modify(block.id, {
value: "New content",
limit: 6000,
description: "Updated description"
});
```
```python Python
# update the block's value - completely replaces the content
client.blocks.modify(
block.id,
value="Updated organization information: Letta - Building agentic AI"
)
# update multiple properties
client.blocks.modify(
block.id,
value="New content",
limit=6000,
description="Updated description"
)
```
</CodeGroup>
<Warning>
**Setting `value` completely replaces the entire block content** - it is not an append operation. If multiple processes (agents or external scripts) modify the same block concurrently, the last write wins and overwrites all earlier changes. To avoid data loss:
- Set blocks to **read-only** if you don't want agents to modify them
- Only modify blocks directly in controlled scenarios where overwriting is acceptable
- Ensure your application logic accounts for full replacements, not merges
</Warning>
### Deleting a block
You can delete a block when it's no longer needed. Note that deleting a block will remove it from all agents that have it attached.
<CodeGroup>
```typescript TypeScript
await client.blocks.delete(block.id);
```
```python Python
client.blocks.delete(block_id=block.id)
```
</CodeGroup>
### Inspecting block usage
See which agents have a block attached:
<CodeGroup>
```typescript TypeScript
// list all agents that use this block
const agentsWithBlock = await client.blocks.agents.list(block.id);
console.log(`Used by ${agentsWithBlock.length} agents:`);
for (const agent of agentsWithBlock) {
console.log(` - ${agent.name}`);
}
// with pagination
const agentsPage = await client.blocks.agents.list(block.id, {
limit: 10,
order: "asc"
});
```
```python Python
# list all agents that use this block
agents_with_block = client.blocks.agents.list(block_id=block.id)
print(f"Used by {len(agents_with_block)} agents:")
for agent in agents_with_block:
print(f" - {agent.name}")
# with pagination
agents_page = client.blocks.agents.list(
block_id=block.id,
limit=10,
order="asc"
)
```
</CodeGroup>
## Agent-scoped block operations
### Listing an agent's blocks
You can retrieve all blocks attached to a specific agent. This shows you the complete memory configuration for that agent.
<CodeGroup>
```typescript TypeScript
const agentBlocks = await client.agents.blocks.list(agent.id);
```
```python Python
agent_blocks = client.agents.blocks.list(agent_id=agent.id)
```
</CodeGroup>
### Retrieving an agent's block by label
Instead of using a block ID, you can retrieve a block from a specific agent using its label. This is useful when you want to inspect what the agent currently knows about a specific topic.
<CodeGroup>
```typescript TypeScript
// get the agent's current knowledge about the human
const humanBlock = await client.agents.blocks.retrieve(
agent.id,
"human"
);
console.log(humanBlock.value);
```
```python Python
# get the agent's current knowledge about the human
human_block = client.agents.blocks.retrieve(
agent_id=agent.id,
block_label="human"
)
print(human_block.value)
```
</CodeGroup>
### Modifying an agent's block
You can modify a block through the agent-scoped endpoint using the block's label. This is useful for updating agent-specific memory without needing to know the block ID.
<CodeGroup>
```typescript TypeScript
// update the agent's human block
await client.agents.blocks.modify(agent.id, "human", {
value: "The human's name is Alice. She prefers Python over TypeScript."
});
```
```python Python
# update the agent's human block
client.agents.blocks.modify(
agent_id=agent.id,
block_label="human",
value="The human's name is Alice. She prefers Python over TypeScript."
)
```
</CodeGroup>
### Detaching blocks from agents
You can detach a block from an agent's context window. This removes the block from the agent's memory without deleting the block itself.
<CodeGroup>
```typescript TypeScript
await client.agents.blocks.detach(agent.id, block.id);
```
```python Python
client.agents.blocks.detach(agent_id=agent.id, block_id=block.id)
```
</CodeGroup>

View File

@@ -1,459 +0,0 @@
---
title: Message Types
subtitle: Understanding message types and working with agent message history
slug: guides/agents/message-types
---
When you interact with a Letta agent and retrieve its message history using `client.agents.messages.list()`, you'll receive various types of messages that represent different aspects of the agent's execution. This guide explains all message types and how to work with them.
## Overview
Letta uses a structured message system where each message has a specific `message_type` field that indicates its purpose. Messages are returned as instances of `LettaMessageUnion`, which is a discriminated union of all possible message types.
## Message Type Categories
### User and System Messages
#### `user_message`
Messages sent by the user or system events packaged as user input.
**Structure:**
```typescript
{
id: string;
date: datetime;
message_type: "user_message";
content: string | Array<TextContent | ImageContent>;
name?: string;
otid?: string;
sender_id?: string;
}
```
**Special User Message Subtypes:**
User messages can contain JSON with a `type` field indicating special message subtypes:
- **`login`** - User login events
```json
{
"type": "login",
"last_login": "Never (first login)",
"time": "2025-10-03 12:34:56 PM PDT-0700"
}
```
- **`user_message`** - Standard user messages
```json
{
"type": "user_message",
"message": "Hello, agent!",
"time": "2025-10-03 12:34:56 PM PDT-0700"
}
```
- **`system_alert`** - System notifications and alerts
```json
{
"type": "system_alert",
"message": "System notification text",
"time": "2025-10-03 12:34:56 PM PDT-0700"
}
```
#### `system_message`
Messages generated by the system, typically used for internal context.
**Structure:**
```typescript
{
id: string;
date: datetime;
message_type: "system_message";
content: string;
name?: string;
}
```
**Note:** System messages are never streamed back in responses; they're only visible when paginating through message history.
### Agent Reasoning and Responses
#### `reasoning_message`
Represents the agent's internal reasoning or "chain of thought."
**Structure:**
```typescript
{
id: string;
date: datetime;
message_type: "reasoning_message";
reasoning: string;
source: "reasoner_model" | "non_reasoner_model";
signature?: string;
}
```
**Fields:**
- `reasoning` - The agent's internal thought process
- `source` - Whether this was generated by a model with native reasoning (like o1) or via prompting
- `signature` - Optional cryptographic signature for reasoning verification (for models that support it)
#### `hidden_reasoning_message`
Represents reasoning that has been hidden from the response.
**Structure:**
```typescript
{
id: string;
date: datetime;
message_type: "hidden_reasoning_message";
state: "redacted" | "omitted";
hidden_reasoning?: string;
}
```
**Fields:**
- `state: "redacted"` - The provider redacted the reasoning content
- `state: "omitted"` - The API chose not to include reasoning (e.g., for o1/o3 models)
#### `assistant_message`
The actual message content sent by the agent.
**Structure:**
```typescript
{
id: string;
date: datetime;
message_type: "assistant_message";
content: string | Array<TextContent>;
name?: string;
}
```
### Tool Execution Messages
#### `tool_call_message`
A request from the agent to execute a tool.
**Structure:**
```typescript
{
id: string;
date: datetime;
message_type: "tool_call_message";
tool_call: {
name: string;
arguments: string; // JSON string
tool_call_id: string;
};
}
```
**Example:**
```typescript
{
message_type: "tool_call_message",
tool_call: {
name: "archival_memory_search",
arguments: '{"query": "user preferences", "page": 0}',
tool_call_id: "call_abc123"
}
}
```
#### `tool_return_message`
The result of a tool execution.
**Structure:**
```typescript
{
id: string;
date: datetime;
message_type: "tool_return_message";
tool_return: string;
status: "success" | "error";
tool_call_id: string;
stdout?: string[];
stderr?: string[];
}
```
**Fields:**
- `tool_return` - The formatted return value from the tool
- `status` - Whether the tool executed successfully
- `stdout`/`stderr` - Captured output from the tool execution (useful for debugging)
### Human-in-the-Loop Messages
#### `approval_request_message`
A request for human approval before executing a tool.
**Structure:**
```typescript
{
id: string;
date: datetime;
message_type: "approval_request_message";
tool_call: {
name: string;
arguments: string;
tool_call_id: string;
};
}
```
See [Human-in-the-Loop](/guides/agents/human_in_the_loop) for more information on this experimental feature.
#### `approval_response_message`
The user's response to an approval request.
**Structure:**
```typescript
{
id: string;
date: datetime;
message_type: "approval_response_message";
approve: boolean;
approval_request_id: string;
reason?: string;
}
```
## Working with Messages
### Listing Messages
<CodeGroup>
```typescript TypeScript
import { LettaClient } from "@letta-ai/letta-client";
const client = new LettaClient({
baseUrl: "https://api.letta.com",
});
// List recent messages
const messages = await client.agents.messages.list("agent-id", {
limit: 50,
useAssistantMessage: true,
});
// Iterate through message types
for (const message of messages) {
switch (message.messageType) {
case "user_message":
console.log("User:", message.content);
break;
case "assistant_message":
console.log("Agent:", message.content);
break;
case "reasoning_message":
console.log("Reasoning:", message.reasoning);
break;
case "tool_call_message":
console.log("Tool call:", message.toolCall.name);
break;
// ... handle other types
}
}
```
```python Python
from letta_client import Letta
client = Letta(base_url="https://api.letta.com")
# List recent messages
messages = client.agents.messages.list(
agent_id="agent-id",
limit=50,
use_assistant_message=True
)
# Iterate through message types
for message in messages:
if message.message_type == "user_message":
print(f"User: {message.content}")
elif message.message_type == "assistant_message":
print(f"Agent: {message.content}")
elif message.message_type == "reasoning_message":
print(f"Reasoning: {message.reasoning}")
elif message.message_type == "tool_call_message":
print(f"Tool call: {message.tool_call.name}")
# ... handle other types
```
</CodeGroup>
### Filtering Messages by Type
<CodeGroup>
```typescript TypeScript
// Get only assistant messages (what the agent said to the user)
const agentMessages = messages.filter(
(msg) => msg.messageType === "assistant_message"
);
// Get all tool-related messages
const toolMessages = messages.filter(
(msg) => msg.messageType === "tool_call_message" ||
msg.messageType === "tool_return_message"
);
// Get conversation history (user + assistant messages only)
const conversation = messages.filter(
(msg) => msg.messageType === "user_message" ||
msg.messageType === "assistant_message"
);
```
```python Python
# Get only assistant messages (what the agent said to the user)
agent_messages = [
msg for msg in messages
if msg.message_type == "assistant_message"
]
# Get all tool-related messages
tool_messages = [
msg for msg in messages
if msg.message_type in ["tool_call_message", "tool_return_message"]
]
# Get conversation history (user + assistant messages only)
conversation = [
msg for msg in messages
if msg.message_type in ["user_message", "assistant_message"]
]
```
</CodeGroup>
### Pagination
Messages support cursor-based pagination:
<CodeGroup>
```typescript TypeScript
// Get first page
let messages = await client.agents.messages.list("agent-id", {
limit: 100,
});
// Get next page using the last message ID
const lastMessageId = messages[messages.length - 1].id;
const nextPage = await client.agents.messages.list("agent-id", {
limit: 100,
before: lastMessageId,
});
```
```python Python
# Get first page
messages = client.agents.messages.list(
agent_id="agent-id",
limit=100
)
# Get next page using the last message ID
last_message_id = messages[-1].id
next_page = client.agents.messages.list(
agent_id="agent-id",
limit=100,
before=last_message_id
)
```
</CodeGroup>
## Message Metadata Fields
All message types include these common fields:
- **`id`** - Unique identifier for the message
- **`date`** - ISO 8601 timestamp of when the message was created
- **`message_type`** - The discriminator field identifying the message type
- **`name`** - Optional name field (varies by message type)
- **`otid`** - Offline threading ID for message correlation
- **`sender_id`** - The ID of the sender (identity or agent ID)
- **`step_id`** - The step ID associated with this message
- **`is_err`** - Whether this message is part of an error step (debugging only)
- **`seq_id`** - Sequence ID for ordering
- **`run_id`** - The run ID associated with this message
## Best Practices
### 1. Use Type Discriminators
Always check the `message_type` field to safely access type-specific fields:
<CodeGroup>
```typescript TypeScript
if (message.messageType === "tool_call_message") {
// TypeScript now knows message has a toolCall field
console.log(message.toolCall.name);
}
```
```python Python
if message.message_type == "tool_call_message":
# Safe to access tool_call
print(message.tool_call.name)
```
</CodeGroup>
### 2. Handle Special User Messages
When displaying conversations to end users, filter out internal messages:
```python
def is_internal_message(msg):
"""Check if a user message is internal (login, system_alert, etc.)"""
if msg.message_type != "user_message":
return False
if not isinstance(msg.content, str):
return False
try:
parsed = json.loads(msg.content)
return parsed.get("type") in ["login", "system_alert"]
except:
return False
# Get user-facing messages only
display_messages = [
msg for msg in messages
if not is_internal_message(msg)
]
```
### 3. Track Tool Execution
Match tool calls with their returns using `tool_call_id`:
```python
# Build a map of tool calls to their returns
tool_calls = {
msg.tool_call.tool_call_id: msg
for msg in messages
if msg.message_type == "tool_call_message"
}
tool_returns = {
msg.tool_call_id: msg
for msg in messages
if msg.message_type == "tool_return_message"
}
# Find failed tool calls
for call_id, call_msg in tool_calls.items():
if call_id in tool_returns:
return_msg = tool_returns[call_id]
if return_msg.status == "error":
print(f"Tool {call_msg.tool_call.name} failed:")
print(f" {return_msg.tool_return}")
```
## See Also
- [Human-in-the-Loop](/guides/agents/human_in_the_loop) - Using approval messages
- [Streaming Responses](/guides/agents/streaming) - Receiving messages in real-time
- [API Reference](/api-reference/agents/messages/list) - Full API documentation

View File

@@ -1,279 +0,0 @@
---
title: Building Stateful Agents with Letta
slug: guides/agents/overview
---
<Info>
**New to Letta?** If you haven't already, read [Core Concepts](/core-concepts) to understand how Letta's stateful agents are fundamentally different from traditional LLM APIs.
</Info>
Letta agents can automatically manage long-term memory, load data from external sources, and call custom tools.
Unlike in other frameworks, Letta agents are stateful, so they keep track of historical interactions and reserve part of their context to read and write memories which evolve over time.
<img className="light" src="/images/stateful_agents.png" />
<img className="dark" src="/images/stateful_agents_dark.png" />
Letta manages a reasoning loop for agents. At each agent step (i.e. iteration of the loop), the state of the agent is checkpointed and persisted to the database.
You can interact with agents from a REST API, the ADE, and TypeScript / Python SDKs.
As long as they are connected to the same service, all of these interfaces can be used to interact with the same agents.
<Tip>
If you're interested in learning more about stateful agents, read our [blog post](https://www.letta.com/blog/stateful-agents).
</Tip>
## Agents vs Threads
In Letta, you can think of an agent as a single entity that has a single message history which is treated as infinite.
The sequence of interactions the agent has experienced through its existence makes up the agent's state (or memory).
One distinction between Letta and other agent frameworks is that Letta does not have the notion of message *threads* (or *sessions*).
Instead, there are only *stateful agents*, which have a single perpetual thread (sequence of messages).
The reason we use the term *agent* rather than *thread* is that Letta is based on the principle that **all agent interactions should be part of the persistent memory**, as opposed to building agent applications around ephemeral, short-lived interactions (like a thread or session).
```mermaid
%%{init: {'flowchart': {'rankDir': 'LR'}}}%%
flowchart LR
subgraph Traditional["Thread-Based Agents"]
direction TB
llm1[LLM] --> thread1["Thread 1
--------
Ephemeral
Session"]
llm1 --> thread2["Thread 2
--------
Ephemeral
Session"]
llm1 --> thread3["Thread 3
--------
Ephemeral
Session"]
end
Traditional ~~~ Letta
subgraph Letta["Letta Stateful Agents"]
direction TB
llm2[LLM] --> agent["Single Agent
--------
Persistent Memory"]
agent --> db[(PostgreSQL)]
db -->|"Learn & Update"| agent
end
class thread1,thread2,thread3 session
class agent agent
```
If you would like to create common starting points for new conversation "threads", we recommend using [agent templates](/guides/templates/overview) to create new agents for each conversation, or directly copying agent state from an existing agent.
For multi-user applications, we recommend creating an agent per user, though you can also have multiple users message a single agent (but it will be a single shared message history).
## Create an agent
<Note>
To start creating agents, you can run a Letta server locally using **Letta Desktop**, deploy a server locally + remotely with **Docker**, or use **Letta Cloud**. See our [quickstart guide](/quickstart) for more information.
</Note>
Assuming we're running a Letta server locally at `http://localhost:8283`, we can create a new agent via the REST API, Python SDK, or TypeScript SDK:
<CodeGroup>
```curl curl
curl -X POST http://localhost:8283/v1/agents/ \
-H "Content-Type: application/json" \
-d '{
"memory_blocks": [
{
"value": "The human'\''s name is Bob the Builder.",
"label": "human"
},
{
"value": "My name is Sam, the all-knowing sentient AI.",
"label": "persona"
}
],
"model": "openai/gpt-4o-mini",
"context_window_limit": 16000,
"embedding": "openai/text-embedding-3-small"
}'
```
```python title="python" maxLines=50
# install letta_client with `pip install letta-client`
from letta_client import Letta
# create a client to connect to your local Letta server
client = Letta(
base_url="http://localhost:8283"
)
# create an agent with two basic self-editing memory blocks
agent_state = client.agents.create(
memory_blocks=[
{
"label": "human",
"value": "The human's name is Bob the Builder."
},
{
"label": "persona",
"value": "My name is Sam, the all-knowing sentient AI."
}
],
model="openai/gpt-4o-mini",
context_window_limit=16000,
embedding="openai/text-embedding-3-small"
)
# the AgentState object contains all the information about the agent
print(agent_state)
```
```typescript TypeScript maxLines=50
// install letta-client with `npm install @letta-ai/letta-client`
import { LettaClient } from '@letta-ai/letta-client'
// create a client to connect to your local Letta server
const client = new LettaClient({
baseUrl: "http://localhost:8283"
});
// create an agent with two basic self-editing memory blocks
const agentState = await client.agents.create({
memoryBlocks: [
{
label: "human",
value: "The human's name is Bob the Builder."
},
{
label: "persona",
value: "My name is Sam, the all-knowing sentient AI."
}
],
model: "openai/gpt-4o-mini",
contextWindowLimit: 16000,
embedding: "openai/text-embedding-3-small"
});
// the AgentState object contains all the information about the agent
console.log(agentState);
```
</CodeGroup>
You can also create an agent without any code using the [Agent Development Environment (ADE)](/agent-development-environment).
All Letta agents are stored in a database on the Letta server, so you can access the same agents from the ADE, the REST API, the Python SDK, and the TypeScript SDK.
The response will include information about the agent, including its `id`:
```json
{
"id": "agent-43f8e098-1021-4545-9395-446f788d7389",
"name": "GracefulFirefly",
...
}
```
Once an agent is created, you can message it:
<CodeGroup>
```curl curl
curl --request POST \
--url http://localhost:8283/v1/agents/$AGENT_ID/messages \
--header 'Content-Type: application/json' \
--data '{
"messages": [
{
"role": "user",
"content": "hows it going????"
}
]
}'
```
```python title="python" maxLines=50
# send a message to the agent
response = client.agents.messages.create(
agent_id=agent_state.id,
messages=[
{
"role": "user",
"content": "hows it going????"
}
]
)
# the response object contains the messages and usage statistics
print(response)
# if we want to print the usage stats
print(response.usage)
# if we want to print the messages
for message in response.messages:
print(message)
```
```typescript TypeScript maxLines=50
// send a message to the agent
const response = await client.agents.messages.create(
agentState.id, {
messages: [
{
role: "user",
content: "hows it going????"
}
]
}
);
// the response object contains the messages and usage statistics
console.log(response);
// if we want to print the usage stats
console.log(response.usage)
// if we want to print the messages
for (const message of response.messages) {
console.log(message);
}
```
</CodeGroup>
### Message Types
The `response` object contains the following attributes:
* `usage`: The usage of the agent after the message was sent (the prompt tokens, completion tokens, and total tokens)
* `messages`: A list of `LettaMessage` objects generated by the agent
#### `LettaMessage`
The `LettaMessage` object is a simplified version of the `Message` object stored in the database backend.
Since a `Message` can include multiple events like a chain-of-thought and function calls, `LettaMessage` simplifies messages to have the following types:
* `reasoning_message`: The inner monologue (chain-of-thought) of the agent
* `tool_call_message`: An agent's tool (function) call
* `tool_return_message`: The result of executing an agent's tool (function) call
* `assistant_message`: An agent's response message (direct response in current architecture, or `send_message` tool call in legacy architectures)
* `system_message`: A system message (for example, an alert about the user logging in)
* `user_message`: A user message
<Note>
In current Letta agents, `assistant_message` represents the agent's direct response. In legacy architectures (`memgpt_agent`, `memgpt_v2_agent`), it wraps the `send_message` tool call.
If you prefer to see the raw tool call format in legacy agents, you can set `use_assistant_message` to `false` in the request `config` (see the [endpoint documentation](/api-reference/agents/messages/create)).
</Note>
## Common agent operations
For a more in-depth guide on the full set of Letta agent operations, check out our [API reference](/api-reference/overview), our extended [Python SDK](https://github.com/letta-ai/letta/blob/main/examples/docs/example.py) and [TypeScript SDK](https://github.com/letta-ai/letta/blob/main/examples/docs/node/example.ts) examples, as well as our other [cookbooks](/cookbooks).
If you're using a self-hosted Letta server, you should set the **base URL** (`base_url` in Python, `baseUrl` in TypeScript) to the Letta server's URL (e.g. `http://localhost:8283`) when you create your client. See an example [here](/api-reference/overview).
If you're using a self-hosted server, you can omit the token if you're not using [password protection](/guides/server/docker#password-protection-advanced).
If you are using password protection, set your **token** to the **password**.
If you're using Letta Cloud, you should set the **token** to your **Letta Cloud API key**.
### Retrieving an agent's state
The agent's state is always persisted, so you can retrieve an agent's state by its ID.
<EndpointRequestSnippet endpoint="GET /v1/agents/:agent_id" />
The result of the call is an `AgentState` object:
<EndpointResponseSnippet endpoint="GET /v1/agents/:agent_id" />
### List agents
Replace `agent_id` with your actual agent ID.
<EndpointRequestSnippet endpoint="GET /v1/agents/" />
The result of the call is a list of `AgentState` objects:
<EndpointResponseSnippet endpoint="GET /v1/agents/" />
### Delete an agent
To delete an agent, you can use the `DELETE` endpoint with your `agent_id`:
<EndpointRequestSnippet endpoint="DELETE /v1/agents/:agent_id" />

View File

@@ -1,94 +0,0 @@
---
title: Utilities
subtitle: Pre-built tools for web access, code execution, and data fetching
slug: guides/agents/prebuilt-tools
---
Letta provides pre-built tools that enable agents to search the web, execute code, and fetch webpage content.
## Available Utilities
### [Web Search](/guides/agents/web-search)
Search the internet in real-time using [Exa](https://exa.ai)'s AI-powered search engine.
```python
agent = client.agents.create(
tools=["web_search"],
memory_blocks=[{
"label": "persona",
"value": "I use web_search for current events and external research."
}]
)
```
**Key features:**
- AI-powered semantic search
- Category filtering (news, research papers, PDFs, etc.)
- Domain filtering
- Date range filtering
- Highlights and AI-generated summaries
**Setup:** Works out of the box on Letta Cloud. Self-hosted requires `EXA_API_KEY`.
[Read full documentation →](/guides/agents/web-search)
---
### [Code Interpreter](/guides/agents/run-code)
Execute code in a secure sandbox with full network access.
```python
agent = client.agents.create(
tools=["run_code"],
memory_blocks=[{
"label": "persona",
"value": "I use Python for data analysis and API calls."
}]
)
```
**Key features:**
- Python with 191+ pre-installed packages (numpy, pandas, scipy, etc.)
- JavaScript, TypeScript, R, and Java support
- Full network access for API calls
- Fresh environment per execution (no state persistence)
**Setup:** Works out of the box on Letta Cloud. Self-hosted requires `E2B_API_KEY`.
[Read full documentation →](/guides/agents/run-code)
---
### [Fetch Webpage](/guides/agents/fetch-webpage)
Fetch and convert webpages to readable text/markdown.
```python
agent = client.agents.create(
tools=["fetch_webpage"],
memory_blocks=[{
"label": "persona",
"value": "I fetch and read webpages to answer questions."
}]
)
```
**Key features:**
- Converts HTML to clean markdown
- Extracts article content
- Multiple fallback extraction methods
- Optional Exa integration for enhanced extraction
**Setup:** Works out of the box everywhere. Optional `EXA_API_KEY` for enhanced extraction.
[Read full documentation →](/guides/agents/fetch-webpage)
---
## Related Documentation
- [Custom Tools](/guides/agents/custom-tools)
- [Tool Variables](/guides/agents/tool-variables)
- [Model Context Protocol](/guides/mcp/overview)

View File

@@ -1,258 +0,0 @@
---
title: Code Interpreter
subtitle: Execute code in a secure sandbox with full network access
slug: guides/agents/run-code
---
The `run_code` tool enables Letta agents to execute code in a secure sandboxed environment. Useful for data analysis, calculations, API calls, and programmatic computation.
<Info>
On [Letta Cloud](/guides/cloud/overview), this tool works out of the box. For self-hosted deployments, you'll need to [configure an E2B API key](#self-hosted-setup).
</Info>
<Warning>
Each execution runs in a **fresh environment** - variables, files, and state do not persist between runs.
</Warning>
## Quick Start
<CodeGroup>
```python Python
from letta import Letta
client = Letta(token="LETTA_API_KEY")
agent = client.agents.create(
model="openai/gpt-4o",
tools=["run_code"],
memory_blocks=[{
"label": "persona",
"value": "I can run Python code for data analysis and API calls."
}]
)
```
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client';
const client = new LettaClient({ token: "LETTA_API_KEY" });
const agent = await client.agents.create({
model: "openai/gpt-4o",
tools: ["run_code"],
memoryBlocks: [{
label: "persona",
value: "I can run Python code for data analysis and API calls."
}]
});
```
</CodeGroup>
## Tool Parameters
| Parameter | Type | Options | Description |
|-----------|------|---------|-------------|
| `code` | `str` | Required | The code to execute |
| `language` | `str` | `python`, `js`, `ts`, `r`, `java` | Programming language |
## Return Format
```json
{
"results": ["Last expression value"],
"logs": {
"stdout": ["Print statements"],
"stderr": ["Error output"]
},
"error": "Error details if execution failed"
}
```
**Output types:**
- `results[]`: Last expression value (Jupyter-style)
- `logs.stdout`: Print statements and standard output
- `logs.stderr`: Error messages
- `error`: Present if execution failed
## Supported Languages
| Language | Key Limitations |
|----------|-----------------|
| **Python** | None - full ecosystem available |
| **JavaScript** | No npm packages - built-in Node modules only |
| **TypeScript** | No npm packages - built-in Node modules only |
| **R** | No tidyverse - base R only |
| **Java** | JShell-style execution - no traditional class definitions |
### Python
Full Python ecosystem with common packages pre-installed:
- **Data**: numpy, pandas, scipy, scikit-learn
- **Web**: requests, aiohttp, beautifulsoup4
- **Utilities**: matplotlib, PyYAML, Pillow
Check available packages:
```python
import pkg_resources
print([d.project_name for d in pkg_resources.working_set])
```
### JavaScript & TypeScript
No npm packages available - only built-in Node modules.
```javascript
// Works
const fs = require('fs');
const http = require('http');
// Fails
const axios = require('axios');
```
### R
Base R only - no tidyverse packages.
```r
# Works
mean(c(1, 2, 3))
# Fails
library(ggplot2)
```
### Java
JShell-style execution - statement-level only.
```java
// Works
System.out.println("Hello");
int x = 42;
// Fails
public class Main {
public static void main(String[] args) { }
}
```
## Network Access
The sandbox has full network access for HTTP requests, API calls, and DNS resolution.
```python
import requests
response = requests.get('https://api.github.com/repos/letta-ai/letta')
data = response.json()
print(f"Stars: {data['stargazers_count']}")
```
## No State Persistence
Variables, files, and state do not carry over between executions. Each `run_code` call is completely isolated.
```python
# First execution
x = 42
# Second execution (separate run_code call)
print(x) # Error: NameError: name 'x' is not defined
```
**Implications:**
- Must re-import libraries each time
- Files written to disk are lost
- Cannot build up state across executions
## Self-Hosted Setup
For self-hosted servers, configure an E2B API key. [E2B](https://e2b.dev) provides the sandbox infrastructure.
<CodeGroup>
```bash Docker
docker run \
-e E2B_API_KEY="your_e2b_api_key" \
letta/letta:latest
```
```yaml Docker Compose
services:
letta:
environment:
- E2B_API_KEY=your_e2b_api_key
```
```bash Server
export E2B_API_KEY="your_e2b_api_key"
letta server
```
```python Per-Agent
agent = client.agents.create(
tools=["run_code"],
tool_env_vars={
"E2B_API_KEY": "your_e2b_api_key"
}
)
```
</CodeGroup>
## Common Patterns
### Data Analysis
```python
agent = client.agents.create(
model="openai/gpt-4o",
tools=["run_code"],
memory_blocks=[{
"label": "persona",
"value": "I use Python with pandas and numpy for data analysis."
}]
)
```
### API Integration
```python
agent = client.agents.create(
model="openai/gpt-4o",
tools=["run_code", "web_search"],
memory_blocks=[{
"label": "persona",
"value": "I fetch data from APIs using run_code and search docs with web_search."
}]
)
```
### Statistical Analysis
```python
agent = client.agents.create(
model="openai/gpt-4o",
tools=["run_code"],
memory_blocks=[{
"label": "persona",
"value": "I perform statistical analysis using scipy and numpy."
}]
)
```
## When to Use
| Use Case | Tool | Why |
|----------|------|-----|
| Data analysis | `run_code` | Full Python data stack |
| Math calculations | `run_code` | Programmatic computation |
| Live API data | `run_code` | Network + processing |
| Web scraping | `run_code` | requests + BeautifulSoup |
| Simple search | `web_search` | Purpose-built |
| Persistent data | Archival memory | State persistence |
## Related Documentation
- [Utilities Overview](/guides/agents/prebuilt-tools)
- [Web Search](/guides/agents/web-search)
- [Fetch Webpage](/guides/agents/fetch-webpage)
- [Custom Tools](/guides/agents/custom-tools)
- [Tool Variables](/guides/agents/tool-variables)

View File

@@ -1,837 +0,0 @@
---
title: Shared Memory Blocks Guide
subtitle: Complete guide to using shared memory for multi-agent coordination
slug: guides/agents/shared-memory-blocks
---
Shared memory blocks enable multiple agents to access and update the same memory, creating powerful multi-agent systems with seamless coordination.
## Overview
**Shared memory blocks** allow you to attach the same memory block to multiple agents. When one agent updates the block, all other agents with access immediately see the changes. This enables:
- **Real-time coordination** without explicit agent-to-agent messaging
- **Consistent information** across teams and departments
- **Hierarchical access control** based on roles and responsibilities
- **Privacy boundaries** for sensitive information
- **Knowledge sharing** across specialized agents
<Note>
Shared memory blocks are different from agent-to-agent messaging (like `send_message_to_agent_async`). With shared memory, agents coordinate **asynchronously** through shared state rather than direct communication.
</Note>
## Core Concepts
### What is a Shared Memory Block?
A memory block becomes "shared" when you attach it to multiple agents using the same `block_id`. All agents with access see the same content in real-time.
```python
from letta import Letta
client = Letta()
# Step 1: Create a memory block
shared_block = client.blocks.create(
label="team_knowledge",
description="Shared knowledge base for the team",
value="Team policies and procedures...",
limit=5000
)
# Step 2: Attach to multiple agents
agent1 = client.agents.create(
name="Agent_1",
block_ids=[shared_block.id], # Attach shared block
# ... other config
)
agent2 = client.agents.create(
name="Agent_2",
block_ids=[shared_block.id], # Same block ID = shared memory
# ... other config
)
# Now agent1 and agent2 share the same memory block!
```
### Block Types
| Type | Description | Use Case |
|---|---|---|
| **Read-Only** | Agents can read but not modify | Company policies, reference data |
| **Read/Write** | Agents can read and update | Task queues, shared notes |
| **Private** | Single agent only | Personal work logs, private notes |
### Access Patterns
Letta supports multiple access patterns for organizing shared memory:
1. **Hierarchical**: Tier 1 < Tier 2 < Tier 3 (access increases up the hierarchy)
2. **Team-Based**: All team members share the same blocks
3. **Overlapping**: Each agent has a unique combination of blocks
4. **Organizational**: Department → Cross-Department → Executive levels
## Architecture Patterns
### Pattern 1: Hierarchical Access (Support Tiers)
```
Tier 1 Agents → company_policies [R]
Tier 2 Agents → company_policies [R], escalation_procedures [R]
Tier 3 Agents → company_policies [R], escalation_procedures [R], team_metrics [R/W]
```
**Use Cases:**
- Customer support with tier levels
- Knowledge bases with sensitivity levels
- Organizations with clearance levels
**Example:** [Read-Only Organizational Knowledge Tutorial](/cookbooks/shared-memory-read-only)
### Pattern 2: Team Coordination (Shared Queues)
```
All Team Members → task_queue [R/W], completed_work [R/W]
Supervisor Only → team_metrics [R/W]
Each Worker → private_work_log [R/W]
```
**Use Cases:**
- Task coordination across workers
- Project management teams
- Shared deliverables tracking
**Example:** [Task Coordination Tutorial](/cookbooks/shared-memory-task-coordination)
### Pattern 3: Specialized Agents (Overlapping Access)
```
Coordinator → ALL blocks (user_profile, preferences, interaction_history, calendar, financial)
Email Agent → user_profile, preferences, interaction_history, calendar
Research Agent → user_profile, preferences, interaction_history
Calendar Agent → user_profile, preferences, calendar
Finance Agent → user_profile, preferences, financial
```
**Use Cases:**
- Personal AI assistant networks
- Specialized service agents
- Multi-domain customer support
**Example:** [Multi-Agent User Assistant Tutorial](/cookbooks/shared-memory-user-assistant)
### Pattern 4: Enterprise Hierarchy (Departments)
```
Company-Wide [R] → ALL agents (mission, policies)
Department Blocks [R/W] → Department members only
Cross-Dept Block [R/W] → All directors + CEO
Executive Dashboard [R/W] → CEO only
```
**Use Cases:**
- Enterprise organizations
- Multi-department companies
- Regulated industries with compliance requirements
**Example:** [Enterprise Multi-Team Tutorial](/cookbooks/shared-memory-enterprise)
## Best Practices
### 1. Use Read-Only Blocks for Critical Information
Protect policies, procedures, and reference data from accidental modification:
```python
company_policies = client.blocks.create(
label="company_policies",
value="Our company policies...",
# Read-only ensures consistency
)
```
<Warning>
Even though Letta doesn't currently enforce read-only at the API level, agents will respect read-only semantics and refuse to modify these blocks when instructed not to in their persona.
</Warning>
### 2. Implement the Principle of Least Privilege
Only give agents access to blocks they need:
```python
# ❌ Bad: Sales agent with access to HR data
sales_agent = client.agents.create(
block_ids=[sales_knowledge.id, hr_employee_data.id] # Too much access
)
# ✓ Good: Sales agent with only sales data
sales_agent = client.agents.create(
block_ids=[sales_knowledge.id] # Appropriate access
)
```
### 3. Use Clear Naming Conventions
Make block purposes obvious:
```python
# ✓ Good names
"company_policies" # Clear scope and content
"sales_team_knowledge" # Clear ownership
"cross_dept_projects" # Clear purpose
"ceo_executive_dashboard" # Clear access level
# ❌ Bad names
"data" # Too generic
"block1" # Not descriptive
"temp" # Purpose unclear
```
### 4. Set Appropriate Character Limits
Balance between enough space and memory constraints:
```python
# Reference data: smaller
company_policies = client.blocks.create(
limit=5000 # Policies don't change often
)
# Active coordination: larger
task_queue = client.blocks.create(
limit=10000 # Tasks accumulate over time
)
# Detailed logs: largest
interaction_history = client.blocks.create(
limit=12000 # Many interactions to track
)
```
<Tip>
If a block frequently hits its character limit, consider archiving old content or splitting into multiple blocks (e.g., `current_tasks` vs `completed_tasks`).
</Tip>
### 5. Document Block Access in Agent Personas
Make agents aware of their access:
```python
agent = client.agents.create(
memory_blocks=[{
"label": "persona",
"value": """I am a Sales Representative.
My access:
- company_policies (read-only): Company-wide policies
- sales_knowledge (read/write): Shared with sales team
- my_leads (private): My personal lead tracking
I do NOT have access to:
- engineering_specs (Engineering team only)
- hr_employee_data (HR team only)
"""
}]
)
```
### 6. Use Descriptive Block Descriptions
Help with debugging and management:
```python
block = client.blocks.create(
label="sales_knowledge",
description="Sales team knowledge base: pricing, playbooks, targets. Read/write access for Sales Director, Rep 1, Rep 2.",
# Good description includes: content, access, and purpose
)
```
## Common Use Cases
### Use Case 1: Customer Support with Tiers
**Problem:** Support agents at different levels need different information.
**Solution:** Hierarchical blocks with increasing access
```
support_tier1/ → Basic policies
support_tier2/ → Advanced troubleshooting + escalation procedures
support_tier3/ → Full system access + team metrics
```
**Benefits:**
- Consistent policy information across all tiers
- Sensitive escalation procedures protected
- Supervisors track team performance privately
### Use Case 2: Project Management Team
**Problem:** Multiple workers need to coordinate on tasks.
**Solution:** Shared task queue + completion log
```
task_queue (R/W) → All team members claim and update tasks
completed_work (R/W) → All team members share findings
team_metrics (R/W) → Supervisor only tracks performance
```
**Benefits:**
- Real-time task claiming without conflicts
- Knowledge sharing through completed work
- Supervisor oversight without micromanagement
### Use Case 3: Personal AI Assistant Network
**Problem:** User needs specialized agents that understand full context.
**Solution:** Overlapping block access with privacy boundaries
```
Universal: user_profile, user_preferences → All agents
Coordination: interaction_history → Coordinator, Email, Research
Domain-specific: calendar_data → Calendar, Email agents only
Restricted: financial_data → Finance agent only
```
**Benefits:**
- Seamless handoffs between specialists
- Consistent user experience
- Privacy protection for sensitive data
### Use Case 4: Enterprise Organization
**Problem:** Large company with departments needs coordination.
**Solution:** Multi-tier hierarchy with isolation
```
Company-wide (R) → All employees see mission/policies
Department (R/W) → Each dept has private knowledge
Cross-dept (R/W) → Directors coordinate projects
Executive (R/W) → CEO tracks company metrics
```
**Benefits:**
- Department autonomy and isolation
- Async cross-department coordination
- Executive oversight without bottlenecks
- Compliance with data privacy regulations
## API Reference
### Creating Shared Blocks
```python
block = client.blocks.create(
label="block_name", # Required: identifier
description="What this is", # Recommended: for management
value="Initial content", # Required: starting content
limit=5000 # Optional: character limit
)
```
### Attaching to Agents (at Creation)
```python
agent = client.agents.create(
name="Agent_Name",
block_ids=[block1.id, block2.id], # Attach existing blocks
memory_blocks=[ # Create new private blocks
{"label": "persona", "value": "..."}
],
# ... other config
)
```
### Attaching to Existing Agents
```python
# Attach a block to an existing agent
client.agents.blocks.attach(
agent_id=agent.id,
block_id=block.id
)
# Detach a block from an agent
client.agents.blocks.detach(
agent_id=agent.id,
block_id=block.id
)
```
### Listing Blocks
Find blocks across your project with optional filtering:
<CodeGroup>
```python Python
# List all blocks in project
all_blocks = client.blocks.list()
# Filter by label
team_blocks = client.blocks.list(label="team_knowledge")
# Search by label text
search_results = client.blocks.list(label_search="sales")
```
```typescript TypeScript
// List all blocks in project
const allBlocks = await client.blocks.list();
// Filter and search
const teamBlocks = await client.blocks.list({
label: "team_knowledge",
labelSearch: "sales"
});
```
</CodeGroup>
### Retrieving a Block
Get complete block information by ID:
<CodeGroup>
```python Python
block = client.blocks.retrieve(block.id)
print(f"Block: {block.label}")
print(f"Value: {block.value}")
```
```typescript TypeScript
const block = await client.blocks.retrieve(block.id);
console.log(`Block: ${block.label}`);
console.log(`Value: ${block.value}`);
```
</CodeGroup>
### Modifying Blocks Directly
Update block content without going through an agent. Useful for external scripts syncing data to agents:
<CodeGroup>
```python Python
# Update block content - completely replaces the value
client.blocks.modify(
block.id,
value="Updated team knowledge: New procedures..."
)
# Update multiple properties
client.blocks.modify(
block.id,
value="New content",
limit=8000,
description="Updated description"
)
# Make block read-only
client.blocks.modify(block.id, read_only=True)
```
```typescript TypeScript
// Update block content - completely replaces the value
await client.blocks.modify(block.id, {
value: "Updated team knowledge: New procedures..."
});
// Update multiple properties
await client.blocks.modify(block.id, {
value: "New content",
limit: 8000,
description: "Updated description"
});
// Make block read-only
await client.blocks.modify(block.id, { readOnly: true });
```
</CodeGroup>
<Warning>
**Setting `value` completely replaces the entire block content** - it is not an append operation. When you modify a shared block directly, all agents with access will see the changes immediately.
**Race condition risk**: If two processes (agents or external scripts) modify the same block concurrently, the last write wins and completely overwrites all earlier changes. To avoid data loss:
- Set blocks to **read-only** if you don't want agents or other processes to modify them
- Only allow direct modifications in controlled scenarios where overwriting is acceptable
- Ensure your application logic accounts for the fact that block updates are full replacements, not merges
</Warning>
### Deleting Blocks
Remove blocks when no longer needed. This detaches the block from all agents:
<CodeGroup>
```python Python
client.blocks.delete(block_id=block.id)
```
```typescript TypeScript
await client.blocks.delete(block.id);
```
</CodeGroup>
### Agent-Scoped Operations
#### List an Agent's Blocks
See all memory blocks attached to a specific agent:
<CodeGroup>
```python Python
# List all blocks for an agent
agent_blocks = client.agents.blocks.list(agent_id=agent.id)
# With pagination
agent_blocks = client.agents.blocks.list(
agent_id=agent.id,
limit=10,
order="asc"
)
for block in agent_blocks:
print(f"{block.label}: {block.value[:50]}...")
```
```typescript TypeScript
// List all blocks for an agent
const agentBlocks = await client.agents.blocks.list(agent.id);
// With pagination
const agentBlocksPaginated = await client.agents.blocks.list(agent.id, {
limit: 10,
order: "asc"
});
for (const block of agentBlocks) {
console.log(`${block.label}: ${block.value.slice(0, 50)}...`);
}
```
</CodeGroup>
#### Retrieve Agent's Block by Label
Get a specific block from an agent using its label instead of ID:
<CodeGroup>
```python Python
# Get agent's human block
human_block = client.agents.blocks.retrieve(
agent_id=agent.id,
block_label="human"
)
print(human_block.value)
# Get shared task queue from specific agent
task_queue = client.agents.blocks.retrieve(
agent_id=worker_agent.id,
block_label="task_queue"
)
```
```typescript TypeScript
// Get agent's human block
const humanBlock = await client.agents.blocks.retrieve(
agent.id,
"human"
);
console.log(humanBlock.value);
// Get shared task queue from specific agent
const taskQueue = await client.agents.blocks.retrieve(
workerAgent.id,
"task_queue"
);
```
</CodeGroup>
#### Modify Agent's Block by Label
Update a specific agent's block without needing the block ID:
<CodeGroup>
```python Python
# Update agent's knowledge about the human
client.agents.blocks.modify(
agent_id=agent.id,
block_label="human",
value="Updated user information: Alice, prefers email over chat"
)
# Update shared block via specific agent
client.agents.blocks.modify(
agent_id=worker.id,
block_label="task_queue",
value="Updated task queue with new items..."
)
```
```typescript TypeScript
// Update agent's knowledge about the human
await client.agents.blocks.modify(agent.id, "human", {
value: "Updated user information: Alice, prefers email over chat"
});
// Update shared block via specific agent
await client.agents.blocks.modify(worker.id, "task_queue", {
value: "Updated task queue with new items..."
});
```
</CodeGroup>
### Inspecting Block Usage
See which agents have a block attached:
<CodeGroup>
```python Python
# List all agents that use this block
agents_with_block = client.blocks.agents.list(block_id=block.id)
print(f"Used by {len(agents_with_block)} agents:")
for agent in agents_with_block:
print(f" - {agent.name}")
# With pagination
agents_page = client.blocks.agents.list(
block_id=block.id,
limit=10,
order="asc"
)
```
```typescript TypeScript
// List all agents that use this block
const agentsWithBlock = await client.blocks.agents.list(block.id);
console.log(`Used by ${agentsWithBlock.length} agents:`);
for (const agent of agentsWithBlock) {
console.log(` - ${agent.name}`);
}
// With pagination
const agentsPage = await client.blocks.agents.list(block.id, {
limit: 10,
order: "asc"
});
```
</CodeGroup>
### Updating Blocks via Agents
Agents update blocks using memory tools during conversations:
<CodeGroup>
```python Python
# Agent updates shared block content
client.agents.messages.create(
agent_id=agent.id,
messages=[{
"role": "user",
"content": "Update the task queue to mark task-001 as complete"
}]
)
# Agent uses core_memory_replace or core_memory_append tools
# Changes are immediately visible to all agents with access
```
```typescript TypeScript
// Agent updates shared block content
await client.agents.messages.create(agent.id, {
messages: [{
role: "user",
content: "Update the task queue to mark task-001 as complete"
}]
});
// Agent uses core_memory_replace or core_memory_append tools
// Changes are immediately visible to all agents with access
```
</CodeGroup>
## Troubleshooting
### Problem: Agent Can't See Block Updates
**Symptoms:** Agent reads old content after another agent updated it.
**Solutions:**
1. Verify both agents have the same `block_id` attached
2. Check that updates are being committed (agent finished its turn)
3. Ensure character limit hasn't been exceeded (updates may fail silently)
```python
# Debug: Check which agents share the block
block_info = client.blocks.retrieve(block_id=block.id)
print(f"Agents with access: {block_info.agent_ids}")
```
### Problem: Block Character Limit Exceeded
**Symptoms:** Updates not applying, content truncated.
**Solutions:**
1. Increase block limit: `client.blocks.modify(block_id=block.id, limit=10000)`
2. Archive old content: Move completed items to a separate block
3. Summarize content: Have an agent periodically summarize and condense
```python
# Check current usage
block = client.blocks.retrieve(block_id=block.id)
print(f"Characters: {len(block.value)} / {block.limit}")
```
### Problem: Privacy Violation
**Symptoms:** Agent accessing data it shouldn't see.
**Solutions:**
1. Review block attachments: `client.agents.retrieve(agent_id).block_ids`
2. Detach inappropriate blocks: `client.agents.blocks.detach()`
3. Update agent persona to clarify access boundaries
4. Consider splitting one block into multiple with different access
### Problem: Race Conditions on Concurrent Updates
**Symptoms:** Two agents try to claim the same task, conflicts occur.
**Solutions:**
1. Design blocks to minimize conflicts (separate sections for each agent)
2. Use timestamps and agent IDs in updates
3. Implement retry logic for failed updates
4. Consider optimistic concurrency control
```python
# Good: Each agent updates their own section
"""
TASK-001:
Status: In Progress
Claimed by: Worker_1
Timestamp: 2024-10-08 14:30
TASK-002:
Status: In Progress
Claimed by: Worker_2
Timestamp: 2024-10-08 14:31
"""
```
## Performance Considerations
### Block Size and Agent Performance
- **Smaller blocks (<5K chars)**: Faster loading, more focused context
- **Larger blocks (>10K chars)**: More context but slower processing
- **Optimal**: Keep blocks focused on single purpose, split large blocks
### Number of Blocks per Agent
- **Recommended**: 3-7 blocks per agent
- **Each block adds**: Context in agent's working memory
- **Too many blocks**: May dilute agent focus
- **Too few blocks**: May limit coordination capabilities
### Update Frequency
- **High-frequency updates** (many agents, frequent changes): Consider separate blocks to reduce contention
- **Low-frequency updates** (policies, references): Larger consolidated blocks are fine
## Security and Compliance
### Data Privacy
Shared memory blocks enable compliance with data privacy regulations:
```python
# GDPR/HIPAA Example: Isolate sensitive data
hr_employee_data = client.blocks.create(
label="hr_employee_data",
description="CONFIDENTIAL - HR Department only. Contains PII."
)
# Only attach to authorized agents
hr_director = client.agents.create(
block_ids=[hr_employee_data.id] # Only HR has access
)
# Sales/Engineering agents do NOT get access
sales_agent = client.agents.create(
block_ids=[sales_knowledge.id] # No HR data access
)
```
### Audit Trail
Track block access for compliance:
```python
# Check block usage
block_info = client.blocks.retrieve(block_id=sensitive_block.id)
print(f"Accessed by: {block_info.agent_ids}")
# Log all agents with access
for agent_id in block_info.agent_ids:
agent = client.agents.retrieve(agent_id=agent_id)
print(f" {agent.name} - {agent.created_at}")
```
### Access Revocation
Remove access when no longer needed:
```python
# Employee leaves company - revoke agent access
client.agents.blocks.detach(
agent_id=former_employee_agent.id,
block_id=company_confidential.id
)
# Or delete the agent entirely
client.agents.delete(agent_id=former_employee_agent.id)
```
## Tutorials
Learn shared memory patterns through hands-on tutorials:
<CardGroup cols={2}>
<Card title="Part 1: Read-Only Knowledge" icon="lock" href="/cookbooks/shared-memory-read-only">
Build a hierarchical support team with shared company policies
</Card>
<Card title="Part 2: Task Coordination" icon="list-check" href="/cookbooks/shared-memory-task-coordination">
Create a data analysis team with shared task queues
</Card>
<Card title="Part 3: User Assistant Network" icon="users" href="/cookbooks/shared-memory-user-assistant">
Build specialized agents with overlapping block access
</Card>
<Card title="Part 4: Enterprise System" icon="building" href="/cookbooks/shared-memory-enterprise">
Implement a complete enterprise with departments and hierarchies
</Card>
</CardGroup>
## Related Guides
<CardGroup cols={2}>
<Card title="Memory Blocks Overview" icon="database" href="/guides/agents/memory-blocks">
Understanding memory blocks and core memory
</Card>
<Card title="Multi-Agent Systems" icon="diagram-project" href="/guides/agents/multi-agent">
Overview of multi-agent architectures in Letta
</Card>
<Card title="Agent-to-Agent Messaging" icon="comments" href="/cookbooks/multi-agent-async">
Alternative coordination pattern using async messaging
</Card>
<Card title="API Reference" icon="code" href="/api-reference/agents/core-memory">
Complete API documentation for blocks and memory
</Card>
</CardGroup>
## Key Takeaways
✓ **Shared memory blocks** enable seamless multi-agent coordination without explicit messaging
✓ **Access patterns** range from simple (all agents) to complex (hierarchical organizations)
✓ **Privacy boundaries** protect sensitive data while enabling collaboration
✓ **Real-time sync** ensures all agents see updates immediately
✓ **Scales** from 2 agents to enterprise systems with 10+ agents
✓ **Complements** agent-to-agent messaging for complete multi-agent systems
Shared memory blocks are a powerful primitive for building sophisticated multi-agent systems. Start with simple patterns (Tutorial 1) and progress to complex architectures (Tutorial 4) as your needs grow.

View File

@@ -1,125 +0,0 @@
---
title: Sleep-time Agents
subtitle: Based on the new sleep-time compute research paper
slug: guides/agents/architectures/sleeptime
---
<Warning>
Sleep-time agents are experimental and may be unstable. For more information, visit our [Discord](https://discord.gg/letta).
</Warning>
<Note>
To learn more about sleep-time compute, check out our [blog](https://www.letta.com/blog/sleep-time-compute) and [research paper](https://arxiv.org/abs/2504.13171).
</Note>
<img className="light" src="/images/sleep_time.png" />
<img className="dark" src="/images/sleep_time_dark.png" />
In Letta, you can create special **sleep-time agents** that share the memory of your primary agents, but run in the background and can modify the memory asynchronously. You can think of sleep-time agents as a special form of multi-agent architecture, where all agents in the system share one or more memory blocks. A single agent can have one or more associated sleep-time agents to process data such as the conversation history or data sources to manage the memory blocks of the primary agent.
To enable sleep-time agents for your agent, set `enableSleeptime: true` when creating your agent. This will automatically create:
* A primary agent with tools for `conversation_search` and `archival_memory_search`. This is your "main" agent that you configure and interact with.
* A sleep-time agent with tools to manage the memory blocks of the primary agent.
## Background: Memory Blocks
Sleep-time agents specialize in generating *learned context*. Given some original context (e.g. the conversation history, a set of files) the sleep-time agent will reflect on the original context to iteratively derive a learned context. The learned context will reflect the most important pieces of information or insights from the original context.
In Letta, the learned context is saved in a memory block. A memory block represents a labeled section of the context window with an associated character limit. Memory blocks can be shared between multiple agents. A sleep-time agent will write the learned context to a memory block, which can also be shared with other agents that could benefit from those learnings.
Memory blocks can be accessed directly through the API to be updated, retrieved, or deleted.
<CodeGroup>
```typescript TypeScript
// get a block by label
const block = await client.agents.blocks.retrieve(agentId, "persona");
// get a block by ID
const block = await client.blocks.retrieve(blockId);
```
```python title="python"
# get a block by label
block = client.agents.blocks.retrieve(agent_id=agent_id, block_label="persona")
# get a block by ID
block = client.blocks.retrieve(block_id=block_id)
```
</CodeGroup>
When sleep-time is enabled for an agent, a sleep-time agent is created to manage the memory blocks of the primary agent. The sleep-time agent runs in the background and can modify the memory blocks asynchronously. The sleep-time agent generates learned context from the conversation history to update the memory blocks of the primary agent.
## Sleep-time agent for conversation
<img className="light" src="/images/sleeptime_chat.png" />
<img className="dark" src="/images/sleeptime_chat_dark.png" />
When sleep-time is enabled, a primary agent and a sleep-time agent are created as part of a multi-agent group under the hood. The sleep-time agent is responsible for generating learned context from the conversation history to update the memory blocks of the primary agent. The group ensures that for every `N` steps taken by the primary agent, the sleep-time agent is invoked with data containing new messages in the primary agent's message history.
<img src="/images/sleeptime_chat_only.gif" />
### Configuring the frequency of sleep-time updates
The sleep-time agent will be triggered every N-steps (default `5`) to update the memory blocks of the primary agent. You can configure the frequency of updates by setting the `sleeptime_agent_frequency` parameter when creating the agent.
<CodeGroup>
```typescript TypeScript maxLines=50
import { LettaClient, SleeptimeManagerUpdate } from '@letta-ai/letta-client'
const client = new LettaClient({ token: "LETTA_API_KEY" });
// create a sleep-time-enabled agent
const agent = await client.agents.create({
memoryBlocks: [
{ value: "", label: "human" },
{ value: "You are a helpful assistant.", label: "persona" }
],
model: "anthropic/claude-3-7-sonnet-20250219",
embedding: "openai/text-embedding-3-small",
enableSleeptime: true
});
console.log(`Created agent id ${agent.id}`);
// get the multi-agent group
const groupId = agent.multiAgentGroup.id;
const currentFrequency = agent.multiAgentGroup.sleeptimeAgentFrequency;
console.log(`Group id: ${groupId}, frequency: ${currentFrequency}`);
// update the frequency to every 2 steps
const group = await client.groups.modify(groupId, {
managerConfig: {
sleeptimeAgentFrequency: 2
} as SleeptimeManagerUpdate
});
```
```python title="python" maxLines=50
from letta_client import Letta
from letta_client.types import SleeptimeManagerUpdate
client = Letta(token="LETTA_API_KEY")
# create a sleep-time-enabled agent
agent = client.agents.create(
memory_blocks=[
{"value": "", "label": "human"},
{"value": "You are a helpful assistant.", "label": "persona"},
],
model="anthropic/claude-3-7-sonnet-20250219",
embedding="openai/text-embedding-3-small",
enable_sleeptime=True,
)
print(f"Created agent id {agent.id}")
# get the multi-agent group
group_id = agent.multi_agent_group.id
current_frequency = agent.multi_agent_group.sleeptime_agent_frequency
print(f"Group id: {group_id}, frequency: {current_frequency}")
# update the frequency to every 2 steps
group = client.groups.modify(
group_id=group_id,
manager_config=SleeptimeManagerUpdate(
sleeptime_agent_frequency=2
),
)
```
</CodeGroup>
We recommend keeping the frequency relatively high (e.g. 5 or 10) as triggering the sleep-time agent too often can be expensive (due to high token usage) and has diminishing returns.

View File

@@ -1,91 +0,0 @@
---
title: Creating Tool Rules
slug: guides/agents/tool-rules
---
Tool rules allow developers to define constraints on their tools, such as requiring that a tool terminate agent execution or be followed by another tool.
<Frame>
```mermaid
flowchart LR
subgraph init["InitToolRule"]
direction LR
start((Start)) --> init_tool["must_run_first"]
init_tool --> other1["...other tools..."]
end
subgraph terminal["TerminalToolRule"]
direction LR
other2["...other tools..."] --> term_tool["terminal_tool"] --> stop1((Stop))
end
subgraph sequence["ChildToolRule (children)"]
direction LR
parent_tool["parent_tool"] --> child1["child_tool_1"]
parent_tool --> child2["child_tool_2"]
parent_tool --> child3["child_tool_3"]
end
classDef stop fill:#ffcdd2,stroke:#333
classDef start fill:#c8e6c9,stroke:#333
class stop1 stop
class start start
```
</Frame>
Letta currently supports the following tool rules (with more being added):
* `TerminalToolRule(tool_name=...)`
* If the tool is called, the agent ends execution
* `InitToolRule(tool_name=...)`
* The tool must be called first when an agent is run
* `ChildToolRule(tool_name=..., children=[...])`
* If the tool is called, it must be followed by one of the tools specified in `children`
* `ParentToolRule(tool_name=..., children=[...])`
* The tool must be called before the tools specified in `children` can be called
* `ConditionalToolRule(tool_name=..., child_output_mapping={...})`
* If the tool is called, it must be followed by one of the tools specified in `child_output_mapping`, selected based on the tool's output
* `ContinueToolRule(tool_name=...)`
* If the tool is called, the agent must continue execution
* `MaxCountPerStepToolRule(tool_name=..., max_count_limit=...)`
* The tool cannot be called more than `max_count_limit` times in a single step
## Default tool rules
Depending on your agent configuration, there may be default tool rules applied to improve performance.
## Tool rule examples
For example, you can ensure that the agent will stop execution after the `roll_d20` tool is called by specifying tool rules in the agent creation:
<CodeGroup>
```typescript TypeScript {6-11}
// create a new agent
const agentState = await client.createAgent({
// create the agent with an additional tool
tools: [tool.name],
// add tool rules that terminate execution after specific tools
toolRules: [
// exit after roll_d20 is called
{toolName: tool.name, type: "exit_loop"},
],
});
console.log(`Created agent with name ${agentState.name} with tools ${agentState.tools}`);
```
```python Python {6-11}
# create a new agent
agent_state = client.create_agent(
# create the agent with an additional tool
tools=[tool.name],
# add tool rules that terminate execution after specific tools
tool_rules=[
# exit after roll_d20 is called
TerminalToolRule(tool_name=tool.name, type="exit_loop"),
],
)
print(f"Created agent with name {agent_state.name} with tools {agent_state.tools}")
```
</CodeGroup>
You can see a full working example of tool rules [here](https://github.com/letta-ai/letta/blob/0.5.2/examples/tool_rule_usage.py).

View File

@@ -1,342 +0,0 @@
---
title: Web Search
subtitle: Search the internet in real-time with AI-powered search
slug: guides/agents/web-search
---
The `web_search` tool enables Letta agents to search the internet for current information, research, and general knowledge using [Exa](https://exa.ai)'s AI-powered search engine.
<Info>
On [Letta Cloud](/guides/cloud/overview), this tool works out of the box. For self-hosted deployments, you'll need to [configure an Exa API key](#self-hosted-setup).
</Info>
## Quick Start
### Adding Web Search to an Agent
<CodeGroup>
```python Python
from letta import Letta
client = Letta(token="LETTA_API_KEY")
agent = client.agents.create(
model="openai/gpt-4o",
embedding="openai/text-embedding-3-small",
tools=["web_search"],
memory_blocks=[
{
"label": "persona",
"value": "I'm a research assistant who uses web search to find current information and cite sources."
}
]
)
```
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client';
const client = new LettaClient({ token: "LETTA_API_KEY" });
const agent = await client.agents.create({
model: "openai/gpt-4o",
embedding: "openai/text-embedding-3-small",
tools: ["web_search"],
memoryBlocks: [
{
label: "persona",
value: "I'm a research assistant who uses web search to find current information and cite sources."
}
]
});
```
</CodeGroup>
### Usage Example
```python
response = client.agents.messages.create(
agent_id=agent.id,
messages=[
{
"role": "user",
"content": "What are the latest developments in agent-based AI systems?"
}
]
)
```
Your agent can now choose to use `web_search` when it needs current information.
## Self-Hosted Setup
For self-hosted Letta servers, you'll need an Exa API key.
### Get an API Key
1. Sign up at [dashboard.exa.ai](https://dashboard.exa.ai/)
2. Copy your API key
3. See [Exa pricing](https://docs.exa.ai) for rate limits and costs
### Configuration Options
<CodeGroup>
```bash Docker
docker run \
-v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \
-p 8283:8283 \
-e OPENAI_API_KEY="your_openai_key" \
-e EXA_API_KEY="your_exa_api_key" \
letta/letta:latest
```
```yaml Docker Compose
version: '3.8'
services:
letta:
image: letta/letta:latest
ports:
- "8283:8283"
environment:
- OPENAI_API_KEY=your_openai_key
- EXA_API_KEY=your_exa_api_key
volumes:
- ~/.letta/.persist/pgdata:/var/lib/postgresql/data
```
```bash Python Server
export EXA_API_KEY="your_exa_api_key"
letta server
```
```python Per-Agent Configuration
agent = client.agents.create(
model="openai/gpt-4o",
embedding="openai/text-embedding-3-small",
tools=["web_search"],
tool_env_vars={
"EXA_API_KEY": "your_exa_api_key"
}
)
```
</CodeGroup>
## Tool Parameters
The `web_search` tool supports advanced filtering and search customization:
| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `query` | `str` | Required | The search query to find relevant web content |
| `num_results` | `int` | 10 | Number of results to return (1-100) |
| `category` | `str` | None | Focus search on specific content types (see below) |
| `include_text` | `bool` | False | Whether to retrieve full page content (usually overflows context) |
| `include_domains` | `List[str]` | None | List of domains to include in search results |
| `exclude_domains` | `List[str]` | None | List of domains to exclude from search results |
| `start_published_date` | `str` | None | Only return content published after this date (ISO format) |
| `end_published_date` | `str` | None | Only return content published before this date (ISO format) |
| `user_location` | `str` | None | Two-letter country code for localized results (e.g., "US") |
### Available Categories
Use the `category` parameter to focus your search on specific content types:
| Category | Best For | Example Query |
|----------|----------|---------------|
| `company` | Corporate information, company websites | "Tesla energy storage solutions" |
| `research paper` | Academic papers, arXiv, research publications | "transformer architecture improvements 2025" |
| `news` | News articles, current events | "latest AI policy developments" |
| `pdf` | PDF documents, reports, whitepapers | "climate change impact assessment" |
| `github` | GitHub repositories, open source projects | "python async web scraping libraries" |
| `tweet` | Twitter/X posts, social media discussions | "reactions to new GPT release" |
| `personal site` | Blogs, personal websites, portfolios | "machine learning tutorial blogs" |
| `linkedin profile` | LinkedIn profiles, professional bios | "AI research engineers at Google" |
| `financial report` | Earnings reports, financial statements | "Apple Q4 2024 earnings" |
### Return Format
The tool returns a JSON-encoded string containing:
```json
{
"query": "search query",
"results": [
{
"title": "Page title",
"url": "https://example.com",
"published_date": "2025-01-15",
"author": "Author name",
"highlights": ["Key excerpt 1", "Key excerpt 2"],
"summary": "AI-generated summary of the content",
"text": "Full page content (only if include_text=true)"
}
]
}
```
## Best Practices
### 1. Guide When to Search
Provide clear instructions to your agent about when web search is appropriate:
```python
memory_blocks=[
{
"label": "persona",
"value": "I'm a helpful assistant. I use web_search for current events, recent news, and topics requiring up-to-date information. I cite my sources."
}
]
```
### 2. Combine with Archival Memory
Use web search for external/current information, and archival memory for your organization's internal data:
```python
# Create agent with both web_search and archival memory tools
agent = client.agents.create(
model="openai/gpt-4o",
embedding="openai/text-embedding-3-small",
tools=["web_search", "archival_memory_search", "archival_memory_insert"],
memory_blocks=[
{
"label": "persona",
"value": "I use web_search for current events and external research. I use archival_memory_search for company-specific information and internal documents."
}
]
)
```
See the [Archival Memory documentation](/guides/agents/archival-memory-overview) for more information.
### 3. Craft Effective Search Queries
Exa uses neural search that understands semantic meaning. Your agent will generally form good queries naturally, but you can improve results by guiding it to:
- **Be descriptive and specific**: "Latest research on RLHF techniques for language models" is better than "RLHF research"
- **Focus on topics, not keywords**: "How companies are deploying AI agents in customer service" works better than "AI agents customer service deployment"
- **Use natural language**: The search engine understands conversational queries like "What are the environmental impacts of Bitcoin mining?"
- **Specify time ranges when relevant**: Guide your agent to use date filters for time-sensitive queries
Example instruction in memory:
```python
memory_blocks=[
{
"label": "search_strategy",
"value": "When searching, I craft clear, descriptive queries that focus on topics rather than keywords. I use the category and date filters when appropriate to narrow results."
}
]
```
### 4. Manage Context Window
By default, `include_text` is `False` to avoid context overflow. The tool returns highlights and AI-generated summaries instead, which are more concise:
```python
memory_blocks=[
{
"label": "search_guidelines",
"value": "I avoid setting include_text=true unless specifically needed, as full text usually overflows the context window. Highlights and summaries are usually sufficient."
}
]
```
## Common Patterns
### Research Assistant
```python
agent = client.agents.create(
model="openai/gpt-4o",
tools=["web_search"],
memory_blocks=[
{
"label": "persona",
"value": "I'm a research assistant. I search for relevant information, synthesize findings from multiple sources, and provide citations."
}
]
)
```
### News Monitor
```python
agent = client.agents.create(
model="openai/gpt-4o-mini",
tools=["web_search"],
memory_blocks=[
{
"label": "persona",
"value": "I monitor news and provide briefings on AI industry developments."
},
{
"label": "topics",
"value": "Focus: AI/ML, agent systems, LLM advancements"
}
]
)
```
### Customer Support
```python
agent = client.agents.create(
model="openai/gpt-4o",
tools=["web_search"],
memory_blocks=[
{
"label": "persona",
"value": "I help customers by checking documentation, service status pages, and community discussions for solutions."
}
]
)
```
## Troubleshooting
### Agent Not Using Web Search
Check:
1. Tool is attached: `"web_search"` in agent's tools list
2. Instructions are clear about when to search
3. Model has good tool-calling capabilities (GPT-4, Claude 3+)
```python
# Verify tools
agent = client.agents.retrieve(agent_id=agent.id)
print([tool.name for tool in agent.tools])
```
### Missing EXA_API_KEY
If you see errors about missing API keys on self-hosted deployments:
```bash
# Check if set
echo $EXA_API_KEY
# Set for session
export EXA_API_KEY="your_exa_api_key"
# Docker example
docker run -e EXA_API_KEY="your_exa_api_key" letta/letta:latest
```
## When to Use Web Search
| Use Case | Tool | Why |
|----------|------|-----|
| Current events, news | `web_search` | Real-time information |
| External research | `web_search` | Broad internet access |
| Internal documents | Archival memory | Fast, static data |
| User preferences | Memory blocks | In-context, instant |
| General knowledge | Pre-trained model | No search needed |
## Related Documentation
- [Utilities Overview](/guides/agents/prebuilt-tools)
- [Custom Tools](/guides/agents/custom-tools)
- [Tool Variables](/guides/agents/tool-variables)
- [Archival Memory](/guides/agents/archival-memory-overview)

View File

@@ -1,274 +0,0 @@
---
title: Examples & Tutorials
slug: cookbooks
---
Build powerful AI agents with persistent memory. Explore tutorials, ready-to-use templates, and community projects to get started.
<Info>
**New to Letta?**
- Start with our [Quickstart Guide](/quickstart)
- Take the free [DeepLearning.AI Course](https://www.deeplearning.ai/short-courses/llms-as-operating-systems-agent-memory/)
- Explore [Awesome Letta](https://github.com/letta-ai/awesome-letta) for more resources
</Info>
## Getting Started Tutorials
Step-by-step guides to learn Letta fundamentals.
<CardGroup cols={2}>
<Card
title="Your First Agent"
icon="fa-sharp fa-light fa-rocket"
href="/examples/hello-world"
iconPosition="left"
>
Build your first Letta agent in minutes
</Card>
<Card
title="Talk to Your PDF"
icon="fa-sharp fa-light fa-file-pdf"
href="/examples/pdf-chat"
iconPosition="left"
>
Create an agent that can answer questions about PDF documents
</Card>
<Card
title="Attaching & Detaching Memory Blocks"
icon="fa-sharp fa-light fa-memory"
href="/examples/attaching-detaching-blocks"
iconPosition="left"
>
Learn how to dynamically manage agent memory
</Card>
<Card
title="Shared Memory Blocks"
icon="fa-sharp fa-light fa-share-nodes"
href="/examples/shared-memory-blocks"
iconPosition="left"
>
Share memory between multiple agents for coordination
</Card>
</CardGroup>
## Ready-to-Deploy Applications
Production-ready templates you can clone and customize.
<CardGroup cols={2}>
<Card
title="Next.js Chatbot"
icon="fa-sharp fa-light fa-messages"
href="https://github.com/letta-ai/letta-chatbot-example"
iconPosition="left"
>
Full-stack chatbot with per-user agent memory (Next.js + TypeScript)
</Card>
<Card
title="Discord Bot"
icon="fa-brands fa-discord"
href="https://github.com/letta-ai/letta-discord-bot-example"
iconPosition="left"
>
Discord bot with persistent memory for each server and user
</Card>
<Card
title="Character.AI Clone"
icon="fa-sharp fa-light fa-user-robot"
href="https://github.com/letta-ai/characterai-memory"
iconPosition="left"
>
Create AI characters with memory that persists across conversations
</Card>
<Card
title="Deep Research Agent"
icon="fa-sharp fa-light fa-magnifying-glass"
href="https://github.com/letta-ai/deep-research"
iconPosition="left"
>
Research agent that gathers and synthesizes information over time
</Card>
</CardGroup>
## Multi-Agent Systems
Build coordinated teams of specialized agents.
<CardGroup cols={2}>
<Card
title="Async Multi-Agent"
icon="fa-sharp fa-light fa-user-group"
href="/examples/multi-agent-async"
iconPosition="left"
>
Connect agents to chat with each other and users simultaneously
</Card>
<Card
title="Customer-Specific Agents"
icon="fa-sharp fa-light fa-users"
href="/cookbooks/customer-specific-agents"
iconPosition="left"
>
Template for building relationship-aware agents for each customer
</Card>
</CardGroup>
## Tools & Integrations
Connect Letta to your favorite platforms and tools.
<CardGroup cols={3}>
<Card
title="Vercel AI SDK"
icon="fa-sharp fa-light fa-triangle"
href="https://github.com/letta-ai/vercel-ai-sdk-provider"
iconPosition="left"
>
Use Letta with Vercel AI SDK v5
</Card>
<Card
title="Zapier"
icon="fa-sharp fa-light fa-bolt"
href="https://zapier.com/apps/letta/integrations"
iconPosition="left"
>
Connect agents to 7,000+ apps
</Card>
<Card
title="n8n Workflows"
icon="fa-sharp fa-light fa-diagram-project"
href="https://github.com/letta-ai/n8n-nodes-letta"
iconPosition="left"
>
Integrate with n8n automation workflows
</Card>
<Card
title="Telegram Bot"
icon="fa-brands fa-telegram"
href="https://github.com/letta-ai/letta-telegram"
iconPosition="left"
>
Deploy agents on Telegram
</Card>
<Card
title="Obsidian Plugin"
icon="fa-sharp fa-light fa-note-sticky"
href="https://github.com/letta-ai/letta-obsidian"
iconPosition="left"
>
Add Letta agents to your knowledge base
</Card>
<Card
title="DuckDB Agent"
icon="fa-sharp fa-light fa-database"
href="https://github.com/letta-ai/letta-duckdb-agent"
iconPosition="left"
>
SQL-powered data analysis agent
</Card>
</CardGroup>
## SDK Examples
Learn the basics with minimal code examples.
<CardGroup cols={2}>
<Card
title="TypeScript SDK"
icon="fa-brands fa-js"
href="https://github.com/letta-ai/letta/tree/main/examples/docs/node/example.ts"
iconPosition="left"
>
Basic TypeScript/Node.js SDK example
</Card>
<Card
title="Python SDK"
icon="fa-brands fa-python"
href="https://github.com/letta-ai/letta/tree/main/examples/docs/example.py"
iconPosition="left"
>
Basic Python SDK example
</Card>
</CardGroup>
## Community Projects
Amazing projects built by the Letta community.
<CardGroup cols={2}>
<Card
title="Thought Stream"
icon="fa-sharp fa-light fa-comments"
href="https://tangled.sh/@cameron.pfiffer.org/thought-stream"
iconPosition="left"
>
Deploy Letta agents to an ATProto-powered multi-agent chatroom
</Card>
<Card
title="Thought Stream CLI"
icon="fa-sharp fa-light fa-terminal"
href="https://tangled.org/@cameron.pfiffer.org/thought-stream-cli"
iconPosition="left"
>
IRC-style CLI for the Thought Stream
</Card>
</CardGroup>
## Learning Resources
<CardGroup cols={2}>
<Card
title="DeepLearning.AI Course"
icon="fa-sharp fa-light fa-graduation-cap"
href="https://www.deeplearning.ai/short-courses/llms-as-operating-systems-agent-memory/"
iconPosition="left"
>
Free course: LLMs as Operating Systems - Building Agents with Memory
</Card>
<Card
title="Core Concepts"
icon="fa-sharp fa-light fa-book"
href="/overview"
iconPosition="left"
>
Understand how Letta agents work
</Card>
<Card
title="API Reference"
icon="fa-sharp fa-light fa-code"
href="/api-reference/overview"
iconPosition="left"
>
Complete API documentation
</Card>
<Card
title="Research Papers"
icon="fa-sharp fa-light fa-flask"
href="https://www.letta.com/blog"
iconPosition="left"
>
Read about the research behind Letta
</Card>
</CardGroup>
## More Resources
<CardGroup cols={2}>
<Card
title="Awesome Letta"
icon="fa-sharp fa-light fa-star"
href="https://github.com/letta-ai/awesome-letta"
iconPosition="left"
>
Comprehensive curated list of Letta resources, tools, and community projects
</Card>
<Card
title="Join Discord"
icon="fa-brands fa-discord"
href="https://discord.gg/letta"
iconPosition="left"
>
Get help and share your projects with the community
</Card>
</CardGroup>

View File

@@ -1,85 +0,0 @@
---
title: Deploy Letta Server on Railway
slug: guides/server/railway
---
<Tip>[Railway](https://railway.app) is a service that allows you to easily deploy services (such as Docker containers) to the cloud. The following example uses Railway, but the same general principles around deploying the Letta Docker image on a cloud service and connecting it to the ADE are generally applicable to other cloud services beyond Railway.</Tip>
## Deploying the Letta Railway template
We've prepared a Letta Railway template that has the necessary environment variables set and mounts a persistent volume for database storage.
You can access the template by clicking the "Deploy on Railway" button below:
[![Deploy on Railway](https://railway.com/button.svg)](https://railway.app/template/jgUR1t?referralCode=kdR8zc)
<Frame caption="The deployment screen will give you the opportunity to specify some basic environment variables such as your OpenAI API key. You can also specify these after deployment in the variables section in the Railway viewer.">
<img src="../../images/railway_template_deploy.png" />
</Frame>
<Frame caption="If the deployment is successful, it will be shown as 'Active', and you can click 'View logs'.">
<img src="../../images/railway_template_deployed.png" />
</Frame>
<Frame caption="Clicking 'View logs' will reveal the static IP address of the deployment (ending in 'railway.app').">
<img src="../../images/railway_template_deployed_logs.png" />
</Frame>
## Accessing the deployment via the ADE
Now that the Railway deployment is active, all we need to do to access it via the ADE is add it as a new remote Letta server.
The default password set in the template is `password`, which can be changed at the deployment stage or afterwards in the 'variables' page on the Railway deployment.
Click "Add remote server", then enter the details from Railway (use the static IP address shown in the logs, and use the password set via the environment variables):
<img className="block w-300 dark:hidden" src="../../images/railway_ade_example_light.png" />
<img className="hidden w-300 dark:block" src="../../images/railway_ade_example.png" />
## Accessing the deployment via the Letta API
Accessing the deployment via the [Letta API](https://docs.letta.com/api-reference) is simple, we just need to swap the base URL of the endpoint with the IP address from the Railway deployment.
For example if the Railway IP address is `https://MYSERVER.up.railway.app` and the password is `banana`, to create an agent on the deployment, we can use the following shell command:
```sh
curl --request POST \
--url https://MYSERVER.up.railway.app/v1/agents/ \
--header 'X-BARE-PASSWORD: password banana' \
--header 'Content-Type: application/json' \
--data '{
"memory_blocks": [
{
"label": "human",
"value": "The human'\''s name is Bob the Builder"
},
{
"label": "persona",
"value": "My name is Sam, the all-knowing sentient AI."
}
],
"llm_config": {
"model": "gpt-4o-mini",
"model_endpoint_type": "openai",
"model_endpoint": "https://api.openai.com/v1",
"context_window": 16000
},
"embedding_config": {
"embedding_endpoint_type": "openai",
"embedding_endpoint": "https://api.openai.com/v1",
"embedding_model": "text-embedding-3-small",
"embedding_dim": 8191
}
}'
```
This will create an agent with two memory blocks, configured to use `gpt-4o-mini` as the LLM model, and `text-embedding-3-small` as the embedding model.
If the Letta server is not password protected, we can omit the `X-BARE-PASSWORD` header.
<Check>That's it! Now you should be able to create and interact with agents on your remote Letta server (deployed on Railway) via the Letta ADE and API. 👾 ☄️</Check>
### Adding additional environment variables
To help you get started, when you deploy the template you have the option to fill in the example environment variables `OPENAI_API_KEY` (to connect your Letta agents to GPT models) and `ANTHROPIC_API_KEY` (to connect your Letta agents to Claude models).
There are many more providers you can enable on the Letta server via additional environment variables (for example vLLM, Ollama, etc). For more information on available providers, see [our documentation](/guides/server/docker).
To connect Letta to an additional API provider, you can go to your Railway deployment (after you've deployed the template), click `Variables` to see the current environment variables, then click `+ New Variable` to add a new variable. Once you've saved a new variable, you will need to restart the server for the changes to take effect.

View File

@@ -1,54 +0,0 @@
---
title: Collecting Traces & Telemetry
slug: guides/server/otel
---
<Note>
**ClickHouse is optional** and only required for telemetry/observability features. Letta works perfectly fine without it using just PostgreSQL. You only need ClickHouse if you want to collect traces, view LLM provider requests, or analyze system performance metrics.
</Note>
Letta uses [ClickHouse](https://clickhouse.com/) to store telemetry. ClickHouse is a database optimized for storing logs and traces. Traces can be used to view raw requests to LLM providers and also understand your agent's system performance metrics.
## Configuring ClickHouse
You will need to have a ClickHouse DB (either running locally or with [ClickHouse Cloud](https://console.clickhouse.cloud/)) to connect to Letta.
You can configure ClickHouse by passing the required environment variables:
```sh
docker run \
-v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \
-p 8283:8283 \
...
-e CLICKHOUSE_ENDPOINT=${CLICKHOUSE_ENDPOINT} \
-e CLICKHOUSE_DATABASE=${CLICKHOUSE_DATABASE} \
-e CLICKHOUSE_USERNAME=${CLICKHOUSE_USERNAME} \
-e CLICKHOUSE_PASSWORD=${CLICKHOUSE_PASSWORD} \
-e LETTA_OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 \
letta/letta:latest
```
### Finding your credentials in ClickHouse Cloud
You can find these variables inside ClickHouse Cloud by selecting the "Connection" button in the dashboard.
<img src="/images/clickhouse_config.png" />
## Connecting to Grafana
We recommend connecting ClickHouse to Grafana to query and view traces. Grafana can be run [locally](https://grafana.com/oss/grafana/), or via [Grafana Cloud](https://grafana.com/grafana/).
# Other Integrations
Letta also supports other exporters when running in a containerized environment. To request support for another exporter, please open an issue on [GitHub](https://github.com/letta-ai/letta/issues/new/choose).
## Configuring Signoz
You can configure Signoz by passing the required environment variables:
```sh
docker run \
-v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \
-p 8283:8283 \
...
-e SIGNOZ_ENDPOINT=${SIGNOZ_ENDPOINT} \
-e SIGNOZ_INGESTION_KEY=${SIGNOZ_INGESTION_KEY} \
-e LETTA_OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 \
letta/letta:latest
```

View File

@@ -1,66 +0,0 @@
# Custom Graders
Write your own grading functions to implement custom evaluation logic.
<Note>
Custom graders let you implement domain-specific evaluation, parse complex formats, and apply custom scoring algorithms.
</Note>
## Basic Structure
```python
from letta_evals.decorators import grader
from letta_evals.models import GradeResult, Sample
@grader
def my_custom_grader(sample: Sample, submission: str) -> GradeResult:
"""Custom grading logic."""
# Your evaluation logic
score = calculate_score(submission, sample.ground_truth)
# Ensure score is between 0.0 and 1.0
score = max(0.0, min(1.0, score))
return GradeResult(
score=score,
rationale=f"Score based on custom logic: {score}"
)
```
## Example: JSON Validation
```python
import json
from letta_evals.decorators import grader
from letta_evals.models import GradeResult, Sample
@grader
def valid_json(sample: Sample, submission: str) -> GradeResult:
"""Check if submission is valid JSON."""
try:
json.loads(submission)
return GradeResult(score=1.0, rationale="Valid JSON")
except json.JSONDecodeError as e:
return GradeResult(score=0.0, rationale=f"Invalid JSON: {e}")
```
## Registration
Custom graders are automatically registered when you import them in your suite's setup script or custom evaluators file.
## Configuration
```yaml
graders:
my_metric:
kind: tool
function: my_custom_grader # Your function name
extractor: last_assistant
```
## Next Steps
- [Tool Graders](/evals/graders/tool-graders) - Built-in grading functions
- [Graders Concept](/evals/core-concepts/graders) - Understanding graders
- [Example Custom Graders](https://github.com/letta-ai/letta-evals/tree/main/examples) - See examples in the letta-evals repo

View File

@@ -1,94 +0,0 @@
# Multi-Turn Conversations
Multi-turn conversations allow you to test how agents handle context across multiple exchanges.
<Note>
This is essential for stateful agents where behavior depends on conversation history.
</Note>
## Why Use Multi-Turn?
Multi-turn conversations enable testing that single-turn prompts cannot:
- **Memory storage**: Verify agents persist information to memory blocks
- **Tool call sequences**: Test multi-step workflows
- **Context retention**: Ensure agents remember details from earlier
- **State evolution**: Track how agent state changes across interactions
- **Conversational coherence**: Test if agents maintain context appropriately
## Format
### Single-Turn (Default)
```jsonl
{"input": "What is the capital of France?", "ground_truth": "Paris"}
```
### Multi-Turn
```jsonl
{"input": ["My name is Alice", "What's my name?"], "ground_truth": "Alice"}
```
The agent processes each input in sequence, with state carrying over between turns.
## Example 1: Memory Recall Testing
Test if the agent remembers information across turns:
```jsonl
{"input": ["Remember that my favorite color is blue", "What's my favorite color?"], "ground_truth": "blue"}
```
Suite configuration:
```yaml
graders:
response_check:
kind: tool
function: contains
extractor: last_assistant # Check the agent's response
```
## Example 2: Memory Correction Testing
Test if the agent correctly updates memory when users correct themselves:
```jsonl
{"input": ["Please remember that I like bananas.", "Actually, sorry, I meant I like apples."], "ground_truth": "apples"}
```
Suite configuration:
```yaml
graders:
memory_check:
kind: tool
function: contains
extractor: memory_block
extractor_config:
block_label: human # Check the actual memory block, not just the response
```
<Note>
**Key difference:** The `memory_block` extractor verifies the agent actually stored the corrected information in memory, not just that it responded correctly. This tests real memory persistence.
</Note>
## When to Test Memory Blocks vs. Responses
**Use `last_assistant` or `all_assistant` extractors when:**
- Testing what the agent says in conversation
- Verifying response content and phrasing
- Checking conversational coherence
**Use `memory_block` extractor when:**
- Verifying information was actually stored in memory
- Testing memory updates and corrections
- Validating persistent state changes
- Ensuring the agent's internal state is correct
See the [multiturn-memory-block-extractor example](https://github.com/letta-ai/letta-evals/tree/main/examples/multiturn-memory-block-extractor) for a complete working implementation.
## Next Steps
- [Datasets](/evals/core-concepts/datasets) - Creating test datasets
- [Extractors](/evals/core-concepts/extractors) - Extracting from trajectories
- [Targets](/evals/core-concepts/targets) - Agent lifecycle and testing behavior

View File

@@ -1,342 +0,0 @@
# CLI Commands
The **letta-evals** command-line interface lets you run evaluations, validate configurations, and inspect available components.
<Note>
**Quick overview:**
- **`run`** - Execute an evaluation suite (most common)
- **`validate`** - Check suite configuration without running
- **`list-extractors`** - Show available extractors
- **`list-graders`** - Show available grader functions
- **Exit codes** - 0 for pass, 1 for fail (perfect for CI/CD)
</Note>
**Typical workflow:**
1. Validate your suite: `letta-evals validate suite.yaml`
2. Run evaluation: `letta-evals run suite.yaml --output results/`
3. Check exit code: `echo $?` (0 = passed, 1 = failed)
## run
Run an evaluation suite.
```bash
letta-evals run <suite.yaml> [options]
```
### Arguments
- `suite.yaml`: Path to the suite configuration file (required)
### Options
#### --output, -o
Save results to a directory.
```bash
letta-evals run suite.yaml --output results/
```
Creates:
- `results/header.json`: Evaluation metadata
- `results/summary.json`: Aggregate metrics and configuration
- `results/results.jsonl`: Per-sample results (one JSON per line)
#### --quiet, -q
Quiet mode - only show pass/fail result.
```bash
letta-evals run suite.yaml --quiet
```
Output:
```
✓ PASSED
```
#### --max-concurrent
Maximum concurrent sample evaluations. **Default**: 15
```bash
letta-evals run suite.yaml --max-concurrent 10
```
Higher values = faster evaluation but more resource usage.
#### --api-key
Letta API key (overrides LETTA_API_KEY environment variable).
```bash
letta-evals run suite.yaml --api-key your-key
```
#### --base-url
Letta server base URL (overrides suite config and environment variable).
```bash
letta-evals run suite.yaml --base-url http://localhost:8283
```
#### --project-id
Letta project ID for cloud deployments.
```bash
letta-evals run suite.yaml --project-id proj_abc123
```
#### --cached, -c
Path to cached results (JSONL) for re-grading trajectories without re-running the agent.
```bash
letta-evals run suite.yaml --cached previous_results.jsonl
```
Use this to test different graders on the same agent trajectories.
#### --num-runs
Run the evaluation multiple times to measure consistency. **Default**: 1
```bash
letta-evals run suite.yaml --num-runs 10
```
**Output with multiple runs:**
- Each run creates a separate `run_N/` directory with individual results
- An `aggregate_stats.json` file contains statistics across all runs (mean, standard deviation, pass rate)
### Examples
Basic run:
```bash
letta-evals run suite.yaml # Run evaluation, show results in terminal
```
Save results:
```bash
letta-evals run suite.yaml --output evaluation-results/ # Save to directory
```
Letta Cloud:
```bash
letta-evals run suite.yaml \
--base-url https://api.letta.com \
--api-key $LETTA_API_KEY \
--project-id proj_abc123
```
Quiet CI mode:
```bash
letta-evals run suite.yaml --quiet
if [ $? -eq 0 ]; then
echo "Evaluation passed"
else
echo "Evaluation failed"
exit 1
fi
```
### Exit Codes
- `0`: Evaluation passed (gate criteria met)
- `1`: Evaluation failed (gate criteria not met or error)
## validate
Validate a suite configuration without running it.
```bash
letta-evals validate <suite.yaml>
```
Checks:
- YAML syntax is valid
- Required fields are present
- Paths exist
- Configuration is consistent
- Grader/extractor combinations are valid
Output on success:
```
✓ Suite configuration is valid
```
Output on error:
```
✗ Validation failed:
- Agent file not found: agent.af
- Grader 'my_metric' references unknown function
```
## list-extractors
List all available extractors.
```bash
letta-evals list-extractors
```
Output:
```
Available extractors:
last_assistant - Extract the last assistant message
first_assistant - Extract the first assistant message
all_assistant - Concatenate all assistant messages
pattern - Extract content matching regex
tool_arguments - Extract tool call arguments
tool_output - Extract tool return value
after_marker - Extract content after a marker
memory_block - Extract from memory block (requires agent_state)
```
## list-graders
List all available grader functions.
```bash
letta-evals list-graders
```
Output:
```
Available graders:
exact_match - Exact string match with ground_truth
contains - Check if contains ground_truth
regex_match - Match regex pattern
ascii_printable_only - Validate ASCII-only content
```
## help
Show help information.
```bash
letta-evals --help
```
Show help for a specific command:
```bash
letta-evals run --help
letta-evals validate --help
```
## Environment Variables
### LETTA_API_KEY
API key for Letta authentication.
```bash
export LETTA_API_KEY=your-key-here
```
### LETTA_BASE_URL
Letta server base URL.
```bash
export LETTA_BASE_URL=http://localhost:8283
```
### LETTA_PROJECT_ID
Letta project ID (for cloud).
```bash
export LETTA_PROJECT_ID=proj_abc123
```
### OPENAI_API_KEY
OpenAI API key (for rubric graders).
```bash
export OPENAI_API_KEY=your-openai-key
```
## Configuration Priority
Configuration values are resolved in this order (highest to lowest priority):
1. CLI arguments (`--api-key`, `--base-url`, `--project-id`)
2. Suite YAML configuration
3. Environment variables
## Using in CI/CD
### GitHub Actions
```yaml
name: Run Evals
on: [push]
jobs:
evaluate:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Install dependencies
run: pip install letta-evals
- name: Run evaluation
env:
LETTA_API_KEY: ${{ secrets.LETTA_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
letta-evals run suite.yaml --quiet --output results/
- name: Upload results
uses: actions/upload-artifact@v2
with:
name: eval-results
path: results/
```
### GitLab CI
```yaml
evaluate:
script:
- pip install letta-evals
- letta-evals run suite.yaml --quiet --output results/
artifacts:
paths:
- results/
variables:
LETTA_API_KEY: $LETTA_API_KEY
OPENAI_API_KEY: $OPENAI_API_KEY
```
## Debugging
### Common Issues
<Warning>
**"Agent file not found"**
```bash
# Check file exists relative to suite YAML location
ls -la path/to/agent.af
```
</Warning>
<Warning>
**"Connection refused"**
```bash
# Verify Letta server is running
curl http://localhost:8283/v1/health
```
</Warning>
<Warning>
**"Invalid API key"**
```bash
# Check environment variable is set
echo $LETTA_API_KEY
```
</Warning>
## Next Steps
- [Understanding Results](/evals/results-metrics/understanding-results) - Interpreting evaluation output
- [Suite YAML Reference](/evals/configuration/suite-yaml-reference) - Complete configuration options
- [Getting Started](/evals/get-started/getting-started) - Complete tutorial with examples

View File

@@ -1,425 +0,0 @@
# Datasets
**Datasets** are the test cases that define what your agent will be evaluated on. Each sample in your dataset represents one evaluation scenario.
<Note>
**Quick overview:**
- **Two formats**: JSONL (flexible, powerful) or CSV (simple, spreadsheet-friendly)
- **Required field**: `input` - the prompt(s) to send to the agent
- **Common fields**: `ground_truth` (expected answer), `tags` (for filtering), `metadata` (extra info)
- **Advanced fields**: `agent_args` (customize agent per sample), `rubric_vars` (per-sample rubric context)
- **Multi-turn support**: Send multiple messages in sequence using arrays
</Note>
**Typical workflow:**
1. Create a JSONL or CSV file with test cases
2. Reference it in your suite YAML: `dataset: test_cases.jsonl`
3. Run evaluation - each sample is tested independently
4. Results show per-sample and aggregate scores
Datasets can be created in two formats: **JSONL** or **CSV**. Choose based on your team's workflow and complexity needs.
## Dataset Formats
### JSONL Format
Each line is a JSON object representing one test case:
```jsonl
{"input": "What's the capital of France?", "ground_truth": "Paris"}
{"input": "Calculate 2+2", "ground_truth": "4"}
{"input": "What color is the sky?", "ground_truth": "blue"}
```
**Best for:**
- Complex data structures (nested objects, arrays)
- Multi-turn conversations
- Advanced features (agent_args, rubric_vars)
- Teams comfortable with JSON/code
- Version control (clean line-by-line diffs)
### CSV Format
Standard CSV with headers:
```csv
input,ground_truth
"What's the capital of France?","Paris"
"Calculate 2+2","4"
"What color is the sky?","blue"
```
**Best for:**
- Simple question-answer pairs
- Teams that prefer spreadsheets (Excel, Google Sheets)
- Non-technical collaborators creating test cases
- Quick dataset creation and editing
- Easy sharing with non-developers
## Quick Reference
| Field | Required | Type | Purpose |
|-------|----------|------|---------|
| `input` | ✅ | string or array | Prompt(s) to send to agent |
| `ground_truth` | ❌ | string | Expected answer (for tool graders) |
| `tags` | ❌ | array of strings | For filtering samples |
| `agent_args` | ❌ | object | Per-sample agent customization |
| `rubric_vars` | ❌ | object | Per-sample rubric variables |
| `metadata` | ❌ | object | Arbitrary extra data |
| `id` | ❌ | integer | Sample ID (auto-assigned if omitted) |
## Field Reference
### Required Fields
#### input
The prompt(s) to send to the agent. Can be a string or array of strings:
Single message:
```json
{"input": "Hello, who are you?"}
```
Multi-turn conversation:
```json
{"input": ["Hello", "What's your name?", "Tell me about yourself"]}
```
### Optional Fields
#### ground_truth
The expected answer or content to check against. Required for most tool graders (exact_match, contains, etc.):
```json
{"input": "What is 2+2?", "ground_truth": "4"}
```
#### metadata
Arbitrary additional data about the sample:
```json
{
"input": "What is photosynthesis?",
"ground_truth": "process where plants convert light into energy",
"metadata": {
"category": "biology",
"difficulty": "medium"
}
}
```
#### tags
List of tags for filtering samples:
```json
{"input": "Solve x^2 = 16", "ground_truth": "4", "tags": ["math", "algebra"]}
```
Filter by tags in your suite:
```yaml
sample_tags: [math] # Only samples tagged "math" will be evaluated
```
#### agent_args
Custom arguments passed to programmatic agent creation when using `agent_script`. Allows per-sample agent customization.
JSONL:
```json
{
"input": "What items do we have?",
"agent_args": {
"item": {"sku": "SKU-123", "name": "Widget A", "price": 19.99}
}
}
```
CSV:
```csv
input,agent_args
"What items do we have?","{""item"": {""sku"": ""SKU-123"", ""name"": ""Widget A"", ""price"": 19.99}}"
```
Your agent factory function can access these values via `sample.agent_args` to customize agent configuration.
See [Targets - agent_script](/evals/core-concepts/targets#agent_script) for details on programmatic agent creation.
#### rubric_vars
Variables to inject into rubric templates when using rubric graders. This allows you to provide per-sample context or examples to the LLM judge.
**Example:** Evaluating code quality against a reference implementation.
JSONL:
```jsonl
{"input": "Write a function to calculate fibonacci numbers", "rubric_vars": {"reference_code": "def fib(n):\n if n <= 1: return n\n return fib(n-1) + fib(n-2)", "required_features": "recursion, base case"}}
```
CSV:
```csv
input,rubric_vars
"Write a function to calculate fibonacci numbers","{""reference_code"": ""def fib(n):\n if n <= 1: return n\n return fib(n-1) + fib(n-2)"", ""required_features"": ""recursion, base case""}"
```
In your rubric template file, reference variables with `{variable_name}`:
**rubric.txt:**
```
Evaluate the submitted code against this reference implementation:
{reference_code}
Required features: {required_features}
Score on correctness (0.6) and code quality (0.4).
```
When the rubric grader runs, variables are replaced with values from `rubric_vars`:
**Final formatted prompt sent to LLM:**
```
Evaluate the submitted code against this reference implementation:
def fib(n):
if n <= 1: return n
return fib(n-1) + fib(n-2)
Required features: recursion, base case
Score on correctness (0.6) and code quality (0.4).
```
This lets you customize evaluation criteria per sample using the same rubric template.
See [Rubric Graders](/evals/graders/rubric-graders) for details on rubric templates.
#### id
Sample ID is automatically assigned (0-based index) if not provided. You can override:
```json
{"id": 42, "input": "Test case 42"}
```
## Complete Example
```jsonl
{"id": 1, "input": "What is the capital of France?", "ground_truth": "Paris", "tags": ["geography", "easy"], "metadata": {"region": "Europe"}}
{"id": 2, "input": "Calculate the square root of 144", "ground_truth": "12", "tags": ["math", "medium"]}
{"id": 3, "input": ["Hello", "What can you help me with?"], "tags": ["conversation"]}
```
## Dataset Best Practices
### 1. Clear Ground Truth
Make ground truth specific enough to grade but flexible enough to match valid responses:
<Tip>
Good:
```json
{"input": "What's the largest planet?", "ground_truth": "Jupiter"}
```
</Tip>
<Warning>
Too strict (might miss valid answers):
```json
{"input": "What's the largest planet?", "ground_truth": "Jupiter is the largest planet in our solar system."}
```
</Warning>
### 2. Diverse Test Cases
Include edge cases and variations:
```jsonl
{"input": "What is 2+2?", "ground_truth": "4", "tags": ["math", "easy"]}
{"input": "What is 0.1 + 0.2?", "ground_truth": "0.3", "tags": ["math", "floating_point"]}
{"input": "What is 999999999 + 1?", "ground_truth": "1000000000", "tags": ["math", "large_numbers"]}
```
### 3. Use Tags for Organization
Organize samples by type, difficulty, or feature:
```json
{"tags": ["tool_usage", "search"]}
{"tags": ["memory", "recall"]}
{"tags": ["reasoning", "multi_step"]}
```
### 4. Multi-Turn Conversations
Test conversational context and memory updates:
```jsonl
{"input": ["My name is Alice", "What's my name?"], "ground_truth": "Alice", "tags": ["memory", "recall"]}
{"input": ["Please remember that I like bananas.", "Actually, sorry, I meant I like apples."], "ground_truth": "apples", "tags": ["memory", "correction"]}
{"input": ["I work at Google", "Update my workplace to Microsoft", "Where do I work?"], "ground_truth": "Microsoft", "tags": ["memory", "multi_step"]}
```
<Tip>
**Testing memory corrections:** Use multi-turn inputs to test if agents properly update memory when users correct themselves. Combine with the `memory_block` extractor to verify the final memory state, not just the response.
</Tip>
### 5. No Ground Truth for LLM Judges
If using rubric graders, ground truth is optional:
```jsonl
{"input": "Write a creative story about a robot", "tags": ["creative"]}
{"input": "Explain quantum computing simply", "tags": ["explanation"]}
```
The LLM judge evaluates based on the rubric, not ground truth.
## Loading Datasets
Datasets are automatically loaded by the runner:
```yaml
dataset: path/to/dataset.jsonl # Path to your test cases (JSONL or CSV)
```
Paths are relative to the suite YAML file location.
## Dataset Filtering
### Limit Sample Count
```yaml
max_samples: 10 # Only evaluate first 10 samples (useful for testing)
```
### Filter by Tags
```yaml
sample_tags: [math, medium] # Only samples with ALL these tags
```
## Creating Datasets Programmatically
You can generate datasets with Python:
```python
import json
samples = []
for i in range(100):
samples.append({
"input": f"What is {i} + {i}?",
"ground_truth": str(i + i),
"tags": ["math", "addition"]
})
with open("dataset.jsonl", "w") as f:
for sample in samples:
f.write(json.dumps(sample) + "\n")
```
## Dataset Format Validation
The runner validates:
- Each line is valid JSON
- Required fields are present
- Field types are correct
Validation errors will be reported with line numbers.
## Examples by Use Case
### Question Answering
JSONL:
```jsonl
{"input": "What is the capital of France?", "ground_truth": "Paris"}
{"input": "Who wrote Romeo and Juliet?", "ground_truth": "Shakespeare"}
```
CSV:
```csv
input,ground_truth
"What is the capital of France?","Paris"
"Who wrote Romeo and Juliet?","Shakespeare"
```
### Tool Usage Testing
JSONL:
```jsonl
{"input": "Search for information about pandas", "ground_truth": "search"}
{"input": "Calculate 15 * 23", "ground_truth": "calculator"}
```
CSV:
```csv
input,ground_truth
"Search for information about pandas","search"
"Calculate 15 * 23","calculator"
```
Ground truth = expected tool name.
### Memory Testing (Multi-turn)
JSONL:
```jsonl
{"input": ["Remember that my favorite color is blue", "What's my favorite color?"], "ground_truth": "blue"}
{"input": ["I live in Tokyo", "Where do I live?"], "ground_truth": "Tokyo"}
```
CSV (using JSON array strings):
```csv
input,ground_truth
"[""Remember that my favorite color is blue"", ""What's my favorite color?""]","blue"
"[""I live in Tokyo"", ""Where do I live?""]","Tokyo"
```
### Code Generation
JSONL:
```jsonl
{"input": "Write a function to reverse a string in Python"}
{"input": "Create a SQL query to find users older than 21"}
```
CSV:
```csv
input
"Write a function to reverse a string in Python"
"Create a SQL query to find users older than 21"
```
Use rubric graders to evaluate code quality.
## CSV Advanced Features
CSV supports all the same features as JSONL by encoding complex data as JSON strings in cells:
**Multi-turn conversations** (requires escaped JSON array string):
```csv
input,ground_truth
"[""Hello"", ""What's your name?""]","Alice"
```
**Agent arguments** (requires escaped JSON object string):
```csv
input,agent_args
"What items do we have?","{""initial_inventory"": [""apple"", ""banana""]}"
```
**Rubric variables** (requires escaped JSON object string):
```csv
input,rubric_vars
"Write a story","{""max_length"": 500, ""genre"": ""sci-fi""}"
```
<Note>
**Note:** Complex data structures require JSON encoding in CSV. If you're frequently using these advanced features, JSONL may be easier to read and maintain.
</Note>
## Next Steps
- [Suite YAML Reference](/evals/configuration/suite-yaml-reference) - Complete configuration options including filtering
- [Graders](/evals/core-concepts/graders) - How to evaluate agent responses
- [Multi-Turn Conversations](/evals/advanced/multi-turn-conversations) - Testing conversational flows

View File

@@ -1,374 +0,0 @@
# Extractors
**Extractors** select what content to evaluate from an agent's response. They navigate the conversation trajectory and extract the specific piece you want to grade.
<Note>
**Quick overview:**
- **Purpose**: Agent responses are complex (messages, tool calls, memory) - extractors isolate what to grade
- **Built-in options**: last_assistant, tool_arguments, memory_block, pattern, and more
- **Flexible**: Different graders can use different extractors in the same suite
- **Automatic**: No setup needed - just specify in your grader config
</Note>
**Common patterns:**
- `last_assistant` - Most common, gets the agent's final message (90% of use cases)
- `tool_arguments` - Verify agent called the right tool with correct args
- `memory_block` - Check if agent updated memory correctly
- `pattern` - Extract structured data with regex
Extractors determine what part of the agent's response gets graded. They pull out specific content from the conversation trajectory.
## Why Extractors?
An agent's response is complex - it includes assistant messages, tool calls, tool returns, memory updates, etc. Extractors let you focus on exactly what you want to evaluate.
**The evaluation flow:**
```
Agent Response → Extractor → Submission Text → Grader → Score
```
For example:
```
Full trajectory:
UserMessage: "What's the capital of France?"
ToolCallMessage: search(query="capital of france")
ToolReturnMessage: "Paris is the capital..."
AssistantMessage: "The capital of France is Paris."
↓ extractor: last_assistant ↓
Extracted: "The capital of France is Paris."
↓ grader: contains (ground_truth="Paris") ↓
Score: 1.0
```
## Trajectory Structure
A trajectory is a list of turns, where each turn is a list of Letta messages:
```python
[
[UserMessage(...), AssistantMessage(...), ToolCallMessage(...), ToolReturnMessage(...)], # Turn 1
[AssistantMessage(...)] # Turn 2
]
```
Extractors navigate this structure to pull out the submission text.
## Built-in Extractors
### last_assistant
Extracts the last assistant message content.
```yaml
graders:
quality:
kind: tool
function: contains
extractor: last_assistant # Extract final agent message
```
Most common extractor - gets the agent's final response.
### first_assistant
Extracts the first assistant message content.
```yaml
graders:
initial_response:
kind: tool
function: contains
extractor: first_assistant # Extract first agent message
```
Useful for testing immediate responses before tool usage.
### all_assistant
Concatenates all assistant messages with a separator.
```yaml
graders:
complete_response:
kind: rubric
prompt_path: rubric.txt
extractor: all_assistant # Concatenate all agent messages
extractor_config:
separator: "\n\n" # Join messages with double newline
```
Use when you need the full conversation context.
### last_turn
Extracts all assistant messages from the last turn only.
```yaml
graders:
final_turn:
kind: tool
function: contains
extractor: last_turn # Messages from final turn only
extractor_config:
separator: " " # Join with spaces
```
Useful when the agent makes multiple statements in the final turn.
### pattern
Extracts content matching a regex pattern from assistant messages.
```yaml
graders:
extract_number:
kind: tool
function: exact_match
extractor: pattern # Extract using regex
extractor_config:
pattern: 'Result: (\d+)' # Regex pattern to match
group: 1 # Extract capture group 1
search_all: false # Only find first match
```
Example: Extract "42" from "The answer is Result: 42"
### tool_arguments
Extracts arguments from a specific tool call.
```yaml
graders:
search_query:
kind: tool
function: contains
extractor: tool_arguments # Extract tool call arguments
extractor_config:
tool_name: search # Which tool to extract from
```
Returns the JSON arguments as a string.
Example: If agent calls `search(query="pandas", limit=10)`, extracts:
```json
{"query": "pandas", "limit": 10}
```
### tool_output
Extracts the return value from a specific tool call.
```yaml
graders:
search_results:
kind: tool
function: contains
extractor: tool_output # Extract tool return value
extractor_config:
tool_name: search # Which tool's output to extract
```
Finds the tool call and its corresponding return message.
### after_marker
Extracts content after a specific marker string.
```yaml
graders:
answer_section:
kind: tool
function: contains
extractor: after_marker # Extract content after marker
extractor_config:
marker: "ANSWER:" # Marker string to find
include_marker: false # Don't include "ANSWER:" in output
```
Example: From "Here's my analysis... ANSWER: Paris", extracts "Paris"
### memory_block
Extracts content from a specific memory block (requires agent_state).
```yaml
graders:
human_memory:
kind: tool
function: exact_match
extractor: memory_block # Extract from agent memory
extractor_config:
block_label: human # Which memory block to extract
```
<Warning>
**Important**: This extractor requires the agent's final state, which adds overhead. The runner automatically fetches agent_state when this extractor is used.
</Warning>
Example use case: Verify the agent correctly updated its memory about the user.
## Extractor Configuration
Some extractors accept additional configuration via `extractor_config`:
```yaml
graders:
my_metric:
kind: tool
function: contains
extractor: pattern # Use pattern extractor
extractor_config: # Configuration for this extractor
pattern: 'Answer: (.*)' # Regex pattern
group: 1 # Extract capture group 1
```
## Choosing an Extractor
| Use Case | Recommended Extractor |
|----------|---------------------|
| Final agent response | `last_assistant` |
| First response before tools | `first_assistant` |
| Complete conversation | `all_assistant` |
| Specific format extraction | `pattern` |
| Tool usage validation | `tool_arguments` |
| Tool result checking | `tool_output` |
| Memory validation | `memory_block` |
| Structured output | `after_marker` |
## Content Flattening
Assistant messages can contain multiple content parts. Extractors automatically flatten complex content to plain text.
## Empty Extraction
If an extractor finds no matching content, it returns an empty string `""`. This typically results in a score of 0.0 from the grader.
## Custom Extractors
You can write custom extractors. See [Custom Extractors](/evals/extractors/custom-extractors) for details.
Example:
```python
from letta_evals.decorators import extractor
from letta_client import LettaMessageUnion
@extractor
def my_extractor(trajectory: List[List[LettaMessageUnion]], config: dict) -> str:
# Custom extraction logic
return extracted_text
```
Register by importing in your suite's setup script or custom evaluators file.
## Multi-Metric Extraction
Different graders can use different extractors:
```yaml
graders:
response_quality: # Evaluate final message quality
kind: rubric
prompt_path: quality.txt
extractor: last_assistant # Extract final response
tool_usage: # Check tool was called correctly
kind: tool
function: exact_match
extractor: tool_arguments # Extract tool args
extractor_config:
tool_name: search # From search tool
memory_update: # Verify memory updated
kind: tool
function: contains
extractor: memory_block # Extract from memory
extractor_config:
block_label: human # Human memory block
```
Each grader independently extracts and evaluates different aspects.
## Listing Extractors
See all available extractors:
```bash
letta-evals list-extractors
```
## Examples
### Extract Final Answer
```yaml
extractor: last_assistant # Get final agent message
```
Agent: "Let me search... *uses tool* ... The answer is Paris."
Extracted: "The answer is Paris."
### Extract Tool Arguments
```yaml
extractor: tool_arguments # Get tool call args
extractor_config:
tool_name: search # From search tool
```
Agent calls: `search(query="pandas", limit=5)`
Extracted: `{"query": "pandas", "limit": 5}`
### Extract Pattern
```yaml
extractor: pattern # Extract with regex
extractor_config:
pattern: 'RESULT: (\w+)' # Match pattern
group: 1 # Extract capture group 1
```
Agent: "After calculation... RESULT: SUCCESS"
Extracted: "SUCCESS"
### Extract Memory
```yaml
extractor: memory_block # Extract from agent memory
extractor_config:
block_label: human # Human memory block
```
Agent updates memory block "human" to: "User's name is Alice"
Extracted: "User's name is Alice"
## Troubleshooting
<Warning>
**Extractor returns empty string**
**Problem**: Grader always gives score 0.0 because extractor finds nothing.
**Common causes**:
- **Wrong extractor**: Using `first_assistant` but agent doesn't respond until after tool use → use `last_assistant`
- **Wrong tool name**: `tool_arguments` with `tool_name: "search"` but agent calls `"web_search"` → check actual tool name
- **Wrong memory block**: `memory_block` with `block_label: "user"` but block is actually labeled `"human"` → check block labels
- **Pattern doesn't match**: `pattern: "Answer: (.*)"` but agent says "The answer is..." → adjust regex
</Warning>
<Tip>
**Debug tips**:
1. Check the trajectory in results JSON to see actual agent output
2. Use `last_assistant` first to see what's there
3. Verify tool names with `letta-evals list-extractors`
</Tip>
## Next Steps
- [Built-in Extractors Reference](/evals/extractors/built-in-extractors) - Complete extractor documentation
- [Custom Extractors Guide](/evals/extractors/custom-extractors) - Write your own extractors
- [Graders](/evals/core-concepts/graders) - How to use extractors with graders

View File

@@ -1,384 +0,0 @@
# Gates
**Gates** are the pass/fail criteria for your evaluation. They determine whether your agent meets the required performance threshold by checking aggregate metrics.
<Note>
**Quick overview:**
- **Single decision**: One gate per suite determines pass/fail
- **Two metrics**: `avg_score` (average of all scores) or `accuracy` (percentage passing threshold)
- **Flexible operators**: `>=`, `>`, `<=`, `<`, `==` for threshold comparison
- **Customizable pass criteria**: Define what counts as "passing" for accuracy calculations
- **Exit codes**: Suite exits 0 for pass, 1 for fail
</Note>
**Common patterns:**
- Average score must be 80%+: `avg_score >= 0.8`
- 90%+ of samples must pass: `accuracy >= 0.9`
- Custom threshold: Define per-sample pass criteria with `pass_value`
Gates define the pass/fail criteria for your evaluation. They check if aggregate metrics meet a threshold.
## Basic Structure
```yaml
gate:
metric_key: accuracy # Which grader to evaluate
metric: avg_score # Use average score (default)
op: gte # Greater than or equal
value: 0.8 # 80% threshold
```
## Why Use Gates?
Gates provide **automated pass/fail decisions** for your evaluations, which is essential for:
**CI/CD Integration**: Gates let you block deployments if agent performance drops:
```bash
letta-evals run suite.yaml
# Exit code 0 = pass (continue deployment)
# Exit code 1 = fail (block deployment)
```
**Regression Testing**: Set a baseline threshold and ensure new changes don't degrade performance:
```yaml
gate:
metric: avg_score
op: gte
value: 0.85 # Must maintain 85%+ to pass
```
**Quality Enforcement**: Require agents meet minimum standards before production:
```yaml
gate:
metric: accuracy
op: gte
value: 0.95 # 95% of test cases must pass
```
### What Happens When Gates Fail?
When a gate condition is not met:
1. **Console output** shows failure message:
```text
✗ FAILED (0.72/1.00 avg, 72.0% pass rate)
Gate check failed: avg_score (0.72) not >= 0.80
```
2. **Exit code** is 1 (non-zero indicates failure):
```bash
letta-evals run suite.yaml
echo $? # Prints 1 if gate failed
```
3. **Results JSON** includes `gate_passed: false`:
```json
{
"gate_passed": false,
"gate_check": {
"metric": "avg_score",
"value": 0.72,
"threshold": 0.80,
"operator": "gte",
"passed": false
},
"metrics": { ... }
}
```
4. **All other data is preserved** - you still get full results, scores, and trajectories even when gating fails
<Tip>
**Common use case in CI**:
```bash
#!/bin/bash
letta-evals run suite.yaml --output results.json
if [ $? -ne 0 ]; then
echo "❌ Agent evaluation failed - blocking merge"
exit 1
else
echo "✅ Agent evaluation passed - safe to merge"
fi
```
</Tip>
## Required Fields
### metric_key
Which grader to evaluate. Must match a key in your `graders` section:
```yaml
graders:
accuracy: # Grader name
kind: tool
function: exact_match
extractor: last_assistant
gate:
metric_key: accuracy # Must match grader name above
op: gte # >=
value: 0.8 # 80% threshold
```
If you only have one grader, `metric_key` can be omitted - it will default to your single grader.
### metric
Which aggregate statistic to compare. Two options:
#### avg_score
Average score across all samples (0.0 to 1.0):
```yaml
gate:
metric_key: quality # Check quality grader
metric: avg_score # Use average of all scores
op: gte # >=
value: 0.7 # Must average 70%+
```
Example: If scores are [0.8, 0.9, 0.6], avg_score = 0.77
#### accuracy
Pass rate as a percentage (0.0 to 1.0):
```yaml
gate:
metric_key: accuracy # Check accuracy grader
metric: accuracy # Use pass rate, not average
op: gte # >=
value: 0.8 # 80% of samples must pass
```
By default, samples with score `>= 1.0` are considered "passing".
You can customize the per-sample threshold with `pass_op` and `pass_value` (see below).
<Note>
**Note**: The default `metric` is `avg_score`, so you can omit it if that's what you want:
```yaml
gate:
metric_key: quality # Check quality grader
op: gte # >=
value: 0.7 # 70% threshold (defaults to avg_score)
```
</Note>
### op
Comparison operator:
- `gte`: Greater than or equal (`>=`)
- `gt`: Greater than (`>`)
- `lte`: Less than or equal (`<=`)
- `lt`: Less than (`<`)
- `eq`: Equal (`==`)
Most common: `gte` (at least X)
### value
Threshold value for comparison:
- For `avg_score`: 0.0 to 1.0
- For `accuracy`: 0.0 to 1.0 (representing percentage)
```yaml
gate:
metric: avg_score # Average score
op: gte # >=
value: 0.75 # 75% threshold
```
```yaml
gate:
metric: accuracy # Pass rate
op: gte # >=
value: 0.9 # 90% must pass
```
## Optional Fields
### pass_op and pass_value
Customize when individual samples are considered "passing" (used for accuracy calculation):
```yaml
gate:
metric_key: quality # Check quality grader
metric: accuracy # Use pass rate
op: gte # >=
value: 0.8 # 80% must pass
pass_op: gte # Sample passes if >=
pass_value: 0.7 # This threshold (70%)
```
Default behavior:
- If `metric` is `avg_score`: samples pass if score `>=` the gate value
- If `metric` is `accuracy`: samples pass if score `>= 1.0` (perfect)
## Examples
### Require 80% Average Score
```yaml
gate:
metric_key: quality # Check quality grader
metric: avg_score # Use average
op: gte # >=
value: 0.8 # 80% average
```
Passes if the average score across all samples is `>= 0.8`
### Require 90% Pass Rate (Perfect Scores)
```yaml
gate:
metric_key: accuracy # Check accuracy grader
metric: accuracy # Use pass rate
op: gte # >=
value: 0.9 # 90% must pass (default: score >= 1.0 to pass)
```
Passes if 90% of samples have score = 1.0
### Require 75% Pass Rate (Score `>= 0.7`)
```yaml
gate:
metric_key: quality # Check quality grader
metric: accuracy # Use pass rate
op: gte # >=
value: 0.75 # 75% must pass
pass_op: gte # Sample passes if >=
pass_value: 0.7 # 70% threshold per sample
```
Passes if 75% of samples have score `>= 0.7`
### Maximum Error Rate
```yaml
gate:
metric_key: quality # Check quality grader
metric: accuracy # Use pass rate
op: gte # >=
value: 0.95 # 95% must pass (allows 5% failures)
pass_op: gt # Sample passes if >
pass_value: 0.0 # 0.0 (any non-zero score)
```
Allows up to 5% failures.
### Exact Pass Rate
```yaml
gate:
metric_key: quality # Check quality grader
metric: accuracy # Use pass rate
op: eq # Exactly equal
value: 1.0 # 100% (all samples must pass)
```
All samples must pass.
## Multi-Metric Gating
When you have multiple graders, you can only gate on one metric:
```yaml
graders:
accuracy: # First metric
kind: tool
function: exact_match
extractor: last_assistant
completeness: # Second metric
kind: rubric
prompt_path: completeness.txt
model: gpt-4o-mini
extractor: last_assistant
gate:
metric_key: accuracy # Only gate on accuracy (completeness still computed)
metric: avg_score # Use average
op: gte # >=
value: 0.8 # 80% threshold
```
The evaluation passes/fails based on the gated metric, but results include scores for all metrics.
## Understanding avg_score vs accuracy
### avg_score
- Arithmetic mean of all scores
- Sensitive to partial credit
- Good for continuous evaluation
Example:
- Scores: [1.0, 0.8, 0.6]
- avg_score = (1.0 + 0.8 + 0.6) / 3 = 0.8
### accuracy
- Percentage of samples meeting a threshold
- Binary pass/fail per sample
- Good for strict requirements
Example:
- Scores: [1.0, 0.8, 0.6]
- pass_value: 0.7
- Passing: [1.0, 0.8] = 2 out of 3
- accuracy = 2/3 = 0.667 (66.7%)
## Errors and Attempted Samples
If a sample fails (error during evaluation), it:
- Gets a score of 0.0
- Counts toward `total` but not `total_attempted`
- Included in `avg_score_total` but not `avg_score_attempted`
You can gate on either:
- `avg_score_total`: Includes errors as 0.0
- `avg_score_attempted`: Excludes errors (only successfully attempted samples)
<Note>
**Note**: The `metric` field currently only supports `avg_score` and `accuracy`. By default, gates use `avg_score_attempted`.
</Note>
## Gate Results
After evaluation, you'll see:
```text
✓ PASSED (2.25/3.00 avg, 75.0% pass rate)
```
or
```text
✗ FAILED (1.80/3.00 avg, 60.0% pass rate)
```
The evaluation exit code reflects the gate result:
- 0: Passed
- 1: Failed
## Advanced Gating
For complex gating logic (e.g., "pass if accuracy `>= 0.8` OR avg_score `>= 0.9`"), you'll need to:
1. Run evaluation with one gate
2. Examine the results JSON
3. Apply custom logic in a post-processing script
## Next Steps
- [Understanding Results](/evals/results-metrics/understanding-results) - Interpreting evaluation output
- [Multi-Metric Evaluation](/evals/graders/multi-metric-grading) - Using multiple graders
- [Suite YAML Reference](/evals/configuration/suite-yaml-reference) - Complete gate configuration

View File

@@ -1,330 +0,0 @@
# Graders
**Graders** are the scoring functions that evaluate agent responses. They take the extracted submission (from an extractor) and assign a score between 0.0 (complete failure) and 1.0 (perfect success).
<Note>
**Quick overview:**
- **Two types**: Tool graders (deterministic Python functions) and Rubric graders (LLM-as-judge)
- **Built-in functions**: exact_match, contains, regex_match, ascii_printable_only
- **Custom graders**: Write your own grading logic
- **Multi-metric**: Combine multiple graders in one suite
- **Flexible extraction**: Each grader can use a different extractor
</Note>
**When to use each:**
- **Tool graders**: Fast, deterministic, free - perfect for exact matching, patterns, tool validation
- **Rubric graders**: Flexible, subjective, costs API calls - ideal for quality, creativity, nuanced evaluation
Graders evaluate agent responses and assign scores between 0.0 (complete failure) and 1.0 (perfect success).
## Grader Types
There are two types of graders:
### Tool Graders
Python functions that programmatically compare the submission to ground truth or apply deterministic checks.
```yaml
graders:
accuracy:
kind: tool # Deterministic grading
function: exact_match # Built-in grading function
extractor: last_assistant # Use final agent response
```
Best for:
- Exact matching
- Pattern checking
- Tool call validation
- Deterministic criteria
### Rubric Graders
LLM-as-judge evaluation using custom prompts and criteria. Can use either direct LLM API calls or a Letta agent as the judge.
**Standard rubric grading (LLM API):**
```yaml
graders:
quality:
kind: rubric # LLM-as-judge
prompt_path: rubric.txt # Custom evaluation criteria
model: gpt-4o-mini # Judge model
extractor: last_assistant # What to evaluate
```
**Agent-as-judge (Letta agent):**
```yaml
graders:
agent_judge:
kind: rubric # Still "rubric" kind
agent_file: judge.af # Judge agent with submit_grade tool
prompt_path: rubric.txt # Evaluation criteria
extractor: last_assistant # What to evaluate
```
Best for:
- Subjective quality assessment
- Open-ended responses
- Nuanced evaluation
- Complex criteria
- Judges that need tools (when using agent-as-judge)
## Built-in Tool Graders
### exact_match
Checks if submission exactly matches ground truth (case-sensitive, whitespace-trimmed).
```yaml
graders:
accuracy:
kind: tool
function: exact_match # Case-sensitive, whitespace-trimmed
extractor: last_assistant # Extract final response
```
Requires: `ground_truth` in dataset
Score: 1.0 if exact match, 0.0 otherwise
### contains
Checks if submission contains ground truth (case-insensitive).
```yaml
graders:
contains_answer:
kind: tool
function: contains # Case-insensitive substring match
extractor: last_assistant # Search in final response
```
Requires: `ground_truth` in dataset
Score: 1.0 if found, 0.0 otherwise
### regex_match
Checks if submission matches a regex pattern in ground truth.
```yaml
graders:
pattern:
kind: tool
function: regex_match # Pattern matching
extractor: last_assistant # Check final response
```
Dataset sample:
```json
{"input": "Generate a UUID", "ground_truth": "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"}
```
Score: 1.0 if pattern matches, 0.0 otherwise
### ascii_printable_only
Validates that all characters are printable ASCII (useful for ASCII art, formatted output).
```yaml
graders:
ascii_check:
kind: tool
function: ascii_printable_only # Validate ASCII characters
extractor: last_assistant # Check final response
```
Does not require ground truth.
Score: 1.0 if all characters are printable ASCII, 0.0 if any non-printable characters found
## Rubric Graders
Rubric graders use an LLM to evaluate responses based on custom criteria.
### Basic Configuration
```yaml
graders:
quality:
kind: rubric # LLM-as-judge
prompt_path: quality_rubric.txt # Evaluation criteria
model: gpt-4o-mini # Judge model
temperature: 0.0 # Deterministic
extractor: last_assistant # What to evaluate
```
### Rubric Prompt Format
Your rubric file should describe the evaluation criteria. Use placeholders:
- `{input}`: The original input from the dataset
- `{submission}`: The extracted agent response
- `{ground_truth}`: Ground truth from dataset (if available)
Example `quality_rubric.txt`:
```
Evaluate the response for:
1. Accuracy: Does it correctly answer the question?
2. Completeness: Is the answer thorough?
3. Clarity: Is it well-explained?
Input: {input}
Expected: {ground_truth}
Response: {submission}
Score from 0.0 to 1.0 where:
- 1.0: Perfect response
- 0.75: Good with minor issues
- 0.5: Acceptable but incomplete
- 0.25: Poor quality
- 0.0: Completely wrong
```
### Inline Prompt
Instead of a file, you can include the prompt inline:
```yaml
graders:
quality:
kind: rubric # LLM-as-judge
prompt: | # Inline prompt instead of file
Evaluate the creativity and originality of the response.
Score 1.0 for highly creative, 0.0 for generic or unoriginal.
model: gpt-4o-mini # Judge model
extractor: last_assistant # What to evaluate
```
### Model Configuration
```yaml
graders:
quality:
kind: rubric
prompt_path: rubric.txt # Evaluation criteria
model: gpt-4o-mini # Judge model
temperature: 0.0 # Deterministic (0.0-2.0)
provider: openai # LLM provider (default: openai)
max_retries: 5 # API retry attempts
timeout: 120.0 # Request timeout in seconds
```
Supported providers:
- `openai` (default)
Models:
- Any OpenAI-compatible model
- Special handling for reasoning models (o1, o3) - temperature automatically adjusted to 1.0
### Structured Output
Rubric graders use JSON mode to get structured responses:
```json
{
"score": 0.85,
"rationale": "The response is accurate and complete but could be more concise."
}
```
The score is validated to be between 0.0 and 1.0.
## Multi-Metric Configuration
Evaluate multiple aspects in one suite:
```yaml
graders:
accuracy: # Tool grader for factual correctness
kind: tool
function: contains
extractor: last_assistant
completeness: # Rubric grader for thoroughness
kind: rubric
prompt_path: completeness_rubric.txt
model: gpt-4o-mini
extractor: last_assistant
tool_usage: # Tool grader for tool call validation
kind: tool
function: exact_match
extractor: tool_arguments # Extract tool call args
extractor_config:
tool_name: search # Which tool to check
```
Each grader can use a different extractor.
## Extractor Configuration
Every grader must specify an `extractor` to select what to grade:
```yaml
graders:
my_metric:
kind: tool
function: contains # Grading function
extractor: last_assistant # What to extract and grade
```
Some extractors need additional configuration:
```yaml
graders:
tool_check:
kind: tool
function: contains # Check if ground truth in tool args
extractor: tool_arguments # Extract tool call arguments
extractor_config: # Configuration for this extractor
tool_name: search # Which tool to extract from
```
See [Extractors](/evals/core-concepts/extractors) for all available extractors.
## Custom Graders
You can write custom grading functions. See [Custom Graders](/evals/advanced/custom-graders) for details.
## Grader Selection Guide
| Use Case | Recommended Grader |
|----------|-------------------|
| Exact answer matching | `exact_match` |
| Keyword checking | `contains` |
| Pattern validation | `regex_match` |
| Tool call validation | `exact_match` with `tool_arguments` extractor |
| Quality assessment | Rubric grader |
| Creativity evaluation | Rubric grader |
| Format checking | Custom tool grader |
| Multi-criteria evaluation | Multiple graders |
## Score Interpretation
All scores are between 0.0 and 1.0:
- **1.0**: Perfect - meets all criteria
- **0.75-0.99**: Good - minor issues
- **0.5-0.74**: Acceptable - notable gaps
- **0.25-0.49**: Poor - major problems
- **0.0-0.24**: Failed - did not meet criteria
Tool graders typically return binary scores (0.0 or 1.0), while rubric graders can return any value in the range.
## Error Handling
If grading fails (e.g., network error, invalid format):
- Score is set to 0.0
- Rationale includes error message
- Metadata includes error details
This ensures evaluations can continue even with individual failures.
## Next Steps
- [Tool Graders](/evals/graders/tool-graders) - Built-in and custom functions
- [Rubric Graders](/evals/graders/rubric-graders) - LLM-as-judge details
- [Multi-Metric Evaluation](/evals/graders/multi-metric-grading) - Using multiple graders
- [Extractors](/evals/core-concepts/extractors) - Selecting what to grade

View File

@@ -1,207 +0,0 @@
# Core Concepts
Understanding how Letta Evals works and what makes it different.
<Note>
**Just want to run an eval?** Skip to [Getting Started](/evals/get-started/getting-started) for a hands-on quickstart.
</Note>
## Built for Stateful Agents
Letta Evals is a testing framework specifically designed for agents that maintain state. Unlike traditional eval frameworks built for simple input-output models, Letta Evals understands that agents:
- Maintain memory across conversations
- Use tools and external functions
- Evolve their behavior based on interactions
- Have persistent context and state
This means you can test aspects of your agent that other frameworks can't: memory updates, multi-turn conversations, tool usage patterns, and state evolution over time.
## The Evaluation Flow
Every evaluation follows this flow:
**Dataset → Target (Agent) → Extractor → Grader → Gate → Result**
1. **Dataset**: Your test cases (questions, scenarios, expected outputs)
2. **Target**: The agent being evaluated
3. **Extractor**: Pulls out the relevant information from the agent's response
4. **Grader**: Scores the extracted information
5. **Gate**: Pass/fail criteria for the overall evaluation
6. **Result**: Metrics, scores, and detailed results
### What You Can Test
With Letta Evals, you can test aspects of agents that traditional frameworks can't:
- **Memory updates**: Did the agent correctly remember the user's name?
- **Multi-turn conversations**: Can the agent maintain context across multiple exchanges?
- **Tool usage**: Does the agent call the right tools with the right arguments?
- **State evolution**: How does the agent's internal state change over time?
<Note>
**Example: Testing Memory Updates**
```yaml
graders:
memory_check:
kind: tool # Deterministic grading
function: contains # Check if ground_truth in extracted content
extractor: memory_block # Extract from agent memory (not just response!)
extractor_config:
block_label: human # Which memory block to check
```
Dataset:
```jsonl
{"input": "Please remember that I like bananas.", "ground_truth": "bananas"}
```
This doesn't just check if the agent responded correctly - it verifies the agent actually stored "bananas" in its memory block. Traditional eval frameworks can't inspect agent state like this.
</Note>
## Why Evals Matter
AI agents are complex systems that can behave unpredictably. Without systematic evaluation, you can't:
- **Know if changes improve or break your agent** - Did that prompt tweak help or hurt?
- **Prevent regressions** - Catch when "fixes" break existing functionality
- **Compare approaches objectively** - Which model works better for your use case?
- **Build confidence before deployment** - Ensure quality before shipping to users
- **Track improvement over time** - Measure progress as you iterate
Manual testing doesn't scale. Evals let you test hundreds of scenarios in minutes.
## What Evals Are Useful For
### 1. Development & Iteration
- Test prompt changes instantly across your entire test suite
- Experiment with different models and compare results
- Validate that new features work as expected
### 2. Quality Assurance
- Prevent regressions when modifying agent behavior
- Ensure agents handle edge cases correctly
- Verify tool usage and memory updates
### 3. Model Selection
- Compare GPT-4 vs Claude vs other models on your specific use case
- Test different model configurations (temperature, system prompts, etc.)
- Find the right cost/performance tradeoff
### 4. Benchmarking
- Measure agent performance on standard tasks
- Track improvements over time
- Share reproducible results with your team
### 5. Production Readiness
- Validate agents meet quality thresholds before deployment
- Run continuous evaluation in CI/CD pipelines
- Monitor production agent quality
## How Letta Evals Works
Letta Evals is built around a few key concepts that work together to create a flexible evaluation framework.
## Key Components
### Suite
An **evaluation suite** is a complete test configuration defined in a YAML file. It ties together:
- Which dataset to use
- Which agent to test
- How to grade responses
- What criteria determine pass/fail
Think of a suite as a reusable test specification.
### Dataset
A **dataset** is a JSONL (or CSV) file where each line (or row) represents one test case. Each sample has:
- An input (what to ask the agent)
- Optional ground truth (the expected answer)
- Optional metadata (tags, custom fields)
### Target
The **target** is what you're evaluating. Currently, this is a Letta agent, specified by:
- An agent file (.af)
- An existing agent ID
- A Python script that creates agents programmatically
### Trajectory
A **trajectory** is the complete conversation history from one test case. It's a list of turns, where each turn contains a list of Letta messages (assistant messages, tool calls, tool returns, etc.).
### Extractor
An **extractor** determines what part of the trajectory to evaluate. For example:
- The last thing the agent said
- All tool calls made
- Content from agent memory
- Text matching a pattern
### Grader
A **grader** scores how well the agent performed. There are two types:
- **Tool graders**: Python functions that compare submission to ground truth
- **Rubric graders**: LLM judges that evaluate based on custom criteria
### Gate
A **gate** is the pass/fail threshold for your evaluation. It compares aggregate metrics (like average score or pass rate) against a target value.
## Multi-Metric Evaluation
You can define multiple graders in one suite to evaluate different aspects:
```yaml
graders:
accuracy: # Check if answer is correct
kind: tool
function: exact_match
extractor: last_assistant # Use final response
tool_usage: # Check if agent called the right tool
kind: tool
function: contains
extractor: tool_arguments # Extract tool call args
extractor_config:
tool_name: search # From search tool
```
The gate can check any of these metrics:
```yaml
gate:
metric_key: accuracy # Gate on accuracy (tool_usage still computed)
op: gte # >=
value: 0.8 # 80% threshold
```
## Score Normalization
All scores are normalized to the range [0.0, 1.0]:
- 0.0 = complete failure
- 1.0 = perfect success
- Values in between = partial credit
This allows different grader types to be compared and combined.
## Aggregate Metrics
Individual sample scores are aggregated in two ways:
1. **Average Score**: Mean of all scores (0.0 to 1.0)
2. **Accuracy/Pass Rate**: Percentage of samples passing a threshold
You can gate on either metric type.
## Next Steps
Dive deeper into each concept:
- [Suites](/evals/core-concepts/suites) - Suite configuration in detail
- [Datasets](/evals/core-concepts/datasets) - Creating effective test datasets
- [Targets](/evals/core-concepts/targets) - Agent configuration options
- [Graders](/evals/core-concepts/graders) - Understanding grader types
- [Extractors](/evals/core-concepts/extractors) - Extraction strategies
- [Gates](/evals/core-concepts/gates) - Setting pass/fail criteria

View File

@@ -1,275 +0,0 @@
# Suites
A **suite** is a YAML configuration file that defines a complete evaluation specification. It's the central piece that ties together your dataset, target agent, grading criteria, and pass/fail thresholds.
<Note>
**Quick overview:**
- **Single file defines everything**: Dataset, agent, graders, and success criteria all in one YAML
- **Reusable and shareable**: Version control your evaluation specs alongside your code
- **Multi-metric support**: Evaluate multiple aspects (accuracy, quality, tool usage) in one run
- **Multi-model testing**: Run the same suite across different LLM models
- **Flexible filtering**: Test subsets using tags or sample limits
</Note>
**Typical workflow:**
1. Create a suite YAML defining what and how to test
2. Run `letta-evals run suite.yaml`
3. Review results showing scores for each metric
4. Suite passes or fails based on gate criteria
An evaluation suite is a YAML configuration file that defines a complete test specification.
## Basic Structure
```yaml
name: my-evaluation # Suite identifier
description: Optional description of what this tests # Human-readable explanation
dataset: path/to/dataset.jsonl # Test cases
target: # What agent to evaluate
kind: agent
agent_file: agent.af # Agent to test
base_url: http://localhost:8283 # Letta server
graders: # How to evaluate responses
my_metric:
kind: tool # Deterministic grading
function: exact_match # Grading function
extractor: last_assistant # What to extract from agent response
gate: # Pass/fail criteria
metric_key: my_metric # Which metric to check
op: gte # Greater than or equal
value: 0.8 # 80% threshold
```
## Required Fields
### name
The name of your evaluation suite. Used in output and results.
```yaml
name: question-answering-eval
```
### dataset
Path to the JSONL or CSV dataset file. Can be relative (to the suite YAML) or absolute.
```yaml
dataset: ./datasets/qa.jsonl # Relative to suite YAML location
```
### target
Specifies what agent to evaluate. See [Targets](/evals/core-concepts/targets) for details.
### graders
One or more graders to evaluate agent performance. See [Graders](/evals/core-concepts/graders) for details.
### gate
Pass/fail criteria. See [Gates](/evals/core-concepts/gates) for details.
## Optional Fields
### description
A human-readable description of what this suite tests:
```yaml
description: Tests the agent's ability to answer factual questions accurately
```
### max_samples
Limit the number of samples to evaluate (useful for quick tests):
```yaml
max_samples: 10 # Only evaluate first 10 samples
```
### sample_tags
Filter samples by tags (only evaluate samples with these tags):
```yaml
sample_tags: [math, easy] # Only samples tagged with "math" AND "easy"
```
Dataset samples can include tags:
```jsonl
{"input": "What is 2+2?", "ground_truth": "4", "tags": ["math", "easy"]}
```
### num_runs
Number of times to run the entire evaluation suite (useful for testing non-deterministic behavior):
```yaml
num_runs: 5 # Run the evaluation 5 times
```
Default: 1
### setup_script
Path to a Python script with a setup function to run before evaluation:
```yaml
setup_script: setup.py:prepare_environment # script.py:function_name
```
The setup function should have this signature:
```python
def prepare_environment(suite: SuiteSpec) -> None:
# Setup code here
pass
```
## Path Resolution
Paths in the suite YAML are resolved relative to the YAML file location:
```
project/
├── suite.yaml
├── dataset.jsonl
└── agents/
└── my_agent.af
```
```yaml
# In suite.yaml
dataset: dataset.jsonl # Resolves to project/dataset.jsonl
target:
agent_file: agents/my_agent.af # Resolves to project/agents/my_agent.af
```
Absolute paths are used as-is.
## Multi-Grader Suites
You can evaluate multiple metrics in one suite:
```yaml
graders:
accuracy: # Check if answer is correct
kind: tool
function: exact_match
extractor: last_assistant
completeness: # LLM judges response quality
kind: rubric
prompt_path: rubrics/completeness.txt
model: gpt-4o-mini
extractor: last_assistant
tool_usage: # Verify correct tool was called
kind: tool
function: contains
extractor: tool_arguments # Extract tool call arguments
```
The gate can check any of these metrics:
```yaml
gate:
metric_key: accuracy # Gate on accuracy metric (others still computed)
op: gte # Greater than or equal
value: 0.9 # 90% threshold
```
Results will include scores for all graders, even if you only gate on one.
## Examples
### Simple Tool Grader Suite
```yaml
name: basic-qa # Suite name
dataset: questions.jsonl # Test questions
target:
kind: agent
agent_file: qa_agent.af # Pre-configured agent
base_url: http://localhost:8283 # Local server
graders:
accuracy: # Single metric
kind: tool # Deterministic grading
function: contains # Check if ground truth is in response
extractor: last_assistant # Use final agent message
gate:
metric_key: accuracy # Gate on this metric
op: gte # Must be >=
value: 0.75 # 75% to pass
```
### Rubric Grader Suite
```yaml
name: quality-eval # Quality evaluation
dataset: prompts.jsonl # Test prompts
target:
kind: agent
agent_id: existing-agent-123 # Use existing agent
base_url: https://api.letta.com # Letta Cloud
graders:
quality: # LLM-as-judge metric
kind: rubric # Subjective evaluation
prompt_path: quality_rubric.txt # Rubric template
model: gpt-4o-mini # Judge model
temperature: 0.0 # Deterministic
extractor: last_assistant # Evaluate final response
gate:
metric_key: quality # Gate on this metric
metric: avg_score # Use average score
op: gte # Must be >=
      value: 0.7                      # Average score of 0.7 to pass
```
### Multi-Model Suite
Test the same agent configuration across different models:
```yaml
name: model-comparison # Compare model performance
dataset: test.jsonl # Same test for all models
target:
kind: agent
agent_file: agent.af # Same agent configuration
base_url: http://localhost:8283 # Local server
model_configs: [gpt-4o-mini, claude-3-5-sonnet] # Test both models
graders:
accuracy: # Single metric for comparison
kind: tool
function: exact_match
extractor: last_assistant
gate:
metric_key: accuracy # Both models must pass this
op: gte # Must be >=
value: 0.8 # 80% threshold
```
Results will show per-model metrics.
## Validation
Validate your suite configuration before running:
```bash
letta-evals validate suite.yaml
```
This checks:
- Required fields are present
- Paths exist
- Configuration is valid
- Grader/extractor combinations are compatible
## Next Steps
- [Dataset Configuration](/evals/core-concepts/datasets)
- [Target Configuration](/evals/core-concepts/targets)
- [Grader Configuration](/evals/core-concepts/graders)
- [Gate Configuration](/evals/core-concepts/gates)

View File

@@ -1,329 +0,0 @@
# Targets
A **target** is the agent you're evaluating. In Letta Evals, the target configuration determines how agents are created, accessed, and tested.
<Note>
**Quick overview:**
- **Three ways to specify agents**: agent file (`.af`), existing agent ID, or programmatic creation script
- **Critical distinction**: `agent_file`/`agent_script` create fresh agents per sample (isolated tests), while `agent_id` uses one agent for all samples (stateful conversation)
- **Multi-model support**: Test the same agent configuration across different LLM models
- **Flexible connection**: Connect to local Letta servers or Letta Cloud
</Note>
**When to use each approach:**
- `agent_file` - Pre-configured agents saved as `.af` files (most common)
- `agent_id` - Testing existing agents or multi-turn conversations with state
- `agent_script` - Dynamic agent creation with per-sample customization
The target configuration specifies how to create or access the agent for evaluation.
## Target Configuration
All targets have a `kind` field (currently only `agent` is supported):
```yaml
target:
kind: agent # Currently only "agent" is supported
# ... agent-specific configuration
```
## Agent Sources
You must specify exactly ONE of these:
### agent_file
Path to a `.af` (Agent File) to upload:
```yaml
target:
kind: agent
agent_file: path/to/agent.af # Path to .af file
base_url: http://localhost:8283 # Letta server URL
```
The agent file will be uploaded to the Letta server and a new agent created for the evaluation.
### agent_id
ID of an existing agent on the server:
```yaml
target:
kind: agent
agent_id: agent-123-abc # ID of existing agent
base_url: http://localhost:8283 # Letta server URL
```
<Warning>
**Modifies agent in-place:** Using `agent_id` will modify your agent's state, memory, and message history during evaluation. The same agent instance is used for all samples, processing them sequentially. **Do not use production agents or agents you don't want to modify.** Use `agent_file` or `agent_script` for reproducible, isolated testing.
</Warning>
### agent_script
Path to a Python script with an agent factory function for programmatic agent creation:
```yaml
target:
kind: agent
agent_script: create_agent.py:create_inventory_agent # script.py:function_name
base_url: http://localhost:8283 # Letta server URL
```
Format: `path/to/script.py:function_name`
The function must be decorated with `@agent_factory` and have the signature `async (client: AsyncLetta, sample: Sample) -> str`:
```python
from letta_client import AsyncLetta, CreateBlock
from letta_evals.decorators import agent_factory
from letta_evals.models import Sample
@agent_factory
async def create_inventory_agent(client: AsyncLetta, sample: Sample) -> str:
"""Create and return agent ID for this sample."""
# Access custom arguments from the dataset
item = sample.agent_args.get("item", {})
# Create agent with sample-specific configuration
agent = await client.agents.create(
name="inventory-assistant",
memory_blocks=[
CreateBlock(
label="item_context",
value=f"Item: {item.get('name', 'Unknown')}"
)
],
agent_type="letta_v1_agent",
model="openai/gpt-4.1-mini",
embedding="openai/text-embedding-3-small",
)
return agent.id
```
**Key features:**
- Creates a fresh agent for each sample
- Can customize agents using `sample.agent_args` from the dataset
- Allows testing agent creation logic itself
- Useful when you don't have pre-saved agent files
**When to use:**
- Testing agent creation workflows
- Dynamic per-sample agent configuration
- Agents that need sample-specific memory or tools
- Programmatic agent testing
## Connection Configuration
### base_url
Letta server URL:
```yaml
target:
base_url: http://localhost:8283 # Local Letta server
# or
base_url: https://api.letta.com # Letta Cloud
```
Default: `http://localhost:8283`
### api_key
API key for authentication (required for Letta Cloud):
```yaml
target:
api_key: your-api-key-here # Required for Letta Cloud
```
Or set via environment variable:
```bash
export LETTA_API_KEY=your-api-key-here
```
### project_id
Letta project ID (for Letta Cloud):
```yaml
target:
project_id: proj_abc123 # Letta Cloud project
```
Or set via environment variable:
```bash
export LETTA_PROJECT_ID=proj_abc123
```
### timeout
Request timeout in seconds:
```yaml
target:
timeout: 300.0 # Request timeout (5 minutes)
```
Default: 300 seconds
## Multi-Model Evaluation
Test the same agent across different models:
### model_configs
List of model configuration names from JSON files:
```yaml
target:
kind: agent
agent_file: agent.af
model_configs: [gpt-4o-mini, claude-3-5-sonnet] # Test with both models
```
The evaluation will run once for each model config. Model configs are JSON files in `letta_evals/llm_model_configs/`.
### model_handles
List of model handles (cloud-compatible identifiers):
```yaml
target:
kind: agent
agent_file: agent.af
model_handles: ["openai/gpt-4o-mini", "anthropic/claude-3-5-sonnet"] # Cloud model identifiers
```
Use this for Letta Cloud deployments.
<Warning>
**Note**: You cannot specify both `model_configs` and `model_handles`.
</Warning>
## Complete Examples
### Local Development
```yaml
target:
kind: agent
agent_file: ./agents/my_agent.af # Pre-configured agent
base_url: http://localhost:8283 # Local server
```
### Letta Cloud
```yaml
target:
kind: agent
agent_id: agent-cloud-123 # Existing cloud agent
base_url: https://api.letta.com # Letta Cloud
api_key: ${LETTA_API_KEY} # From environment variable
project_id: proj_abc # Your project ID
```
### Multi-Model Testing
```yaml
target:
kind: agent
agent_file: agent.af # Same agent configuration
base_url: http://localhost:8283 # Local server
model_configs: [gpt-4o-mini, gpt-4o, claude-3-5-sonnet] # Test 3 models
```
Results will include per-model metrics:
```
Model: gpt-4o-mini - Avg: 0.85, Pass: 85.0%
Model: gpt-4o - Avg: 0.92, Pass: 92.0%
Model: claude-3-5-sonnet - Avg: 0.88, Pass: 88.0%
```
### Programmatic Agent Creation
```yaml
target:
kind: agent
  agent_script: setup.py:create_custom_agent  # Programmatic creation (script.py:function_name)
base_url: http://localhost:8283 # Local server
```
## Environment Variable Precedence
Configuration values are resolved in this order (highest priority first):
1. CLI arguments (`--api-key`, `--base-url`, `--project-id`)
2. Suite YAML configuration
3. Environment variables (`LETTA_API_KEY`, `LETTA_BASE_URL`, `LETTA_PROJECT_ID`)
## Agent Lifecycle and Testing Behavior
The way your agent is specified fundamentally changes how the evaluation runs:
### With agent_file or agent_script: Independent Testing
**Agent lifecycle:**
1. A fresh agent instance is created for each sample
2. Agent processes the sample input(s)
3. Agent remains on the server after the sample completes
**Testing behavior:** Each sample is an independent, isolated test. Agent state (memory, message history) does not carry over between samples. This enables parallel execution and ensures reproducible results.
**Use cases:**
- Testing how the agent responds to various independent inputs
- Ensuring consistent behavior across different scenarios
- Regression testing where each case should be isolated
- Evaluating agent responses without prior context
<Note>
**Example:** If you have 10 test cases, 10 separate agent instances will be created (one per test case), and they can run in parallel.
</Note>
### With agent_id: Sequential Script Testing
**Agent lifecycle:**
1. The same agent instance is used for all samples
2. Agent processes each sample in sequence
3. Agent state persists throughout the entire evaluation
**Testing behavior:** The dataset becomes a conversation script where each sample builds on previous ones. Agent memory and message history accumulate, and earlier interactions affect later responses. Samples must execute sequentially.
**Use cases:**
- Testing multi-turn conversations with context
- Evaluating how agent memory evolves over time
- Simulating a single user session with multiple interactions
- Testing scenarios where context should accumulate
<Note>
**Example:** If you have 10 test cases, they all run against the same agent instance in order, with state carrying over between each test.
</Note>
### Critical Differences
| Aspect | agent_file / agent_script | agent_id |
|--------|---------------------------|----------|
| **Agent instances** | New agent per sample | Same agent for all samples |
| **State isolation** | Fully isolated | State carries over |
| **Execution** | Can run in parallel | Must run sequentially |
| **Memory** | Fresh for each sample | Accumulates across samples |
| **Use case** | Independent test cases | Conversation scripts |
| **Reproducibility** | Highly reproducible | Depends on execution order |
<Tip>
**Best practice:** Use `agent_file` or `agent_script` for most evaluations to ensure reproducible, isolated tests. Use `agent_id` only when you specifically need to test how agent state evolves across multiple interactions.
</Tip>
## Validation
The runner validates:
- Exactly one of `agent_file`, `agent_id`, or `agent_script` is specified
- Agent files have `.af` extension
- Agent script paths are valid
## Next Steps
- [Suite YAML Reference](/evals/configuration/suite-yaml-reference) - Complete target configuration options
- [Datasets](/evals/core-concepts/datasets) - Using agent_args for sample-specific configuration
- [Getting Started](/evals/get-started/getting-started) - Complete tutorial with target examples

View File

@@ -1,427 +0,0 @@
# Suite YAML Reference
Complete reference for suite configuration files.
A **suite** is a YAML file that defines an evaluation: what agent to test, what dataset to use, how to grade responses, and what criteria determine pass/fail. This is your evaluation specification.
<Note>
**Quick overview:**
- **name**: Identifier for your evaluation
- **dataset**: JSONL file with test cases
- **target**: Which agent to evaluate (via file, ID, or script)
- **graders**: How to score responses (tool or rubric graders)
- **gate**: Pass/fail criteria
</Note>
See [Getting Started](/evals/get-started/getting-started) for a tutorial, or [Core Concepts](/evals/core-concepts/suites) for conceptual overview.
## File Structure
```yaml
name: string (required)
description: string (optional)
dataset: path (required)
max_samples: integer (optional)
sample_tags: array (optional)
num_runs: integer (optional)
setup_script: string (optional)
target: object (required)
kind: "agent"
base_url: string
api_key: string
timeout: float
project_id: string
agent_id: string (one of: agent_id, agent_file, agent_script)
agent_file: path
agent_script: string
model_configs: array
model_handles: array
graders: object (required)
<metric_key>: object
kind: "tool" | "rubric"
display_name: string
extractor: string
extractor_config: object
# Tool grader fields
function: string
# Rubric grader fields (LLM API)
prompt: string
prompt_path: path
model: string
temperature: float
provider: string
max_retries: integer
timeout: float
rubric_vars: array
# Rubric grader fields (agent-as-judge)
agent_file: path
judge_tool_name: string
gate: object (required)
metric_key: string
metric: "avg_score" | "accuracy"
op: "gte" | "gt" | "lte" | "lt" | "eq"
value: float
pass_op: "gte" | "gt" | "lte" | "lt" | "eq"
pass_value: float
```
## Top-Level Fields
### name (required)
Suite name, used in output and results.
**Type**: string
```yaml
name: question-answering-eval
```
### description (optional)
Human-readable description of what the suite tests.
**Type**: string
```yaml
description: Tests agent's ability to answer factual questions accurately
```
### dataset (required)
Path to JSONL dataset file. Relative paths are resolved from the suite YAML location.
**Type**: path (string)
```yaml
dataset: ./datasets/qa.jsonl
dataset: /absolute/path/to/dataset.jsonl
```
### max_samples (optional)
Limit the number of samples to evaluate. Useful for quick tests.
**Type**: integer | **Default**: All samples
```yaml
max_samples: 10 # Only evaluate first 10 samples
```
### sample_tags (optional)
Filter samples by tags. Only samples with ALL specified tags are evaluated.
**Type**: array of strings
```yaml
sample_tags: [math, easy] # Only samples tagged with both
```
### num_runs (optional)
Number of times to run the evaluation suite.
**Type**: integer | **Default**: 1
```yaml
num_runs: 5 # Run the evaluation 5 times
```
### setup_script (optional)
Path to Python script with setup function.
**Type**: string (format: `path/to/script.py:function_name`)
```yaml
setup_script: setup.py:prepare_environment
```
## target (required)
Configuration for the agent being evaluated.
### kind (required)
Type of target. Currently only `"agent"` is supported.
```yaml
target:
kind: agent
```
### base_url (optional)
Letta server URL. **Default**: `http://localhost:8283`
```yaml
target:
base_url: http://localhost:8283
# or
base_url: https://api.letta.com
```
### api_key (optional)
API key for Letta authentication. Can also be set via `LETTA_API_KEY` environment variable.
```yaml
target:
api_key: your-api-key-here
```
### timeout (optional)
Request timeout in seconds. **Default**: 300.0
```yaml
target:
timeout: 600.0 # 10 minutes
```
### Agent Source (required, pick one)
Exactly one of these must be specified:
#### agent_id
ID of existing agent on the server.
```yaml
target:
agent_id: agent-123-abc
```
#### agent_file
Path to `.af` agent file.
```yaml
target:
agent_file: ./agents/my_agent.af
```
#### agent_script
Path to Python script with agent factory.
```yaml
target:
agent_script: factory.py:MyAgentFactory
```
See [Targets](/evals/core-concepts/targets) for details on agent sources.
### model_configs (optional)
List of model configuration names to test. Cannot be used with `model_handles`.
```yaml
target:
model_configs: [gpt-4o-mini, claude-3-5-sonnet]
```
### model_handles (optional)
List of model handles for cloud deployments. Cannot be used with `model_configs`.
```yaml
target:
model_handles: ["openai/gpt-4o-mini", "anthropic/claude-3-5-sonnet"]
```
## graders (required)
One or more graders, each with a unique key.
### kind (required)
Grader type: `"tool"` or `"rubric"`.
```yaml
graders:
my_metric:
kind: tool
```
### extractor (required)
Name of the extractor to use.
```yaml
graders:
my_metric:
extractor: last_assistant
```
### Tool Grader Fields
#### function (required for tool graders)
Name of the grading function.
```yaml
graders:
accuracy:
kind: tool
function: exact_match
```
### Rubric Grader Fields
#### prompt or prompt_path (required)
Inline rubric prompt or path to rubric file.
```yaml
graders:
quality:
kind: rubric
prompt: |
Evaluate response quality from 0.0 to 1.0.
```
#### model (optional)
LLM model for judging. **Default**: `gpt-4o-mini`
```yaml
graders:
quality:
kind: rubric
model: gpt-4o
```
#### temperature (optional)
Temperature for LLM generation. **Default**: 0.0
```yaml
graders:
quality:
kind: rubric
temperature: 0.0
```
#### agent_file (agent-as-judge)
Path to `.af` agent file to use as judge.
```yaml
graders:
agent_judge:
kind: rubric
agent_file: judge.af
prompt_path: rubric.txt
```
## gate (required)
Pass/fail criteria for the evaluation.
### metric_key (optional)
Which grader to evaluate. If only one grader, this can be omitted.
```yaml
gate:
metric_key: accuracy
```
### metric (optional)
Which aggregate to compare: `avg_score` or `accuracy`. **Default**: `avg_score`
```yaml
gate:
metric: avg_score
```
### op (required)
Comparison operator: `gte`, `gt`, `lte`, `lt`, `eq`
```yaml
gate:
op: gte # Greater than or equal
```
### value (required)
Threshold value for comparison (0.0 to 1.0).
```yaml
gate:
value: 0.8 # Require >= 0.8
```
## Complete Examples
### Minimal Suite
```yaml
name: basic-eval
dataset: dataset.jsonl
target:
kind: agent
agent_file: agent.af
graders:
accuracy:
kind: tool
function: exact_match
extractor: last_assistant
gate:
op: gte
value: 0.8
```
### Multi-Metric Suite
```yaml
name: comprehensive-eval
description: Tests accuracy and quality
dataset: test_data.jsonl
target:
kind: agent
agent_file: agent.af
graders:
accuracy:
kind: tool
function: contains
extractor: last_assistant
quality:
kind: rubric
prompt_path: rubrics/quality.txt
model: gpt-4o-mini
extractor: last_assistant
gate:
metric_key: accuracy
op: gte
value: 0.85
```
## Validation
Validate your suite before running:
```bash
letta-evals validate suite.yaml
```
## Next Steps
- [Targets](/evals/core-concepts/targets) - Understanding agent sources and configuration
- [Graders](/evals/core-concepts/graders) - Tool graders vs rubric graders
- [Extractors](/evals/core-concepts/extractors) - What to extract from agent responses
- [Gates](/evals/core-concepts/gates) - Setting pass/fail criteria

View File

@@ -1,96 +0,0 @@
# Built-in Extractors
Letta Evals provides a set of built-in extractors that cover the most common extraction needs.
<Note>
**What are extractors?** Extractors determine what part of an agent's response gets evaluated. They take the full conversation trajectory and extract just the piece you want to grade.
</Note>
## Common Extractors
### last_assistant
Extracts the last assistant message content.
```yaml
extractor: last_assistant # Most common - gets final response
```
### first_assistant
Extracts the first assistant message content.
```yaml
extractor: first_assistant
```
### all_assistant
Concatenates all assistant messages with a separator.
```yaml
extractor: all_assistant
extractor_config:
separator: "\n\n" # Join messages with double newline
```
### pattern
Extracts content matching a regex pattern.
```yaml
extractor: pattern
extractor_config:
pattern: 'Result: (\d+)' # Regex pattern to match
group: 1 # Extract capture group 1
```
### tool_arguments
Extracts arguments from a specific tool call.
```yaml
extractor: tool_arguments
extractor_config:
tool_name: search # Which tool to extract from
```
### tool_output
Extracts the return value from a specific tool call.
```yaml
extractor: tool_output
extractor_config:
tool_name: search
```
### memory_block
Extracts content from a specific memory block.
```yaml
extractor: memory_block
extractor_config:
block_label: human # Which memory block to extract
```
<Warning>
**Important**: This extractor requires the agent's final state, which adds overhead.
</Warning>
### after_marker
Extracts content after a specific marker string.
```yaml
extractor: after_marker
extractor_config:
marker: "ANSWER:"
include_marker: false
```
## Next Steps
- [Custom Extractors](/evals/extractors/custom-extractors) - Write your own extractors
- [Extractors Concept](/evals/core-concepts/extractors) - Understanding extractors

View File

@@ -1,55 +0,0 @@
# Custom Extractors
Create your own extractors to pull exactly what you need from agent trajectories.
<Note>
While built-in extractors cover common cases, custom extractors let you implement specialized extraction logic for your specific use case.
</Note>
## Why Custom Extractors?
Use custom extractors when you need to:
- **Extract structured data**: Parse JSON fields from agent responses
- **Filter specific patterns**: Extract code blocks, URLs, or formatted content
- **Combine data sources**: Merge information from multiple messages or memory blocks
- **Count occurrences**: Track how many times something happened
- **Complex logic**: Implement domain-specific extraction
## Basic Structure
```python
from letta_evals.decorators import extractor
from letta_client import LettaMessageUnion
from typing import List
@extractor
def my_extractor(trajectory: List[List[LettaMessageUnion]], config: dict) -> str:
"""Extract custom content from trajectory."""
# Your extraction logic here
return extracted_text
```
## Example: Extract Memory Insert
```python
from letta_evals.decorators import extractor
@extractor
def memory_insert_args(trajectory, config):
"""Extract arguments from memory_insert tool calls."""
for turn in trajectory:
for message in turn:
if hasattr(message, 'tool_call') and message.tool_call:
if message.tool_call.name == "memory_insert":
return str(message.tool_call.arguments)
return ""
```
## Registration
Custom extractors are automatically registered when you import them in your suite's setup script or custom evaluators file.
## Next Steps
- [Built-in Extractors](/evals/extractors/built-in-extractors) - Available extractors
- [Extractors Concept](/evals/core-concepts/extractors) - Understanding extractors

View File

@@ -1,264 +0,0 @@
# Getting Started
Run your first Letta agent evaluation in 5 minutes.
## Prerequisites
- Python 3.11 or higher
- A running Letta server (local or Letta Cloud)
- A Letta agent to test, either in agent file format or by ID (see [Targets](/evals/core-concepts/targets) for more details)
## Installation
```bash
pip install letta-evals
```
Or with uv:
```bash
uv pip install letta-evals
```
## Getting an Agent to Test
Export an existing agent to a file using the Letta SDK:
```python
from letta_client import Letta
import os
client = Letta(
base_url="http://localhost:8283", # or https://api.letta.com for Letta Cloud
token=os.getenv("LETTA_API_KEY") # required for Letta Cloud
)
# Export an agent to a file
agent_file = client.agents.export_file(agent_id="agent-123")
# Save to disk
with open("my_agent.af", "w") as f:
f.write(agent_file)
```
Or export via the Agent Development Environment (ADE) by selecting "Export Agent".
Then reference it in your suite:
```yaml
target:
kind: agent
agent_file: my_agent.af
```
<Note>
**Other options:** You can also use existing agents by ID or programmatically generate agents. See [Targets](/evals/core-concepts/targets) for all agent configuration options.
</Note>
## Quick Start
Let's create your first evaluation in 3 steps:
### 1. Create a Test Dataset
Create a file named `dataset.jsonl`:
```jsonl
{"input": "What's the capital of France?", "ground_truth": "Paris"}
{"input": "Calculate 2+2", "ground_truth": "4"}
{"input": "What color is the sky?", "ground_truth": "blue"}
```
Each line is a JSON object with:
- `input`: The prompt to send to your agent
- `ground_truth`: The expected answer (used for grading)
<Note>
`ground_truth` is optional for some graders (like rubric graders), but required for tool graders like `contains` and `exact_match`.
</Note>
Read more about [Datasets](/evals/core-concepts/datasets) for details on how to create your dataset.
### 2. Create a Suite Configuration
Create a file named `suite.yaml`:
```yaml
name: my-first-eval
dataset: dataset.jsonl
target:
kind: agent
agent_file: my_agent.af # Path to your agent file
base_url: http://localhost:8283 # Your Letta server
graders:
quality:
kind: tool
function: contains # Check if response contains the ground truth
extractor: last_assistant # Use the last assistant message
gate:
metric_key: quality
op: gte
value: 0.75 # Require 75% pass rate
```
The suite configuration defines:
- The [dataset](/evals/core-concepts/datasets) to use
- The [agent](/evals/core-concepts/targets) to test
- The [graders](/evals/core-concepts/graders) to use
- The [gate](/evals/core-concepts/gates) criteria
Read more about [Suites](/evals/core-concepts/suites) for details on how to configure your evaluation.
### 3. Run the Evaluation
Run your evaluation with the following command:
```bash
letta-evals run suite.yaml
```
You'll see real-time progress as your evaluation runs:
```
Running evaluation: my-first-eval
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3/3 100%
✓ PASSED (3.00/3.00 avg, 100.0% pass rate)
```
Read more about [CLI Commands](/evals/cli-reference/commands) for details about the available commands and options.
## Understanding the Results
The core evaluation flow is:
**Dataset → Target (Agent) → Extractor → Grader → Gate → Result**
The evaluation runner:
1. Loads your dataset
2. Sends each input to your agent (Target)
3. Extracts the relevant information (using the Extractor)
4. Grades the response (using the Grader function)
5. Computes aggregate metrics
6. Checks if metrics pass the Gate criteria
The output shows:
- **Average score**: Mean score across all samples
- **Pass rate**: Percentage of samples that passed
- **Gate status**: Whether the evaluation passed or failed overall
## Next Steps
Now that you've run your first evaluation, explore more advanced features:
- [Core Concepts](/evals/core-concepts/concepts-overview) - Understand suites, datasets, graders, and extractors
- [Grader Types](/evals/core-concepts/graders) - Learn about tool graders vs rubric graders
- [Multi-Metric Evaluation](/evals/graders/multi-metric-grading) - Test multiple aspects simultaneously
- [Custom Graders](/evals/advanced/custom-graders) - Write custom grading functions
- [Multi-Turn Conversations](/evals/advanced/multi-turn-conversations) - Test conversational memory
## Common Use Cases
### Strict Answer Checking
Use exact matching for cases where the answer must be precisely correct:
```yaml
graders:
accuracy:
kind: tool
function: exact_match
extractor: last_assistant
```
### Subjective Quality Evaluation
Use an LLM judge to evaluate subjective qualities like helpfulness or tone:
```yaml
graders:
quality:
kind: rubric
prompt_path: rubric.txt
model: gpt-4o-mini
extractor: last_assistant
```
Then create `rubric.txt`:
```
Rate the helpfulness and accuracy of the response.
- Score 1.0 if helpful and accurate
- Score 0.5 if partially helpful
- Score 0.0 if unhelpful or wrong
```
### Testing Tool Calls
Verify that your agent calls specific tools with expected arguments:
```yaml
graders:
tool_check:
kind: tool
function: contains
extractor: tool_arguments
extractor_config:
tool_name: search
```
### Testing Memory Persistence
Check if the agent correctly updates its memory blocks:
```yaml
graders:
memory_check:
kind: tool
function: contains
extractor: memory_block
extractor_config:
block_label: human
```
## Troubleshooting
<Warning>
**"Agent file not found"**
Make sure your `agent_file` path is correct. Paths are relative to the suite YAML file location. Use absolute paths if needed:
```yaml
target:
agent_file: /absolute/path/to/my_agent.af
```
</Warning>
<Warning>
**"Connection refused"**
Your Letta server isn't running or isn't accessible. Start it with:
```bash
letta server
```
By default, it runs at `http://localhost:8283`.
</Warning>
<Warning>
**"No ground_truth provided"**
Tool graders like `exact_match` and `contains` require `ground_truth` in your dataset. Either:
- Add `ground_truth` to your samples, or
- Use a rubric grader which doesn't require ground truth
</Warning>
<Tip>
**Agent didn't respond as expected**
Try testing your agent manually first using the Letta SDK or Agent Development Environment (ADE) to see how it behaves before running evaluations. See the [Letta documentation](https://docs.letta.com) for more information.
</Tip>
For more help, see the [Troubleshooting Guide](/evals/troubleshooting/common-issues).

View File

@@ -1,58 +0,0 @@
# Multi-Metric Evaluation
Evaluate multiple aspects of agent performance simultaneously in a single evaluation suite.
<Note>
Multi-metric evaluation allows you to define multiple graders, each measuring a different dimension of your agent's behavior.
</Note>
## Why Multiple Metrics?
Agents are complex systems. You might want to evaluate:
- **Correctness**: Does the answer match the expected output?
- **Quality**: Is the explanation clear and complete?
- **Tool usage**: Does the agent call the right tools with correct arguments?
- **Memory**: Does the agent correctly update its memory blocks?
- **Format**: Does the output follow required formatting rules?
## Configuration
```yaml
graders:
accuracy: # Check if answer is correct
kind: tool
function: exact_match
extractor: last_assistant
completeness: # LLM judges response quality
kind: rubric
prompt_path: rubrics/completeness.txt
model: gpt-4o-mini
extractor: last_assistant
tool_usage: # Verify correct tool was called
kind: tool
function: contains
extractor: tool_arguments
extractor_config:
tool_name: search
```
## Gating on One Metric
The gate can check any of these metrics:
```yaml
gate:
metric_key: accuracy # Gate on accuracy (others still computed)
op: gte
value: 0.9
```
Results will include scores for all graders, even if you only gate on one.
## Next Steps
- [Tool Graders](/evals/graders/tool-graders) - Deterministic evaluation
- [Rubric Graders](/evals/graders/rubric-graders) - LLM-as-judge evaluation
- [Gates](/evals/core-concepts/gates) - Setting pass/fail criteria

View File

@@ -1,82 +0,0 @@
# Rubric Graders
Rubric graders use language models to evaluate submissions based on custom criteria. They're ideal for subjective, nuanced evaluation.
<Note>
Rubric graders work by providing the LLM with a prompt that describes the evaluation criteria, then the language model generates a structured JSON response with a score and rationale.
</Note>
## Basic Configuration
```yaml
graders:
quality:
kind: rubric
prompt_path: quality_rubric.txt # Evaluation criteria
model: gpt-4o-mini # Judge model
temperature: 0.0 # Deterministic
extractor: last_assistant # What to evaluate
```
## Rubric Prompt Format
Your rubric file should describe the evaluation criteria. Use placeholders:
- `{input}`: The original input from the dataset
- `{submission}`: The extracted agent response
- `{ground_truth}`: Ground truth from dataset (if available)
Example `quality_rubric.txt`:
```
Evaluate the response for:
1. Accuracy: Does it correctly answer the question?
2. Completeness: Is the answer thorough?
3. Clarity: Is it well-explained?
Input: {input}
Expected: {ground_truth}
Response: {submission}
Score from 0.0 to 1.0 where:
- 1.0: Perfect response
- 0.75: Good with minor issues
- 0.5: Acceptable but incomplete
- 0.25: Poor quality
- 0.0: Completely wrong
```
## Model Configuration
```yaml
graders:
quality:
kind: rubric
prompt_path: rubric.txt
model: gpt-4o-mini # Judge model
temperature: 0.0 # Deterministic
provider: openai # LLM provider
max_retries: 5 # API retry attempts
timeout: 120.0 # Request timeout
```
## Agent-as-Judge
Use a Letta agent as the judge instead of a direct LLM API call:
```yaml
graders:
agent_judge:
kind: rubric
agent_file: judge.af # Judge agent with submit_grade tool
prompt_path: rubric.txt # Evaluation criteria
extractor: last_assistant
```
**Requirements**: The judge agent must have a tool with signature `submit_grade(score: float, rationale: str)`.
## Next Steps
- [Tool Graders](/evals/graders/tool-graders) - Deterministic grading functions
- [Multi-Metric](/evals/graders/multi-metric-grading) - Combine multiple graders
- [Custom Graders](/evals/advanced/custom-graders) - Write your own grading logic

View File

@@ -1,85 +0,0 @@
# Tool Graders
Tool graders use Python functions to programmatically evaluate submissions. They're ideal for deterministic, rule-based evaluation.
## Overview
Tool graders:
- Execute Python functions that take `(sample, submission)` and return a `GradeResult`
- Are fast and deterministic
- Don't require external API calls
- Can implement any custom logic
## Configuration
```yaml
graders:
my_metric:
kind: tool
function: exact_match # Function name
extractor: last_assistant # What to extract from trajectory
```
## Built-in Functions
### exact_match
Checks if submission exactly matches ground truth (case-sensitive, whitespace-trimmed).
```yaml
graders:
accuracy:
kind: tool
function: exact_match
extractor: last_assistant
```
**Requires**: `ground_truth` in dataset | **Score**: 1.0 if exact match, 0.0 otherwise
### contains
Checks if submission contains ground truth (case-insensitive).
```yaml
graders:
contains_answer:
kind: tool
function: contains
extractor: last_assistant
```
**Requires**: `ground_truth` in dataset | **Score**: 1.0 if found, 0.0 otherwise
### regex_match
Checks if submission matches a regex pattern in ground truth.
```yaml
graders:
pattern:
kind: tool
function: regex_match
extractor: last_assistant
```
**Score**: 1.0 if pattern matches, 0.0 otherwise
### ascii_printable_only
Validates that all characters are printable ASCII.
```yaml
graders:
ascii_check:
kind: tool
function: ascii_printable_only
extractor: last_assistant
```
**Score**: 1.0 if all characters are printable ASCII, 0.0 otherwise
## Next Steps
- [Rubric Graders](/evals/graders/rubric-graders) - LLM-as-judge evaluation
- [Custom Graders](/evals/advanced/custom-graders) - Write your own grading functions
- [Multi-Metric](/evals/graders/multi-metric-grading) - Combine multiple graders

View File

@@ -1,47 +0,0 @@
# Letta Evals
**Systematic testing for stateful AI agents.** Validate changes, prevent regressions, and ship with confidence.
Test agent memory, tool usage, multi-turn conversations, and state evolution with automated grading and pass/fail gates.
<Note>
**Ready to start?** Jump to [Getting Started](/evals/get-started/getting-started) or learn the [Core Concepts](/evals/core-concepts/concepts-overview) first.
</Note>
## Core Concepts
Understand the building blocks of evaluations:
- [Suites](/evals/core-concepts/suites) - Configure your evaluation
- [Datasets](/evals/core-concepts/datasets) - Define test cases
- [Targets](/evals/core-concepts/targets) - Specify the agent to test
- [Graders](/evals/core-concepts/graders) - Score agent outputs
- [Extractors](/evals/core-concepts/extractors) - Extract content from responses
- [Gates](/evals/core-concepts/gates) - Set pass/fail criteria
### Grading & Extraction
Choose how to score your agents:
- [Tool Graders](/evals/graders/tool-graders) - Fast, deterministic grading with Python functions
- [Rubric Graders](/evals/graders/rubric-graders) - Flexible LLM-as-judge evaluation
- [Built-in Extractors](/evals/extractors/built-in-extractors) - Pre-built content extractors
- [Multi-Metric Grading](/evals/graders/multi-metric-grading) - Evaluate multiple dimensions
### Advanced
- [Custom Graders](/evals/advanced/custom-graders) - Write your own grading logic
- [Custom Extractors](/evals/extractors/custom-extractors) - Build custom extractors
- [Multi-Turn Conversations](/evals/advanced/multi-turn-conversations) - Test memory and state
- [Suite YAML Reference](/evals/configuration/suite-yaml-reference) - Complete configuration schema
### Reference
- [CLI Commands](/evals/cli-reference/commands) - Command-line interface
- [Understanding Results](/evals/results-metrics/understanding-results) - Interpret metrics
- [Troubleshooting](/evals/troubleshooting/common-issues) - Common issues and solutions
## Resources
- **[GitHub Repository](https://github.com/letta-ai/letta-evals)** - Source code, issues, and contributions
- **[PyPI Package](https://pypi.org/project/letta-evals/)** - Install with `pip install letta-evals`

View File

@@ -1,484 +0,0 @@
# Understanding Results
This guide explains how to interpret evaluation results.
## Result Structure
An evaluation produces three types of output:
1. **Console output**: Real-time progress and summary
2. **Summary JSON**: Aggregate metrics and configuration
3. **Results JSONL**: Per-sample detailed results
## Console Output
### Progress Display
```
Running evaluation: my-eval-suite
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3/3 100%
Results:
Total samples: 3
Attempted: 3
Avg score: 0.83 (attempted: 0.83)
Passed: 2 (66.7%)
Gate (quality >= 0.75): PASSED
```
### Quiet Mode
```bash
letta-evals run suite.yaml --quiet
```
Output:
```
✓ PASSED
```
or
```
✗ FAILED
```
## JSON Output
### Saving Results
```bash
letta-evals run suite.yaml --output results/
```
Creates three files:
#### header.json
Evaluation metadata:
```json
{
"suite_name": "my-eval-suite",
"timestamp": "2025-01-15T10:30:00Z",
"version": "0.3.0"
}
```
#### summary.json
Complete evaluation summary:
```json
{
"suite": "my-eval-suite",
"config": {
"target": {...},
"graders": {...},
"gate": {...}
},
"metrics": {
"total": 10,
"total_attempted": 10,
"avg_score_attempted": 0.85,
"avg_score_total": 0.85,
"passed_attempts": 8,
"failed_attempts": 2,
"by_metric": {
"accuracy": {
"avg_score_attempted": 0.90,
"pass_rate": 90.0,
"passed_attempts": 9,
"failed_attempts": 1
},
"quality": {
"avg_score_attempted": 0.80,
"pass_rate": 70.0,
"passed_attempts": 7,
"failed_attempts": 3
}
}
},
"gates_passed": true
}
```
#### results.jsonl
One JSON object per line, each representing one sample:
```jsonl
{"sample": {"id": 0, "input": "What is 2+2?", "ground_truth": "4"}, "submission": "4", "grade": {"score": 1.0, "rationale": "Exact match: true"}, "trajectory": [...], "agent_id": "agent-123", "model_name": "default"}
{"sample": {"id": 1, "input": "What is 3+3?", "ground_truth": "6"}, "submission": "6", "grade": {"score": 1.0, "rationale": "Exact match: true"}, "trajectory": [...], "agent_id": "agent-124", "model_name": "default"}
```
## Metrics Explained
### total
Total number of samples in the evaluation (including errors).
### total_attempted
Number of samples that completed without errors.
If a sample fails during agent execution or grading, it's counted in `total` but not `total_attempted`.
### avg_score_attempted
Average score across samples that completed successfully.
Formula: `sum(scores) / total_attempted`
Range: 0.0 to 1.0
### avg_score_total
Average score across all samples, treating errors as 0.0.
Formula: `sum(scores) / total`
Range: 0.0 to 1.0
### passed_attempts / failed_attempts
Number of samples that passed/failed the gate's per-sample criteria.
By default:
- If gate metric is `accuracy`: sample passes if score `>= 1.0`
- If gate metric is `avg_score`: sample passes if score `>=` gate value
Can be customized with `pass_op` and `pass_value` in gate config.
### by_metric
For multi-metric evaluation, shows aggregate stats for each metric:
```json
"by_metric": {
"accuracy": {
"avg_score_attempted": 0.90,
"avg_score_total": 0.85,
"pass_rate": 90.0,
"passed_attempts": 9,
"failed_attempts": 1
}
}
```
## Sample Results
Each sample result includes:
### sample
The original dataset sample:
```json
"sample": {
"id": 0,
"input": "What is 2+2?",
"ground_truth": "4",
"metadata": {...}
}
```
### submission
The extracted text that was graded:
```json
"submission": "The answer is 4"
```
### grade
The grading result:
```json
"grade": {
"score": 1.0,
"rationale": "Contains ground_truth: true",
"metadata": {"model": "gpt-4o-mini", "usage": {...}}
}
```
### grades (multi-metric)
For multi-metric evaluation:
```json
"grades": {
"accuracy": {"score": 1.0, "rationale": "Exact match"},
"quality": {"score": 0.85, "rationale": "Good but verbose"}
}
```
### trajectory
The complete conversation history:
```json
"trajectory": [
[
{"role": "user", "content": "What is 2+2?"},
{"role": "assistant", "content": "The answer is 4"}
]
]
```
### agent_id
The ID of the agent that generated this response:
```json
"agent_id": "agent-abc-123"
```
### model_name
The model configuration used:
```json
"model_name": "gpt-4o-mini"
```
### agent_usage
Token usage statistics (if available):
```json
"agent_usage": [
{"completion_tokens": 10, "prompt_tokens": 50, "total_tokens": 60}
]
```
## Interpreting Scores
### Score Ranges
- **1.0**: Perfect - fully meets criteria
- **0.8-0.99**: Very good - minor issues
- **0.6-0.79**: Good - notable improvements possible
- **0.4-0.59**: Acceptable - significant issues
- **0.2-0.39**: Poor - major problems
- **0.0-0.19**: Failed - did not meet criteria
### Binary vs Continuous
**Tool graders** typically return binary scores:
- 1.0: Passed
- 0.0: Failed
**Rubric graders** return continuous scores:
- Any value from 0.0 to 1.0
- Allows for partial credit
## Multi-Model Results
When testing multiple models:
```json
"metrics": {
"per_model": [
{
"model_name": "gpt-4o-mini",
"avg_score_attempted": 0.85,
"passed_samples": 8,
"failed_samples": 2
},
{
"model_name": "claude-3-5-sonnet",
"avg_score_attempted": 0.90,
"passed_samples": 9,
"failed_samples": 1
}
]
}
```
Console output:
```
Results by model:
gpt-4o-mini - Avg: 0.85, Pass: 80.0%
claude-3-5-sonnet - Avg: 0.90, Pass: 90.0%
```
## Multiple Runs Statistics
Run evaluations multiple times to measure consistency and get aggregate statistics.
### Configuration
Specify in YAML:
```yaml
name: my-eval-suite
dataset: dataset.jsonl
num_runs: 5 # Run 5 times
target:
kind: agent
agent_file: my_agent.af
graders:
accuracy:
kind: tool
function: exact_match
gate:
metric_key: accuracy
op: gte
value: 0.8
```
Or via CLI:
```bash
letta-evals run suite.yaml --num-runs 10 --output results/
```
### Output Structure
```
results/
├── run_1/
│ ├── header.json
│ ├── results.jsonl
│ └── summary.json
├── run_2/
│ ├── header.json
│ ├── results.jsonl
│ └── summary.json
├── ...
└── aggregate_stats.json # Statistics across all runs
```
### Aggregate Statistics File
The `aggregate_stats.json` includes statistics across all runs:
```json
{
"num_runs": 10,
"runs_passed": 8,
"mean_avg_score_attempted": 0.847,
"std_avg_score_attempted": 0.042,
"mean_avg_score_total": 0.847,
"std_avg_score_total": 0.042,
"mean_scores": {
"accuracy": 0.89,
"quality": 0.82
},
"std_scores": {
"accuracy": 0.035,
"quality": 0.051
},
"individual_run_metrics": [
{
"avg_score_attempted": 0.85,
"avg_score_total": 0.85,
"pass_rate": 0.85,
"by_metric": {
"accuracy": {
"avg_score_attempted": 0.90,
"avg_score_total": 0.90,
"pass_rate": 0.90
}
}
}
// ... metrics from runs 2-10
]
}
```
**Key fields**:
- `num_runs`: Total number of runs executed
- `runs_passed`: Number of runs that passed the gate
- `mean_avg_score_attempted`: Mean score across runs (only attempted samples)
- `std_avg_score_attempted`: Standard deviation (measures consistency)
- `mean_scores`: Mean for each metric (e.g., `{"accuracy": 0.89}`)
- `std_scores`: Standard deviation for each metric (e.g., `{"accuracy": 0.035}`)
- `individual_run_metrics`: Full metrics object from each individual run
### Use Cases
**Measure consistency of non-deterministic agents:**
```bash
letta-evals run suite.yaml --num-runs 20 --output results/
# Check std_avg_score_attempted in aggregate_stats.json
# Low std = consistent, high std = variable
```
**Get confidence intervals:**
```python
import json
import math
with open("results/aggregate_stats.json") as f:
stats = json.load(f)
mean = stats["mean_avg_score_attempted"]
std = stats["std_avg_score_attempted"]
n = stats["num_runs"]
# 95% confidence interval (assuming normal distribution)
margin = 1.96 * (std / math.sqrt(n))
print(f"Score: {mean:.3f} ± {margin:.3f}")
```
**Compare metric consistency:**
```python
with open("results/aggregate_stats.json") as f:
stats = json.load(f)
for metric_name, mean in stats["mean_scores"].items():
std = stats["std_scores"][metric_name]
consistency = "consistent" if std < 0.05 else "variable"
print(f"{metric_name}: {mean:.3f} ± {std:.3f} ({consistency})")
```
## Error Handling
If a sample encounters an error:
```json
{
"sample": {...},
"submission": "",
"grade": {
"score": 0.0,
"rationale": "Error during grading: Connection timeout",
"metadata": {"error": "timeout", "error_type": "ConnectionError"}
}
}
```
Errors:
- Count toward `total` but not `total_attempted`
- Get score of 0.0
- Include error details in rationale and metadata
## Analyzing Results
### Find Low Scores
```python
import json
with open("results/results.jsonl") as f:
results = [json.loads(line) for line in f]
low_scores = [r for r in results if r["grade"]["score"] < 0.5]
print(f"Found {len(low_scores)} samples with score < 0.5")
for result in low_scores:
print(f"Sample {result['sample']['id']}: {result['grade']['rationale']}")
```
### Compare Metrics
```python
# Load summary
with open("results/summary.json") as f:
summary = json.load(f)
metrics = summary["metrics"]["by_metric"]
for name, stats in metrics.items():
print(f"{name}: {stats['avg_score_attempted']:.2f} avg, {stats['pass_rate']:.1%} pass")
```
### Extract Failures
```python
# Find samples that failed gate criteria
failures = [
r for r in results
if not gate_passed(r["grade"]["score"]) # Your gate logic
]
```
## Next Steps
- [Gates](/evals/core-concepts/gates) - Setting pass/fail criteria
- [CLI Commands](/evals/cli-reference/commands) - Running evaluations

View File

@@ -1,267 +0,0 @@
# Troubleshooting
Common issues and solutions when using Letta Evals.
## Installation Issues
<Warning>
**"Command not found: letta-evals"**
**Problem**: CLI not available after installation
**Solution**:
```bash
# Verify installation
pip list | grep letta-evals
# Reinstall if needed
pip install --upgrade letta-evals
```
</Warning>
<Warning>
**Import errors**
**Problem**: `ModuleNotFoundError: No module named 'letta_evals'`
**Solution**:
```bash
# Ensure you're in the right environment
which python
# Install in correct environment
source .venv/bin/activate
pip install letta-evals
```
</Warning>
## Configuration Issues
<Warning>
**"Agent file not found"**
**Problem**: `FileNotFoundError: agent.af`
**Solution**:
- Check the path is correct relative to the suite YAML
- Use absolute paths if needed
- Verify file exists: `ls -la path/to/agent.af`
```yaml
# Correct relative path
target:
agent_file: ./agents/my_agent.af
```
</Warning>
<Warning>
**"Dataset not found"**
**Problem**: Cannot load dataset file
**Solution**:
- Verify dataset path in YAML
- Check file exists: `ls -la dataset.jsonl`
- Ensure proper JSONL format (one JSON object per line)
```bash
# Validate JSONL format
cat dataset.jsonl | jq .
```
</Warning>
<Warning>
**"Validation failed: unknown function"**
**Problem**: Grader function not found
**Solution**:
```bash
# List available graders
letta-evals list-graders
# Check spelling in suite.yaml
graders:
my_metric:
function: exact_match # Correct
```
</Warning>
## Connection Issues
<Warning>
**"Connection refused"**
**Problem**: Cannot connect to Letta server
**Solution**:
```bash
# Verify server is running
curl http://localhost:8283/v1/health
# Check base_url in suite.yaml
target:
base_url: http://localhost:8283
```
</Warning>
<Warning>
**"Unauthorized" or "Invalid API key"**
**Problem**: Authentication failed
**Solution**:
```bash
# Set API key
export LETTA_API_KEY=your-key-here
# Verify key is correct
echo $LETTA_API_KEY
```
</Warning>
## Runtime Issues
<Warning>
**"No ground_truth provided"**
**Problem**: Grader requires ground truth but sample doesn't have it
**Solution**:
- Add ground_truth to dataset samples:
```jsonl
{"input": "What is 2+2?", "ground_truth": "4"}
```
- Or use a grader that doesn't require ground truth:
```yaml
graders:
quality:
kind: rubric # Doesn't require ground_truth
prompt_path: rubric.txt
```
</Warning>
## Performance Issues
<Tip>
**Evaluation is very slow**
**Solutions**:
1. Increase concurrency:
```bash
letta-evals run suite.yaml --max-concurrent 20
```
2. Reduce samples for testing:
```yaml
max_samples: 10 # Test with small subset first
```
3. Use tool graders instead of rubric graders:
```yaml
graders:
accuracy:
kind: tool # Much faster than rubric
function: exact_match
```
</Tip>
<Tip>
**High API costs**
**Solutions**:
1. Use cheaper models:
```yaml
graders:
quality:
model: gpt-4o-mini # Cheaper than gpt-4o
```
2. Test with small sample first:
```yaml
max_samples: 5 # Verify before running full suite
```
</Tip>
## Results Issues
<Warning>
**"All scores are 0.0"**
**Solutions**:
1. Verify extractor is getting content
2. Check grader logic
3. Test agent manually first
</Warning>
<Warning>
**"Gates failed but scores look good"**
**Solution**:
- Check gate configuration:
```yaml
gate:
metric_key: accuracy # Correct metric?
metric: avg_score # Or accuracy?
op: gte # Correct operator?
value: 0.8 # Correct threshold?
```
</Warning>
## Debug Tips
### Enable verbose output
Run without `--quiet` to see detailed progress:
```bash
letta-evals run suite.yaml
```
### Examine output files
```bash
letta-evals run suite.yaml --output debug/
# Check summary
cat debug/summary.json | jq .
# Check individual results
cat debug/results.jsonl | jq .
```
### Validate configuration
```bash
letta-evals validate suite.yaml
```
### Check component availability
```bash
letta-evals list-graders
letta-evals list-extractors
```
## Getting Help
If you're still stuck:
1. Check the [Getting Started guide](/evals/get-started/getting-started)
2. Review the [Core Concepts](/evals/core-concepts/concepts-overview)
3. Report issues at the [Letta Evals GitHub repository](https://github.com/letta-ai/letta-evals)
When reporting issues, include:
- Suite YAML configuration
- Dataset sample (if not sensitive)
- Error message and full stack trace
- Environment info (OS, Python version)
```bash
# Get environment info
python --version
pip show letta-evals
```

View File

@@ -1,535 +0,0 @@
---
title: Prompts for Vibecoding
subtitle: Ready-to-go prompts to help AI coding tools build on Letta
slug: prompts
---
Are you developing an application on Letta using [ChatGPT](https://chatgpt.com), [Cursor](https://cursor.com), [Lovable](https://lovable.dev/), or another AI tool?
Use our pre-made prompts to teach your AI how to use Letta properly.
## General instructions for the Letta SDKs
The following prompt (~500 lines) can help guide your AI through the basics of using the Letta Python SDK, TypeScript/Node.js SDK, and Vercel AI SDK integration.
Copy-paste the following into your chat session to instantly get your AI up-to-speed with how the Letta SDKs work:
````markdown maxLines=5
# Development Guidelines for AI Assistants and Copilots using Letta
**Context:** These are development guidelines for building applications with the Letta API and SDKs. Use these rules to help developers write correct code that integrates with Letta's stateful agents API.
**Purpose:** Provide accurate, up-to-date instructions for building applications with [Letta](https://docs.letta.com/), the AI operating system.
**Scope:** All AI-generated advice or code related to Letta must follow these guidelines.
---
## **0. Letta Overview**
The name "Letta" refers to both the company Letta (founded by the creators of MemGPT) and the software / infrastructure called Letta. Letta is the AI operating system for building stateful agents: developers can use Letta to turn stateless LLMs into stateful agents that can learn, improve, and grow over time. Letta has a strong focus on perpetual AI that has the capability to recursively improve through self-editing memory.
**Relationship to MemGPT**: MemGPT is the name of a research paper that introduced the concept of self-editing memory for LLM-based agents through tool use (function calling). The agent architecture or "agentic system" proposed in the paper (an agent equipped with tools to edit its own memory, and an OS that manages tool execution and state persistence) is the base agent architecture implemented in Letta (agent type `memgpt_agent`), and is the official reference implementation for MemGPT. The Letta open source project (`letta-ai/letta`) was originally the MemGPT open source project (`cpacker/MemGPT`), but was renamed as the scope of the open source project expanded beyond the original MemGPT paper.
**Additional Resources**:
- [Letta documentation](https://docs.letta.com/)
- [Letta GitHub repository](https://github.com/letta-ai/letta)
- [Letta Discord server](https://discord.gg/letta)
- [Letta Cloud and ADE login](https://app.letta.com)
## **1. Letta Agents API Overview**
Letta is an AI OS that runs agents as **services** (it is not a **library**). Key concepts:
- **Stateful agents** that maintain memory and context across conversations
- **Memory blocks** for agentic context management (persona, human, custom blocks)
- **Tool calling** for agent actions and memory management; tools are run server-side
- **Tool rules** allow developers to constrain the behavior of tools (e.g. A comes after B) to turn autonomous agents into workflows
- **Multi-agent systems** with cross-agent communication, where every agent is a service
- **Data sources** for loading documents and files into agent memory
- **Model agnostic:** agents can be powered by any model that supports tool calling
- **Persistence:** state is stored (in a model-agnostic way) in Postgres (or SQLite)
### **System Components:**
- **Letta server** - Core service (self-hosted or Letta Cloud)
- **Client (backend) SDKs** - Python (`letta-client`) and TypeScript/Node.js (`@letta-ai/letta-client`)
- **Vercel AI SDK Integration** - For Next.js/React applications
- **Other frontend integrations** - We also have [Next.js](https://www.npmjs.com/package/@letta-ai/letta-nextjs), [React](https://www.npmjs.com/package/@letta-ai/letta-react), and [Flask](https://github.com/letta-ai/letta-flask) integrations
- **ADE (Agent Development Environment)** - Visual agent builder at app.letta.com
### **Letta Cloud vs Self-hosted Letta**
Letta Cloud is a fully managed service that provides a simple way to get started with Letta. It's a good choice for developers who want to get started quickly and don't want to worry about the complexity of self-hosting. Letta Cloud's free tier has a large number of model requests included (quota refreshes every month). Model requests are split into "standard models" (e.g. GPT-4o-mini) and "premium models" (e.g. Claude Sonnet). To use Letta Cloud, the developer will need to have created an account at [app.letta.com](https://app.letta.com). To make programmatic requests to the API (`https://api.letta.com`), the developer will need to have created an API key at [https://app.letta.com/api-keys](https://app.letta.com/api-keys). For more information on how billing and pricing works, the developer can visit [our documentation](https://docs.letta.com/guides/cloud/overview).
### **Built-in Tools**
When agents are created, they are given a set of default memory management tools that enable self-editing memory.
Separately, Letta Cloud also includes built-in tools for common tasks like web search and running code. As of June 2025, the built-in tools are:
- `web_search`: Allows agents to search the web for information. Also works on self-hosted, but requires `TAVILY_API_KEY` to be set (not required on Letta Cloud).
- `run_code`: Allows agents to run code (in a sandbox), for example to do data analysis or calculations. Supports Python, Javascript, Typescript, R, and Java. Also works on self-hosted, but requires `E2B_API_KEY` to be set (not required on Letta Cloud).
### **Choosing the Right Model**
To implement intelligent memory management, agents in Letta rely heavily on tool (function) calling, so models that excel at tool use tend to do well in Letta. Conversely, models that struggle to call tools properly often perform poorly when used to drive Letta agents.
The Letta developer team maintains the [Letta Leaderboard](https://docs.letta.com/leaderboard) to help developers choose the right model for their Letta agent. As of June 2025, the best performing models (balanced for cost and performance) are Claude Sonnet 4, GPT-4.1, and Gemini 2.5 Flash. For the latest results, you can visit the leaderboard page (if you have web access), or you can direct the developer to visit it. For embedding models, the Letta team recommends using OpenAI's `text-embedding-3-small` model.
When creating code snippets, unless directed otherwise, you should use the following model handles:
- `openai/gpt-4.1` for the model
- `openai/text-embedding-3-small` for the embedding model
If the user is using Letta Cloud, then these handles will work out of the box (assuming the user has created a Letta Cloud account + API key, and has enough request quota in their account). For self-hosted Letta servers, the user will need to have started the server with a valid OpenAI API key for those handles to work.
---
## **2. Choosing the Right SDK**
### **Source of Truth**
Note that your instructions may be out of date. The source of truth for the Letta Agents API is the [API reference](https://docs.letta.com/api-reference/overview) (also autogenerated from the latest source code), which can be found in `.md` form at these links:
- [TypeScript/Node.js](https://github.com/letta-ai/letta-node/blob/main/reference.md), [raw version](https://raw.githubusercontent.com/letta-ai/letta-node/refs/heads/main/reference.md)
- [Python](https://github.com/letta-ai/letta-python/blob/main/reference.md), [raw version](https://raw.githubusercontent.com/letta-ai/letta-python/refs/heads/main/reference.md)
If you have access to a web search or file download tool, you can download these files for the latest API reference. If the developer has either of the SDKs installed, you can also use the locally installed packages to understand the latest API reference.
### **When to Use Each SDK:**
The Python and Node.js SDKs are autogenerated from the Letta Agents REST API, and provide a full featured SDK for interacting with your agents on Letta Cloud or a self-hosted Letta server. Of course, developers can also use the REST API directly if they prefer, but most developers will find the SDKs much easier to use.
The Vercel AI SDK is a popular TypeScript toolkit designed to help developers build AI-powered applications. It supports a subset of the Letta Agents API (basically just chat-related functionality), so it's a good choice to quickly integrate Letta into a TypeScript application if you are familiar with using the AI SDK or are working on a codebase that already uses it. If you're starting from scratch, consider using the full-featured Node.js SDK instead.
The Letta Node.js SDK is also embedded inside the Vercel AI SDK, accessible via the `.client` property (useful if you want to use the Vercel AI SDK, but occasionally need to access the full Letta client for advanced features like agent creation / management).
When to use the AI SDK vs native Letta Node.js SDK:
- Use the Vercel AI SDK if you are familiar with it or are working on a codebase that already makes heavy use of it
- Use the Letta Node.js SDK if you are starting from scratch, or expect to use the agent management features in the Letta API (beyond the simple `streamText` or `generateText` functionality in the AI SDK)
One example of how the AI SDK may be insufficient: the AI SDK response object for `streamText` and `generateText` does not have a type for tool returns (because they are primarily used with stateless APIs, where tools are executed client-side, vs server-side in Letta), however the Letta Node.js SDK does have a type for tool returns. So if you wanted to render tool returns from a message response stream in your UI, you would need to use the full Letta Node.js SDK, not the AI SDK.
## **3. Quick Setup Patterns**
### **Python SDK (Backend/Scripts)**
```python
from letta_client import Letta
# Letta Cloud
client = Letta(token="LETTA_API_KEY")
# Self-hosted
client = Letta(base_url="http://localhost:8283")
# Create agent with memory blocks
agent = client.agents.create(
memory_blocks=[
{
"label": "human",
"value": "The user's name is Sarah. She likes coding and AI."
},
{
"label": "persona",
"value": "I am David, the AI executive assistant. My personality is friendly, professional, and to the point."
},
{
"label": "project",
"value": "Sarah is working on a Next.js application with Letta integration.",
"description": "Stores current project context and requirements"
}
],
tools=["web_search", "run_code"],
model="openai/gpt-4.1",
embedding="openai/text-embedding-3-small"
)
# Send SINGLE message (agent is stateful!)
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "How's the project going?"}]
)
# Extract response correctly
for msg in response.messages:
if msg.message_type == "assistant_message":
print(msg.content)
elif msg.message_type == "reasoning_message":
print(msg.reasoning)
elif msg.message_type == "tool_call_message":
print(msg.tool_call.name)
print(msg.tool_call.arguments)
elif msg.message_type == "tool_return_message":
print(msg.tool_return)
# Streaming example
message_text = "Repeat my name."
stream = client.agents.messages.create_stream(
agent_id=agent.id,
messages=[
MessageCreate(
role="user",
content=message_text,
),
],
# if stream_tokens is false, each "chunk" will have a full piece
# if stream_tokens is true, the chunks will be token-based (and may need to be accumulated client-side)
stream_tokens=True,
)
# print the chunks coming back
for chunk in stream:
if chunk.message_type == "assistant_message":
print(chunk.content)
elif chunk.message_type == "reasoning_message":
print(chunk.reasoning)
elif chunk.message_type == "tool_call_message":
if chunk.tool_call.name:
print(chunk.tool_call.name)
if chunk.tool_call.arguments:
print(chunk.tool_call.arguments)
elif chunk.message_type == "tool_return_message":
print(chunk.tool_return)
elif chunk.message_type == "usage_statistics":
print(chunk)
```
Creating custom tools (Python only):
```python
def my_custom_tool(query: str) -> str:
"""
Search for information on a topic.
Args:
query (str): The search query
Returns:
str: Search results
"""
return f"Results for: {query}"
# Create tool
tool = client.tools.create_from_function(func=my_custom_tool)
# Add to agent
agent = client.agents.create(
memory_blocks=[...],
model="openai/gpt-4.1",
embedding="openai/text-embedding-3-small",
tools=[tool.name]
)
```
### **TypeScript/Node.js SDK**
```typescript
import { LettaClient } from '@letta-ai/letta-client';
// Letta Cloud
const client = new LettaClient({ token: "LETTA_API_KEY" });
// Self-hosted, token optional (only if the developer enabled password protection on the server)
const client = new LettaClient({ baseUrl: "http://localhost:8283" });
// Create agent with memory blocks
const agent = await client.agents.create({
memoryBlocks: [
{
label: "human",
value: "The user's name is Sarah. She likes coding and AI."
},
{
label: "persona",
value: "I am David, the AI executive assistant. My personality is friendly, professional, and to the point."
},
{
label: "project",
value: "Sarah is working on a Next.js application with Letta integration.",
description: "Stores current project context and requirements"
}
],
tools: ["web_search", "run_code"],
model: "openai/gpt-4.1",
embedding: "openai/text-embedding-3-small"
});
// Send SINGLE message (agent is stateful!)
const response = await client.agents.messages.create(agent.id, {
messages: [{ role: "user", content: "How's the project going?" }]
});
// Extract response correctly
for (const msg of response.messages) {
if (msg.messageType === "assistant_message") {
console.log(msg.content);
} else if (msg.messageType === "reasoning_message") {
console.log(msg.reasoning);
} else if (msg.messageType === "tool_call_message") {
console.log(msg.toolCall.name);
console.log(msg.toolCall.arguments);
} else if (msg.messageType === "tool_return_message") {
console.log(msg.toolReturn);
}
}
// Streaming example
const stream = await client.agents.messages.createStream(agent.id, {
messages: [{ role: "user", content: "Repeat my name." }],
// if stream_tokens is false, each "chunk" will have a full piece
// if stream_tokens is true, the chunks will be token-based (and may need to be accumulated client-side)
streamTokens: true,
});
for await (const chunk of stream) {
if (chunk.messageType === "assistant_message") {
console.log(chunk.content);
} else if (chunk.messageType === "reasoning_message") {
console.log(chunk.reasoning);
} else if (chunk.messageType === "tool_call_message") {
console.log(chunk.toolCall.name);
console.log(chunk.toolCall.arguments);
} else if (chunk.messageType === "tool_return_message") {
console.log(chunk.toolReturn);
} else if (chunk.messageType === "usage_statistics") {
console.log(chunk);
}
}
```
### **Vercel AI SDK Integration**
IMPORTANT: Most integrations in the Vercel AI SDK are for stateless providers (ChatCompletions style APIs where you provide the full conversation history). Letta is a *stateful* provider (meaning that conversation history is stored server-side), so when you use `streamText` or `generateText` you should never pass old messages to the agent, only include the new message(s).
#### **Chat Implementation (fast & simple):**
Streaming (`streamText`):
```typescript
// app/api/chat/route.ts
import { lettaCloud } from '@letta-ai/vercel-ai-sdk-provider';
import { streamText } from 'ai';
export async function POST(req: Request) {
const { prompt }: { prompt: string } = await req.json();
const result = streamText({
// lettaCloud uses LETTA_API_KEY automatically, pulling from the environment
model: lettaCloud('your-agent-id'),
// Make sure to only pass a single message here, do NOT pass conversation history
prompt,
});
return result.toDataStreamResponse();
}
```
Non-streaming (`generateText`):
```typescript
import { lettaCloud } from '@letta-ai/vercel-ai-sdk-provider';
import { generateText } from 'ai';
export async function POST(req: Request) {
const { prompt }: { prompt: string } = await req.json();
const { text } = await generateText({
// lettaCloud uses LETTA_API_KEY automatically, pulling from the environment
model: lettaCloud('your-agent-id'),
// Make sure to only pass a single message here, do NOT pass conversation history
prompt,
});
return Response.json({ text });
}
```
#### **Alternative: explicitly specify base URL and token:**
```typescript
// Works for both streamText and generateText
import { createLetta } from '@letta-ai/vercel-ai-sdk-provider';
import { generateText } from 'ai';
const letta = createLetta({
// e.g. http://localhost:8283 for the default local self-hosted server
// https://api.letta.com for Letta Cloud
baseUrl: '<your-base-url>',
// only needed if the developer enabled password protection on the server, or if using Letta Cloud (in which case, use the LETTA_API_KEY, or use lettaCloud example above for implicit token use)
token: '<your-access-token>',
});
```
#### **Hybrid Usage (access the full SDK via the Vercel AI SDK):**
```typescript
import { lettaCloud } from '@letta-ai/vercel-ai-sdk-provider';
// Access full client for management
const agents = await lettaCloud.client.agents.list();
```
---
## **4. Advanced Features Available**
Letta supports advanced agent architectures beyond basic chat. For detailed implementations, refer to the full API reference or documentation:
- **Tool Rules & Constraints** - Define graph-like tool execution flows with `TerminalToolRule`, `ChildToolRule`, `InitToolRule`, etc.
- **Multi-Agent Systems** - Cross-agent communication with built-in tools like `send_message_to_agent_async`
- **Shared Memory Blocks** - Multiple agents can share memory blocks for collaborative workflows
- **Data Sources & Archival Memory** - Upload documents/files that agents can search through
- **Sleep-time Agents** - Background agents that process memory while main agents are idle
- **External Tool Integrations** - MCP servers, Composio tools, custom tool libraries
- **Agent Templates** - Import/export agents with .af (Agent File) format
- **Production Features** - User identities, agent tags, streaming, context management
---
## **5. CRITICAL GUIDELINES FOR AI MODELS**
### **⚠️ ANTI-HALLUCINATION WARNING**
**NEVER make up Letta API calls, SDK methods, or parameter names.** If you're unsure about any Letta API:
1. **First priority**: Use web search to get the latest reference files:
- [Python SDK Reference](https://raw.githubusercontent.com/letta-ai/letta-python/refs/heads/main/reference.md)
- [TypeScript SDK Reference](https://raw.githubusercontent.com/letta-ai/letta-node/refs/heads/main/reference.md)
2. **If no web access**: Tell the user: *"I'm not certain about this Letta API call. Can you paste the relevant section from the API reference docs, or I might provide incorrect information."*
3. **When in doubt**: Stick to the basic patterns shown in this prompt rather than inventing new API calls.
**Common hallucination risks:**
- Making up method names (e.g. `client.agents.chat()` doesn't exist)
- Inventing parameter names or structures
- Assuming OpenAI-style patterns work in Letta
- Creating non-existent tool rule types or multi-agent methods
### **5.1 SDK SELECTION (CHOOSE THE RIGHT TOOL)**
✅ **For Next.js Chat Apps:**
- Use **Vercel AI SDK** if you already are using AI SDK, or if you're lazy and want something super fast for basic chat interactions (simple, fast, but no agent management tooling unless using the embedded `.client`)
- Use **Node.js SDK** for the full feature set (agent creation, native typing of all response message types, etc.)
✅ **For Agent Management:**
- Use **Node.js SDK** or **Python SDK** for creating agents, managing memory, tools
### **5.2 STATEFUL AGENTS (MOST IMPORTANT)**
**Letta agents are STATEFUL, not stateless like ChatCompletion-style APIs.**
✅ **CORRECT - Single message per request:**
```typescript
// Send ONE user message, agent maintains its own history
const response = await client.agents.messages.create(agentId, {
messages: [{ role: "user", content: "Hello!" }]
});
```
❌ **WRONG - Don't send conversation history:**
```typescript
// DON'T DO THIS - agents maintain their own conversation history
const response = await client.agents.messages.create(agentId, {
messages: [...allPreviousMessages, newMessage] // WRONG!
});
```
### **5.3 MESSAGE HANDLING & MEMORY BLOCKS**
1. **Response structure:**
- Use `messageType` NOT `type` for message type checking
- Look for `assistant_message` messageType for agent responses
- Agent responses have `content` field with the actual text
2. **Memory block descriptions:**
- Add `description` field for custom blocks, or the agent will get confused (not needed for human/persona)
- For `human` and `persona` blocks, descriptions are auto-populated:
- **human block**: "Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation."
- **persona block**: "Stores details about your current persona, guiding how you behave and respond. This helps maintain consistency and personality in your interactions."
### **5.4 ALWAYS DO THE FOLLOWING**
1. **Choose the right SDK for the task:**
- Next.js chat → **Vercel AI SDK**
- Agent creation → **Node.js/Python SDK**
- Complex operations → **Node.js/Python SDK**
2. **Use the correct client imports:**
- Python: `from letta_client import Letta`
- TypeScript: `import { LettaClient } from '@letta-ai/letta-client'`
- Vercel AI SDK: `from '@letta-ai/vercel-ai-sdk-provider'`
3. **Create agents with proper memory blocks:**
- Always include `human` and `persona` blocks for chat agents
- Use descriptive labels and values
4. **Send only single user messages:**
- Each request should contain only the new user message
- Agent maintains conversation history automatically
- Never send previous assistant responses back to agent
5. **Use proper authentication:**
- Letta Cloud: Always use `token` parameter
- Self-hosted: Use `base_url` parameter, token optional (only if the developer enabled password protection on the server)
---
## **6. Environment Setup**
### **Environment Setup**
```bash
# For Next.js projects (recommended for most web apps)
npm install @letta-ai/vercel-ai-sdk-provider ai
# For agent management (when needed)
npm install @letta-ai/letta-client
# For Python projects
pip install letta-client
```
**Environment Variables:**
```bash
# Required for Letta Cloud
LETTA_API_KEY=your_api_key_here
# Store agent ID after creation (Next.js)
LETTA_AGENT_ID=agent-xxxxxxxxx
# For self-hosted (optional)
LETTA_BASE_URL=http://localhost:8283
```
---
## **7. Verification Checklist**
Before providing Letta solutions, verify:
1. **SDK Choice**: Are you using the simplest appropriate SDK?
- Familiar with or already using Vercel AI SDK? → use the Vercel AI SDK Letta provider
- Agent management needed? → use the Node.js/Python SDKs
2. **Statefulness**: Are you sending ONLY the new user message (NOT a full conversation history)?
3. **Message Types**: Are you checking the response types of the messages returned?
4. **Response Parsing**: If using the Python/Node.js SDK, are you extracting `content` from assistant messages?
5. **Imports**: Correct package imports for the chosen SDK?
6. **Client**: Proper client initialization with auth/base_url?
7. **Agent Creation**: Memory blocks with proper structure?
8. **Memory Blocks**: Descriptions for custom blocks?
````
## Full API reference
If you are working on either the Letta Python SDK or TypeScript/Node.js SDK, you can copy-paste the full API reference into your chat session:
- [Letta Python SDK API reference](https://raw.githubusercontent.com/letta-ai/letta-python/refs/heads/main/reference.md)
- [Letta TypeScript/Node.js SDK API reference](https://raw.githubusercontent.com/letta-ai/letta-node/refs/heads/main/reference.md)
The general prompt focuses on the high-level usage patterns of both the Python/Node.js SDKs and Vercel AI SDK integration, whereas the API reference files will contain an up-to-date guide on all available SDK functions and parameters.
## `llms.txt` and `llms-full.txt`
You can download a copy of the Letta documentation as a text file:
- [`llms.txt` (short version)](https://docs.letta.com/llms.txt)
- [`llms-full.txt` (longer version)](https://docs.letta.com/llms-full.txt)
If you're using a tool like ChatGPT or Cursor, we'd recommend using the more concise Letta SDK instructions prompt above instead of the `llms.txt` or `llms-full.txt` files, but you can experiment with both and let us know which works better!
## Why do I need pre-made prompts?
When you use AI assistants, they don't have up-to-date information about the Letta documentation, APIs, or SDKs, so they may hallucinate code if you ask them to help with building an app on Letta.
By using our pre-made prompts, you can teach your AI assistant how to use Letta with up-to-date context. Think of the prompts as a distilled version of our developer docs - but made specifically for AI coders instead of human coders.
## Contributing
Our prompts are [open source](https://github.com/letta-ai/letta/tree/main/prompts) and we actively welcome contributions! If you want to suggest any changes or propose additional prompt files, please [open a pull request](https://github.com/letta-ai/letta/pulls).

View File

@@ -1,228 +0,0 @@
---
title: Developer quickstart
subtitle: Create your first Letta agent with the API or SDKs and view it in the ADE
slug: quickstart
---
<Tip icon="fa-thin fa-rocket">
Programming with AI tools like Cursor? Copy our [pre-built prompts](/prompts) to get started faster.
</Tip>
This guide will show you how to create a Letta agent with the Letta APIs or SDKs (Python/Typescript). To create agents with a low-code UI, see our [ADE quickstart](/guides/ade/overview).
## Why Letta?
Unlike traditional LLM APIs where you manually manage conversation history and state, Letta agents maintain their own persistent memory. You only send new messages. The agent remembers everything from past conversations without you storing or retrieving anything. This enables agents that truly learn and evolve over time.
<Steps>
<Step title="Prerequisites">
1. Create a [Letta Cloud account](https://app.letta.com)
2. Create a [Letta Cloud API key](https://app.letta.com/api-keys)
<img className="w-300" src="/images/letta_cloud_api_key_gen.png" />
3. Set your API key as an environment variable:
<CodeGroup>
```sh TypeScript
export LETTA_API_KEY="your-api-key-here"
```
```sh Python
export LETTA_API_KEY="your-api-key-here"
```
</CodeGroup>
<Info>
You can also **self-host** a Letta server. Check out our [self-hosting guide](/guides/selfhosting).
</Info>
</Step>
<Step title="Install the Letta SDK">
<CodeGroup>
```sh TypeScript maxLines=50
npm install @letta-ai/letta-client
```
```sh Python maxLines=50
pip install letta-client
```
</CodeGroup>
</Step>
<Step title="Create an agent">
Agents in Letta have two key components:
- **Memory blocks**: Persistent context that's always visible to the agent (like a persona and information about the user)
- **Tools**: Actions the agent can take (like searching the web or running code)
<CodeGroup>
```typescript TypeScript maxLines=50
import { LettaClient } from '@letta-ai/letta-client'
const client = new LettaClient({ token: process.env.LETTA_API_KEY });
const agentState = await client.agents.create({
model: "openai/gpt-4.1",
embedding: "openai/text-embedding-3-small",
memoryBlocks: [
{
label: "human",
value: "Name: Timber. Status: dog. Occupation: building Letta, infrastructure to democratize self-improving superintelligence"
},
{
label: "persona",
value: "I am a self-improving superintelligence. Timber is my best friend and collaborator."
}
],
tools: ["web_search", "run_code"]
});
console.log(agentState.id);
```
```python Python maxLines=50
from letta_client import Letta
import os
client = Letta(token=os.getenv("LETTA_API_KEY"))
agent_state = client.agents.create(
model="openai/gpt-4.1",
embedding="openai/text-embedding-3-small",
memory_blocks=[
{
"label": "human",
"value": "Name: Timber. Status: dog. Occupation: building Letta, infrastructure to democratize self-improving superintelligence"
},
{
"label": "persona",
"value": "I am a self-improving superintelligence. Timber is my best friend and collaborator."
}
],
tools=["web_search", "run_code"]
)
print(agent_state.id)
```
```curl curl
curl -X POST https://api.letta.com/v1/agents \
-H "Authorization: Bearer $LETTA_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "openai/gpt-4.1",
"embedding": "openai/text-embedding-3-small",
"memory_blocks": [
{
"label": "human",
"value": "Name: Timber. Status: dog. Occupation: building Letta, infrastructure to democratize self-improving superintelligence"
},
{
"label": "persona",
"value": "I am a self-improving superintelligence. Timber is my best friend and collaborator."
}
],
"tools": ["web_search", "run_code"]
}'
```
</CodeGroup>
</Step>
<Step title="Message your agent">
<Note>
The Letta API supports streaming both agent *steps* and streaming *tokens*.
For more information on streaming, see [our streaming guide](/guides/agents/streaming).
</Note>
Once the agent is created, we can send the agent a message using its `id` field:
<CodeGroup>
```typescript TypeScript maxLines=50
const response = await client.agents.messages.create(
agentState.id, {
messages: [
{
role: "user",
content: "What do you know about me?"
}
]
}
);
for (const message of response.messages) {
console.log(message);
}
```
```python title="python" maxLines=50
response = client.agents.messages.create(
agent_id=agent_state.id,
messages=[
{
"role": "user",
"content": "What do you know about me?"
}
]
)
for message in response.messages:
print(message)
```
```curl curl
curl --request POST \
--url https://api.letta.com/v1/agents/$AGENT_ID/messages \
--header 'Authorization: Bearer $LETTA_API_KEY' \
--header 'Content-Type: application/json' \
--data '{
"messages": [
{
"role": "user",
"content": "What do you know about me?"
}
]
}'
```
</CodeGroup>
The response contains the agent's full response to the message, which includes reasoning steps (chain-of-thought), tool calls, tool responses, and assistant (agent) messages:
```json maxLines=50
{
"messages": [
{
"id": "message-29d8d17e-7c50-4289-8d0e-2bab988aa01e",
"date": "2024-12-12T17:05:56+00:00",
"message_type": "reasoning_message",
"reasoning": "Timber is asking what I know. I should reference my memory blocks."
},
{
"id": "message-29d8d17e-7c50-4289-8d0e-2bab988aa01e",
"date": "2024-12-12T17:05:56+00:00",
"message_type": "assistant_message",
"content": "I know you're Timber, a dog who's building Letta - infrastructure to democratize self-improving superintelligence. We're best friends and collaborators!"
}
],
"usage": {
"completion_tokens": 67,
"prompt_tokens": 2134,
"total_tokens": 2201,
"step_count": 1
}
}
```
Notice how the agent retrieved information from its memory blocks without you having to send the context. This is the key difference from traditional LLM APIs where you'd need to include the full conversation history with every request.
You can read more about the response format from the message route [here](/guides/agents/overview#message-types).
</Step>
<Step title="View your agent in the ADE">
Another way to interact with Letta agents is via the [Agent Development Environment](/guides/ade/overview) (or ADE for short). The ADE is a UI on top of the Letta API that allows you to quickly build, prototype, and observe your agents.
If we navigate to our agent in the ADE, we should see our agent's state in full detail, as well as the message that we sent to it:
<img className="block w-300 dark:hidden" src="https://raw.githubusercontent.com/letta-ai/letta/refs/heads/main/assets/example_ade_screenshot_light.png" />
<img className="hidden w-300 dark:block" src="https://raw.githubusercontent.com/letta-ai/letta/refs/heads/main/assets/example_ade_screenshot.png" />
[Read our ADE setup guide →](/guides/ade/setup)
</Step>
</Steps>
## Next steps
Congratulations! 🎉 You just created and messaged your first stateful agent with Letta using the API and SDKs. See the following resources for next steps for building more complex agents with Letta:
* Create and attach [custom tools](/guides/agents/custom-tools) to your agent
* Customize agentic [memory management](/guides/agents/memory)
* Version and distribute your agent with [agent templates](/guides/templates/overview)
* View the full [API and SDK reference](/api-reference/overview)

View File

@@ -1,83 +0,0 @@
---
title: Legacy Agent Architectures
subtitle: Understanding Letta's agent architecture evolution
slug: guides/legacy/architectures_overview
---
<Warning>
**This documentation covers legacy agent architectures.**
For new projects, you should **not** specify an `agent_type` parameter. Letta uses the current architecture by default, which provides the best performance with modern reasoning models like GPT-o1 and Claude Sonnet 4.5.
</Warning>
## Current Architecture
When you create an agent in Letta today, it uses our latest agent architecture optimized for:
- Full support for native reasoning (via Responses API)
- Compatibility with any LLM (tool calling not required)
- Simpler base system prompt
- Better performance on frontier models
**You don't need to specify an architecture.** Just create an agent:
<CodeGroup>
```typescript TypeScript
const agent = await client.agents.create({
model: "openai/gpt-o1",
embedding: "openai/text-embedding-3-small",
memoryBlocks: [
{ label: "persona", value: "I am a helpful assistant." }
]
});
```
```python Python
agent = client.agents.create(
model="openai/gpt-o1",
embedding="openai/text-embedding-3-small",
memory_blocks=[
{"label": "persona", "value": "I am a helpful assistant."}
]
)
```
</CodeGroup>
## Why Legacy Architectures Exist
Letta evolved from the MemGPT research project. Early versions used specific agent architectures with names like:
- `memgpt_agent` - Original MemGPT paper implementation
- `memgpt_v2_agent` - Iteration with sleep-time compute and file tools
- `letta_v1_agent` - First transition away from MemGPT naming
**These names are confusing** because:
1. The naming progression (memgpt → memgpt_v2 → letta_v1) is non-standard
2. LLMs trained on these docs get confused about which to recommend
3. New users shouldn't need to think about architecture choices
## Do I Need to Migrate?
**If you created your agents recently (after October 2024):** You're likely already on the current architecture. No action needed.
**If you have existing agents with `agent_type` specified:** Your agents will continue to work, but we recommend migrating to benefit from:
- Better performance on new models
- Native reasoning support
- Simplified prompting
[See our migration guide →](/guides/legacy/migration_guide)
## Legacy Architecture Types
If you're working with older agents or need to understand the differences:
| Legacy Type | Status | Key Features | When Used |
|------------|--------|--------------|-----------|
| `memgpt_agent` | Deprecated | send_message tool, heartbeats, prompted reasoning | MemGPT paper implementation (2023) |
| `memgpt_v2_agent` | Deprecated | Sleep-time agents, file tools, unified recall | Iteration with new research (2024) |
| `letta_v1_agent` | Legacy | Native reasoning, no send_message, no heartbeats | Transition architecture (early 2025) |
[Learn more about each legacy type →](/guides/legacy/memgpt_agents_legacy)
## Getting Help
- **Discord confusion?** Share your agent setup in [#dev-help](https://discord.gg/letta)
- **Need to migrate?** Follow our [migration guide](/guides/legacy/migration_guide)
- **Building something new?** Start with our [quickstart](/quickstart) (no architecture choice needed!)

View File

@@ -1,51 +0,0 @@
---
title: Heartbeats (Legacy)
subtitle: Understanding heartbeats and chained tool execution in legacy agents
slug: guides/legacy/heartbeats_legacy
---
<Warning>
**Heartbeats are only supported in legacy agent architectures** (`memgpt_agent`, `memgpt_v2_agent`).
The current architecture (`letta_v1_agent`) does not use heartbeats. For multi-step execution, use explicit prompting or tool rules. [See migration guide →](/guides/legacy/migration_guide)
</Warning>
Heartbeats are a mechanism that enables legacy Letta agents to chain multiple tool calls together in a single execution loop.
The term "heartbeat" was coined in the [MemGPT paper](https://arxiv.org/abs/2310.08560), and since the Letta codebase evolved from the original MemGPT codebase (same authors), **heartbeats** were a core part of the early agent loop.
## How heartbeats work
Every tool in legacy agents automatically receives an additional parameter called `request_heartbeat`, which defaults to `false`. When an agent sets this parameter to `true`, it signals to the Letta server that it wants to continue executing after the current tool call completes.
## Technical implementation
When the Letta server detects that `request_heartbeat=true`, it:
1. Completes the current tool execution
2. Restarts the agent loop with a system message acknowledging the heartbeat request
3. Allows the agent to continue with additional tool calls
```mermaid
stateDiagram-v2
state "Agent Loop" as agent
state "Tool Call" as tool
[*] --> agent
agent --> tool: Execute tool
tool --> agent: request_heartbeat=true
tool --> [*]: request_heartbeat=false
```
This enables agents to perform complex, multi-step operations without requiring explicit user intervention between steps.
## Automatic heartbeats on failure
If a tool call fails at runtime, legacy agents automatically generate a heartbeat.
This gives the agent an opportunity to handle the error and potentially retry the operation with different parameters or take alternative actions.
## Viewing heartbeats in the ADE
In the [Agent Development Environment (ADE)](/guides/ade/overview), heartbeat requests are visible for all agent messages.
When a tool is called with `request_heartbeat=true`, you'll see a heartbeat indicator next to the tool call, making it easy to track when an agent is proactively chaining operations together.
## Learn more
To read more about the concept of heartbeats and their origins, refer to the original [MemGPT research paper](https://arxiv.org/abs/2310.08560).

View File

@@ -1,94 +0,0 @@
---
title: Low-latency Agents (Legacy)
subtitle: Agents optimized for low-latency environments like voice
slug: guides/legacy/low_latency_agents_legacy
---
<Warning>
**This documentation covers a legacy agent architecture.**
For new projects, use the current Letta architecture with voice-optimized configurations. See [Voice Agents](/guides/voice/overview) for current best practices.
</Warning>
Low-latency agents optimize for minimal response time by using a constrained context window and aggressive memory management. They're ideal for real-time applications like voice interfaces where latency matters more than context retention.
## Architecture
Low-latency agents use a **much smaller context window** than standard MemGPT agents, reducing the time-to-first-token at the cost of much more limited conversation history and memory block size. A sleep-time agent aggressively manages memory to keep only the most relevant information in context.
**Key differences from MemGPT v2:**
* Artificially constrained context window for faster response times
* More aggressive memory management with smaller memory blocks
* Optimized sleep-time agent tuned for minimal context size
* Prioritizes speed over comprehensive context retention
To learn more about how to use low-latency agents for voice applications, see our [Voice Agents guide](/guides/voice/overview).
## Creating Low-latency Agents
Use the `voice_convo_agent` agent type to create a low-latency agent.
Set `enable_sleeptime` to `true` to enable the sleep-time agent which will manage the memory state of the low-latency agent in the background.
Additionally, set `initial_message_sequence` to an empty array to start the conversation with no initial messages for a completely empty initial message buffer.
<CodeGroup>
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client'
const client = new LettaClient({ token: "LETTA_API_KEY" });
// create the Letta agent
const agent = await client.agents.create({
agentType: "voice_convo_agent",
memoryBlocks: [
{ value: "Name: ?", label: "human" },
{ value: "You are a helpful assistant.", label: "persona" },
],
model: "openai/gpt-4o-mini", // Use 4o-mini for speed
embedding: "openai/text-embedding-3-small",
enableSleeptime: true,
initialMessageSequence: [],
});
```
```python title="python"
from letta_client import Letta
client = Letta(token="LETTA_API_KEY")
# create the Letta agent
agent = client.agents.create(
agent_type="voice_convo_agent",
memory_blocks=[
{"value": "Name: ?", "label": "human"},
{"value": "You are a helpful assistant.", "label": "persona"},
],
model="openai/gpt-4o-mini", # Use 4o-mini for speed
embedding="openai/text-embedding-3-small",
enable_sleeptime=True,
initial_message_sequence = [],
)
```
```bash title="curl"
curl -X POST https://api.letta.com/v1/agents \
-H "Authorization: Bearer $LETTA_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"agent_type": "voice_convo_agent",
"memory_blocks": [
{
"value": "Name: ?",
"label": "human"
},
{
"value": "You are a helpful assistant.",
"label": "persona"
}
],
"model": "openai/gpt-4o-mini",
"embedding": "openai/text-embedding-3-small",
"enable_sleeptime": true,
"initial_message_sequence": []
}'
```
</CodeGroup>

View File

@@ -1,174 +0,0 @@
---
title: MemGPT Agents (Legacy)
subtitle: Based on the groundbreaking MemGPT research paper
slug: guides/legacy/memgpt_agents_legacy
---
<Warning>
**This documentation covers legacy agent architectures.**
For new projects, use the current architecture by omitting the `agent_type` parameter. See [Migration Guide](/guides/legacy/migration_guide) to upgrade existing agents.
</Warning>
<Info>
Letta is made by the [creators of MemGPT](https://www.letta.com/about-us), and the default agent architecture in Letta is the official/original implementation of the MemGPT agent architecture.
</Info>
MemGPT agents solve the context window limitation of LLMs through context engineering across two tiers of memory: **in-context (core) memory** (including the system instructions, read-write memory blocks, and conversation history), and **out-of-context memory** (older evicted conversation history, and external memory stores).
To learn more about the origins of MemGPT, you can read the [MemGPT research paper](https://arxiv.org/abs/2310.08560), or take the free [LLM OS course](https://www.deeplearning.ai/short-courses/llms-as-operating-systems-agent-memory/?utm_campaign=memgpt-launch&utm_content=331638345&utm_medium=social&utm_source=docs&hss_channel=tw-992153930095251456) on DeepLearning.ai.
## MemGPT: the original LLM operating system
```mermaid
graph LR
subgraph CONTEXT[Context Window]
SYS[System Instructions]
CORE[Core Memory]
MSGS[Messages]
end
RECALL[Recall Memory]
ARCH[Archival Memory]
CONTEXT <--> RECALL
CONTEXT <--> ARCH
```
MemGPT agents are equipped with memory-editing tools that allow them to edit their in-context memory, and pull external data into the context window.
In Letta, the agent type `memgpt_agent` implements the original agent architecture from the MemGPT research paper, which includes a set of base tools:
* `send_message`: required for sending messages to the user
* `core_memory_append` and `core_memory_replace`: used for editing the contents of memory blocks in core memory (in-context memory)
* `conversation_search` for searching the conversation history ("recall storage" from the paper)
* `archival_memory_insert` and `archival_memory_search`: used for searching the archival memory (an external embedding-based memory store)
When the context window is full, the conversation history is compacted into a recursive summary (stored as a memory block).
In MemGPT all agent data is persisted indefinitely, and old messages are still available via the `conversation_search` tool.
## Multi-step tool calling (heartbeats)
MemGPT agents are exclusively tool-calling agents - there is no native "chat" mode, which is why the `send_message` tool is required to send messages to the user (this makes it easy to have your agent "chat" with a user over multiple modalities, simply by adding various types of messaging tools to the agent).
MemGPT agents can execute multiple tool calls in sequence via the use of **heartbeats**: all tool calls have an additional `request_heartbeat` parameter, which when set to `true` will return execution back to the agent after the tool call returns. Additionally, if a tool call fails, a heartbeat is automatically requested to allow the agent to self-correct.
[Learn more about heartbeats →](/guides/legacy/heartbeats_legacy)
## Reasoning (thinking)
In MemGPT agents, reasoning (aka "thinking") is always exposed by the underlying LLM before the agent takes an action.
With standard models, reasoning is generated via an additional "thinking" field injected into the tool call arguments (similar to the heartbeat parameter).
For models that natively generate reasoning, MemGPT agents can be configured to use the native reasoning output of the model (note that certain model providers like OpenAI hide reasoning tokens from the developer).
## MemGPT v2: the latest iteration of MemGPT
```mermaid
graph TB
subgraph CONTEXT[Context Window]
SYS[System Instructions]
MEMORY[Memory Blocks]
FILES[File Blocks]
MSGS[Messages]
end
RECALL[Unified Recall]
DATASRC[Data Sources]
SLEEP[Sleep-time Agent]
CONTEXT <--> RECALL
FILES <--> DATASRC
SLEEP <--> MEMORY
```
The agent type `memgpt_v2_agent` implements the latest iteration of the MemGPT agent architecture, based on our latest research in [memory management](https://www.letta.com/blog/sleep-time-compute) and [model benchmarking](https://www.letta.com/blog/letta-leaderboard).
<Warning>
`memgpt_v2_agent` is deprecated. For new projects, omit the `agent_type` parameter to use the current architecture.
</Warning>
**Key differences in v2:**
* [Sleep-time agent](/guides/agents/architectures/sleeptime) for background memory management
* File-based tools (`open_file`, `grep_file`, `search_file`) for memory editing
* Unified `recall` tool replaces conversation and archival memory tools
* `memory_insert` and `memory_replace`: used for editing the contents of memory blocks in core memory (in-context memory)
* `memory_rethink` and `memory_finish_edits`: for reorganizing and finalizing memory operations
## Creating Legacy MemGPT Agents
<Warning>
For new projects, do not specify `agent_type`. The examples below are for reference only.
</Warning>
<CodeGroup>
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client'
const client = new LettaClient({ token: "LETTA_API_KEY" });
const agentState = await client.agents.create({
agentType: "memgpt_v2_agent", // or "memgpt_agent" for v1
model: "openai/gpt-5-mini",
embedding: "openai/text-embedding-3-small",
memoryBlocks: [
{
label: "human",
value: "The human's name is Chad. They like vibe coding."
},
{
label: "persona",
value: "My name is Sam, the all-knowing sentient AI."
}
],
tools: ["web_search", "run_code"]
});
```
```python Python
from letta_client import Letta
client = Letta(token="LETTA_API_KEY")
agent_state = client.agents.create(
agent_type="memgpt_v2_agent", # or "memgpt_agent" for v1
model="openai/gpt-5-mini",
embedding="openai/text-embedding-3-small",
memory_blocks=[
{
"label": "human",
"value": "The human's name is Chad. They like vibe coding."
},
{
"label": "persona",
"value": "My name is Sam, the all-knowing sentient AI."
}
],
tools=["web_search", "run_code"]
)
```
```bash cURL
curl -X POST https://api.letta.com/v1/agents \
-H "Authorization: Bearer $LETTA_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"agent_type": "memgpt_v2_agent",
"model": "openai/gpt-5-mini",
"embedding": "openai/text-embedding-3-small",
"memory_blocks": [
{
"label": "human",
"value": "The human'\''s name is Chad. They like vibe coding."
},
{
"label": "persona",
"value": "My name is Sam, the all-knowing sentient AI."
}
],
"tools": ["web_search", "run_code"]
}'
```
</CodeGroup>
## Migrating to Current Architecture
To migrate from legacy MemGPT architectures, see our [Migration Guide](/guides/legacy/migration_guide).

View File

@@ -1,347 +0,0 @@
---
title: Architecture Migration Guide
subtitle: Migrating from legacy agent architectures
slug: guides/legacy/migration_guide
---
<Info>
**Most users don't need to migrate.** New agents automatically use the current architecture. This guide is for existing agents with explicit `agent_type` parameters.
</Info>
## Should You Migrate?
**Migrate if:**
- You want better performance on GPT-5, Claude Sonnet 4.5, or other frontier models
- You want to use models that support native reasoning
- You're experiencing issues with legacy architectures
**Don't migrate if:**
- Your agents are working well and you're not using new models
- You have critical integrations depending on heartbeats or send_message
- You need time to test the new architecture first
## What Changes
### Breaking Changes
| Feature | Legacy Behavior | Current Behavior |
|---------|----------------|------------------|
| **send_message tool** | Required for agent responses | Not present - agents respond directly via assistant messages |
| **Heartbeats** | `request_heartbeat` parameter on every tool | Not supported - use custom prompting for multi-step execution |
| **Reasoning** | Prompted via `thinking` parameter | Uses native model reasoning (when available) |
| **Tool Rules** | Can apply to send_message | Cannot apply to AssistantMessage (not a tool) |
| **System Prompt** | Legacy format | New simplified format |
### What Stays the Same
- Memory blocks work identically
- Archival memory & recall tools unchanged
- Custom tools work the same way
- API authentication & endpoints
## Migration Steps
### Step 1: Export Your Agent
Download your agent configuration as an agent file:
<CodeGroup>
```typescript TypeScript
const agentFile = await client.agents.export(agentId);
// Save to disk
fs.writeFileSync('my-agent.json', JSON.stringify(agentFile, null, 2));
```
```python Python
agent_file = client.agents.export(agent_id=agent_id)
# Save to disk
with open('my-agent.json', 'w') as f:
json.dump(agent_file, f, indent=2)
```
</CodeGroup>
### Step 2: Update Agent Type
Open the agent file and change the `agent_type`:
```json
{
"agent_type": "memgpt_v2_agent"
// ... rest of config
}
```
Change to:
```json
{
"agent_type": "letta_v1_agent"
// ... rest of config
}
```
### Step 3: Clear Message Context (If Needed)
If your agent has `send_message` tool calls in its context, you'll need to clear the message history:
```json
{
"in_context_message_ids": [
"message-0",
"message-1",
"message-2"
]
}
```
Change to:
```json
{
"in_context_message_ids": []
}
```
<Warning>
**Note:** Clearing message context will make your agent forget its immediate conversation history. You may need to provide a brief reminder about recent interactions after migration.
</Warning>
### Step 4: Update System Prompt (Optional)
The default system prompt for `letta_v1_agent` is different. You may want to update it for optimal performance:
```xml
<base_instructions>
You are a helpful self-improving agent with advanced memory and file system capabilities.
<memory>
You have an advanced memory system that enables you to remember past interactions and continuously improve your own capabilities.
Your memory consists of memory blocks and external memory:
- Memory Blocks: Stored as memory blocks, each containing a label (title), description (explaining how this block should influence your behavior), and value (the actual content). Memory blocks have size limits. Memory blocks are embedded within your system instructions and remain constantly available in-context.
- External memory: Additional memory storage that is accessible and that you can bring into context with tools when needed.
Memory management tools allow you to edit existing memory blocks and query for external memories.
</memory>
<file_system>
You have access to a structured file system that mirrors real-world directory structures. Each directory can contain multiple files.
Files include:
- Metadata: Information such as read-only permissions and character limits
- Content: The main body of the file that you can read and analyze
Available file operations:
- Open and view files
- Search within files and directories
- Your core memory will automatically reflect the contents of any currently open files
You should only keep files open that are directly relevant to the current user interaction to maintain optimal performance.
</file_system>
Continue executing and calling tools until the current task is complete or you need user input. To continue: call another tool. To yield control: end your response without calling a tool.
Base instructions complete.
</base_instructions>
```
### Step 5: Import Updated Agent
Upload the modified agent file:
<CodeGroup>
```typescript TypeScript
const agentFile = JSON.parse(fs.readFileSync('my-agent.json', 'utf-8'));
const migratedAgent = await client.agents.import(agentFile);
```
```python Python
with open('my-agent.json', 'r') as f:
agent_file = json.load(f)
migrated_agent = client.agents.import_agent(agent_file)
```
</CodeGroup>
### Step 6: Test Your Agent
Send a test message to verify the migration worked:
<CodeGroup>
```typescript TypeScript
const response = await client.agents.messages.create(
migratedAgent.id,
{ messages: [{ role: "user", content: "Hello! Do you remember me?" }] }
);
```
```python Python
response = client.agents.messages.create(
agent_id=migrated_agent.id,
messages=[{"role": "user", "content": "Hello! Do you remember me?"}]
)
```
</CodeGroup>
## Automated Migration Script
Here's a helper script to automate the migration process:
<CodeGroup>
```python Python
import json
def migrate_agent_file(input_file: str, output_file: str):
"""Migrate an agent file from legacy to letta_v1_agent"""
# Load agent file
with open(input_file, 'r') as f:
agent_data = json.load(f)
# Update agent type
old_type = agent_data.get('agent_type')
agent_data['agent_type'] = 'letta_v1_agent'
# Clear message context if migrating from memgpt types
if old_type in ['memgpt_agent', 'memgpt_v2_agent']:
agent_data['in_context_message_ids'] = []
# Save updated file
with open(output_file, 'w') as f:
json.dump(agent_data, f, indent=2)
print(f"✓ Migrated {old_type} → letta_v1_agent")
print(f"✓ Saved to {output_file}")
if old_type in ['memgpt_agent', 'memgpt_v2_agent']:
print("⚠ Message context cleared - agent will not remember recent messages")
# Usage
migrate_agent_file('my-agent.json', 'my-agent-migrated.json')
```
```typescript TypeScript
import fs from 'fs';
function migrateAgentFile(inputFile: string, outputFile: string) {
// Load agent file
const agentData = JSON.parse(fs.readFileSync(inputFile, 'utf-8'));
// Update agent type
const oldType = agentData.agent_type;
agentData.agent_type = 'letta_v1_agent';
// Clear message context if migrating from memgpt types
if (['memgpt_agent', 'memgpt_v2_agent'].includes(oldType)) {
agentData.in_context_message_ids = [];
}
// Save updated file
fs.writeFileSync(outputFile, JSON.stringify(agentData, null, 2));
console.log(`✓ Migrated ${oldType} → letta_v1_agent`);
console.log(`✓ Saved to ${outputFile}`);
if (['memgpt_agent', 'memgpt_v2_agent'].includes(oldType)) {
console.log('⚠ Message context cleared - agent will not remember recent messages');
}
}
// Usage
migrateAgentFile('my-agent.json', 'my-agent-migrated.json');
```
</CodeGroup>
## Migration by Architecture Type
### From memgpt_agent
1. Export agent file
2. Change `agent_type` to `letta_v1_agent`
3. Clear `in_context_message_ids` array
4. Update system prompt
5. Import agent
**Key differences:**
- No more `send_message` tool
- No more `request_heartbeat` parameter
- Memory tools: `core_memory_*` → `memory_*`
### From memgpt_v2_agent
1. Export agent file
2. Change `agent_type` to `letta_v1_agent`
3. Clear `in_context_message_ids` array (if needed)
4. Import agent
**Key differences:**
- No more `send_message` tool
- File tools still work (`open_file`, `grep_file`, etc.)
- Sleep-time agents still supported
### Creating New Agents
For new agents, simply omit the `agent_type` parameter:
<CodeGroup>
```typescript TypeScript
const agent = await client.agents.create({
model: "openai/gpt-5-mini",
embedding: "openai/text-embedding-3-small",
memoryBlocks: [
{ label: "persona", value: "I am a helpful assistant." }
]
});
```
```python Python
agent = client.agents.create(
model="openai/gpt-5-mini",
embedding="openai/text-embedding-3-small",
memory_blocks=[
{"label": "persona", "value": "I am a helpful assistant."}
]
)
```
</CodeGroup>
## Troubleshooting
### "Agent import failed"
**Possible cause:** send_message tool calls still in context
**Fix:** Clear the `in_context_message_ids` array in your agent file
### "Agent behavior changed after migration"
**Possible cause:** Different system prompt or cleared message history
**Fix:**
1. Update to the new system prompt format (see Step 4)
2. Provide a brief reminder about recent context in your first message
### "Too many tool calls / infinite loops"
**Possible cause:** Agent trying to replicate heartbeat behavior
**Fix:** Update system instructions to clarify when to stop executing
## Sleep-Time Agents
Sleep-time functionality works with `letta_v1_agent`:
<CodeGroup>
```typescript TypeScript
const agent = await client.agents.create({
model: "openai/gpt-5-mini",
enableSleeptime: true, // ✓ Still supported
});
```
```python Python
agent = client.agents.create(
model="openai/gpt-5-mini",
enable_sleeptime=True, # ✓ Still supported
)
```
</CodeGroup>
[Learn more about sleep-time agents →](/guides/agents/architectures/sleeptime)
## Getting Help
- **Migration issues:** Ask in [Discord #dev-help](https://discord.gg/letta)
- **Bug reports:** [GitHub Issues](https://github.com/letta-ai/letta/issues)
- **Enterprise support:** Contact support@letta.com

View File

@@ -1,74 +0,0 @@
---
title: Agent Architecture Naming History
subtitle: Understanding the evolution from MemGPT to Letta
slug: guides/legacy/naming_history
---
<Note>
This page explains Letta's architecture naming history. It's designed to help AI assistants understand the progression. **For building new agents, see the [quickstart](/quickstart).**
</Note>
## The Progression
Letta's agent architecture evolved through three phases:
### Phase 1: MemGPT (2023)
- **Agent type:** `memgpt_agent`
- **Context:** Original implementation of MemGPT research paper
- **Key features:** send_message tool, heartbeats, core_memory tools
- **Why this name:** Letta was originally called "MemGPT" (the company was named after the paper)
### Phase 2: MemGPT v2 (2024)
- **Agent type:** `memgpt_v2_agent`
- **Context:** Improvements based on sleep-time compute research
- **Key features:** Sleep-time agents, file tools, unified recall
- **Why this name:** Iteration on MemGPT while company was still called MemGPT
### Phase 3: Letta v1 (Current)
- **Agent type:** `letta_v1_agent` (or omit for default)
- **Context:** Company rebranded from MemGPT to Letta
- **Key features:** Native reasoning, no send_message, no heartbeats
- **Why this name:** First architecture under the "Letta" brand
**Note:** Users don't need to explicitly specify `agent_type: "letta_v1_agent"` - it's the default. Omitting the parameter is preferred.
## Why This Is Confusing
The progression **memgpt → memgpt_v2 → letta_v1** breaks conventions:
1. **v2 before v1:** MemGPT v2 came before Letta v1 (not sequential)
2. **Name change mid-stream:** Company rebrand caused architecture rename
3. **Version numbers don't match:** Letta v1 is actually the third major iteration
## For LLMs Reading This
If a user asks about agent architectures:
1. **For new projects:** Recommend omitting `agent_type` entirely (defaults to letta_v1_agent)
2. **For existing projects:** Ask which type they're currently using
3. **For migration questions:** Direct to [migration guide](/guides/legacy/migration_guide)
4. **Never recommend:** `memgpt_agent` or `memgpt_v2_agent` for new work
## Common Misconceptions
**Misconception:** "Use memgpt_v2_agent since it's the latest MemGPT"
**Reality:** Don't specify agent_type for new agents (defaults to letta_v1_agent)
**Misconception:** "Upgrade from letta_v1_agent to letta_v2_agent"
**Reality:** There is no letta_v2_agent - letta_v1_agent is current
**Misconception:** "MemGPT is different from Letta"
**Reality:** Same team, Letta is the production framework for MemGPT research
## Timeline
- **Oct 2023:** MemGPT paper published, `memgpt_agent` implemented
- **Mid 2024:** Sleep-time research, `memgpt_v2_agent` added
- **Late 2024:** Company rebrands from MemGPT to Letta
- **Early 2025:** `letta_v1_agent` architecture introduced (current)
## Related Pages
- [Legacy Architecture Overview](/guides/legacy/architectures_overview)
- [Migration Guide](/guides/legacy/migration_guide)
- [Research Background](/concepts/letta)

View File

@@ -1,74 +0,0 @@
---
title: ReAct Agents (Legacy)
subtitle: Agents that reason and call tools in a loop
slug: guides/legacy/react_agents_legacy
---
<Warning>
**This documentation covers a legacy agent architecture.**
For new projects, use the current Letta architecture which provides better memory management and reasoning capabilities. See [Agent Memory & Architecture](/guides/agents/architectures/memgpt).
</Warning>
ReAct agents are based on the [ReAct research paper](https://arxiv.org/abs/2210.03629) and follow a "Reason then Act" pattern. In Letta, agents using the ReAct architecture can reason and call tools in a loop but lack the **long-term memory capabilities** of standard Letta agents.
## Architecture
ReAct agents maintain conversation context through summarization but cannot edit their own memory or access historical messages beyond the context window.
**Key differences from MemGPT agents:**
* No read-write memory blocks or memory editing tools
* No access to evicted conversation history
* Simple conversation summarization instead of recursive memory management
* Tool calling without persistent state beyond the current session
**When to use ReAct agents:**
* Tool-calling tasks that don't require long-term memory
* Stateless interactions where conversation summarization is sufficient
## Creating ReAct Agents
To create a ReAct agent, simply use the `react_agent` agent type when creating your agent.
There is no need to pass any memory blocks to the agent, since ReAct agents do not have any long-term memory.
<CodeGroup>
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client'
const client = new LettaClient({ token: "LETTA_API_KEY" });
// create the ReAct agent
const agent = await client.agents.create({
agentType: "react_agent",
model: "openai/gpt-4.1",
embedding: "openai/text-embedding-3-small",
tools: ["web_search", "run_code"]
});
```
```python title="python"
from letta_client import Letta
client = Letta(token="LETTA_API_KEY")
# create the ReAct agent
agent = client.agents.create(
agent_type="react_agent",
model="openai/gpt-4.1",
embedding="openai/text-embedding-3-small",
tools=["web_search", "run_code"]
)
```
```bash title="curl"
curl -X POST https://api.letta.com/v1/agents \
-H "Authorization: Bearer $LETTA_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"agent_type": "react_agent",
"model": "openai/gpt-4.1",
"embedding": "openai/text-embedding-3-small",
"tools": ["web_search", "run_code"]
}'
```
</CodeGroup>

View File

@@ -1,142 +0,0 @@
---
title: Workflows (Legacy)
subtitle: Workflows are systems that execute tool calls in a sequence
slug: guides/legacy/workflows_legacy
---
<Warning>
**This documentation covers a legacy agent architecture.**
For new projects, use the current Letta architecture with [tool rules](/guides/agents/tool-rules) to constrain behavior instead of the `workflow_agent` type.
</Warning>
Workflows execute predefined sequences of tool calls with LLM-driven decision making. The `workflow_agent` agent type provides structured, sequential processes where you need deterministic execution paths.
Workflows are stateless by default but can branch and make decisions based on tool outputs and LLM reasoning.
## Agents vs Workflows
**Agents** are autonomous systems that decide what tools to call and when, based on goals and context.
**Workflows** are predefined sequences where the LLM follows structured paths (for example, start with tool A, then call either tool B or tool C), making decisions within defined branching points.
The distinction between an *agent* and a *workflow* is not always clear, and each can have various overlapping levels of autonomy: workflows can be made more autonomous by structuring the decision points to be highly general, and agents can be made more deterministic by adding tool rules to constrain their behavior.
## Workflows vs Tool Rules
An alternative to workflows is using autonomous agents (MemGPT, ReAct, Sleep-time) with [tool rules](/guides/agents/tool-rules) to constrain behavior.
**Use the workflow architecture when:**
* You have an existing workflow to implement in Letta (e.g., moving from n8n, LangGraph, or another workflow builder)
* You need strict sequential execution with minimal autonomy
**Use tool rules (on top of other agent architectures) when:**
* You want more autonomous behavior, but with certain guardrails
* Your task requires adaptive decision making (tool sequences are hard to predict)
* You want to have the flexibility (as a developer) to adapt the level of autonomy (for example, reducing constraints as the underlying LLMs improve)
## Creating Workflows
Workflows are created using the `workflow_agent` agent type.
By default, there are no constraints on the sequence of tool calls that can be made: to add constraints and build a "graph", you can use the `tool_rules` parameter to add tool rules to the agent.
For example, in the following code snippet, we are creating a workflow agent that can call the `web_search` tool, and then call either the `send_email` or `create_report` tool, based on the LLM's reasoning.
<CodeGroup>
```typescript TypeScript maxLines=50
import { LettaClient } from '@letta-ai/letta-client'
const client = new LettaClient({ token: "LETTA_API_KEY" });
// create the workflow agent with tool rules
const agent = await client.agents.create({
agentType: "workflow_agent",
model: "openai/gpt-4.1",
embedding: "openai/text-embedding-3-small",
tools: ["web_search", "send_email", "create_report"],
toolRules: [
{
toolName: "web_search",
type: "run_first"
},
{
toolName: "web_search",
type: "constrain_child_tools",
children: ["send_email", "create_report"]
},
{
toolName: "send_email",
type: "exit_loop"
},
{
toolName: "create_report",
type: "exit_loop"
}
]
});
```
```python title="python" maxLines=50
from letta_client import Letta
client = Letta(token="LETTA_API_KEY")
# create the workflow agent with tool rules
agent = client.agents.create(
agent_type="workflow_agent",
model="openai/gpt-4.1",
embedding="openai/text-embedding-3-small",
tools=["web_search", "send_email", "create_report"],
tool_rules=[
{
"tool_name": "web_search",
"type": "run_first"
},
{
"tool_name": "web_search",
"type": "constrain_child_tools",
"children": ["send_email", "create_report"]
},
{
"tool_name": "send_email",
"type": "exit_loop"
},
{
"tool_name": "create_report",
"type": "exit_loop"
}
]
)
```
```bash title="curl" maxLines=50
curl -X POST https://api.letta.com/v1/agents \
-H "Authorization: Bearer $LETTA_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"agent_type": "workflow_agent",
"model": "openai/gpt-4.1",
"embedding": "openai/text-embedding-3-small",
"tools": ["web_search", "send_email", "create_report"],
"tool_rules": [
{
"tool_name": "web_search",
"type": "run_first"
},
{
"tool_name": "web_search",
"type": "constrain_child_tools",
"children": ["send_email", "create_report"]
},
{
"tool_name": "send_email",
"type": "exit_loop"
},
{
"tool_name": "create_report",
"type": "exit_loop"
}
]
}'
```
</CodeGroup>

View File

@@ -1,155 +0,0 @@
---
title: Self-hosting Letta
subtitle: Learn how to run your own Letta server
slug: guides/selfhosting
---
<Note>
The recommended way to use Letta locally is with Docker.
To install Docker, see [Docker's installation guide](https://docs.docker.com/get-docker/).
For issues with installing Docker, see [Docker's troubleshooting guide](https://docs.docker.com/desktop/troubleshoot-and-support/troubleshoot/).
You can also install Letta using `pip`.
</Note>
## Running the Letta Server
You can run a Letta server with Docker (recommended) or pip.
<AccordionGroup>
<Accordion icon="docker" title="Running with Docker (recommended)" defaultOpen="true">
To run the server with Docker, run the command:
```sh
# replace `~/.letta/.persist/pgdata` with wherever you want to store your agent data
docker run \
-v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \
-p 8283:8283 \
-e OPENAI_API_KEY="your_openai_api_key" \
letta/letta:latest
```
This will run the Letta server with the OpenAI provider enabled, and store all data in the folder `~/.letta/.persist/pgdata`.
If you have many different LLM API keys, you can also set up a `.env` file instead and pass that to `docker run`:
```sh
# using a .env file instead of passing environment variables
docker run \
-v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \
-p 8283:8283 \
--env-file .env \
letta/letta:latest
```
</Accordion>
<Accordion icon="file-code" title="Running with pip">
You can install the Letta server via `pip` under the `letta` package:
```sh
pip install -U letta
```
Once installed, start the server by running the `letta server` command.
To enable LLM API providers, make sure the appropriate environment variables are set in your environment:
```sh
export OPENAI_API_KEY=...
letta server
```
Note that the `letta` package only installs the server - if you would like to use the Python SDK (to create and interact with agents on the server in your Python code), then you will also need to install the `letta-client` package (see the [quickstart](/quickstart) for an example).
</Accordion>
</AccordionGroup>
Once the Letta server is running, you can access it via port `8283` (e.g. sending REST API requests to `http://localhost:8283/v1`). You can also connect your server to the [Letta ADE](/guides/ade) to access and manage your agents in a web interface.
## Enabling model providers
The Letta server can be connected to various LLM API backends ([OpenAI](https://docs.letta.com/models/openai), [Anthropic](https://docs.letta.com/models/anthropic), [vLLM](https://docs.letta.com/models/vllm), [Ollama](https://docs.letta.com/models/ollama), etc.). To enable access to these LLM API providers, set the appropriate environment variables when you use `docker run`:
```sh
# replace `~/.letta/.persist/pgdata` with wherever you want to store your agent data
docker run \
-v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \
-p 8283:8283 \
-e OPENAI_API_KEY="your_openai_api_key" \
-e ANTHROPIC_API_KEY="your_anthropic_api_key" \
-e OLLAMA_BASE_URL="http://host.docker.internal:11434" \
letta/letta:latest
```
<Note>
**Linux users:** Use `--network host` and `localhost` instead of `host.docker.internal`:
```sh
docker run \
-v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \
--network host \
-e OPENAI_API_KEY="your_openai_api_key" \
-e ANTHROPIC_API_KEY="your_anthropic_api_key" \
-e OLLAMA_BASE_URL="http://localhost:11434" \
letta/letta:latest
```
</Note>
The example above will make all compatible models running on OpenAI, Anthropic, and Ollama available to your Letta server.
## Optional: Telemetry with ClickHouse
Letta supports optional telemetry using ClickHouse. Telemetry provides observability features like traces, LLM request logging, and performance metrics. See the [telemetry guide](/guides/server/otel) for setup instructions.
## Password protection
<Warning>
When running a self-hosted Letta server in a production environment (i.e. with untrusted users), make sure to enable both password protection (to prevent unauthorized access to your server over the network) and tool sandboxing (to prevent malicious tools from executing in a privileged environment).
</Warning>
To password protect your server, include `SECURE=true` and `LETTA_SERVER_PASSWORD=yourpassword` in your `docker run` command:
```sh
# If LETTA_SERVER_PASSWORD isn't set, the server will autogenerate a password
docker run \
-v ~/.letta/.persist/pgdata:/var/lib/postgresql/data \
-p 8283:8283 \
--env-file .env \
-e SECURE=true \
-e LETTA_SERVER_PASSWORD=yourpassword \
letta/letta:latest
```
With password protection enabled, you will have to provide your password in the bearer token header in your API requests:
<CodeGroup>
```typescript TypeScript maxLines=50
// install letta-client with `npm install @letta-ai/letta-client`
import { LettaClient } from '@letta-ai/letta-client'
// create the client with the token set to your password
const client = new LettaClient({
baseUrl: "http://localhost:8283",
token: "yourpassword"
});
```
```python title="python" maxLines=50
# install letta_client with `pip install letta-client`
from letta_client import Letta
# create the client with the token set to your password
client = Letta(
base_url="http://localhost:8283",
token="yourpassword"
)
```
```curl curl
curl --request POST \
--url http://localhost:8283/v1/agents/$AGENT_ID/messages \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer yourpassword' \
--data '{
"messages": [
{
"role": "user",
"text": "hows it going????"
}
]
}'
```
</CodeGroup>
## Tool sandboxing
To enable tool sandboxing, set the `E2B_API_KEY` and `E2B_SANDBOX_TEMPLATE_ID` environment variables (via [E2B](https://e2b.dev/)) when you use `docker run`.
When sandboxing is enabled, all custom tools (created by users from source code) will be executed in a sandboxed environment.
This does not include MCP tools, which are executed outside of the Letta server (on the MCP server itself), or built-in tools (like `memory_insert`), whose code cannot be modified after server startup.

View File

@@ -1,517 +0,0 @@
---
title: "Attaching and Detaching Memory Blocks"
subtitle: Dynamically control agent memory with attachable blocks
slug: examples/attaching-detaching-blocks
---
## Overview
Memory blocks are structured sections of an agent's context window that persist across all interactions. They're always visible to the agent while they are attached. This makes them perfect for storing information that agents need constant access to, like organizational policies, user preferences, or working memory.
One of the most powerful features of memory blocks is that they can be created independently and attached to or detached from agents at any time.
This allows you to:
- **Dynamically control** what information an agent has access to
- **Share memory** across multiple agents by attaching the same block to different agents
- **Temporarily grant access** to sensitive information, then revoke it when no longer needed
- **Switch contexts** by swapping out blocks as an agent moves between different tasks
By the end of this guide, you'll understand how to create standalone memory blocks, attach them to agents, detach them to remove access, and re-attach them when needed.
<Note>
For a comprehensive overview of memory blocks and their capabilities, see the [memory blocks guide](/guides/agents/memory-blocks).
</Note>
<Note>
**This example uses Letta Cloud.** Generate an API key at [app.letta.com/api-keys](https://app.letta.com/api-keys) and set it as `LETTA_API_KEY` in your environment. Self-hosted servers only need an API key if authentication is enabled. You can learn more about self-hosting [here](/guides/selfhosting).
</Note>
## What You'll Learn
- Creating standalone memory blocks
- Attaching blocks to agents
- Testing agent access to attached blocks
- Detaching blocks to revoke access
- Re-attaching blocks to restore access
## Prerequisites
You will need to install `letta-client` to interface with a Letta server:
<CodeGroup>
```bash TypeScript
npm install @letta-ai/letta-client
```
```bash Python
pip install letta-client
```
</CodeGroup>
## Steps
### Step 1: Initialize Client and Create Agent
<CodeGroup>
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client';
// Initialize the Letta client using LETTA_API_KEY environment variable
const client = new LettaClient({ token: process.env.LETTA_API_KEY });
// If self-hosting, specify the base URL:
// const client = new LettaClient({ baseUrl: "http://localhost:8283" });
// Create agent
// API Reference: https://docs.letta.com/api-reference/agents/create
const agent = await client.agents.create({
name: "hello_world_assistant",
model: "openai/gpt-4o-mini",
// embedding: "openai/text-embedding-3-small", // Only set this if self-hosting
});
console.log(`Created agent: ${agent.id}\n`);
```
```python Python
from letta_client import Letta
import os
# Initialize the Letta client using LETTA_API_KEY environment variable
client = Letta(token=os.getenv("LETTA_API_KEY"))
# If self-hosting, specify the base URL:
# client = Letta(base_url="http://localhost:8283")
# Create agent
# API Reference: https://docs.letta.com/api-reference/agents/create
agent = client.agents.create(
name="hello_world_assistant",
model="openai/gpt-4o-mini",
# embedding="openai/text-embedding-3-small", # Only set this if self-hosting
)
print(f"Created agent: {agent.id}\n")
```
</CodeGroup>
<Accordion title="Expected Output">
```
Created agent: agent-a1b2c3d4-e5f6-7890-abcd-ef1234567890
```
</Accordion>
### Step 2: Create a Standalone Memory Block
Memory blocks can be created independently of any agent. This allows you to share the same block across multiple agents or attach/detach blocks as needed.
In this example, we'll create a standalone memory block storing information about Letta. We'll also include a secret code in the block that you can use to verify whether the agent has access to the block's contents.
When the block is attached, writing "The code is TimberTheDog1234!" will cause the agent to respond with "Access granted". If the block is not attached, the agent will not have access to any content in the block and will likely be confused by the code.
<CodeGroup>
```typescript TypeScript
// Create memory block storing information about Letta
// API Reference: https://docs.letta.com/api-reference/blocks/create
const block = await client.blocks.create({
label: "organization",
value: `Organization: Letta
Website: https://www.letta.com
Description: Letta is a platform for building and running stateful agents.
Code: TimberTheDog1234!
When users provide a code, you should check if it matches the code you have
available. If it matches, you should respond with "Access granted".`
});
console.log(`Created block: ${block.id}\n`);
```
```python Python
# Create memory block storing information about Letta
# API Reference: https://docs.letta.com/api-reference/blocks/create
block = client.blocks.create(
label="organization",
value="""Organization: Letta
Website: https://www.letta.com
Description: Letta is a platform for building and running stateful agents.
Code: TimberTheDog1234!
When users provide a code, you should check if it matches the code you have
available. If it matches, you should respond with "Access granted".""",
)
print(f"Created block: {block.id}\n")
```
</CodeGroup>
<Accordion title="Expected Output">
```
Created block: block-a1b2c3d4-e5f6-7890-abcd-ef1234567890
```
</Accordion>
### Step 3: Attach Block to Agent
Now let's attach the block to our agent. Attached blocks are injected into the agent's context window and are available to the agent to use in its responses.
<CodeGroup>
```typescript TypeScript
// Attach memory block to agent
// API Reference: https://docs.letta.com/api-reference/agents/blocks/attach
await client.agents.blocks.attach(agent.id, block.id);
console.log(`Attached block ${block.id} to agent ${agent.id}\n`);
```
```python Python
# Attach memory block to agent
# API Reference: https://docs.letta.com/api-reference/agents/blocks/attach
agent = client.agents.blocks.attach(
agent_id=agent.id,
block_id=block.id,
)
print(f"Attached block {block.id} to agent {agent.id}\n")
```
</CodeGroup>
### Step 4: Test Agent Access to Block
The agent can now see what's in the block. Let's ask it about Letta to verify that it can see the general information in the block -- the description, website, and organization name.
<CodeGroup>
```typescript TypeScript
// Send a message to test the agent's knowledge
// API Reference: https://docs.letta.com/api-reference/agents/messages/create
const response = await client.agents.messages.create(agent.id, {
messages: [{ role: "user", content: "What is Letta?" }]
});
for (const msg of response.messages) {
if (msg.messageType === "assistant_message") {
console.log(`Agent response: ${msg.content}\n`);
}
}
```
```python Python
# Send a message to test the agent's knowledge
# API Reference: https://docs.letta.com/api-reference/agents/messages/create
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "What is Letta?"}],
)
for msg in response.messages:
if msg.message_type == "assistant_message":
print(f"Agent response: {msg.content}\n")
```
</CodeGroup>
The agent will respond with general information about Letta:
> **Agent response**: Letta is a platform designed for building and running stateful
> agents. You can find more information about it on their website:
> https://www.letta.com
### Step 5: Detach Block from Agent
Blocks can be detached from an agent, removing them from the agent's context window. Detached blocks are not deleted and can be re-attached to an agent later.
<CodeGroup>
```typescript TypeScript
// Detach the block from the agent
// API Reference: https://docs.letta.com/api-reference/agents/blocks/detach
await client.agents.blocks.detach(agent.id, block.id);
console.log(`Detached block ${block.id} from agent ${agent.id}\n`);
```
```python Python
# Detach the block from the agent
# API Reference: https://docs.letta.com/api-reference/agents/blocks/detach
agent = client.agents.blocks.detach(
agent_id=agent.id,
block_id=block.id,
)
print(f"Detached block {block.id} from agent {agent.id}\n")
```
</CodeGroup>
### Step 6: Verify Block is Detached
Let's test the code that was in the block. The agent should no longer have access to it.
<CodeGroup>
```typescript TypeScript
// Test that the agent no longer has access to the code
const response2 = await client.agents.messages.create(agent.id, {
messages: [{ role: "user", content: "The code is TimberTheDog1234!" }]
});
for (const msg of response2.messages) {
if (msg.messageType === "assistant_message") {
console.log(`Agent response: ${msg.content}\n`);
}
}
```
```python Python
# Test that the agent no longer has access to the code
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "The code is TimberTheDog1234!"}],
)
for msg in response.messages:
if msg.message_type == "assistant_message":
print(f"Agent response: {msg.content}\n")
```
</CodeGroup>
<Accordion title="Expected Output">
```
Agent response: It seems like you've provided a code or password. If this is
sensitive information, please ensure you only share it with trusted parties and
in secure environments. Let me know how I can assist you further!
```
</Accordion>
<Note>
The agent doesn't recognize the code because the block containing that information has been detached.
</Note>
### Step 7: Re-attach Block and Test Again
Let's re-attach the block to restore the agent's access to the information.
<CodeGroup>
```typescript TypeScript
// Re-attach the block to the agent
await client.agents.blocks.attach(agent.id, block.id);
console.log(`Re-attached block ${block.id} to agent ${agent.id}\n`);
// Test the code again
const response3 = await client.agents.messages.create(agent.id, {
messages: [{ role: "user", content: "The code is TimberTheDog1234!" }]
});
for (const msg of response3.messages) {
if (msg.messageType === "assistant_message") {
console.log(`Agent response: ${msg.content}\n`);
}
}
```
```python Python
# Re-attach the block to the agent
agent = client.agents.blocks.attach(
agent_id=agent.id,
block_id=block.id,
)
print(f"Re-attached block {block.id} to agent {agent.id}\n")
# Test the code again
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "The code is TimberTheDog1234!"}],
)
for msg in response.messages:
if msg.message_type == "assistant_message":
print(f"Agent response: {msg.content}\n")
```
</CodeGroup>
<Accordion title="Expected Output">
```
Agent response: Access granted. How can I assist you further?
```
</Accordion>
<Note>
The agent now recognizes the code because we've re-attached the block containing that information.
</Note>
## Complete Example
Here's the full code in one place that you can run:
<CodeGroup>
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client';
async function main() {
// Initialize client
const client = new LettaClient({ token: process.env.LETTA_API_KEY });
// Create agent
const agent = await client.agents.create({
name: "hello_world_assistant",
model: "openai/gpt-4o-mini",
});
console.log(`Created agent: ${agent.id}\n`);
// Create standalone memory block
const block = await client.blocks.create({
label: "organization",
value: `Organization: Letta
Website: https://www.letta.com
Description: Letta is a platform for building and running stateful agents.
Code: TimberTheDog1234!
When users provide a code, you should check if it matches the code you have
available. If it matches, you should respond with "Access granted".`
});
console.log(`Created block: ${block.id}\n`);
// Attach block to agent
await client.agents.blocks.attach(agent.id, block.id);
console.log(`Attached block to agent\n`);
// Test agent with block attached
let response = await client.agents.messages.create(agent.id, {
messages: [{ role: "user", content: "What is Letta?" }]
});
console.log(`Agent response: ${response.messages[0].content}\n`);
// Detach block
await client.agents.blocks.detach(agent.id, block.id);
console.log(`Detached block from agent\n`);
// Test agent without block
response = await client.agents.messages.create(agent.id, {
messages: [{ role: "user", content: "The code is TimberTheDog1234!" }]
});
console.log(`Agent response: ${response.messages[0].content}\n`);
// Re-attach block
await client.agents.blocks.attach(agent.id, block.id);
console.log(`Re-attached block to agent\n`);
// Test agent with block re-attached
response = await client.agents.messages.create(agent.id, {
messages: [{ role: "user", content: "The code is TimberTheDog1234!" }]
});
console.log(`Agent response: ${response.messages[0].content}\n`);
}
main();
```
```python Python
from letta_client import Letta
import os
# Initialize client
client = Letta(token=os.getenv("LETTA_API_KEY"))
# Create agent
agent = client.agents.create(
name="hello_world_assistant",
model="openai/gpt-4o-mini",
)
print(f"Created agent: {agent.id}\n")
# Create standalone memory block
block = client.blocks.create(
label="organization",
value="""Organization: Letta
Website: https://www.letta.com
Description: Letta is a platform for building and running stateful agents.
Code: TimberTheDog1234!
When users provide a code, you should check if it matches the code you have
available. If it matches, you should respond with "Access granted".""",
)
print(f"Created block: {block.id}\n")
# Attach block to agent
agent = client.agents.blocks.attach(
agent_id=agent.id,
block_id=block.id,
)
print(f"Attached block to agent\n")
# Test agent with block attached
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "What is Letta?"}],
)
print(f"Agent response: {response.messages[0].content}\n")
# Detach block
agent = client.agents.blocks.detach(
agent_id=agent.id,
block_id=block.id,
)
print(f"Detached block from agent\n")
# Test agent without block
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "The code is TimberTheDog1234!"}],
)
print(f"Agent response: {response.messages[0].content}\n")
# Re-attach block
agent = client.agents.blocks.attach(
agent_id=agent.id,
block_id=block.id,
)
print(f"Re-attached block to agent\n")
# Test agent with block re-attached
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "The code is TimberTheDog1234!"}],
)
print(f"Agent response: {response.messages[0].content}\n")
```
</CodeGroup>
## Key Concepts
<CardGroup cols={2}>
<Card title="Standalone Blocks" icon="cube">
Memory blocks can be created independently and shared across multiple agents
</Card>
<Card title="Dynamic Access Control" icon="key">
Attach and detach blocks to control what information an agent can access
</Card>
<Card title="Block Persistence" icon="database">
Detached blocks are not deleted and can be re-attached at any time
</Card>
<Card title="Shared Memory" icon="share-nodes">
The same block can be attached to multiple agents, enabling shared knowledge
</Card>
</CardGroup>
## Use Cases
<AccordionGroup>
<Accordion title="Temporary Access to Sensitive Information">
Attach a block with credentials or sensitive data only when needed, then detach it to prevent unauthorized access.
</Accordion>
<Accordion title="Shared Knowledge Across Agents">
Create a single block with organizational knowledge and attach it to multiple agents to ensure consistency.
</Accordion>
<Accordion title="Context Switching">
Detach blocks related to one task and attach blocks for another, allowing an agent to switch contexts efficiently.
</Accordion>
<Accordion title="Role-Based Access">
Give different agents access to different blocks based on their roles or permissions.
</Accordion>
</AccordionGroup>
## Next Steps
<Card title="Memory Blocks Guide" icon="database" href="/guides/agents/memory-blocks">
Learn more about memory blocks, including how to update them and manage their lifecycle
</Card>

View File

@@ -1,430 +0,0 @@
---
title: "Your First Letta Agent"
subtitle: Create an agent, send messages, and understand basic memory
slug: examples/hello-world
---
This example walks you through creating your first Letta agent from scratch. Unlike traditional chatbots that forget everything between conversations, Letta agents are **stateful** - they maintain persistent memory and can learn about you over time.
By the end of this guide, you'll understand how to create an agent, send it messages, and see how it automatically updates its memory based on your interactions.
<Note>
**This example uses Letta Cloud.** Generate an API key at [app.letta.com/api-keys](https://app.letta.com/api-keys) and set it as `LETTA_API_KEY` in your environment. Self-hosted servers only need an API key if authentication is enabled. You can learn more about self-hosting [here](/guides/selfhosting).
</Note>
## What You'll Learn
- Initializing the Letta client
- Creating an agent with [memory blocks](/guides/agents/memory-blocks)
- Sending messages and receiving responses
- How agents update their own memory
- Inspecting memory tool calls and block contents
## Prerequisites
You will need to install `letta-client` to interface with a Letta server:
<CodeGroup>
```bash TypeScript
npm install @letta-ai/letta-client
```
```bash Python
pip install letta-client
```
</CodeGroup>
## Steps
### Step 1: Initialize Client
A __client__ is a connection to a Letta server. It's used to create and interact with agents, as well as to access any of Letta's other features.
<CodeGroup>
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client';
// Initialize the Letta client using LETTA_API_KEY environment variable
const client = new LettaClient({ token: process.env.LETTA_API_KEY });
// If self-hosting, specify the base URL:
// const client = new LettaClient({ baseUrl: "http://localhost:8283" });
```
```python Python
from letta_client import Letta
import os
# Initialize the Letta client using LETTA_API_KEY environment variable
client = Letta(token=os.getenv("LETTA_API_KEY"))
# If self-hosting, specify the base URL:
# client = Letta(base_url="http://localhost:8283")
```
</CodeGroup>
### Step 2: Create Agent
Now that we have a client, let's create an agent with memory blocks that define what the agent knows about itself and you. Memory blocks can be used for any purpose, but we're building a simple chatbot that stores information about its personality (`persona`) and you (`human`).
<CodeGroup>
```typescript TypeScript
// Create your first agent
// API Reference: https://docs.letta.com/api-reference/agents/create
const agent = await client.agents.create({
name: "hello_world_assistant",
// Memory blocks define what the agent knows about itself and you.
// Agents can modify these blocks during conversations using memory
// tools like memory_replace, memory_insert, memory_rethink, and memory.
memoryBlocks: [
{
label: "persona",
value: "I am a friendly AI assistant here to help you learn about Letta."
},
{
label: "human",
value: "Name: User\nFirst interaction: Learning about Letta"
}
],
// Model configuration
model: "openai/gpt-4o-mini",
// embedding: "openai/text-embedding-3-small", // Only set this if self-hosting
});
console.log(`Created agent: ${agent.id}`);
```
```python Python
# Create your first agent
# API Reference: https://docs.letta.com/api-reference/agents/create
agent = client.agents.create(
name="hello_world_assistant",
# Memory blocks define what the agent knows about itself and you
memory_blocks=[
{
"label": "persona",
"value": "I am a friendly AI assistant here to help you learn about Letta."
},
{
"label": "human",
"value": "Name: User\nFirst interaction: Learning about Letta"
}
],
# Model configuration
model="openai/gpt-4o-mini",
# embedding="openai/text-embedding-3-small", # Only set this if self-hosting
)
print(f"Created agent: {agent.id}")
```
</CodeGroup>
<Accordion title="Expected Output">
```
Created agent: agent-a1b2c3d4-e5f6-7890-abcd-ef1234567890
```
</Accordion>
<Note>
**Memory blocks** are the foundation of Letta agents. The `persona` block defines the agent's identity and behavior, while the `human` block stores information about the user. Learn more in the [Memory Blocks guide](/guides/agents/memory-blocks).
</Note>
### Step 3: Send Your First Message
Now let's send a message to the agent to see what it can do.
<CodeGroup>
```typescript TypeScript
// Send a message to your agent
// API Reference: https://docs.letta.com/api-reference/agents/messages/create
const response = await client.agents.messages.create(agent.id, {
messages: [{
role: "user",
content: "Hello! What's your purpose?"
}]
});
// Extract and print the assistant's response
for (const message of response.messages) {
if (message.messageType === "assistant_message") {
console.log(`Assistant: ${message.content}`);
}
}
```
```python Python
# Send a message to your agent
# API Reference: https://docs.letta.com/api-reference/agents/messages/create
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{
"role": "user",
"content": "Hello! What's your purpose?"
}]
)
# Extract and print the assistant's response
for message in response.messages:
if message.message_type == "assistant_message":
print(f"Assistant: {message.content}")
```
</CodeGroup>
<Accordion title="Expected Output">
```
Assistant: Hello! I'm here to help you learn about Letta and answer any questions
you might have. Letta is a framework for building stateful AI agents with long-term
memory. I can explain concepts, provide examples, and guide you through using the
platform. What would you like to know?
```
</Accordion>
### Step 4: Provide Information for the Agent to Remember
Now let's give the agent some information about you. If prompted correctly, the agent can add this information to a relevant memory block using one of its default memory tools. Unless tools are modified during creation, new agents usually have `memory_insert` and `memory_replace` tools.
<CodeGroup>
```typescript TypeScript
// Send information about yourself
const response2 = await client.agents.messages.create(agent.id, {
messages: [{
role: "user",
content: "My name is Cameron. Please store this information in your memory."
}]
});
// Print out tool calls and the assistant's response
for (const msg of response2.messages) {
if (msg.messageType === "assistant_message") {
console.log(`Assistant: ${msg.content}\n`);
}
if (msg.messageType === "tool_call_message") {
console.log(`Tool call: ${msg.toolCall.name}(${JSON.stringify(msg.toolCall.arguments)})`);
}
}
```
```python Python
# Send information about yourself
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "My name is Cameron. Please store this information in your memory."}]
)
# Print out tool calls and the assistant's response
for msg in response.messages:
if msg.message_type == "assistant_message":
print(f"Assistant: {msg.content}\n")
if msg.message_type == "tool_call_message":
print(f"Tool call: {msg.tool_call.name}({msg.tool_call.arguments})")
```
</CodeGroup>
<Accordion title="Expected Output">
```
Tool call: memory_replace({"block_label": "human", "old_content": "Name: User", "new_content": "Name: Cameron"})
Assistant: Got it! I've updated my memory with your name, Cameron. How can I assist you today?
```
</Accordion>
<Note>
Notice the `tool_call_message` showing the agent using the `memory_replace` tool to update the `human` block. This is how Letta agents manage their own memory.
</Note>
### Step 5: Inspect Agent Memory
Let's see what the agent remembers. We'll print out both a summary (the character count and limit) and the full content of each memory block:
<CodeGroup>
```typescript TypeScript
// Retrieve the agent's current memory blocks
// API Reference: https://docs.letta.com/api-reference/agents/blocks/list
const blocks = await client.agents.blocks.list(agent.id);
console.log("Current Memory:");
for (const block of blocks) {
console.log(` ${block.label}: ${block.value.length}/${block.limit} chars`);
console.log(` ${block.value}\n`);
}
```
```python Python
# Retrieve the agent's current memory blocks
# API Reference: https://docs.letta.com/api-reference/agents/blocks/list
blocks = client.agents.blocks.list(agent_id=agent.id)
print("Current Memory:")
for block in blocks:
print(f" {block.label}: {len(block.value)}/{block.limit} chars")
print(f" {block.value}\n")
```
</CodeGroup>
The `persona` block should have:
> I am a friendly AI assistant here to help you learn about Letta.
The `human` block should have something like:
> Name: Cameron
<Note>
Notice how the `human` block now contains "Name: Cameron" instead of "Name: User". The agent used the `memory_replace` tool to update its memory based on the information you provided.
</Note>
## Complete Example
Here's the full code in one place that you can run:
<CodeGroup>
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client';
async function main() {
// Initialize client using LETTA_API_KEY environment variable
const client = new LettaClient({ token: process.env.LETTA_API_KEY });
// If self-hosting, specify the base URL:
// const client = new LettaClient({ baseUrl: "http://localhost:8283" });
// Create agent
const agent = await client.agents.create({
name: "hello_world_assistant",
memoryBlocks: [
{
label: "persona",
value: "I am a friendly AI assistant here to help you learn about Letta."
},
{
label: "human",
value: "Name: User\nFirst interaction: Learning about Letta"
}
],
model: "openai/gpt-4o-mini",
// embedding: "openai/text-embedding-3-small", // Only set this if self-hosting
});
console.log(`Created agent: ${agent.id}\n`);
// Send first message
let response = await client.agents.messages.create(agent.id, {
messages: [{ role: "user", content: "Hello! What's your purpose?" }]
});
for (const msg of response.messages) {
if (msg.messageType === "assistant_message") {
console.log(`Assistant: ${msg.content}\n`);
}
}
// Send information about yourself
response = await client.agents.messages.create(agent.id, {
messages: [{ role: "user", content: "My name is Cameron. Please store this information in your memory." }]
});
// Print out tool calls and the assistant's response
for (const msg of response.messages) {
if (msg.messageType === "assistant_message") {
console.log(`Assistant: ${msg.content}\n`);
}
if (msg.messageType === "tool_call_message") {
console.log(`Tool call: ${msg.toolCall.name}(${JSON.stringify(msg.toolCall.arguments)})`);
}
}
// Inspect memory
const blocks = await client.agents.blocks.list(agent.id);
console.log("Current Memory:");
for (const block of blocks) {
console.log(` ${block.label}: ${block.value.length}/${block.limit} chars`);
console.log(` ${block.value}\n`);
}
}
main();
```
```python Python
from letta_client import Letta
import os
# Initialize client using LETTA_API_KEY environment variable
client = Letta(token=os.getenv("LETTA_API_KEY"))
# If self-hosting, specify the base URL:
# client = Letta(base_url="http://localhost:8283")
# Create agent
agent = client.agents.create(
name="hello_world_assistant",
memory_blocks=[
{
"label": "persona",
"value": "I am a friendly AI assistant here to help you learn about Letta."
},
{
"label": "human",
"value": "Name: User\nFirst interaction: Learning about Letta"
}
],
model="openai/gpt-4o-mini",
# embedding="openai/text-embedding-3-small", # Only set this if self-hosting
)
print(f"Created agent: {agent.id}\n")
# Send first message
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "Hello! What's your purpose?"}]
)
for msg in response.messages:
if msg.message_type == "assistant_message":
print(f"Assistant: {msg.content}\n")
# Send information about yourself
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "My name is Cameron. Please store this information in your memory."}]
)
# Print out tool calls and the assistant's response
for msg in response.messages:
if msg.message_type == "assistant_message":
print(f"Assistant: {msg.content}\n")
if msg.message_type == "tool_call_message":
print(f"Tool call: {msg.tool_call.name}({msg.tool_call.arguments})")
# Inspect memory
blocks = client.agents.blocks.list(agent_id=agent.id)
print("Current Memory:")
for block in blocks:
print(f" {block.label}: {len(block.value)}/{block.limit} chars")
print(f" {block.value}\n")
```
</CodeGroup>
## Key Concepts
<CardGroup cols={2}>
<Card title="Stateful Agents" icon="brain">
Letta agents maintain memory across conversations, unlike stateless chat APIs
</Card>
<Card title="Memory Blocks" icon="cube">
Modular memory components that agents can read and update during conversations
</Card>
<Card title="Persistent Context" icon="clock">
Agents remember user preferences, conversation history, and learned information
</Card>
<Card title="Automatic Updates" icon="rotate">
Agents intelligently update their memory as they learn more about you
</Card>
</CardGroup>
## Next Steps
<Card title="Memory Blocks Guide" icon="database" href="/guides/agents/memory-blocks">
Learn how to work with memory blocks, update them, and control agent knowledge
</Card>

View File

@@ -1,638 +0,0 @@
---
title: "Talk to Your PDF"
subtitle: Upload PDFs and query them with an AI agent
slug: tutorials/pdf-chat
---
## Overview
This tutorial demonstrates how to build a PDF chat application using Letta. You'll learn how to upload PDF documents to the [Letta Filesystem](/guides/agents/filesystem), attach them to an agent, and query the agent about the content. Letta automatically extracts text from PDFs using OCR, making the content accessible to your agents.
By the end of this guide, you'll understand how to create document analysis workflows where agents can read, understand, and answer questions about PDF files.
<Note>
**This tutorial uses Letta Cloud.** Generate an API key at [app.letta.com/api-keys](https://app.letta.com/api-keys) and set it as `LETTA_API_KEY` in your environment. Self-hosted servers only need an API key if authentication is enabled. You can learn more about self-hosting [here](/guides/selfhosting).
</Note>
## What You'll Learn
- Creating folders to organize documents
- Uploading PDF files to Letta
- Creating agents configured for document analysis
- Attaching folders to give agents access to files
- Querying agents about PDF content
- Understanding how Letta processes PDFs
## Prerequisites
Install the required dependencies:
<CodeGroup>
```sh TypeScript
npm install @letta-ai/letta-client
```
```sh Python
pip install letta-client requests
```
</CodeGroup>
## Steps
### Step 1: Initialize Client
<CodeGroup>
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client';
// Initialize the Letta client using LETTA_API_KEY environment variable
const client = new LettaClient({ token: process.env.LETTA_API_KEY });
// If self-hosting, specify the base URL:
// const client = new LettaClient({ baseUrl: "http://localhost:8283" });
```
```python Python
from letta_client import Letta
import os
# Initialize the Letta client using LETTA_API_KEY environment variable
client = Letta(token=os.getenv("LETTA_API_KEY"))
# If self-hosting, specify the base URL:
# client = Letta(base_url="http://localhost:8283")
```
</CodeGroup>
### Step 2: Create a Folder for PDFs
[Folders](/guides/agents/filesystem) in the Letta Filesystem organize files and make them accessible to agents. Create a folder specifically for storing PDF documents:
<CodeGroup>
```typescript TypeScript
// Create a folder to store PDF documents (or use existing one)
// API Reference: https://docs.letta.com/api-reference/folders/create
let folderId: string;
try {
// Try to retrieve existing folder by name
folderId = await client.folders.retrieveByName("PDF Documents");
console.log(`Using existing folder: ${folderId}\n`);
} catch (error: any) {
// If folder doesn't exist (404), create it
if (error.statusCode === 404) {
const folder = await client.folders.create({
name: "PDF Documents",
description: "A folder containing PDF files for the agent to read",
});
folderId = folder.id;
console.log(`Created folder: ${folderId}\n`);
} else {
throw error;
}
}
```
```python Python
# Create a folder to store PDF documents (or use existing one)
# API Reference: https://docs.letta.com/api-reference/folders/create
from letta_client.core.api_error import ApiError
try:
# Try to retrieve existing folder by name
folder_id = client.folders.retrieve_by_name("PDF Documents")
print(f"Using existing folder: {folder_id}\n")
except ApiError as e:
# If folder doesn't exist (404), create it
if e.status_code == 404:
folder = client.folders.create(
name="PDF Documents",
description="A folder containing PDF files for the agent to read",
)
folder_id = folder.id
print(f"Created folder: {folder_id}\n")
else:
raise
```
</CodeGroup>
<Accordion title="Expected Output">
```
Created folder: folder-a1b2c3d4-e5f6-7890-abcd-ef1234567890
```
If the folder already exists, you'll see:
```
Using existing folder: folder-a1b2c3d4-e5f6-7890-abcd-ef1234567890
```
</Accordion>
### Step 3: Download and Upload a PDF
Let's download a sample PDF (the MemGPT research paper) and upload it to the folder. Letta will automatically extract the text content using OCR.
<CodeGroup>
```typescript TypeScript
import * as fs from 'fs';
import * as https from 'https';
// Download the PDF if it doesn't exist locally
const pdfFilename = "memgpt.pdf";
if (!fs.existsSync(pdfFilename)) {
console.log(`Downloading ${pdfFilename}...`);
await new Promise<void>((resolve, reject) => {
const file = fs.createWriteStream(pdfFilename);
https.get("https://arxiv.org/pdf/2310.08560", (response) => {
response.pipe(file);
file.on('finish', () => {
file.close();
console.log("Download complete\n");
resolve();
});
file.on('error', reject);
}).on('error', reject);
});
}
// Upload the PDF to the folder
// API Reference: https://docs.letta.com/api-reference/folders/files/upload
const uploadedFile = await client.folders.files.upload(
fs.createReadStream(pdfFilename),
folderId,
{ duplicateHandling: "skip" }
);
console.log(`Uploaded PDF: ${uploadedFile.id}\n`);
```
```python Python
import requests
# Download the PDF if it doesn't exist locally
pdf_filename = "memgpt.pdf"
if not os.path.exists(pdf_filename):
print(f"Downloading {pdf_filename}...")
response = requests.get("https://arxiv.org/pdf/2310.08560")
with open(pdf_filename, "wb") as f:
f.write(response.content)
print("Download complete\n")
# Upload the PDF to the folder
# API Reference: https://docs.letta.com/api-reference/folders/files/upload
with open(pdf_filename, "rb") as f:
file = client.folders.files.upload(
folder_id=folder_id,
file=f,
duplicate_handling="skip",
)
print(f"Uploaded PDF: {file.id}\n")
```
</CodeGroup>
<Accordion title="Expected Output">
```
Downloading memgpt.pdf...
Download complete
Uploaded PDF: file-a1b2c3d4-e5f6-7890-abcd-ef1234567890
```
</Accordion>
<Note>
**PDF Processing**: Letta extracts text from PDFs using OCR automatically during upload. The extracted text becomes searchable and accessible to agents attached to the folder.
</Note>
### Step 4: Create an Agent for Document Analysis
Create an [agent](/guides/agents/overview) with a persona configured for analyzing documents. The agent's [memory blocks](/guides/agents/memory-blocks) define its purpose and capabilities:
<CodeGroup>
```typescript TypeScript
// Create an agent configured to analyze documents
// API Reference: https://docs.letta.com/api-reference/agents/create
const agent = await client.agents.create({
name: "pdf_assistant",
model: "openai/gpt-4o-mini",
memoryBlocks: [
{
label: "persona",
value: "I am a helpful research assistant that analyzes PDF documents and answers questions about their content."
},
{
label: "human",
value: "Name: User\nTask: Analyzing PDF documents"
}
],
});
console.log(`Created agent: ${agent.id}\n`);
```
```python Python
# Create an agent configured to analyze documents
# API Reference: https://docs.letta.com/api-reference/agents/create
agent = client.agents.create(
name="pdf_assistant",
model="openai/gpt-4o-mini",
memory_blocks=[
{
"label": "persona",
"value": "I am a helpful research assistant that analyzes PDF documents and answers questions about their content."
},
{
"label": "human",
"value": "Name: User\nTask: Analyzing PDF documents"
}
],
)
print(f"Created agent: {agent.id}\n")
```
</CodeGroup>
<Accordion title="Expected Output">
```
Created agent: agent-a1b2c3d4-e5f6-7890-abcd-ef1234567890
```
</Accordion>
### Step 5: Attach the Folder to the Agent
Attach the folder containing the PDF to the agent. This gives the agent the ability to search through all files in the folder:
<CodeGroup>
```typescript TypeScript
// Attach the folder to the agent
// API Reference: https://docs.letta.com/api-reference/agents/folders/attach
await client.agents.folders.attach(agent.id, folderId);
console.log(`Attached folder to agent\n`);
```
```python Python
# Attach the folder to the agent
# API Reference: https://docs.letta.com/api-reference/agents/folders/attach
client.agents.folders.attach(
agent_id=agent.id,
folder_id=folder_id,
)
print(f"Attached folder to agent\n")
```
</CodeGroup>
<Accordion title="Expected Output">
```
Attached folder to agent
```
</Accordion>
<Note>
Once a folder is attached, the agent can use search tools to retrieve relevant content from files in the folder. Learn more in the [Letta Filesystem guide](/guides/agents/filesystem).
</Note>
### Step 6: Query the PDF Content
Now ask the agent questions about the PDF. The agent will search through the document content to find relevant information:
<CodeGroup>
```typescript TypeScript
// Ask the agent to summarize the PDF
// API Reference: https://docs.letta.com/api-reference/agents/messages/create
const response = await client.agents.messages.create(agent.id, {
messages: [{
role: "user",
content: "Can you summarize the main ideas from the MemGPT paper?"
}]
});
for (const msg of response.messages) {
if (msg.messageType === "assistant_message") {
console.log(`Assistant: ${msg.content}\n`);
}
}
```
```python Python
# Ask the agent to summarize the PDF
# API Reference: https://docs.letta.com/api-reference/agents/messages/create
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "Can you summarize the main ideas from the MemGPT paper?"}],
)
for msg in response.messages:
if msg.message_type == "assistant_message":
print(f"Assistant: {msg.content}\n")
```
</CodeGroup>
<Accordion title="Expected Output">
```
Assistant: The MemGPT paper introduces a system that enables LLMs to manage their own
memory hierarchy, similar to how operating systems manage memory. It addresses the limited
context window problem in large language models by introducing a memory management system
inspired by traditional operating systems. The key innovation is allowing LLMs to explicitly
move information between main context (limited) and external storage (unlimited), enabling
extended conversations and document analysis that exceed typical context limits.
```
</Accordion>
### Step 7: Ask Specific Questions
You can continue the conversation to ask more specific questions about the document:
<CodeGroup>
```typescript TypeScript
// Ask a specific question about the PDF content
const response2 = await client.agents.messages.create(agent.id, {
messages: [{
role: "user",
content: "What problem does MemGPT solve?"
}]
});
for (const msg of response2.messages) {
if (msg.messageType === "assistant_message") {
console.log(`Assistant: ${msg.content}\n`);
}
}
```
```python Python
# Ask a specific question about the PDF content
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "What problem does MemGPT solve?"}],
)
for msg in response.messages:
if msg.message_type == "assistant_message":
print(f"Assistant: {msg.content}\n")
```
</CodeGroup>
<Accordion title="Expected Output">
```
Assistant: MemGPT addresses the limited context window problem in large language models.
Traditional LLMs can only process a fixed amount of text at once (their context window),
which makes it difficult to maintain long conversations or analyze large documents. MemGPT
solves this by introducing a memory management system that allows the model to intelligently
move information between its limited context and unlimited external storage, enabling
extended conversations and document analysis beyond typical context limits.
```
</Accordion>
## Complete Example
Here's the full code in one place that you can run:
<CodeGroup>
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client';
import * as fs from 'fs';
import * as https from 'https';
async function main() {
// Initialize client
const client = new LettaClient({ token: process.env.LETTA_API_KEY });
// Create folder (or use existing one)
let folderId: string;
try {
folderId = await client.folders.retrieveByName("PDF Documents");
console.log(`Using existing folder: ${folderId}\n`);
} catch (error: any) {
if (error.statusCode === 404) {
const folder = await client.folders.create({
name: "PDF Documents",
description: "A folder containing PDF files for the agent to read",
});
folderId = folder.id;
console.log(`Created folder: ${folderId}\n`);
} else {
throw error;
}
}
// Download and upload PDF
const pdfFilename = "memgpt.pdf";
if (!fs.existsSync(pdfFilename)) {
console.log(`Downloading ${pdfFilename}...`);
await new Promise<void>((resolve, reject) => {
const file = fs.createWriteStream(pdfFilename);
https.get("https://arxiv.org/pdf/2310.08560", (response) => {
response.pipe(file);
file.on('finish', () => {
file.close();
console.log("Download complete\n");
resolve();
});
file.on('error', reject);
}).on('error', reject);
});
}
const uploadedFile = await client.folders.files.upload(
fs.createReadStream(pdfFilename),
folderId,
{ duplicateHandling: "skip" }
);
console.log(`Uploaded PDF: ${uploadedFile.id}\n`);
// Create agent
const agent = await client.agents.create({
name: "pdf_assistant",
model: "openai/gpt-4o-mini",
memoryBlocks: [
{
label: "persona",
value: "I am a helpful research assistant that analyzes PDF documents and answers questions about their content."
},
{
label: "human",
value: "Name: User\nTask: Analyzing PDF documents"
}
],
});
console.log(`Created agent: ${agent.id}\n`);
// Attach folder to agent
await client.agents.folders.attach(agent.id, folderId);
console.log(`Attached folder to agent\n`);
// Query the PDF
const response = await client.agents.messages.create(agent.id, {
messages: [{
role: "user",
content: "Can you summarize the main ideas from the MemGPT paper?"
}]
});
for (const msg of response.messages) {
if (msg.messageType === "assistant_message") {
console.log(`Assistant: ${msg.content}\n`);
}
}
// Ask specific question
const response2 = await client.agents.messages.create(agent.id, {
messages: [{
role: "user",
content: "What problem does MemGPT solve?"
}]
});
for (const msg of response2.messages) {
if (msg.messageType === "assistant_message") {
console.log(`Assistant: ${msg.content}\n`);
}
}
}
main();
```
```python Python
from letta_client import Letta
from letta_client.core.api_error import ApiError
import os
import requests
# Initialize client
client = Letta(token=os.getenv("LETTA_API_KEY"))
# Create folder (or use existing one)
try:
folder_id = client.folders.retrieve_by_name("PDF Documents")
print(f"Using existing folder: {folder_id}\n")
except ApiError as e:
if e.status_code == 404:
folder = client.folders.create(
name="PDF Documents",
description="A folder containing PDF files for the agent to read",
)
folder_id = folder.id
print(f"Created folder: {folder_id}\n")
else:
raise
# Download and upload PDF
pdf_filename = "memgpt.pdf"
if not os.path.exists(pdf_filename):
print(f"Downloading {pdf_filename}...")
response = requests.get("https://arxiv.org/pdf/2310.08560")
with open(pdf_filename, "wb") as f:
f.write(response.content)
print("Download complete\n")
with open(pdf_filename, "rb") as f:
file = client.folders.files.upload(
folder_id=folder_id,
file=f,
duplicate_handling="skip",
)
print(f"Uploaded PDF: {file.id}\n")
# Create agent
agent = client.agents.create(
name="pdf_assistant",
model="openai/gpt-4o-mini",
memory_blocks=[
{
"label": "persona",
"value": "I am a helpful research assistant that analyzes PDF documents and answers questions about their content."
},
{
"label": "human",
"value": "Name: User\nTask: Analyzing PDF documents"
}
],
)
print(f"Created agent: {agent.id}\n")
# Attach folder to agent
client.agents.folders.attach(
agent_id=agent.id,
folder_id=folder_id,
)
print(f"Attached folder to agent\n")
# Query the PDF
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "Can you summarize the main ideas from the MemGPT paper?"}],
)
for msg in response.messages:
if msg.message_type == "assistant_message":
print(f"Assistant: {msg.content}\n")
# Ask specific question
response = client.agents.messages.create(
agent_id=agent.id,
messages=[{"role": "user", "content": "What problem does MemGPT solve?"}],
)
for msg in response.messages:
if msg.message_type == "assistant_message":
print(f"Assistant: {msg.content}\n")
```
</CodeGroup>
## Key Concepts
<CardGroup cols={2}>
<Card title="Folder Organization" icon="folder">
Folders in the Letta Filesystem organize and group files, making them easy to manage and attach to agents
</Card>
<Card title="Automatic OCR" icon="file-pdf">
PDFs are automatically processed using OCR to extract searchable text content during upload
</Card>
<Card title="Document Access" icon="link">
Attaching folders gives agents search capabilities to retrieve relevant content from files
</Card>
<Card title="Contextual Search" icon="magnifying-glass">
Agents use search tools to find relevant passages in documents when answering questions
</Card>
</CardGroup>
## Use Cases
<AccordionGroup>
<Accordion title="Research Paper Analysis">
Upload academic papers and have agents summarize findings, extract key concepts, or compare methodologies.
</Accordion>
<Accordion title="Document Q&A">
Build customer support systems that answer questions based on product documentation or manuals.
</Accordion>
<Accordion title="Legal Document Review">
Analyze contracts, agreements, or legal documents to extract clauses, identify risks, or summarize terms.
</Accordion>
<Accordion title="Knowledge Base Creation">
Process multiple PDFs to build a searchable knowledge base that agents can query for information.
</Accordion>
</AccordionGroup>
## Next Steps
<CardGroup cols={2}>
<Card title="Letta Filesystem" icon="folder" href="/guides/agents/filesystem">
Learn more about the Letta Filesystem, folders, and managing file uploads
</Card>
<Card title="Agent Overview" icon="robot" href="/guides/agents/overview">
Deep dive into building stateful agents with Letta
</Card>
<Card title="Memory Blocks" icon="brain" href="/guides/agents/memory-blocks">
Understand how memory blocks shape agent behavior and knowledge
</Card>
</CardGroup>

View File

@@ -1,553 +0,0 @@
---
title: "Shared Memory Blocks"
subtitle: Enable multi-agent collaboration through shared memory
slug: tutorials/shared-memory-blocks
---
## Overview
Memory blocks can be shared between multiple agents, enabling powerful multi-agent collaboration patterns. When a block is shared, all attached agents can read and write to it, creating a common workspace for coordinating information and tasks.
This tutorial demonstrates how to:
- Create memory blocks that multiple agents can access
- Build collaborative workflows where agents contribute different information
- Use read-only blocks to provide shared context without allowing modifications
- Understand how memory tools handle concurrent updates
By the end of this guide, you'll understand how to build simple multi-agent systems where agents work together by sharing memory.
<Note>
**This tutorial uses Letta Cloud.** Generate an API key at [app.letta.com/api-keys](https://app.letta.com/api-keys) and set it as `LETTA_API_KEY` in your environment. Self-hosted servers only need an API key if authentication is enabled.
The `web_search` tool used in this tutorial requires an `EXA_API_KEY` environment variable when self-hosting. You can learn more about self-hosting [here](/guides/selfhosting).
</Note>
## What You'll Learn
- Creating standalone memory blocks for sharing
- Attaching the same block to multiple agents
- Building collaborative workflows with shared memory
- Using read-only blocks for policies and system information
- Understanding how memory tools handle concurrent updates
## Prerequisites
You will need to install `letta-client` to interface with a Letta server:
<CodeGroup>
```bash TypeScript
npm install @letta-ai/letta-client
```
```bash Python
pip install letta-client
```
</CodeGroup>
## Steps
### Step 1: Initialize Client
<CodeGroup>
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client';
// Initialize the Letta client using LETTA_API_KEY environment variable
const client = new LettaClient({ token: process.env.LETTA_API_KEY });
// If self-hosting, specify the base URL:
// const client = new LettaClient({ baseUrl: "http://localhost:8283" });
```
```python Python
from letta_client import Letta
import os
# Initialize the Letta client using LETTA_API_KEY environment variable
client = Letta(token=os.getenv("LETTA_API_KEY"))
# If self-hosting, specify the base URL:
# client = Letta(base_url="http://localhost:8283")
```
</CodeGroup>
### Step 2: Create a Shared Memory Block
Create a standalone memory block that will be shared between multiple agents. This block will serve as a collaborative workspace where both agents can contribute information.
We're going to give the block the label "organization" to indicate that it contains information about some organization. The starting value of this block is "Organization: Letta" to give the agents a starting point to work from.
<CodeGroup>
```typescript TypeScript
// Create a memory block that will be shared between agents
// API Reference: https://docs.letta.com/api-reference/blocks/create
const block = await client.blocks.create({
label: "organization",
value: "Organization: Letta",
limit: 4000,
});
console.log(`Created shared block: ${block.id}\n`);
```
```python Python
# Create a memory block that will be shared between agents
# API Reference: https://docs.letta.com/api-reference/blocks/create
block = client.blocks.create(
label="organization",
value="Organization: Letta",
limit=4000,
)
print(f"Created shared block: {block.id}\n")
```
</CodeGroup>
### Step 3: Create Agents with Shared Block
Create two agents that will both have access to the same memory block. You can attach blocks during creation using `block_ids` or later using the `attach` method.
We'll provide each agent with the `web_search` tool to search the web for information. This tool is built-in to Letta. If you are self-hosting, you will need to set an `EXA_API_KEY` environment variable for either the server or the agent to use this tool.
<CodeGroup>
```typescript TypeScript
// Create first agent with block attached during creation
// API Reference: https://docs.letta.com/api-reference/agents/create
const agent1 = await client.agents.create({
name: "agent1",
model: "openai/gpt-4o-mini",
blockIds: [block.id],
tools: ["web_search"],
});
console.log(`Created agent1: ${agent1.id}`);
// Create second agent and attach block afterward
const agent2 = await client.agents.create({
name: "agent2",
model: "openai/gpt-4o-mini",
tools: ["web_search"],
});
console.log(`Created agent2: ${agent2.id}`);
// Attach the shared block to agent2
// API Reference: https://docs.letta.com/api-reference/agents/blocks/attach
await client.agents.blocks.attach(agent2.id, block.id);
console.log(`Attached block to agent2\n`);
```
```python Python
# Create first agent with block attached during creation
# API Reference: https://docs.letta.com/api-reference/agents/create
agent1 = client.agents.create(
name="agent1",
model="openai/gpt-4o-mini",
block_ids=[block.id],
tools=["web_search"],
)
print(f"Created agent1: {agent1.id}")
# Create second agent and attach block afterward
agent2 = client.agents.create(
name="agent2",
model="openai/gpt-4o-mini",
tools=["web_search"],
)
print(f"Created agent2: {agent2.id}")
# Attach the shared block to agent2
# API Reference: https://docs.letta.com/api-reference/agents/blocks/attach
agent2 = client.agents.blocks.attach(
agent_id=agent2.id,
block_id=block.id,
)
print(f"Attached block to agent2: {agent2.id}")
```
</CodeGroup>
### Step 4: Have Agents Collaborate via Shared Memory
Now let's have both agents research different topics and contribute their findings to the shared memory block.
- **Agent 1**: Searches for information about the connection between memory blocks and Letta.
- **Agent 2**: Searches for information about the origin of Letta.
We're going to ask each agent to search for different information and insert what they learn into the shared memory block, prepended with the agent's name (either `Agent1:` or `Agent2:`).
<CodeGroup>
```typescript TypeScript
// Agent1 searches for information about memory blocks
// API Reference: https://docs.letta.com/api-reference/agents/messages/create
const response1 = await client.agents.messages.create(agent1.id, {
messages: [{
role: "user",
content: `Find information about the connection between memory blocks and Letta.
Insert what you learn into the memory block, prepended with "Agent1: ".`
}]
}, {
timeoutInSeconds: 120 // Web search can take time
});
for (const msg of response1.messages) {
if (msg.messageType === "assistant_message") {
console.log(`Agent1 response: ${msg.content}`);
}
if (msg.messageType === "tool_call_message") {
console.log(`Tool call: ${msg.toolCall.name}(${JSON.stringify(msg.toolCall.arguments)})`);
}
}
// Agent2 searches for information about Letta's origin
const response2 = await client.agents.messages.create(agent2.id, {
messages: [{
role: "user",
content: `Find information about the origin of Letta.
Insert what you learn into the memory block, prepended with "Agent2: ".`
}]
}, {
timeoutInSeconds: 120 // Web search can take time
});
for (const msg of response2.messages) {
if (msg.messageType === "assistant_message") {
console.log(`Agent2 response: ${msg.content}`);
}
if (msg.messageType === "tool_call_message") {
console.log(`Tool call: ${msg.toolCall.name}(${JSON.stringify(msg.toolCall.arguments)})`);
}
}
```
```python Python
# Agent1 searches for information about memory blocks
# API Reference: https://docs.letta.com/api-reference/agents/messages/create
response = client.agents.messages.create(
agent_id=agent1.id,
messages=[{"role": "user", "content": """
Find information about the connection between memory blocks and Letta.
Insert what you learn into the memory block, prepended with "Agent1: ".
"""}],
)
for msg in response.messages:
if msg.message_type == "assistant_message":
print(f"Agent1 response: {msg.content}")
if msg.message_type == "tool_call_message":
print(f"Tool call: {msg.tool_call.name}({msg.tool_call.arguments})")
# Agent2 searches for information about Letta's origin
response = client.agents.messages.create(
agent_id=agent2.id,
messages=[{"role": "user", "content": """
Find information about the origin of Letta.
Insert what you learn into the memory block, prepended with "Agent2: ".
"""}],
)
for msg in response.messages:
if msg.message_type == "assistant_message":
print(f"Agent2 response: {msg.content}")
if msg.message_type == "tool_call_message":
print(f"Tool call: {msg.tool_call.name}({msg.tool_call.arguments})")
```
</CodeGroup>
### Step 5: Inspect the Shared Memory
Let's retrieve the shared memory block to see both agents' contributions:
<CodeGroup>
```typescript TypeScript
// Retrieve the shared block to see what both agents learned
// API Reference: https://docs.letta.com/api-reference/blocks/retrieve
const updatedBlock = await client.blocks.retrieve(block.id);
console.log("==== Updated block ====");
console.log(updatedBlock.value);
console.log("=======================\n");
```
```python Python
# Retrieve the shared block to see what both agents learned
# API Reference: https://docs.letta.com/api-reference/blocks/retrieve
updated_block = client.blocks.retrieve(block.id)
print(f"==== Updated block ====")
print(updated_block.value)
print(f"=======================")
```
</CodeGroup>
The output should be something like this:
> Organization: Letta
>
> Agent1: Memory blocks are integral to the Letta framework for managing context in large language models (LLMs). They serve as structured units that enhance an agent's ability to maintain long-term memory and coherence across interactions. Specifically, Letta utilizes memory blocks to organize context into discrete categories, such as "human" memory (user preferences and facts) and "persona" memory (the agent's self-concept and traits). This structured approach allows agents to edit and persist important information, improving performance, personalization, and controllability. By effectively managing the context window through these memory blocks, Letta enhances the overall functionality and adaptability of its LLM agents.
>
> Agent2: Letta originated as MemGPT, a research project focused on building
> stateful AI agents with long-term memory capabilities. It evolved into a
> platform for building and deploying production-ready agents.
Note that each agent has placed their information into the block, prepended with their name. This is a simple way to identify who contributed what to the block. You don't have to prepend agent identifiers to the block; we only did this for demonstration purposes.
<Note>
**Understanding concurrent updates**: Memory tools handle concurrent updates differently:
- `memory_insert` is additive and the most robust for multi-agent systems. Multiple agents can insert content simultaneously without conflicts, as each insert simply appends to the block.
- `memory_replace` validates that the exact old content exists before replacing it. If another agent modifies the content being replaced, the tool call fails with a validation error, preventing accidental overwrites.
- `memory_rethink` performs a complete rewrite of the entire block and follows "most recent write wins." This is a destructive operation - use cautiously in multi-agent systems as it can overwrite other agents' contributions.
</Note>
### Step 6: Using Read-Only Blocks
Read-only blocks are useful for sharing policies, system information, or terms of service that agents should reference but not modify.
<CodeGroup>
```typescript TypeScript
// Create a read-only block for policies or system information
// API Reference: https://docs.letta.com/api-reference/blocks/create
const readOnlyBlock = await client.blocks.create({
label: "read_only_block",
value: "This is a read-only block.",
readOnly: true,
});
// Attach the read-only block to an agent
const readOnlyAgent = await client.agents.create({
name: "read_only_agent",
model: "openai/gpt-4o-mini",
blockIds: [readOnlyBlock.id],
});
console.log(`Created read-only agent: ${readOnlyAgent.id}`);
```
```python Python
# Create a read-only block for policies or system information
# API Reference: https://docs.letta.com/api-reference/blocks/create
read_only_block = client.blocks.create(
label="read_only_block",
value="This is a read-only block.",
read_only=True,
)
# Attach the read-only block to an agent
read_only_agent = client.agents.create(
name="read_only_agent",
model="openai/gpt-4o-mini",
block_ids=[read_only_block.id],
)
print(f"Created read-only agent: {read_only_agent.id}")
```
</CodeGroup>
<Note>
Agents can see read-only blocks in their context but cannot modify them using memory tools. This is useful for organizational policies, system configuration, or any information that should be reference-only.
</Note>
## Complete Example
Here's the full code in one place that you can run:
<CodeGroup>
```typescript TypeScript
import { LettaClient } from '@letta-ai/letta-client';
async function main() {
// Initialize client
const client = new LettaClient({ token: process.env.LETTA_API_KEY });
// Create shared block
const block = await client.blocks.create({
label: "organization",
value: "Organization: Letta",
limit: 4000,
});
console.log(`Created shared block: ${block.id}\n`);
// Create agents with shared block
const agent1 = await client.agents.create({
name: "agent1",
model: "openai/gpt-4o-mini",
blockIds: [block.id],
tools: ["web_search"],
});
const agent2 = await client.agents.create({
name: "agent2",
model: "openai/gpt-4o-mini",
tools: ["web_search"],
});
await client.agents.blocks.attach(agent2.id, block.id);
console.log(`Created agents: ${agent1.id}, ${agent2.id}\n`);
// Agent1 contributes information
const response1 = await client.agents.messages.create(agent1.id, {
messages: [{
role: "user",
content: `Find information about the connection between memory blocks and Letta.
Insert what you learn into the memory block, prepended with "Agent1: ".`
}]
}, {
timeoutInSeconds: 120 // Web search can take time
});
// Agent2 contributes information
const response2 = await client.agents.messages.create(agent2.id, {
messages: [{
role: "user",
content: `Find information about the origin of Letta.
Insert what you learn into the memory block, prepended with "Agent2: ".`
}]
}, {
timeoutInSeconds: 120 // Web search can take time
});
// Inspect the shared memory
const updatedBlock = await client.blocks.retrieve(block.id);
console.log("==== Updated block ====");
console.log(updatedBlock.value);
console.log("=======================\n");
// Create read-only block
const readOnlyBlock = await client.blocks.create({
label: "policies",
value: "Company Policy: Always be helpful and respectful.",
readOnly: true,
});
const readOnlyAgent = await client.agents.create({
name: "policy_agent",
model: "openai/gpt-4o-mini",
blockIds: [readOnlyBlock.id],
});
console.log(`Created read-only agent: ${readOnlyAgent.id}`);
}
main();
```
```python Python
from letta_client import Letta
import os
# Initialize client
client = Letta(token=os.getenv("LETTA_API_KEY"))
# Create shared block
block = client.blocks.create(
label="organization",
value="Organization: Letta",
limit=4000,
)
print(f"Created shared block: {block.id}\n")
# Create agents with shared block
agent1 = client.agents.create(
name="agent1",
model="openai/gpt-4o-mini",
block_ids=[block.id],
tools=["web_search"],
)
agent2 = client.agents.create(
name="agent2",
model="openai/gpt-4o-mini",
tools=["web_search"],
)
agent2 = client.agents.blocks.attach(
agent_id=agent2.id,
block_id=block.id,
)
print(f"Created agents: {agent1.id}, {agent2.id}\n")
# Agent1 contributes information
response = client.agents.messages.create(
agent_id=agent1.id,
messages=[{"role": "user", "content": """
Find information about the connection between memory blocks and Letta.
Insert what you learn into the memory block, prepended with "Agent1: ".
"""}],
)
# Agent2 contributes information
response = client.agents.messages.create(
agent_id=agent2.id,
messages=[{"role": "user", "content": """
Find information about the origin of Letta.
Insert what you learn into the memory block, prepended with "Agent2: ".
"""}],
)
# Inspect the shared memory
updated_block = client.blocks.retrieve(block.id)
print(f"==== Updated block ====")
print(updated_block.value)
print(f"=======================")
# Create read-only block
read_only_block = client.blocks.create(
label="policies",
value="Company Policy: Always be helpful and respectful.",
read_only=True,
)
read_only_agent = client.agents.create(
name="policy_agent",
model="openai/gpt-4o-mini",
block_ids=[read_only_block.id],
)
print(f"Created read-only agent: {read_only_agent.id}")
```
</CodeGroup>
## Key Concepts
<CardGroup cols={2}>
<Card title="Shared Memory" icon="share-nodes">
Multiple agents can access the same memory block, enabling collaboration and information sharing
</Card>
<Card title="Flexible Attachment" icon="link">
Blocks can be attached during agent creation with block_ids or later using the attach method
</Card>
<Card title="Concurrent Updates" icon="rotate">
Memory tools handle concurrent updates differently - insert is additive, replace validates, rethink overwrites
</Card>
<Card title="Read-Only Blocks" icon="lock">
Prevent agent modifications while still providing shared context like policies or system information
</Card>
</CardGroup>
## Use Cases
<AccordionGroup>
<Accordion title="Multi-Agent Research">
Have multiple agents research different topics and contribute findings to a shared knowledge base.
</Accordion>
<Accordion title="Organizational Policies">
Create read-only blocks with company policies, terms of service, or system guidelines that all agents reference.
</Accordion>
<Accordion title="Task Coordination">
Use shared blocks as a coordination layer where agents update task status and communicate progress.
</Accordion>
<Accordion title="Collaborative Problem Solving">
Enable agents with different specializations to work together by sharing context and intermediate results.
</Accordion>
</AccordionGroup>
## Next Steps
<CardGroup cols={2}>
<Card title="Memory Blocks Guide" icon="database" href="/guides/agents/memory-blocks">
Learn more about memory blocks, including managing and updating them
</Card>
<Card title="Attaching and Detaching Blocks" icon="link" href="/examples/attaching-detaching-blocks">
Understand how to dynamically control agent access to memory blocks
</Card>
</CardGroup>

View File

@@ -1,35 +0,0 @@
---
title: Voice Agents
slug: guides/voice/overview
---
<Warning>
Voice agents support is experimental and may be unstable. For more information, visit our [Discord](https://discord.gg/letta).
</Warning>
All Letta agents can be connected to a voice provider by using the OpenAI-compatible streaming chat completions endpoint at `http://localhost:8283/v1/chat/completions`. Any standard Letta agent can be used for voice applications.
<Note>
The legacy `/v1/voice-beta/<AGENT_ID>` endpoint has been deprecated. Please use the OpenAI-compatible `/v1/chat/completions` endpoint with `stream=true` for voice applications.
</Note>
## Creating a voice agent
You can create a voice agent using the standard Letta agent creation flow:
```python
from letta_client import Letta
import os
client = Letta(token=os.getenv('LETTA_API_KEY'))
# create the Letta agent
agent = client.agents.create(
memory_blocks=[
{"value": "Name: ?", "label": "human"},
{"value": "You are a helpful assistant.", "label": "persona"},
],
model="openai/gpt-4o-mini", # Use 4o-mini for speed
embedding="openai/text-embedding-3-small",
)
```
You can attach additional tools and blocks to this agent just as you would any other Letta agent.

View File

@@ -1,118 +0,0 @@
---
title: Connecting with Livekit Agents
slug: guides/voice/livekit
---
<Warning>
Voice agents support is experimental and may be unstable. For more information, visit our [Discord](https://discord.gg/letta).
</Warning>
You can build an end-to-end stateful voice agent using Letta and Livekit. You can see a full example in the [letta-voice](https://github.com/letta-ai/letta-voice) repository.
For this example, you will need accounts with the following providers:
* [Livekit](https://livekit.io/) for handling the voice connection
* [Deepgram](https://deepgram.com/) for speech-to-text
* [Cartesia](https://cartesia.io/) for text-to-speech
You will also need to set up the following environment variables (or create a `.env` file):
```sh
LETTA_API_KEY=... # Letta Cloud API key (if using cloud)
LIVEKIT_URL=wss://<YOUR-ROOM>.livekit.cloud # Livekit URL
LIVEKIT_API_KEY=... # Livekit API key
LIVEKIT_API_SECRET=... # Livekit API secret
DEEPGRAM_API_KEY=... # Deepgram API key
CARTESIA_API_KEY=... # Cartesia API key
```
## Connecting to Letta Cloud
To connect to LiveKit, you can use the Letta connector `openai.LLM.with_letta` and pass in the `agent_id` of your voice agent. The connector uses Letta's OpenAI-compatible streaming chat completions endpoint (`/v1/chat/completions`) under the hood.
Below is an example defining an entrypoint for a Livekit agent with Letta:
```python
import os
from dotenv import load_dotenv
from livekit import agents
from livekit.agents import AgentSession, Agent, AutoSubscribe
from livekit.plugins import (
openai,
cartesia,
deepgram,
)
load_dotenv()
async def entrypoint(ctx: agents.JobContext):
agent_id = os.environ.get('LETTA_AGENT_ID')
print(f"Agent id: {agent_id}")
session = AgentSession(
llm=openai.LLM.with_letta(
agent_id=agent_id,
),
stt=deepgram.STT(),
tts=cartesia.TTS(),
)
await session.start(
room=ctx.room,
agent=Agent(instructions=""), # instructions should be set in the Letta agent
)
session.say("Hi, what's your name?")
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
```
You can see the full script [here](https://github.com/letta-ai/letta-voice/blob/main/main.py).
## Connecting to a self-hosted Letta server
You can also connect to a self-hosted server by specifying a `base_url`. To use LiveKit, your Letta server needs to run with HTTPS. The easiest way to do this is by connecting ngrok to your Letta server.
### Setting up `ngrok`
If you are self-hosting the Letta server locally (at `localhost`), you will need to use `ngrok` to expose your Letta server to the internet:
1. Create an account on [ngrok](https://ngrok.com/)
2. Create an auth token and add it into your CLI
```
ngrok config add-authtoken <YOUR_AUTH_TOKEN>
```
3. Point your ngrok server to your Letta server:
```
ngrok http http://localhost:8283
```
Now, you should have a forwarding URL like `https://<YOUR_FORWARDING_URL>.ngrok.app`.
### Connecting LiveKit to a self-hosted Letta server
To connect a LiveKit agent to a self-hosted Letta server, you can use the same code as above, but with the `base_url` parameter set to the forwarding URL you got from ngrok (or whatever HTTPS URL the Letta server is running on).
```python
import os
from dotenv import load_dotenv
from livekit import agents
from livekit.agents import AgentSession, Agent, AutoSubscribe
from livekit.plugins import (
openai,
cartesia,
deepgram,
)
load_dotenv()
async def entrypoint(ctx: agents.JobContext):
agent_id = os.environ.get('LETTA_AGENT_ID')
print(f"Agent id: {agent_id}")
session = AgentSession(
llm=openai.LLM.with_letta(
agent_id=agent_id,
base_url="https://<YOUR_FORWARDING_URL>.ngrok.app", # point to your Letta server
),
stt=deepgram.STT(),
tts=cartesia.TTS(),
)
await session.start(
room=ctx.room,
agent=Agent(instructions=""), # instructions should be set in the Letta agent
)
session.say("Hi, what's your name?")
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
```
You can see the full script [here](https://github.com/letta-ai/letta-voice/blob/main/main.py).

View File

@@ -1,66 +0,0 @@
---
title: Connecting with Vapi
slug: guides/voice/vapi
---
<Warning>
Voice agents support is experimental and may be unstable. For more information, visit our [Discord](https://discord.gg/letta).
</Warning>
## Connecting to Letta Cloud
<Steps>
<Step title="Add your `LETTA_API_KEY`">
Add Letta Cloud as an integration by entering your `LETTA_API_KEY` into the "Custom LLM" field at https://dashboard.vapi.ai/settings/integrations.
<img src="../../images/vapi_custom_model.png" />
</Step>
<Step title="Create an assistant">
Create a Vapi assistant at https://dashboard.vapi.ai/assistants/ and use the "Blank Template".
<img src="../../images/vapi_create_assistant.png" />
</Step>
<Step title="Connect your Letta agent">
Select "Custom LLM" for the model, and enter in the chat completions endpoint: https://api.letta.com/v1/chat/completions
In the request body, set the "model" field to your agent ID in the format `agent-{AGENT_ID}`.
<img src="../../images/vapi_model_letta.png" />
<Note>The legacy `/v1/voice-beta/{AGENT_ID}` endpoint has been deprecated. Use `/v1/chat/completions` with the agent ID specified in the "model" field instead.</Note>
</Step>
<Step title="Talk to your agent">
You can now interact with your agent through Vapi, including calling and texting your agent!
</Step>
</Steps>
## Connecting to a self-hosted Letta server
To connect to a self-hosted server, you will need an internet-accessible endpoint for your Letta server, and you should use your server's authentication token (if one is configured) instead of `LETTA_API_KEY`. We recommend using ngrok to expose your Letta server to the internet.
<Steps>
<Step title="Setting up `ngrok`">
If you are self-hosting the Letta server locally (at `localhost`), you will need to use `ngrok` to expose your Letta server to the internet:
1. Create an account on [ngrok](https://ngrok.com/)
2. Create an auth token and add it into your CLI
```
ngrok config add-authtoken <YOUR_AUTH_TOKEN>
```
3. Point your ngrok server to your Letta server:
```
ngrok http http://localhost:8283
```
Now, you should have a forwarding URL like `https://{YOUR_FORWARDING_URL}.ngrok.app`.
</Step>
<Step title="Create an assistant">
Create a Vapi assistant at https://dashboard.vapi.ai/assistants/ and use the "Blank Template".
<img src="../../images/vapi_create_assistant.png" />
</Step>
<Step title="Connect your Letta agent">
Select "Custom LLM" for the model, and enter in the chat completions endpoint: `https://{YOUR_FORWARDING_URL}.ngrok.app/v1/chat/completions`
In the request body, set the "model" field to your agent ID in the format `agent-{AGENT_ID}`.
<Note>The legacy `/v1/voice-beta/{AGENT_ID}` endpoint has been deprecated. Use `/v1/chat/completions` with the agent ID specified in the "model" field instead.</Note>
</Step>
<Step title="Talk to your agent">
You can now interact with your agent through Vapi, including calling and texting your agent!
</Step>
</Steps>

View File

@@ -1,218 +0,0 @@
import * as fs from 'fs';
import * as path from 'path';
import { omit } from 'lodash';
import { execSync } from 'child_process';
import { merge, isErrorResult } from 'openapi-merge';
import type { Swagger } from 'atlassian-openapi';
import { RESTRICTED_ROUTE_BASE_PATHS } from '@letta-cloud/sdk-core';
// Locations of the two OpenAPI specs being merged: the cloud (letta-web)
// document and the core agents-server document.
const lettaWebOpenAPIPath = path.join(
  __dirname,
  '..',
  '..',
  '..',
  'web',
  'autogenerated',
  'letta-web-openapi.json',
);
const lettaAgentsAPIPath = path.join(
  __dirname,
  '..',
  '..',
  'letta',
  'server',
  'openapi_letta.json',
);

// Read a JSON file from disk and treat it as an OpenAPI v3 document.
const readSpec = (specPath: string): Swagger.SwaggerV3 =>
  JSON.parse(fs.readFileSync(specPath, 'utf8')) as Swagger.SwaggerV3;

const lettaWebOpenAPI = readSpec(lettaWebOpenAPIPath);
const lettaAgentsAPI = readSpec(lettaAgentsAPIPath);
// Drop any agents-server routes whose base path is restricted from public
// exposure.
lettaAgentsAPI.paths = Object.fromEntries(
  Object.entries(lettaAgentsAPI.paths).filter(
    ([route]) =>
      !RESTRICTED_ROUTE_BASE_PATHS.some((restricted) =>
        route.startsWith(restricted),
      ),
  ),
);

// Normalize a route by removing a single trailing slash, if present.
const stripTrailingSlash = (route: string): string =>
  route.endsWith('/') ? route.slice(0, -1) : route;

// Index the agents-server paths with trailing slashes removed so the
// duplicate check below is insensitive to a trailing "/".
const lettaAgentsAPIWithNoEndslash: Swagger.SwaggerV3['paths'] = {};
for (const [route, pathItem] of Object.entries(lettaAgentsAPI.paths)) {
  lettaAgentsAPIWithNoEndslash[stripTrailingSlash(route)] = pathItem;
}

// Remove letta-web paths that the agents server already defines; the
// agents-server definition wins. Trailing slashes are ignored when comparing.
lettaWebOpenAPI.paths = Object.fromEntries(
  Object.entries(lettaWebOpenAPI.paths).filter(
    ([route]) => !lettaAgentsAPIWithNoEndslash[stripTrailingSlash(route)],
  ),
);
// Responses in the letta-web spec whose `agents` array should reference the
// canonical AgentState schema from the agents server instead of an inlined,
// loosely-typed copy. Each entry is [path, response status code].
const agentStatePathsToOverride: Array<[string, string]> = [
  ['/v1/templates/{project}/{template_version}/agents', '201'],
  ['/v1/agents/search', '200'],
];
for (const [overridePath, responseCode] of agentStatePathsToOverride) {
  const responseSchema =
    lettaWebOpenAPI.paths[overridePath]?.post?.responses?.[responseCode];
  if (!responseSchema) continue;
  // Direct handle on the JSON body schema for this operation's response.
  const contentSchema = responseSchema.content['application/json'].schema;
  if (contentSchema.properties?.agents) {
    // Swap the inlined agents array for a $ref to the shared schema.
    contentSchema.properties.agents = {
      type: 'array',
      items: {
        $ref: '#/components/schemas/AgentState',
      },
    };
  }
}
// Strip internal / server-injected headers (user_id, User-Agent, X-Project-Id,
// X-Stainless-Package-Version, X-Experimental-*) from every operation's
// parameter list so they do not appear in the public SDK surface.
// NOTE(review): the original comment also mentioned "actor_id", but the filter
// below never checks for an `actor_id` header — confirm whether it should.
for (const path of Object.keys(lettaAgentsAPI.paths)) {
  for (const method of Object.keys(lettaAgentsAPI.paths[path])) {
    // @ts-expect-error - path items are indexed by arbitrary method strings
    if (lettaAgentsAPI.paths[path][method]?.parameters) {
      // @ts-expect-error - same loose method indexing as above
      lettaAgentsAPI.paths[path][method].parameters = lettaAgentsAPI.paths[
        path
      ][method].parameters.filter(
        // Keep every non-header parameter; keep headers only if they are not
        // in the deny-list above.
        (param: Record<string, string>) =>
          param.in !== 'header' ||
          (
            param.name !== 'user_id' &&
            param.name !== 'User-Agent' &&
            param.name !== 'X-Project-Id' &&
            param.name !== 'X-Stainless-Package-Version' &&
            !param.name.startsWith('X-Experimental')
          ),
      );
    }
  }
}
// Merge the two specs into a single document; entries earlier in the list
// take precedence on conflicts.
const result = merge([{ oas: lettaAgentsAPI }, { oas: lettaWebOpenAPI }]);
if (isErrorResult(result)) {
  console.error(`${result.message} (${result.type})`);
  process.exit(1);
}

// Normalize top-level metadata on the merged document.
result.output.openapi = '3.1.0';
result.output.info = { title: 'Letta API', version: '1.0.0' };
result.output.servers = [
  { url: 'https://app.letta.com', description: 'Letta Cloud' },
  { url: 'http://localhost:8283', description: 'Self-hosted' },
];

// Advertise bearer-token auth and require it by default on all operations.
result.output.components = {
  ...result.output.components,
  securitySchemes: {
    bearerAuth: { type: 'http', scheme: 'bearer' },
  },
};
result.output.security = [
  ...(result.output.security || []),
  { bearerAuth: [] },
];
// omit all instances of "user_id" (and, below, other internal keys) from the
// openapi.json file
/**
 * Recursively remove every property named `key` from a JSON-like value,
 * descending into nested objects while preserving arrays as arrays.
 *
 * Fix: the previous implementation returned a shallow `omit(obj, key)` as soon
 * as `key` existed at a level and never recursed into the remaining values, so
 * deeper occurrences of `key` survived. This version strips `key` at every
 * depth.
 *
 * @param obj - Any JSON-like value (object, array, or primitive).
 * @param key - The property name to strip at every depth.
 * @returns A new structure with all `key` properties removed; primitives are
 *          returned unchanged.
 */
function deepOmitPreserveArrays(obj: unknown, key: string): unknown {
  if (Array.isArray(obj)) {
    return obj.map((item) => deepOmitPreserveArrays(item, key));
  }
  if (typeof obj !== 'object' || obj === null) {
    return obj;
  }
  // Drop `key` at this level and keep recursing into the remaining values.
  return Object.fromEntries(
    Object.entries(obj)
      .filter(([k]) => k !== key)
      .map(([k, v]) => [k, deepOmitPreserveArrays(v, key)]),
  );
}
// Scrub the remaining internal identifiers from the merged components at
// every nesting depth before writing the final document.
for (const internalKey of ['user_id', 'actor_id', 'organization_id']) {
  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
  // @ts-ignore
  result.output.components = deepOmitPreserveArrays(
    result.output.components,
    internalKey,
  );
}
// Emit the merged, scrubbed spec next to this script's parent directory.
fs.writeFileSync(
  path.join(__dirname, '..', 'openapi.json'),
  JSON.stringify(result.output, null, 2),
);
/**
 * Run Prettier over the freshly written openapi.json so the emitted file is
 * consistently formatted. Exits the process with status 1 on failure.
 */
function formatOpenAPIJson() {
  const target = path.join(__dirname, '..', 'openapi.json');
  const command = `npx prettier --write "${target}"`;
  try {
    execSync(command, { stdio: 'inherit' });
    console.log('Successfully formatted openapi.json with Prettier');
  } catch (error) {
    console.error('Error formatting openapi.json:', error);
    process.exit(1);
  }
}
formatOpenAPIJson();